feedtools 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +4 -0
- data/README +13 -0
- data/install.rb +30 -0
- data/lib/feed_tools.rb +1126 -0
- data/lib/feed_tools/vendor/builder.rb +13 -0
- data/lib/feed_tools/vendor/builder/blankslate.rb +53 -0
- data/lib/feed_tools/vendor/builder/xmlbase.rb +143 -0
- data/lib/feed_tools/vendor/builder/xmlevents.rb +63 -0
- data/lib/feed_tools/vendor/builder/xmlmarkup.rb +297 -0
- data/rakefile +111 -0
- metadata +62 -0
data/CHANGELOG
ADDED
data/README
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
FeedTools was designed to be a simple XML feed parser, generator, and translator with a built-in
|
2
|
+
caching system.
|
3
|
+
|
4
|
+
== Example
|
5
|
+
slashdot_feed = FeedTools::Feed.open('http://www.slashdot.org/index.rss')
|
6
|
+
slashdot_feed.title
|
7
|
+
=> "Slashdot"
|
8
|
+
slashdot_feed.description
|
9
|
+
=> "News for nerds, stuff that matters"
|
10
|
+
slashdot_feed.link
|
11
|
+
=> "http://slashdot.org/"
|
12
|
+
slashdot_feed.items.first.find_node("slash:hitparade/text()").to_s
|
13
|
+
=> "43,37,28,23,11,3,1"
|
data/install.rb
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
# Installs the FeedTools library files into Ruby's site library directory.
# This was adapted from rdoc's install.rb by way of Log4r.
require 'rbconfig'
require 'find'
require 'ftools'

include Config

# Resolve the destination directory. When the interpreter does not report
# "sitelibdir", fall back to a versioned site_ruby directory.
$sitedir = CONFIG["sitelibdir"]
unless $sitedir
  version = CONFIG["MAJOR"] + "." + CONFIG["MINOR"]
  $libdir = File.join(CONFIG["libdir"], "ruby", version)
  $sitedir = $:.find { |x| x =~ /site_ruby/ }
  if !$sitedir
    $sitedir = File.join($libdir, "site_ruby")
  elsif !$sitedir.include?(version)
    # A plain substring test: String#!~ with a String operand raises TypeError.
    $sitedir = File.join($sitedir, version)
  end
end

# the actual gruntwork
Dir.chdir("lib")

Find.find("feed_tools", "feed_tools.rb") do |f|
  target = File.join($sitedir, *f.split(/\//))
  if File.directory?(f)
    # Find yields a directory before its contents, so the target directory
    # exists by the time the files inside it are installed.
    File::makedirs(target)
  elsif f[-3..-1] == ".rb"
    File::install(f, target, 0644, true)
  end
  # Anything else (non-Ruby files) is skipped instead of being turned into
  # a directory named after the file.
end
|
data/lib/feed_tools.rb
ADDED
@@ -0,0 +1,1126 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2005 Robert Aman
|
3
|
+
#
|
4
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
5
|
+
# a copy of this software and associated documentation files (the
|
6
|
+
# "Software"), to deal in the Software without restriction, including
|
7
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
8
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
9
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
10
|
+
# the following conditions:
|
11
|
+
#
|
12
|
+
# The above copyright notice and this permission notice shall be
|
13
|
+
# included in all copies or substantial portions of the Software.
|
14
|
+
#
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
16
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
17
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
18
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
19
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
20
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
21
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
|
+
#++
|
23
|
+
|
24
|
+
FEED_TOOLS_ENV = ENV['FEED_TOOLS_ENV'] || ENV['RAILS_ENV'] || 'production'
|
25
|
+
|
26
|
+
$:.unshift(File.dirname(__FILE__))
|
27
|
+
$:.unshift(File.dirname(__FILE__) + "/../../activerecord/lib")
|
28
|
+
|
29
|
+
begin
|
30
|
+
require 'active_record'
|
31
|
+
rescue LoadError
|
32
|
+
require 'rubygems'
|
33
|
+
require_gem 'activerecord'
|
34
|
+
end
|
35
|
+
|
36
|
+
begin
|
37
|
+
require 'rubygems'
|
38
|
+
require 'builder'
|
39
|
+
rescue LoadError
|
40
|
+
# RubyGems is not available, use included Builder
|
41
|
+
$:.unshift(File.dirname(__FILE__) + "/feed_tools/vendor")
|
42
|
+
require 'feed_tools/vendor/builder'
|
43
|
+
end
|
44
|
+
|
45
|
+
require 'open-uri'
|
46
|
+
require 'time'
|
47
|
+
require 'rexml/document'
|
48
|
+
require 'yaml'
|
49
|
+
require 'cgi'
|
50
|
+
|
51
|
+
module FeedTools
|
52
|
+
class Feed < ActiveRecord::Base
|
53
|
+
include REXML
|
54
|
+
|
55
|
+
has_many :feed_items_unsorted, :class_name => "FeedItem"
|
56
|
+
|
57
|
+
# Builds a new feed object that has not been loaded from its remote source.
#
# Any arguments (and block) are forwarded unchanged to the superclass
# constructor, so the existing zero-argument call keeps working.
def initialize(*args)
  @live = false                # flipped to true by load_remote_feed
  @feed_items_unsorted = nil   # populated by feed_items and parse_feed
  super
end
|
62
|
+
|
63
|
+
# Loads the feed specified by the url, pulling the data from the cache if it hasn't expired.
|
64
|
+
# Be aware that this method translates from the feed: and rss: pseudo-protocols to the
|
65
|
+
# http: protocol as needed. This means that if you pass in a feed url that looks like
|
66
|
+
# 'feed://www.anywhere.com/feed.xml' it will end up being stored in the cache as
|
67
|
+
# 'http://www.anywhere.com/feed.xml' instead. This does affect the usage of methods like
|
68
|
+
# find_by_url, but otherwise should be fairly transparent.
|
69
|
+
# Returns the Feed for +url+. A cached record is looked up by url and
# refreshed only if it has expired; otherwise a new Feed is created and
# loaded from the remote location.
#
# A leading feed: or rss: pseudo-protocol is rewritten first (see
# normalize_feed_url), so the lookup uses the http(s) form of the url.
def self.open(url)
  url = normalize_feed_url(url)

  feed = nil
  begin
    feed = Feed.find_by_url(url)
  rescue ActiveRecord::StatementInvalid
    # make sure that the necessary tables are present and recover if possible
    FeedTools::Feed.prepare_connection
    FeedTools::Feed.create_cache unless FeedTools::Feed.cache_exists?
    feed = Feed.find_by_url(url)
  end

  if feed.nil?
    feed = Feed.new
    feed.url = url
    feed.load_remote_feed
  else
    feed.update_if_needed
  end
  feed
end

# Rewrites a leading feed: or rss: pseudo-protocol into a real scheme.
#
# Handles every variant the pseudo-protocols show up in: zero to three
# slashes after the colon, optionally followed by an embedded http:// or
# https:// (which is kept, so https urls stay https). Only the start of the
# string is touched; "feed:" or "rss:" appearing later in the url (for
# example inside a query string) is left alone. Urls without a
# pseudo-protocol prefix are returned unchanged.
def self.normalize_feed_url(url)
  url.sub(%r{\A(?:feed|rss):/{0,3}(?:(https?):/{2,3})?}) do
    "#{$1 || 'http'}://"
  end
end
|
112
|
+
|
113
|
+
# Checks if the feed has expired and updates if it has
|
114
|
+
# Reloads the feed from its remote location, but only when expired? says the
# cached copy is stale. Returns the result of load_remote_feed, or nil when
# nothing was reloaded.
def update_if_needed
  load_remote_feed if expired?
end
|
119
|
+
|
120
|
+
# Verifies that the table structure exists
|
121
|
+
# True only when both Feed.table_exists? and FeedItem.table_exists? pass.
def self.cache_exists?
  Feed.table_exists? && FeedItem.table_exists?
end
|
124
|
+
|
125
|
+
# Verifies that the required fields exist; additional ones added by the user are fine
|
126
|
+
# Probes the feeds table by selecting every required column from it.
# Returns true when the query runs, false when the database rejects it with
# ActiveRecord::StatementInvalid. Extra user-added columns do not matter.
def self.table_exists?
  connection.execute "select id, url, link, image_link, title, description, " +
    "tags, last_updated, etag, time_to_live from feeds limit 1"
  true
rescue ActiveRecord::StatementInvalid
  false
end
|
135
|
+
|
136
|
+
# Generates the table structure if necessary
|
137
|
+
# Generates the cache tables (feeds and feed_items) if they are missing.
#
# Does nothing when Feed.cache_exists? is already true. The table definition
# is chosen by configurations["adapter"] ("mysql", "sqlite" or "postgresql");
# any other adapter raises a RuntimeError naming the table that could not be
# built. The feeds table is created before feed_items because the PostgreSQL
# feed_items definition references it.
def self.create_cache
  return if Feed.cache_exists?

  feeds_mysql = <<-SQL_END
    CREATE TABLE `feeds` (
      `id` int(6) unsigned NOT NULL auto_increment,
      `url` varchar(255) NOT NULL default '',
      `link` varchar(255) NOT NULL default '',
      `image_link` varchar(255) default NULL,
      `title` varchar(255) default NULL,
      `description` text default NULL,
      `tags` varchar(255) default NULL,
      `last_updated` datetime default NULL,
      `etag` varchar(255) default NULL,
      `time_to_live` int(4) default NULL,
      PRIMARY KEY (`id`)
    ) ENGINE=MyISAM DEFAULT CHARSET=latin1;
  SQL_END
  feeds_sqlite = <<-SQL_END
    CREATE TABLE 'feeds' (
      'id' INTEGER PRIMARY KEY NOT NULL,
      'url' VARCHAR(255) DEFAULT NULL,
      'link' VARCHAR(255) DEFAULT NULL,
      'image_link' VARCHAR(255) DEFAULT NULL,
      'title' VARCHAR(255) DEFAULT NULL,
      'description' TEXT DEFAULT NULL,
      'tags' VARCHAR(255) DEFAULT NULL,
      'last_updated' DATETIME DEFAULT NULL,
      'etag' VARCHAR(255) DEFAULT NULL,
      'time_to_live' INTEGER DEFAULT NULL
    );
  SQL_END
  # NOTE(review): the PostgreSQL definitions use a `datetime` column type;
  # confirm the target server accepts that type name.
  feeds_psql = <<-SQL_END
    CREATE TABLE feeds (
      id SERIAL PRIMARY KEY NOT NULL,
      url varchar(255) default NULL,
      link varchar(255) default NULL,
      image_link varchar(255) default NULL,
      title varchar(255) default NULL,
      description text default NULL,
      tags varchar(255) default NULL,
      last_updated datetime default NULL,
      etag varchar(255) default NULL,
      time_to_live int default NULL
    );
  SQL_END

  feed_items_mysql = <<-SQL_END
    CREATE TABLE `feed_items` (
      `id` int(6) unsigned NOT NULL auto_increment,
      `feed_id` int(6) unsigned NOT NULL default '0',
      `link` varchar(255) default NULL,
      `title` varchar(255) default NULL,
      `author` varchar(255) default NULL,
      `description` text default NULL,
      `time` datetime NOT NULL default '0000-00-00 00:00:00',
      `tags` varchar(255) default NULL,
      PRIMARY KEY (`id`)
    ) ENGINE=MyISAM DEFAULT CHARSET=latin1;
  SQL_END
  feed_items_sqlite = <<-SQL_END
    CREATE TABLE 'feed_items' (
      'id' INTEGER PRIMARY KEY NOT NULL,
      'feed_id' INTEGER NOT NULL,
      'link' VARCHAR(255) DEFAULT NULL,
      'title' VARCHAR(255) DEFAULT NULL,
      'author' VARCHAR(255) DEFAULT NULL,
      'description' TEXT DEFAULT NULL,
      'time' DATETIME DEFAULT NULL,
      'tags' VARCHAR(255) DEFAULT NULL
    );
  SQL_END
  feed_items_psql = <<-SQL_END
    CREATE TABLE feed_items (
      id SERIAL PRIMARY KEY NOT NULL,
      feed_id int REFERENCES feeds,
      link varchar(255) default NULL,
      title varchar(255) default NULL,
      author varchar(255) default NULL,
      description text default NULL,
      time datetime default NULL,
      tags varchar(255) default NULL
    );
  SQL_END

  adapter = configurations["adapter"]

  unless Feed.table_exists?
    feeds_sql = {
      "mysql" => feeds_mysql,
      "sqlite" => feeds_sqlite,
      "postgresql" => feeds_psql
    }[adapter]
    raise "Could not build feeds table." if feeds_sql.nil?
    connection.execute feeds_sql
  end

  unless FeedItem.table_exists?
    feed_items_sql = {
      "mysql" => feed_items_mysql,
      "sqlite" => feed_items_sqlite,
      "postgresql" => feed_items_psql
    }[adapter]
    raise "Could not build feed_items table." if feed_items_sql.nil?
    connection.execute feed_items_sql
  end
end
|
251
|
+
|
252
|
+
# Removes all feed entries from the cache
|
253
|
+
# This could obviously be a very dangerous operation if you use the cache for more than simply
|
254
|
+
# caching the feeds.
|
255
|
+
# Empties the cache completely: every FeedItem row is deleted first, then
# every Feed row. Returns the result of Feed.delete_all.
def self.clear_cache
  FeedItem.delete_all
  Feed.delete_all
end
|
259
|
+
|
260
|
+
# Removes all feed items from the cache and resets the last updated time for all feeds
|
261
|
+
# This is probably much safer than the clear_cache method
|
262
|
+
# Deletes every cached FeedItem and nulls last_updated on all feeds, leaving
# the Feed rows themselves in place. A feed with a nil last_updated is
# reported as expired by expired?.
def self.expire_cache
  FeedItem.delete_all
  Feed.update_all("last_updated = NULL")
end
|
266
|
+
|
267
|
+
# Removes all feed items older than the specified number of seconds
|
268
|
+
# Deletes every feed item whose time column is older than +purge_time+
# seconds before now (one week by default).
def self.purge_cache(purge_time=1.week)
  cutoff = Time.now - purge_time
  purge_date = cutoff.strftime("%Y-%m-%d %H:%M:%S")
  FeedItem.delete_all("time < '#{purge_date}'")
end
|
272
|
+
|
273
|
+
# If ActiveRecord is not already connected, attempts to find a configuration file and use
|
274
|
+
# it to open a connection for ActiveRecord.
|
275
|
+
# This method is probably unnecessary for anything but testing and debugging purposes.
|
276
|
+
# Makes sure ActiveRecord has a usable connection and returns it.
#
# If ActiveRecord::Base.connection already works, it is returned as is.
# Otherwise the first existing file of ./config/database.yml and
# ./database.yml is loaded. When the file has a section named after
# FEED_TOOLS_ENV that section is used, otherwise the whole file is treated
# as the connection settings. The settings are stored in
# ActiveRecord::Base.configurations and used to establish the connection.
#
# Raises a RuntimeError when neither configuration file exists.
def self.prepare_connection
  begin
    ActiveRecord::Base.connection
  rescue
    possible_config_files = [
      "./config/database.yml",
      "./database.yml"
    ]
    database_config_file = possible_config_files.find { |path| File.exist?(path) }
    if database_config_file.nil?
      raise "Could not find a database configuration file " +
        "(looked for #{possible_config_files.join(', ')})."
    end

    database_config_hash = File.open(database_config_file) { |io| YAML::load(io) }
    unless database_config_hash[FEED_TOOLS_ENV].nil?
      database_config_hash = database_config_hash[FEED_TOOLS_ENV]
    end

    ActiveRecord::Base.configurations = database_config_hash
    ActiveRecord::Base.establish_connection(database_config_hash)
    ActiveRecord::Base.connection
  end
end
|
303
|
+
|
304
|
+
# Reports whether parsed feeds should be saved to the cache. Always true
# here; parse_feed consults it before calling save.
def self.cache_enabled?
  true
end
|
307
|
+
|
308
|
+
# The stored title attribute, or "Untitled Feed" when none is stored.
def title
  self["title"] || "Untitled Feed"
end
|
311
|
+
|
312
|
+
# Optional feed attribute.
|
313
|
+
# If you want to use it, the database table needs to have a language field added, otherwise
|
314
|
+
# it will just default to "en-US".
|
315
|
+
# The stored language attribute, falling back to "en-US" when the value is
# missing or when reading the attribute raises (the language column is
# optional in the feeds table).
def language
  self["language"] || "en-US"
rescue
  "en-US"
end
|
322
|
+
|
323
|
+
# Strict boolean view of @live, which load_remote_feed sets to true.
def live?
  @live ? true : false
end
|
330
|
+
|
331
|
+
# True when the feed has never been updated, or when last_updated plus
# time_to_live lies in the past.
def expired?
  return true if last_updated.nil?
  (last_updated + time_to_live) < Time.now
end
|
334
|
+
|
335
|
+
# Forces this feed to expire.
|
336
|
+
# Forces this feed to expire: deletes its cached items, drops the in-memory
# item list, back-dates last_updated to 1980 and saves the record.
# Returns the result of save.
def expire
  own_items = "feed_id = '#{self.id}'"
  FeedItem.delete_all(own_items)
  @feed_items_unsorted = nil
  self.last_updated = Time.mktime(1980)
  self.save
end
|
342
|
+
|
343
|
+
# The amount of time in seconds between the last time the feed was updated and the next
|
344
|
+
# valid time to retrieve a remote feed.
|
345
|
+
# Time to live in seconds. The stored attribute is a number of hours and is
# converted with #hour; when nothing is stored the default is one hour.
def time_to_live
  stored_hours = self['time_to_live']
  stored_hours.nil? ? 1.hour : stored_hours.hour
end
|
348
|
+
|
349
|
+
# The tags as an array of strings, or nil when no tags are stored.
#
# Tags are stored as a single "|"-delimited string with a leading and
# trailing "|" (see tag_list=); the delimiters at both ends are dropped
# before splitting. An empty stored string yields an empty array.
def tag_list
  return nil if tags.nil?
  # "".slice(1..-2) is nil, hence the to_s
  tags[1..-2].to_s.split("|")
end
|
352
|
+
|
353
|
+
# Stores +new_tag_list+ (a collection of strings) in the tags attribute as
# "|tag1|tag2|", stripping surrounding whitespace from each tag.
def tag_list=(new_tag_list)
  stripped = new_tag_list.map { |tag| tag.strip }
  self.tags = "|#{stripped.join('|')}|"
end
|
356
|
+
|
357
|
+
# The tags as a single ", "-separated string, or nil when no tags are
# stored (previously a nil tags attribute raised NoMethodError).
def tag_string
  return nil if tags.nil?
  # drop the leading and trailing "|" delimiters before splitting
  tags[1..-2].to_s.split("|").join(", ")
end
|
360
|
+
|
361
|
+
# Splits +new_tag_string+ on commas, strips each piece and stores the
# result in the tags attribute as "|tag1|tag2|".
def tag_string=(new_tag_string)
  pieces = new_tag_string.split(",").map { |tag| tag.strip }
  self.tags = "|#{pieces.join('|')}|"
end
|
364
|
+
|
365
|
+
# Returns a list of the feed_items, sorted by date
|
366
|
+
# The feed's items ordered by time, newest first.
#
# The item list is read from feed_items_unsorted on first use and kept in
# @feed_items_unsorted. If loading or sorting raises (for example because
# two times cannot be compared), the unsorted list is returned instead.
def feed_items
  @feed_items_unsorted = feed_items_unsorted if @feed_items_unsorted.nil?
  @feed_items_unsorted.sort { |a, b| b.time <=> a.time }
rescue
  @feed_items_unsorted.nil? ? feed_items_unsorted : @feed_items_unsorted
end
|
382
|
+
|
383
|
+
# Attempts to load the feed from the remote location. Requires the url to be set.
|
384
|
+
# If an etag has been set, attempts to use it to prevent unnecessary reloading of identical
|
385
|
+
# content.
|
386
|
+
# Fetches the feed from url and hands the response body to parse_feed.
#
# Marks the feed as live and stamps last_updated with the current time
# before fetching. When an etag is stored it is sent as an If-None-Match
# header; the etag reported by the response is stored on the record before
# parsing. Returns the result of parse_feed.
#
# TODO: verify that the etag code works as intended
#   -> may need to check what gets returned when the etag is matched
def load_remote_feed
  @live = true
  self.last_updated = Time.now

  request_headers = {}
  request_headers["If-None-Match"] = etag unless etag.nil?

  open(url, request_headers) do |http|
    # must go through the attribute writer; a bare `etag = ...` would only
    # create a local variable and the new etag would be lost
    self.etag = http.meta['etag']
    parse_feed(http.read)
  end
end
|
405
|
+
|
406
|
+
# Extension point: parse_feed calls this with the raw feed data after the
# feed attributes are assigned and before the record is saved. The default
# does nothing and returns nil.
def parse_feed_hook(feed_data)
  nil
end
|
409
|
+
|
410
|
+
# Parses +feed_data+ (an XML string) and updates this feed from it.
#
# Extracts title, link, description, image link and time to live from the
# "channel" element (or from the root element when there is no channel),
# assigns them to the record (empty values become nil), calls
# parse_feed_hook, saves when Feed.cache_enabled?, and finally rebuilds
# @feed_items_unsorted by passing every item/entry node to
# handle_feed_item. Returns self.
def parse_feed(feed_data)
  root_node = Document.new(feed_data).root
  metadata_node = XPath.first(root_node, "channel") || root_node

  # get the feed title, unescaping it when the feed marks it as escaped
  title = XPath.first(metadata_node, "title/text()").to_s
  if XPath.first(metadata_node, "title/@mode").to_s == "escaped"
    title = CGI.unescapeHTML(title)
  end

  # get the feed link
  link = first_xpath_value(metadata_node, [
    "link[@rel='alternate' and @type='text/html']/@href",
    "link[@rel='alternate']/@href",
    "link/@href",
    "link/text()",
    # The ordering of the next two is somewhat incorrect, but the more
    # correct ordering would require a vestigial 'base' method on the Feed
    # class to fully support CDF files. The assumption is that the 'base'
    # attribute points to a valid location, hopefully the same as 'href'.
    "@base",
    "@href"
  ])

  # get the feed description
  description = XPath.first(metadata_node, "description/text()").to_s
  if description != ""
    if XPath.first(metadata_node, "description/@encoding").to_s != ""
      description = "[Embedded data objects are not supported.]"
    else
      description = CGI.unescapeHTML(description)
    end
  end
  if description == ""
    description = XPath.first(metadata_node, "tagline/text()").to_s
    if description != "" && XPath.first(metadata_node, "tagline/@mode").to_s == "escaped"
      description = CGI.unescapeHTML(description)
    end
  end
  # "info" is only consulted when the feed has no tagline element at all
  if description == "" && XPath.first(metadata_node, "tagline").nil?
    description = XPath.first(metadata_node, "info/text()").to_s
    if description != "" && XPath.first(metadata_node, "info/@mode").to_s == "escaped"
      description = CGI.unescapeHTML(description)
    end
  end
  if description == ""
    description = CGI.unescapeHTML(XPath.first(metadata_node, "abstract/text()").to_s)
  end

  # get the image link
  image_link = first_xpath_value(metadata_node, [
    "image/url/text()",
    "image/@rdf:resource",
    "link[@type='image/jpeg']/@href",
    "link[@type='image/gif']/@href",
    "link[@type='image/png']/@href",
    "logo[@style='image']/@href",
    "logo/@href"
  ])

  # get the feed time to live (expressed in hours)
  # NOTE(review): syn:updateFrequency is multiplied by the period length
  # here, which matches what build_feed writes; confirm this is the intended
  # reading of the syndication module.
  feed_time_to_live = nil
  update_frequency = XPath.first(metadata_node, "syn:updateFrequency/text()").to_s
  if update_frequency != ""
    hours_per_period = {
      "daily" => 24,
      "weekly" => 24 * 7,
      "monthly" => 24 * 30,
      "yearly" => 24 * 365
    }
    update_period = XPath.first(metadata_node, "syn:updatePeriod/text()").to_s
    # any other period (including a missing one) is treated as hourly
    feed_time_to_live = update_frequency.to_i * (hours_per_period[update_period] || 1)
  end
  if feed_time_to_live.nil?
    # <ttl> is expressed in minutes; round up to whole hours so that a ttl
    # below 60 minutes does not turn into a time to live of zero
    update_frequency = XPath.first(metadata_node, "ttl/text()").to_s
    if update_frequency != ""
      feed_time_to_live = (update_frequency.to_i / 60.0).ceil
    end
  end

  # TODO: handle time_to_live for CDF files
  # =======================================

  # get the feed items
  items = XPath.match(root_node, "item")
  items = XPath.match(metadata_node, "item") if items.nil? || items.empty?
  items = XPath.match(metadata_node, "entry") if items.nil? || items.empty?

  # set all of the properties, storing nil instead of empty strings
  self.title = (title == "" ? nil : title)
  self.link = (link == "" ? nil : link)
  self.description = (description == "" ? nil : description)
  self.image_link = (image_link == "" ? nil : image_link)
  self.time_to_live = feed_time_to_live

  parse_feed_hook(feed_data)
  save if Feed.cache_enabled?

  # check and make sure we don't have any cached feed_items with a nil link
  # if we do, we need to start from scratch to avoid duplicates
  if feed_items.any? { |item| item.link.nil? }
    FeedItem.delete_all("feed_id = '#{self.id}'")
  end

  # parse the feed items
  @feed_items_unsorted = []
  unless items.nil?
    items.each do |item_node|
      @feed_items_unsorted << handle_feed_item(item_node.to_s)
    end
  end
  return self
end

# Returns the string value of the first XPath in +paths+ that produces a
# non-empty result when evaluated against +node+, or "" when none does.
def first_xpath_value(node, paths)
  paths.each do |path|
    value = XPath.first(node, path).to_s
    return value unless value == ""
  end
  ""
end
private :first_xpath_value
|
586
|
+
|
587
|
+
# Locates the feed item in the database based on the supplied item xml data.
|
588
|
+
# Looks up the cached FeedItem that corresponds to +item_data+ (the XML of
# one item). Returns the matching FeedItem or nil.
#
# The item is first looked up by this feed's id and the item link. Because
# some tools change an item's title when the comment count changes, and some
# feeds reuse one link for many items, a link match only counts when the
# titles agree after bracketed counters such as "[3]", "(3)" or "{3}" are
# removed from both.
def find_feed_item_by_data(item_data)
  item_node = Document.new(item_data).root

  # the first location that yields a non-empty value is used as the link
  link_paths = [
    "link[@rel='alternate']/@href",
    "link/@href",
    "link/text()",
    "@rdf:about",
    "guid/text()"
  ]
  item_link = ""
  link_paths.each do |path|
    item_link = XPath.first(item_node, path).to_s
    break unless item_link == ""
  end
  item_title = XPath.first(item_node, "title/text()").to_s

  feed_item = FeedItem.find_by_feed_id_and_link(self.id, item_link)
  return feed_item if feed_item.nil?

  counter_patterns = [/\[\d*\]/, /\(\d*\)/, /\{\d*\}/]
  without_counters = lambda do |text|
    counter_patterns.inject(text) { |result, pattern| result.gsub(pattern, "").strip }
  end

  if without_counters.call(feed_item.title) != without_counters.call(item_title)
    feed_item = nil
  end
  return feed_item
end
|
625
|
+
|
626
|
+
# Turns the XML of one item into a FeedItem belonging to this feed.
# An already cached item (see find_feed_item_by_data) is reused, otherwise a
# new FeedItem is created; either way it is attached to this feed and asked
# to parse +item_data+. Returns the FeedItem.
def handle_feed_item(item_data)
  feed_item = find_feed_item_by_data(item_data) || FeedItem.new
  feed_item.feed = self
  feed_item.parse_item(item_data)
  feed_item
end
|
635
|
+
|
636
|
+
# Extension point: build_feed calls this with the feed type, version and XML
# builder while generating each supported format. The default does nothing
# and returns nil.
def build_feed_hook(feed_type, version, xml_builder)
  nil
end
|
639
|
+
|
640
|
+
# Generates this feed as XML through +xml_builder+ and returns the builder's
# result.
#
# feed_type:: "rss" or "atom"; any other value returns nil.
# version::   format version; 0.0 means "use the default", which is 1.0 for
#             rss and 0.3 for atom. rss versions 0.9, 1.0 and 1.1 produce the
#             RDF-based layout, every other rss version the plain layout.
# xml_builder:: a Builder::XmlMarkup-compatible object.
#
# Raises a RuntimeError when an RDF-based feed is requested and an item has
# a nil link.
#
# NOTE(review): the plain rss layout writes its elements directly under
# <rss> (no <channel> wrapper) and the RDF layout declares no namespaces;
# confirm whether consumers rely on the current output before changing it.
def build_feed(feed_type, version=0.0, xml_builder=Builder::XmlMarkup.new(:indent => 2))
  if version == 0.0
    version = 1.0 if feed_type == "rss"
    version = 0.3 if feed_type == "atom"
  end

  blank = lambda { |value| value.nil? || value == "" }

  if feed_type == "rss" && [0.9, 1.0, 1.1].include?(version)
    # RDF-based rss format
    return xml_builder.tag!("rdf:RDF") do
      # link may be nil; to_s keeps CGI.escapeHTML from raising on it
      xml_builder.channel("rdf:about" => CGI.escapeHTML(link.to_s)) do
        # title, link and description are always emitted, empty if unset
        if blank.call(title)
          xml_builder.title
        else
          xml_builder.title(title)
        end
        if blank.call(link)
          xml_builder.link
        else
          xml_builder.link(link)
        end
        unless blank.call(image_link)
          xml_builder.image("rdf:resource" => CGI.escapeHTML(image_link))
        end
        if blank.call(description)
          xml_builder.description
        else
          xml_builder.description(description)
        end
        unless blank.call(language)
          xml_builder.tag!("dc:language", language)
        end
        xml_builder.tag!("syn:updatePeriod", "hourly")
        xml_builder.tag!("syn:updateFrequency", (time_to_live / 1.hour).to_s)
        xml_builder.tag!("syn:updateBase", Time.mktime(1970).iso8601)
        xml_builder.items do
          xml_builder.tag!("rdf:Seq") do
            unless feed_items.nil?
              feed_items.each do |item|
                if item.link.nil?
                  raise "Cannot generate an rdf-based feed with a nil item link field."
                end
                xml_builder.tag!("rdf:li", "rdf:resource" => CGI.escapeHTML(item.link))
              end
            end
          end
        end
        build_feed_hook(feed_type, version, xml_builder)
      end
      unless blank.call(image_link)
        xml_builder.image("rdf:about" => CGI.escapeHTML(image_link)) do
          if blank.call(title)
            xml_builder.title
          else
            xml_builder.title(title)
          end
          xml_builder.url(image_link)
          if blank.call(link)
            xml_builder.link
          else
            xml_builder.link(link)
          end
        end
      end
      unless feed_items.nil?
        feed_items.each do |item|
          item.build_feed_item(feed_type, version, xml_builder)
        end
      end
    end
  elsif feed_type == "rss"
    # normal rss format
    return xml_builder.rss("version" => version.to_s) do
      xml_builder.title(title) unless blank.call(title)
      xml_builder.link(link) unless blank.call(link)
      xml_builder.description(description) unless blank.call(description)
      xml_builder.ttl((time_to_live / 1.minute).to_s)
      xml_builder.generator("http://www.sporkmonger.com/projects/feedtools")
      build_feed_hook(feed_type, version, xml_builder)
      unless feed_items.nil?
        feed_items.each do |item|
          item.build_feed_item(feed_type, version, xml_builder)
        end
      end
    end
  elsif feed_type == "atom"
    # normal atom format
    return xml_builder.feed("xmlns" => "http://purl.org/atom/ns#",
                            "version" => version.to_s,
                            "xml:lang" => language) do
      unless blank.call(title)
        xml_builder.title(title,
                          "mode" => "escaped",
                          "type" => "text/html")
      end
      unless blank.call(link)
        xml_builder.link("href" => link,
                         "rel" => "alternate",
                         "type" => "text/html",
                         "title" => title)
      end
      unless blank.call(description)
        xml_builder.tagline(description,
                            "mode" => "escaped",
                            "type" => "text/html")
      end
      xml_builder.generator("FeedTools",
                            "url" => "http://www.sporkmonger.com/projects/feedtools")
      build_feed_hook(feed_type, version, xml_builder)
      unless feed_items.nil?
        feed_items.each do |item|
          item.build_feed_item(feed_type, version, xml_builder)
        end
      end
    end
  end
end
|
764
|
+
|
765
|
+
# Saves the current state of the feed to the database unless the feed lacks a remote location
|
766
|
+
# Saves the feed through the superclass, but only when it has a url.
# A feed with a nil or empty url is not saved and nil is returned.
def save
  return if url.nil? || url == ""
  super
end
|
771
|
+
end
|
772
|
+
|
773
|
+
class FeedItem < ActiveRecord::Base
|
774
|
+
include REXML
|
775
|
+
|
776
|
+
# Verifies that the required fields exist; additional ones added by the user are fine
|
777
|
+
# Probes the feed_items table by selecting every required column from it.
# Returns true when the query runs, false when the database rejects it with
# ActiveRecord::StatementInvalid. Extra user-added columns do not matter.
def self.table_exists?
  connection.execute "select id, feed_id, link, title, author, description, " +
    "time, tags from feed_items limit 1"
  true
rescue ActiveRecord::StatementInvalid
  false
end
|
786
|
+
|
787
|
+
# The Feed this item belongs to, or nil when the item has no feed_id.
#
# A feed assigned through feed= is returned directly. Otherwise the feed is
# looked up by the feed_id attribute and remembered for later calls. (The
# earlier check of the @feed_id instance variable, which nothing in this
# class sets, is replaced by the attribute reader.)
def feed
  return @feed unless @feed.nil?
  return nil if feed_id.nil?
  @feed = Feed.find_by_id(feed_id)
end
|
797
|
+
|
798
|
+
# Attaches this item to +new_feed+: copies the feed's id into feed_id and
# keeps the feed object itself so that #feed can return it without a lookup.
def feed=(new_feed)
  self.feed_id = new_feed.id
  @feed = new_feed
end
|
802
|
+
|
803
|
+
# Reads the 'title' attribute, substituting "Untitled Entry" when the stored
# value is nil (or false).
def title
  stored_title = self['title']
  stored_title || "Untitled Entry"
end
|
806
|
+
|
807
|
+
# Returns the item's tags as an Array of Strings, or nil when tags is nil.
#
# tags is stored as a pipe-delimited string with a leading and trailing
# pipe ("|ruby|rails|"); the outer pipes are dropped before splitting.
def tag_list
  return nil if tags.nil?
  # tags[1..-2] is nil for an empty string, so fall back to "" instead of
  # calling split on nil.
  (tags[1..-2] || "").split("|")
end
|
810
|
+
|
811
|
+
# Stores new_tag_list (an Array of Strings) in tags as a pipe-delimited
# string with a leading and trailing pipe, e.g. ["ruby", "rails"] becomes
# "|ruby|rails|".
#
# Each tag is stripped of surrounding whitespace and tags that are blank
# after stripping are dropped. nil clears tags, mirroring the nil that
# tag_list returns for an untagged item.
def tag_list=(new_tag_list)
  if new_tag_list.nil?
    self.tags = nil
  else
    cleaned_tags = new_tag_list.map { |tag| tag.strip }.reject { |tag| tag == "" }
    self.tags = "|" + cleaned_tags.join("|") + "|"
  end
end
|
814
|
+
|
815
|
+
# Returns the item's tags as one comma-separated String ("ruby, rails"), or
# nil when tags is nil.
def tag_string
  # Bail out before splitting; otherwise split would be called on nil for
  # untagged items.
  return nil if tags.nil?
  (tags[1..-2] || "").split("|").join(", ")
end
|
818
|
+
|
819
|
+
# Parses a comma-separated String of tags ("ruby, rails") and stores it in
# tags as a pipe-delimited string with a leading and trailing pipe
# ("|ruby|rails|").
#
# Surrounding whitespace is stripped from every tag and entries left blank
# by stray commas are dropped. nil clears tags, mirroring the nil that
# tag_string returns for an untagged item.
def tag_string=(new_tag_string)
  if new_tag_string.nil?
    self.tags = nil
  else
    tag_names = new_tag_string.split(",").map { |name| name.strip }
    tag_names = tag_names.reject { |name| name == "" }
    self.tags = "|" + tag_names.join("|") + "|"
  end
end
|
822
|
+
|
823
|
+
# Extension point called by parse_item with the raw item XML after the
# standard fields have been assigned. The default does nothing and
# returns nil.
def parse_feed_item_hook(item_data)
  nil
end
|
826
|
+
|
827
|
+
# Populates this item from the XML source of a single feed entry.
#
# item_data is a String holding the XML of one item/entry element. Link,
# title, author, description, time and tags are each looked up through a
# series of XPath fallbacks, and whatever was found is assigned to the
# matching attribute. Afterwards parse_feed_item_hook(item_data) is called
# and the item is saved when Feed.cache_enabled? is true.
#
# Returns self.
def parse_item(item_data)
  item_node = Document.new(item_data).root

  link = parse_item_link(item_node)

  title = XPath.first(item_node, "title/text()").to_s
  unless title == ""
    # some blogging tools (notably TextPattern I believe) include the number of
    # comments in a post in the title... this is ugly, so we're gonna strip them out
    title = title.gsub(/\[\d*\]/, "").strip
  end

  author = CGI.unescapeHTML(first_xpath_text(item_node,
    ["author/name/text()", "dc:creator/text()", "author/text()"]))
  description = parse_item_description(item_node)
  time = first_xpath_text(item_node,
    ["pubDate/text()", "dc:date/text()", "issued/text()"])
  tags_array = parse_item_tags(item_node)

  # set all of the properties
  self.link = (link == "" ? nil : link)
  self.title = title unless title == ""
  # the author is extracted above, so store it alongside the other fields
  self.author = author unless author == ""
  self.description = description.strip unless description == ""
  if time != ""
    # an unparsable date falls back to the current time
    begin
      parsed_time = Time.parse(time)
    rescue StandardError
      parsed_time = Time.now
    end
    self.time = parsed_time
  elsif self.time.nil?
    # Only default the time when none is stored yet. The attribute reader is
    # used because ActiveRecord does not keep column values in instance
    # variables, so an @time check would always overwrite the stored value.
    self.time = Time.now
  end
  self.tag_list = tags_array unless tags_array.empty?

  parse_feed_item_hook(item_data)
  save if Feed.cache_enabled?
  self
end

# Returns the string form of the first XPath in paths that yields a
# non-empty result on node, or "" when none does.
def first_xpath_text(node, paths)
  paths.each do |path|
    text = XPath.first(node, path).to_s
    return text unless text == ""
  end
  ""
end

# Finds the item's link and makes it absolute against the owning feed's link.
# Returns "" when the item carries no link at all.
def parse_item_link(item_node)
  link = first_xpath_text(item_node,
    ["link[@rel='alternate']/@href", "link/@href", "link/text()",
     "@rdf:about", "guid/text()"])
  return link if link == ""

  link = CGI.unescapeHTML(link)
  return link if link =~ /\Ahttps?:\/\//

  # get the feed base, in case the feed items use relative paths; without a
  # feed or a feed link the relative path is kept as it is
  base = feed.nil? ? nil : feed.link
  return link if base.nil? || base == ""

  # ensure that we don't end up with 'http://www.foobar.com//path/to/entry'
  link = link[1..-1] if base =~ /\/\z/ && link =~ /\A\//
  # prepend the base to the link since they seem to have used a relative path
  base + link
end

# Extracts the item's content, trying an xhtml body, content:encoded,
# description and finally content. Returns "" when nothing is found.
def parse_item_description(item_node)
  body = XPath.first(item_node, "xhtml:body") || XPath.first(item_node, "body")
  description = body.nil? ? "" : body.inner_xml

  if description == ""
    description = CGI.unescapeHTML(
      XPath.first(item_node, "content:encoded/text()").to_s)
  end

  if description == ""
    description = XPath.first(item_node, "description/text()").to_s
    if description != ""
      if XPath.first(item_node, "description/@encoding").to_s != ""
        description = "[Embedded data objects are not supported.]"
      else
        description = CGI.unescapeHTML(description)
      end
    end
  end

  if description == ""
    description = XPath.first(item_node, "content/text()").to_s
    if description != "" && XPath.first(item_node, "content/@mode").to_s == "escaped"
      description = CGI.unescapeHTML(description)
    end
  end

  description
end

# Collects the item's tags as an Array of downcased Strings; the array is
# empty when no tags are found.
def parse_item_tags(item_node)
  # Every rdf:li of a dc:subject bag is one tag. A bag holding a single entry
  # counts as well, so that items written by build_feed_item with one tag
  # can be read back.
  bag_entries = XPath.match(item_node, "dc:subject/rdf:Bag/rdf:li/text()")
  unless bag_entries.empty?
    return bag_entries.map { |entry| entry.to_s.downcase.strip }
  end

  # several category or dc:subject elements: one tag per element
  ["category/text()", "dc:subject/text()"].each do |path|
    entries = XPath.match(item_node, path)
    return entries.map { |entry| entry.to_s.downcase.strip } if entries.size > 1
  end

  # a single category or dc:subject element: a space separated list of tags
  ["category/text()", "dc:subject/text()"].each do |path|
    words = XPath.first(item_node, path).to_s.downcase.split(" ")
    return words unless words.empty?
  end

  # taxo:topics entries: the tag is the word following /tag/ or /tags/ in
  # the resource url
  topic_tags = []
  XPath.match(item_node, "taxo:topics/rdf:Bag/rdf:li").each do |topic_node|
    begin
      topic_url = XPath.first(topic_node, "@resource").to_s
      matches = topic_url.scan(/\/(tag|tags)\/(\w+)/)
      topic_tags << matches.first.last.downcase.strip unless matches.empty?
    rescue StandardError
      # best effort: a topic that cannot be examined is skipped
    end
  end
  topic_tags
end

private :first_xpath_text, :parse_item_link, :parse_item_description,
  :parse_item_tags
|
1000
|
+
|
1001
|
+
# Extension point for adding custom markup to a generated feed item.
# Receives the feed type, the version and the xml builder; the default does
# nothing and returns nil.
def build_feed_item_hook(feed_type, version, xml_builder)
  nil
end
|
1003
|
+
|
1004
|
+
# Writes this item to xml_builder in the requested feed format.
#
# feed_type is "rss" or "atom". For "rss", versions 0.9, 1.0 and 1.1 produce
# an RDF-based item and any other version a plain rss item. xml_builder
# defaults to a new Builder::XmlMarkup with an indent of 2.
#
# build_feed_item_hook is called inside the generated item/entry element
# after the standard children, so overrides can append their own markup.
#
# Returns the result of the builder call, or nil for an unknown feed_type.
# Raises a RuntimeError for RDF-based output when link is nil.
def build_feed_item(feed_type, version, xml_builder=Builder::XmlMarkup.new(:indent => 2))
  if feed_type == "rss" && (version == 0.9 || version == 1.0 || version == 1.1)
    # RDF-based rss format
    if link.nil?
      raise "Cannot generate an rdf-based feed item with a nil link field."
    end
    xml_builder.item("rdf:about" => CGI.escapeHTML(link)) do
      # title, link and description are always written here, as empty
      # elements when there is no value
      if title.nil? || title == ""
        xml_builder.title
      else
        xml_builder.title(title)
      end
      if link.nil? || link == ""
        xml_builder.link
      else
        xml_builder.link(link)
      end
      if description.nil? || description == ""
        xml_builder.description
      else
        xml_builder.description(description)
      end
      xml_builder.tag!("dc:date", time.iso8601) unless time.nil?
      unless tags.nil?
        xml_builder.tag!("dc:subject") do
          xml_builder.tag!("rdf:Bag") do
            tag_list.each { |tag| xml_builder.tag!("rdf:li", tag) }
          end
        end
      end
      build_feed_item_hook(feed_type, version, xml_builder)
    end
  elsif feed_type == "rss"
    # normal rss format
    xml_builder.item do
      xml_builder.title(title) unless title.nil? || title == ""
      xml_builder.link(link) unless link.nil? || link == ""
      xml_builder.description(description) unless description.nil? || description == ""
      xml_builder.pubDate(time.rfc822) unless time.nil?
      unless tags.nil?
        xml_builder.tag!("dc:subject") do
          xml_builder.tag!("rdf:Bag") do
            tag_list.each { |tag| xml_builder.tag!("rdf:li", tag) }
          end
        end
      end
      build_feed_item_hook(feed_type, version, xml_builder)
    end
  elsif feed_type == "atom"
    # normal atom format
    xml_builder.entry("xmlns" => "http://purl.org/atom/ns#") do
      unless title.nil? || title == ""
        xml_builder.title(title,
          "mode" => "escaped",
          "type" => "text/html")
      end
      unless link.nil? || link == ""
        xml_builder.link("href" => link,
          "rel" => "alternate",
          "type" => "text/html",
          "title" => title)
      end
      unless description.nil? || description == ""
        xml_builder.content(description,
          "mode" => "escaped",
          "type" => "text/html")
      end
      xml_builder.issued(time.iso8601) unless time.nil?
      unless tags.nil?
        tag_list.each { |tag| xml_builder.category(tag) }
      end
      build_feed_item_hook(feed_type, version, xml_builder)
    end
  end
end
|
1094
|
+
|
1095
|
+
# Persists the item through the superclass +save+, but only when it belongs
# to a feed that has a remote location (a non-nil, non-empty url).
# Returns whatever the superclass returns, or nil when nothing was saved.
def save
  return if feed.nil? || feed.url.nil? || feed.url == ""
  super
end
|
1102
|
+
end
|
1103
|
+
end
|
1104
|
+
|
1105
|
+
module REXML
  class Element
    # Small extension to REXML that simplifies parsing of xhtml feed items:
    # returns the string form of every child node, concatenated in order,
    # or "" when the element has no children.
    def inner_xml
      children.inject("") { |markup, node| markup << node.to_s }
    end
  end
end
|
1117
|
+
|
1118
|
+
# Attempt to prepare the cache as soon as the library is loaded: set up the
# connection and create the cache unless it already exists.
begin
  feed_class = FeedTools::Feed
  feed_class.prepare_connection
  feed_class.create_cache unless feed_class.cache_exists?
rescue
  # Nothing can be done until someone sets up the database connection.
  # We'll just assume for now that the user will take care of that.
end
|