feedtools 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +4 -0
- data/README +13 -0
- data/install.rb +30 -0
- data/lib/feed_tools.rb +1126 -0
- data/lib/feed_tools/vendor/builder.rb +13 -0
- data/lib/feed_tools/vendor/builder/blankslate.rb +53 -0
- data/lib/feed_tools/vendor/builder/xmlbase.rb +143 -0
- data/lib/feed_tools/vendor/builder/xmlevents.rb +63 -0
- data/lib/feed_tools/vendor/builder/xmlmarkup.rb +297 -0
- data/rakefile +111 -0
- metadata +62 -0
data/CHANGELOG
ADDED
data/README
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
FeedTools was designed to be a simple XML feed parser, generator, and translator with a built-in
|
2
|
+
caching system.
|
3
|
+
|
4
|
+
== Example
|
5
|
+
slashdot_feed = FeedTools::Feed.open('http://www.slashdot.org/index.rss')
|
6
|
+
slashdot_feed.title
|
7
|
+
=> "Slashdot"
|
8
|
+
slashdot_feed.description
|
9
|
+
=> "News for nerds, stuff that matters"
|
10
|
+
slashdot_feed.link
|
11
|
+
=> "http://slashdot.org/"
|
12
|
+
slashdot_feed.items.first.find_node("slash:hitparade/text()").to_s
|
13
|
+
=> "43,37,28,23,11,3,1"
|
data/install.rb
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
require 'rbconfig'
require 'find'
require 'fileutils'

# This was adapted from rdoc's install.rb by way of Log4r.
#
# Copies every .rb file under lib/ (feed_tools.rb and the feed_tools/ tree)
# into the site-wide library directory, recreating the directory layout.

# Work out where site-wide libraries live. RbConfig replaces the old
# top-level Config module.
config = RbConfig::CONFIG
$sitedir = config["sitelibdir"]
unless $sitedir
  version = config["MAJOR"] + "." + config["MINOR"]
  $libdir = File.join(config["libdir"], "ruby", version)
  $sitedir = $:.find { |x| x =~ /site_ruby/ }
  if !$sitedir
    $sitedir = File.join($libdir, "site_ruby")
  elsif !$sitedir.include?(version)
    # The original compared with `!~ Regexp.quote(version)`; the right-hand
    # side of that match is a String, which raises TypeError.
    $sitedir = File.join($sitedir, version)
  end
end

# the actual gruntwork
Dir.chdir("lib")

Find.find("feed_tools", "feed_tools.rb") do |f|
  target = File.join($sitedir, *f.split(/\//))
  if File.directory?(f)
    # Find yields a directory before its contents, so the target directory
    # exists by the time its files are installed.
    FileUtils.mkdir_p(target)
  elsif f[-3..-1] == ".rb"
    FileUtils.install(f, target, :mode => 0644, :verbose => true)
  end
  # Anything else (non-Ruby files) is skipped rather than being turned into
  # a directory named after the file.
end
|
data/lib/feed_tools.rb
ADDED
@@ -0,0 +1,1126 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2005 Robert Aman
|
3
|
+
#
|
4
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
5
|
+
# a copy of this software and associated documentation files (the
|
6
|
+
# "Software"), to deal in the Software without restriction, including
|
7
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
8
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
9
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
10
|
+
# the following conditions:
|
11
|
+
#
|
12
|
+
# The above copyright notice and this permission notice shall be
|
13
|
+
# included in all copies or substantial portions of the Software.
|
14
|
+
#
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
16
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
17
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
18
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
19
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
20
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
21
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
|
+
#++
|
23
|
+
|
24
|
+
FEED_TOOLS_ENV = ENV['FEED_TOOLS_ENV'] || ENV['RAILS_ENV'] || 'production'
|
25
|
+
|
26
|
+
$:.unshift(File.dirname(__FILE__))
|
27
|
+
$:.unshift(File.dirname(__FILE__) + "/../../activerecord/lib")
|
28
|
+
|
29
|
+
begin
|
30
|
+
require 'active_record'
|
31
|
+
rescue LoadError
|
32
|
+
require 'rubygems'
|
33
|
+
require_gem 'activerecord'
|
34
|
+
end
|
35
|
+
|
36
|
+
begin
|
37
|
+
require 'rubygems'
|
38
|
+
require 'builder'
|
39
|
+
rescue LoadError
|
40
|
+
# RubyGems is not available, use included Builder
|
41
|
+
$:.unshift(File.dirname(__FILE__) + "/feed_tools/vendor")
|
42
|
+
require 'feed_tools/vendor/builder'
|
43
|
+
end
|
44
|
+
|
45
|
+
require 'open-uri'
|
46
|
+
require 'time'
|
47
|
+
require 'rexml/document'
|
48
|
+
require 'yaml'
|
49
|
+
require 'cgi'
|
50
|
+
|
51
|
+
module FeedTools
|
52
|
+
class Feed < ActiveRecord::Base
|
53
|
+
include REXML
|
54
|
+
|
55
|
+
has_many :feed_items_unsorted, :class_name => "FeedItem"
|
56
|
+
|
57
|
+
# Builds a new feed that is not yet live and has no memoized item list,
# then hands over to the superclass initializer without arguments.
def initialize
  @feed_items_unsorted = nil
  @live = false
  super()
end
|
62
|
+
|
63
|
+
# Loads the feed specified by the url, pulling the data from the cache if it hasn't expired
|
64
|
+
# Be aware that this method translates from the feed: and rss: pseudo-protocols to the
|
65
|
+
# http: protocol as needed. This means that if you pass in a feed url that looks like
|
66
|
+
# 'feed://www.anywhere.com/feed.xml' it will end up being stored in the cache as
|
67
|
+
# 'http://www.anywhere.com/feed.xml' instead. This does affect the usage of methods like
|
68
|
+
# find_by_url, but otherwise should be fairly transparent.
|
69
|
+
# Opens the feed stored under +url+.
#
# The url is first passed through normalize_url, so the cache lookup and any
# newly created record use the rewritten http(s) form. A cached feed is
# refreshed through update_if_needed; an unknown url produces a new Feed that
# is loaded from the remote location straight away.
#
# If the lookup raises ActiveRecord::StatementInvalid, the connection is
# prepared, the cache tables are created when missing, and the lookup is
# attempted once more.
#
# Returns the Feed.
def self.open(url)
  url = normalize_url(url)

  feed = nil
  begin
    feed = Feed.find_by_url(url)
  rescue ActiveRecord::StatementInvalid
    # make sure that the necessary tables are present and recover if possible
    FeedTools::Feed.prepare_connection
    FeedTools::Feed.create_cache unless FeedTools::Feed.cache_exists?
    feed = Feed.find_by_url(url)
  end

  if feed.nil?
    feed = Feed.new
    feed.url = url
    feed.load_remote_feed
  else
    feed.update_if_needed
  end
  return feed
end

# Rewrites a leading feed: or rss: pseudo-protocol to a real scheme.
#
# Only a prefix at the very start of the url is touched, so "feed:" appearing
# later (for example in a query string) is left alone. An http or https
# scheme embedded directly after the pseudo-protocol is kept; otherwise http
# is assumed. Urls without a pseudo-protocol, and nil, are returned unchanged.
def self.normalize_url(url)
  return url if url.nil?
  url.sub(/\A(?:feed|rss):\/*(?:(https?):\/+)?/) do
    "#{Regexp.last_match(1) || 'http'}://"
  end
end
|
112
|
+
|
113
|
+
# Checks if the feed has expired and updates if it has
|
114
|
+
# Reloads the feed from its remote location, but only when expired? reports
# that the cached copy is stale. Returns nil when nothing was reloaded.
def update_if_needed
  load_remote_feed if expired?
end
|
119
|
+
|
120
|
+
# Verifies that the table structure exists
|
121
|
+
# True only when both the feeds table and the feed_items table pass their
# respective table_exists? checks.
def self.cache_exists?
  Feed.table_exists? && FeedItem.table_exists?
end
|
124
|
+
|
125
|
+
# Verifies that the required fields exist; additional ones added by the user are fine
|
126
|
+
# Probes the feeds table by selecting every required column from it.
# Returns false when the statement is rejected with
# ActiveRecord::StatementInvalid, true otherwise.
def self.table_exists?
  connection.execute "select id, url, link, image_link, title, description, " +
    "tags, last_updated, etag, time_to_live from feeds limit 1"
  true
rescue ActiveRecord::StatementInvalid
  false
end
|
135
|
+
|
136
|
+
# Generates the table structure if necessary
|
137
|
+
# Creates whichever of the feeds and feed_items tables is missing.
#
# The DDL is chosen by configurations["adapter"]; "mysql", "sqlite" and
# "postgresql" are supported. The feeds table is created before feed_items
# because the PostgreSQL feed_items definition references feeds.
#
# Does nothing when Feed.cache_exists? is already true.
# Raises a RuntimeError naming the table when there is no DDL for the adapter.
#
# NOTE(review): configurations["adapter"] assumes a flat, single-environment
# configuration hash such as the one prepare_connection installs - confirm
# for configurations keyed by environment name.
def self.create_cache
  return if Feed.cache_exists?

  feeds_mysql = <<-SQL_END
    CREATE TABLE `feeds` (
      `id` int(6) unsigned NOT NULL auto_increment,
      `url` varchar(255) NOT NULL default '',
      `link` varchar(255) NOT NULL default '',
      `image_link` varchar(255) default NULL,
      `title` varchar(255) default NULL,
      `description` text default NULL,
      `tags` varchar(255) default NULL,
      `last_updated` datetime default NULL,
      `etag` varchar(255) default NULL,
      `time_to_live` int(4) default NULL,
      PRIMARY KEY (`id`)
    ) ENGINE=MyISAM DEFAULT CHARSET=latin1;
  SQL_END
  feeds_sqlite = <<-SQL_END
    CREATE TABLE 'feeds' (
      'id' INTEGER PRIMARY KEY NOT NULL,
      'url' VARCHAR(255) DEFAULT NULL,
      'link' VARCHAR(255) DEFAULT NULL,
      'image_link' VARCHAR(255) DEFAULT NULL,
      'title' VARCHAR(255) DEFAULT NULL,
      'description' TEXT DEFAULT NULL,
      'tags' VARCHAR(255) DEFAULT NULL,
      'last_updated' DATETIME DEFAULT NULL,
      'etag' VARCHAR(255) DEFAULT NULL,
      'time_to_live' INTEGER DEFAULT NULL
    );
  SQL_END
  # PostgreSQL has no "datetime" type; "timestamp" is its equivalent.
  feeds_psql = <<-SQL_END
    CREATE TABLE feeds (
      id SERIAL PRIMARY KEY NOT NULL,
      url varchar(255) default NULL,
      link varchar(255) default NULL,
      image_link varchar(255) default NULL,
      title varchar(255) default NULL,
      description text default NULL,
      tags varchar(255) default NULL,
      last_updated timestamp default NULL,
      etag varchar(255) default NULL,
      time_to_live int default NULL
    );
  SQL_END

  feed_items_mysql = <<-SQL_END
    CREATE TABLE `feed_items` (
      `id` int(6) unsigned NOT NULL auto_increment,
      `feed_id` int(6) unsigned NOT NULL default '0',
      `link` varchar(255) default NULL,
      `title` varchar(255) default NULL,
      `author` varchar(255) default NULL,
      `description` text default NULL,
      `time` datetime NOT NULL default '0000-00-00 00:00:00',
      `tags` varchar(255) default NULL,
      PRIMARY KEY (`id`)
    ) ENGINE=MyISAM DEFAULT CHARSET=latin1;
  SQL_END
  feed_items_sqlite = <<-SQL_END
    CREATE TABLE 'feed_items' (
      'id' INTEGER PRIMARY KEY NOT NULL,
      'feed_id' INTEGER NOT NULL,
      'link' VARCHAR(255) DEFAULT NULL,
      'title' VARCHAR(255) DEFAULT NULL,
      'author' VARCHAR(255) DEFAULT NULL,
      'description' TEXT DEFAULT NULL,
      'time' DATETIME DEFAULT NULL,
      'tags' VARCHAR(255) DEFAULT NULL
    );
  SQL_END
  feed_items_psql = <<-SQL_END
    CREATE TABLE feed_items (
      id SERIAL PRIMARY KEY NOT NULL,
      feed_id int REFERENCES feeds,
      link varchar(255) default NULL,
      title varchar(255) default NULL,
      author varchar(255) default NULL,
      description text default NULL,
      time timestamp default NULL,
      tags varchar(255) default NULL
    );
  SQL_END

  adapter = configurations["adapter"]

  unless Feed.table_exists?
    table_creation_sql = {
      "mysql" => feeds_mysql,
      "sqlite" => feeds_sqlite,
      "postgresql" => feeds_psql
    }[adapter]
    raise "Could not build feeds table." if table_creation_sql.nil?
    connection.execute table_creation_sql
  end

  unless FeedItem.table_exists?
    table_creation_sql = {
      "mysql" => feed_items_mysql,
      "sqlite" => feed_items_sqlite,
      "postgresql" => feed_items_psql
    }[adapter]
    raise "Could not build feed_items table." if table_creation_sql.nil?
    connection.execute table_creation_sql
  end
end
|
251
|
+
|
252
|
+
# Removes all feed entries from the cache
|
253
|
+
# This could obviously be a very dangerous operation if you use the cache for more than simply
|
254
|
+
# caching the feeds.
|
255
|
+
# Deletes every cached feed item, then every cached feed.
# Returns whatever Feed.delete_all returns.
def self.clear_cache
  FeedItem.delete_all
  return Feed.delete_all
end
|
259
|
+
|
260
|
+
# Removes all feed items from the cache and resets the last updated time for all feeds
|
261
|
+
# This is probably much safer than the clear_cache method
|
262
|
+
# Deletes every cached feed item and sets last_updated to NULL on every feed.
# Returns whatever Feed.update_all returns.
def self.expire_cache
  FeedItem.delete_all
  return Feed.update_all("last_updated = NULL")
end
|
266
|
+
|
267
|
+
# Removes all feed items older than the specified number of seconds
|
268
|
+
# Deletes the feed items whose time column is earlier than now minus
# +purge_time+ seconds (one week when omitted).
def self.purge_cache(purge_time=1.week)
  cutoff = Time.now - purge_time
  FeedItem.delete_all("time < '" + cutoff.strftime("%Y-%m-%d %H:%M:%S") + "'")
end
|
272
|
+
|
273
|
+
# If ActiveRecord is not already connected, attempts to find a configuration file and use
|
274
|
+
# it to open a connection for ActiveRecord.
|
275
|
+
# This method is probably unnecessary for anything but testing and debugging purposes.
|
276
|
+
# Returns the ActiveRecord connection, establishing one first if necessary.
#
# When ActiveRecord::Base.connection raises, the first existing file among
# ./config/database.yml and ./database.yml is loaded as YAML. If the loaded
# hash has an entry for FEED_TOOLS_ENV, that entry is used; otherwise the
# whole hash is. The chosen hash is stored in
# ActiveRecord::Base.configurations and used to establish the connection.
#
# Raises a RuntimeError when no configuration file exists or when the file
# does not contain a hash.
def self.prepare_connection
  begin
    ActiveRecord::Base.connection
  rescue
    possible_config_files = [
      "./config/database.yml",
      "./database.yml"
    ]
    database_config_file = possible_config_files.find { |file| File.exist?(file) }
    if database_config_file.nil?
      raise "Could not find a database configuration file (looked for " +
        possible_config_files.join(", ") + ")."
    end

    database_config_hash = File.open(database_config_file) { |file| YAML::load(file) }
    unless database_config_hash.kind_of?(Hash)
      raise "Database configuration file #{database_config_file} does not contain a hash."
    end
    # Prefer the section for the current environment when there is one.
    unless database_config_hash[FEED_TOOLS_ENV].nil?
      database_config_hash = database_config_hash[FEED_TOOLS_ENV]
    end

    ActiveRecord::Base.configurations = database_config_hash
    ActiveRecord::Base.establish_connection(database_config_hash)
    ActiveRecord::Base.connection
  end
end
|
303
|
+
|
304
|
+
# Reports whether parsed feeds should be saved to the cache; always true here.
def self.cache_enabled?
  true
end
|
307
|
+
|
308
|
+
# The stored title attribute, or "Untitled Feed" when none is stored.
def title
  stored_title = self["title"]
  stored_title ? stored_title : "Untitled Feed"
end
|
311
|
+
|
312
|
+
# Optional feed attribute.
|
313
|
+
# If you want to use it, the database table needs to have a language field added, otherwise
|
314
|
+
# it will just default to "en-US".
|
315
|
+
# The stored language attribute. Falls back to "en-US" when the attribute is
# empty or when reading it raises.
def language
  self["language"] || "en-US"
rescue
  "en-US"
end
|
322
|
+
|
323
|
+
# Strict true/false view of the @live flag, which load_remote_feed sets.
def live?
  @live ? true : false
end
|
330
|
+
|
331
|
+
# A feed is expired when it has never been updated, or when its last update
# plus time_to_live lies in the past.
def expired?
  refreshed_at = last_updated
  return true if refreshed_at.nil?
  (refreshed_at + time_to_live) < Time.now
end
|
334
|
+
|
335
|
+
# Forces this feed to expire.
|
336
|
+
# Forces expiry: deletes this feed's cached items, drops the memoized item
# list, moves last_updated back to 1980 and saves the record.
def expire
  own_items = "feed_id = '#{self.id}'"
  FeedItem.delete_all(own_items)
  @feed_items_unsorted = nil
  self.last_updated = Time.local(1980)
  save
end
|
342
|
+
|
343
|
+
# The amount of time in seconds between the last time the feed was updated and the next
|
344
|
+
# valid time to retrieve a remote feed.
|
345
|
+
# Converts the stored time_to_live attribute with Numeric#hour; a missing
# value is treated as 1.
def time_to_live
  hours = self['time_to_live']
  hours = 1 if hours.nil?
  hours.hour
end
|
348
|
+
|
349
|
+
# Splits the stored "|a|b|" tags string into ["a", "b"].
# Returns nil when no tags are stored.
def tag_list
  raw_tags = tags
  return nil if raw_tags.nil?
  raw_tags[1..-2].split("|")
end
|
352
|
+
|
353
|
+
# Stores the given tags as one pipe-delimited string ("|a|b|"), stripping
# surrounding whitespace from each tag.
def tag_list=(new_tag_list)
  stripped = new_tag_list.map { |tag| tag.strip }
  self.tags = "|#{stripped.join('|')}|"
end
|
356
|
+
|
357
|
+
# The stored tags as a comma-separated string ("a, b").
# Returns nil when no tags are stored; the original called split on nil in
# that case and raised NoMethodError.
def tag_string
  return nil if tags.nil?
  # tags[1..-2] is nil for an empty string, so fall back to "".
  (tags[1..-2] || "").split("|").join(", ")
end
|
360
|
+
|
361
|
+
# Splits a comma-separated tag string, strips each tag and stores the result
# as one pipe-delimited string ("|a|b|").
def tag_string=(new_tag_string)
  stripped = new_tag_string.split(",").map { |tag| tag.strip }
  self.tags = "|#{stripped.join('|')}|"
end
|
364
|
+
|
365
|
+
# Returns a list of the feed_items, sorted by date
|
366
|
+
# The feed's items ordered newest first by their time.
#
# The unsorted list is memoized in @feed_items_unsorted on first use. If
# anything raises (for example while comparing times), the unsorted list is
# returned instead.
def feed_items
  @feed_items_unsorted = feed_items_unsorted if @feed_items_unsorted.nil?
  @feed_items_unsorted.sort { |left, right| right.time <=> left.time }
rescue
  @feed_items_unsorted.nil? ? feed_items_unsorted : @feed_items_unsorted
end
|
382
|
+
|
383
|
+
# Attempts to load the feed from the remote location. Requires the url to be set.
|
384
|
+
# If an etag has been set, attempts to use it to prevent unnecessary reloading of identical
|
385
|
+
# content.
|
386
|
+
# Fetches the feed from its url and parses the response with parse_feed.
#
# Marks the feed as live and stamps last_updated before fetching. When an
# etag is stored it is sent as If-None-Match; the etag from the response is
# stored back on the record. (The original read the never-assigned @etag and
# assigned the response etag to a local variable, so neither happened.)
#
# A 304 Not Modified response, which open-uri reports as OpenURI::HTTPError,
# is treated as success: the record is saved when the cache is enabled and
# the existing data is kept. Any other HTTP error is re-raised.
#
# NOTE(review): Kernel#open treats a string beginning with "|" as a command
# to run; confirm url never comes from untrusted input.
#
# Returns self.
def load_remote_feed
  @live = true
  self.last_updated = Time.now

  request_headers = {}
  request_headers["If-None-Match"] = etag unless etag.nil?

  begin
    open(url, request_headers) do |http|
      self.etag = http.meta['etag']
      parse_feed(http.read)
    end
  rescue OpenURI::HTTPError => error
    raise unless error.io.status.first == "304"
    # Content unchanged: keep the parsed data, persist the new timestamp.
    save if Feed.cache_enabled?
  end
  self
end
|
405
|
+
|
406
|
+
# Called by parse_feed with the raw feed data after the standard attributes
# have been assigned. Does nothing and returns nil.
def parse_feed_hook(feed_data)
  nil
end
|
409
|
+
|
410
|
+
# Parses raw feed XML and copies the results onto this record.
#
# Extracts title, link, description, image link and time to live from the
# "channel" element (or from the root element when there is no channel),
# assigns them (empty values become nil), calls parse_feed_hook, saves the
# record when the cache is enabled, and finally rebuilds the item list by
# passing each item/entry element to handle_feed_item.
#
# feed_data:: the feed document as a String
# Returns self.
def parse_feed(feed_data)
  root_node = Document.new(feed_data).root
  metadata_node = XPath.first(root_node, "channel")
  metadata_node = root_node if metadata_node.nil?

  # get the feed title
  title = XPath.first(metadata_node, "title/text()").to_s

  # is the title escaped?
  if XPath.first(metadata_node, "title/@mode").to_s == "escaped"
    title = CGI.unescapeHTML(title)
  end

  # get the feed link
  link = first_xpath_value(metadata_node, [
    # The original predicate lacked the "and" between the two tests.
    "link[@rel='alternate' and @type='text/html']/@href",
    "link[@rel='alternate']/@href",
    "link/@href",
    "link/text()",
    # The last two exist to support CDF files. Trying 'base' before 'href'
    # is somewhat incorrect, but the more correct ordering would need a
    # vestigial 'base' method on the Feed class. The assumption is that the
    # 'base' attribute points to a valid location, hopefully the same as
    # the 'href' location.
    "@base",
    "@href"
  ])

  # get the feed description
  description = XPath.first(metadata_node, "description/text()").to_s
  if description != ""
    if XPath.first(metadata_node, "description/@encoding").to_s != ""
      description = "[Embedded data objects are not supported.]"
    else
      description = CGI.unescapeHTML(description)
    end
  end
  if description == ""
    description = XPath.first(metadata_node, "tagline/text()").to_s
    if description != "" && XPath.first(metadata_node, "tagline/@mode").to_s == "escaped"
      description = CGI.unescapeHTML(description)
    end
  end
  if description == "" && XPath.first(metadata_node, "tagline").nil?
    description = XPath.first(metadata_node, "info/text()").to_s
    if description != "" && XPath.first(metadata_node, "info/@mode").to_s == "escaped"
      description = CGI.unescapeHTML(description)
    end
  end
  if description == ""
    description = CGI.unescapeHTML(XPath.first(metadata_node, "abstract/text()").to_s)
  end

  # get the image link
  image_link = first_xpath_value(metadata_node, [
    "image/url/text()",
    "image/@rdf:resource",
    "link[@type='image/jpeg']/@href",
    "link[@type='image/gif']/@href",
    "link[@type='image/png']/@href",
    "logo[@style='image']/@href",
    "logo/@href"
  ])

  # get the feed time to live (expressed in hours)
  # NOTE(review): the frequency is multiplied by the period length here,
  # which matches what build_feed emits; the syndication module appears to
  # define updateFrequency as updates per period - confirm before changing.
  feed_time_to_live = nil
  update_frequency = XPath.first(metadata_node, "syn:updateFrequency/text()").to_s
  if update_frequency != ""
    update_period = XPath.first(metadata_node, "syn:updatePeriod/text()").to_s
    hours_per_period = {
      "daily" => 24,
      "weekly" => 24 * 7,
      "monthly" => 24 * 30,
      "yearly" => 24 * 365
    }
    # any other period is treated as hourly
    feed_time_to_live = update_frequency.to_i * (hours_per_period[update_period] || 1)
  end
  if feed_time_to_live.nil?
    # expressed in minutes
    update_frequency = XPath.first(metadata_node, "ttl/text()").to_s
    if update_frequency != ""
      feed_time_to_live = (update_frequency.to_i / 60)
    end
  end
  # Integer division turns a ttl under an hour into 0, which would make the
  # feed expire immediately on every check; keep at least one hour.
  if !feed_time_to_live.nil? && feed_time_to_live < 1
    feed_time_to_live = 1
  end

  # TODO: handle time_to_live for CDF files
  # =======================================

  # get the feed items
  items = XPath.match(root_node, "item")
  items = XPath.match(metadata_node, "item") if items.nil? || items == []
  items = XPath.match(metadata_node, "entry") if items.nil? || items == []

  # set all of the properties; empty strings are stored as nil
  self.title = (title != "" ? title : nil)
  self.link = (link != "" ? link : nil)
  self.description = (description != "" ? description : nil)
  self.image_link = (image_link != "" ? image_link : nil)
  self.time_to_live = feed_time_to_live

  parse_feed_hook(feed_data)
  save if Feed.cache_enabled?

  # check and make sure we don't have any cached feed_items with a nil link
  # if we do, we need to start from scratch to avoid duplicates
  if feed_items.any? { |item| item.link.nil? }
    FeedItem.delete_all("feed_id = '#{self.id}'")
  end

  # parse the feed items
  @feed_items_unsorted = []
  unless items.nil?
    items.each do |item_node|
      @feed_items_unsorted << handle_feed_item(item_node.to_s)
    end
  end
  return self
end

# Evaluates each XPath in +paths+ against +node+ in order and returns the
# string form of the first non-empty result, or "" when every path is empty.
def first_xpath_value(node, paths)
  paths.each do |path|
    value = XPath.first(node, path).to_s
    return value unless value == ""
  end
  ""
end
private :first_xpath_value
|
586
|
+
|
587
|
+
# Locates the feed item in the database based on the supplied item xml data.
|
588
|
+
# Looks up the cached FeedItem that corresponds to the given item XML.
#
# The item's link is taken from the first non-empty of several XPath
# locations and used, together with this feed's id, to find the record. A
# record is only accepted when its title equals the XML title once bracketed,
# parenthesised and braced digit runs have been removed from both.
#
# Returns the FeedItem, or nil when there is no acceptable match.
def find_feed_item_by_data(item_data)
  item_node = Document.new(item_data).root

  # get the link
  link_paths = [
    "link[@rel='alternate']/@href",
    "link/@href",
    "link/text()",
    "@rdf:about",
    "guid/text()"
  ]
  item_link = ""
  link_paths.each do |path|
    item_link = XPath.first(item_node, path).to_s
    break unless item_link == ""
  end
  item_title = XPath.first(item_node, "title/text()").to_s

  feed_item = FeedItem.find_by_feed_id_and_link(self.id, item_link)
  return feed_item if feed_item.nil?

  # Some blogging tools alter the title of an item when the number of comments change (for
  # example, TextPattern) and many email feed dumps use the same link for multiple
  # items (for example, GMail). We try to take both of these cases into account here.
  counter_patterns = [/\[\d*\]/, /\(\d*\)/, /\{\d*\}/]
  without_counters = lambda do |text|
    counter_patterns.inject(text) { |result, pattern| result.gsub(pattern, "").strip }
  end
  existing_title = without_counters.call(feed_item.title)
  item_title = without_counters.call(item_title)

  existing_title != item_title ? nil : feed_item
end
|
625
|
+
|
626
|
+
# Finds the cached item matching +item_data+ (or builds a new FeedItem when
# there is none), attaches it to this feed and has it parse the item XML.
# Returns the FeedItem.
def handle_feed_item(item_data)
  entry = find_feed_item_by_data(item_data)
  entry = FeedItem.new if entry.nil?
  entry.feed = self
  entry.parse_item(item_data)
  entry
end
|
635
|
+
|
636
|
+
# Called by build_feed while the feed-level elements are being written.
# Does nothing and returns nil.
def build_feed_hook(feed_type, version, xml_builder)
  nil
end
|
639
|
+
|
640
|
+
# Serialises the feed through +xml_builder+.
#
# feed_type::   "rss" or "atom"; anything else produces nothing and returns nil
# version::     format version; 0.0 selects 1.0 for rss and 0.3 for atom
# xml_builder:: the builder that receives the markup
#
# rss versions 0.9, 1.0 and 1.1 use the RDF layout; other rss versions use
# the plain rss layout. Raises a RuntimeError in the RDF layout when an item
# has a nil link. Returns the value of the outermost builder call.
def build_feed(feed_type, version=0.0, xml_builder=Builder::XmlMarkup.new(:indent => 2))
  blank = lambda { |value| value.nil? || value == "" }

  # A version of 0.0 means "use the default for this feed type".
  if version == 0.0
    case feed_type
    when "rss" then version = 1.0
    when "atom" then version = 0.3
    end
  end

  if feed_type == "rss" && [0.9, 1.0, 1.1].include?(version)
    # RDF-based rss format
    xml_builder.tag!("rdf:RDF") do
      xml_builder.channel("rdf:about" => CGI.escapeHTML(link)) do
        if blank.call(title)
          xml_builder.title
        else
          xml_builder.title(title)
        end
        if blank.call(link)
          xml_builder.link
        else
          xml_builder.link(link)
        end
        unless blank.call(image_link)
          xml_builder.image("rdf:resource" => CGI.escapeHTML(image_link))
        end
        if blank.call(description)
          xml_builder.description
        else
          xml_builder.description(description)
        end
        unless blank.call(language)
          xml_builder.tag!("dc:language", language)
        end
        xml_builder.tag!("syn:updatePeriod", "hourly")
        xml_builder.tag!("syn:updateFrequency", (time_to_live / 1.hour).to_s)
        xml_builder.tag!("syn:updateBase", Time.mktime(1970).iso8601)
        xml_builder.items do
          xml_builder.tag!("rdf:Seq") do
            unless feed_items.nil?
              feed_items.each do |item|
                if item.link.nil?
                  raise "Cannot generate an rdf-based feed with a nil item link field."
                end
                xml_builder.tag!("rdf:li", "rdf:resource" => CGI.escapeHTML(item.link))
              end
            end
          end
        end
        build_feed_hook(feed_type, version, xml_builder)
      end
      unless blank.call(image_link)
        xml_builder.image("rdf:about" => CGI.escapeHTML(image_link)) do
          if blank.call(title)
            xml_builder.title
          else
            xml_builder.title(title)
          end
          unless blank.call(image_link)
            xml_builder.url(image_link)
          end
          if blank.call(link)
            xml_builder.link
          else
            xml_builder.link(link)
          end
        end
      end
      unless feed_items.nil?
        feed_items.each do |item|
          item.build_feed_item(feed_type, version, xml_builder)
        end
      end
    end
  elsif feed_type == "rss"
    # normal rss format
    xml_builder.rss("version" => version.to_s) do
      xml_builder.title(title) unless blank.call(title)
      xml_builder.link(link) unless blank.call(link)
      xml_builder.description(description) unless blank.call(description)
      xml_builder.ttl((time_to_live / 1.minute).to_s)
      xml_builder.generator("http://www.sporkmonger.com/projects/feedtools")
      build_feed_hook(feed_type, version, xml_builder)
      unless feed_items.nil?
        feed_items.each do |item|
          item.build_feed_item(feed_type, version, xml_builder)
        end
      end
    end
  elsif feed_type == "atom"
    # normal atom format
    xml_builder.feed("xmlns" => "http://purl.org/atom/ns#",
                     "version" => version.to_s,
                     "xml:lang" => language) do
      unless blank.call(title)
        xml_builder.title(title,
                          "mode" => "escaped",
                          "type" => "text/html")
      end
      unless blank.call(link)
        xml_builder.link("href" => link,
                         "rel" => "alternate",
                         "type" => "text/html",
                         "title" => title)
      end
      unless blank.call(description)
        xml_builder.tagline(description,
                            "mode" => "escaped",
                            "type" => "text/html")
      end
      xml_builder.generator("FeedTools",
                            "url" => "http://www.sporkmonger.com/projects/feedtools")
      build_feed_hook(feed_type, version, xml_builder)
      unless feed_items.nil?
        feed_items.each do |item|
          item.build_feed_item(feed_type, version, xml_builder)
        end
      end
    end
  end
end
|
764
|
+
|
765
|
+
# Saves the current state of the feed to the database unless the feed lacks a remote location
|
766
|
+
# Saves the feed through the superclass, but only when it has a url.
# Returns nil without saving when url is nil or empty.
def save
  return nil if url.nil? || url == ""
  super
end
|
771
|
+
end
|
772
|
+
|
773
|
+
class FeedItem < ActiveRecord::Base
|
774
|
+
include REXML
|
775
|
+
|
776
|
+
# Verifies that the required fields exist; additional ones added by the user are fine
|
777
|
+
# Probes the feed_items table by selecting every required column from it.
# Returns false when the statement is rejected with
# ActiveRecord::StatementInvalid, true otherwise.
def self.table_exists?
  connection.execute "select id, feed_id, link, title, author, description, " +
    "time, tags from feed_items limit 1"
  true
rescue ActiveRecord::StatementInvalid
  false
end
|
786
|
+
|
787
|
+
# The Feed this item belongs to.
#
# Returns the feed assigned through feed= when there is one; otherwise looks
# the feed up by the feed_id attribute and remembers the result. Returns nil
# when the item has no feed_id.
#
# The original tested @feed_id, an instance variable nothing assigns, so the
# lookup branch was never taken.
def feed
  return @feed unless @feed.nil?
  return nil if self.feed_id.nil?
  @feed = Feed.find_by_id(self.feed_id)
end
|
797
|
+
|
798
|
+
# Associates this item with new_feed by storing its id in feed_id and
# remembering the object itself in @feed.  Passing nil clears both, which
# detaches the item from any feed.
def feed=(new_feed)
  self.feed_id = new_feed.nil? ? nil : new_feed.id
  @feed = new_feed
end
|
802
|
+
|
803
|
+
# Returns the stored title, or the placeholder "Untitled Entry" when no
# title has been stored.
def title
  stored_title = self['title']
  stored_title ? stored_title : "Untitled Entry"
end
|
806
|
+
|
807
|
+
# Returns the tags as an Array of Strings, or nil when no tags are stored.
# The stored form is pipe-delimited with a leading and a trailing pipe
# (see tag_list=), so the outer characters are dropped before splitting.
def tag_list
  raw_tags = tags
  return nil if raw_tags.nil?
  raw_tags[1..-2].split("|")
end
|
810
|
+
|
811
|
+
# Stores the given collection of tag Strings in the tags attribute as a
# pipe-delimited String with a leading and a trailing pipe.  Each tag is
# stripped of surrounding whitespace first.
def tag_list=(new_tag_list)
  stripped_tags = new_tag_list.map { |tag| tag.strip }
  self.tags = "|#{stripped_tags.join("|")}|"
end
|
814
|
+
|
815
|
+
# Returns the tags as one comma-separated String ("ruby, xml"), or nil when
# no tags are stored.
def tag_string
  raw_tags = tags
  # Without this guard split would be called on nil and raise.
  return nil if raw_tags.nil?
  raw_tags[1..-2].split("|").join(", ")
end
|
818
|
+
|
819
|
+
# Stores the tags given as a comma-separated String.  Every piece is
# stripped of surrounding whitespace and the result is written to the tags
# attribute in its pipe-delimited form.
def tag_string=(new_tag_string)
  pieces = new_tag_string.split(",").map { |piece| piece.strip }
  self.tags = "|" + pieces.join("|") + "|"
end
|
822
|
+
|
823
|
+
# Called by parse_item with the raw item XML once the standard fields have
# been set.  This default implementation does nothing and returns nil.
def parse_feed_item_hook(item_data)
  nil
end
|
826
|
+
|
827
|
+
# Populates this item from the XML of a single RSS, RDF or Atom entry.
#
# item_data:: the XML source of one item/entry element
#
# Sets link, title, author, description, time and tag_list from the first
# matching element of each kind, calls parse_feed_item_hook with the raw
# XML, saves the item when Feed.cache_enabled? is true, and returns self.
def parse_item(item_data)
  item_node = Document.new(item_data).root

  # The owning feed's link is the base for relative item links.  When there
  # is no feed, or the feed has no link, a relative link is stored as found
  # instead of raising.
  owner = feed
  base = owner.nil? ? nil : owner.link

  # get the link
  item_link = first_xpath_string(item_node,
    "link[@rel='alternate']/@href",
    "link/@href",
    "link/text()",
    "@rdf:about",
    "guid/text()")
  item_link = CGI.unescapeHTML(item_link) if item_link != ""
  is_relative = item_link != "" &&
    (item_link =~ /http:\/\//) != 0 && (item_link =~ /https:\/\//) != 0
  if is_relative && !base.nil?
    # ensure that we don't end up with 'http://www.foobar.com//path/to/entry';
    # the two-argument index form gives a one character String regardless
    # of whether single indexing returns a Fixnum or a String
    if base[-1, 1] == "/" && item_link[0, 1] == "/"
      item_link = item_link[1..-1]
    end
    # prepend the base to the link since they seem to have used a relative path
    item_link = base + item_link
  end

  item_title = XPath.first(item_node, "title/text()").to_s
  if item_title != ""
    # some blogging tools include the number of comments in a post in the
    # title, in square brackets; strip those out
    item_title = item_title.gsub(/\[\d*\]/,"").strip
  end

  # get the item author
  item_author = CGI.unescapeHTML(first_xpath_string(item_node,
    "author/name/text()",
    "dc:creator/text()",
    "author/text()"))

  # get the item content, preferring an embedded xhtml body
  item_description = ""
  body = XPath.first(item_node, "xhtml:body")
  body = XPath.first(item_node, "body") if body.nil?
  item_description = body.inner_xml unless body.nil?
  if item_description == ""
    item_description =
      CGI.unescapeHTML(XPath.first(item_node, "content:encoded/text()").to_s)
  end
  if item_description == ""
    item_description = XPath.first(item_node, "description/text()").to_s
    if item_description != ""
      if XPath.first(item_node, "description/@encoding").to_s != ""
        item_description = "[Embedded data objects are not supported.]"
      else
        item_description = CGI.unescapeHTML(item_description)
      end
    end
  end
  if item_description == ""
    item_description = XPath.first(item_node, "content/text()").to_s
    if item_description != "" &&
        XPath.first(item_node, "content/@mode").to_s == "escaped"
      item_description = CGI.unescapeHTML(item_description)
    end
  end

  # get the item time
  item_time = first_xpath_string(item_node,
    "pubDate/text()",
    "dc:date/text()",
    "issued/text()")

  # get the item tags
  # Every rdf:li of a dc:subject bag is a tag, including a lone one: this is
  # the form build_feed_item writes, so single-tag items must read back.
  tags_array = xpath_tag_texts(item_node, "dc:subject/rdf:Bag/rdf:li/text()")
  # Repeated category / dc:subject elements are one tag each.  A single
  # element is left for the whitespace split below.
  ["category/text()", "dc:subject/text()"].each do |path|
    next unless tags_array.empty?
    candidates = xpath_tag_texts(item_node, path)
    tags_array = candidates if candidates.size > 1
  end
  if tags_array.empty?
    tags_array =
      XPath.first(item_node, "category/text()").to_s.downcase.split(" ")
  end
  if tags_array.empty?
    begin
      tags_array =
        XPath.first(item_node, "dc:subject/text()").to_s.downcase.split(" ")
    rescue StandardError
      tags_array = []
    end
  end
  if tags_array.empty?
    # last resort: take the tag name out of taxonomy resource urls that
    # contain /tag/<name> or /tags/<name>
    XPath.match(item_node, "taxo:topics/rdf:Bag/rdf:li").each do |tag_node|
      begin
        tag_url = XPath.first(tag_node, "@resource").to_s
        tag_match = tag_url.scan(/\/(tag|tags)\/(\w+)/)
        if tag_match.size > 0
          tags_array << tag_match.first.last.downcase.strip
        end
      rescue StandardError
        # a malformed topic entry is skipped; the remaining ones still count
      end
    end
  end

  # set all of the properties
  self.link = (item_link != "" ? item_link : nil)
  self.title = item_title if item_title != ""
  # the author used to be parsed and then dropped; store it like the others
  self.author = item_author if item_author != ""
  self.description = item_description.strip if item_description != ""
  if item_time != ""
    # An explicit begin/rescue makes the fallback independent of how a
    # trailing rescue modifier binds to an attribute assignment.
    begin
      self.time = Time.parse(item_time)
    rescue StandardError
      self.time = Time.now
    end
  elsif self['time'].nil?
    # only stamp the item with the current time when it has none yet
    self.time = Time.now
  end
  self.tag_list = tags_array if tags_array.size > 0

  parse_feed_item_hook(item_data)
  save if Feed.cache_enabled?
  return self
end

# Returns the string form of the first XPath in paths that yields a
# non-empty result for node, or "" when none of them does.
def first_xpath_string(node, *paths)
  paths.each do |path|
    value = XPath.first(node, path).to_s
    return value unless value == ""
  end
  ""
end
private :first_xpath_string

# Returns the downcased, stripped string form of every node matching path.
def xpath_tag_texts(node, path)
  XPath.match(node, path).map { |tag| tag.to_s.downcase.strip }
end
private :xpath_tag_texts
|
1000
|
+
|
1001
|
+
# Extension point for adding custom output to a generated item.  It receives
# the feed type, the version and the xml builder; this default
# implementation does nothing and returns nil.
def build_feed_item_hook(feed_type, version, xml_builder)
  nil
end
|
1003
|
+
|
1004
|
+
# Writes this item to xml_builder in the requested format and returns
# whatever the builder returns for the item element.
#
# feed_type::   "rss" or "atom"; any other value writes nothing and returns nil
# version::     rss versions 0.9, 1.0 and 1.1 select the RDF-based layout
# xml_builder:: the builder to write to; a new indenting Builder::XmlMarkup
#               is used when none is given
#
# Raises a RuntimeError when an RDF-based item is requested and link is nil.
# build_feed_item_hook is called last inside the item element in every
# format, so that overriding it actually affects the output.
def build_feed_item(feed_type, version, xml_builder=Builder::XmlMarkup.new(:indent => 2))
  if feed_type == "rss" && (version == 0.9 || version == 1.0 || version == 1.1)
    # RDF-based rss format
    if link.nil?
      raise "Cannot generate an rdf-based feed item with a nil link field."
    end
    return xml_builder.item("rdf:about" => CGI.escapeHTML(link)) do
      # title, link and description are always written in this format, as
      # empty elements when there is no value
      if title.nil? || title == ""
        xml_builder.title
      else
        xml_builder.title(title)
      end
      if link.nil? || link == ""
        xml_builder.link
      else
        xml_builder.link(link)
      end
      if description.nil? || description == ""
        xml_builder.description
      else
        xml_builder.description(description)
      end
      xml_builder.tag!("dc:date", time.iso8601) unless time.nil?
      build_feed_item_subject_bag(xml_builder)
      build_feed_item_hook(feed_type, version, xml_builder)
    end
  elsif feed_type == "rss"
    # normal rss format
    return xml_builder.item do
      xml_builder.title(title) unless title.nil? || title == ""
      xml_builder.link(link) unless link.nil? || link == ""
      unless description.nil? || description == ""
        xml_builder.description(description)
      end
      xml_builder.pubDate(time.rfc822) unless time.nil?
      build_feed_item_subject_bag(xml_builder)
      build_feed_item_hook(feed_type, version, xml_builder)
    end
  elsif feed_type == "atom"
    # normal atom format
    return xml_builder.entry("xmlns" => "http://purl.org/atom/ns#") do
      unless title.nil? || title == ""
        xml_builder.title(title,
          "mode" => "escaped",
          "type" => "text/html")
      end
      unless link.nil? || link == ""
        xml_builder.link("href" => link,
          "rel" => "alternate",
          "type" => "text/html",
          "title" => title)
      end
      unless description.nil? || description == ""
        xml_builder.content(description,
          "mode" => "escaped",
          "type" => "text/html")
      end
      xml_builder.issued(time.iso8601) unless time.nil?
      unless tags.nil?
        tag_list.each { |tag| xml_builder.category(tag) }
      end
      build_feed_item_hook(feed_type, version, xml_builder)
    end
  end
end

# Writes the item's tags as a dc:subject element holding an rdf:Bag with one
# rdf:li per tag.  Nothing is written when tags is nil.
def build_feed_item_subject_bag(xml_builder)
  return if tags.nil?
  xml_builder.tag!("dc:subject") do
    xml_builder.tag!("rdf:Bag") do
      tag_list.each { |tag| xml_builder.tag!("rdf:li", tag) }
    end
  end
end
private :build_feed_item_subject_bag
|
1094
|
+
|
1095
|
+
# Saves the current state of the feed item to the database unless the feed lacks
|
1096
|
+
# a remote location
|
1097
|
+
def save
  # An item is only written when it belongs to a feed that has a url;
  # otherwise nothing happens and nil is returned.
  owner = feed
  return nil if owner.nil? || owner.url.nil? || owner.url == ""
  super
end
|
1102
|
+
end
|
1103
|
+
end
|
1104
|
+
|
1105
|
+
module REXML
  class Element
    # Small extension to REXML that simplifies parsing of xhtml feed items:
    # returns the string form of every child node of this element,
    # concatenated in document order, without the element's own tags.
    def inner_xml
      children.inject("") { |xml, child| xml << child.to_s }
    end
  end
end
|
1117
|
+
|
1118
|
+
# Runs when the file is loaded: prepares the database connection and creates
# the cache if it does not exist yet.
begin
  FeedTools::Feed.prepare_connection
  FeedTools::Feed.create_cache unless FeedTools::Feed.cache_exists?
rescue
  # Any failure here is ignored on purpose: nothing can be done until a
  # database connection has been set up, and that is left to the user.
end
|