feedtools 0.2.10 → 0.2.11
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +8 -0
- data/db/schema.mysql.sql +11 -10
- data/db/schema.postgresql.sql +10 -9
- data/db/schema.sqlite.sql +10 -9
- data/lib/feed_tools.rb +11 -3752
- data/lib/{database_feed_cache.rb → feed_tools/database_feed_cache.rb} +0 -0
- data/lib/feed_tools/feed.rb +2073 -0
- data/lib/feed_tools/feed_item.rb +1634 -0
- data/rakefile +1 -1
- metadata +5 -3
File without changes
|
@@ -0,0 +1,2073 @@
|
|
1
|
+
module FeedTools
|
2
|
+
# The <tt>FeedTools::Feed</tt> class represents a web feed's structure.
|
3
|
+
class Feed
|
4
|
+
include REXML # :nodoc:
|
5
|
+
|
6
|
+
# Represents a feed/feed item's category
|
7
|
+
class Category
  # term   - the category term value
  # scheme - the categorization scheme
  # label  - a human-readable description of the category
  attr_accessor :term, :scheme, :label

  # Reader-only synonyms: +value+ and +category+ read +term+, and
  # +domain+ reads +scheme+.  No writer aliases are defined.
  alias value term
  alias category term
  alias domain scheme
end
|
20
|
+
|
21
|
+
# Represents a feed/feed item's author
|
22
|
+
class Author
  # name  - the author's real name
  # email - the author's email address
  # url   - the url of the author's homepage
  # raw   - the raw value of the author tag, if present
  attr_accessor :name, :email, :url, :raw
end
|
33
|
+
|
34
|
+
# Represents a feed's image
|
35
|
+
class Image
  # title       - the image's title
  # description - the image's description
  # url         - the image's url
  # link        - the url to link the image to
  attr_accessor :title, :description, :url, :link

  # width / height - the dimensions of the image
  attr_accessor :width, :height

  # style - the style of the image; possible values are "icon", "image",
  # or "image-wide"
  attr_accessor :style
end
|
53
|
+
|
54
|
+
# Represents a feed's text input element.
|
55
|
+
# Be aware that this will be ignored for feed generation. It's a
|
56
|
+
# pointless element that aggregators usually ignore and it doesn't have an
|
57
|
+
# equivalent in all feeds types.
|
58
|
+
class TextInput
  # title       - the label of the Submit button in the text input area
  # description - explains the text input area
  # link        - the URL of the CGI script that processes text input requests
  # name        - the name of the text object in the text input area
  attr_accessor :title, :description, :link, :name
end
|
69
|
+
|
70
|
+
# Represents a feed's cloud.
|
71
|
+
# Be aware that this will be ignored for feed generation.
|
72
|
+
class Cloud
  # domain - the domain of the cloud
  # path   - the path for the cloud
  # port   - the port the cloud is listening on
  attr_accessor :domain, :path, :port

  # protocol - the web services protocol the cloud uses; possible values
  # are either "xml-rpc" or "soap"
  attr_accessor :protocol

  # register_procedure - the procedure to use to request notification
  attr_accessor :register_procedure
end
|
86
|
+
|
87
|
+
# Represents a simple hyperlink
|
88
|
+
class Link
  # url   - the url that is being linked to
  # value - the content of the hyperlink
  attr_accessor :url, :value

  # +href+ is a reader-only synonym for +url+.
  alias href url
end
|
97
|
+
|
98
|
+
# Initialize the feed object
|
99
|
+
def initialize
  super
  # Every cached or assignable field starts out unset, declared in the
  # same order as before so instance_variables keeps its ordering.
  %w[cache_object http_headers xml_doc xml_data root_node channel_node
     url id title description link time_to_live items].each do |field|
    instance_variable_set("@#{field}", nil)
  end
  # Not live until load_remote_feed! marks the feed as freshly fetched.
  @live = false
end
|
116
|
+
|
117
|
+
# Raises an exception if an invalid option has been specified to
|
118
|
+
# prevent misspellings from slipping through
|
119
|
+
def self.validate_options(valid_option_keys, supplied_option_keys)
  # Anything supplied that is not on the whitelist is treated as a typo.
  unknown_option_keys = supplied_option_keys - valid_option_keys
  return if unknown_option_keys.empty?
  raise ArgumentError, "Unknown options: #{unknown_option_keys}"
end
# Internal helper: only callable with an implicit receiver.
class << self; private :validate_options; end
|
126
|
+
|
127
|
+
# Loads the feed specified by the url, pulling the data from the
|
128
|
+
# cache if it hasn't expired.
|
129
|
+
# Options are:
|
130
|
+
# * <tt>:cache_only</tt> - If set to true, the feed will only be
|
131
|
+
# pulled from the cache.
|
132
|
+
def self.open(url, options={})
  # Reject misspelled options before doing any work, then fill in defaults.
  validate_options([ :cache_only ], options.keys)
  options = { :cache_only => false }.merge(options)

  # Build the feed around the cleaned-up url.
  feed = Feed.new
  feed.url = FeedTools.normalize_url(url)

  # With :cache_only the feed is returned without calling update!.
  feed.update! unless options[:cache_only]
  feed
end
|
146
|
+
|
147
|
+
# Loads the feed from the remote url if the feed has expired from the cache or cannot be
|
148
|
+
# retrieved from the cache for some reason.
|
149
|
+
def update!
  # Restore previously stored response headers when none are loaded yet.
  if self.http_headers.nil?
    cached = self.cache_object
    unless cached.nil? || cached.http_headers.nil?
      # NOTE(review): YAML.load deserializes whatever the cache holds;
      # confirm the cache contents are trusted.
      restored = YAML.load(cached.http_headers)
      @http_headers = restored.kind_of?(Hash) ? restored : {}
    end
  end

  # A feed that has not expired is served as-is and flagged as not live;
  # anything else triggers a remote fetch.
  return @live = false if self.expired? == false
  load_remote_feed!
end
|
161
|
+
|
162
|
+
# Attempts to load the feed from the remote location. Requires the url
|
163
|
+
# field to be set. If an etag or the last_modified date has been set,
|
164
|
+
# attempts to use them to prevent unnecessary reloading of identical
|
165
|
+
# content.
|
166
|
+
def load_remote_feed!
  # Fetches the feed named by +url+ and stores the raw data in +xml_data+.
  #
  # http urls are fetched with conditional-GET headers built from any
  # stored etag / last-modified values, following up to 10 redirects.
  # file urls are read from disk.  ftp urls are recognized but nothing is
  # fetched for them.  Any other scheme raises FeedAccessError.
  #
  # When an http fetch fails and xml_data (possibly from the cache) is
  # available, the failure is swallowed and the feed is flagged as not
  # live; with no xml_data the error is re-raised.  Afterwards the feed is
  # saved if a cache object exists (best effort).
  @live = true
  if self.http_headers.nil? && !(self.cache_object.nil?) &&
      !(self.cache_object.http_headers.nil?)
    # NOTE(review): YAML.load deserializes whatever the cache holds;
    # confirm the cache contents are trusted.
    @http_headers = YAML.load(self.cache_object.http_headers)
    # Same guard as update!: a non-Hash payload would break the header
    # lookups below.
    @http_headers = {} unless @http_headers.kind_of? Hash
  end

  if (self.url =~ /^feed:/) == 0
    # The feed: pseudo-protocol should have been normalized away already;
    # normalize again in case it slipped through.
    self.url = FeedTools.normalize_url(self.url)
  end

  # Find out what method we're going to be using to obtain this feed.
  uri = URI.parse(self.url)
  retrieval_method = "http"
  case uri.scheme
  when "http"
    retrieval_method = "http"
  when "ftp"
    retrieval_method = "ftp"
  when "file"
    retrieval_method = "file"
  when nil
    raise FeedAccessError,
      "No protocol was specified in the url."
  else
    raise FeedAccessError,
      "Cannot retrieve feed using unrecognized protocol: " + uri.scheme
  end

  # No need for http headers unless we're actually doing http
  if retrieval_method == "http"
    # Set up the appropriate http headers
    headers = {}
    unless self.http_headers.nil?
      headers["If-None-Match"] =
        self.http_headers['etag'] unless self.http_headers['etag'].nil?
      headers["If-Modified-Since"] =
        self.http_headers['last-modified'] unless
        self.http_headers['last-modified'].nil?
    end
    headers["User-Agent"] =
      FeedTools.user_agent unless FeedTools.user_agent.nil?

    # The http feed access method.  Calls itself for each redirect, with
    # response_chain accumulating [url, response] pairs seen so far.
    http_fetch = lambda do |feed_url, http_headers, redirect_limit,
        response_chain, no_headers|
      raise FeedAccessError, 'Redirect too deep' if redirect_limit == 0
      feed_uri = nil
      begin
        feed_uri = URI.parse(feed_url)
      rescue URI::InvalidURIError
        # Try once more with a normalized url.
        feed_uri = URI.parse(FeedTools.normalize_url(feed_url))
      end

      # Borrowed from open-uri:
      # According to RFC2616 14.23, Host: request-header field should be
      # set to an origin server.
      # But net/http wrongly set a proxy server if an absolute URI is
      # specified as a request URI.
      # So override it here explicitly.
      http_headers['Host'] = feed_uri.host
      http_headers['Host'] += ":#{feed_uri.port}" if feed_uri.port

      Net::HTTP.start(feed_uri.host, (feed_uri.port or 80)) do |http|
        final_uri = feed_uri.path
        # A url such as "http://example.com" has an empty path, which is
        # not a valid request target; ask for the root instead.
        final_uri = "/" if final_uri.nil? || final_uri.empty?
        final_uri += ('?' + feed_uri.query) if feed_uri.query
        http_headers = {} if no_headers
        response = http.request_get(final_uri, http_headers)

        case response
        when Net::HTTPSuccess
          # We've reached the final destination, process all previous
          # redirections, and see if we need to update the url.
          response_chain.each do |redirected_response|
            # Stop at the first redirect that isn't a permanent move.
            break unless redirected_response.last.code.to_i == 301
            # Reset the cache object or we may get duplicate entries
            self.cache_object = nil
            self.url = redirected_response.last['location']
          end
          response
        when Net::HTTPRedirection
          if response.code.to_i == 304
            # Not modified: surfaces as an error handled by the caller.
            response.error!
          else
            if response['location'].nil?
              raise FeedAccessError,
                "No location to redirect to supplied: " + response.code
            end
            response_chain << [feed_url, response]
            new_location = response['location']
            if response_chain.assoc(new_location) != nil
              raise FeedAccessError, "Redirection loop detected."
            end

            # Find out if we've already seen the url we've been
            # redirected to.
            cached_feed = FeedTools::Feed.open(new_location,
              :cache_only => true)
            found_redirect = false
            # cache_object is nil when no feed cache is configured; treat
            # that as "not cached" rather than failing the redirect.
            cached_record = cached_feed.cache_object
            if !cached_record.nil? && cached_record.new_record? != true
              unless cached_feed.expired?
                self.url = cached_feed.url
                @live = false
                found_redirect = true
              end
            end
            if found_redirect
              response
            else
              # TODO: deal with relative urls in the Location header
              http_fetch.call(new_location, http_headers,
                redirect_limit - 1, response_chain, no_headers)
            end
          end
        else
          # Attach the redirect history to the failing response so the
          # caller can report it, then raise the matching HTTP error.
          class << response
            def response_chain
              return @response_chain
            end
          end
          response.instance_variable_set("@response_chain",
            response_chain)
          response.error!
        end
      end
    end

    begin
      begin
        @http_response = http_fetch.call(self.url, headers, 10, [], false)
      rescue => error
        if error.respond_to?(:response)
          # Some servers answer 404 based on the User-Agent header.
          # Retry once without any custom headers before giving up.
          if error.response.code.to_i == 404 &&
              FeedTools.user_agent != nil
            @http_response = http_fetch.call(self.url, {}, 10, [], true)
            if @http_response != nil && @http_response.code.to_i == 200
              warn("The server appears to be blocking based on the " +
                "User-Agent header.  This is stupid, and you should " +
                "inform the webmaster of this.")
            end
          else
            raise error
          end
        else
          raise error
        end
      end
      unless @http_response.kind_of? Net::HTTPRedirection
        @http_headers = {}
        # each_header yields the name and value as two block arguments.
        self.http_response.each_header do |name, value|
          self.http_headers[name.downcase] = value
        end
        self.last_retrieved = Time.now
        self.xml_data = self.http_response.body
      end
    rescue FeedAccessError, Timeout::Error, Errno::ECONNRESET
      # Access errors, timeouts and connection resets all fall back to
      # the xml_data stored in the cache, if there is any.
      @live = false
      if self.xml_data.nil?
        raise
      end
    rescue => error
      # If anything else goes wrong, fall back to the xml_data stored in
      # the cache.

      # If we can, get the HTTPResponse...
      @http_response = nil
      if error.respond_to?(:each_header)
        @http_response = error
      end
      if error.respond_to?(:response) &&
          error.response.respond_to?(:each_header)
        @http_response = error.response
      end
      if @http_response != nil
        @http_headers = {}
        self.http_response.each_header do |name, value|
          self.http_headers[name.downcase] = value
        end
        if self.http_response.code.to_i == 304
          self.last_retrieved = Time.now
        end
      end
      @live = false
      if self.xml_data.nil?
        if error.respond_to?(:response) &&
            error.response.respond_to?(:response_chain)
          redirects = error.response.response_chain.map do |pair|
            pair.first
          end
          error.message << (" - Redirects: " + redirects.inspect)
        end
        raise error
      end
    end
  elsif retrieval_method == "https"
    # Not supported... yet
    # (Currently unreachable: the scheme check above rejects "https".)
  elsif retrieval_method == "ftp"
    # Not supported... yet
    # Technically, CDF feeds are supposed to be able to be accessed directly
    # from an ftp server.
  elsif retrieval_method == "file"
    # Strip the 'file://' off the front of the url to get the path.
    file_name = self.url.gsub(/^file:\/\//, "")
    begin
      # File.open rather than Kernel#open: the name comes from the url and
      # must only ever be treated as a path, never as a "|command".
      File.open(file_name) do |file|
        @http_response = nil
        @http_headers = {}
        self.last_retrieved = Time.now
        self.xml_data = file.read
      end
    rescue
      @live = false
      # In this case, pulling from the cache is probably not going
      # to help at all, and the user should probably be immediately
      # apprised of the problem.  Raise the exception.
      raise
    end
  end
  unless self.cache_object.nil?
    begin
      self.save
    rescue
      # Saving to the cache is best effort; a failure here is ignored.
    end
  end
end
|
429
|
+
|
430
|
+
# Returns the relevant information from an http request.
|
431
|
+
def http_response
  # Plain reader: whatever the last http fetch stored, or nil.
  @http_response
end
|
434
|
+
|
435
|
+
# Returns a hash of the http headers from the response.
|
436
|
+
def http_headers
  # Plain reader: the stored header hash, or nil when none is loaded.
  @http_headers
end
|
439
|
+
|
440
|
+
# Returns the feed's raw xml data.
|
441
|
+
def xml_data
  return @xml_data unless @xml_data.nil?
  # Nothing in memory yet: fall back to the copy held by the cache
  # object, when there is one.
  cached = self.cache_object
  @xml_data = cached.xml_data unless cached.nil?
  @xml_data
end
|
449
|
+
|
450
|
+
# Sets the feed's xml data.
|
451
|
+
def xml_data=(new_xml_data)
  @xml_data = new_xml_data
  # Drop the memoized parse so xml, root_node and channel_node are rebuilt
  # from the new data instead of returning nodes of the previous document.
  @xml_doc = nil
  @root_node = nil
  @channel_node = nil
  # Keep the cache object's copy in step, when there is one.
  unless self.cache_object.nil?
    self.cache_object.xml_data = new_xml_data
  end
end
|
457
|
+
|
458
|
+
# Returns a REXML Document of the xml_data
|
459
|
+
def xml
  return @xml_doc unless @xml_doc.nil?
  # TODO: :ignore_whitespace_nodes => :all
  # Add that?
  @xml_doc =
    begin
      Document.new(xml_data)
    rescue
      # Strict parsing failed; attempt to repair the xml with htree.
      HTree.parse(xml_data).to_rexml
    end
end
|
473
|
+
|
474
|
+
# Returns the first node within the channel_node that matches the xpath query.
|
475
|
+
def find_node(xpath)
  # The query is evaluated relative to channel_node; nil when nothing matches.
  XPath.first(channel_node, xpath)
end
|
478
|
+
|
479
|
+
# Returns all nodes within the channel_node that match the xpath query.
|
480
|
+
def find_all_nodes(xpath)
  # The query is evaluated relative to channel_node; empty when nothing matches.
  XPath.match(channel_node, xpath)
end
|
483
|
+
|
484
|
+
# Returns the root node of the feed.
|
485
|
+
def root_node
  # TODO: Fix this so that added content at the end of the file doesn't
  # break this stuff.
  # E.g.: http://smogzer.tripod.com/smog.rdf
  @root_node = xml.root if @root_node.nil?
  @root_node
end
|
495
|
+
|
496
|
+
# Returns the channel node of the feed.
|
497
|
+
def channel_node
  if @channel_node.nil? && root_node != nil
    # Try the known container element names in order of preference.
    %w[channel CHANNEL feedinfo].each do |container|
      @channel_node = XPath.first(root_node, container)
      break unless @channel_node == nil
    end
    # No container element: the root itself carries the feed fields.
    @channel_node = root_node if @channel_node == nil
  end
  @channel_node
end
|
512
|
+
|
513
|
+
# The cache object that handles the feed persistence.
|
514
|
+
def cache_object
  store = FeedTools.feed_cache
  # Nothing to look up without a configured cache, or once one is loaded.
  return @cache_object if store.nil? || !@cache_object.nil?
  begin
    # Prefer a lookup by id, then by url; otherwise start a new record.
    record =
      if @id != nil
        store.find_by_id(@id)
      elsif @url != nil
        store.find_by_url(@url)
      end
    record = store.new if record.nil?
    @cache_object = record
  rescue
    # Best effort: a failing cache lookup leaves the cache object unset.
  end
  @cache_object
end
|
532
|
+
|
533
|
+
# Sets the cache object for this feed.
|
534
|
+
#
|
535
|
+
# This can be any object, but it must accept the following messages:
|
536
|
+
# url
|
537
|
+
# url=
|
538
|
+
# title
|
539
|
+
# title=
|
540
|
+
# link
|
541
|
+
# link=
|
542
|
+
# xml_data
|
543
|
+
# xml_data=
|
544
|
+
# etag
|
545
|
+
# etag=
|
546
|
+
# last_modified
|
547
|
+
# last_modified=
|
548
|
+
# save
|
549
|
+
def cache_object=(new_cache_object)
  # Stored as given; passing nil makes the reader look the record up again.
  @cache_object = new_cache_object
end
|
552
|
+
|
553
|
+
# Returns the type of feed
|
554
|
+
# Possible values:
|
555
|
+
# "rss", "atom", "cdf", "!okay/news"
|
556
|
+
def feed_type
  # Classifies the feed by the (case-insensitive) name of its root element
  # and memoizes the answer.  Returns "atom", "rss", "cdf", or nil when the
  # root is missing or not recognized.  ("!okay/news" is never detected
  # here; it can only be assigned through feed_type=.)
  if @feed_type.nil?
    root = self.root_node
    # An empty or unparsable document has no root element to inspect.
    return nil if root.nil?
    case root.name.downcase
    when "feed"
      @feed_type = "atom"
    when "rdf:rdf", "rdf", "rss"
      @feed_type = "rss"
    when "channel"
      @feed_type = "cdf"
    end
  end
  @feed_type
end
|
573
|
+
|
574
|
+
# Sets the default feed type
|
575
|
+
def feed_type=(new_feed_type)
  # Stored as given; a non-nil value short-circuits detection in feed_type.
  @feed_type = new_feed_type
end
|
578
|
+
|
579
|
+
# Returns the version number of the feed type.
|
580
|
+
# Intentionally does not differentiate between the Netscape and Userland
|
581
|
+
# versions of RSS 0.91.
|
582
|
+
def feed_version
  return @feed_version unless @feed_version.nil?

  # A missing, unreadable or zero version attribute counts as "no version".
  version =
    begin
      XPath.first(root_node, "@version").to_s.strip.to_f
    rescue
      nil
    end
  version = nil if version == 0.0
  default_namespace = XPath.first(root_node, "@xmlns").to_s.strip

  case self.feed_type
  when "atom"
    # The Atom 1.0 namespace wins over an explicit version attribute, which
    # in turn wins over the Atom 0.3 namespace.
    if default_namespace == "http://www.w3.org/2005/Atom"
      @feed_version = 1.0
    elsif version != nil
      @feed_version = version
    elsif default_namespace == "http://purl.org/atom/ns#"
      @feed_version = 0.3
    end
  when "rss"
    namespace_versions = {
      "http://my.netscape.com/rdf/simple/0.9/" => 0.9,
      "http://purl.org/rss/1.0/" => 1.0,
      "http://purl.org/net/rss1.1#" => 1.1
    }
    if namespace_versions.key?(default_namespace)
      @feed_version = namespace_versions[default_namespace]
    elsif version != nil
      # 2.1 and 2.01 are reported as plain 2.0.
      @feed_version = (version == 2.1 || version == 2.01) ? 2.0 : version
    end
  when "cdf"
    @feed_version = 0.4
  when "!okay/news"
    @feed_version = nil
  end
  @feed_version
end
|
625
|
+
|
626
|
+
# Sets the default feed version
|
627
|
+
def feed_version=(new_feed_version)
  # Stored as given; a non-nil value short-circuits detection in feed_version.
  @feed_version = new_feed_version
end
|
630
|
+
|
631
|
+
# Returns the feed's unique id
|
632
|
+
def id
  if @id.nil?
    # Look under the channel node first, then under the root node; within
    # each, an <id> element is preferred over a <guid> element.
    [channel_node, root_node].each do |node|
      next if node.nil?
      ["id/text()", "guid/text()"].each do |query|
        @id = XPath.first(node, query).to_s if @id.nil? || @id == ""
      end
    end
    @id = nil if @id == ""
  end
  @id
end
|
652
|
+
|
653
|
+
# Sets the feed's unique id
|
654
|
+
def id=(new_id)
  # Stored as given; a non-nil value short-circuits the lookup in id.
  @id = new_id
end
|
657
|
+
|
658
|
+
# Returns the feed url.
|
659
|
+
def url
  # With no url assigned, fall back to the feed's own rel="self" link,
  # provided there is xml data to read it from.
  if @url.nil? && !self.xml_data.nil?
    self_href = XPath.first(channel_node, "link[@rel='self']/@href").to_s
    @url = (self_href == "") ? nil : self_href
  end
  @url
end
|
666
|
+
|
667
|
+
# Sets the feed url and prepares the cache_object if necessary.
|
668
|
+
def url=(new_url)
  # The url is always kept in normalized form.
  @url = FeedTools.normalize_url(new_url)
  # Store the same normalized value in the cache record: cache_object
  # looks records up with find_by_url(@url), so persisting the raw string
  # could make later lookups miss and create duplicate entries.
  self.cache_object.url = @url unless self.cache_object.nil?
end
|
672
|
+
|
673
|
+
# Returns the feed title
|
674
|
+
def title
  # Returns the feed title, cleaned up and memoized, or nil when the feed
  # has no title element.  The computed value is also copied to the cache
  # object when one exists.
  if @title.nil?
    repair_entities = false
    title_node = nil
    unless channel_node.nil?
      # Accept the plain, Dublin Core and upper-case spellings, in that order.
      ["title", "dc:title", "TITLE"].each do |name|
        title_node = XPath.first(channel_node, name)
        break unless title_node.nil?
      end
    end
    return nil if title_node.nil?

    type = XPath.first(title_node, "@type").to_s
    mode = XPath.first(title_node, "@mode").to_s
    if ["xhtml", "xml", "application/xhtml+xml"].include?(type) ||
        ["xhtml", "xml"].include?(mode)
      # Inline markup: take the node's content as-is.
      @title = title_node.inner_xml
    elsif type == "escaped" || mode == "escaped"
      @title = FeedTools.unescape_entities(
        XPath.first(title_node, "text()").to_s)
    else
      # Undeclared content: entities are unescaped after sanitizing.
      @title = title_node.inner_xml
      repair_entities = true
    end
    unless @title.nil?
      @title = FeedTools.sanitize_html(@title, :strip)
      @title = FeedTools.unescape_entities(@title) if repair_entities
      @title = FeedTools.tidy_html(@title)
    end
    # Guarded like the step above, and non-mutating, so a nil or frozen
    # result from the helpers cannot raise here.
    unless @title.nil?
      @title = @title.gsub(/\n/, " ").strip
      @title = nil if @title == ""
    end
    self.cache_object.title = @title unless self.cache_object.nil?
  end
  @title
end
|
715
|
+
|
716
|
+
# Sets the feed title
|
717
|
+
def title=(new_title)
  @title = new_title
  # Mirror the value onto the cache object, when there is one.
  unless self.cache_object.nil?
    self.cache_object.title = new_title
  end
end
|
721
|
+
|
722
|
+
# Returns the feed description
|
723
|
+
def description
  if @description.nil?
    repair_entities = false
    description_node = nil
    unless channel_node.nil?
      # Candidate element names in order of preference.  The flag marks
      # elements that are not really meant to hold a feed description;
      # using one of them sets @bozo.
      candidates = [
        ["description", false], ["tagline", false], ["subtitle", false],
        ["summary", false], ["abstract", false], ["ABSTRACT", false],
        ["info", false], ["content:encoded", true], ["content", true],
        ["xhtml:body", true], ["body", true]
      ]
      candidates.each do |name, flags_bozo|
        description_node = XPath.first(channel_node, name)
        next if description_node.nil?
        @bozo = true if flags_bozo
        break
      end
    end
    return nil if description_node.nil?

    if XPath.first(description_node, "@encoding").to_s != ""
      @description =
        "[Embedded data objects are not currently supported.]"
    else
      type = XPath.first(description_node, "@type").to_s
      mode = XPath.first(description_node, "@mode").to_s
      if ["xhtml", "xml", "application/xhtml+xml"].include?(type) ||
          ["xhtml", "xml"].include?(mode)
        # Inline markup: take the node's content as-is.
        @description = description_node.inner_xml
      elsif type == "escaped" || mode == "escaped"
        @description = FeedTools.unescape_entities(
          description_node.inner_xml)
      else
        # Undeclared content: entities are unescaped after sanitizing.
        @description = description_node.inner_xml
        repair_entities = true
      end
    end

    # An empty description falls back to the itunes summary, then to the
    # itunes subtitle.
    [:itunes_summary, :itunes_subtitle].each do |fallback|
      break unless @description == ""
      replacement = self.send(fallback)
      @description = replacement.nil? ? "" : replacement
    end

    unless @description.nil?
      @description = FeedTools.sanitize_html(@description, :strip)
      @description = FeedTools.unescape_entities(@description) if repair_entities
      @description = FeedTools.tidy_html(@description)
    end

    @description = @description.strip unless @description.nil?
    @description = nil if @description == ""
  end
  @description
end
|
806
|
+
|
807
|
+
# Sets the feed description
|
808
|
+
def description=(new_description)
  # Stored as given; a non-nil value short-circuits parsing in description.
  @description = new_description
end
|
811
|
+
|
812
|
+
# Returns the contents of the itunes:summary element
|
813
|
+
def itunes_summary
  if @itunes_summary.nil?
    # Look under the channel node first; the root node is only consulted
    # while nothing non-empty has been found.
    [channel_node, root_node].each do |node|
      next if node.nil?
      next unless @itunes_summary.nil? || @itunes_summary == ""
      @itunes_summary = FeedTools.unescape_entities(
        XPath.first(node, "itunes:summary/text()").to_s)
    end
    @itunes_summary = nil if @itunes_summary == ""
    unless @itunes_summary.nil?
      @itunes_summary = FeedTools.sanitize_html(@itunes_summary)
    end
  end
  @itunes_summary
end
|
833
|
+
|
834
|
+
# Sets the contents of the itunes:summary element
|
835
|
+
def itunes_summary=(new_itunes_summary)
  # Stored as given; a non-nil value short-circuits parsing in itunes_summary.
  @itunes_summary = new_itunes_summary
end
|
838
|
+
|
839
|
+
# Returns the contents of the itunes:subtitle element
|
840
|
+
def itunes_subtitle
  if @itunes_subtitle.nil?
    # Look under the channel node first; the root node is only consulted
    # while nothing non-empty has been found.
    [channel_node, root_node].each do |node|
      next if node.nil?
      next unless @itunes_subtitle.nil? || @itunes_subtitle == ""
      @itunes_subtitle = FeedTools.unescape_entities(
        XPath.first(node, "itunes:subtitle/text()").to_s)
    end
    @itunes_subtitle = nil if @itunes_subtitle == ""
    unless @itunes_subtitle.nil?
      @itunes_subtitle = FeedTools.sanitize_html(@itunes_subtitle)
    end
  end
  @itunes_subtitle
end
|
861
|
+
|
862
|
+
# Sets the contents of the itunes:subtitle element
|
863
|
+
def itunes_subtitle=(new_itunes_subtitle)
  # Stored as given; a non-nil value short-circuits parsing in itunes_subtitle.
  @itunes_subtitle = new_itunes_subtitle
end
|
866
|
+
|
867
|
+
# Returns the feed link
|
868
|
+
def link
  # Returns the feed's link, memoized, and copies it to the cache object
  # when one exists.  The value is run through FeedTools.normalize_url.
  if @link.nil?
    unless channel_node.nil?
      # Queries in order of preference.  The first one needs "and" between
      # its two conditions to be a valid XPath predicate.
      link_queries = [
        "link[@rel='alternate' and @type='text/html']/@href",
        "link[@rel='alternate']/@href",
        "link/@href",
        "link/text()",
        "@href",
        "@HREF",
        "a/@href",
        "A/@HREF"
      ]
      link_queries.each do |query|
        @link = XPath.first(channel_node, query).to_s
        break unless @link == ""
      end
    end
    if @link == "" || @link.nil?
      # Fall back to the guid when it looks like a uri.
      if FeedTools.is_uri? self.guid
        @link = self.guid
      end
    end
    if @link == "" && channel_node != nil
      # Technically, we shouldn't use the base attribute for this, but if
      # the href attribute is missing, it's already a given that we're
      # looking at a messed up CDF file.
      @link = XPath.first(channel_node, "@base").to_s
    end
    @link = FeedTools.normalize_url(@link)
    unless self.cache_object.nil?
      self.cache_object.link = @link
    end
  end
  @link
end
|
913
|
+
|
914
|
+
# Sets the feed link and, when a cache object is attached, stores the
# same value on it.
def link=(new_link)
  @link = new_link
  cache = self.cache_object
  cache.link = new_link unless cache.nil?
end
|
921
|
+
|
922
|
+
# Returns the url to the icon file for this feed, or nil if none is found.
#
# An icon node is searched for in the channel node under several element
# and attribute spellings.  When one is found, its href attribute is used,
# then its text content (only if that text is a URI).  If the node yields
# no usable url, "/favicon.ico" on the host of the feed's link is used
# instead; the link field is used in order to avoid grabbing the favicon
# for services like feedburner.
def icon
  return @icon unless @icon.nil?

  icon_node = nil
  [
    "link[@rel='icon']",
    "link[@rel='shortcut icon']",
    "link[@type='image/x-icon']",
    "icon",
    "logo[@style='icon']",
    "LOGO[@STYLE='ICON']"
  ].each do |path|
    icon_node = XPath.first(channel_node, path)
    break unless icon_node.nil?
  end
  # Without an icon node nothing is derived and @icon stays nil.
  return @icon if icon_node.nil?

  @icon = FeedTools.unescape_entities(
    XPath.first(icon_node, "@href").to_s)
  if @icon == ""
    @icon = FeedTools.unescape_entities(
      XPath.first(icon_node, "text()").to_s)
    @icon = "" unless FeedTools.is_uri? @icon
  end
  if @icon == "" && self.link != nil && self.link != ""
    link_uri = URI.parse(FeedTools.normalize_url(self.link))
    @icon = link_uri.scheme + "://" + link_uri.host + "/favicon.ico"
  end
  @icon = nil if @icon == ""
  return @icon
end
|
964
|
+
|
965
|
+
# Returns the feed author as a FeedTools::Feed::Author.
#
# The author is resolved on first access and memoized in @author.  The
# channel node is searched for an author element under several names.
# The element's text is kept as +raw+ and parsed for the
# "Name (email)" / "email (Name)" conventions or a bare email address.
# Anything still missing is then read from name/email/url child elements
# or attributes of the author node.  Empty values are reported as nil.
# If no name was found, the channel-level itunes:author is used.
def author
  if @author.nil?
    @author = FeedTools::Feed::Author.new
    author_node = nil
    unless channel_node.nil?
      [
        "author", "managingEditor", "dc:author", "dc:creator", "atom:author"
      ].each do |path|
        author_node = XPath.first(channel_node, path)
        break unless author_node.nil?
      end
    end
    unless author_node.nil?
      @author.raw = FeedTools.unescape_entities(
        XPath.first(author_node, "text()").to_s)
      @author.raw = nil if @author.raw == ""
      unless @author.raw.nil?
        # "Name (email)" first, then "email (Name)"; the pair is always
        # ordered [name, email].
        author_raw_pair = @author.raw.scan(
          /(.*)\((\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\)/i).first
        if author_raw_pair.nil?
          email_first = @author.raw.scan(
            /(\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\s*\((.*)\)/i).first
          author_raw_pair = email_first.reverse unless email_first.nil?
        end
        if author_raw_pair.nil?
          # Neither convention matched; look for a bare email address.
          bare_email = @author.raw.scan(
            /\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b/i).first
          @author.email = bare_email.strip unless bare_email.nil?
        end
        if author_raw_pair.nil? || author_raw_pair.size == 0
          unless @author.raw.include?("@")
            # We can be reasonably sure we are looking at something
            # that the creator didn't intend to contain an email address if
            # it got through the preceding regexes and it doesn't
            # contain the tell-tale '@' symbol.
            @author.name = @author.raw
          end
        else
          @author.name = author_raw_pair.first.strip
          @author.email = author_raw_pair.last.strip
        end
      end
      # Normalize unset fields to "" so the fallbacks below actually run.
      # Previously only name was normalized, so email and url (nil on a
      # fresh Author) never matched the == "" checks and were never read
      # from the author node's children or attributes.
      @author.name = "" if @author.name.nil?
      @author.email = "" if @author.email.nil?
      @author.url = "" if @author.url.nil?
      if @author.name == ""
        @author.name = FeedTools.unescape_entities(
          XPath.first(author_node, "name/text()").to_s)
      end
      if @author.name == ""
        @author.name = FeedTools.unescape_entities(
          XPath.first(author_node, "@name").to_s)
      end
      if @author.email == ""
        @author.email = FeedTools.unescape_entities(
          XPath.first(author_node, "email/text()").to_s)
      end
      if @author.email == ""
        @author.email = FeedTools.unescape_entities(
          XPath.first(author_node, "@email").to_s)
      end
      if @author.url == ""
        @author.url = FeedTools.unescape_entities(
          XPath.first(author_node, "url/text()").to_s)
      end
      if @author.url == ""
        @author.url = FeedTools.unescape_entities(
          XPath.first(author_node, "@url").to_s)
      end
      @author.name = nil if @author.name == ""
      @author.raw = nil if @author.raw == ""
      @author.email = nil if @author.email == ""
      @author.url = nil if @author.url == ""
    end
    # Fallback on the itunes module if we didn't find an author name
    begin
      @author.name = self.itunes_author if @author.name.nil?
    rescue
      @author.name = nil
    end
  end
  return @author
end
|
1057
|
+
|
1058
|
+
# Sets the feed author.
#
# An object responding to name, email and url is treated as a complete
# author object and stored as-is.  Anything else (probably a string) is
# stored as the name of the current author, creating a new
# FeedTools::Feed::Author first if none exists yet.
def author=(new_author)
  complete_author = [:name, :email, :url].all? do |attribute|
    new_author.respond_to?(attribute)
  end
  if complete_author
    @author = new_author
  else
    @author = FeedTools::Feed::Author.new if @author.nil?
    @author.name = new_author
  end
end
|
1074
|
+
|
1075
|
+
# Returns the feed publisher as a FeedTools::Feed::Author.
#
# Resolved on first access and memoized in @publisher.  The raw text is
# read from the channel's dc:publisher element, falling back to
# webMaster, and is parsed for the "Name (email)" / "email (Name)"
# conventions or a bare email address.  Empty values are reported as nil.
def publisher
  return @publisher unless @publisher.nil?
  @publisher = FeedTools::Feed::Author.new

  @publisher.raw = FeedTools.unescape_entities(
    XPath.first(channel_node, "dc:publisher/text()").to_s)
  if @publisher.raw == ""
    @publisher.raw = FeedTools.unescape_entities(
      XPath.first(channel_node, "webMaster/text()").to_s)
  end

  if @publisher.raw != ""
    # "Name (email)" first, then "email (Name)"; the pair is always
    # ordered [name, email].
    publisher_raw_pair = @publisher.raw.scan(
      /(.*)\((\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\)/i).first
    if publisher_raw_pair.nil?
      email_first = @publisher.raw.scan(
        /(\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\s*\((.*)\)/i).first
      publisher_raw_pair = email_first.reverse unless email_first.nil?
    end
    if publisher_raw_pair.nil?
      # Neither convention matched; look for a bare email address.
      bare_email = @publisher.raw.scan(
        /\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b/i).first
      @publisher.email = bare_email.strip unless bare_email.nil?
    end
    if publisher_raw_pair.nil? || publisher_raw_pair.size == 0
      # Text without an '@' that got past the regexes above is taken to
      # be a plain name.
      @publisher.name = @publisher.raw unless @publisher.raw.include?("@")
    else
      @publisher.name = publisher_raw_pair.first.strip
      @publisher.email = publisher_raw_pair.last.strip
    end
  end

  @publisher.name = nil if @publisher.name == ""
  @publisher.raw = nil if @publisher.raw == ""
  @publisher.email = nil if @publisher.email == ""
  @publisher.url = nil if @publisher.url == ""
  return @publisher
end
|
1127
|
+
|
1128
|
+
# Sets the feed publisher.
#
# An object responding to name, email and url is treated as a complete
# Author object and stored as-is.  Anything else (probably a string) is
# stored as the name of the current publisher, creating a new
# FeedTools::Feed::Author first if none exists yet.
def publisher=(new_publisher)
  complete_author = [:name, :email, :url].all? do |attribute|
    new_publisher.respond_to?(attribute)
  end
  if complete_author
    @publisher = new_publisher
  else
    @publisher = FeedTools::Feed::Author.new if @publisher.nil?
    @publisher.name = new_publisher
  end
end
|
1144
|
+
|
1145
|
+
# Returns the contents of the channel-level itunes:author element, or
# nil when it is absent or empty.
#
# itunes:author does not belong at the channel level, but such elements
# are amazingly common.  Finding one marks the feed as "bozo".  There is
# no setter for this, since this is a "bozo" attribute.
def itunes_author
  if @itunes_author.nil?
    author_text = FeedTools.unescape_entities(
      XPath.first(channel_node, "itunes:author/text()").to_s)
    unless author_text.nil? || author_text == ""
      @itunes_author = author_text
      @bozo = true
    end
  end
  return @itunes_author
end
|
1159
|
+
|
1160
|
+
# Returns the feed's time.
#
# Resolved on first access and memoized in @time.  The time string is
# taken from the first non-empty of pubDate, dc:date, issued, updated and
# time in the channel node and parsed with Time.parse.  If parsing fails,
# or there is no channel node, succ_time is used; if that still leaves
# no value, or anything raises, the current time is used.
def time
  if @time.nil?
    time_string = nil
    unless channel_node.nil?
      [
        "pubDate/text()", "dc:date/text()", "issued/text()",
        "updated/text()", "time/text()"
      ].each do |path|
        time_string = XPath.first(channel_node, path).to_s
        break unless time_string == ""
      end
    end
    begin
      if time_string.nil?
        @time = self.succ_time
      elsif time_string != ""
        @time = Time.parse(time_string) rescue self.succ_time
      end
      @time = Time.now if @time == nil
    rescue
      @time = Time.now
    end
  end
  return @time
end
|
1193
|
+
|
1194
|
+
# Sets the feed's time, overwriting any value previously held in @time.
def time=(new_time)
  @time = new_time
end
|
1198
|
+
|
1199
|
+
# Returns the successor (Time#succ) of the time of the item that precedes
# this object in +feed+'s @items array.
#
# Returns nil when there is no feed, when this object is not found among
# the items, or when anything raises; returns the current time when this
# object is the first item.
# NOTE(review): +feed+ is not defined in the visible part of this class --
# confirm it exists here, otherwise the rescue makes this always nil.
def succ_time #:nodoc:
  begin
    return nil if feed.nil?
    # Called for its side effect: makes sure the feed's @items is built.
    feed.items
    unsorted_items = feed.instance_variable_get("@items")
    item_index = unsorted_items.index(self)
    return nil if item_index.nil?
    return Time.now if item_index <= 0
    return unsorted_items[item_index - 1].time.succ
  rescue
    return nil
  end
end
private :succ_time
|
1221
|
+
|
1222
|
+
# Returns the feed's updated time, or nil if none is given or it cannot
# be parsed.
#
# Read from the channel's updated element, falling back to modified.
def updated
  return @updated unless @updated.nil?

  updated_string = nil
  unless channel_node.nil?
    ["updated/text()", "modified/text()"].each do |path|
      updated_string = XPath.first(channel_node, path).to_s
      break unless updated_string == ""
    end
  end
  if updated_string.nil? || updated_string == ""
    @updated = nil
  else
    @updated = Time.parse(updated_string) rescue nil
  end
  return @updated
end
|
1239
|
+
|
1240
|
+
# Sets the feed's updated time, overwriting any value held in @updated.
def updated=(new_updated)
  @updated = new_updated
end
|
1244
|
+
|
1245
|
+
# Returns the feed's issued time, or nil if none is given or it cannot
# be parsed.
#
# Read from the first non-empty of the channel's issued, pubDate, dc:date
# and published elements.
def issued
  return @issued unless @issued.nil?

  issued_string = nil
  unless channel_node.nil?
    [
      "issued/text()", "pubDate/text()", "dc:date/text()", "published/text()"
    ].each do |path|
      issued_string = XPath.first(channel_node, path).to_s
      break unless issued_string == ""
    end
  end
  if issued_string.nil? || issued_string == ""
    @issued = nil
  else
    @issued = Time.parse(issued_string) rescue nil
  end
  return @issued
end
|
1268
|
+
|
1269
|
+
# Sets the feed's issued time, overwriting any value held in @issued.
def issued=(new_issued)
  @issued = new_issued
end
|
1273
|
+
|
1274
|
+
# Returns the feed's published time, or nil if none is given or it
# cannot be parsed.
#
# Read from the first non-empty of the channel's published, pubDate,
# dc:date and issued elements.
def published
  return @published unless @published.nil?

  published_string = nil
  unless channel_node.nil?
    [
      "published/text()", "pubDate/text()", "dc:date/text()", "issued/text()"
    ].each do |path|
      published_string = XPath.first(channel_node, path).to_s
      break unless published_string == ""
    end
  end
  if published_string.nil? || published_string == ""
    @published = nil
  else
    @published = Time.parse(published_string) rescue nil
  end
  return @published
end
|
1297
|
+
|
1298
|
+
# Sets the feed's published time, overwriting any value held in
# @published.
def published=(new_published)
  @published = new_published
end
|
1302
|
+
|
1303
|
+
# Returns a list of the feed's categories as FeedTools::Feed::Category
# objects.
#
# Built on first access and memoized in @categories.  Category nodes are
# the channel's category elements, or its dc:subject elements when there
# are none.  For each node the term comes from the term attribute or the
# node text, the label from the label attribute, and the scheme from the
# scheme or domain attribute.  Values are stripped of surrounding
# whitespace and empty values are reported as nil.
def categories
  if @categories.nil?
    @categories = []
    category_nodes = XPath.match(channel_node, "category")
    if category_nodes.nil? || category_nodes.empty?
      category_nodes = XPath.match(channel_node, "dc:subject")
    end
    # Non-destructive strip is used throughout: a missing node yields
    # nil.to_s, which is a frozen string on Ruby >= 2.7, so strip! would
    # raise FrozenError for every category lacking an attribute.
    (category_nodes || []).each do |category_node|
      category = FeedTools::Feed::Category.new

      term = XPath.first(category_node, "@term").to_s
      term = XPath.first(category_node, "text()").to_s if term == ""
      term = term.strip
      category.term = (term == "" ? nil : term)

      label = XPath.first(category_node, "@label").to_s.strip
      category.label = (label == "" ? nil : label)

      scheme = XPath.first(category_node, "@scheme").to_s
      scheme = XPath.first(category_node, "@domain").to_s if scheme == ""
      scheme = scheme.strip
      category.scheme = (scheme == "" ? nil : scheme)

      @categories << category
    end
  end
  return @categories
end
|
1335
|
+
|
1336
|
+
# Returns a list of the feed's images as FeedTools::Feed::Image objects.
#
# Built on first access and memoized in @images.  Image nodes are taken
# from the first of the channel's image, link, logo and LOGO elements
# that yields any nodes.  For each node the url, title, description,
# link, height, width and style are read; text values are stripped of
# surrounding whitespace, and empty text or non-positive dimensions are
# reported as nil.
def images
  return @images unless @images.nil?
  @images = []
  return @images if channel_node.nil?

  image_nodes = nil
  ["image", "link", "logo", "LOGO"].each do |path|
    image_nodes = XPath.match(channel_node, path)
    break unless image_nodes.nil? || image_nodes.empty?
  end
  return @images if image_nodes.nil?

  # Non-destructive strip is used throughout: a missing node yields
  # nil.to_s, which is a frozen string on Ruby >= 2.7, so strip! would
  # raise FrozenError for every image lacking one of the fields.
  blank_to_nil = lambda do |text|
    stripped = text.strip
    stripped == "" ? nil : stripped
  end

  image_nodes.each do |image_node|
    image = FeedTools::Feed::Image.new

    url = XPath.first(image_node, "url/text()").to_s
    url = XPath.first(image_node, "@rdf:resource").to_s if url == ""
    # href is only trusted on logo elements or nodes typed as images.
    if url == "" && (image_node.name == "logo" ||
        (image_node.attributes['type'] =~ /^image/) == 0)
      url = XPath.first(image_node, "@href").to_s
    end
    if url == "" && image_node.name == "LOGO"
      url = XPath.first(image_node, "@HREF").to_s
    end
    image.url = blank_to_nil.call(url)

    image.title = blank_to_nil.call(
      XPath.first(image_node, "title/text()").to_s)
    image.description = blank_to_nil.call(
      XPath.first(image_node, "description/text()").to_s)
    image.link = blank_to_nil.call(
      XPath.first(image_node, "link/text()").to_s)

    height = XPath.first(image_node, "height/text()").to_s.to_i
    image.height = (height <= 0 ? nil : height)
    width = XPath.first(image_node, "width/text()").to_s.to_i
    image.width = (width <= 0 ? nil : width)

    style = XPath.first(image_node, "@style").to_s.downcase
    style = XPath.first(image_node, "@STYLE").to_s.downcase if style == ""
    image.style = blank_to_nil.call(style)

    @images << image
  end
  return @images
end
|
1394
|
+
|
1395
|
+
# Returns the feed's text input field as a FeedTools::Feed::TextInput.
#
# Built on first access and memoized in @text_input.  The title,
# description, link and name are read from the channel's textInput
# element; empty values are reported as nil.  When there is no textInput
# element, all fields are left unset.
def text_input
  return @text_input unless @text_input.nil?

  @text_input = FeedTools::Feed::TextInput.new
  text_input_node = XPath.first(channel_node, "textInput")
  return @text_input if text_input_node.nil?

  child_text = lambda do |path|
    text = XPath.first(text_input_node, path).to_s
    text == "" ? nil : text
  end
  @text_input.title = child_text.call("title/text()")
  @text_input.description = child_text.call("description/text()")
  @text_input.link = child_text.call("link/text()")
  @text_input.name = child_text.call("name/text()")
  return @text_input
end
|
1417
|
+
|
1418
|
+
# Returns the feed's copyright information, or nil if none is given.
#
# Read on first access from the first non-empty of the channel's
# copyright, rights, dc:rights and copyrights elements, then passed
# through FeedTools.sanitize_html with :strip.
def copyright
  return @copyright unless @copyright.nil?
  return @copyright if channel_node.nil?

  [
    "copyright/text()", "rights/text()", "dc:rights/text()",
    "copyrights/text()"
  ].each do |path|
    @copyright = XPath.first(channel_node, path).to_s
    break unless @copyright == ""
  end
  @copyright = FeedTools.sanitize_html(@copyright, :strip)
  @copyright = nil if @copyright == ""
  return @copyright
end
|
1438
|
+
|
1439
|
+
# Sets the feed's copyright information, overwriting any value held in
# @copyright.
def copyright=(new_copyright)
  @copyright = new_copyright
end
|
1443
|
+
|
1444
|
+
# Returns the number of seconds before the feed should expire.
#
# Resolved on first access from the channel node, trying in order:
# * syn:updateFrequency with syn:updatePeriod (hourly when the period is
#   not daily/weekly/monthly/yearly),
# * ttl, with an optional span attribute giving the unit,
# * the days/hour/min/sec attributes of schedule/intervaltime, summed.
# When nothing is found, or the result is 0, one hour is used.  The
# stored value is rounded on every call.
def time_to_live
  if @time_to_live.nil? && !channel_node.nil?
    # get the feed time to live from the xml document
    update_frequency =
      XPath.first(channel_node, "syn:updateFrequency/text()").to_s
    if update_frequency != ""
      update_period =
        XPath.first(channel_node, "syn:updatePeriod/text()").to_s
      count = update_frequency.to_i
      @time_to_live =
        case update_period
        when "daily"   then count.day
        when "weekly"  then count.week
        when "monthly" then count.month
        when "yearly"  then count.year
        else count.hour # hourly
        end
    end

    if @time_to_live.nil?
      # usually expressed in minutes
      update_frequency = XPath.first(channel_node, "ttl/text()").to_s
      if update_frequency != ""
        update_span = XPath.first(channel_node, "ttl/@span").to_s
        count = update_frequency.to_i
        @time_to_live =
          case update_span
          when "seconds" then count
          when "minutes" then count.minute
          when "hours"   then count.hour
          when "days"    then count.day
          when "weeks"   then count.week
          when "months"  then count.month
          when "years"   then count.year
          else
            # Normally, this should default to minutes, but realistically,
            # if they meant minutes, you're rarely going to see a value
            # higher than 120.  If we see >= 3000, we're either dealing
            # with a stupid pseudo-spec that decided to use seconds, or
            # we're looking at someone who only has weekly updated
            # content.  Worst case, we misreport the time, and we update
            # too often.  Best case, we avoid accidentally updating the
            # feed only once a year.  In the interests of being pragmatic,
            # and since the problem we avoid is a far greater one than the
            # one we cause, just run the check and hope no one actually
            # gets hurt.
            count >= 3000 ? count : count.minute
          end
      end
    end

    if @time_to_live.nil?
      @time_to_live = 0
      days = XPath.first(channel_node, "schedule/intervaltime/@days").to_s
      hours = XPath.first(channel_node, "schedule/intervaltime/@hour").to_s
      minutes = XPath.first(channel_node, "schedule/intervaltime/@min").to_s
      seconds = XPath.first(channel_node, "schedule/intervaltime/@sec").to_s
      @time_to_live = @time_to_live + days.to_i.day unless days == ""
      @time_to_live = @time_to_live + hours.to_i.hour unless hours == ""
      @time_to_live = @time_to_live + minutes.to_i.minute unless minutes == ""
      @time_to_live = @time_to_live + seconds.to_i unless seconds == ""
      @time_to_live = 1.hour if @time_to_live == 0
    end
  end
  # Default to one hour
  @time_to_live = 1.hour if @time_to_live.nil? || @time_to_live == 0
  @time_to_live = @time_to_live.round
  return @time_to_live
end
|
1536
|
+
|
1537
|
+
# Sets the feed time to live.  The value is rounded, and anything below
# one hour is raised to one hour.
def time_to_live=(new_time_to_live)
  rounded = new_time_to_live.round
  minimum = 1.hour
  @time_to_live = (rounded < minimum ? minimum : rounded)
end
|
1542
|
+
|
1543
|
+
# Returns the feed's cloud as a FeedTools::Feed::Cloud.
#
# Built on first access and memoized in @cloud from the attributes of the
# channel's cloud element.  The protocol is downcased and the port is
# converted to an integer; empty values and a port of 0 are reported as
# nil.
def cloud
  return @cloud unless @cloud.nil?

  @cloud = FeedTools::Feed::Cloud.new
  domain = XPath.first(channel_node, "cloud/@domain").to_s
  port = XPath.first(channel_node, "cloud/@port").to_s.to_i
  path = XPath.first(channel_node, "cloud/@path").to_s
  register_procedure =
    XPath.first(channel_node, "cloud/@registerProcedure").to_s
  protocol = XPath.first(channel_node, "cloud/@protocol").to_s.downcase

  @cloud.domain = (domain == "" ? nil : domain)
  # An empty or non-numeric port converts to 0, which means "no port".
  @cloud.port = (port == 0 ? nil : port)
  @cloud.path = (path == "" ? nil : path)
  @cloud.register_procedure =
    (register_procedure == "" ? nil : register_procedure)
  @cloud.protocol = (protocol == "" ? nil : protocol)
  return @cloud
end
|
1564
|
+
|
1565
|
+
# Sets the feed's cloud, overwriting any value held in @cloud.
def cloud=(new_cloud)
  @cloud = new_cloud
end
|
1569
|
+
|
1570
|
+
# Returns the feed generator, or nil if none is given.
#
# Read on first access from the channel's generator element and passed
# through FeedTools.strip_html.
def generator
  return @generator unless @generator.nil?

  @generator = XPath.first(channel_node, "generator/text()").to_s
  @generator = FeedTools.strip_html(@generator)
  @generator = nil if @generator == ""
  return @generator
end
|
1579
|
+
|
1580
|
+
# Sets the feed generator, overwriting any value held in @generator.
def generator=(new_generator)
  @generator = new_generator
end
|
1584
|
+
|
1585
|
+
# Returns the feed docs, or nil if none is given.
#
# Read on first access from the channel's docs element and passed through
# FeedTools.strip_html.
def docs
  return @docs unless @docs.nil?

  @docs = XPath.first(channel_node, "docs/text()").to_s
  @docs = FeedTools.strip_html(@docs)
  @docs = nil if @docs == ""
  return @docs
end
|
1594
|
+
|
1595
|
+
# Sets the feed docs, overwriting any value held in @docs.
def docs=(new_docs)
  @docs = new_docs
end
|
1599
|
+
|
1600
|
+
# Returns the feed language, downcased.  Defaults to "en-us" when the
# feed does not declare one.
#
# Resolved on first access and memoized in @language.  The channel's
# language and dc:language elements are tried first, then xml:lang on the
# channel node and on the root node.
def language
  if @language.nil?
    unless channel_node.nil?
      # xml:lang is an attribute, so it has to be queried as "@xml:lang".
      # The original child-element form "xml:lang/text()" is kept after
      # it so feeds that matched it before still do.
      lookups = [
        [:channel, "language/text()"],
        [:channel, "dc:language/text()"],
        [:channel, "@xml:lang"],
        [:channel, "xml:lang/text()"],
        [:root, "@xml:lang"],
        [:root, "xml:lang/text()"]
      ]
      @language = ""
      lookups.each do |scope, path|
        node = (scope == :root ? root_node : channel_node)
        next if node.nil?
        @language = XPath.first(node, path).to_s
        break unless @language == ""
      end
    end
    @language = "en-us" if @language.nil? || @language == ""
    @language = @language.downcase
  end
  return @language
end
|
1623
|
+
|
1624
|
+
# Sets the feed language, overwriting any value held in @language.
def language=(new_language)
  @language = new_language
end
|
1628
|
+
|
1629
|
+
# Returns true if this feed contains explicit material.
#
# Determined on first access and memoized in @explicit: true when the
# channel's media:adult is "true", or its itunes:explicit is "yes" or
# "true" (compared case-insensitively); false otherwise.
def explicit?
  if @explicit.nil?
    adult_flag =
      XPath.first(channel_node, "media:adult/text()").to_s.downcase
    @explicit = adult_flag == "true" || ["yes", "true"].include?(
      XPath.first(channel_node, "itunes:explicit/text()").to_s.downcase)
  end
  return @explicit
end
|
1645
|
+
|
1646
|
+
# Sets whether or not the feed contains explicit material.  Any truthy
# value is stored as true, anything else as false.
def explicit=(new_explicit)
  @explicit = !!new_explicit
end
|
1650
|
+
|
1651
|
+
# Returns the feed items, newest first.
#
# On first access the item nodes are located -- item, ITEM or entry
# elements under the root or channel node -- and a FeedItem is built for
# each, holding the node's xml and a reference back to this feed.  The
# list is re-sorted by descending time on every call; items without a
# time sort as if dated 1970.
def items
  if @items.nil?
    raw_items = nil
    unless root_node.nil?
      search_order = [
        [:root, "item"], [:channel, "item"], [:channel, "ITEM"],
        [:root, "ITEM"], [:channel, "entry"], [:root, "entry"]
      ]
      search_order.each do |scope, path|
        node = (scope == :root ? root_node : channel_node)
        raw_items = XPath.match(node, path)
        break unless raw_items == nil || raw_items == []
      end
    end

    # create the individual feed items
    @items = []
    (raw_items || []).each do |item_node|
      new_item = FeedItem.new
      new_item.xml_data = item_node.to_s
      new_item.feed = self
      @items << new_item
    end
  end

  # Sort the items
  @items = @items.sort do |a, b|
    (b.time or Time.mktime(1970)) <=> (a.time or Time.mktime(1970))
  end
  return @items
end
|
1691
|
+
|
1692
|
+
# The time that the feed was last requested from the remote server.  Nil
# if it has never been pulled, or if it was created from scratch.
#
# When a cache object is attached, its value replaces the one held in
# @last_retrieved.
def last_retrieved
  cache = self.cache_object
  @last_retrieved = cache.last_retrieved unless cache.nil?
  return @last_retrieved
end
|
1700
|
+
|
1701
|
+
# Sets the time that the feed was last retrieved and, when a cache object
# is attached, stores the same value on it.
def last_retrieved=(new_last_retrieved)
  @last_retrieved = new_last_retrieved
  cache = self.cache_object
  cache.last_retrieved = new_last_retrieved unless cache.nil?
end
|
1708
|
+
|
1709
|
+
# True if this feed contains audio content enclosures.
#
# Stops at the first audio enclosure found instead of visiting every
# enclosure of every item.
def podcast?
  self.items.any? do |item|
    item.enclosures.any? { |enclosure| enclosure.audio? }
  end
end
|
1719
|
+
|
1720
|
+
# True if this feed contains video content enclosures.
#
# Stops at the first video enclosure found instead of visiting every
# enclosure of every item.
def vidlog?
  self.items.any? do |item|
    item.enclosures.any? { |enclosure| enclosure.video? }
  end
end
|
1730
|
+
|
1731
|
+
# True if this feed is malformed somehow.  Reports false until something
# has set @bozo.
def bozo?
  @bozo = false if @bozo.nil?
  return @bozo
end
|
1738
|
+
|
1739
|
+
# True if the feed was not last retrieved from the cache.  Returns the
# value of @live as-is.
def live?
  return @live
end
|
1743
|
+
|
1744
|
+
# True if the feed has expired and must be reacquired from the remote
# server.
#
# A feed that has never been retrieved is always expired.  Otherwise it
# expires once time_to_live seconds have passed since last_retrieved.
# time_to_live is already expressed in seconds, so it is added as-is;
# the previous extra ".hour" multiplied it by 3600 and kept feeds from
# expiring for months.
def expired?
  retrieved_at = self.last_retrieved
  return true if retrieved_at.nil?
  return (retrieved_at + self.time_to_live) < Time.now
end
|
1748
|
+
|
1749
|
+
# Forces this feed to expire.
|
1750
|
+
def expire!
  # Back-date the retrieval time to the start of 1970 (local time), then
  # persist the change; the result of save is returned.
  self.last_retrieved = Time.local(1970)
  save
end
|
1754
|
+
|
1755
|
+
# A hook method that is called during the feed generation process. Overriding this method
|
1756
|
+
# will enable additional content to be inserted into the feed.
|
1757
|
+
def build_xml_hook(feed_type, version, xml_builder)
  # Deliberately a no-op that ignores its arguments; it exists so that it
  # can be overridden to emit extra elements through xml_builder.
  nil
end
|
1760
|
+
|
1761
|
+
# Generates xml based on the content of the feed
|
1762
|
+
def build_xml(feed_type=(self.feed_type or "rss"), version=nil,
    xml_builder=Builder::XmlMarkup.new(:indent => 2))
  # Entry point for feed generation.
  #
  # feed_type   - "rss" or "atom"; defaults to the feed's own type or "rss".
  # version     - numeric format version; nil or 0.0 selects the default
  #               (1.0 for rss, 0.3 for atom).
  # xml_builder - builder object the markup is written to.
  #
  # Returns whatever the builder returns for the root element, or nil when
  # the feed_type/version combination matches none of the formats below.
  if version.nil? || version == 0.0
    if feed_type == "rss"
      version = 1.0
    elsif feed_type == "atom"
      version = 0.3
    end
  end

  if feed_type == "rss" &&
      (version == 0.9 || version == 1.0 || version == 1.1)
    build_rdf_xml(feed_type, version, xml_builder)
  elsif feed_type == "rss"
    build_rss_xml(feed_type, version, xml_builder)
  elsif feed_type == "atom" && version == 0.3
    build_atom_03_xml(feed_type, version, xml_builder)
  elsif feed_type == "atom" && version == 1.0
    build_atom_10_xml(feed_type, version, xml_builder)
  end
end

# True when a field is nil or the empty string, i.e. has nothing to emit.
def xml_value_blank?(value)
  value.nil? || value == ""
end

# Emits the feed-level <author> element used by both atom formats.
# The name falls back to "n/a"; email and url are only written when set.
def build_xml_author(xml_builder)
  xml_builder.author do
    if self.author.nil? || self.author.name.nil?
      xml_builder.name("n/a")
    else
      xml_builder.name(self.author.name)
    end
    unless self.author.nil? || self.author.email.nil?
      xml_builder.email(self.author.email)
    end
    unless self.author.nil? || self.author.url.nil?
      xml_builder.url(self.author.url)
    end
  end
end

# Asks every item to append its own markup; a nil item list is skipped.
def build_xml_items(feed_type, version, xml_builder)
  return if items.nil?
  items.each do |item|
    item.build_xml(feed_type, version, xml_builder)
  end
end

# Writes the RDF-based rss formats (0.9, 1.0, 1.1).
# Raises a RuntimeError when an item has a nil link.
def build_rdf_xml(feed_type, version, xml_builder)
  xml_builder.tag!("rdf:RDF",
      "xmlns" => "http://purl.org/rss/1.0/",
      "xmlns:rdf" => "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
      "xmlns:dc" => "http://purl.org/dc/elements/1.1/",
      "xmlns:syn" => "http://purl.org/rss/1.0/modules/syndication/",
      "xmlns:taxo" => "http://purl.org/rss/1.0/modules/taxonomy/",
      "xmlns:itunes" => "http://www.itunes.com/DTDs/Podcast-1.0.dtd",
      "xmlns:media" => "http://search.yahoo.com/mrss") do
    channel_attributes = {}
    unless self.link.nil?
      channel_attributes["rdf:about"] = CGI.escapeHTML(self.link)
    end
    xml_builder.channel(channel_attributes) do
      # title, link and description are always emitted, empty if need be.
      if xml_value_blank?(title)
        xml_builder.title
      else
        xml_builder.title(title)
      end
      if xml_value_blank?(link)
        xml_builder.link
      else
        xml_builder.link(link)
      end
      unless images.nil? || images.empty?
        xml_builder.image("rdf:resource" => CGI.escapeHTML(
          images.first.url))
      end
      if xml_value_blank?(description)
        xml_builder.description
      else
        xml_builder.description(description)
      end
      unless xml_value_blank?(language)
        xml_builder.tag!("dc:language", language)
      end
      xml_builder.tag!("syn:updatePeriod", "hourly")
      xml_builder.tag!("syn:updateFrequency", (time_to_live / 1.hour).to_s)
      xml_builder.tag!("syn:updateBase", Time.mktime(1970).iso8601)
      xml_builder.items do
        xml_builder.tag!("rdf:Seq") do
          unless items.nil?
            items.each do |item|
              if item.link.nil?
                raise "Cannot generate an rdf-based feed with a nil item link field."
              end
              xml_builder.tag!("rdf:li", "rdf:resource" => CGI.escapeHTML(item.link))
            end
          end
        end
      end
      build_xml_hook(feed_type, version, xml_builder)
    end
    unless images.nil? || images.empty?
      # Prefer the first image that has a link; otherwise use the first image.
      best_image = self.images.detect { |image| image.link != nil }
      best_image = images.first if best_image.nil?
      xml_builder.image("rdf:about" => CGI.escapeHTML(best_image.url)) do
        if !xml_value_blank?(best_image.title)
          xml_builder.title(best_image.title)
        elsif !xml_value_blank?(self.title)
          xml_builder.title(self.title)
        else
          xml_builder.title
        end
        unless xml_value_blank?(best_image.url)
          xml_builder.url(best_image.url)
        end
        if !xml_value_blank?(best_image.link)
          xml_builder.link(best_image.link)
        elsif !xml_value_blank?(self.link)
          xml_builder.link(self.link)
        else
          xml_builder.link
        end
      end
    end
    build_xml_items(feed_type, version, xml_builder)
  end
end

# Writes the plain (non-RDF) rss format; always labelled version 2.0.
def build_rss_xml(feed_type, version, xml_builder)
  xml_builder.rss("version" => "2.0",
      "xmlns:rdf" => "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
      "xmlns:dc" => "http://purl.org/dc/elements/1.1/",
      "xmlns:taxo" => "http://purl.org/rss/1.0/modules/taxonomy/",
      "xmlns:trackback" =>
        "http://madskills.com/public/xml/rss/module/trackback/",
      "xmlns:itunes" => "http://www.itunes.com/DTDs/Podcast-1.0.dtd",
      "xmlns:media" => "http://search.yahoo.com/mrss") do
    xml_builder.channel do
      xml_builder.title(title) unless xml_value_blank?(title)
      xml_builder.link(link) unless xml_value_blank?(link)
      unless xml_value_blank?(description)
        xml_builder.description(description)
      end
      xml_builder.ttl((time_to_live / 1.minute).to_s)
      xml_builder.generator(
        "http://www.sporkmonger.com/projects/feedtools")
      build_xml_hook(feed_type, version, xml_builder)
      build_xml_items(feed_type, version, xml_builder)
    end
  end
end

# Writes the atom 0.3 format.
def build_atom_03_xml(feed_type, version, xml_builder)
  xml_builder.feed("xmlns" => "http://purl.org/atom/ns#",
      "version" => version,
      "xml:lang" => language) do
    unless xml_value_blank?(title)
      xml_builder.title(title,
          "mode" => "escaped",
          "type" => "text/html")
    end
    build_xml_author(xml_builder)
    unless xml_value_blank?(link)
      xml_builder.link("href" => link,
          "rel" => "alternate",
          "type" => "text/html",
          "title" => title)
    end
    unless xml_value_blank?(description)
      xml_builder.tagline(description,
          "mode" => "escaped",
          "type" => "text/html")
    end
    xml_builder.generator("FeedTools",
        "url" => "http://www.sporkmonger.com/projects/feedtools")
    build_xml_hook(feed_type, version, xml_builder)
    build_xml_items(feed_type, version, xml_builder)
  end
end

# Writes the atom 1.0 format.
# Raises a RuntimeError when no usable unique id can be produced.
def build_atom_10_xml(feed_type, version, xml_builder)
  xml_builder.feed("xmlns" => "http://www.w3.org/2005/Atom",
      "xml:lang" => language) do
    unless xml_value_blank?(title)
      xml_builder.title(title,
          "type" => "html")
    end
    build_xml_author(xml_builder)
    unless xml_value_blank?(self.url)
      xml_builder.link("href" => self.url,
          "rel" => "self",
          "type" => "application/atom+xml")
    end
    unless xml_value_blank?(self.link)
      xml_builder.link("href" => self.link,
          "rel" => "alternate",
          "type" => "text/html",
          "title" => self.title)
    end
    if xml_value_blank?(description)
      xml_builder.subtitle(FeedTools.no_content_string,
          "type" => "html")
    else
      xml_builder.subtitle(description,
          "type" => "html")
    end
    if self.updated != nil
      xml_builder.updated(self.updated.iso8601)
    elsif self.time != nil
      # Not technically correct, but a heck of a lot better
      # than the Time.now fall-back.
      xml_builder.updated(self.time.iso8601)
    else
      xml_builder.updated(Time.now.iso8601)
    end
    unless self.published.nil?
      xml_builder.published(self.published.iso8601)
    end
    xml_builder.generator(
      "FeedTools - http://www.sporkmonger.com/projects/feedtools")
    # The id must be a URI: use the feed id when it is one, otherwise derive
    # a urn from the link, and fail when neither is possible.
    if self.id != nil && FeedTools.is_uri?(self.id)
      xml_builder.id(self.id)
    elsif self.link != nil
      xml_builder.id(FeedTools.build_urn_uri(self.link))
    elsif self.id != nil
      raise "The unique id must be a valid URI."
    else
      raise "Cannot build feed, missing feed unique id."
    end
    build_xml_hook(feed_type, version, xml_builder)
    build_xml_items(feed_type, version, xml_builder)
  end
end

private :xml_value_blank?, :build_xml_author, :build_xml_items,
  :build_rdf_xml, :build_rss_xml, :build_atom_03_xml, :build_atom_10_xml
|
2010
|
+
|
2011
|
+
# Persists the current feed state to the cache.
|
2012
|
+
def save
  # Preconditions are checked in a fixed order; the first one that fails
  # decides which error the caller sees.
  raise "Caching is currently disabled. Cannot save to cache." if FeedTools.feed_cache.nil?
  raise "The url field must be set to save to the cache." if self.url.nil?
  raise "The xml_data field must be set to save to the cache." if self.xml_data.nil?
  raise "The cache_object is currently nil. Cannot save to cache." if self.cache_object.nil?

  # Copy the current feed state onto the cache object and persist it.
  record = self.cache_object
  record.url = self.url
  record.title = self.title
  record.link = self.link
  record.xml_data = self.xml_data
  # Headers are only serialized when an http response is attached.
  record.http_headers = self.http_headers.to_yaml unless self.http_response.nil?
  record.last_retrieved = self.last_retrieved
  record.save
end
|
2033
|
+
|
2034
|
+
# Alternate names for the description accessor pair.
alias tagline description
alias tagline= description=
alias subtitle description
alias subtitle= description=
alias abstract description
alias abstract= description=
alias content description
alias content= description=
# Short form of time_to_live.
alias ttl time_to_live
alias ttl= time_to_live=
# guid is another name for the unique id.
alias guid id
alias guid= id=
# entries is another name for items (reader only).
alias entries items
|
2047
|
+
|
2048
|
+
# passes missing methods to the cache_object
|
2049
|
+
def method_missing(msg, *params, &block)
  # Forwards any unknown message to the cache object.
  #
  # msg    - name of the missing method.
  # params - arguments of the original call; they are splatted so the
  #          cache object receives them individually rather than as one
  #          array, and a block given by the caller is passed along too.
  #
  # Raises NoMethodError when no cache object is available.
  if self.cache_object.nil?
    raise NoMethodError, "Invalid method #{msg.to_s}"
  end
  return self.cache_object.send(msg, *params, &block)
end
|
2055
|
+
|
2056
|
+
# passes missing methods to the FeedTools.feed_cache
|
2057
|
+
def Feed.method_missing(msg, *params, &block)
  # Forwards unknown class-level messages to FeedTools.feed_cache.
  #
  # msg    - name of the missing method.
  # params - arguments of the original call; they are splatted so the
  #          cache receives them individually rather than as one array,
  #          and a block given by the caller is passed along too.
  #
  # A result that is an instance of the cache class is replaced by the
  # feed opened from its url; any other result is returned unchanged.
  # Raises NoMethodError when caching is disabled.
  if FeedTools.feed_cache.nil?
    raise NoMethodError, "Invalid method Feed.#{msg.to_s}"
  end
  result = FeedTools.feed_cache.send(msg, *params, &block)
  if result.kind_of? FeedTools.feed_cache
    result = Feed.open(result.url)
  end
  return result
end
|
2067
|
+
|
2068
|
+
# Returns a simple representation of the feed object's state.
|
2069
|
+
def inspect
  # Hexadecimal object id followed by the feed url; a nil url renders as
  # an empty string.
  hex_id = self.object_id.to_s(16)
  "#<FeedTools::Feed:0x" + hex_id + " URL:" + self.url.to_s + ">"
end
|
2072
|
+
end
|
2073
|
+
end
|