feedtools 0.2.22 → 0.2.23
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +28 -0
- data/README +23 -2
- data/db/migration.rb +19 -0
- data/db/schema.mysql.sql +1 -1
- data/db/schema.postgresql.sql +1 -1
- data/db/schema.sqlite.sql +1 -1
- data/lib/feed_tools.rb +71 -388
- data/lib/feed_tools/database_feed_cache.rb +4 -3
- data/lib/feed_tools/feed.rb +809 -607
- data/lib/feed_tools/feed_item.rb +551 -574
- data/lib/feed_tools/feed_structures.rb +252 -0
- data/lib/feed_tools/helpers/feed_tools_helper.rb +6 -5
- data/lib/feed_tools/helpers/generic_helper.rb +16 -158
- data/lib/feed_tools/helpers/html_helper.rb +629 -0
- data/lib/feed_tools/helpers/retrieval_helper.rb +5 -0
- data/lib/feed_tools/helpers/uri_helper.rb +223 -0
- data/lib/feed_tools/helpers/xml_helper.rb +239 -0
- data/rakefile +10 -237
- data/test/unit/amp_test.rb +102 -94
- data/test/unit/atom_test.rb +239 -6
- data/test/unit/cache_test.rb +1 -1
- data/test/unit/encoding_test.rb +5 -5
- data/test/unit/generation_test.rb +34 -1
- data/test/unit/helper_test.rb +111 -17
- data/test/unit/rss_test.rb +21 -2
- metadata +7 -3
- data/lib/feed_tools/helpers/module_helper.rb +0 -27
@@ -31,7 +31,7 @@ module FeedTools
|
|
31
31
|
# The default caching mechanism for the FeedTools module
|
32
32
|
class DatabaseFeedCache < ActiveRecord::Base
|
33
33
|
# Overrides the default table name to use the "feeds" table.
|
34
|
-
|
34
|
+
set_table_name("cached_feeds")
|
35
35
|
|
36
36
|
# If ActiveRecord is not already connected, attempts to find a configuration file and use
|
37
37
|
# it to open a connection for ActiveRecord.
|
@@ -53,7 +53,8 @@ module FeedTools
|
|
53
53
|
"./config/database.yml",
|
54
54
|
"../config/database.yml",
|
55
55
|
"./database.yml",
|
56
|
-
"../database.yml"
|
56
|
+
"../database.yml",
|
57
|
+
"../../database.yml"
|
57
58
|
]
|
58
59
|
database_config_file = nil
|
59
60
|
for file in possible_config_files
|
@@ -108,7 +109,7 @@ module FeedTools
|
|
108
109
|
# True if the appropriate database table already exists
|
109
110
|
def DatabaseFeedCache.table_exists?
|
110
111
|
begin
|
111
|
-
ActiveRecord::Base.connection.execute "select id,
|
112
|
+
ActiveRecord::Base.connection.execute "select id, href, title, " +
|
112
113
|
"link, feed_data, feed_data_type, http_headers, last_retrieved " +
|
113
114
|
"from #{self.table_name()} limit 1"
|
114
115
|
rescue ActiveRecord::StatementInvalid
|
data/lib/feed_tools/feed.rb
CHANGED
@@ -21,124 +21,27 @@
|
|
21
21
|
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
22
|
#++
|
23
23
|
|
24
|
+
require 'rexml/document'
|
25
|
+
require 'feed_tools/feed_item'
|
26
|
+
require 'feed_tools/feed_structures'
|
24
27
|
require 'feed_tools/helpers/generic_helper'
|
28
|
+
require 'feed_tools/helpers/xml_helper'
|
29
|
+
require 'feed_tools/helpers/html_helper'
|
25
30
|
|
26
31
|
module FeedTools
|
27
32
|
# The <tt>FeedTools::Feed</tt> class represents a web feed's structure.
|
28
33
|
class Feed
|
29
|
-
# :stopdoc:
|
30
|
-
include REXML
|
31
|
-
class << self
|
32
|
-
include FeedTools::GenericHelper
|
33
|
-
private :validate_options
|
34
|
-
end
|
35
|
-
include FeedTools::GenericHelper
|
36
|
-
private :validate_options
|
37
|
-
# :startdoc:
|
38
|
-
|
39
|
-
# Represents a feed/feed item's category
|
40
|
-
class Category
|
41
|
-
|
42
|
-
# The category term value
|
43
|
-
attr_accessor :term
|
44
|
-
# The categorization scheme
|
45
|
-
attr_accessor :scheme
|
46
|
-
# A human-readable description of the category
|
47
|
-
attr_accessor :label
|
48
|
-
|
49
|
-
alias_method :value, :term
|
50
|
-
alias_method :category, :term
|
51
|
-
alias_method :domain, :scheme
|
52
|
-
end
|
53
|
-
|
54
|
-
# Represents a feed/feed item's author
|
55
|
-
class Author
|
56
|
-
|
57
|
-
# The author's real name
|
58
|
-
attr_accessor :name
|
59
|
-
# The author's email address
|
60
|
-
attr_accessor :email
|
61
|
-
# The url of the author's homepage
|
62
|
-
attr_accessor :url
|
63
|
-
# The raw value of the author tag if present
|
64
|
-
attr_accessor :raw
|
65
|
-
end
|
66
|
-
|
67
|
-
# Represents a feed's image
|
68
|
-
class Image
|
69
|
-
|
70
|
-
# The image's title
|
71
|
-
attr_accessor :title
|
72
|
-
# The image's description
|
73
|
-
attr_accessor :description
|
74
|
-
# The image's url
|
75
|
-
attr_accessor :url
|
76
|
-
# The url to link the image to
|
77
|
-
attr_accessor :link
|
78
|
-
# The width of the image
|
79
|
-
attr_accessor :width
|
80
|
-
# The height of the image
|
81
|
-
attr_accessor :height
|
82
|
-
# The style of the image
|
83
|
-
# Possible values are "icon", "image", or "image-wide"
|
84
|
-
attr_accessor :style
|
85
|
-
end
|
86
|
-
|
87
|
-
# Represents a feed's text input element.
|
88
|
-
# Be aware that this will be ignored for feed generation. It's a
|
89
|
-
# pointless element that aggregators usually ignore and it doesn't have an
|
90
|
-
# equivalent in all feeds types.
|
91
|
-
class TextInput
|
92
|
-
|
93
|
-
# The label of the Submit button in the text input area.
|
94
|
-
attr_accessor :title
|
95
|
-
# The description explains the text input area.
|
96
|
-
attr_accessor :description
|
97
|
-
# The URL of the CGI script that processes text input requests.
|
98
|
-
attr_accessor :link
|
99
|
-
# The name of the text object in the text input area.
|
100
|
-
attr_accessor :name
|
101
|
-
end
|
102
|
-
|
103
|
-
# Represents a feed's cloud.
|
104
|
-
# Be aware that this will be ignored for feed generation.
|
105
|
-
class Cloud
|
106
|
-
|
107
|
-
# The domain of the cloud.
|
108
|
-
attr_accessor :domain
|
109
|
-
# The path for the cloud.
|
110
|
-
attr_accessor :path
|
111
|
-
# The port the cloud is listening on.
|
112
|
-
attr_accessor :port
|
113
|
-
# The web services protocol the cloud uses.
|
114
|
-
# Possible values are either "xml-rpc" or "soap".
|
115
|
-
attr_accessor :protocol
|
116
|
-
# The procedure to use to request notification.
|
117
|
-
attr_accessor :register_procedure
|
118
|
-
end
|
119
|
-
|
120
|
-
# Represents a simple hyperlink
|
121
|
-
class Link
|
122
|
-
|
123
|
-
# The url that is being linked to
|
124
|
-
attr_accessor :url
|
125
|
-
# The content of the hyperlink
|
126
|
-
attr_accessor :value
|
127
|
-
|
128
|
-
alias_method :href, :url
|
129
|
-
end
|
130
|
-
|
131
34
|
# Initialize the feed object
|
132
35
|
def initialize
|
133
36
|
super
|
134
37
|
@cache_object = nil
|
135
38
|
@http_headers = nil
|
136
|
-
@
|
39
|
+
@xml_document = nil
|
137
40
|
@feed_data = nil
|
138
41
|
@feed_data_type = :xml
|
139
42
|
@root_node = nil
|
140
43
|
@channel_node = nil
|
141
|
-
@
|
44
|
+
@href = nil
|
142
45
|
@id = nil
|
143
46
|
@title = nil
|
144
47
|
@description = nil
|
@@ -147,6 +50,7 @@ module FeedTools
|
|
147
50
|
@time_to_live = nil
|
148
51
|
@entries = nil
|
149
52
|
@live = false
|
53
|
+
@encoding = nil
|
150
54
|
end
|
151
55
|
|
152
56
|
# Loads the feed specified by the url, pulling the data from the
|
@@ -155,7 +59,7 @@ module FeedTools
|
|
155
59
|
# * <tt>:cache_only</tt> - If set to true, the feed will only be
|
156
60
|
# pulled from the cache.
|
157
61
|
def Feed.open(url, options={})
|
158
|
-
validate_options([ :cache_only ],
|
62
|
+
FeedTools::GenericHelper.validate_options([ :cache_only ],
|
159
63
|
options.keys)
|
160
64
|
options = { :cache_only => false }.merge(options)
|
161
65
|
|
@@ -165,17 +69,17 @@ module FeedTools
|
|
165
69
|
end
|
166
70
|
|
167
71
|
# clean up the url
|
168
|
-
url = FeedTools.normalize_url(url)
|
72
|
+
url = FeedTools::UriHelper.normalize_url(url)
|
169
73
|
|
170
74
|
# create and load the new feed
|
171
75
|
feed = FeedTools::Feed.new
|
172
|
-
feed.
|
76
|
+
feed.href = url
|
173
77
|
feed.update! unless options[:cache_only]
|
174
78
|
return feed
|
175
79
|
end
|
176
80
|
|
177
|
-
# Loads the feed from the remote url if the feed has expired from the
|
178
|
-
# retrieved from the cache for some reason.
|
81
|
+
# Loads the feed from the remote url if the feed has expired from the
|
82
|
+
# cache or cannot be retrieved from the cache for some reason.
|
179
83
|
def update!
|
180
84
|
if !FeedTools.feed_cache.nil? &&
|
181
85
|
!FeedTools.feed_cache.set_up_correctly?
|
@@ -193,6 +97,32 @@ module FeedTools
|
|
193
97
|
@live = false
|
194
98
|
else
|
195
99
|
load_remote_feed!
|
100
|
+
|
101
|
+
# Handle autodiscovery
|
102
|
+
if self.http_headers['content-type'] =~ /text\/html/ ||
|
103
|
+
self.http_headers['content-type'] =~ /application\/xhtml\+xml/
|
104
|
+
|
105
|
+
autodiscovered_url = nil
|
106
|
+
autodiscovered_url =
|
107
|
+
FeedTools::HtmlHelper.extract_link_by_mime_type(self.feed_data,
|
108
|
+
"application/atom+xml")
|
109
|
+
if autodiscovered_url.nil?
|
110
|
+
autodiscovered_url =
|
111
|
+
FeedTools::HtmlHelper.extract_link_by_mime_type(self.feed_data,
|
112
|
+
"application/rss+xml")
|
113
|
+
end
|
114
|
+
if autodiscovered_url.nil?
|
115
|
+
autodiscovered_url =
|
116
|
+
FeedTools::HtmlHelper.extract_link_by_mime_type(self.feed_data,
|
117
|
+
"application/rdf+xml")
|
118
|
+
end
|
119
|
+
unless autodiscovered_url.nil?
|
120
|
+
self.feed_data = nil
|
121
|
+
self.href = autodiscovered_url
|
122
|
+
self.expire! unless self.cache_object.nil?
|
123
|
+
self.update!
|
124
|
+
end
|
125
|
+
end
|
196
126
|
end
|
197
127
|
end
|
198
128
|
|
@@ -207,20 +137,20 @@ module FeedTools
|
|
207
137
|
@http_headers = YAML.load(self.cache_object.http_headers)
|
208
138
|
end
|
209
139
|
|
210
|
-
if (self.
|
140
|
+
if (self.href =~ /^feed:/) == 0
|
211
141
|
# Woah, Nelly, how'd that happen? You should've already been
|
212
142
|
# corrected. So let's fix that url. And please,
|
213
143
|
# just use less crappy browsers instead of badly defined
|
214
144
|
# pseudo-protocol hacks.
|
215
|
-
self.
|
145
|
+
self.href = FeedTools::UriHelper.normalize_url(self.href)
|
216
146
|
end
|
217
147
|
|
218
148
|
# Find out what method we're going to be using to obtain this feed.
|
219
149
|
begin
|
220
|
-
uri = URI.parse(self.
|
150
|
+
uri = URI.parse(self.href)
|
221
151
|
rescue URI::InvalidURIError
|
222
152
|
raise FeedAccessError,
|
223
|
-
"Cannot retrieve feed using invalid URL: " + self.
|
153
|
+
"Cannot retrieve feed using invalid URL: " + self.href.to_s
|
224
154
|
end
|
225
155
|
retrieval_method = "http"
|
226
156
|
case uri.scheme
|
@@ -262,12 +192,15 @@ module FeedTools
|
|
262
192
|
feed_uri = URI.parse(feed_url)
|
263
193
|
rescue URI::InvalidURIError
|
264
194
|
# Uh, maybe try to fix it?
|
265
|
-
feed_uri = URI.parse(FeedTools.normalize_url(feed_url))
|
195
|
+
feed_uri = URI.parse(FeedTools::UriHelper.normalize_url(feed_url))
|
266
196
|
end
|
267
197
|
|
268
198
|
begin
|
269
|
-
|
270
|
-
|
199
|
+
proxy_address = (FeedTools.configurations[:proxy_address] || nil)
|
200
|
+
proxy_port = (FeedTools.configurations[:proxy_port].to_i || nil)
|
201
|
+
|
202
|
+
http = Net::HTTP::Proxy(proxy_address, proxy_port).new(
|
203
|
+
feed_uri.host, (feed_uri.port or 80))
|
271
204
|
http.start do
|
272
205
|
final_uri = feed_uri.path
|
273
206
|
final_uri += ('?' + feed_uri.query) if feed_uri.query
|
@@ -282,7 +215,7 @@ module FeedTools
|
|
282
215
|
if redirected_response.last.code.to_i == 301
|
283
216
|
# Reset the cache object or we may get duplicate entries
|
284
217
|
self.cache_object = nil
|
285
|
-
self.
|
218
|
+
self.href = redirected_response.last['location']
|
286
219
|
else
|
287
220
|
# Jump out as soon as we hit anything that isn't a
|
288
221
|
# permanently moved redirection.
|
@@ -316,7 +249,7 @@ module FeedTools
|
|
316
249
|
if !cached_feed.expired? &&
|
317
250
|
!cached_feed.http_headers.blank?
|
318
251
|
# Copy the cached state
|
319
|
-
self.
|
252
|
+
self.href = cached_feed.href
|
320
253
|
|
321
254
|
@feed_data = cached_feed.feed_data
|
322
255
|
@feed_data_type = cached_feed.feed_data_type
|
@@ -372,7 +305,7 @@ module FeedTools
|
|
372
305
|
|
373
306
|
begin
|
374
307
|
begin
|
375
|
-
@http_response = http_fetch.call(self.
|
308
|
+
@http_response = http_fetch.call(self.href, headers, 10, [], false)
|
376
309
|
rescue => error
|
377
310
|
if error.respond_to?(:response)
|
378
311
|
# You might not believe this, but...
|
@@ -385,8 +318,8 @@ module FeedTools
|
|
385
318
|
# we get to blame other people's bad software and/or bad
|
386
319
|
# configuration files.
|
387
320
|
if error.response.code.to_i == 404 &&
|
388
|
-
FeedTools.user_agent != nil
|
389
|
-
@http_response = http_fetch.call(self.
|
321
|
+
FeedTools.configurations[:user_agent] != nil
|
322
|
+
@http_response = http_fetch.call(self.href, {}, 10, [], true)
|
390
323
|
if @http_response != nil && @http_response.code.to_i == 200
|
391
324
|
warn("The server appears to be blocking based on the " +
|
392
325
|
"User-Agent header. This is stupid, and you should " +
|
@@ -464,14 +397,18 @@ module FeedTools
|
|
464
397
|
# Not supported... yet
|
465
398
|
elsif retrieval_method == "ftp"
|
466
399
|
# Not supported... yet
|
467
|
-
# Technically, CDF feeds are supposed to be able to be accessed
|
468
|
-
# from an ftp server. This is silly, but we'll humor
|
400
|
+
# Technically, CDF feeds are supposed to be able to be accessed
|
401
|
+
# directly from an ftp server. This is silly, but we'll humor
|
402
|
+
# Microsoft.
|
469
403
|
#
|
470
|
-
# Eventually.
|
404
|
+
# Eventually. If they're lucky. And someone demands it.
|
471
405
|
elsif retrieval_method == "file"
|
472
406
|
# Now that we've gone to all that trouble to ensure the url begins
|
473
407
|
# with 'file://', strip the 'file://' off the front of the url.
|
474
|
-
file_name = self.
|
408
|
+
file_name = self.href.gsub(/^file:\/\//, "")
|
409
|
+
if RUBY_PLATFORM =~ /mswin/
|
410
|
+
file_name = file_name[1..-1] if file_name[1..1] == "/"
|
411
|
+
end
|
475
412
|
begin
|
476
413
|
open(file_name) do |file|
|
477
414
|
@http_response = nil
|
@@ -520,7 +457,7 @@ module FeedTools
|
|
520
457
|
unless self.http_headers.blank?
|
521
458
|
@encoding = "utf-8"
|
522
459
|
else
|
523
|
-
@encoding = self.
|
460
|
+
@encoding = self.encoding_from_feed_data
|
524
461
|
end
|
525
462
|
end
|
526
463
|
return @encoding
|
@@ -528,8 +465,8 @@ module FeedTools
|
|
528
465
|
|
529
466
|
# Returns the encoding of feed calculated only from the xml data.
|
530
467
|
# I.e., the encoding we would come up with if we ignore RFC 3023.
|
531
|
-
def
|
532
|
-
if @
|
468
|
+
def encoding_from_feed_data
|
469
|
+
if @encoding_from_feed_data.nil?
|
533
470
|
raw_data = self.feed_data
|
534
471
|
encoding_from_xml_instruct =
|
535
472
|
raw_data.scan(
|
@@ -539,7 +476,7 @@ module FeedTools
|
|
539
476
|
encoding_from_xml_instruct.downcase!
|
540
477
|
end
|
541
478
|
if encoding_from_xml_instruct.blank?
|
542
|
-
doc = Document.new(raw_data)
|
479
|
+
doc = REXML::Document.new(raw_data)
|
543
480
|
encoding_from_xml_instruct = doc.encoding.downcase
|
544
481
|
if encoding_from_xml_instruct == "utf-8"
|
545
482
|
# REXML has a tendency to report utf-8 overzealously, take with
|
@@ -547,7 +484,7 @@ module FeedTools
|
|
547
484
|
encoding_from_xml_instruct = nil
|
548
485
|
end
|
549
486
|
else
|
550
|
-
@
|
487
|
+
@encoding_from_feed_data = encoding_from_xml_instruct
|
551
488
|
end
|
552
489
|
if encoding_from_xml_instruct.blank?
|
553
490
|
sniff_table = {
|
@@ -556,17 +493,17 @@ module FeedTools
|
|
556
493
|
}
|
557
494
|
sniff = self.feed_data[0..3]
|
558
495
|
if sniff_table[sniff] != nil
|
559
|
-
@
|
496
|
+
@encoding_from_feed_data = sniff_table[sniff].downcase
|
560
497
|
end
|
561
498
|
else
|
562
|
-
@
|
499
|
+
@encoding_from_feed_data = encoding_from_xml_instruct
|
563
500
|
end
|
564
|
-
if @
|
501
|
+
if @encoding_from_feed_data.blank?
|
565
502
|
# Safest assumption
|
566
|
-
@
|
503
|
+
@encoding_from_feed_data = "utf-8"
|
567
504
|
end
|
568
505
|
end
|
569
|
-
return @
|
506
|
+
return @encoding_from_feed_data
|
570
507
|
end
|
571
508
|
|
572
509
|
# Returns the feed's raw data.
|
@@ -581,11 +518,10 @@ module FeedTools
|
|
581
518
|
|
582
519
|
# Sets the feed's data.
|
583
520
|
def feed_data=(new_feed_data)
|
521
|
+
for var in self.instance_variables
|
522
|
+
self.instance_variable_set(var, nil)
|
523
|
+
end
|
584
524
|
@http_headers = {}
|
585
|
-
@cache_object = nil
|
586
|
-
@url = nil
|
587
|
-
@id = nil
|
588
|
-
@encoding = nil
|
589
525
|
@feed_data = new_feed_data
|
590
526
|
unless self.cache_object.nil?
|
591
527
|
self.cache_object.feed_data = new_feed_data
|
@@ -637,25 +573,25 @@ module FeedTools
|
|
637
573
|
end
|
638
574
|
|
639
575
|
# Returns a REXML Document of the feed_data
|
640
|
-
def
|
576
|
+
def xml_document
|
641
577
|
if self.feed_data_type != :xml
|
642
|
-
@
|
578
|
+
@xml_document = nil
|
643
579
|
else
|
644
|
-
if @
|
580
|
+
if @xml_document.nil?
|
645
581
|
begin
|
646
582
|
begin
|
647
|
-
@
|
648
|
-
:ignore_whitespace_nodes => :all)
|
583
|
+
@xml_document = REXML::Document.new(self.feed_data_utf_8)
|
649
584
|
rescue Object
|
650
585
|
# Something failed, attempt to repair the xml with htree.
|
651
|
-
@
|
586
|
+
@xml_document = HTree.parse(self.feed_data_utf_8).to_rexml
|
652
587
|
end
|
653
588
|
rescue Object
|
654
|
-
@
|
589
|
+
@xml_document = nil
|
590
|
+
raise
|
655
591
|
end
|
656
592
|
end
|
657
593
|
end
|
658
|
-
return @
|
594
|
+
return @xml_document
|
659
595
|
end
|
660
596
|
|
661
597
|
# Returns the first node within the channel_node that matches the xpath
|
@@ -664,7 +600,7 @@ module FeedTools
|
|
664
600
|
if self.feed_data_type != :xml
|
665
601
|
raise "The feed data type is not xml."
|
666
602
|
end
|
667
|
-
return try_xpaths(self.channel_node, [xpath],
|
603
|
+
return FeedTools::XmlHelper.try_xpaths(self.channel_node, [xpath],
|
668
604
|
:select_result_value => select_result_value)
|
669
605
|
end
|
670
606
|
|
@@ -673,7 +609,7 @@ module FeedTools
|
|
673
609
|
if self.feed_data_type != :xml
|
674
610
|
raise "The feed data type is not xml."
|
675
611
|
end
|
676
|
-
return try_xpaths_all(self.channel_node, [xpath],
|
612
|
+
return FeedTools::XmlHelper.try_xpaths_all(self.channel_node, [xpath],
|
677
613
|
:select_result_value => select_result_value)
|
678
614
|
end
|
679
615
|
|
@@ -685,10 +621,10 @@ module FeedTools
|
|
685
621
|
# E.g.: http://smogzer.tripod.com/smog.rdf
|
686
622
|
# ===================================================================
|
687
623
|
begin
|
688
|
-
if
|
624
|
+
if self.xml_document.nil?
|
689
625
|
return nil
|
690
626
|
else
|
691
|
-
@root_node =
|
627
|
+
@root_node = self.xml_document.root
|
692
628
|
end
|
693
629
|
rescue
|
694
630
|
return nil
|
@@ -699,14 +635,14 @@ module FeedTools
|
|
699
635
|
|
700
636
|
# Returns the channel node of the feed.
|
701
637
|
def channel_node
|
702
|
-
if @channel_node.nil? && root_node != nil
|
703
|
-
@channel_node = try_xpaths(root_node, [
|
638
|
+
if @channel_node.nil? && self.root_node != nil
|
639
|
+
@channel_node = FeedTools::XmlHelper.try_xpaths(self.root_node, [
|
704
640
|
"channel",
|
705
641
|
"CHANNEL",
|
706
642
|
"feedinfo"
|
707
643
|
])
|
708
644
|
if @channel_node == nil
|
709
|
-
@channel_node = root_node
|
645
|
+
@channel_node = self.root_node
|
710
646
|
end
|
711
647
|
end
|
712
648
|
return @channel_node
|
@@ -714,14 +650,19 @@ module FeedTools
|
|
714
650
|
|
715
651
|
# The cache object that handles the feed persistence.
|
716
652
|
def cache_object
|
717
|
-
if !@
|
653
|
+
if !@href.nil? && @href =~ /^file:\/\//
|
718
654
|
return nil
|
719
655
|
end
|
720
656
|
unless FeedTools.feed_cache.nil?
|
721
657
|
if @cache_object.nil?
|
722
658
|
begin
|
723
|
-
if @
|
724
|
-
|
659
|
+
if @href != nil
|
660
|
+
begin
|
661
|
+
@cache_object = FeedTools.feed_cache.find_by_href(@href)
|
662
|
+
rescue
|
663
|
+
warn("The feed cache seems to be having trouble with the " +
|
664
|
+
"find_by_href method. This may cause unexpected results.")
|
665
|
+
end
|
725
666
|
end
|
726
667
|
if @cache_object.nil?
|
727
668
|
@cache_object = FeedTools.feed_cache.new
|
@@ -736,8 +677,8 @@ module FeedTools
|
|
736
677
|
# Sets the cache object for this feed.
|
737
678
|
#
|
738
679
|
# This can be any object, but it must accept the following messages:
|
739
|
-
#
|
740
|
-
#
|
680
|
+
# href
|
681
|
+
# href=
|
741
682
|
# title
|
742
683
|
# title=
|
743
684
|
# link
|
@@ -773,7 +714,11 @@ module FeedTools
|
|
773
714
|
when "rss"
|
774
715
|
@feed_type = "rss"
|
775
716
|
when "channel"
|
776
|
-
|
717
|
+
if self.root_node.namespace == FEED_TOOLS_NAMESPACES['rss11']
|
718
|
+
@feed_type = "rss"
|
719
|
+
else
|
720
|
+
@feed_type = "cdf"
|
721
|
+
end
|
777
722
|
end
|
778
723
|
end
|
779
724
|
return @feed_type
|
@@ -794,26 +739,33 @@ module FeedTools
|
|
794
739
|
end
|
795
740
|
version = nil
|
796
741
|
begin
|
797
|
-
|
742
|
+
version_string = FeedTools::XmlHelper.try_xpaths(self.root_node, [
|
743
|
+
"@version"
|
744
|
+
], :select_result_value => true)
|
745
|
+
unless version_string.nil?
|
746
|
+
version = version_string.to_f
|
747
|
+
end
|
798
748
|
rescue
|
799
749
|
end
|
800
750
|
version = nil if version == 0.0
|
801
|
-
default_namespace =
|
751
|
+
default_namespace = FeedTools::XmlHelper.try_xpaths(self.root_node, [
|
752
|
+
"@xmlns"
|
753
|
+
], :select_result_value => true)
|
802
754
|
case self.feed_type
|
803
755
|
when "atom"
|
804
|
-
if default_namespace ==
|
756
|
+
if default_namespace == FEED_TOOLS_NAMESPACES['atom10']
|
805
757
|
@feed_version = 1.0
|
806
758
|
elsif version != nil
|
807
759
|
@feed_version = version
|
808
|
-
elsif default_namespace ==
|
760
|
+
elsif default_namespace == FEED_TOOLS_NAMESPACES['atom03']
|
809
761
|
@feed_version = 0.3
|
810
762
|
end
|
811
763
|
when "rss"
|
812
|
-
if default_namespace ==
|
764
|
+
if default_namespace == FEED_TOOLS_NAMESPACES['rss09']
|
813
765
|
@feed_version = 0.9
|
814
|
-
elsif default_namespace ==
|
766
|
+
elsif default_namespace == FEED_TOOLS_NAMESPACES['rss10']
|
815
767
|
@feed_version = 1.0
|
816
|
-
elsif default_namespace ==
|
768
|
+
elsif default_namespace == FEED_TOOLS_NAMESPACES['rss11']
|
817
769
|
@feed_version = 1.1
|
818
770
|
elsif version != nil
|
819
771
|
case version
|
@@ -828,7 +780,7 @@ module FeedTools
|
|
828
780
|
when "cdf"
|
829
781
|
@feed_version = 0.4
|
830
782
|
when "!okay/news"
|
831
|
-
@feed_version =
|
783
|
+
@feed_version = 1.0
|
832
784
|
end
|
833
785
|
end
|
834
786
|
return @feed_version
|
@@ -842,15 +794,15 @@ module FeedTools
|
|
842
794
|
# Returns the feed's unique id
|
843
795
|
def id
|
844
796
|
if @id.nil?
|
845
|
-
@id = select_not_blank([
|
846
|
-
try_xpaths(self.channel_node, [
|
797
|
+
@id = FeedTools::XmlHelper.select_not_blank([
|
798
|
+
FeedTools::XmlHelper.try_xpaths(self.channel_node, [
|
847
799
|
"atom10:id/text()",
|
848
800
|
"atom03:id/text()",
|
849
801
|
"atom:id/text()",
|
850
802
|
"id/text()",
|
851
803
|
"guid/text()"
|
852
804
|
], :select_result_value => true),
|
853
|
-
try_xpaths(self.root_node, [
|
805
|
+
FeedTools::XmlHelper.try_xpaths(self.root_node, [
|
854
806
|
"atom10:id/text()",
|
855
807
|
"atom03:id/text()",
|
856
808
|
"atom:id/text()",
|
@@ -868,106 +820,114 @@ module FeedTools
|
|
868
820
|
end
|
869
821
|
|
870
822
|
# Returns the feed url.
|
871
|
-
def
|
872
|
-
|
873
|
-
|
874
|
-
|
875
|
-
|
876
|
-
|
877
|
-
|
878
|
-
|
879
|
-
if self.feed_data != nil
|
823
|
+
def href
|
824
|
+
if @href_overridden != true || @href.nil?
|
825
|
+
original_href = @href
|
826
|
+
|
827
|
+
override_href = lambda do |current_href|
|
828
|
+
begin
|
829
|
+
if current_href.nil? && self.feed_data != nil
|
830
|
+
# The current url is nil and we have feed data to go on
|
880
831
|
true
|
832
|
+
elsif current_href != nil && !(["http", "https"].include?(
|
833
|
+
URI.parse(current_href.to_s).scheme))
|
834
|
+
if self.feed_data != nil
|
835
|
+
# The current url is set, but isn't a http/https url and
|
836
|
+
# we have feed data to use to replace the current url with
|
837
|
+
true
|
838
|
+
else
|
839
|
+
# The current url is set, but isn't a http/https url but
|
840
|
+
# we don't have feed data to use to replace the current url
|
841
|
+
# with so we'll have to wait until we do
|
842
|
+
false
|
843
|
+
end
|
881
844
|
else
|
845
|
+
# The current url is set to an http/https url and there's
|
846
|
+
# no compelling reason to override it
|
882
847
|
false
|
883
848
|
end
|
884
|
-
|
885
|
-
|
849
|
+
rescue
|
850
|
+
# Something went wrong, so we should err on the side of caution
|
851
|
+
# and attempt to override the url
|
852
|
+
true
|
886
853
|
end
|
887
|
-
|
888
|
-
|
889
|
-
|
890
|
-
|
891
|
-
|
892
|
-
|
893
|
-
|
894
|
-
|
895
|
-
|
896
|
-
|
897
|
-
|
898
|
-
|
899
|
-
|
900
|
-
|
901
|
-
|
902
|
-
|
903
|
-
|
904
|
-
|
905
|
-
|
906
|
-
|
907
|
-
|
908
|
-
|
909
|
-
|
910
|
-
|
911
|
-
|
912
|
-
|
913
|
-
|
914
|
-
|
915
|
-
|
916
|
-
|
917
|
-
|
918
|
-
|
854
|
+
end
|
855
|
+
if override_href.call(@href) && self.feed_data != nil
|
856
|
+
# rdf:about is ordered last because a lot of people put the url to
|
857
|
+
# the feed inside it instead of a link to their blog.
|
858
|
+
# Ordering it last gives them as many chances as humanly possible
|
859
|
+
# for them to redeem themselves. If the link turns out to be the
|
860
|
+
# same as the blog link, it will be reset to the original value.
|
861
|
+
for link_object in self.links
|
862
|
+
if link_object.rel == 'self'
|
863
|
+
if link_object.href != self.link
|
864
|
+
@href = link_object.href
|
865
|
+
@href_overridden = true
|
866
|
+
return @href
|
867
|
+
end
|
868
|
+
end
|
869
|
+
end
|
870
|
+
@href = FeedTools::XmlHelper.try_xpaths(self.channel_node, [
|
871
|
+
"admin:feed/@rdf:resource",
|
872
|
+
"admin:feed/@resource",
|
873
|
+
"feed/@rdf:resource",
|
874
|
+
"feed/@resource",
|
875
|
+
"@rdf:about",
|
876
|
+
"@about"
|
877
|
+
], :select_result_value => true) do |result|
|
878
|
+
override_href.call(FeedTools::UriHelper.normalize_url(result))
|
879
|
+
end
|
880
|
+
begin
|
881
|
+
if !(@href =~ /^file:/) &&
|
882
|
+
!FeedTools::UriHelper.is_uri?(@href)
|
883
|
+
@href = FeedTools::UriHelper.resolve_relative_uri(
|
884
|
+
@href, [self.base_uri])
|
885
|
+
end
|
886
|
+
rescue
|
887
|
+
end
|
888
|
+
if FeedTools.configurations[:url_normalization_enabled]
|
889
|
+
@href = FeedTools::UriHelper.normalize_url(@href)
|
890
|
+
end
|
891
|
+
@href.strip! unless @href.nil?
|
892
|
+
@href = nil if @href.blank?
|
893
|
+
@href_overridden = true
|
894
|
+
if @href == nil
|
895
|
+
@href = original_href
|
896
|
+
@href_overridden = false
|
897
|
+
end
|
898
|
+
if @href == self.link
|
899
|
+
@href = original_href
|
900
|
+
@href_overridden = false
|
901
|
+
end
|
902
|
+
end
|
903
|
+
end
|
904
|
+
return @href
|
919
905
|
end
|
920
906
|
|
921
907
|
# Sets the feed url and prepares the cache_object if necessary.
|
922
|
-
def
|
923
|
-
@
|
924
|
-
self.cache_object.
|
908
|
+
def href=(new_href)
|
909
|
+
@href = FeedTools::UriHelper.normalize_url(new_href)
|
910
|
+
self.cache_object.href = new_href unless self.cache_object.nil?
|
925
911
|
end
|
926
912
|
|
927
913
|
# Returns the feed title
|
928
914
|
def title
|
929
915
|
if @title.nil?
|
930
916
|
repair_entities = false
|
931
|
-
title_node = try_xpaths(self.channel_node, [
|
917
|
+
title_node = FeedTools::XmlHelper.try_xpaths(self.channel_node, [
|
932
918
|
"atom10:title",
|
933
919
|
"atom03:title",
|
934
920
|
"atom:title",
|
935
921
|
"title",
|
936
|
-
"dc:title"
|
922
|
+
"dc:title",
|
923
|
+
"channelTitle"
|
937
924
|
])
|
938
|
-
|
939
|
-
|
940
|
-
|
941
|
-
|
942
|
-
|
943
|
-
title_mode = try_xpaths(title_node, "@mode",
|
944
|
-
:select_result_value => true)
|
945
|
-
title_encoding = try_xpaths(title_node, "@encoding",
|
946
|
-
:select_result_value => true)
|
947
|
-
|
948
|
-
# Note that we're checking for misuse of type, mode and encoding here
|
949
|
-
if title_type == "base64" || title_mode == "base64" ||
|
950
|
-
title_encoding == "base64"
|
951
|
-
@title = Base64.decode64(title_node.inner_xml.strip)
|
952
|
-
elsif title_type == "xhtml" || title_mode == "xhtml" ||
|
953
|
-
title_type == "xml" || title_mode == "xml" ||
|
954
|
-
title_type == "application/xhtml+xml"
|
955
|
-
@title = title_node.inner_xml
|
956
|
-
elsif title_type == "escaped" || title_mode == "escaped"
|
957
|
-
@title = FeedTools.unescape_entities(
|
958
|
-
title_node.inner_xml)
|
959
|
-
else
|
960
|
-
@title = title_node.inner_xml
|
961
|
-
repair_entities = true
|
925
|
+
@title = FeedTools::HtmlHelper.process_text_construct(title_node,
|
926
|
+
self.feed_type, self.feed_version)
|
927
|
+
if self.feed_type == "atom" ||
|
928
|
+
FeedTools.configurations[:always_strip_wrapper_elements]
|
929
|
+
@title = FeedTools::HtmlHelper.strip_wrapper_element(@title)
|
962
930
|
end
|
963
|
-
unless @title.nil?
|
964
|
-
@title = FeedTools.sanitize_html(@title, :strip)
|
965
|
-
@title = FeedTools.unescape_entities(@title) if repair_entities
|
966
|
-
@title = FeedTools.tidy_html(@title) unless repair_entities
|
967
|
-
end
|
968
|
-
@title.gsub!(/>\n</, "><")
|
969
|
-
@title.gsub!(/\n/, " ")
|
970
|
-
@title.strip!
|
971
931
|
@title = nil if @title.blank?
|
972
932
|
self.cache_object.title = @title unless self.cache_object.nil?
|
973
933
|
end
|
@@ -984,7 +944,7 @@ module FeedTools
|
|
984
944
|
def subtitle
|
985
945
|
if @subtitle.nil?
|
986
946
|
repair_entities = false
|
987
|
-
subtitle_node = try_xpaths(self.channel_node, [
|
947
|
+
subtitle_node = FeedTools::XmlHelper.try_xpaths(self.channel_node, [
|
988
948
|
"atom10:subtitle",
|
989
949
|
"subtitle",
|
990
950
|
"atom03:tagline",
|
@@ -992,44 +952,24 @@ module FeedTools
|
|
992
952
|
"description",
|
993
953
|
"summary",
|
994
954
|
"abstract",
|
995
|
-
"ABSTRACT",
|
996
955
|
"content:encoded",
|
997
956
|
"encoded",
|
998
957
|
"content",
|
999
958
|
"xhtml:body",
|
1000
959
|
"body",
|
960
|
+
"xhtml:div",
|
961
|
+
"div",
|
962
|
+
"p:payload",
|
963
|
+
"payload",
|
964
|
+
"channelDescription",
|
1001
965
|
"blurb",
|
1002
966
|
"info"
|
1003
967
|
])
|
1004
|
-
|
1005
|
-
|
1006
|
-
|
1007
|
-
|
1008
|
-
|
1009
|
-
subtitle_mode = try_xpaths(subtitle_node, "@mode",
|
1010
|
-
:select_result_value => true)
|
1011
|
-
subtitle_encoding = try_xpaths(subtitle_node, "@encoding",
|
1012
|
-
:select_result_value => true)
|
1013
|
-
|
1014
|
-
# Note that we're checking for misuse of type, mode and encoding here
|
1015
|
-
if !subtitle_encoding.blank?
|
1016
|
-
@subtitle =
|
1017
|
-
"[Embedded data objects are not currently supported.]"
|
1018
|
-
elsif subtitle_node.cdatas.size > 0
|
1019
|
-
@subtitle = subtitle_node.cdatas.first.value
|
1020
|
-
elsif subtitle_type == "base64" || subtitle_mode == "base64" ||
|
1021
|
-
subtitle_encoding == "base64"
|
1022
|
-
@subtitle = Base64.decode64(subtitle_node.inner_xml.strip)
|
1023
|
-
elsif subtitle_type == "xhtml" || subtitle_mode == "xhtml" ||
|
1024
|
-
subtitle_type == "xml" || subtitle_mode == "xml" ||
|
1025
|
-
subtitle_type == "application/xhtml+xml"
|
1026
|
-
@subtitle = subtitle_node.inner_xml
|
1027
|
-
elsif subtitle_type == "escaped" || subtitle_mode == "escaped"
|
1028
|
-
@subtitle = FeedTools.unescape_entities(
|
1029
|
-
subtitle_node.inner_xml)
|
1030
|
-
else
|
1031
|
-
@subtitle = subtitle_node.inner_xml
|
1032
|
-
repair_entities = true
|
968
|
+
@subtitle = FeedTools::HtmlHelper.process_text_construct(
|
969
|
+
subtitle_node, self.feed_type, self.feed_version)
|
970
|
+
if self.feed_type == "atom" ||
|
971
|
+
FeedTools.configurations[:always_strip_wrapper_elements]
|
972
|
+
@subtitle = FeedTools::HtmlHelper.strip_wrapper_element(@subtitle)
|
1033
973
|
end
|
1034
974
|
if @subtitle.blank?
|
1035
975
|
@subtitle = self.itunes_summary
|
@@ -1037,15 +977,6 @@ module FeedTools
|
|
1037
977
|
if @subtitle.blank?
|
1038
978
|
@subtitle = self.itunes_subtitle
|
1039
979
|
end
|
1040
|
-
|
1041
|
-
unless @subtitle.blank?
|
1042
|
-
@subtitle = FeedTools.sanitize_html(@subtitle, :strip)
|
1043
|
-
@subtitle = FeedTools.unescape_entities(@subtitle) if repair_entities
|
1044
|
-
@subtitle = FeedTools.tidy_html(@subtitle)
|
1045
|
-
end
|
1046
|
-
|
1047
|
-
@subtitle = @subtitle.strip unless @subtitle.nil?
|
1048
|
-
@subtitle = nil if @subtitle.blank?
|
1049
980
|
end
|
1050
981
|
return @subtitle
|
1051
982
|
end
|
@@ -1058,17 +989,20 @@ module FeedTools
|
|
1058
989
|
# Returns the contents of the itunes:summary element
|
1059
990
|
def itunes_summary
|
1060
991
|
if @itunes_summary.nil?
|
1061
|
-
@itunes_summary = select_not_blank([
|
1062
|
-
try_xpaths(self.channel_node, [
|
992
|
+
@itunes_summary = FeedTools::XmlHelper.select_not_blank([
|
993
|
+
FeedTools::XmlHelper.try_xpaths(self.channel_node, [
|
1063
994
|
"itunes:summary/text()"
|
1064
|
-
]),
|
1065
|
-
try_xpaths(self.root_node, [
|
995
|
+
], :select_result_value => true),
|
996
|
+
FeedTools::XmlHelper.try_xpaths(self.root_node, [
|
1066
997
|
"itunes:summary/text()"
|
1067
|
-
])
|
998
|
+
], :select_result_value => true)
|
1068
999
|
])
|
1069
1000
|
unless @itunes_summary.blank?
|
1070
|
-
@itunes_summary =
|
1071
|
-
|
1001
|
+
@itunes_summary =
|
1002
|
+
FeedTools::HtmlHelper.unescape_entities(@itunes_summary)
|
1003
|
+
@itunes_summary =
|
1004
|
+
FeedTools::HtmlHelper.sanitize_html(@itunes_summary)
|
1005
|
+
@itunes_summary.strip!
|
1072
1006
|
else
|
1073
1007
|
@itunes_summary = nil
|
1074
1008
|
end
|
@@ -1084,17 +1018,20 @@ module FeedTools
|
|
1084
1018
|
# Returns the contents of the itunes:subtitle element
|
1085
1019
|
def itunes_subtitle
|
1086
1020
|
if @itunes_subtitle.nil?
|
1087
|
-
@itunes_subtitle = select_not_blank([
|
1088
|
-
try_xpaths(self.channel_node, [
|
1021
|
+
@itunes_subtitle = FeedTools::XmlHelper.select_not_blank([
|
1022
|
+
FeedTools::XmlHelper.try_xpaths(self.channel_node, [
|
1089
1023
|
"itunes:subtitle/text()"
|
1090
|
-
]),
|
1091
|
-
try_xpaths(self.root_node, [
|
1024
|
+
], :select_result_value => true),
|
1025
|
+
FeedTools::XmlHelper.try_xpaths(self.root_node, [
|
1092
1026
|
"itunes:subtitle/text()"
|
1093
|
-
])
|
1027
|
+
], :select_result_value => true)
|
1094
1028
|
])
|
1095
1029
|
unless @itunes_subtitle.blank?
|
1096
|
-
@itunes_subtitle =
|
1097
|
-
|
1030
|
+
@itunes_subtitle =
|
1031
|
+
FeedTools::HtmlHelper.unescape_entities(@itunes_subtitle)
|
1032
|
+
@itunes_subtitle =
|
1033
|
+
FeedTools::HtmlHelper.sanitize_html(@itunes_subtitle)
|
1034
|
+
@itunes_subtitle.strip!
|
1098
1035
|
else
|
1099
1036
|
@itunes_subtitle = nil
|
1100
1037
|
end
|
@@ -1107,84 +1044,89 @@ module FeedTools
|
|
1107
1044
|
@itunes_subtitle = new_itunes_subtitle
|
1108
1045
|
end
|
1109
1046
|
|
1047
|
+
# Returns the contents of the media:text element
|
1048
|
+
def media_text
|
1049
|
+
if @media_text.nil?
|
1050
|
+
@media_text = FeedTools::XmlHelper.select_not_blank([
|
1051
|
+
FeedTools::XmlHelper.try_xpaths(self.channel_node, [
|
1052
|
+
"media:text/text()"
|
1053
|
+
], :select_result_value => true),
|
1054
|
+
FeedTools::XmlHelper.try_xpaths(self.root_node, [
|
1055
|
+
"media:text/text()"
|
1056
|
+
], :select_result_value => true)
|
1057
|
+
])
|
1058
|
+
unless @media_text.blank?
|
1059
|
+
@media_text = FeedTools::HtmlHelper.unescape_entities(@media_text)
|
1060
|
+
@media_text = FeedTools::HtmlHelper.sanitize_html(@media_text)
|
1061
|
+
@media_text.strip!
|
1062
|
+
else
|
1063
|
+
@media_text = nil
|
1064
|
+
end
|
1065
|
+
end
|
1066
|
+
return @media_text
|
1067
|
+
end
|
1068
|
+
|
1069
|
+
# Sets the contents of the media:text element
|
1070
|
+
def media_text=(new_media_text)
|
1071
|
+
@media_text = new_media_text
|
1072
|
+
end
|
1073
|
+
|
1110
1074
|
# Returns the feed link
|
1111
1075
|
def link
|
1112
1076
|
if @link.nil?
|
1113
|
-
|
1114
|
-
|
1115
|
-
|
1116
|
-
|
1117
|
-
|
1118
|
-
|
1119
|
-
|
1120
|
-
|
1121
|
-
|
1122
|
-
|
1123
|
-
"link[@type='application/xhtml+xml']/@href",
|
1124
|
-
"link[@type='text/html']/@href",
|
1125
|
-
"link[@rel='alternate']/@href",
|
1126
|
-
"link/text()",
|
1127
|
-
"@href",
|
1128
|
-
"a/@href"
|
1129
|
-
], :select_result_value => true)
|
1130
|
-
if @link.blank?
|
1131
|
-
if FeedTools.is_uri?(self.guid) &&
|
1132
|
-
!(self.guid =~ /^urn:uuid:/) &&
|
1133
|
-
!(self.guid =~ /^tag:/)
|
1134
|
-
@link = self.guid
|
1077
|
+
max_score = 0
|
1078
|
+
for link_object in self.links.reverse
|
1079
|
+
score = 0
|
1080
|
+
if FeedTools::HtmlHelper.html_type?(link_object.type)
|
1081
|
+
score = score + 2
|
1082
|
+
elsif link_object.type != nil
|
1083
|
+
score = score - 1
|
1084
|
+
end
|
1085
|
+
if FeedTools::HtmlHelper.xml_type?(link_object.type)
|
1086
|
+
score = score + 1
|
1135
1087
|
end
|
1088
|
+
if link_object.rel == "alternate"
|
1089
|
+
score = score + 1
|
1090
|
+
end
|
1091
|
+
if link_object.rel == "self"
|
1092
|
+
score = score - 1
|
1093
|
+
end
|
1094
|
+
if score >= max_score
|
1095
|
+
max_score = score
|
1096
|
+
@link = link_object.href
|
1097
|
+
end
|
1098
|
+
end
|
1099
|
+
if @link.blank?
|
1100
|
+
@link = FeedTools::XmlHelper.try_xpaths(self.channel_node, [
|
1101
|
+
"@href",
|
1102
|
+
"@rdf:about",
|
1103
|
+
"@about"
|
1104
|
+
], :select_result_value => true)
|
1136
1105
|
end
|
1137
|
-
if @link.blank?
|
1138
|
-
|
1139
|
-
|
1140
|
-
|
1141
|
-
|
1106
|
+
if @link.blank?
|
1107
|
+
if FeedTools::UriHelper.is_uri?(self.id) &&
|
1108
|
+
(self.id =~ /^http/)
|
1109
|
+
@link = self.id
|
1110
|
+
end
|
1142
1111
|
end
|
1143
1112
|
if !@link.blank?
|
1144
|
-
@link = FeedTools.unescape_entities(@link)
|
1113
|
+
@link = FeedTools::HtmlHelper.unescape_entities(@link)
|
1145
1114
|
end
|
1146
|
-
if @link.blank?
|
1147
|
-
|
1148
|
-
|
1149
|
-
|
1150
|
-
|
1151
|
-
|
1152
|
-
|
1153
|
-
if link_node != nil
|
1154
|
-
if link_node.attributes['type'].to_s =~ /^image/ ||
|
1155
|
-
link_node.attributes['type'].to_s =~ /^application/ ||
|
1156
|
-
link_node.attributes['type'].to_s =~ /xml/ ||
|
1157
|
-
link_node.attributes['rel'].to_s =~ /self/
|
1158
|
-
for child in self.channel_node
|
1159
|
-
if child.class == REXML::Element
|
1160
|
-
if child.name.downcase == "link"
|
1161
|
-
if child.attributes['type'].to_s =~ /^image/ ||
|
1162
|
-
child.attributes['type'].to_s =~ /^application/ ||
|
1163
|
-
child.attributes['type'].to_s =~ /xml/ ||
|
1164
|
-
child.attributes['rel'].to_s =~ /self/
|
1165
|
-
@link = nil
|
1166
|
-
next
|
1167
|
-
else
|
1168
|
-
@link = child.attributes['href'].to_s
|
1169
|
-
if @link.blank?
|
1170
|
-
@link = child.inner_xml
|
1171
|
-
end
|
1172
|
-
if @link.blank?
|
1173
|
-
next
|
1174
|
-
end
|
1175
|
-
break
|
1176
|
-
end
|
1177
|
-
end
|
1178
|
-
end
|
1179
|
-
end
|
1180
|
-
else
|
1181
|
-
@link = link_node.attributes['href'].to_s
|
1115
|
+
@link = nil if @link.blank?
|
1116
|
+
begin
|
1117
|
+
if !(@link =~ /^file:/) &&
|
1118
|
+
!FeedTools::UriHelper.is_uri?(@link)
|
1119
|
+
channel_base_uri = nil
|
1120
|
+
unless self.channel_node.nil?
|
1121
|
+
channel_base_uri = self.channel_node.base_uri
|
1182
1122
|
end
|
1123
|
+
@link = FeedTools::UriHelper.resolve_relative_uri(
|
1124
|
+
@link, [channel_base_uri, self.base_uri])
|
1183
1125
|
end
|
1126
|
+
rescue
|
1184
1127
|
end
|
1185
|
-
@link = nil if @link.blank?
|
1186
1128
|
if FeedTools.configurations[:url_normalization_enabled]
|
1187
|
-
@link = FeedTools.normalize_url(@link)
|
1129
|
+
@link = FeedTools::UriHelper.normalize_url(@link)
|
1188
1130
|
end
|
1189
1131
|
unless self.cache_object.nil?
|
1190
1132
|
self.cache_object.link = @link
|
@@ -1200,11 +1142,143 @@ module FeedTools
|
|
1200
1142
|
self.cache_object.link = new_link
|
1201
1143
|
end
|
1202
1144
|
end
|
1145
|
+
|
1146
|
+
# Returns the links collection
|
1147
|
+
def links
|
1148
|
+
if @links.blank?
|
1149
|
+
@links = []
|
1150
|
+
link_nodes =
|
1151
|
+
FeedTools::XmlHelper.combine_xpaths_all(self.channel_node, [
|
1152
|
+
"atom10:link",
|
1153
|
+
"atom03:link",
|
1154
|
+
"atom:link",
|
1155
|
+
"link",
|
1156
|
+
"channelLink",
|
1157
|
+
"a",
|
1158
|
+
"url",
|
1159
|
+
"href"
|
1160
|
+
])
|
1161
|
+
for link_node in link_nodes
|
1162
|
+
link_object = FeedTools::Link.new
|
1163
|
+
link_object.href = FeedTools::XmlHelper.try_xpaths(link_node, [
|
1164
|
+
"@atom10:href",
|
1165
|
+
"@atom03:href",
|
1166
|
+
"@atom:href",
|
1167
|
+
"@href",
|
1168
|
+
"text()"
|
1169
|
+
], :select_result_value => true)
|
1170
|
+
if link_object.href.nil? && link_node.base_uri != nil
|
1171
|
+
link_object.href = ""
|
1172
|
+
end
|
1173
|
+
begin
|
1174
|
+
if !(link_object.href =~ /^file:/) &&
|
1175
|
+
!FeedTools::UriHelper.is_uri?(link_object.href)
|
1176
|
+
link_object.href = FeedTools::UriHelper.resolve_relative_uri(
|
1177
|
+
link_object.href,
|
1178
|
+
[link_node.base_uri, self.base_uri])
|
1179
|
+
end
|
1180
|
+
rescue
|
1181
|
+
end
|
1182
|
+
if FeedTools.configurations[:url_normalization_enabled]
|
1183
|
+
link_object.href =
|
1184
|
+
FeedTools::UriHelper.normalize_url(link_object.href)
|
1185
|
+
end
|
1186
|
+
link_object.href.strip! unless link_object.href.nil?
|
1187
|
+
next if link_object.href.blank?
|
1188
|
+
link_object.hreflang = FeedTools::XmlHelper.try_xpaths(link_node, [
|
1189
|
+
"@atom10:hreflang",
|
1190
|
+
"@atom03:hreflang",
|
1191
|
+
"@atom:hreflang",
|
1192
|
+
"@hreflang"
|
1193
|
+
], :select_result_value => true)
|
1194
|
+
unless link_object.hreflang.nil?
|
1195
|
+
link_object.hreflang = link_object.hreflang.downcase
|
1196
|
+
end
|
1197
|
+
link_object.rel = FeedTools::XmlHelper.try_xpaths(link_node, [
|
1198
|
+
"@atom10:rel",
|
1199
|
+
"@atom03:rel",
|
1200
|
+
"@atom:rel",
|
1201
|
+
"@rel"
|
1202
|
+
], :select_result_value => true)
|
1203
|
+
unless link_object.rel.nil?
|
1204
|
+
link_object.rel = link_object.rel.downcase
|
1205
|
+
end
|
1206
|
+
link_object.type = FeedTools::XmlHelper.try_xpaths(link_node, [
|
1207
|
+
"@atom10:type",
|
1208
|
+
"@atom03:type",
|
1209
|
+
"@atom:type",
|
1210
|
+
"@type"
|
1211
|
+
], :select_result_value => true)
|
1212
|
+
unless link_object.type.nil?
|
1213
|
+
link_object.type = link_object.type.downcase
|
1214
|
+
end
|
1215
|
+
link_object.title = FeedTools::XmlHelper.try_xpaths(link_node, [
|
1216
|
+
"@atom10:title",
|
1217
|
+
"@atom03:title",
|
1218
|
+
"@atom:title",
|
1219
|
+
"@title",
|
1220
|
+
"text()"
|
1221
|
+
], :select_result_value => true)
|
1222
|
+
# This catches the ambiguities between atom, rss, and cdf
|
1223
|
+
if link_object.title == link_object.href
|
1224
|
+
link_object.title = nil
|
1225
|
+
end
|
1226
|
+
link_object.length = FeedTools::XmlHelper.try_xpaths(link_node, [
|
1227
|
+
"@atom10:length",
|
1228
|
+
"@atom03:length",
|
1229
|
+
"@atom:length",
|
1230
|
+
"@length"
|
1231
|
+
], :select_result_value => true)
|
1232
|
+
if !link_object.length.nil?
|
1233
|
+
link_object.length = link_object.length.to_i
|
1234
|
+
else
|
1235
|
+
if !link_object.type.nil? && link_object.type[0..4] != "text" &&
|
1236
|
+
link_object.type[-3..-1] != "xml" &&
|
1237
|
+
link_object.href =~ /^http:\/\//
|
1238
|
+
# Retrieve the length with an http HEAD request
|
1239
|
+
else
|
1240
|
+
link_object.length = nil
|
1241
|
+
end
|
1242
|
+
end
|
1243
|
+
@links << link_object
|
1244
|
+
end
|
1245
|
+
end
|
1246
|
+
return @links
|
1247
|
+
end
|
1248
|
+
|
1249
|
+
# Sets the links collection
|
1250
|
+
def links=(new_links)
|
1251
|
+
@links = new_links
|
1252
|
+
end
|
1253
|
+
|
1254
|
+
# Returns the base uri for the feed, used for resolving relative paths
|
1255
|
+
def base_uri
|
1256
|
+
if @base_uri.nil?
|
1257
|
+
@base_uri = FeedTools::XmlHelper.try_xpaths(self.channel_node, [
|
1258
|
+
"@base"
|
1259
|
+
], :select_result_value => true)
|
1260
|
+
if @base_uri.blank?
|
1261
|
+
@base_uri =
|
1262
|
+
FeedTools::GenericHelper.recursion_trap(:feed_base_uri) do
|
1263
|
+
self.href
|
1264
|
+
end
|
1265
|
+
end
|
1266
|
+
if !@base_uri.blank?
|
1267
|
+
@base_uri = FeedTools::UriHelper.normalize_url(@base_uri)
|
1268
|
+
end
|
1269
|
+
end
|
1270
|
+
return @base_uri
|
1271
|
+
end
|
1272
|
+
|
1273
|
+
# Sets the base uri for the feed
|
1274
|
+
def base_uri=(new_base_uri)
|
1275
|
+
@base_uri = new_base_uri
|
1276
|
+
end
|
1203
1277
|
|
1204
1278
|
# Returns the url to the icon file for this feed.
|
1205
1279
|
def icon
|
1206
1280
|
if @icon.nil?
|
1207
|
-
icon_node = try_xpaths(self.channel_node, [
|
1281
|
+
icon_node = FeedTools::XmlHelper.try_xpaths(self.channel_node, [
|
1208
1282
|
"link[@rel='icon']",
|
1209
1283
|
"link[@rel='shortcut icon']",
|
1210
1284
|
"link[@type='image/x-icon']",
|
@@ -1213,15 +1287,26 @@ module FeedTools
|
|
1213
1287
|
"LOGO[@STYLE='ICON']"
|
1214
1288
|
])
|
1215
1289
|
unless icon_node.nil?
|
1216
|
-
@icon = FeedTools.
|
1217
|
-
|
1218
|
-
|
1219
|
-
@
|
1220
|
-
|
1221
|
-
|
1222
|
-
|
1290
|
+
@icon = FeedTools::XmlHelper.try_xpaths(icon_node, [
|
1291
|
+
"@atom10:href",
|
1292
|
+
"@atom03:href",
|
1293
|
+
"@atom:href",
|
1294
|
+
"@href",
|
1295
|
+
"text()"
|
1296
|
+
], :select_result_value => true)
|
1297
|
+
begin
|
1298
|
+
if !(@icon =~ /^file:/) &&
|
1299
|
+
!FeedTools::UriHelper.is_uri?(@icon)
|
1300
|
+
channel_base_uri = nil
|
1301
|
+
unless self.channel_node.nil?
|
1302
|
+
channel_base_uri = self.channel_node.base_uri
|
1303
|
+
end
|
1304
|
+
@icon = FeedTools::UriHelper.resolve_relative_uri(
|
1305
|
+
@icon, [channel_base_uri, self.base_uri])
|
1223
1306
|
end
|
1307
|
+
rescue
|
1224
1308
|
end
|
1309
|
+
@icon = nil unless FeedTools::UriHelper.is_uri?(@icon)
|
1225
1310
|
@icon = nil if @icon.blank?
|
1226
1311
|
end
|
1227
1312
|
end
|
@@ -1236,7 +1321,8 @@ module FeedTools
|
|
1236
1321
|
if @favicon.nil?
|
1237
1322
|
if !self.link.blank?
|
1238
1323
|
begin
|
1239
|
-
link_uri = URI.parse(
|
1324
|
+
link_uri = URI.parse(
|
1325
|
+
FeedTools::UriHelper.normalize_url(self.link))
|
1240
1326
|
if link_uri.scheme == "http"
|
1241
1327
|
@favicon =
|
1242
1328
|
"http://" + link_uri.host + "/favicon.ico"
|
@@ -1244,9 +1330,10 @@ module FeedTools
|
|
1244
1330
|
rescue
|
1245
1331
|
@favicon = nil
|
1246
1332
|
end
|
1247
|
-
if @favicon.nil? && !self.
|
1333
|
+
if @favicon.nil? && !self.href.blank?
|
1248
1334
|
begin
|
1249
|
-
feed_uri = URI.parse(
|
1335
|
+
feed_uri = URI.parse(
|
1336
|
+
FeedTools::UriHelper.normalize_url(self.href))
|
1250
1337
|
if feed_uri.scheme == "http"
|
1251
1338
|
@favicon =
|
1252
1339
|
"http://" + feed_uri.host + "/favicon.ico"
|
@@ -1265,8 +1352,8 @@ module FeedTools
|
|
1265
1352
|
# Returns the feed author
|
1266
1353
|
def author
|
1267
1354
|
if @author.nil?
|
1268
|
-
@author = FeedTools::
|
1269
|
-
author_node = try_xpaths(self.channel_node, [
|
1355
|
+
@author = FeedTools::Author.new
|
1356
|
+
author_node = FeedTools::XmlHelper.try_xpaths(self.channel_node, [
|
1270
1357
|
"atom10:author",
|
1271
1358
|
"atom03:author",
|
1272
1359
|
"atom:author",
|
@@ -1276,16 +1363,18 @@ module FeedTools
|
|
1276
1363
|
"dc:creator"
|
1277
1364
|
])
|
1278
1365
|
unless author_node.nil?
|
1279
|
-
@author.raw = FeedTools.
|
1280
|
-
|
1281
|
-
@author.raw =
|
1366
|
+
@author.raw = FeedTools::XmlHelper.try_xpaths(
|
1367
|
+
author_node, ["text()"], :select_result_value => true)
|
1368
|
+
@author.raw = FeedTools::HtmlHelper.unescape_entities(@author.raw)
|
1282
1369
|
unless @author.raw.nil?
|
1283
1370
|
raw_scan = @author.raw.scan(
|
1284
1371
|
/(.*)\((\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\)/i)
|
1285
1372
|
if raw_scan.nil? || raw_scan.size == 0
|
1286
1373
|
raw_scan = @author.raw.scan(
|
1287
1374
|
/(\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\s*\((.*)\)/i)
|
1288
|
-
|
1375
|
+
unless raw_scan.size == 0
|
1376
|
+
author_raw_pair = raw_scan.first.reverse
|
1377
|
+
end
|
1289
1378
|
else
|
1290
1379
|
author_raw_pair = raw_scan.first
|
1291
1380
|
end
|
@@ -1302,16 +1391,16 @@ module FeedTools
|
|
1302
1391
|
else
|
1303
1392
|
unless @author.raw.include?("@")
|
1304
1393
|
# We can be reasonably sure we are looking at something
|
1305
|
-
# that the creator didn't intend to contain an email address
|
1306
|
-
# it got through the preceeding regexes and it doesn't
|
1394
|
+
# that the creator didn't intend to contain an email address
|
1395
|
+
# if it got through the preceeding regexes and it doesn't
|
1307
1396
|
# contain the tell-tale '@' symbol.
|
1308
1397
|
@author.name = @author.raw
|
1309
1398
|
end
|
1310
1399
|
end
|
1311
1400
|
end
|
1312
1401
|
if @author.name.blank?
|
1313
|
-
@author.name = FeedTools.unescape_entities(
|
1314
|
-
try_xpaths(author_node, [
|
1402
|
+
@author.name = FeedTools::HtmlHelper.unescape_entities(
|
1403
|
+
FeedTools::XmlHelper.try_xpaths(author_node, [
|
1315
1404
|
"atom10:name/text()",
|
1316
1405
|
"atom03:name/text()",
|
1317
1406
|
"atom:name/text()",
|
@@ -1321,8 +1410,8 @@ module FeedTools
|
|
1321
1410
|
)
|
1322
1411
|
end
|
1323
1412
|
if @author.email.blank?
|
1324
|
-
@author.email = FeedTools.unescape_entities(
|
1325
|
-
try_xpaths(author_node, [
|
1413
|
+
@author.email = FeedTools::HtmlHelper.unescape_entities(
|
1414
|
+
FeedTools::XmlHelper.try_xpaths(author_node, [
|
1326
1415
|
"atom10:email/text()",
|
1327
1416
|
"atom03:email/text()",
|
1328
1417
|
"atom:email/text()",
|
@@ -1332,8 +1421,8 @@ module FeedTools
|
|
1332
1421
|
)
|
1333
1422
|
end
|
1334
1423
|
if @author.url.blank?
|
1335
|
-
@author.url = FeedTools.unescape_entities(
|
1336
|
-
try_xpaths(author_node, [
|
1424
|
+
@author.url = FeedTools::HtmlHelper.unescape_entities(
|
1425
|
+
FeedTools::XmlHelper.try_xpaths(author_node, [
|
1337
1426
|
"atom10:url/text()",
|
1338
1427
|
"atom03:url/text()",
|
1339
1428
|
"atom:url/text()",
|
@@ -1342,7 +1431,7 @@ module FeedTools
|
|
1342
1431
|
"atom03:uri/text()",
|
1343
1432
|
"atom:uri/text()",
|
1344
1433
|
"uri/text()",
|
1345
|
-
"@
|
1434
|
+
"@href",
|
1346
1435
|
"@uri",
|
1347
1436
|
"@href"
|
1348
1437
|
], :select_result_value => true)
|
@@ -1352,6 +1441,16 @@ module FeedTools
|
|
1352
1441
|
@author.raw = nil if @author.raw.blank?
|
1353
1442
|
@author.email = nil if @author.email.blank?
|
1354
1443
|
@author.url = nil if @author.url.blank?
|
1444
|
+
if @author.url != nil
|
1445
|
+
begin
|
1446
|
+
if !(@author.url =~ /^file:/) &&
|
1447
|
+
!FeedTools::UriHelper.is_uri?(@author.url)
|
1448
|
+
@author.url = FeedTools::UriHelper.resolve_relative_uri(
|
1449
|
+
@author.url, [author_node.base_uri, self.base_uri])
|
1450
|
+
end
|
1451
|
+
rescue
|
1452
|
+
end
|
1453
|
+
end
|
1355
1454
|
end
|
1356
1455
|
# Fallback on the itunes module if we didn't find an author name
|
1357
1456
|
begin
|
@@ -1374,7 +1473,7 @@ module FeedTools
|
|
1374
1473
|
# We're not looking at an author object, this is probably a string,
|
1375
1474
|
# default to setting the author's name.
|
1376
1475
|
if @author.nil?
|
1377
|
-
@author = FeedTools::
|
1476
|
+
@author = FeedTools::Author.new
|
1378
1477
|
end
|
1379
1478
|
@author.name = new_author
|
1380
1479
|
end
|
@@ -1383,14 +1482,13 @@ module FeedTools
|
|
1383
1482
|
# Returns the feed publisher
|
1384
1483
|
def publisher
|
1385
1484
|
if @publisher.nil?
|
1386
|
-
@publisher = FeedTools::
|
1387
|
-
|
1388
|
-
|
1389
|
-
|
1390
|
-
|
1485
|
+
@publisher = FeedTools::Author.new
|
1486
|
+
@publisher.raw = FeedTools::HtmlHelper.unescape_entities(
|
1487
|
+
FeedTools::XmlHelper.try_xpaths(self.channel_node, [
|
1488
|
+
"webMaster/text()",
|
1489
|
+
"dc:publisher/text()"
|
1490
|
+
], :select_result_value => true))
|
1391
1491
|
|
1392
|
-
# Set the author name
|
1393
|
-
@publisher.raw = FeedTools.unescape_entities(publisher_node.to_s)
|
1394
1492
|
unless @publisher.raw.blank?
|
1395
1493
|
raw_scan = @publisher.raw.scan(
|
1396
1494
|
/(.*)\((\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\)/i)
|
@@ -1428,6 +1526,20 @@ module FeedTools
|
|
1428
1526
|
@publisher.raw = nil if @publisher.raw.blank?
|
1429
1527
|
@publisher.email = nil if @publisher.email.blank?
|
1430
1528
|
@publisher.url = nil if @publisher.url.blank?
|
1529
|
+
if @publisher.url != nil
|
1530
|
+
begin
|
1531
|
+
if !(@publisher.url =~ /^file:/) &&
|
1532
|
+
!FeedTools::UriHelper.is_uri?(@publisher.url)
|
1533
|
+
channel_base_uri = nil
|
1534
|
+
unless self.channel_node.nil?
|
1535
|
+
channel_base_uri = self.channel_node.base_uri
|
1536
|
+
end
|
1537
|
+
@publisher.url = FeedTools::UriHelper.resolve_relative_uri(
|
1538
|
+
@publisher.url, [channel_base_uri, self.base_uri])
|
1539
|
+
end
|
1540
|
+
rescue
|
1541
|
+
end
|
1542
|
+
end
|
1431
1543
|
end
|
1432
1544
|
return @publisher
|
1433
1545
|
end
|
@@ -1443,7 +1555,7 @@ module FeedTools
|
|
1443
1555
|
# We're not looking at an Author object, this is probably a string,
|
1444
1556
|
# default to setting the publisher's name.
|
1445
1557
|
if @publisher.nil?
|
1446
|
-
@publisher = FeedTools::
|
1558
|
+
@publisher = FeedTools::Author.new
|
1447
1559
|
end
|
1448
1560
|
@publisher.name = new_publisher
|
1449
1561
|
end
|
@@ -1457,8 +1569,8 @@ module FeedTools
|
|
1457
1569
|
# attribute.
|
1458
1570
|
def itunes_author
|
1459
1571
|
if @itunes_author.nil?
|
1460
|
-
@itunes_author = FeedTools.unescape_entities(
|
1461
|
-
try_xpaths(self.channel_node, [
|
1572
|
+
@itunes_author = FeedTools::HtmlHelper.unescape_entities(
|
1573
|
+
FeedTools::XmlHelper.try_xpaths(self.channel_node, [
|
1462
1574
|
"itunes:author/text()"
|
1463
1575
|
], :select_result_value => true)
|
1464
1576
|
)
|
@@ -1470,7 +1582,7 @@ module FeedTools
|
|
1470
1582
|
# Returns the feed time
|
1471
1583
|
def time
|
1472
1584
|
if @time.nil?
|
1473
|
-
time_string = try_xpaths(self.channel_node, [
|
1585
|
+
time_string = FeedTools::XmlHelper.try_xpaths(self.channel_node, [
|
1474
1586
|
"atom10:updated/text()",
|
1475
1587
|
"atom03:updated/text()",
|
1476
1588
|
"atom:updated/text()",
|
@@ -1480,6 +1592,7 @@ module FeedTools
|
|
1480
1592
|
"atom:modified/text()",
|
1481
1593
|
"modified/text()",
|
1482
1594
|
"time/text()",
|
1595
|
+
"lastBuildDate/text()",
|
1483
1596
|
"atom10:issued/text()",
|
1484
1597
|
"atom03:issued/text()",
|
1485
1598
|
"atom:issued/text()",
|
@@ -1488,8 +1601,8 @@ module FeedTools
|
|
1488
1601
|
"atom03:published/text()",
|
1489
1602
|
"atom:published/text()",
|
1490
1603
|
"published/text()",
|
1491
|
-
"pubDate/text()",
|
1492
1604
|
"dc:date/text()",
|
1605
|
+
"pubDate/text()",
|
1493
1606
|
"date/text()"
|
1494
1607
|
], :select_result_value => true)
|
1495
1608
|
begin
|
@@ -1509,15 +1622,15 @@ module FeedTools
|
|
1509
1622
|
return @time
|
1510
1623
|
end
|
1511
1624
|
|
1512
|
-
# Sets the feed
|
1625
|
+
# Sets the feed time
|
1513
1626
|
def time=(new_time)
|
1514
1627
|
@time = new_time
|
1515
1628
|
end
|
1516
1629
|
|
1517
|
-
# Returns the feed
|
1630
|
+
# Returns the feed updated time
|
1518
1631
|
def updated
|
1519
1632
|
if @updated.nil?
|
1520
|
-
updated_string = try_xpaths(self.channel_node, [
|
1633
|
+
updated_string = FeedTools::XmlHelper.try_xpaths(self.channel_node, [
|
1521
1634
|
"atom10:updated/text()",
|
1522
1635
|
"atom03:updated/text()",
|
1523
1636
|
"atom:updated/text()",
|
@@ -1525,7 +1638,8 @@ module FeedTools
|
|
1525
1638
|
"atom10:modified/text()",
|
1526
1639
|
"atom03:modified/text()",
|
1527
1640
|
"atom:modified/text()",
|
1528
|
-
"modified/text()"
|
1641
|
+
"modified/text()",
|
1642
|
+
"lastBuildDate/text()"
|
1529
1643
|
], :select_result_value => true)
|
1530
1644
|
unless updated_string.blank?
|
1531
1645
|
@updated = Time.parse(updated_string).gmtime rescue nil
|
@@ -1536,26 +1650,27 @@ module FeedTools
|
|
1536
1650
|
return @updated
|
1537
1651
|
end
|
1538
1652
|
|
1539
|
-
# Sets the feed
|
1653
|
+
# Sets the feed updated time
|
1540
1654
|
def updated=(new_updated)
|
1541
1655
|
@updated = new_updated
|
1542
1656
|
end
|
1543
1657
|
|
1544
|
-
# Returns the feed
|
1658
|
+
# Returns the feed published time
|
1545
1659
|
def published
|
1546
1660
|
if @published.nil?
|
1547
|
-
published_string =
|
1548
|
-
|
1549
|
-
|
1550
|
-
|
1551
|
-
|
1552
|
-
|
1553
|
-
|
1554
|
-
|
1555
|
-
|
1556
|
-
|
1557
|
-
|
1558
|
-
|
1661
|
+
published_string =
|
1662
|
+
FeedTools::XmlHelper.try_xpaths(self.channel_node, [
|
1663
|
+
"atom10:published/text()",
|
1664
|
+
"atom03:published/text()",
|
1665
|
+
"atom:published/text()",
|
1666
|
+
"published/text()",
|
1667
|
+
"dc:date/text()",
|
1668
|
+
"pubDate/text()",
|
1669
|
+
"atom10:issued/text()",
|
1670
|
+
"atom03:issued/text()",
|
1671
|
+
"atom:issued/text()",
|
1672
|
+
"issued/text()"
|
1673
|
+
], :select_result_value => true)
|
1559
1674
|
unless published_string.blank?
|
1560
1675
|
@published = Time.parse(published_string).gmtime rescue nil
|
1561
1676
|
else
|
@@ -1565,7 +1680,7 @@ module FeedTools
|
|
1565
1680
|
return @published
|
1566
1681
|
end
|
1567
1682
|
|
1568
|
-
# Sets the feed
|
1683
|
+
# Sets the feed published time
|
1569
1684
|
def published=(new_published)
|
1570
1685
|
@published = new_published
|
1571
1686
|
end
|
@@ -1574,22 +1689,24 @@ module FeedTools
|
|
1574
1689
|
def categories
|
1575
1690
|
if @categories.nil?
|
1576
1691
|
@categories = []
|
1577
|
-
category_nodes =
|
1578
|
-
|
1579
|
-
|
1580
|
-
|
1692
|
+
category_nodes =
|
1693
|
+
FeedTools::XmlHelper.try_xpaths_all(self.channel_node, [
|
1694
|
+
"category",
|
1695
|
+
"dc:subject"
|
1696
|
+
])
|
1581
1697
|
unless category_nodes.nil?
|
1582
1698
|
for category_node in category_nodes
|
1583
|
-
category = FeedTools::
|
1584
|
-
category.term = try_xpaths(category_node, [
|
1699
|
+
category = FeedTools::Category.new
|
1700
|
+
category.term = FeedTools::XmlHelper.try_xpaths(category_node, [
|
1585
1701
|
"@term",
|
1586
1702
|
"text()"
|
1587
1703
|
], :select_result_value => true)
|
1588
1704
|
category.term.strip! unless category.term.blank?
|
1589
|
-
category.label = try_xpaths(
|
1705
|
+
category.label = FeedTools::XmlHelper.try_xpaths(
|
1706
|
+
category_node, ["@label"],
|
1590
1707
|
:select_result_value => true)
|
1591
1708
|
category.label.strip! unless category.label.blank?
|
1592
|
-
category.scheme = try_xpaths(category_node, [
|
1709
|
+
category.scheme = FeedTools::XmlHelper.try_xpaths(category_node, [
|
1593
1710
|
"@scheme",
|
1594
1711
|
"@domain"
|
1595
1712
|
], :select_result_value => true)
|
@@ -1605,63 +1722,69 @@ module FeedTools
|
|
1605
1722
|
def images
|
1606
1723
|
if @images.nil?
|
1607
1724
|
@images = []
|
1608
|
-
image_nodes =
|
1609
|
-
|
1610
|
-
|
1611
|
-
|
1612
|
-
|
1613
|
-
|
1614
|
-
|
1615
|
-
|
1616
|
-
])
|
1725
|
+
image_nodes = FeedTools::XmlHelper.combine_xpaths_all(
|
1726
|
+
self.channel_node, [
|
1727
|
+
"image",
|
1728
|
+
"logo",
|
1729
|
+
"apple-wallpapers:image",
|
1730
|
+
"imageUrl"
|
1731
|
+
]
|
1732
|
+
)
|
1617
1733
|
unless image_nodes.blank?
|
1618
1734
|
for image_node in image_nodes
|
1619
|
-
image = FeedTools::
|
1620
|
-
image.
|
1735
|
+
image = FeedTools::Image.new
|
1736
|
+
image.href = FeedTools::XmlHelper.try_xpaths(image_node, [
|
1621
1737
|
"url/text()",
|
1622
1738
|
"@rdf:resource",
|
1739
|
+
"@href",
|
1623
1740
|
"text()"
|
1624
1741
|
], :select_result_value => true)
|
1625
|
-
if image.
|
1626
|
-
|
1627
|
-
image.url = try_xpaths(image_node, [
|
1628
|
-
"@atom10:href",
|
1629
|
-
"@atom03:href",
|
1630
|
-
"@atom:href",
|
1631
|
-
"@href"
|
1632
|
-
], :select_result_value => true)
|
1633
|
-
if image.url == self.link && image.url != nil
|
1634
|
-
image.url = nil
|
1635
|
-
end
|
1742
|
+
if image.href.nil? && image_node.base_uri != nil
|
1743
|
+
image.href = ""
|
1636
1744
|
end
|
1637
|
-
|
1638
|
-
image.
|
1639
|
-
|
1640
|
-
|
1745
|
+
begin
|
1746
|
+
if !(image.href =~ /^file:/) &&
|
1747
|
+
!FeedTools::UriHelper.is_uri?(image.href)
|
1748
|
+
image.href = FeedTools::UriHelper.resolve_relative_uri(
|
1749
|
+
image.href, [image_node.base_uri, self.base_uri])
|
1750
|
+
end
|
1751
|
+
rescue
|
1641
1752
|
end
|
1642
|
-
|
1643
|
-
|
1753
|
+
if FeedTools.configurations[:url_normalization_enabled]
|
1754
|
+
image.href = FeedTools::UriHelper.normalize_url(image.href)
|
1755
|
+
end
|
1756
|
+
image.href.strip! unless image.href.nil?
|
1757
|
+
next if image.href.blank?
|
1758
|
+
image.title = FeedTools::XmlHelper.try_xpaths(image_node,
|
1644
1759
|
["title/text()"], :select_result_value => true)
|
1645
1760
|
image.title.strip! unless image.title.nil?
|
1646
|
-
image.description = try_xpaths(image_node,
|
1761
|
+
image.description = FeedTools::XmlHelper.try_xpaths(image_node,
|
1647
1762
|
["description/text()"], :select_result_value => true)
|
1648
1763
|
image.description.strip! unless image.description.nil?
|
1649
|
-
image.link = try_xpaths(image_node,
|
1764
|
+
image.link = FeedTools::XmlHelper.try_xpaths(image_node,
|
1650
1765
|
["link/text()"], :select_result_value => true)
|
1651
1766
|
image.link.strip! unless image.link.nil?
|
1652
|
-
image.height = try_xpaths(image_node,
|
1767
|
+
image.height = FeedTools::XmlHelper.try_xpaths(image_node,
|
1653
1768
|
["height/text()"], :select_result_value => true).to_i
|
1654
1769
|
image.height = nil if image.height <= 0
|
1655
|
-
image.width = try_xpaths(image_node,
|
1770
|
+
image.width = FeedTools::XmlHelper.try_xpaths(image_node,
|
1656
1771
|
["width/text()"], :select_result_value => true).to_i
|
1657
1772
|
image.width = nil if image.width <= 0
|
1658
|
-
image.style = try_xpaths(image_node, [
|
1773
|
+
image.style = FeedTools::XmlHelper.try_xpaths(image_node, [
|
1659
1774
|
"style/text()",
|
1660
1775
|
"@style"
|
1661
1776
|
], :select_result_value => true)
|
1662
1777
|
image.style.strip! unless image.style.nil?
|
1663
1778
|
image.style.downcase! unless image.style.nil?
|
1664
|
-
@images << image unless image.
|
1779
|
+
@images << image unless image.href.nil?
|
1780
|
+
end
|
1781
|
+
end
|
1782
|
+
for link_object in self.links
|
1783
|
+
if link_object.type != nil && link_object.type =~ /^image/
|
1784
|
+
image = FeedTools::Image.new
|
1785
|
+
image.href = link_object.href
|
1786
|
+
image.title = link_object.title
|
1787
|
+
@images << image unless image.href.nil?
|
1665
1788
|
end
|
1666
1789
|
end
|
1667
1790
|
end
|
@@ -1671,20 +1794,25 @@ module FeedTools
|
|
1671
1794
|
# Returns the feed's text input field
|
1672
1795
|
def text_input
|
1673
1796
|
if @text_input.nil?
|
1674
|
-
@text_input = FeedTools::
|
1675
|
-
text_input_node =
|
1797
|
+
@text_input = FeedTools::TextInput.new
|
1798
|
+
text_input_node =
|
1799
|
+
FeedTools::XmlHelper.try_xpaths(self.channel_node, ["textInput"])
|
1676
1800
|
unless text_input_node.nil?
|
1677
1801
|
@text_input.title =
|
1678
|
-
try_xpaths(text_input_node,
|
1802
|
+
FeedTools::XmlHelper.try_xpaths(text_input_node,
|
1803
|
+
["title/text()"],
|
1679
1804
|
:select_result_value => true)
|
1680
1805
|
@text_input.description =
|
1681
|
-
try_xpaths(text_input_node,
|
1806
|
+
FeedTools::XmlHelper.try_xpaths(text_input_node,
|
1807
|
+
["description/text()"],
|
1682
1808
|
:select_result_value => true)
|
1683
1809
|
@text_input.link =
|
1684
|
-
try_xpaths(text_input_node,
|
1810
|
+
FeedTools::XmlHelper.try_xpaths(text_input_node,
|
1811
|
+
["link/text()"],
|
1685
1812
|
:select_result_value => true)
|
1686
1813
|
@text_input.name =
|
1687
|
-
try_xpaths(text_input_node,
|
1814
|
+
FeedTools::XmlHelper.try_xpaths(text_input_node,
|
1815
|
+
["name/text()"],
|
1688
1816
|
:select_result_value => true)
|
1689
1817
|
end
|
1690
1818
|
end
|
@@ -1692,10 +1820,10 @@ module FeedTools
|
|
1692
1820
|
end
|
1693
1821
|
|
1694
1822
|
# Returns the feed's copyright information
|
1695
|
-
def
|
1696
|
-
if @
|
1823
|
+
def rights
|
1824
|
+
if @rights.nil?
|
1697
1825
|
repair_entities = false
|
1698
|
-
|
1826
|
+
rights_node = FeedTools::XmlHelper.try_xpaths(self.channel_node, [
|
1699
1827
|
"atom10:copyright",
|
1700
1828
|
"atom03:copyright",
|
1701
1829
|
"atom:copyright",
|
@@ -1704,63 +1832,40 @@ module FeedTools
|
|
1704
1832
|
"dc:rights",
|
1705
1833
|
"rights"
|
1706
1834
|
])
|
1707
|
-
|
1708
|
-
|
1835
|
+
@rights = FeedTools::HtmlHelper.process_text_construct(rights_node,
|
1836
|
+
self.feed_type, self.feed_version)
|
1837
|
+
if self.feed_type == "atom" ||
|
1838
|
+
FeedTools.configurations[:always_strip_wrapper_elements]
|
1839
|
+
@rights = FeedTools::HtmlHelper.strip_wrapper_element(@rights)
|
1709
1840
|
end
|
1710
|
-
copyright_type = try_xpaths(copyright_node, "@type",
|
1711
|
-
:select_result_value => true)
|
1712
|
-
copyright_mode = try_xpaths(copyright_node, "@mode",
|
1713
|
-
:select_result_value => true)
|
1714
|
-
copyright_encoding = try_xpaths(copyright_node, "@encoding",
|
1715
|
-
:select_result_value => true)
|
1716
|
-
|
1717
|
-
# Note that we're checking for misuse of type, mode and encoding here
|
1718
|
-
if !copyright_encoding.blank?
|
1719
|
-
@copyright =
|
1720
|
-
"[Embedded data objects are not currently supported.]"
|
1721
|
-
elsif copyright_node.cdatas.size > 0
|
1722
|
-
@copyright = copyright_node.cdatas.first.value
|
1723
|
-
elsif copyright_type == "base64" || copyright_mode == "base64" ||
|
1724
|
-
copyright_encoding == "base64"
|
1725
|
-
@copyright = Base64.decode64(copyright_node.inner_xml.strip)
|
1726
|
-
elsif copyright_type == "xhtml" || copyright_mode == "xhtml" ||
|
1727
|
-
copyright_type == "xml" || copyright_mode == "xml" ||
|
1728
|
-
copyright_type == "application/xhtml+xml"
|
1729
|
-
@copyright = copyright_node.inner_xml
|
1730
|
-
elsif copyright_type == "escaped" || copyright_mode == "escaped"
|
1731
|
-
@copyright = FeedTools.unescape_entities(
|
1732
|
-
copyright_node.inner_xml)
|
1733
|
-
else
|
1734
|
-
@copyright = copyright_node.inner_xml
|
1735
|
-
repair_entities = true
|
1736
|
-
end
|
1737
|
-
|
1738
|
-
unless @copyright.nil?
|
1739
|
-
@copyright = FeedTools.sanitize_html(@copyright, :strip)
|
1740
|
-
@copyright = FeedTools.unescape_entities(@copyright) if repair_entities
|
1741
|
-
@copyright = FeedTools.tidy_html(@copyright)
|
1742
|
-
end
|
1743
|
-
|
1744
|
-
@copyright = @copyright.strip unless @copyright.nil?
|
1745
|
-
@copyright = nil if @copyright.blank?
|
1746
1841
|
end
|
1747
|
-
return @
|
1842
|
+
return @rights
|
1748
1843
|
end
|
1749
1844
|
|
1750
|
-
# Sets the feed's
|
1751
|
-
def
|
1752
|
-
@
|
1845
|
+
# Sets the feed's rights information
|
1846
|
+
def rights=(new_rights)
|
1847
|
+
@rights = new_rights
|
1753
1848
|
end
|
1754
1849
|
|
1850
|
+
def license #:nodoc:
|
1851
|
+
raise "Not implemented yet."
|
1852
|
+
end
|
1853
|
+
|
1854
|
+
def license=(new_license) #:nodoc:
|
1855
|
+
raise "Not implemented yet."
|
1856
|
+
end
|
1857
|
+
|
1755
1858
|
# Returns the number of seconds before the feed should expire
|
1756
1859
|
def time_to_live
|
1757
1860
|
if @time_to_live.nil?
|
1758
1861
|
unless channel_node.nil?
|
1759
1862
|
# get the feed time to live from the xml document
|
1760
|
-
update_frequency = try_xpaths(
|
1863
|
+
update_frequency = FeedTools::XmlHelper.try_xpaths(
|
1864
|
+
self.channel_node,
|
1761
1865
|
["syn:updateFrequency/text()"], :select_result_value => true)
|
1762
1866
|
if !update_frequency.blank?
|
1763
|
-
update_period = try_xpaths(
|
1867
|
+
update_period = FeedTools::XmlHelper.try_xpaths(
|
1868
|
+
self.channel_node,
|
1764
1869
|
["syn:updatePeriod/text()"], :select_result_value => true)
|
1765
1870
|
if update_period == "daily"
|
1766
1871
|
@time_to_live = update_frequency.to_i.day
|
@@ -1777,10 +1882,12 @@ module FeedTools
|
|
1777
1882
|
end
|
1778
1883
|
if @time_to_live.nil?
|
1779
1884
|
# usually expressed in minutes
|
1780
|
-
update_frequency = try_xpaths(
|
1885
|
+
update_frequency = FeedTools::XmlHelper.try_xpaths(
|
1886
|
+
self.channel_node, ["ttl/text()"],
|
1781
1887
|
:select_result_value => true)
|
1782
1888
|
if !update_frequency.blank?
|
1783
|
-
update_span = try_xpaths(
|
1889
|
+
update_span = FeedTools::XmlHelper.try_xpaths(
|
1890
|
+
self.channel_node, ["ttl/@span"],
|
1784
1891
|
:select_result_value => true)
|
1785
1892
|
if update_span == "seconds"
|
1786
1893
|
@time_to_live = update_frequency.to_i
|
@@ -1804,24 +1911,28 @@ module FeedTools
|
|
1804
1911
|
if @time_to_live.nil?
|
1805
1912
|
@time_to_live = 0
|
1806
1913
|
update_frequency_days =
|
1807
|
-
|
1914
|
+
FeedTools::XmlHelper.try_xpaths(self.channel_node,
|
1915
|
+
["schedule/intervaltime/@day"], :select_result_value => true)
|
1808
1916
|
update_frequency_hours =
|
1809
|
-
|
1917
|
+
FeedTools::XmlHelper.try_xpaths(self.channel_node,
|
1918
|
+
["schedule/intervaltime/@hour"], :select_result_value => true)
|
1810
1919
|
update_frequency_minutes =
|
1811
|
-
|
1920
|
+
FeedTools::XmlHelper.try_xpaths(self.channel_node,
|
1921
|
+
["schedule/intervaltime/@min"], :select_result_value => true)
|
1812
1922
|
update_frequency_seconds =
|
1813
|
-
|
1814
|
-
|
1923
|
+
FeedTools::XmlHelper.try_xpaths(self.channel_node,
|
1924
|
+
["schedule/intervaltime/@sec"], :select_result_value => true)
|
1925
|
+
if !update_frequency_days.blank?
|
1815
1926
|
@time_to_live = @time_to_live + update_frequency_days.to_i.day
|
1816
1927
|
end
|
1817
|
-
if update_frequency_hours
|
1928
|
+
if !update_frequency_hours.blank?
|
1818
1929
|
@time_to_live = @time_to_live + update_frequency_hours.to_i.hour
|
1819
1930
|
end
|
1820
|
-
if update_frequency_minutes
|
1931
|
+
if !update_frequency_minutes.blank?
|
1821
1932
|
@time_to_live = @time_to_live +
|
1822
1933
|
update_frequency_minutes.to_i.minute
|
1823
1934
|
end
|
1824
|
-
if update_frequency_seconds
|
1935
|
+
if !update_frequency_seconds.blank?
|
1825
1936
|
@time_to_live = @time_to_live + update_frequency_seconds.to_i
|
1826
1937
|
end
|
1827
1938
|
if @time_to_live == 0
|
@@ -1851,18 +1962,23 @@ module FeedTools
|
|
1851
1962
|
# Returns the feed's cloud
|
1852
1963
|
def cloud
|
1853
1964
|
if @cloud.nil?
|
1854
|
-
@cloud = FeedTools::
|
1855
|
-
@cloud.domain = try_xpaths(
|
1965
|
+
@cloud = FeedTools::Cloud.new
|
1966
|
+
@cloud.domain = FeedTools::XmlHelper.try_xpaths(
|
1967
|
+
self.channel_node, ["cloud/@domain"],
|
1856
1968
|
:select_result_value => true)
|
1857
|
-
@cloud.port = try_xpaths(
|
1969
|
+
@cloud.port = FeedTools::XmlHelper.try_xpaths(
|
1970
|
+
self.channel_node, ["cloud/@port"],
|
1858
1971
|
:select_result_value => true)
|
1859
|
-
@cloud.path = try_xpaths(
|
1972
|
+
@cloud.path = FeedTools::XmlHelper.try_xpaths(
|
1973
|
+
self.channel_node, ["cloud/@path"],
|
1860
1974
|
:select_result_value => true)
|
1861
1975
|
@cloud.register_procedure =
|
1862
|
-
try_xpaths(
|
1976
|
+
FeedTools::XmlHelper.try_xpaths(
|
1977
|
+
self.channel_node, ["cloud/@registerProcedure"],
|
1863
1978
|
:select_result_value => true)
|
1864
1979
|
@cloud.protocol =
|
1865
|
-
try_xpaths(
|
1980
|
+
FeedTools::XmlHelper.try_xpaths(
|
1981
|
+
self.channel_node, ["cloud/@protocol"],
|
1866
1982
|
:select_result_value => true)
|
1867
1983
|
@cloud.protocol.downcase unless @cloud.protocol.nil?
|
1868
1984
|
@cloud.port = @cloud.port.to_s.to_i
|
@@ -1879,14 +1995,23 @@ module FeedTools
|
|
1879
1995
|
# Returns the feed generator
|
1880
1996
|
def generator
|
1881
1997
|
if @generator.nil?
|
1882
|
-
@generator = try_xpaths(
|
1998
|
+
@generator = FeedTools::XmlHelper.try_xpaths(
|
1999
|
+
self.channel_node, ["generator/text()"],
|
1883
2000
|
:select_result_value => true)
|
1884
|
-
|
2001
|
+
unless @generator.nil?
|
2002
|
+
@generator =
|
2003
|
+
FeedTools::HtmlHelper.convert_html_to_plain_text(@generator)
|
2004
|
+
end
|
1885
2005
|
end
|
1886
2006
|
return @generator
|
1887
2007
|
end
|
1888
2008
|
|
1889
2009
|
# Sets the feed generator
|
2010
|
+
#
|
2011
|
+
# Note: Setting this variable will NOT cause this to appear in any
|
2012
|
+
# generated output. The generator string is created from the
|
2013
|
+
# <tt>:generator_name</tt> and <tt>:generator_href</tt> configuration
|
2014
|
+
# variables.
|
1890
2015
|
def generator=(new_generator)
|
1891
2016
|
@generator = new_generator
|
1892
2017
|
end
|
@@ -1894,9 +2019,24 @@ module FeedTools
|
|
1894
2019
|
# Returns the feed docs
|
1895
2020
|
def docs
|
1896
2021
|
if @docs.nil?
|
1897
|
-
@docs = try_xpaths(
|
2022
|
+
@docs = FeedTools::XmlHelper.try_xpaths(
|
2023
|
+
self.channel_node, ["docs/text()"],
|
1898
2024
|
:select_result_value => true)
|
1899
|
-
|
2025
|
+
begin
|
2026
|
+
if !(@docs =~ /^file:/) &&
|
2027
|
+
!FeedTools::UriHelper.is_uri?(@docs)
|
2028
|
+
channel_base_uri = nil
|
2029
|
+
unless self.channel_node.nil?
|
2030
|
+
channel_base_uri = self.channel_node.base_uri
|
2031
|
+
end
|
2032
|
+
@docs = FeedTools::UriHelper.resolve_relative_uri(
|
2033
|
+
@docs, [channel_base_uri, self.base_uri])
|
2034
|
+
end
|
2035
|
+
rescue
|
2036
|
+
end
|
2037
|
+
if FeedTools.configurations[:url_normalization_enabled]
|
2038
|
+
@docs = FeedTools::UriHelper.normalize_url(@docs)
|
2039
|
+
end
|
1900
2040
|
end
|
1901
2041
|
return @docs
|
1902
2042
|
end
|
@@ -1909,15 +2049,15 @@ module FeedTools
|
|
1909
2049
|
# Returns the feed language
|
1910
2050
|
def language
|
1911
2051
|
if @language.nil?
|
1912
|
-
@language = select_not_blank([
|
1913
|
-
try_xpaths(self.channel_node, [
|
2052
|
+
@language = FeedTools::XmlHelper.select_not_blank([
|
2053
|
+
FeedTools::XmlHelper.try_xpaths(self.channel_node, [
|
1914
2054
|
"language/text()",
|
1915
2055
|
"dc:language/text()",
|
1916
2056
|
"@dc:language",
|
1917
2057
|
"@xml:lang",
|
1918
2058
|
"xml:lang/text()"
|
1919
2059
|
], :select_result_value => true),
|
1920
|
-
try_xpaths(self.root_node, [
|
2060
|
+
FeedTools::XmlHelper.try_xpaths(self.root_node, [
|
1921
2061
|
"@xml:lang",
|
1922
2062
|
"xml:lang/text()"
|
1923
2063
|
], :select_result_value => true)
|
@@ -1938,7 +2078,7 @@ module FeedTools
|
|
1938
2078
|
# Returns true if this feed contains explicit material.
|
1939
2079
|
def explicit?
|
1940
2080
|
if @explicit.nil?
|
1941
|
-
explicit_string = try_xpaths(self.channel_node, [
|
2081
|
+
explicit_string = FeedTools::XmlHelper.try_xpaths(self.channel_node, [
|
1942
2082
|
"media:adult/text()",
|
1943
2083
|
"itunes:explicit/text()"
|
1944
2084
|
], :select_result_value => true)
|
@@ -1958,24 +2098,32 @@ module FeedTools
|
|
1958
2098
|
|
1959
2099
|
# Returns the feed entries
|
1960
2100
|
def entries
|
1961
|
-
if @entries.
|
1962
|
-
raw_entries = select_not_blank([
|
1963
|
-
try_xpaths_all(self.channel_node, [
|
2101
|
+
if @entries.nil?
|
2102
|
+
raw_entries = FeedTools::XmlHelper.select_not_blank([
|
2103
|
+
FeedTools::XmlHelper.try_xpaths_all(self.channel_node, [
|
1964
2104
|
"atom10:entry",
|
1965
2105
|
"atom03:entry",
|
1966
2106
|
"atom:entry",
|
1967
2107
|
"entry"
|
1968
2108
|
]),
|
1969
|
-
try_xpaths_all(self.root_node, [
|
2109
|
+
FeedTools::XmlHelper.try_xpaths_all(self.root_node, [
|
1970
2110
|
"rss10:item",
|
2111
|
+
"rss11:items/rss11:item",
|
2112
|
+
"rss11:items/item",
|
2113
|
+
"items/rss11:item",
|
2114
|
+
"items/item",
|
1971
2115
|
"item",
|
1972
2116
|
"atom10:entry",
|
1973
2117
|
"atom03:entry",
|
1974
2118
|
"atom:entry",
|
1975
2119
|
"entry"
|
1976
2120
|
]),
|
1977
|
-
try_xpaths_all(self.channel_node, [
|
2121
|
+
FeedTools::XmlHelper.try_xpaths_all(self.channel_node, [
|
1978
2122
|
"rss10:item",
|
2123
|
+
"rss11:items/rss11:item",
|
2124
|
+
"rss11:items/item",
|
2125
|
+
"items/rss11:item",
|
2126
|
+
"items/item",
|
1979
2127
|
"item"
|
1980
2128
|
])
|
1981
2129
|
])
|
@@ -1987,14 +2135,27 @@ module FeedTools
|
|
1987
2135
|
new_entry = FeedItem.new
|
1988
2136
|
new_entry.feed_data = entry_node.to_s
|
1989
2137
|
new_entry.feed_data_type = self.feed_data_type
|
2138
|
+
new_entry.root_node = entry_node
|
2139
|
+
if new_entry.root_node.namespace.blank?
|
2140
|
+
new_entry.root_node.add_namespace(self.root_node.namespace)
|
2141
|
+
end
|
1990
2142
|
@entries << new_entry
|
1991
2143
|
end
|
1992
2144
|
end
|
1993
2145
|
end
|
1994
2146
|
|
1995
2147
|
# Sort the items
|
1996
|
-
|
1997
|
-
|
2148
|
+
if FeedTools.configurations[:entry_sorting_property] == "time"
|
2149
|
+
@entries = @entries.sort do |a, b|
|
2150
|
+
(b.time or Time.utc(1970)) <=> (a.time or Time.utc(1970))
|
2151
|
+
end
|
2152
|
+
elsif FeedTools.configurations[:entry_sorting_property] != nil
|
2153
|
+
sorting_property = FeedTools.configurations[:entry_sorting_property]
|
2154
|
+
@entries = @entries.sort do |a, b|
|
2155
|
+
eval("a.#{sorting_property}") <=> eval("b.#{sorting_property}")
|
2156
|
+
end
|
2157
|
+
else
|
2158
|
+
@entries = @entries.reverse
|
1998
2159
|
end
|
1999
2160
|
return @entries
|
2000
2161
|
end
|
@@ -2090,58 +2251,73 @@ module FeedTools
|
|
2090
2251
|
end
|
2091
2252
|
|
2092
2253
|
# Generates xml based on the content of the feed
|
2093
|
-
def build_xml(feed_type=(self.feed_type or "atom"),
|
2254
|
+
def build_xml(feed_type=(self.feed_type or "atom"), feed_version=nil,
|
2094
2255
|
xml_builder=Builder::XmlMarkup.new(
|
2095
2256
|
:indent => 2, :escape_attrs => false))
|
2096
2257
|
xml_builder.instruct! :xml, :version => "1.0",
|
2097
2258
|
:encoding => (FeedTools.configurations[:output_encoding] or "utf-8")
|
2098
|
-
if feed_type
|
2099
|
-
|
2100
|
-
|
2101
|
-
|
2259
|
+
if feed_type.nil?
|
2260
|
+
feed_type = self.feed_type
|
2261
|
+
end
|
2262
|
+
if feed_version.nil?
|
2263
|
+
feed_version = self.feed_version
|
2102
2264
|
end
|
2103
|
-
if feed_type == "rss" &&
|
2104
|
-
|
2265
|
+
if feed_type == "rss" &&
|
2266
|
+
(feed_version == nil || feed_version <= 0.0)
|
2267
|
+
feed_version = 1.0
|
2268
|
+
elsif feed_type == "atom" &&
|
2269
|
+
(feed_version == nil || feed_version <= 0.0)
|
2270
|
+
feed_version = 1.0
|
2271
|
+
end
|
2272
|
+
if feed_type == "rss" &&
|
2273
|
+
(feed_version == 0.9 || feed_version == 1.0 || feed_version == 1.1)
|
2105
2274
|
# RDF-based rss format
|
2106
2275
|
return xml_builder.tag!("rdf:RDF",
|
2107
2276
|
"xmlns" => FEED_TOOLS_NAMESPACES['rss10'],
|
2277
|
+
"xmlns:content" => FEED_TOOLS_NAMESPACES['content'],
|
2108
2278
|
"xmlns:rdf" => FEED_TOOLS_NAMESPACES['rdf'],
|
2109
2279
|
"xmlns:dc" => FEED_TOOLS_NAMESPACES['dc'],
|
2110
2280
|
"xmlns:syn" => FEED_TOOLS_NAMESPACES['syn'],
|
2281
|
+
"xmlns:admin" => FEED_TOOLS_NAMESPACES['admin'],
|
2111
2282
|
"xmlns:taxo" => FEED_TOOLS_NAMESPACES['taxo'],
|
2112
2283
|
"xmlns:itunes" => FEED_TOOLS_NAMESPACES['itunes'],
|
2113
2284
|
"xmlns:media" => FEED_TOOLS_NAMESPACES['media']) do
|
2114
2285
|
channel_attributes = {}
|
2115
2286
|
unless self.link.nil?
|
2116
2287
|
channel_attributes["rdf:about"] =
|
2117
|
-
FeedTools.escape_entities(self.link)
|
2288
|
+
FeedTools::HtmlHelper.escape_entities(self.link)
|
2118
2289
|
end
|
2119
2290
|
xml_builder.channel(channel_attributes) do
|
2120
|
-
unless title.
|
2121
|
-
xml_builder.title(
|
2291
|
+
unless self.title.blank?
|
2292
|
+
xml_builder.title(
|
2293
|
+
FeedTools::HtmlHelper.strip_html_tags(self.title))
|
2122
2294
|
else
|
2123
2295
|
xml_builder.title
|
2124
2296
|
end
|
2125
|
-
unless link.
|
2126
|
-
xml_builder.link(link)
|
2297
|
+
unless self.link.blank?
|
2298
|
+
xml_builder.link(self.link)
|
2127
2299
|
else
|
2128
2300
|
xml_builder.link
|
2129
2301
|
end
|
2130
|
-
unless images.
|
2131
|
-
xml_builder.image("rdf:resource" =>
|
2132
|
-
|
2302
|
+
unless images.blank?
|
2303
|
+
xml_builder.image("rdf:resource" =>
|
2304
|
+
FeedTools::HtmlHelper.escape_entities(
|
2305
|
+
images.first.url))
|
2133
2306
|
end
|
2134
2307
|
unless description.nil? || description == ""
|
2135
2308
|
xml_builder.description(description)
|
2136
2309
|
else
|
2137
2310
|
xml_builder.description
|
2138
2311
|
end
|
2139
|
-
unless language.
|
2140
|
-
xml_builder.tag!("dc:language", language)
|
2312
|
+
unless self.language.blank?
|
2313
|
+
xml_builder.tag!("dc:language", self.language)
|
2314
|
+
end
|
2315
|
+
unless self.rights.blank?
|
2316
|
+
xml_builder.tag!("dc:rights", self.rights)
|
2141
2317
|
end
|
2142
2318
|
xml_builder.tag!("syn:updatePeriod", "hourly")
|
2143
2319
|
xml_builder.tag!("syn:updateFrequency",
|
2144
|
-
(time_to_live / 1.hour).to_s)
|
2320
|
+
(self.time_to_live / 1.hour).to_s)
|
2145
2321
|
xml_builder.tag!("syn:updateBase", Time.mktime(1970).iso8601)
|
2146
2322
|
xml_builder.items do
|
2147
2323
|
xml_builder.tag!("rdf:Seq") do
|
@@ -2152,14 +2328,17 @@ module FeedTools
|
|
2152
2328
|
"item link field."
|
2153
2329
|
end
|
2154
2330
|
xml_builder.tag!("rdf:li", "rdf:resource" =>
|
2155
|
-
FeedTools.escape_entities(item.link))
|
2331
|
+
FeedTools::HtmlHelper.escape_entities(item.link))
|
2156
2332
|
end
|
2157
2333
|
end
|
2158
2334
|
end
|
2159
2335
|
end
|
2160
|
-
|
2336
|
+
xml_builder.tag!(
|
2337
|
+
"admin:generatorAgent",
|
2338
|
+
"rdf:resource" => FeedTools.configurations[:generator_href])
|
2339
|
+
build_xml_hook(feed_type, feed_version, xml_builder)
|
2161
2340
|
end
|
2162
|
-
unless
|
2341
|
+
unless self.images.blank?
|
2163
2342
|
best_image = nil
|
2164
2343
|
for image in self.images
|
2165
2344
|
if image.link != nil
|
@@ -2167,9 +2346,9 @@ module FeedTools
|
|
2167
2346
|
break
|
2168
2347
|
end
|
2169
2348
|
end
|
2170
|
-
best_image = images.first if best_image.nil?
|
2171
|
-
xml_builder.image(
|
2172
|
-
|
2349
|
+
best_image = self.images.first if best_image.nil?
|
2350
|
+
xml_builder.image("rdf:about" =>
|
2351
|
+
FeedTools::HtmlHelper.escape_entities(best_image.url)) do
|
2173
2352
|
if !best_image.title.blank?
|
2174
2353
|
xml_builder.title(best_image.title)
|
2175
2354
|
elsif !self.title.blank?
|
@@ -2191,13 +2370,14 @@ module FeedTools
|
|
2191
2370
|
end
|
2192
2371
|
unless items.nil?
|
2193
2372
|
for item in items
|
2194
|
-
item.build_xml(feed_type,
|
2373
|
+
item.build_xml(feed_type, feed_version, xml_builder)
|
2195
2374
|
end
|
2196
2375
|
end
|
2197
2376
|
end
|
2198
2377
|
elsif feed_type == "rss"
|
2199
2378
|
# normal rss format
|
2200
2379
|
return xml_builder.rss("version" => "2.0",
|
2380
|
+
"xmlns:content" => FEED_TOOLS_NAMESPACES['content'],
|
2201
2381
|
"xmlns:rdf" => FEED_TOOLS_NAMESPACES['rdf'],
|
2202
2382
|
"xmlns:dc" => FEED_TOOLS_NAMESPACES['dc'],
|
2203
2383
|
"xmlns:taxo" => FEED_TOOLS_NAMESPACES['taxo'],
|
@@ -2205,29 +2385,41 @@ module FeedTools
|
|
2205
2385
|
"xmlns:itunes" => FEED_TOOLS_NAMESPACES['itunes'],
|
2206
2386
|
"xmlns:media" => FEED_TOOLS_NAMESPACES['media']) do
|
2207
2387
|
xml_builder.channel do
|
2208
|
-
unless title.blank?
|
2209
|
-
xml_builder.title(
|
2388
|
+
unless self.title.blank?
|
2389
|
+
xml_builder.title(
|
2390
|
+
FeedTools::HtmlHelper.strip_html_tags(self.title))
|
2210
2391
|
end
|
2211
|
-
unless link.blank?
|
2392
|
+
unless self.link.blank?
|
2212
2393
|
xml_builder.link(link)
|
2213
2394
|
end
|
2214
|
-
unless description.blank?
|
2395
|
+
unless self.description.blank?
|
2215
2396
|
xml_builder.description(description)
|
2397
|
+
else
|
2398
|
+
xml_builder.description
|
2399
|
+
end
|
2400
|
+
unless self.published.blank?
|
2401
|
+
xml_builder.pubDate(self.published.rfc822)
|
2402
|
+
end
|
2403
|
+
unless self.updated.blank?
|
2404
|
+
xml_builder.lastBuildDate(self.updated.rfc822)
|
2405
|
+
end
|
2406
|
+
unless self.copyright.blank?
|
2407
|
+
xml_builder.copyright(self.copyright)
|
2216
2408
|
end
|
2217
2409
|
xml_builder.ttl((time_to_live / 1.minute).to_s)
|
2218
2410
|
xml_builder.generator(
|
2219
2411
|
FeedTools.configurations[:generator_href])
|
2220
|
-
build_xml_hook(feed_type,
|
2412
|
+
build_xml_hook(feed_type, feed_version, xml_builder)
|
2221
2413
|
unless items.nil?
|
2222
2414
|
for item in items
|
2223
|
-
item.build_xml(feed_type,
|
2415
|
+
item.build_xml(feed_type, feed_version, xml_builder)
|
2224
2416
|
end
|
2225
2417
|
end
|
2226
2418
|
end
|
2227
2419
|
end
|
2228
|
-
elsif feed_type == "atom" &&
|
2420
|
+
elsif feed_type == "atom" && feed_version == 0.3
|
2229
2421
|
raise "Atom 0.3 is obsolete."
|
2230
|
-
elsif feed_type == "atom" &&
|
2422
|
+
elsif feed_type == "atom" && feed_version == 1.0
|
2231
2423
|
# normal atom format
|
2232
2424
|
return xml_builder.feed("xmlns" => FEED_TOOLS_NAMESPACES['atom10'],
|
2233
2425
|
"xml:lang" => language) do
|
@@ -2248,18 +2440,18 @@ module FeedTools
|
|
2248
2440
|
xml_builder.uri(self.author.url)
|
2249
2441
|
end
|
2250
2442
|
end
|
2251
|
-
unless self.
|
2252
|
-
xml_builder.link("href" => self.
|
2443
|
+
unless self.href.blank?
|
2444
|
+
xml_builder.link("href" => self.href,
|
2253
2445
|
"rel" => "self",
|
2254
2446
|
"type" => "application/atom+xml")
|
2255
2447
|
end
|
2256
2448
|
unless self.link.blank?
|
2257
|
-
xml_builder.link(
|
2258
|
-
|
2259
|
-
|
2260
|
-
|
2449
|
+
xml_builder.link(
|
2450
|
+
"href" =>
|
2451
|
+
FeedTools::HtmlHelper.escape_entities(self.link),
|
2452
|
+
"rel" => "alternate")
|
2261
2453
|
end
|
2262
|
-
unless
|
2454
|
+
unless self.subtitle.blank?
|
2263
2455
|
xml_builder.subtitle(self.subtitle,
|
2264
2456
|
"type" => "html")
|
2265
2457
|
end
|
@@ -2272,12 +2464,15 @@ module FeedTools
|
|
2272
2464
|
else
|
2273
2465
|
xml_builder.updated(Time.now.gmtime.iso8601)
|
2274
2466
|
end
|
2467
|
+
unless self.rights.blank?
|
2468
|
+
xml_builder.rights(self.rights)
|
2469
|
+
end
|
2275
2470
|
xml_builder.generator(FeedTools.configurations[:generator_name] +
|
2276
2471
|
" - " + FeedTools.configurations[:generator_href])
|
2277
2472
|
if self.id != nil
|
2278
|
-
unless FeedTools.is_uri? self.id
|
2473
|
+
unless FeedTools::UriHelper.is_uri? self.id
|
2279
2474
|
if self.link != nil
|
2280
|
-
xml_builder.id(FeedTools.build_urn_uri(self.link))
|
2475
|
+
xml_builder.id(FeedTools::UriHelper.build_urn_uri(self.link))
|
2281
2476
|
else
|
2282
2477
|
raise "The unique id must be a valid URI."
|
2283
2478
|
end
|
@@ -2285,14 +2480,14 @@ module FeedTools
|
|
2285
2480
|
xml_builder.id(self.id)
|
2286
2481
|
end
|
2287
2482
|
elsif self.link != nil
|
2288
|
-
xml_builder.id(FeedTools.build_urn_uri(self.link))
|
2483
|
+
xml_builder.id(FeedTools::UriHelper.build_urn_uri(self.link))
|
2289
2484
|
else
|
2290
2485
|
raise "Cannot build feed, missing feed unique id."
|
2291
2486
|
end
|
2292
|
-
build_xml_hook(feed_type,
|
2487
|
+
build_xml_hook(feed_type, feed_version, xml_builder)
|
2293
2488
|
unless items.nil?
|
2294
2489
|
for item in items
|
2295
|
-
item.build_xml(feed_type,
|
2490
|
+
item.build_xml(feed_type, feed_version, xml_builder)
|
2296
2491
|
end
|
2297
2492
|
end
|
2298
2493
|
end
|
@@ -2303,15 +2498,15 @@ module FeedTools
|
|
2303
2498
|
|
2304
2499
|
# Persists the current feed state to the cache.
|
2305
2500
|
def save
|
2306
|
-
unless self.
|
2501
|
+
unless self.href =~ /^file:\/\//
|
2307
2502
|
if FeedTools.feed_cache.nil?
|
2308
2503
|
raise "Caching is currently disabled. Cannot save to cache."
|
2309
|
-
elsif self.
|
2504
|
+
elsif self.href.nil?
|
2310
2505
|
raise "The url field must be set to save to the cache."
|
2311
2506
|
elsif self.cache_object.nil?
|
2312
2507
|
raise "The cache_object is currently nil. Cannot save to cache."
|
2313
2508
|
else
|
2314
|
-
self.cache_object.
|
2509
|
+
self.cache_object.href = self.href
|
2315
2510
|
unless self.feed_data.nil?
|
2316
2511
|
self.cache_object.title = self.title
|
2317
2512
|
self.cache_object.link = self.link
|
@@ -2324,15 +2519,17 @@ module FeedTools
|
|
2324
2519
|
end
|
2325
2520
|
end
|
2326
2521
|
end
|
2327
|
-
|
2522
|
+
|
2523
|
+
alias_method :url, :href
|
2524
|
+
alias_method :url=, :href=
|
2328
2525
|
alias_method :tagline, :subtitle
|
2329
2526
|
alias_method :tagline=, :subtitle=
|
2330
2527
|
alias_method :description, :subtitle
|
2331
2528
|
alias_method :description=, :subtitle=
|
2332
2529
|
alias_method :abstract, :subtitle
|
2333
2530
|
alias_method :abstract=, :subtitle=
|
2334
|
-
alias_method :
|
2335
|
-
alias_method :
|
2531
|
+
alias_method :copyright, :rights
|
2532
|
+
alias_method :copyright=, :rights=
|
2336
2533
|
alias_method :ttl, :time_to_live
|
2337
2534
|
alias_method :ttl=, :time_to_live=
|
2338
2535
|
alias_method :guid, :id
|
@@ -2362,7 +2559,12 @@ module FeedTools
|
|
2362
2559
|
|
2363
2560
|
# Returns a simple representation of the feed object's state.
|
2364
2561
|
def inspect
|
2365
|
-
return "#<FeedTools::Feed:0x#{self.object_id.to_s(16)} URL:#{self.
|
2562
|
+
return "#<FeedTools::Feed:0x#{self.object_id.to_s(16)} URL:#{self.href}>"
|
2563
|
+
end
|
2564
|
+
|
2565
|
+
# Allows sorting feeds by title
|
2566
|
+
def <=>(other_feed)
|
2567
|
+
return self.title.to_s <=> other_feed.title.to_s
|
2366
2568
|
end
|
2367
2569
|
end
|
2368
2570
|
end
|