feedtools 0.2.22 → 0.2.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +28 -0
- data/README +23 -2
- data/db/migration.rb +19 -0
- data/db/schema.mysql.sql +1 -1
- data/db/schema.postgresql.sql +1 -1
- data/db/schema.sqlite.sql +1 -1
- data/lib/feed_tools.rb +71 -388
- data/lib/feed_tools/database_feed_cache.rb +4 -3
- data/lib/feed_tools/feed.rb +809 -607
- data/lib/feed_tools/feed_item.rb +551 -574
- data/lib/feed_tools/feed_structures.rb +252 -0
- data/lib/feed_tools/helpers/feed_tools_helper.rb +6 -5
- data/lib/feed_tools/helpers/generic_helper.rb +16 -158
- data/lib/feed_tools/helpers/html_helper.rb +629 -0
- data/lib/feed_tools/helpers/retrieval_helper.rb +5 -0
- data/lib/feed_tools/helpers/uri_helper.rb +223 -0
- data/lib/feed_tools/helpers/xml_helper.rb +239 -0
- data/rakefile +10 -237
- data/test/unit/amp_test.rb +102 -94
- data/test/unit/atom_test.rb +239 -6
- data/test/unit/cache_test.rb +1 -1
- data/test/unit/encoding_test.rb +5 -5
- data/test/unit/generation_test.rb +34 -1
- data/test/unit/helper_test.rb +111 -17
- data/test/unit/rss_test.rb +21 -2
- metadata +7 -3
- data/lib/feed_tools/helpers/module_helper.rb +0 -27
@@ -31,7 +31,7 @@ module FeedTools
|
|
31
31
|
# The default caching mechanism for the FeedTools module
|
32
32
|
class DatabaseFeedCache < ActiveRecord::Base
|
33
33
|
# Overrides the default table name to use the "cached_feeds" table.
|
34
|
-
|
34
|
+
set_table_name("cached_feeds")
|
35
35
|
|
36
36
|
# If ActiveRecord is not already connected, attempts to find a configuration file and use
|
37
37
|
# it to open a connection for ActiveRecord.
|
@@ -53,7 +53,8 @@ module FeedTools
|
|
53
53
|
"./config/database.yml",
|
54
54
|
"../config/database.yml",
|
55
55
|
"./database.yml",
|
56
|
-
"../database.yml"
|
56
|
+
"../database.yml",
|
57
|
+
"../../database.yml"
|
57
58
|
]
|
58
59
|
database_config_file = nil
|
59
60
|
for file in possible_config_files
|
@@ -108,7 +109,7 @@ module FeedTools
|
|
108
109
|
# True if the appropriate database table already exists
|
109
110
|
def DatabaseFeedCache.table_exists?
|
110
111
|
begin
|
111
|
-
ActiveRecord::Base.connection.execute "select id,
|
112
|
+
ActiveRecord::Base.connection.execute "select id, href, title, " +
|
112
113
|
"link, feed_data, feed_data_type, http_headers, last_retrieved " +
|
113
114
|
"from #{self.table_name()} limit 1"
|
114
115
|
rescue ActiveRecord::StatementInvalid
|
data/lib/feed_tools/feed.rb
CHANGED
@@ -21,124 +21,27 @@
|
|
21
21
|
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
22
|
#++
|
23
23
|
|
24
|
+
require 'rexml/document'
|
25
|
+
require 'feed_tools/feed_item'
|
26
|
+
require 'feed_tools/feed_structures'
|
24
27
|
require 'feed_tools/helpers/generic_helper'
|
28
|
+
require 'feed_tools/helpers/xml_helper'
|
29
|
+
require 'feed_tools/helpers/html_helper'
|
25
30
|
|
26
31
|
module FeedTools
|
27
32
|
# The <tt>FeedTools::Feed</tt> class represents a web feed's structure.
|
28
33
|
class Feed
|
29
|
-
# :stopdoc:
|
30
|
-
include REXML
|
31
|
-
class << self
|
32
|
-
include FeedTools::GenericHelper
|
33
|
-
private :validate_options
|
34
|
-
end
|
35
|
-
include FeedTools::GenericHelper
|
36
|
-
private :validate_options
|
37
|
-
# :startdoc:
|
38
|
-
|
39
|
-
# Represents a feed/feed item's category
|
40
|
-
class Category
|
41
|
-
|
42
|
-
# The category term value
|
43
|
-
attr_accessor :term
|
44
|
-
# The categorization scheme
|
45
|
-
attr_accessor :scheme
|
46
|
-
# A human-readable description of the category
|
47
|
-
attr_accessor :label
|
48
|
-
|
49
|
-
alias_method :value, :term
|
50
|
-
alias_method :category, :term
|
51
|
-
alias_method :domain, :scheme
|
52
|
-
end
|
53
|
-
|
54
|
-
# Represents a feed/feed item's author
|
55
|
-
class Author
|
56
|
-
|
57
|
-
# The author's real name
|
58
|
-
attr_accessor :name
|
59
|
-
# The author's email address
|
60
|
-
attr_accessor :email
|
61
|
-
# The url of the author's homepage
|
62
|
-
attr_accessor :url
|
63
|
-
# The raw value of the author tag if present
|
64
|
-
attr_accessor :raw
|
65
|
-
end
|
66
|
-
|
67
|
-
# Represents a feed's image
|
68
|
-
class Image
|
69
|
-
|
70
|
-
# The image's title
|
71
|
-
attr_accessor :title
|
72
|
-
# The image's description
|
73
|
-
attr_accessor :description
|
74
|
-
# The image's url
|
75
|
-
attr_accessor :url
|
76
|
-
# The url to link the image to
|
77
|
-
attr_accessor :link
|
78
|
-
# The width of the image
|
79
|
-
attr_accessor :width
|
80
|
-
# The height of the image
|
81
|
-
attr_accessor :height
|
82
|
-
# The style of the image
|
83
|
-
# Possible values are "icon", "image", or "image-wide"
|
84
|
-
attr_accessor :style
|
85
|
-
end
|
86
|
-
|
87
|
-
# Represents a feed's text input element.
|
88
|
-
# Be aware that this will be ignored for feed generation. It's a
|
89
|
-
# pointless element that aggregators usually ignore and it doesn't have an
|
90
|
-
# equivalent in all feeds types.
|
91
|
-
class TextInput
|
92
|
-
|
93
|
-
# The label of the Submit button in the text input area.
|
94
|
-
attr_accessor :title
|
95
|
-
# The description explains the text input area.
|
96
|
-
attr_accessor :description
|
97
|
-
# The URL of the CGI script that processes text input requests.
|
98
|
-
attr_accessor :link
|
99
|
-
# The name of the text object in the text input area.
|
100
|
-
attr_accessor :name
|
101
|
-
end
|
102
|
-
|
103
|
-
# Represents a feed's cloud.
|
104
|
-
# Be aware that this will be ignored for feed generation.
|
105
|
-
class Cloud
|
106
|
-
|
107
|
-
# The domain of the cloud.
|
108
|
-
attr_accessor :domain
|
109
|
-
# The path for the cloud.
|
110
|
-
attr_accessor :path
|
111
|
-
# The port the cloud is listening on.
|
112
|
-
attr_accessor :port
|
113
|
-
# The web services protocol the cloud uses.
|
114
|
-
# Possible values are either "xml-rpc" or "soap".
|
115
|
-
attr_accessor :protocol
|
116
|
-
# The procedure to use to request notification.
|
117
|
-
attr_accessor :register_procedure
|
118
|
-
end
|
119
|
-
|
120
|
-
# Represents a simple hyperlink
|
121
|
-
class Link
|
122
|
-
|
123
|
-
# The url that is being linked to
|
124
|
-
attr_accessor :url
|
125
|
-
# The content of the hyperlink
|
126
|
-
attr_accessor :value
|
127
|
-
|
128
|
-
alias_method :href, :url
|
129
|
-
end
|
130
|
-
|
131
34
|
# Initialize the feed object
|
132
35
|
def initialize
|
133
36
|
super
|
134
37
|
@cache_object = nil
|
135
38
|
@http_headers = nil
|
136
|
-
@
|
39
|
+
@xml_document = nil
|
137
40
|
@feed_data = nil
|
138
41
|
@feed_data_type = :xml
|
139
42
|
@root_node = nil
|
140
43
|
@channel_node = nil
|
141
|
-
@
|
44
|
+
@href = nil
|
142
45
|
@id = nil
|
143
46
|
@title = nil
|
144
47
|
@description = nil
|
@@ -147,6 +50,7 @@ module FeedTools
|
|
147
50
|
@time_to_live = nil
|
148
51
|
@entries = nil
|
149
52
|
@live = false
|
53
|
+
@encoding = nil
|
150
54
|
end
|
151
55
|
|
152
56
|
# Loads the feed specified by the url, pulling the data from the
|
@@ -155,7 +59,7 @@ module FeedTools
|
|
155
59
|
# * <tt>:cache_only</tt> - If set to true, the feed will only be
|
156
60
|
# pulled from the cache.
|
157
61
|
def Feed.open(url, options={})
|
158
|
-
validate_options([ :cache_only ],
|
62
|
+
FeedTools::GenericHelper.validate_options([ :cache_only ],
|
159
63
|
options.keys)
|
160
64
|
options = { :cache_only => false }.merge(options)
|
161
65
|
|
@@ -165,17 +69,17 @@ module FeedTools
|
|
165
69
|
end
|
166
70
|
|
167
71
|
# clean up the url
|
168
|
-
url = FeedTools.normalize_url(url)
|
72
|
+
url = FeedTools::UriHelper.normalize_url(url)
|
169
73
|
|
170
74
|
# create and load the new feed
|
171
75
|
feed = FeedTools::Feed.new
|
172
|
-
feed.
|
76
|
+
feed.href = url
|
173
77
|
feed.update! unless options[:cache_only]
|
174
78
|
return feed
|
175
79
|
end
|
176
80
|
|
177
|
-
# Loads the feed from the remote url if the feed has expired from the
|
178
|
-
# retrieved from the cache for some reason.
|
81
|
+
# Loads the feed from the remote url if the feed has expired from the
|
82
|
+
# cache or cannot be retrieved from the cache for some reason.
|
179
83
|
def update!
|
180
84
|
if !FeedTools.feed_cache.nil? &&
|
181
85
|
!FeedTools.feed_cache.set_up_correctly?
|
@@ -193,6 +97,32 @@ module FeedTools
|
|
193
97
|
@live = false
|
194
98
|
else
|
195
99
|
load_remote_feed!
|
100
|
+
|
101
|
+
# Handle autodiscovery
|
102
|
+
if self.http_headers['content-type'] =~ /text\/html/ ||
|
103
|
+
self.http_headers['content-type'] =~ /application\/xhtml\+xml/
|
104
|
+
|
105
|
+
autodiscovered_url = nil
|
106
|
+
autodiscovered_url =
|
107
|
+
FeedTools::HtmlHelper.extract_link_by_mime_type(self.feed_data,
|
108
|
+
"application/atom+xml")
|
109
|
+
if autodiscovered_url.nil?
|
110
|
+
autodiscovered_url =
|
111
|
+
FeedTools::HtmlHelper.extract_link_by_mime_type(self.feed_data,
|
112
|
+
"application/rss+xml")
|
113
|
+
end
|
114
|
+
if autodiscovered_url.nil?
|
115
|
+
autodiscovered_url =
|
116
|
+
FeedTools::HtmlHelper.extract_link_by_mime_type(self.feed_data,
|
117
|
+
"application/rdf+xml")
|
118
|
+
end
|
119
|
+
unless autodiscovered_url.nil?
|
120
|
+
self.feed_data = nil
|
121
|
+
self.href = autodiscovered_url
|
122
|
+
self.expire! unless self.cache_object.nil?
|
123
|
+
self.update!
|
124
|
+
end
|
125
|
+
end
|
196
126
|
end
|
197
127
|
end
|
198
128
|
|
@@ -207,20 +137,20 @@ module FeedTools
|
|
207
137
|
@http_headers = YAML.load(self.cache_object.http_headers)
|
208
138
|
end
|
209
139
|
|
210
|
-
if (self.
|
140
|
+
if (self.href =~ /^feed:/) == 0
|
211
141
|
# Woah, Nelly, how'd that happen? You should've already been
|
212
142
|
# corrected. So let's fix that url. And please,
|
213
143
|
# just use less crappy browsers instead of badly defined
|
214
144
|
# pseudo-protocol hacks.
|
215
|
-
self.
|
145
|
+
self.href = FeedTools::UriHelper.normalize_url(self.href)
|
216
146
|
end
|
217
147
|
|
218
148
|
# Find out what method we're going to be using to obtain this feed.
|
219
149
|
begin
|
220
|
-
uri = URI.parse(self.
|
150
|
+
uri = URI.parse(self.href)
|
221
151
|
rescue URI::InvalidURIError
|
222
152
|
raise FeedAccessError,
|
223
|
-
"Cannot retrieve feed using invalid URL: " + self.
|
153
|
+
"Cannot retrieve feed using invalid URL: " + self.href.to_s
|
224
154
|
end
|
225
155
|
retrieval_method = "http"
|
226
156
|
case uri.scheme
|
@@ -262,12 +192,15 @@ module FeedTools
|
|
262
192
|
feed_uri = URI.parse(feed_url)
|
263
193
|
rescue URI::InvalidURIError
|
264
194
|
# Uh, maybe try to fix it?
|
265
|
-
feed_uri = URI.parse(FeedTools.normalize_url(feed_url))
|
195
|
+
feed_uri = URI.parse(FeedTools::UriHelper.normalize_url(feed_url))
|
266
196
|
end
|
267
197
|
|
268
198
|
begin
|
269
|
-
|
270
|
-
|
199
|
+
proxy_address = (FeedTools.configurations[:proxy_address] || nil)
|
200
|
+
proxy_port = (FeedTools.configurations[:proxy_port].to_i || nil)
|
201
|
+
|
202
|
+
http = Net::HTTP::Proxy(proxy_address, proxy_port).new(
|
203
|
+
feed_uri.host, (feed_uri.port or 80))
|
271
204
|
http.start do
|
272
205
|
final_uri = feed_uri.path
|
273
206
|
final_uri += ('?' + feed_uri.query) if feed_uri.query
|
@@ -282,7 +215,7 @@ module FeedTools
|
|
282
215
|
if redirected_response.last.code.to_i == 301
|
283
216
|
# Reset the cache object or we may get duplicate entries
|
284
217
|
self.cache_object = nil
|
285
|
-
self.
|
218
|
+
self.href = redirected_response.last['location']
|
286
219
|
else
|
287
220
|
# Jump out as soon as we hit anything that isn't a
|
288
221
|
# permanently moved redirection.
|
@@ -316,7 +249,7 @@ module FeedTools
|
|
316
249
|
if !cached_feed.expired? &&
|
317
250
|
!cached_feed.http_headers.blank?
|
318
251
|
# Copy the cached state
|
319
|
-
self.
|
252
|
+
self.href = cached_feed.href
|
320
253
|
|
321
254
|
@feed_data = cached_feed.feed_data
|
322
255
|
@feed_data_type = cached_feed.feed_data_type
|
@@ -372,7 +305,7 @@ module FeedTools
|
|
372
305
|
|
373
306
|
begin
|
374
307
|
begin
|
375
|
-
@http_response = http_fetch.call(self.
|
308
|
+
@http_response = http_fetch.call(self.href, headers, 10, [], false)
|
376
309
|
rescue => error
|
377
310
|
if error.respond_to?(:response)
|
378
311
|
# You might not believe this, but...
|
@@ -385,8 +318,8 @@ module FeedTools
|
|
385
318
|
# we get to blame other people's bad software and/or bad
|
386
319
|
# configuration files.
|
387
320
|
if error.response.code.to_i == 404 &&
|
388
|
-
FeedTools.user_agent != nil
|
389
|
-
@http_response = http_fetch.call(self.
|
321
|
+
FeedTools.configurations[:user_agent] != nil
|
322
|
+
@http_response = http_fetch.call(self.href, {}, 10, [], true)
|
390
323
|
if @http_response != nil && @http_response.code.to_i == 200
|
391
324
|
warn("The server appears to be blocking based on the " +
|
392
325
|
"User-Agent header. This is stupid, and you should " +
|
@@ -464,14 +397,18 @@ module FeedTools
|
|
464
397
|
# Not supported... yet
|
465
398
|
elsif retrieval_method == "ftp"
|
466
399
|
# Not supported... yet
|
467
|
-
# Technically, CDF feeds are supposed to be able to be accessed
|
468
|
-
# from an ftp server. This is silly, but we'll humor
|
400
|
+
# Technically, CDF feeds are supposed to be able to be accessed
|
401
|
+
# directly from an ftp server. This is silly, but we'll humor
|
402
|
+
# Microsoft.
|
469
403
|
#
|
470
|
-
# Eventually.
|
404
|
+
# Eventually. If they're lucky. And someone demands it.
|
471
405
|
elsif retrieval_method == "file"
|
472
406
|
# Now that we've gone to all that trouble to ensure the url begins
|
473
407
|
# with 'file://', strip the 'file://' off the front of the url.
|
474
|
-
file_name = self.
|
408
|
+
file_name = self.href.gsub(/^file:\/\//, "")
|
409
|
+
if RUBY_PLATFORM =~ /mswin/
|
410
|
+
file_name = file_name[1..-1] if file_name[1..1] == "/"
|
411
|
+
end
|
475
412
|
begin
|
476
413
|
open(file_name) do |file|
|
477
414
|
@http_response = nil
|
@@ -520,7 +457,7 @@ module FeedTools
|
|
520
457
|
unless self.http_headers.blank?
|
521
458
|
@encoding = "utf-8"
|
522
459
|
else
|
523
|
-
@encoding = self.
|
460
|
+
@encoding = self.encoding_from_feed_data
|
524
461
|
end
|
525
462
|
end
|
526
463
|
return @encoding
|
@@ -528,8 +465,8 @@ module FeedTools
|
|
528
465
|
|
529
466
|
# Returns the encoding of the feed calculated only from the xml data.
|
530
467
|
# I.e., the encoding we would come up with if we ignore RFC 3023.
|
531
|
-
def
|
532
|
-
if @
|
468
|
+
def encoding_from_feed_data
|
469
|
+
if @encoding_from_feed_data.nil?
|
533
470
|
raw_data = self.feed_data
|
534
471
|
encoding_from_xml_instruct =
|
535
472
|
raw_data.scan(
|
@@ -539,7 +476,7 @@ module FeedTools
|
|
539
476
|
encoding_from_xml_instruct.downcase!
|
540
477
|
end
|
541
478
|
if encoding_from_xml_instruct.blank?
|
542
|
-
doc = Document.new(raw_data)
|
479
|
+
doc = REXML::Document.new(raw_data)
|
543
480
|
encoding_from_xml_instruct = doc.encoding.downcase
|
544
481
|
if encoding_from_xml_instruct == "utf-8"
|
545
482
|
# REXML has a tendency to report utf-8 overzealously, take with
|
@@ -547,7 +484,7 @@ module FeedTools
|
|
547
484
|
encoding_from_xml_instruct = nil
|
548
485
|
end
|
549
486
|
else
|
550
|
-
@
|
487
|
+
@encoding_from_feed_data = encoding_from_xml_instruct
|
551
488
|
end
|
552
489
|
if encoding_from_xml_instruct.blank?
|
553
490
|
sniff_table = {
|
@@ -556,17 +493,17 @@ module FeedTools
|
|
556
493
|
}
|
557
494
|
sniff = self.feed_data[0..3]
|
558
495
|
if sniff_table[sniff] != nil
|
559
|
-
@
|
496
|
+
@encoding_from_feed_data = sniff_table[sniff].downcase
|
560
497
|
end
|
561
498
|
else
|
562
|
-
@
|
499
|
+
@encoding_from_feed_data = encoding_from_xml_instruct
|
563
500
|
end
|
564
|
-
if @
|
501
|
+
if @encoding_from_feed_data.blank?
|
565
502
|
# Safest assumption
|
566
|
-
@
|
503
|
+
@encoding_from_feed_data = "utf-8"
|
567
504
|
end
|
568
505
|
end
|
569
|
-
return @
|
506
|
+
return @encoding_from_feed_data
|
570
507
|
end
|
571
508
|
|
572
509
|
# Returns the feed's raw data.
|
@@ -581,11 +518,10 @@ module FeedTools
|
|
581
518
|
|
582
519
|
# Sets the feed's data.
|
583
520
|
def feed_data=(new_feed_data)
|
521
|
+
for var in self.instance_variables
|
522
|
+
self.instance_variable_set(var, nil)
|
523
|
+
end
|
584
524
|
@http_headers = {}
|
585
|
-
@cache_object = nil
|
586
|
-
@url = nil
|
587
|
-
@id = nil
|
588
|
-
@encoding = nil
|
589
525
|
@feed_data = new_feed_data
|
590
526
|
unless self.cache_object.nil?
|
591
527
|
self.cache_object.feed_data = new_feed_data
|
@@ -637,25 +573,25 @@ module FeedTools
|
|
637
573
|
end
|
638
574
|
|
639
575
|
# Returns a REXML Document of the feed_data
|
640
|
-
def
|
576
|
+
def xml_document
|
641
577
|
if self.feed_data_type != :xml
|
642
|
-
@
|
578
|
+
@xml_document = nil
|
643
579
|
else
|
644
|
-
if @
|
580
|
+
if @xml_document.nil?
|
645
581
|
begin
|
646
582
|
begin
|
647
|
-
@
|
648
|
-
:ignore_whitespace_nodes => :all)
|
583
|
+
@xml_document = REXML::Document.new(self.feed_data_utf_8)
|
649
584
|
rescue Object
|
650
585
|
# Something failed, attempt to repair the xml with htree.
|
651
|
-
@
|
586
|
+
@xml_document = HTree.parse(self.feed_data_utf_8).to_rexml
|
652
587
|
end
|
653
588
|
rescue Object
|
654
|
-
@
|
589
|
+
@xml_document = nil
|
590
|
+
raise
|
655
591
|
end
|
656
592
|
end
|
657
593
|
end
|
658
|
-
return @
|
594
|
+
return @xml_document
|
659
595
|
end
|
660
596
|
|
661
597
|
# Returns the first node within the channel_node that matches the xpath
|
@@ -664,7 +600,7 @@ module FeedTools
|
|
664
600
|
if self.feed_data_type != :xml
|
665
601
|
raise "The feed data type is not xml."
|
666
602
|
end
|
667
|
-
return try_xpaths(self.channel_node, [xpath],
|
603
|
+
return FeedTools::XmlHelper.try_xpaths(self.channel_node, [xpath],
|
668
604
|
:select_result_value => select_result_value)
|
669
605
|
end
|
670
606
|
|
@@ -673,7 +609,7 @@ module FeedTools
|
|
673
609
|
if self.feed_data_type != :xml
|
674
610
|
raise "The feed data type is not xml."
|
675
611
|
end
|
676
|
-
return try_xpaths_all(self.channel_node, [xpath],
|
612
|
+
return FeedTools::XmlHelper.try_xpaths_all(self.channel_node, [xpath],
|
677
613
|
:select_result_value => select_result_value)
|
678
614
|
end
|
679
615
|
|
@@ -685,10 +621,10 @@ module FeedTools
|
|
685
621
|
# E.g.: http://smogzer.tripod.com/smog.rdf
|
686
622
|
# ===================================================================
|
687
623
|
begin
|
688
|
-
if
|
624
|
+
if self.xml_document.nil?
|
689
625
|
return nil
|
690
626
|
else
|
691
|
-
@root_node =
|
627
|
+
@root_node = self.xml_document.root
|
692
628
|
end
|
693
629
|
rescue
|
694
630
|
return nil
|
@@ -699,14 +635,14 @@ module FeedTools
|
|
699
635
|
|
700
636
|
# Returns the channel node of the feed.
|
701
637
|
def channel_node
|
702
|
-
if @channel_node.nil? && root_node != nil
|
703
|
-
@channel_node = try_xpaths(root_node, [
|
638
|
+
if @channel_node.nil? && self.root_node != nil
|
639
|
+
@channel_node = FeedTools::XmlHelper.try_xpaths(self.root_node, [
|
704
640
|
"channel",
|
705
641
|
"CHANNEL",
|
706
642
|
"feedinfo"
|
707
643
|
])
|
708
644
|
if @channel_node == nil
|
709
|
-
@channel_node = root_node
|
645
|
+
@channel_node = self.root_node
|
710
646
|
end
|
711
647
|
end
|
712
648
|
return @channel_node
|
@@ -714,14 +650,19 @@ module FeedTools
|
|
714
650
|
|
715
651
|
# The cache object that handles the feed persistence.
|
716
652
|
def cache_object
|
717
|
-
if !@
|
653
|
+
if !@href.nil? && @href =~ /^file:\/\//
|
718
654
|
return nil
|
719
655
|
end
|
720
656
|
unless FeedTools.feed_cache.nil?
|
721
657
|
if @cache_object.nil?
|
722
658
|
begin
|
723
|
-
if @
|
724
|
-
|
659
|
+
if @href != nil
|
660
|
+
begin
|
661
|
+
@cache_object = FeedTools.feed_cache.find_by_href(@href)
|
662
|
+
rescue
|
663
|
+
warn("The feed cache seems to be having trouble with the " +
|
664
|
+
"find_by_href method. This may cause unexpected results.")
|
665
|
+
end
|
725
666
|
end
|
726
667
|
if @cache_object.nil?
|
727
668
|
@cache_object = FeedTools.feed_cache.new
|
@@ -736,8 +677,8 @@ module FeedTools
|
|
736
677
|
# Sets the cache object for this feed.
|
737
678
|
#
|
738
679
|
# This can be any object, but it must accept the following messages:
|
739
|
-
#
|
740
|
-
#
|
680
|
+
# href
|
681
|
+
# href=
|
741
682
|
# title
|
742
683
|
# title=
|
743
684
|
# link
|
@@ -773,7 +714,11 @@ module FeedTools
|
|
773
714
|
when "rss"
|
774
715
|
@feed_type = "rss"
|
775
716
|
when "channel"
|
776
|
-
|
717
|
+
if self.root_node.namespace == FEED_TOOLS_NAMESPACES['rss11']
|
718
|
+
@feed_type = "rss"
|
719
|
+
else
|
720
|
+
@feed_type = "cdf"
|
721
|
+
end
|
777
722
|
end
|
778
723
|
end
|
779
724
|
return @feed_type
|
@@ -794,26 +739,33 @@ module FeedTools
|
|
794
739
|
end
|
795
740
|
version = nil
|
796
741
|
begin
|
797
|
-
|
742
|
+
version_string = FeedTools::XmlHelper.try_xpaths(self.root_node, [
|
743
|
+
"@version"
|
744
|
+
], :select_result_value => true)
|
745
|
+
unless version_string.nil?
|
746
|
+
version = version_string.to_f
|
747
|
+
end
|
798
748
|
rescue
|
799
749
|
end
|
800
750
|
version = nil if version == 0.0
|
801
|
-
default_namespace =
|
751
|
+
default_namespace = FeedTools::XmlHelper.try_xpaths(self.root_node, [
|
752
|
+
"@xmlns"
|
753
|
+
], :select_result_value => true)
|
802
754
|
case self.feed_type
|
803
755
|
when "atom"
|
804
|
-
if default_namespace ==
|
756
|
+
if default_namespace == FEED_TOOLS_NAMESPACES['atom10']
|
805
757
|
@feed_version = 1.0
|
806
758
|
elsif version != nil
|
807
759
|
@feed_version = version
|
808
|
-
elsif default_namespace ==
|
760
|
+
elsif default_namespace == FEED_TOOLS_NAMESPACES['atom03']
|
809
761
|
@feed_version = 0.3
|
810
762
|
end
|
811
763
|
when "rss"
|
812
|
-
if default_namespace ==
|
764
|
+
if default_namespace == FEED_TOOLS_NAMESPACES['rss09']
|
813
765
|
@feed_version = 0.9
|
814
|
-
elsif default_namespace ==
|
766
|
+
elsif default_namespace == FEED_TOOLS_NAMESPACES['rss10']
|
815
767
|
@feed_version = 1.0
|
816
|
-
elsif default_namespace ==
|
768
|
+
elsif default_namespace == FEED_TOOLS_NAMESPACES['rss11']
|
817
769
|
@feed_version = 1.1
|
818
770
|
elsif version != nil
|
819
771
|
case version
|
@@ -828,7 +780,7 @@ module FeedTools
|
|
828
780
|
when "cdf"
|
829
781
|
@feed_version = 0.4
|
830
782
|
when "!okay/news"
|
831
|
-
@feed_version =
|
783
|
+
@feed_version = 1.0
|
832
784
|
end
|
833
785
|
end
|
834
786
|
return @feed_version
|
@@ -842,15 +794,15 @@ module FeedTools
|
|
842
794
|
# Returns the feed's unique id
|
843
795
|
def id
|
844
796
|
if @id.nil?
|
845
|
-
@id = select_not_blank([
|
846
|
-
try_xpaths(self.channel_node, [
|
797
|
+
@id = FeedTools::XmlHelper.select_not_blank([
|
798
|
+
FeedTools::XmlHelper.try_xpaths(self.channel_node, [
|
847
799
|
"atom10:id/text()",
|
848
800
|
"atom03:id/text()",
|
849
801
|
"atom:id/text()",
|
850
802
|
"id/text()",
|
851
803
|
"guid/text()"
|
852
804
|
], :select_result_value => true),
|
853
|
-
try_xpaths(self.root_node, [
|
805
|
+
FeedTools::XmlHelper.try_xpaths(self.root_node, [
|
854
806
|
"atom10:id/text()",
|
855
807
|
"atom03:id/text()",
|
856
808
|
"atom:id/text()",
|
@@ -868,106 +820,114 @@ module FeedTools
|
|
868
820
|
end
|
869
821
|
|
870
822
|
# Returns the feed url.
|
871
|
-
def
|
872
|
-
|
873
|
-
|
874
|
-
|
875
|
-
|
876
|
-
|
877
|
-
|
878
|
-
|
879
|
-
if self.feed_data != nil
|
823
|
+
def href
|
824
|
+
if @href_overridden != true || @href.nil?
|
825
|
+
original_href = @href
|
826
|
+
|
827
|
+
override_href = lambda do |current_href|
|
828
|
+
begin
|
829
|
+
if current_href.nil? && self.feed_data != nil
|
830
|
+
# The current url is nil and we have feed data to go on
|
880
831
|
true
|
832
|
+
elsif current_href != nil && !(["http", "https"].include?(
|
833
|
+
URI.parse(current_href.to_s).scheme))
|
834
|
+
if self.feed_data != nil
|
835
|
+
# The current url is set, but isn't an http/https url and
|
836
|
+
# we have feed data to use to replace the current url with
|
837
|
+
true
|
838
|
+
else
|
839
|
+
# The current url is set but isn't an http/https url, and
|
840
|
+
# we don't have feed data to use to replace the current url
|
841
|
+
# with so we'll have to wait until we do
|
842
|
+
false
|
843
|
+
end
|
881
844
|
else
|
845
|
+
# The current url is set to an http/https url and there's
|
846
|
+
# no compelling reason to override it
|
882
847
|
false
|
883
848
|
end
|
884
|
-
|
885
|
-
|
849
|
+
rescue
|
850
|
+
# Something went wrong, so we should err on the side of caution
|
851
|
+
# and attempt to override the url
|
852
|
+
true
|
886
853
|
end
|
887
|
-
|
888
|
-
|
889
|
-
|
890
|
-
|
891
|
-
|
892
|
-
|
893
|
-
|
894
|
-
|
895
|
-
|
896
|
-
|
897
|
-
|
898
|
-
|
899
|
-
|
900
|
-
|
901
|
-
|
902
|
-
|
903
|
-
|
904
|
-
|
905
|
-
|
906
|
-
|
907
|
-
|
908
|
-
|
909
|
-
|
910
|
-
|
911
|
-
|
912
|
-
|
913
|
-
|
914
|
-
|
915
|
-
|
916
|
-
|
917
|
-
|
918
|
-
|
854
|
+
end
|
855
|
+
if override_href.call(@href) && self.feed_data != nil
|
856
|
+
# rdf:about is ordered last because a lot of people put the url to
|
857
|
+
# the feed inside it instead of a link to their blog.
|
858
|
+
# Ordering it last gives them as many chances as humanly possible
|
859
|
+
# for them to redeem themselves. If the link turns out to be the
|
860
|
+
# same as the blog link, it will be reset to the original value.
|
861
|
+
for link_object in self.links
|
862
|
+
if link_object.rel == 'self'
|
863
|
+
if link_object.href != self.link
|
864
|
+
@href = link_object.href
|
865
|
+
@href_overridden = true
|
866
|
+
return @href
|
867
|
+
end
|
868
|
+
end
|
869
|
+
end
|
870
|
+
@href = FeedTools::XmlHelper.try_xpaths(self.channel_node, [
|
871
|
+
"admin:feed/@rdf:resource",
|
872
|
+
"admin:feed/@resource",
|
873
|
+
"feed/@rdf:resource",
|
874
|
+
"feed/@resource",
|
875
|
+
"@rdf:about",
|
876
|
+
"@about"
|
877
|
+
], :select_result_value => true) do |result|
|
878
|
+
override_href.call(FeedTools::UriHelper.normalize_url(result))
|
879
|
+
end
|
880
|
+
begin
|
881
|
+
if !(@href =~ /^file:/) &&
|
882
|
+
!FeedTools::UriHelper.is_uri?(@href)
|
883
|
+
@href = FeedTools::UriHelper.resolve_relative_uri(
|
884
|
+
@href, [self.base_uri])
|
885
|
+
end
|
886
|
+
rescue
|
887
|
+
end
|
888
|
+
if FeedTools.configurations[:url_normalization_enabled]
|
889
|
+
@href = FeedTools::UriHelper.normalize_url(@href)
|
890
|
+
end
|
891
|
+
@href.strip! unless @href.nil?
|
892
|
+
@href = nil if @href.blank?
|
893
|
+
@href_overridden = true
|
894
|
+
if @href == nil
|
895
|
+
@href = original_href
|
896
|
+
@href_overridden = false
|
897
|
+
end
|
898
|
+
if @href == self.link
|
899
|
+
@href = original_href
|
900
|
+
@href_overridden = false
|
901
|
+
end
|
902
|
+
end
|
903
|
+
end
|
904
|
+
return @href
|
919
905
|
end
|
920
906
|
|
921
907
|
# Sets the feed url and prepares the cache_object if necessary.
|
922
|
-
def
|
923
|
-
@
|
924
|
-
self.cache_object.
|
908
|
+
def href=(new_href)
|
909
|
+
@href = FeedTools::UriHelper.normalize_url(new_href)
|
910
|
+
self.cache_object.href = new_href unless self.cache_object.nil?
|
925
911
|
end
|
926
912
|
|
927
913
|
# Returns the feed title
|
928
914
|
def title
|
929
915
|
if @title.nil?
|
930
916
|
repair_entities = false
|
931
|
-
title_node = try_xpaths(self.channel_node, [
|
917
|
+
title_node = FeedTools::XmlHelper.try_xpaths(self.channel_node, [
|
932
918
|
"atom10:title",
|
933
919
|
"atom03:title",
|
934
920
|
"atom:title",
|
935
921
|
"title",
|
936
|
-
"dc:title"
|
922
|
+
"dc:title",
|
923
|
+
"channelTitle"
|
937
924
|
])
|
938
|
-
|
939
|
-
|
940
|
-
|
941
|
-
|
942
|
-
|
943
|
-
title_mode = try_xpaths(title_node, "@mode",
|
944
|
-
:select_result_value => true)
|
945
|
-
title_encoding = try_xpaths(title_node, "@encoding",
|
946
|
-
:select_result_value => true)
|
947
|
-
|
948
|
-
# Note that we're checking for misuse of type, mode and encoding here
|
949
|
-
if title_type == "base64" || title_mode == "base64" ||
|
950
|
-
title_encoding == "base64"
|
951
|
-
@title = Base64.decode64(title_node.inner_xml.strip)
|
952
|
-
elsif title_type == "xhtml" || title_mode == "xhtml" ||
|
953
|
-
title_type == "xml" || title_mode == "xml" ||
|
954
|
-
title_type == "application/xhtml+xml"
|
955
|
-
@title = title_node.inner_xml
|
956
|
-
elsif title_type == "escaped" || title_mode == "escaped"
|
957
|
-
@title = FeedTools.unescape_entities(
|
958
|
-
title_node.inner_xml)
|
959
|
-
else
|
960
|
-
@title = title_node.inner_xml
|
961
|
-
repair_entities = true
|
925
|
+
@title = FeedTools::HtmlHelper.process_text_construct(title_node,
|
926
|
+
self.feed_type, self.feed_version)
|
927
|
+
if self.feed_type == "atom" ||
|
928
|
+
FeedTools.configurations[:always_strip_wrapper_elements]
|
929
|
+
@title = FeedTools::HtmlHelper.strip_wrapper_element(@title)
|
962
930
|
end
|
963
|
-
unless @title.nil?
|
964
|
-
@title = FeedTools.sanitize_html(@title, :strip)
|
965
|
-
@title = FeedTools.unescape_entities(@title) if repair_entities
|
966
|
-
@title = FeedTools.tidy_html(@title) unless repair_entities
|
967
|
-
end
|
968
|
-
@title.gsub!(/>\n</, "><")
|
969
|
-
@title.gsub!(/\n/, " ")
|
970
|
-
@title.strip!
|
971
931
|
@title = nil if @title.blank?
|
972
932
|
self.cache_object.title = @title unless self.cache_object.nil?
|
973
933
|
end
|
@@ -984,7 +944,7 @@ module FeedTools
|
|
984
944
|
def subtitle
|
985
945
|
if @subtitle.nil?
|
986
946
|
repair_entities = false
|
987
|
-
subtitle_node = try_xpaths(self.channel_node, [
|
947
|
+
subtitle_node = FeedTools::XmlHelper.try_xpaths(self.channel_node, [
|
988
948
|
"atom10:subtitle",
|
989
949
|
"subtitle",
|
990
950
|
"atom03:tagline",
|
@@ -992,44 +952,24 @@ module FeedTools
|
|
992
952
|
"description",
|
993
953
|
"summary",
|
994
954
|
"abstract",
|
995
|
-
"ABSTRACT",
|
996
955
|
"content:encoded",
|
997
956
|
"encoded",
|
998
957
|
"content",
|
999
958
|
"xhtml:body",
|
1000
959
|
"body",
|
960
|
+
"xhtml:div",
|
961
|
+
"div",
|
962
|
+
"p:payload",
|
963
|
+
"payload",
|
964
|
+
"channelDescription",
|
1001
965
|
"blurb",
|
1002
966
|
"info"
|
1003
967
|
])
|
1004
|
-
|
1005
|
-
|
1006
|
-
|
1007
|
-
|
1008
|
-
|
1009
|
-
subtitle_mode = try_xpaths(subtitle_node, "@mode",
|
1010
|
-
:select_result_value => true)
|
1011
|
-
subtitle_encoding = try_xpaths(subtitle_node, "@encoding",
|
1012
|
-
:select_result_value => true)
|
1013
|
-
|
1014
|
-
# Note that we're checking for misuse of type, mode and encoding here
|
1015
|
-
if !subtitle_encoding.blank?
|
1016
|
-
@subtitle =
|
1017
|
-
"[Embedded data objects are not currently supported.]"
|
1018
|
-
elsif subtitle_node.cdatas.size > 0
|
1019
|
-
@subtitle = subtitle_node.cdatas.first.value
|
1020
|
-
elsif subtitle_type == "base64" || subtitle_mode == "base64" ||
|
1021
|
-
subtitle_encoding == "base64"
|
1022
|
-
@subtitle = Base64.decode64(subtitle_node.inner_xml.strip)
|
1023
|
-
elsif subtitle_type == "xhtml" || subtitle_mode == "xhtml" ||
|
1024
|
-
subtitle_type == "xml" || subtitle_mode == "xml" ||
|
1025
|
-
subtitle_type == "application/xhtml+xml"
|
1026
|
-
@subtitle = subtitle_node.inner_xml
|
1027
|
-
elsif subtitle_type == "escaped" || subtitle_mode == "escaped"
|
1028
|
-
@subtitle = FeedTools.unescape_entities(
|
1029
|
-
subtitle_node.inner_xml)
|
1030
|
-
else
|
1031
|
-
@subtitle = subtitle_node.inner_xml
|
1032
|
-
repair_entities = true
|
968
|
+
@subtitle = FeedTools::HtmlHelper.process_text_construct(
|
969
|
+
subtitle_node, self.feed_type, self.feed_version)
|
970
|
+
if self.feed_type == "atom" ||
|
971
|
+
FeedTools.configurations[:always_strip_wrapper_elements]
|
972
|
+
@subtitle = FeedTools::HtmlHelper.strip_wrapper_element(@subtitle)
|
1033
973
|
end
|
1034
974
|
if @subtitle.blank?
|
1035
975
|
@subtitle = self.itunes_summary
|
@@ -1037,15 +977,6 @@ module FeedTools
|
|
1037
977
|
if @subtitle.blank?
|
1038
978
|
@subtitle = self.itunes_subtitle
|
1039
979
|
end
|
1040
|
-
|
1041
|
-
unless @subtitle.blank?
|
1042
|
-
@subtitle = FeedTools.sanitize_html(@subtitle, :strip)
|
1043
|
-
@subtitle = FeedTools.unescape_entities(@subtitle) if repair_entities
|
1044
|
-
@subtitle = FeedTools.tidy_html(@subtitle)
|
1045
|
-
end
|
1046
|
-
|
1047
|
-
@subtitle = @subtitle.strip unless @subtitle.nil?
|
1048
|
-
@subtitle = nil if @subtitle.blank?
|
1049
980
|
end
|
1050
981
|
return @subtitle
|
1051
982
|
end
|
@@ -1058,17 +989,20 @@ module FeedTools
|
|
1058
989
|
# Returns the contents of the itunes:summary element
|
1059
990
|
def itunes_summary
|
1060
991
|
if @itunes_summary.nil?
|
1061
|
-
@itunes_summary = select_not_blank([
|
1062
|
-
try_xpaths(self.channel_node, [
|
992
|
+
@itunes_summary = FeedTools::XmlHelper.select_not_blank([
|
993
|
+
FeedTools::XmlHelper.try_xpaths(self.channel_node, [
|
1063
994
|
"itunes:summary/text()"
|
1064
|
-
]),
|
1065
|
-
try_xpaths(self.root_node, [
|
995
|
+
], :select_result_value => true),
|
996
|
+
FeedTools::XmlHelper.try_xpaths(self.root_node, [
|
1066
997
|
"itunes:summary/text()"
|
1067
|
-
])
|
998
|
+
], :select_result_value => true)
|
1068
999
|
])
|
1069
1000
|
unless @itunes_summary.blank?
|
1070
|
-
@itunes_summary =
|
1071
|
-
|
1001
|
+
@itunes_summary =
|
1002
|
+
FeedTools::HtmlHelper.unescape_entities(@itunes_summary)
|
1003
|
+
@itunes_summary =
|
1004
|
+
FeedTools::HtmlHelper.sanitize_html(@itunes_summary)
|
1005
|
+
@itunes_summary.strip!
|
1072
1006
|
else
|
1073
1007
|
@itunes_summary = nil
|
1074
1008
|
end
|
@@ -1084,17 +1018,20 @@ module FeedTools
|
|
1084
1018
|
# Returns the contents of the itunes:subtitle element
|
1085
1019
|
def itunes_subtitle
|
1086
1020
|
if @itunes_subtitle.nil?
|
1087
|
-
@itunes_subtitle = select_not_blank([
|
1088
|
-
try_xpaths(self.channel_node, [
|
1021
|
+
@itunes_subtitle = FeedTools::XmlHelper.select_not_blank([
|
1022
|
+
FeedTools::XmlHelper.try_xpaths(self.channel_node, [
|
1089
1023
|
"itunes:subtitle/text()"
|
1090
|
-
]),
|
1091
|
-
try_xpaths(self.root_node, [
|
1024
|
+
], :select_result_value => true),
|
1025
|
+
FeedTools::XmlHelper.try_xpaths(self.root_node, [
|
1092
1026
|
"itunes:subtitle/text()"
|
1093
|
-
])
|
1027
|
+
], :select_result_value => true)
|
1094
1028
|
])
|
1095
1029
|
unless @itunes_subtitle.blank?
|
1096
|
-
@itunes_subtitle =
|
1097
|
-
|
1030
|
+
@itunes_subtitle =
|
1031
|
+
FeedTools::HtmlHelper.unescape_entities(@itunes_subtitle)
|
1032
|
+
@itunes_subtitle =
|
1033
|
+
FeedTools::HtmlHelper.sanitize_html(@itunes_subtitle)
|
1034
|
+
@itunes_subtitle.strip!
|
1098
1035
|
else
|
1099
1036
|
@itunes_subtitle = nil
|
1100
1037
|
end
|
@@ -1107,84 +1044,89 @@ module FeedTools
|
|
1107
1044
|
@itunes_subtitle = new_itunes_subtitle
|
1108
1045
|
end
|
1109
1046
|
|
1047
|
+
# Returns the contents of the media:text element
|
1048
|
+
def media_text
|
1049
|
+
if @media_text.nil?
|
1050
|
+
@media_text = FeedTools::XmlHelper.select_not_blank([
|
1051
|
+
FeedTools::XmlHelper.try_xpaths(self.channel_node, [
|
1052
|
+
"media:text/text()"
|
1053
|
+
], :select_result_value => true),
|
1054
|
+
FeedTools::XmlHelper.try_xpaths(self.root_node, [
|
1055
|
+
"media:text/text()"
|
1056
|
+
], :select_result_value => true)
|
1057
|
+
])
|
1058
|
+
unless @media_text.blank?
|
1059
|
+
@media_text = FeedTools::HtmlHelper.unescape_entities(@media_text)
|
1060
|
+
@media_text = FeedTools::HtmlHelper.sanitize_html(@media_text)
|
1061
|
+
@media_text.strip!
|
1062
|
+
else
|
1063
|
+
@media_text = nil
|
1064
|
+
end
|
1065
|
+
end
|
1066
|
+
return @media_text
|
1067
|
+
end
|
1068
|
+
|
1069
|
+
# Sets the contents of the media:text element
|
1070
|
+
def media_text=(new_media_text)
|
1071
|
+
@media_text = new_media_text
|
1072
|
+
end
|
1073
|
+
|
1110
1074
|
# Returns the feed link
|
1111
1075
|
def link
|
1112
1076
|
if @link.nil?
|
1113
|
-
|
1114
|
-
|
1115
|
-
|
1116
|
-
|
1117
|
-
|
1118
|
-
|
1119
|
-
|
1120
|
-
|
1121
|
-
|
1122
|
-
|
1123
|
-
"link[@type='application/xhtml+xml']/@href",
|
1124
|
-
"link[@type='text/html']/@href",
|
1125
|
-
"link[@rel='alternate']/@href",
|
1126
|
-
"link/text()",
|
1127
|
-
"@href",
|
1128
|
-
"a/@href"
|
1129
|
-
], :select_result_value => true)
|
1130
|
-
if @link.blank?
|
1131
|
-
if FeedTools.is_uri?(self.guid) &&
|
1132
|
-
!(self.guid =~ /^urn:uuid:/) &&
|
1133
|
-
!(self.guid =~ /^tag:/)
|
1134
|
-
@link = self.guid
|
1077
|
+
max_score = 0
|
1078
|
+
for link_object in self.links.reverse
|
1079
|
+
score = 0
|
1080
|
+
if FeedTools::HtmlHelper.html_type?(link_object.type)
|
1081
|
+
score = score + 2
|
1082
|
+
elsif link_object.type != nil
|
1083
|
+
score = score - 1
|
1084
|
+
end
|
1085
|
+
if FeedTools::HtmlHelper.xml_type?(link_object.type)
|
1086
|
+
score = score + 1
|
1135
1087
|
end
|
1088
|
+
if link_object.rel == "alternate"
|
1089
|
+
score = score + 1
|
1090
|
+
end
|
1091
|
+
if link_object.rel == "self"
|
1092
|
+
score = score - 1
|
1093
|
+
end
|
1094
|
+
if score >= max_score
|
1095
|
+
max_score = score
|
1096
|
+
@link = link_object.href
|
1097
|
+
end
|
1098
|
+
end
|
1099
|
+
if @link.blank?
|
1100
|
+
@link = FeedTools::XmlHelper.try_xpaths(self.channel_node, [
|
1101
|
+
"@href",
|
1102
|
+
"@rdf:about",
|
1103
|
+
"@about"
|
1104
|
+
], :select_result_value => true)
|
1136
1105
|
end
|
1137
|
-
if @link.blank?
|
1138
|
-
|
1139
|
-
|
1140
|
-
|
1141
|
-
|
1106
|
+
if @link.blank?
|
1107
|
+
if FeedTools::UriHelper.is_uri?(self.id) &&
|
1108
|
+
(self.id =~ /^http/)
|
1109
|
+
@link = self.id
|
1110
|
+
end
|
1142
1111
|
end
|
1143
1112
|
if !@link.blank?
|
1144
|
-
@link = FeedTools.unescape_entities(@link)
|
1113
|
+
@link = FeedTools::HtmlHelper.unescape_entities(@link)
|
1145
1114
|
end
|
1146
|
-
if @link.blank?
|
1147
|
-
|
1148
|
-
|
1149
|
-
|
1150
|
-
|
1151
|
-
|
1152
|
-
|
1153
|
-
if link_node != nil
|
1154
|
-
if link_node.attributes['type'].to_s =~ /^image/ ||
|
1155
|
-
link_node.attributes['type'].to_s =~ /^application/ ||
|
1156
|
-
link_node.attributes['type'].to_s =~ /xml/ ||
|
1157
|
-
link_node.attributes['rel'].to_s =~ /self/
|
1158
|
-
for child in self.channel_node
|
1159
|
-
if child.class == REXML::Element
|
1160
|
-
if child.name.downcase == "link"
|
1161
|
-
if child.attributes['type'].to_s =~ /^image/ ||
|
1162
|
-
child.attributes['type'].to_s =~ /^application/ ||
|
1163
|
-
child.attributes['type'].to_s =~ /xml/ ||
|
1164
|
-
child.attributes['rel'].to_s =~ /self/
|
1165
|
-
@link = nil
|
1166
|
-
next
|
1167
|
-
else
|
1168
|
-
@link = child.attributes['href'].to_s
|
1169
|
-
if @link.blank?
|
1170
|
-
@link = child.inner_xml
|
1171
|
-
end
|
1172
|
-
if @link.blank?
|
1173
|
-
next
|
1174
|
-
end
|
1175
|
-
break
|
1176
|
-
end
|
1177
|
-
end
|
1178
|
-
end
|
1179
|
-
end
|
1180
|
-
else
|
1181
|
-
@link = link_node.attributes['href'].to_s
|
1115
|
+
@link = nil if @link.blank?
|
1116
|
+
begin
|
1117
|
+
if !(@link =~ /^file:/) &&
|
1118
|
+
!FeedTools::UriHelper.is_uri?(@link)
|
1119
|
+
channel_base_uri = nil
|
1120
|
+
unless self.channel_node.nil?
|
1121
|
+
channel_base_uri = self.channel_node.base_uri
|
1182
1122
|
end
|
1123
|
+
@link = FeedTools::UriHelper.resolve_relative_uri(
|
1124
|
+
@link, [channel_base_uri, self.base_uri])
|
1183
1125
|
end
|
1126
|
+
rescue
|
1184
1127
|
end
|
1185
|
-
@link = nil if @link.blank?
|
1186
1128
|
if FeedTools.configurations[:url_normalization_enabled]
|
1187
|
-
@link = FeedTools.normalize_url(@link)
|
1129
|
+
@link = FeedTools::UriHelper.normalize_url(@link)
|
1188
1130
|
end
|
1189
1131
|
unless self.cache_object.nil?
|
1190
1132
|
self.cache_object.link = @link
|
@@ -1200,11 +1142,143 @@ module FeedTools
|
|
1200
1142
|
self.cache_object.link = new_link
|
1201
1143
|
end
|
1202
1144
|
end
|
1145
|
+
|
1146
|
+
# Returns the links collection
|
1147
|
+
def links
|
1148
|
+
if @links.blank?
|
1149
|
+
@links = []
|
1150
|
+
link_nodes =
|
1151
|
+
FeedTools::XmlHelper.combine_xpaths_all(self.channel_node, [
|
1152
|
+
"atom10:link",
|
1153
|
+
"atom03:link",
|
1154
|
+
"atom:link",
|
1155
|
+
"link",
|
1156
|
+
"channelLink",
|
1157
|
+
"a",
|
1158
|
+
"url",
|
1159
|
+
"href"
|
1160
|
+
])
|
1161
|
+
for link_node in link_nodes
|
1162
|
+
link_object = FeedTools::Link.new
|
1163
|
+
link_object.href = FeedTools::XmlHelper.try_xpaths(link_node, [
|
1164
|
+
"@atom10:href",
|
1165
|
+
"@atom03:href",
|
1166
|
+
"@atom:href",
|
1167
|
+
"@href",
|
1168
|
+
"text()"
|
1169
|
+
], :select_result_value => true)
|
1170
|
+
if link_object.href.nil? && link_node.base_uri != nil
|
1171
|
+
link_object.href = ""
|
1172
|
+
end
|
1173
|
+
begin
|
1174
|
+
if !(link_object.href =~ /^file:/) &&
|
1175
|
+
!FeedTools::UriHelper.is_uri?(link_object.href)
|
1176
|
+
link_object.href = FeedTools::UriHelper.resolve_relative_uri(
|
1177
|
+
link_object.href,
|
1178
|
+
[link_node.base_uri, self.base_uri])
|
1179
|
+
end
|
1180
|
+
rescue
|
1181
|
+
end
|
1182
|
+
if FeedTools.configurations[:url_normalization_enabled]
|
1183
|
+
link_object.href =
|
1184
|
+
FeedTools::UriHelper.normalize_url(link_object.href)
|
1185
|
+
end
|
1186
|
+
link_object.href.strip! unless link_object.href.nil?
|
1187
|
+
next if link_object.href.blank?
|
1188
|
+
link_object.hreflang = FeedTools::XmlHelper.try_xpaths(link_node, [
|
1189
|
+
"@atom10:hreflang",
|
1190
|
+
"@atom03:hreflang",
|
1191
|
+
"@atom:hreflang",
|
1192
|
+
"@hreflang"
|
1193
|
+
], :select_result_value => true)
|
1194
|
+
unless link_object.hreflang.nil?
|
1195
|
+
link_object.hreflang = link_object.hreflang.downcase
|
1196
|
+
end
|
1197
|
+
link_object.rel = FeedTools::XmlHelper.try_xpaths(link_node, [
|
1198
|
+
"@atom10:rel",
|
1199
|
+
"@atom03:rel",
|
1200
|
+
"@atom:rel",
|
1201
|
+
"@rel"
|
1202
|
+
], :select_result_value => true)
|
1203
|
+
unless link_object.rel.nil?
|
1204
|
+
link_object.rel = link_object.rel.downcase
|
1205
|
+
end
|
1206
|
+
link_object.type = FeedTools::XmlHelper.try_xpaths(link_node, [
|
1207
|
+
"@atom10:type",
|
1208
|
+
"@atom03:type",
|
1209
|
+
"@atom:type",
|
1210
|
+
"@type"
|
1211
|
+
], :select_result_value => true)
|
1212
|
+
unless link_object.type.nil?
|
1213
|
+
link_object.type = link_object.type.downcase
|
1214
|
+
end
|
1215
|
+
link_object.title = FeedTools::XmlHelper.try_xpaths(link_node, [
|
1216
|
+
"@atom10:title",
|
1217
|
+
"@atom03:title",
|
1218
|
+
"@atom:title",
|
1219
|
+
"@title",
|
1220
|
+
"text()"
|
1221
|
+
], :select_result_value => true)
|
1222
|
+
# This catches the ambiguities between atom, rss, and cdf
|
1223
|
+
if link_object.title == link_object.href
|
1224
|
+
link_object.title = nil
|
1225
|
+
end
|
1226
|
+
link_object.length = FeedTools::XmlHelper.try_xpaths(link_node, [
|
1227
|
+
"@atom10:length",
|
1228
|
+
"@atom03:length",
|
1229
|
+
"@atom:length",
|
1230
|
+
"@length"
|
1231
|
+
], :select_result_value => true)
|
1232
|
+
if !link_object.length.nil?
|
1233
|
+
link_object.length = link_object.length.to_i
|
1234
|
+
else
|
1235
|
+
if !link_object.type.nil? && link_object.type[0..4] != "text" &&
|
1236
|
+
link_object.type[-3..-1] != "xml" &&
|
1237
|
+
link_object.href =~ /^http:\/\//
|
1238
|
+
# Retrieve the length with an http HEAD request
|
1239
|
+
else
|
1240
|
+
link_object.length = nil
|
1241
|
+
end
|
1242
|
+
end
|
1243
|
+
@links << link_object
|
1244
|
+
end
|
1245
|
+
end
|
1246
|
+
return @links
|
1247
|
+
end
|
1248
|
+
|
1249
|
+
# Sets the links collection
|
1250
|
+
def links=(new_links)
|
1251
|
+
@links = new_links
|
1252
|
+
end
|
1253
|
+
|
1254
|
+
# Returns the base uri for the feed, used for resolving relative paths
|
1255
|
+
def base_uri
|
1256
|
+
if @base_uri.nil?
|
1257
|
+
@base_uri = FeedTools::XmlHelper.try_xpaths(self.channel_node, [
|
1258
|
+
"@base"
|
1259
|
+
], :select_result_value => true)
|
1260
|
+
if @base_uri.blank?
|
1261
|
+
@base_uri =
|
1262
|
+
FeedTools::GenericHelper.recursion_trap(:feed_base_uri) do
|
1263
|
+
self.href
|
1264
|
+
end
|
1265
|
+
end
|
1266
|
+
if !@base_uri.blank?
|
1267
|
+
@base_uri = FeedTools::UriHelper.normalize_url(@base_uri)
|
1268
|
+
end
|
1269
|
+
end
|
1270
|
+
return @base_uri
|
1271
|
+
end
|
1272
|
+
|
1273
|
+
# Sets the base uri for the feed
|
1274
|
+
def base_uri=(new_base_uri)
|
1275
|
+
@base_uri = new_base_uri
|
1276
|
+
end
|
1203
1277
|
|
1204
1278
|
# Returns the url to the icon file for this feed.
|
1205
1279
|
def icon
|
1206
1280
|
if @icon.nil?
|
1207
|
-
icon_node = try_xpaths(self.channel_node, [
|
1281
|
+
icon_node = FeedTools::XmlHelper.try_xpaths(self.channel_node, [
|
1208
1282
|
"link[@rel='icon']",
|
1209
1283
|
"link[@rel='shortcut icon']",
|
1210
1284
|
"link[@type='image/x-icon']",
|
@@ -1213,15 +1287,26 @@ module FeedTools
|
|
1213
1287
|
"LOGO[@STYLE='ICON']"
|
1214
1288
|
])
|
1215
1289
|
unless icon_node.nil?
|
1216
|
-
@icon = FeedTools.
|
1217
|
-
|
1218
|
-
|
1219
|
-
@
|
1220
|
-
|
1221
|
-
|
1222
|
-
|
1290
|
+
@icon = FeedTools::XmlHelper.try_xpaths(icon_node, [
|
1291
|
+
"@atom10:href",
|
1292
|
+
"@atom03:href",
|
1293
|
+
"@atom:href",
|
1294
|
+
"@href",
|
1295
|
+
"text()"
|
1296
|
+
], :select_result_value => true)
|
1297
|
+
begin
|
1298
|
+
if !(@icon =~ /^file:/) &&
|
1299
|
+
!FeedTools::UriHelper.is_uri?(@icon)
|
1300
|
+
channel_base_uri = nil
|
1301
|
+
unless self.channel_node.nil?
|
1302
|
+
channel_base_uri = self.channel_node.base_uri
|
1303
|
+
end
|
1304
|
+
@icon = FeedTools::UriHelper.resolve_relative_uri(
|
1305
|
+
@icon, [channel_base_uri, self.base_uri])
|
1223
1306
|
end
|
1307
|
+
rescue
|
1224
1308
|
end
|
1309
|
+
@icon = nil unless FeedTools::UriHelper.is_uri?(@icon)
|
1225
1310
|
@icon = nil if @icon.blank?
|
1226
1311
|
end
|
1227
1312
|
end
|
@@ -1236,7 +1321,8 @@ module FeedTools
|
|
1236
1321
|
if @favicon.nil?
|
1237
1322
|
if !self.link.blank?
|
1238
1323
|
begin
|
1239
|
-
link_uri = URI.parse(
|
1324
|
+
link_uri = URI.parse(
|
1325
|
+
FeedTools::UriHelper.normalize_url(self.link))
|
1240
1326
|
if link_uri.scheme == "http"
|
1241
1327
|
@favicon =
|
1242
1328
|
"http://" + link_uri.host + "/favicon.ico"
|
@@ -1244,9 +1330,10 @@ module FeedTools
|
|
1244
1330
|
rescue
|
1245
1331
|
@favicon = nil
|
1246
1332
|
end
|
1247
|
-
if @favicon.nil? && !self.
|
1333
|
+
if @favicon.nil? && !self.href.blank?
|
1248
1334
|
begin
|
1249
|
-
feed_uri = URI.parse(
|
1335
|
+
feed_uri = URI.parse(
|
1336
|
+
FeedTools::UriHelper.normalize_url(self.href))
|
1250
1337
|
if feed_uri.scheme == "http"
|
1251
1338
|
@favicon =
|
1252
1339
|
"http://" + feed_uri.host + "/favicon.ico"
|
@@ -1265,8 +1352,8 @@ module FeedTools
|
|
1265
1352
|
# Returns the feed author
|
1266
1353
|
def author
|
1267
1354
|
if @author.nil?
|
1268
|
-
@author = FeedTools::
|
1269
|
-
author_node = try_xpaths(self.channel_node, [
|
1355
|
+
@author = FeedTools::Author.new
|
1356
|
+
author_node = FeedTools::XmlHelper.try_xpaths(self.channel_node, [
|
1270
1357
|
"atom10:author",
|
1271
1358
|
"atom03:author",
|
1272
1359
|
"atom:author",
|
@@ -1276,16 +1363,18 @@ module FeedTools
|
|
1276
1363
|
"dc:creator"
|
1277
1364
|
])
|
1278
1365
|
unless author_node.nil?
|
1279
|
-
@author.raw = FeedTools.
|
1280
|
-
|
1281
|
-
@author.raw =
|
1366
|
+
@author.raw = FeedTools::XmlHelper.try_xpaths(
|
1367
|
+
author_node, ["text()"], :select_result_value => true)
|
1368
|
+
@author.raw = FeedTools::HtmlHelper.unescape_entities(@author.raw)
|
1282
1369
|
unless @author.raw.nil?
|
1283
1370
|
raw_scan = @author.raw.scan(
|
1284
1371
|
/(.*)\((\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\)/i)
|
1285
1372
|
if raw_scan.nil? || raw_scan.size == 0
|
1286
1373
|
raw_scan = @author.raw.scan(
|
1287
1374
|
/(\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\s*\((.*)\)/i)
|
1288
|
-
|
1375
|
+
unless raw_scan.size == 0
|
1376
|
+
author_raw_pair = raw_scan.first.reverse
|
1377
|
+
end
|
1289
1378
|
else
|
1290
1379
|
author_raw_pair = raw_scan.first
|
1291
1380
|
end
|
@@ -1302,16 +1391,16 @@ module FeedTools
|
|
1302
1391
|
else
|
1303
1392
|
unless @author.raw.include?("@")
|
1304
1393
|
# We can be reasonably sure we are looking at something
|
1305
|
-
# that the creator didn't intend to contain an email address
|
1306
|
-
# it got through the preceeding regexes and it doesn't
|
1394
|
+
# that the creator didn't intend to contain an email address
|
1395
|
+
# if it got through the preceeding regexes and it doesn't
|
1307
1396
|
# contain the tell-tale '@' symbol.
|
1308
1397
|
@author.name = @author.raw
|
1309
1398
|
end
|
1310
1399
|
end
|
1311
1400
|
end
|
1312
1401
|
if @author.name.blank?
|
1313
|
-
@author.name = FeedTools.unescape_entities(
|
1314
|
-
try_xpaths(author_node, [
|
1402
|
+
@author.name = FeedTools::HtmlHelper.unescape_entities(
|
1403
|
+
FeedTools::XmlHelper.try_xpaths(author_node, [
|
1315
1404
|
"atom10:name/text()",
|
1316
1405
|
"atom03:name/text()",
|
1317
1406
|
"atom:name/text()",
|
@@ -1321,8 +1410,8 @@ module FeedTools
|
|
1321
1410
|
)
|
1322
1411
|
end
|
1323
1412
|
if @author.email.blank?
|
1324
|
-
@author.email = FeedTools.unescape_entities(
|
1325
|
-
try_xpaths(author_node, [
|
1413
|
+
@author.email = FeedTools::HtmlHelper.unescape_entities(
|
1414
|
+
FeedTools::XmlHelper.try_xpaths(author_node, [
|
1326
1415
|
"atom10:email/text()",
|
1327
1416
|
"atom03:email/text()",
|
1328
1417
|
"atom:email/text()",
|
@@ -1332,8 +1421,8 @@ module FeedTools
|
|
1332
1421
|
)
|
1333
1422
|
end
|
1334
1423
|
if @author.url.blank?
|
1335
|
-
@author.url = FeedTools.unescape_entities(
|
1336
|
-
try_xpaths(author_node, [
|
1424
|
+
@author.url = FeedTools::HtmlHelper.unescape_entities(
|
1425
|
+
FeedTools::XmlHelper.try_xpaths(author_node, [
|
1337
1426
|
"atom10:url/text()",
|
1338
1427
|
"atom03:url/text()",
|
1339
1428
|
"atom:url/text()",
|
@@ -1342,7 +1431,7 @@ module FeedTools
|
|
1342
1431
|
"atom03:uri/text()",
|
1343
1432
|
"atom:uri/text()",
|
1344
1433
|
"uri/text()",
|
1345
|
-
"@
|
1434
|
+
"@href",
|
1346
1435
|
"@uri",
|
1347
1436
|
"@href"
|
1348
1437
|
], :select_result_value => true)
|
@@ -1352,6 +1441,16 @@ module FeedTools
|
|
1352
1441
|
@author.raw = nil if @author.raw.blank?
|
1353
1442
|
@author.email = nil if @author.email.blank?
|
1354
1443
|
@author.url = nil if @author.url.blank?
|
1444
|
+
if @author.url != nil
|
1445
|
+
begin
|
1446
|
+
if !(@author.url =~ /^file:/) &&
|
1447
|
+
!FeedTools::UriHelper.is_uri?(@author.url)
|
1448
|
+
@author.url = FeedTools::UriHelper.resolve_relative_uri(
|
1449
|
+
@author.url, [author_node.base_uri, self.base_uri])
|
1450
|
+
end
|
1451
|
+
rescue
|
1452
|
+
end
|
1453
|
+
end
|
1355
1454
|
end
|
1356
1455
|
# Fallback on the itunes module if we didn't find an author name
|
1357
1456
|
begin
|
@@ -1374,7 +1473,7 @@ module FeedTools
|
|
1374
1473
|
# We're not looking at an author object, this is probably a string,
|
1375
1474
|
# default to setting the author's name.
|
1376
1475
|
if @author.nil?
|
1377
|
-
@author = FeedTools::
|
1476
|
+
@author = FeedTools::Author.new
|
1378
1477
|
end
|
1379
1478
|
@author.name = new_author
|
1380
1479
|
end
|
@@ -1383,14 +1482,13 @@ module FeedTools
|
|
1383
1482
|
# Returns the feed publisher
|
1384
1483
|
def publisher
|
1385
1484
|
if @publisher.nil?
|
1386
|
-
@publisher = FeedTools::
|
1387
|
-
|
1388
|
-
|
1389
|
-
|
1390
|
-
|
1485
|
+
@publisher = FeedTools::Author.new
|
1486
|
+
@publisher.raw = FeedTools::HtmlHelper.unescape_entities(
|
1487
|
+
FeedTools::XmlHelper.try_xpaths(self.channel_node, [
|
1488
|
+
"webMaster/text()",
|
1489
|
+
"dc:publisher/text()"
|
1490
|
+
], :select_result_value => true))
|
1391
1491
|
|
1392
|
-
# Set the author name
|
1393
|
-
@publisher.raw = FeedTools.unescape_entities(publisher_node.to_s)
|
1394
1492
|
unless @publisher.raw.blank?
|
1395
1493
|
raw_scan = @publisher.raw.scan(
|
1396
1494
|
/(.*)\((\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\)/i)
|
@@ -1428,6 +1526,20 @@ module FeedTools
|
|
1428
1526
|
@publisher.raw = nil if @publisher.raw.blank?
|
1429
1527
|
@publisher.email = nil if @publisher.email.blank?
|
1430
1528
|
@publisher.url = nil if @publisher.url.blank?
|
1529
|
+
if @publisher.url != nil
|
1530
|
+
begin
|
1531
|
+
if !(@publisher.url =~ /^file:/) &&
|
1532
|
+
!FeedTools::UriHelper.is_uri?(@publisher.url)
|
1533
|
+
channel_base_uri = nil
|
1534
|
+
unless self.channel_node.nil?
|
1535
|
+
channel_base_uri = self.channel_node.base_uri
|
1536
|
+
end
|
1537
|
+
@publisher.url = FeedTools::UriHelper.resolve_relative_uri(
|
1538
|
+
@publisher.url, [channel_base_uri, self.base_uri])
|
1539
|
+
end
|
1540
|
+
rescue
|
1541
|
+
end
|
1542
|
+
end
|
1431
1543
|
end
|
1432
1544
|
return @publisher
|
1433
1545
|
end
|
@@ -1443,7 +1555,7 @@ module FeedTools
|
|
1443
1555
|
# We're not looking at an Author object, this is probably a string,
|
1444
1556
|
# default to setting the publisher's name.
|
1445
1557
|
if @publisher.nil?
|
1446
|
-
@publisher = FeedTools::
|
1558
|
+
@publisher = FeedTools::Author.new
|
1447
1559
|
end
|
1448
1560
|
@publisher.name = new_publisher
|
1449
1561
|
end
|
@@ -1457,8 +1569,8 @@ module FeedTools
|
|
1457
1569
|
# attribute.
|
1458
1570
|
def itunes_author
|
1459
1571
|
if @itunes_author.nil?
|
1460
|
-
@itunes_author = FeedTools.unescape_entities(
|
1461
|
-
try_xpaths(self.channel_node, [
|
1572
|
+
@itunes_author = FeedTools::HtmlHelper.unescape_entities(
|
1573
|
+
FeedTools::XmlHelper.try_xpaths(self.channel_node, [
|
1462
1574
|
"itunes:author/text()"
|
1463
1575
|
], :select_result_value => true)
|
1464
1576
|
)
|
@@ -1470,7 +1582,7 @@ module FeedTools
|
|
1470
1582
|
# Returns the feed time
|
1471
1583
|
def time
|
1472
1584
|
if @time.nil?
|
1473
|
-
time_string = try_xpaths(self.channel_node, [
|
1585
|
+
time_string = FeedTools::XmlHelper.try_xpaths(self.channel_node, [
|
1474
1586
|
"atom10:updated/text()",
|
1475
1587
|
"atom03:updated/text()",
|
1476
1588
|
"atom:updated/text()",
|
@@ -1480,6 +1592,7 @@ module FeedTools
|
|
1480
1592
|
"atom:modified/text()",
|
1481
1593
|
"modified/text()",
|
1482
1594
|
"time/text()",
|
1595
|
+
"lastBuildDate/text()",
|
1483
1596
|
"atom10:issued/text()",
|
1484
1597
|
"atom03:issued/text()",
|
1485
1598
|
"atom:issued/text()",
|
@@ -1488,8 +1601,8 @@ module FeedTools
|
|
1488
1601
|
"atom03:published/text()",
|
1489
1602
|
"atom:published/text()",
|
1490
1603
|
"published/text()",
|
1491
|
-
"pubDate/text()",
|
1492
1604
|
"dc:date/text()",
|
1605
|
+
"pubDate/text()",
|
1493
1606
|
"date/text()"
|
1494
1607
|
], :select_result_value => true)
|
1495
1608
|
begin
|
@@ -1509,15 +1622,15 @@ module FeedTools
|
|
1509
1622
|
return @time
|
1510
1623
|
end
|
1511
1624
|
|
1512
|
-
# Sets the feed
|
1625
|
+
# Sets the feed time
|
1513
1626
|
def time=(new_time)
|
1514
1627
|
@time = new_time
|
1515
1628
|
end
|
1516
1629
|
|
1517
|
-
# Returns the feed
|
1630
|
+
# Returns the feed updated time
|
1518
1631
|
def updated
|
1519
1632
|
if @updated.nil?
|
1520
|
-
updated_string = try_xpaths(self.channel_node, [
|
1633
|
+
updated_string = FeedTools::XmlHelper.try_xpaths(self.channel_node, [
|
1521
1634
|
"atom10:updated/text()",
|
1522
1635
|
"atom03:updated/text()",
|
1523
1636
|
"atom:updated/text()",
|
@@ -1525,7 +1638,8 @@ module FeedTools
|
|
1525
1638
|
"atom10:modified/text()",
|
1526
1639
|
"atom03:modified/text()",
|
1527
1640
|
"atom:modified/text()",
|
1528
|
-
"modified/text()"
|
1641
|
+
"modified/text()",
|
1642
|
+
"lastBuildDate/text()"
|
1529
1643
|
], :select_result_value => true)
|
1530
1644
|
unless updated_string.blank?
|
1531
1645
|
@updated = Time.parse(updated_string).gmtime rescue nil
|
@@ -1536,26 +1650,27 @@ module FeedTools
|
|
1536
1650
|
return @updated
|
1537
1651
|
end
|
1538
1652
|
|
1539
|
-
# Sets the feed
|
1653
|
+
# Sets the feed updated time
|
1540
1654
|
def updated=(new_updated)
|
1541
1655
|
@updated = new_updated
|
1542
1656
|
end
|
1543
1657
|
|
1544
|
-
# Returns the feed
|
1658
|
+
# Returns the feed published time
|
1545
1659
|
def published
|
1546
1660
|
if @published.nil?
|
1547
|
-
published_string =
|
1548
|
-
|
1549
|
-
|
1550
|
-
|
1551
|
-
|
1552
|
-
|
1553
|
-
|
1554
|
-
|
1555
|
-
|
1556
|
-
|
1557
|
-
|
1558
|
-
|
1661
|
+
published_string =
|
1662
|
+
FeedTools::XmlHelper.try_xpaths(self.channel_node, [
|
1663
|
+
"atom10:published/text()",
|
1664
|
+
"atom03:published/text()",
|
1665
|
+
"atom:published/text()",
|
1666
|
+
"published/text()",
|
1667
|
+
"dc:date/text()",
|
1668
|
+
"pubDate/text()",
|
1669
|
+
"atom10:issued/text()",
|
1670
|
+
"atom03:issued/text()",
|
1671
|
+
"atom:issued/text()",
|
1672
|
+
"issued/text()"
|
1673
|
+
], :select_result_value => true)
|
1559
1674
|
unless published_string.blank?
|
1560
1675
|
@published = Time.parse(published_string).gmtime rescue nil
|
1561
1676
|
else
|
@@ -1565,7 +1680,7 @@ module FeedTools
|
|
1565
1680
|
return @published
|
1566
1681
|
end
|
1567
1682
|
|
1568
|
-
# Sets the feed
|
1683
|
+
# Sets the feed published time
|
1569
1684
|
def published=(new_published)
|
1570
1685
|
@published = new_published
|
1571
1686
|
end
|
@@ -1574,22 +1689,24 @@ module FeedTools
|
|
1574
1689
|
def categories
|
1575
1690
|
if @categories.nil?
|
1576
1691
|
@categories = []
|
1577
|
-
category_nodes =
|
1578
|
-
|
1579
|
-
|
1580
|
-
|
1692
|
+
category_nodes =
|
1693
|
+
FeedTools::XmlHelper.try_xpaths_all(self.channel_node, [
|
1694
|
+
"category",
|
1695
|
+
"dc:subject"
|
1696
|
+
])
|
1581
1697
|
unless category_nodes.nil?
|
1582
1698
|
for category_node in category_nodes
|
1583
|
-
category = FeedTools::
|
1584
|
-
category.term = try_xpaths(category_node, [
|
1699
|
+
category = FeedTools::Category.new
|
1700
|
+
category.term = FeedTools::XmlHelper.try_xpaths(category_node, [
|
1585
1701
|
"@term",
|
1586
1702
|
"text()"
|
1587
1703
|
], :select_result_value => true)
|
1588
1704
|
category.term.strip! unless category.term.blank?
|
1589
|
-
category.label = try_xpaths(
|
1705
|
+
category.label = FeedTools::XmlHelper.try_xpaths(
|
1706
|
+
category_node, ["@label"],
|
1590
1707
|
:select_result_value => true)
|
1591
1708
|
category.label.strip! unless category.label.blank?
|
1592
|
-
category.scheme = try_xpaths(category_node, [
|
1709
|
+
category.scheme = FeedTools::XmlHelper.try_xpaths(category_node, [
|
1593
1710
|
"@scheme",
|
1594
1711
|
"@domain"
|
1595
1712
|
], :select_result_value => true)
|
@@ -1605,63 +1722,69 @@ module FeedTools
|
|
1605
1722
|
def images
|
1606
1723
|
if @images.nil?
|
1607
1724
|
@images = []
|
1608
|
-
image_nodes =
|
1609
|
-
|
1610
|
-
|
1611
|
-
|
1612
|
-
|
1613
|
-
|
1614
|
-
|
1615
|
-
|
1616
|
-
])
|
1725
|
+
image_nodes = FeedTools::XmlHelper.combine_xpaths_all(
|
1726
|
+
self.channel_node, [
|
1727
|
+
"image",
|
1728
|
+
"logo",
|
1729
|
+
"apple-wallpapers:image",
|
1730
|
+
"imageUrl"
|
1731
|
+
]
|
1732
|
+
)
|
1617
1733
|
unless image_nodes.blank?
|
1618
1734
|
for image_node in image_nodes
|
1619
|
-
image = FeedTools::
|
1620
|
-
image.
|
1735
|
+
image = FeedTools::Image.new
|
1736
|
+
image.href = FeedTools::XmlHelper.try_xpaths(image_node, [
|
1621
1737
|
"url/text()",
|
1622
1738
|
"@rdf:resource",
|
1739
|
+
"@href",
|
1623
1740
|
"text()"
|
1624
1741
|
], :select_result_value => true)
|
1625
|
-
if image.
|
1626
|
-
|
1627
|
-
image.url = try_xpaths(image_node, [
|
1628
|
-
"@atom10:href",
|
1629
|
-
"@atom03:href",
|
1630
|
-
"@atom:href",
|
1631
|
-
"@href"
|
1632
|
-
], :select_result_value => true)
|
1633
|
-
if image.url == self.link && image.url != nil
|
1634
|
-
image.url = nil
|
1635
|
-
end
|
1742
|
+
if image.href.nil? && image_node.base_uri != nil
|
1743
|
+
image.href = ""
|
1636
1744
|
end
|
1637
|
-
|
1638
|
-
image.
|
1639
|
-
|
1640
|
-
|
1745
|
+
begin
|
1746
|
+
if !(image.href =~ /^file:/) &&
|
1747
|
+
!FeedTools::UriHelper.is_uri?(image.href)
|
1748
|
+
image.href = FeedTools::UriHelper.resolve_relative_uri(
|
1749
|
+
image.href, [image_node.base_uri, self.base_uri])
|
1750
|
+
end
|
1751
|
+
rescue
|
1641
1752
|
end
|
1642
|
-
|
1643
|
-
|
1753
|
+
if FeedTools.configurations[:url_normalization_enabled]
|
1754
|
+
image.href = FeedTools::UriHelper.normalize_url(image.href)
|
1755
|
+
end
|
1756
|
+
image.href.strip! unless image.href.nil?
|
1757
|
+
next if image.href.blank?
|
1758
|
+
image.title = FeedTools::XmlHelper.try_xpaths(image_node,
|
1644
1759
|
["title/text()"], :select_result_value => true)
|
1645
1760
|
image.title.strip! unless image.title.nil?
|
1646
|
-
image.description = try_xpaths(image_node,
|
1761
|
+
image.description = FeedTools::XmlHelper.try_xpaths(image_node,
|
1647
1762
|
["description/text()"], :select_result_value => true)
|
1648
1763
|
image.description.strip! unless image.description.nil?
|
1649
|
-
image.link = try_xpaths(image_node,
|
1764
|
+
image.link = FeedTools::XmlHelper.try_xpaths(image_node,
|
1650
1765
|
["link/text()"], :select_result_value => true)
|
1651
1766
|
image.link.strip! unless image.link.nil?
|
1652
|
-
image.height = try_xpaths(image_node,
|
1767
|
+
image.height = FeedTools::XmlHelper.try_xpaths(image_node,
|
1653
1768
|
["height/text()"], :select_result_value => true).to_i
|
1654
1769
|
image.height = nil if image.height <= 0
|
1655
|
-
image.width = try_xpaths(image_node,
|
1770
|
+
image.width = FeedTools::XmlHelper.try_xpaths(image_node,
|
1656
1771
|
["width/text()"], :select_result_value => true).to_i
|
1657
1772
|
image.width = nil if image.width <= 0
|
1658
|
-
image.style = try_xpaths(image_node, [
|
1773
|
+
image.style = FeedTools::XmlHelper.try_xpaths(image_node, [
|
1659
1774
|
"style/text()",
|
1660
1775
|
"@style"
|
1661
1776
|
], :select_result_value => true)
|
1662
1777
|
image.style.strip! unless image.style.nil?
|
1663
1778
|
image.style.downcase! unless image.style.nil?
|
1664
|
-
@images << image unless image.
|
1779
|
+
@images << image unless image.href.nil?
|
1780
|
+
end
|
1781
|
+
end
|
1782
|
+
for link_object in self.links
|
1783
|
+
if link_object.type != nil && link_object.type =~ /^image/
|
1784
|
+
image = FeedTools::Image.new
|
1785
|
+
image.href = link_object.href
|
1786
|
+
image.title = link_object.title
|
1787
|
+
@images << image unless image.href.nil?
|
1665
1788
|
end
|
1666
1789
|
end
|
1667
1790
|
end
|
@@ -1671,20 +1794,25 @@ module FeedTools
|
|
1671
1794
|
# Returns the feed's text input field
|
1672
1795
|
def text_input
|
1673
1796
|
if @text_input.nil?
|
1674
|
-
@text_input = FeedTools::
|
1675
|
-
text_input_node =
|
1797
|
+
@text_input = FeedTools::TextInput.new
|
1798
|
+
text_input_node =
|
1799
|
+
FeedTools::XmlHelper.try_xpaths(self.channel_node, ["textInput"])
|
1676
1800
|
unless text_input_node.nil?
|
1677
1801
|
@text_input.title =
|
1678
|
-
try_xpaths(text_input_node,
|
1802
|
+
FeedTools::XmlHelper.try_xpaths(text_input_node,
|
1803
|
+
["title/text()"],
|
1679
1804
|
:select_result_value => true)
|
1680
1805
|
@text_input.description =
|
1681
|
-
try_xpaths(text_input_node,
|
1806
|
+
FeedTools::XmlHelper.try_xpaths(text_input_node,
|
1807
|
+
["description/text()"],
|
1682
1808
|
:select_result_value => true)
|
1683
1809
|
@text_input.link =
|
1684
|
-
try_xpaths(text_input_node,
|
1810
|
+
FeedTools::XmlHelper.try_xpaths(text_input_node,
|
1811
|
+
["link/text()"],
|
1685
1812
|
:select_result_value => true)
|
1686
1813
|
@text_input.name =
|
1687
|
-
try_xpaths(text_input_node,
|
1814
|
+
FeedTools::XmlHelper.try_xpaths(text_input_node,
|
1815
|
+
["name/text()"],
|
1688
1816
|
:select_result_value => true)
|
1689
1817
|
end
|
1690
1818
|
end
|
@@ -1692,10 +1820,10 @@ module FeedTools
|
|
1692
1820
|
end
|
1693
1821
|
|
1694
1822
|
# Returns the feed's copyright information
|
1695
|
-
def
|
1696
|
-
if @
|
1823
|
+
def rights
|
1824
|
+
if @rights.nil?
|
1697
1825
|
repair_entities = false
|
1698
|
-
|
1826
|
+
rights_node = FeedTools::XmlHelper.try_xpaths(self.channel_node, [
|
1699
1827
|
"atom10:copyright",
|
1700
1828
|
"atom03:copyright",
|
1701
1829
|
"atom:copyright",
|
@@ -1704,63 +1832,40 @@ module FeedTools
|
|
1704
1832
|
"dc:rights",
|
1705
1833
|
"rights"
|
1706
1834
|
])
|
1707
|
-
|
1708
|
-
|
1835
|
+
@rights = FeedTools::HtmlHelper.process_text_construct(rights_node,
|
1836
|
+
self.feed_type, self.feed_version)
|
1837
|
+
if self.feed_type == "atom" ||
|
1838
|
+
FeedTools.configurations[:always_strip_wrapper_elements]
|
1839
|
+
@rights = FeedTools::HtmlHelper.strip_wrapper_element(@rights)
|
1709
1840
|
end
|
1710
|
-
copyright_type = try_xpaths(copyright_node, "@type",
|
1711
|
-
:select_result_value => true)
|
1712
|
-
copyright_mode = try_xpaths(copyright_node, "@mode",
|
1713
|
-
:select_result_value => true)
|
1714
|
-
copyright_encoding = try_xpaths(copyright_node, "@encoding",
|
1715
|
-
:select_result_value => true)
|
1716
|
-
|
1717
|
-
# Note that we're checking for misuse of type, mode and encoding here
|
1718
|
-
if !copyright_encoding.blank?
|
1719
|
-
@copyright =
|
1720
|
-
"[Embedded data objects are not currently supported.]"
|
1721
|
-
elsif copyright_node.cdatas.size > 0
|
1722
|
-
@copyright = copyright_node.cdatas.first.value
|
1723
|
-
elsif copyright_type == "base64" || copyright_mode == "base64" ||
|
1724
|
-
copyright_encoding == "base64"
|
1725
|
-
@copyright = Base64.decode64(copyright_node.inner_xml.strip)
|
1726
|
-
elsif copyright_type == "xhtml" || copyright_mode == "xhtml" ||
|
1727
|
-
copyright_type == "xml" || copyright_mode == "xml" ||
|
1728
|
-
copyright_type == "application/xhtml+xml"
|
1729
|
-
@copyright = copyright_node.inner_xml
|
1730
|
-
elsif copyright_type == "escaped" || copyright_mode == "escaped"
|
1731
|
-
@copyright = FeedTools.unescape_entities(
|
1732
|
-
copyright_node.inner_xml)
|
1733
|
-
else
|
1734
|
-
@copyright = copyright_node.inner_xml
|
1735
|
-
repair_entities = true
|
1736
|
-
end
|
1737
|
-
|
1738
|
-
unless @copyright.nil?
|
1739
|
-
@copyright = FeedTools.sanitize_html(@copyright, :strip)
|
1740
|
-
@copyright = FeedTools.unescape_entities(@copyright) if repair_entities
|
1741
|
-
@copyright = FeedTools.tidy_html(@copyright)
|
1742
|
-
end
|
1743
|
-
|
1744
|
-
@copyright = @copyright.strip unless @copyright.nil?
|
1745
|
-
@copyright = nil if @copyright.blank?
|
1746
1841
|
end
|
1747
|
-
return @
|
1842
|
+
return @rights
|
1748
1843
|
end
|
1749
1844
|
|
1750
|
-
# Sets the feed's
|
1751
|
-
def
|
1752
|
-
@
|
1845
|
+
# Sets the feed's rights information
|
1846
|
+
def rights=(new_rights)
|
1847
|
+
@rights = new_rights
|
1753
1848
|
end
|
1754
1849
|
|
1850
|
+
def license #:nodoc:
|
1851
|
+
raise "Not implemented yet."
|
1852
|
+
end
|
1853
|
+
|
1854
|
+
def license=(new_license) #:nodoc:
|
1855
|
+
raise "Not implemented yet."
|
1856
|
+
end
|
1857
|
+
|
1755
1858
|
# Returns the number of seconds before the feed should expire
|
1756
1859
|
def time_to_live
|
1757
1860
|
if @time_to_live.nil?
|
1758
1861
|
unless channel_node.nil?
|
1759
1862
|
# get the feed time to live from the xml document
|
1760
|
-
update_frequency = try_xpaths(
|
1863
|
+
update_frequency = FeedTools::XmlHelper.try_xpaths(
|
1864
|
+
self.channel_node,
|
1761
1865
|
["syn:updateFrequency/text()"], :select_result_value => true)
|
1762
1866
|
if !update_frequency.blank?
|
1763
|
-
update_period = try_xpaths(
|
1867
|
+
update_period = FeedTools::XmlHelper.try_xpaths(
|
1868
|
+
self.channel_node,
|
1764
1869
|
["syn:updatePeriod/text()"], :select_result_value => true)
|
1765
1870
|
if update_period == "daily"
|
1766
1871
|
@time_to_live = update_frequency.to_i.day
|
@@ -1777,10 +1882,12 @@ module FeedTools
|
|
1777
1882
|
end
|
1778
1883
|
if @time_to_live.nil?
|
1779
1884
|
# usually expressed in minutes
|
1780
|
-
update_frequency = try_xpaths(
|
1885
|
+
update_frequency = FeedTools::XmlHelper.try_xpaths(
|
1886
|
+
self.channel_node, ["ttl/text()"],
|
1781
1887
|
:select_result_value => true)
|
1782
1888
|
if !update_frequency.blank?
|
1783
|
-
update_span = try_xpaths(
|
1889
|
+
update_span = FeedTools::XmlHelper.try_xpaths(
|
1890
|
+
self.channel_node, ["ttl/@span"],
|
1784
1891
|
:select_result_value => true)
|
1785
1892
|
if update_span == "seconds"
|
1786
1893
|
@time_to_live = update_frequency.to_i
|
@@ -1804,24 +1911,28 @@ module FeedTools
|
|
1804
1911
|
if @time_to_live.nil?
|
1805
1912
|
@time_to_live = 0
|
1806
1913
|
update_frequency_days =
|
1807
|
-
|
1914
|
+
FeedTools::XmlHelper.try_xpaths(self.channel_node,
|
1915
|
+
["schedule/intervaltime/@day"], :select_result_value => true)
|
1808
1916
|
update_frequency_hours =
|
1809
|
-
|
1917
|
+
FeedTools::XmlHelper.try_xpaths(self.channel_node,
|
1918
|
+
["schedule/intervaltime/@hour"], :select_result_value => true)
|
1810
1919
|
update_frequency_minutes =
|
1811
|
-
|
1920
|
+
FeedTools::XmlHelper.try_xpaths(self.channel_node,
|
1921
|
+
["schedule/intervaltime/@min"], :select_result_value => true)
|
1812
1922
|
update_frequency_seconds =
|
1813
|
-
|
1814
|
-
|
1923
|
+
FeedTools::XmlHelper.try_xpaths(self.channel_node,
|
1924
|
+
["schedule/intervaltime/@sec"], :select_result_value => true)
|
1925
|
+
if !update_frequency_days.blank?
|
1815
1926
|
@time_to_live = @time_to_live + update_frequency_days.to_i.day
|
1816
1927
|
end
|
1817
|
-
if update_frequency_hours
|
1928
|
+
if !update_frequency_hours.blank?
|
1818
1929
|
@time_to_live = @time_to_live + update_frequency_hours.to_i.hour
|
1819
1930
|
end
|
1820
|
-
if update_frequency_minutes
|
1931
|
+
if !update_frequency_minutes.blank?
|
1821
1932
|
@time_to_live = @time_to_live +
|
1822
1933
|
update_frequency_minutes.to_i.minute
|
1823
1934
|
end
|
1824
|
-
if update_frequency_seconds
|
1935
|
+
if !update_frequency_seconds.blank?
|
1825
1936
|
@time_to_live = @time_to_live + update_frequency_seconds.to_i
|
1826
1937
|
end
|
1827
1938
|
if @time_to_live == 0
|
@@ -1851,18 +1962,23 @@ module FeedTools
|
|
1851
1962
|
# Returns the feed's cloud
|
1852
1963
|
def cloud
|
1853
1964
|
if @cloud.nil?
|
1854
|
-
@cloud = FeedTools::
|
1855
|
-
@cloud.domain = try_xpaths(
|
1965
|
+
@cloud = FeedTools::Cloud.new
|
1966
|
+
@cloud.domain = FeedTools::XmlHelper.try_xpaths(
|
1967
|
+
self.channel_node, ["cloud/@domain"],
|
1856
1968
|
:select_result_value => true)
|
1857
|
-
@cloud.port = try_xpaths(
|
1969
|
+
@cloud.port = FeedTools::XmlHelper.try_xpaths(
|
1970
|
+
self.channel_node, ["cloud/@port"],
|
1858
1971
|
:select_result_value => true)
|
1859
|
-
@cloud.path = try_xpaths(
|
1972
|
+
@cloud.path = FeedTools::XmlHelper.try_xpaths(
|
1973
|
+
self.channel_node, ["cloud/@path"],
|
1860
1974
|
:select_result_value => true)
|
1861
1975
|
@cloud.register_procedure =
|
1862
|
-
try_xpaths(
|
1976
|
+
FeedTools::XmlHelper.try_xpaths(
|
1977
|
+
self.channel_node, ["cloud/@registerProcedure"],
|
1863
1978
|
:select_result_value => true)
|
1864
1979
|
@cloud.protocol =
|
1865
|
-
try_xpaths(
|
1980
|
+
FeedTools::XmlHelper.try_xpaths(
|
1981
|
+
self.channel_node, ["cloud/@protocol"],
|
1866
1982
|
:select_result_value => true)
|
1867
1983
|
@cloud.protocol.downcase unless @cloud.protocol.nil?
|
1868
1984
|
@cloud.port = @cloud.port.to_s.to_i
|
@@ -1879,14 +1995,23 @@ module FeedTools
|
|
1879
1995
|
# Returns the feed generator
|
1880
1996
|
def generator
|
1881
1997
|
if @generator.nil?
|
1882
|
-
@generator = try_xpaths(
|
1998
|
+
@generator = FeedTools::XmlHelper.try_xpaths(
|
1999
|
+
self.channel_node, ["generator/text()"],
|
1883
2000
|
:select_result_value => true)
|
1884
|
-
|
2001
|
+
unless @generator.nil?
|
2002
|
+
@generator =
|
2003
|
+
FeedTools::HtmlHelper.convert_html_to_plain_text(@generator)
|
2004
|
+
end
|
1885
2005
|
end
|
1886
2006
|
return @generator
|
1887
2007
|
end
|
1888
2008
|
|
1889
2009
|
# Sets the feed generator
|
2010
|
+
#
|
2011
|
+
# Note: Setting this variable will NOT cause this to appear in any
|
2012
|
+
# generated output. The generator string is created from the
|
2013
|
+
# <tt>:generator_name</tt> and <tt>:generator_href</tt> configuration
|
2014
|
+
# variables.
|
1890
2015
|
def generator=(new_generator)
|
1891
2016
|
@generator = new_generator
|
1892
2017
|
end
|
@@ -1894,9 +2019,24 @@ module FeedTools
|
|
1894
2019
|
# Returns the feed docs
|
1895
2020
|
def docs
|
1896
2021
|
if @docs.nil?
|
1897
|
-
@docs = try_xpaths(
|
2022
|
+
@docs = FeedTools::XmlHelper.try_xpaths(
|
2023
|
+
self.channel_node, ["docs/text()"],
|
1898
2024
|
:select_result_value => true)
|
1899
|
-
|
2025
|
+
begin
|
2026
|
+
if !(@docs =~ /^file:/) &&
|
2027
|
+
!FeedTools::UriHelper.is_uri?(@docs)
|
2028
|
+
channel_base_uri = nil
|
2029
|
+
unless self.channel_node.nil?
|
2030
|
+
channel_base_uri = self.channel_node.base_uri
|
2031
|
+
end
|
2032
|
+
@docs = FeedTools::UriHelper.resolve_relative_uri(
|
2033
|
+
@docs, [channel_base_uri, self.base_uri])
|
2034
|
+
end
|
2035
|
+
rescue
|
2036
|
+
end
|
2037
|
+
if FeedTools.configurations[:url_normalization_enabled]
|
2038
|
+
@docs = FeedTools::UriHelper.normalize_url(@docs)
|
2039
|
+
end
|
1900
2040
|
end
|
1901
2041
|
return @docs
|
1902
2042
|
end
|
@@ -1909,15 +2049,15 @@ module FeedTools
|
|
1909
2049
|
# Returns the feed language
|
1910
2050
|
def language
|
1911
2051
|
if @language.nil?
|
1912
|
-
@language = select_not_blank([
|
1913
|
-
try_xpaths(self.channel_node, [
|
2052
|
+
@language = FeedTools::XmlHelper.select_not_blank([
|
2053
|
+
FeedTools::XmlHelper.try_xpaths(self.channel_node, [
|
1914
2054
|
"language/text()",
|
1915
2055
|
"dc:language/text()",
|
1916
2056
|
"@dc:language",
|
1917
2057
|
"@xml:lang",
|
1918
2058
|
"xml:lang/text()"
|
1919
2059
|
], :select_result_value => true),
|
1920
|
-
try_xpaths(self.root_node, [
|
2060
|
+
FeedTools::XmlHelper.try_xpaths(self.root_node, [
|
1921
2061
|
"@xml:lang",
|
1922
2062
|
"xml:lang/text()"
|
1923
2063
|
], :select_result_value => true)
|
@@ -1938,7 +2078,7 @@ module FeedTools
|
|
1938
2078
|
# Returns true if this feed contains explicit material.
|
1939
2079
|
def explicit?
|
1940
2080
|
if @explicit.nil?
|
1941
|
-
explicit_string = try_xpaths(self.channel_node, [
|
2081
|
+
explicit_string = FeedTools::XmlHelper.try_xpaths(self.channel_node, [
|
1942
2082
|
"media:adult/text()",
|
1943
2083
|
"itunes:explicit/text()"
|
1944
2084
|
], :select_result_value => true)
|
@@ -1958,24 +2098,32 @@ module FeedTools
|
|
1958
2098
|
|
1959
2099
|
# Returns the feed entries
|
1960
2100
|
def entries
|
1961
|
-
if @entries.
|
1962
|
-
raw_entries = select_not_blank([
|
1963
|
-
try_xpaths_all(self.channel_node, [
|
2101
|
+
if @entries.nil?
|
2102
|
+
raw_entries = FeedTools::XmlHelper.select_not_blank([
|
2103
|
+
FeedTools::XmlHelper.try_xpaths_all(self.channel_node, [
|
1964
2104
|
"atom10:entry",
|
1965
2105
|
"atom03:entry",
|
1966
2106
|
"atom:entry",
|
1967
2107
|
"entry"
|
1968
2108
|
]),
|
1969
|
-
try_xpaths_all(self.root_node, [
|
2109
|
+
FeedTools::XmlHelper.try_xpaths_all(self.root_node, [
|
1970
2110
|
"rss10:item",
|
2111
|
+
"rss11:items/rss11:item",
|
2112
|
+
"rss11:items/item",
|
2113
|
+
"items/rss11:item",
|
2114
|
+
"items/item",
|
1971
2115
|
"item",
|
1972
2116
|
"atom10:entry",
|
1973
2117
|
"atom03:entry",
|
1974
2118
|
"atom:entry",
|
1975
2119
|
"entry"
|
1976
2120
|
]),
|
1977
|
-
try_xpaths_all(self.channel_node, [
|
2121
|
+
FeedTools::XmlHelper.try_xpaths_all(self.channel_node, [
|
1978
2122
|
"rss10:item",
|
2123
|
+
"rss11:items/rss11:item",
|
2124
|
+
"rss11:items/item",
|
2125
|
+
"items/rss11:item",
|
2126
|
+
"items/item",
|
1979
2127
|
"item"
|
1980
2128
|
])
|
1981
2129
|
])
|
@@ -1987,14 +2135,27 @@ module FeedTools
|
|
1987
2135
|
new_entry = FeedItem.new
|
1988
2136
|
new_entry.feed_data = entry_node.to_s
|
1989
2137
|
new_entry.feed_data_type = self.feed_data_type
|
2138
|
+
new_entry.root_node = entry_node
|
2139
|
+
if new_entry.root_node.namespace.blank?
|
2140
|
+
new_entry.root_node.add_namespace(self.root_node.namespace)
|
2141
|
+
end
|
1990
2142
|
@entries << new_entry
|
1991
2143
|
end
|
1992
2144
|
end
|
1993
2145
|
end
|
1994
2146
|
|
1995
2147
|
# Sort the items
|
1996
|
-
|
1997
|
-
|
2148
|
+
if FeedTools.configurations[:entry_sorting_property] == "time"
|
2149
|
+
@entries = @entries.sort do |a, b|
|
2150
|
+
(b.time or Time.utc(1970)) <=> (a.time or Time.utc(1970))
|
2151
|
+
end
|
2152
|
+
elsif FeedTools.configurations[:entry_sorting_property] != nil
|
2153
|
+
sorting_property = FeedTools.configurations[:entry_sorting_property]
|
2154
|
+
@entries = @entries.sort do |a, b|
|
2155
|
+
eval("a.#{sorting_property}") <=> eval("b.#{sorting_property}")
|
2156
|
+
end
|
2157
|
+
else
|
2158
|
+
@entries = @entries.reverse
|
1998
2159
|
end
|
1999
2160
|
return @entries
|
2000
2161
|
end
|
@@ -2090,58 +2251,73 @@ module FeedTools
|
|
2090
2251
|
end
|
2091
2252
|
|
2092
2253
|
# Generates xml based on the content of the feed
|
2093
|
-
def build_xml(feed_type=(self.feed_type or "atom"),
|
2254
|
+
def build_xml(feed_type=(self.feed_type or "atom"), feed_version=nil,
|
2094
2255
|
xml_builder=Builder::XmlMarkup.new(
|
2095
2256
|
:indent => 2, :escape_attrs => false))
|
2096
2257
|
xml_builder.instruct! :xml, :version => "1.0",
|
2097
2258
|
:encoding => (FeedTools.configurations[:output_encoding] or "utf-8")
|
2098
|
-
if feed_type
|
2099
|
-
|
2100
|
-
|
2101
|
-
|
2259
|
+
if feed_type.nil?
|
2260
|
+
feed_type = self.feed_type
|
2261
|
+
end
|
2262
|
+
if feed_version.nil?
|
2263
|
+
feed_version = self.feed_version
|
2102
2264
|
end
|
2103
|
-
if feed_type == "rss" &&
|
2104
|
-
|
2265
|
+
if feed_type == "rss" &&
|
2266
|
+
(feed_version == nil || feed_version <= 0.0)
|
2267
|
+
feed_version = 1.0
|
2268
|
+
elsif feed_type == "atom" &&
|
2269
|
+
(feed_version == nil || feed_version <= 0.0)
|
2270
|
+
feed_version = 1.0
|
2271
|
+
end
|
2272
|
+
if feed_type == "rss" &&
|
2273
|
+
(feed_version == 0.9 || feed_version == 1.0 || feed_version == 1.1)
|
2105
2274
|
# RDF-based rss format
|
2106
2275
|
return xml_builder.tag!("rdf:RDF",
|
2107
2276
|
"xmlns" => FEED_TOOLS_NAMESPACES['rss10'],
|
2277
|
+
"xmlns:content" => FEED_TOOLS_NAMESPACES['content'],
|
2108
2278
|
"xmlns:rdf" => FEED_TOOLS_NAMESPACES['rdf'],
|
2109
2279
|
"xmlns:dc" => FEED_TOOLS_NAMESPACES['dc'],
|
2110
2280
|
"xmlns:syn" => FEED_TOOLS_NAMESPACES['syn'],
|
2281
|
+
"xmlns:admin" => FEED_TOOLS_NAMESPACES['admin'],
|
2111
2282
|
"xmlns:taxo" => FEED_TOOLS_NAMESPACES['taxo'],
|
2112
2283
|
"xmlns:itunes" => FEED_TOOLS_NAMESPACES['itunes'],
|
2113
2284
|
"xmlns:media" => FEED_TOOLS_NAMESPACES['media']) do
|
2114
2285
|
channel_attributes = {}
|
2115
2286
|
unless self.link.nil?
|
2116
2287
|
channel_attributes["rdf:about"] =
|
2117
|
-
FeedTools.escape_entities(self.link)
|
2288
|
+
FeedTools::HtmlHelper.escape_entities(self.link)
|
2118
2289
|
end
|
2119
2290
|
xml_builder.channel(channel_attributes) do
|
2120
|
-
unless title.
|
2121
|
-
xml_builder.title(
|
2291
|
+
unless self.title.blank?
|
2292
|
+
xml_builder.title(
|
2293
|
+
FeedTools::HtmlHelper.strip_html_tags(self.title))
|
2122
2294
|
else
|
2123
2295
|
xml_builder.title
|
2124
2296
|
end
|
2125
|
-
unless link.
|
2126
|
-
xml_builder.link(link)
|
2297
|
+
unless self.link.blank?
|
2298
|
+
xml_builder.link(self.link)
|
2127
2299
|
else
|
2128
2300
|
xml_builder.link
|
2129
2301
|
end
|
2130
|
-
unless images.
|
2131
|
-
xml_builder.image("rdf:resource" =>
|
2132
|
-
|
2302
|
+
unless images.blank?
|
2303
|
+
xml_builder.image("rdf:resource" =>
|
2304
|
+
FeedTools::HtmlHelper.escape_entities(
|
2305
|
+
images.first.url))
|
2133
2306
|
end
|
2134
2307
|
unless description.nil? || description == ""
|
2135
2308
|
xml_builder.description(description)
|
2136
2309
|
else
|
2137
2310
|
xml_builder.description
|
2138
2311
|
end
|
2139
|
-
unless language.
|
2140
|
-
xml_builder.tag!("dc:language", language)
|
2312
|
+
unless self.language.blank?
|
2313
|
+
xml_builder.tag!("dc:language", self.language)
|
2314
|
+
end
|
2315
|
+
unless self.rights.blank?
|
2316
|
+
xml_builder.tag!("dc:rights", self.rights)
|
2141
2317
|
end
|
2142
2318
|
xml_builder.tag!("syn:updatePeriod", "hourly")
|
2143
2319
|
xml_builder.tag!("syn:updateFrequency",
|
2144
|
-
(time_to_live / 1.hour).to_s)
|
2320
|
+
(self.time_to_live / 1.hour).to_s)
|
2145
2321
|
xml_builder.tag!("syn:updateBase", Time.mktime(1970).iso8601)
|
2146
2322
|
xml_builder.items do
|
2147
2323
|
xml_builder.tag!("rdf:Seq") do
|
@@ -2152,14 +2328,17 @@ module FeedTools
|
|
2152
2328
|
"item link field."
|
2153
2329
|
end
|
2154
2330
|
xml_builder.tag!("rdf:li", "rdf:resource" =>
|
2155
|
-
FeedTools.escape_entities(item.link))
|
2331
|
+
FeedTools::HtmlHelper.escape_entities(item.link))
|
2156
2332
|
end
|
2157
2333
|
end
|
2158
2334
|
end
|
2159
2335
|
end
|
2160
|
-
|
2336
|
+
xml_builder.tag!(
|
2337
|
+
"admin:generatorAgent",
|
2338
|
+
"rdf:resource" => FeedTools.configurations[:generator_href])
|
2339
|
+
build_xml_hook(feed_type, feed_version, xml_builder)
|
2161
2340
|
end
|
2162
|
-
unless
|
2341
|
+
unless self.images.blank?
|
2163
2342
|
best_image = nil
|
2164
2343
|
for image in self.images
|
2165
2344
|
if image.link != nil
|
@@ -2167,9 +2346,9 @@ module FeedTools
|
|
2167
2346
|
break
|
2168
2347
|
end
|
2169
2348
|
end
|
2170
|
-
best_image = images.first if best_image.nil?
|
2171
|
-
xml_builder.image(
|
2172
|
-
|
2349
|
+
best_image = self.images.first if best_image.nil?
|
2350
|
+
xml_builder.image("rdf:about" =>
|
2351
|
+
FeedTools::HtmlHelper.escape_entities(best_image.url)) do
|
2173
2352
|
if !best_image.title.blank?
|
2174
2353
|
xml_builder.title(best_image.title)
|
2175
2354
|
elsif !self.title.blank?
|
@@ -2191,13 +2370,14 @@ module FeedTools
|
|
2191
2370
|
end
|
2192
2371
|
unless items.nil?
|
2193
2372
|
for item in items
|
2194
|
-
item.build_xml(feed_type,
|
2373
|
+
item.build_xml(feed_type, feed_version, xml_builder)
|
2195
2374
|
end
|
2196
2375
|
end
|
2197
2376
|
end
|
2198
2377
|
elsif feed_type == "rss"
|
2199
2378
|
# normal rss format
|
2200
2379
|
return xml_builder.rss("version" => "2.0",
|
2380
|
+
"xmlns:content" => FEED_TOOLS_NAMESPACES['content'],
|
2201
2381
|
"xmlns:rdf" => FEED_TOOLS_NAMESPACES['rdf'],
|
2202
2382
|
"xmlns:dc" => FEED_TOOLS_NAMESPACES['dc'],
|
2203
2383
|
"xmlns:taxo" => FEED_TOOLS_NAMESPACES['taxo'],
|
@@ -2205,29 +2385,41 @@ module FeedTools
|
|
2205
2385
|
"xmlns:itunes" => FEED_TOOLS_NAMESPACES['itunes'],
|
2206
2386
|
"xmlns:media" => FEED_TOOLS_NAMESPACES['media']) do
|
2207
2387
|
xml_builder.channel do
|
2208
|
-
unless title.blank?
|
2209
|
-
xml_builder.title(
|
2388
|
+
unless self.title.blank?
|
2389
|
+
xml_builder.title(
|
2390
|
+
FeedTools::HtmlHelper.strip_html_tags(self.title))
|
2210
2391
|
end
|
2211
|
-
unless link.blank?
|
2392
|
+
unless self.link.blank?
|
2212
2393
|
xml_builder.link(link)
|
2213
2394
|
end
|
2214
|
-
unless description.blank?
|
2395
|
+
unless self.description.blank?
|
2215
2396
|
xml_builder.description(description)
|
2397
|
+
else
|
2398
|
+
xml_builder.description
|
2399
|
+
end
|
2400
|
+
unless self.published.blank?
|
2401
|
+
xml_builder.pubDate(self.published.rfc822)
|
2402
|
+
end
|
2403
|
+
unless self.updated.blank?
|
2404
|
+
xml_builder.lastBuildDate(self.updated.rfc822)
|
2405
|
+
end
|
2406
|
+
unless self.copyright.blank?
|
2407
|
+
xml_builder.copyright(self.copyright)
|
2216
2408
|
end
|
2217
2409
|
xml_builder.ttl((time_to_live / 1.minute).to_s)
|
2218
2410
|
xml_builder.generator(
|
2219
2411
|
FeedTools.configurations[:generator_href])
|
2220
|
-
build_xml_hook(feed_type,
|
2412
|
+
build_xml_hook(feed_type, feed_version, xml_builder)
|
2221
2413
|
unless items.nil?
|
2222
2414
|
for item in items
|
2223
|
-
item.build_xml(feed_type,
|
2415
|
+
item.build_xml(feed_type, feed_version, xml_builder)
|
2224
2416
|
end
|
2225
2417
|
end
|
2226
2418
|
end
|
2227
2419
|
end
|
2228
|
-
elsif feed_type == "atom" &&
|
2420
|
+
elsif feed_type == "atom" && feed_version == 0.3
|
2229
2421
|
raise "Atom 0.3 is obsolete."
|
2230
|
-
elsif feed_type == "atom" &&
|
2422
|
+
elsif feed_type == "atom" && feed_version == 1.0
|
2231
2423
|
# normal atom format
|
2232
2424
|
return xml_builder.feed("xmlns" => FEED_TOOLS_NAMESPACES['atom10'],
|
2233
2425
|
"xml:lang" => language) do
|
@@ -2248,18 +2440,18 @@ module FeedTools
|
|
2248
2440
|
xml_builder.uri(self.author.url)
|
2249
2441
|
end
|
2250
2442
|
end
|
2251
|
-
unless self.
|
2252
|
-
xml_builder.link("href" => self.
|
2443
|
+
unless self.href.blank?
|
2444
|
+
xml_builder.link("href" => self.href,
|
2253
2445
|
"rel" => "self",
|
2254
2446
|
"type" => "application/atom+xml")
|
2255
2447
|
end
|
2256
2448
|
unless self.link.blank?
|
2257
|
-
xml_builder.link(
|
2258
|
-
|
2259
|
-
|
2260
|
-
|
2449
|
+
xml_builder.link(
|
2450
|
+
"href" =>
|
2451
|
+
FeedTools::HtmlHelper.escape_entities(self.link),
|
2452
|
+
"rel" => "alternate")
|
2261
2453
|
end
|
2262
|
-
unless
|
2454
|
+
unless self.subtitle.blank?
|
2263
2455
|
xml_builder.subtitle(self.subtitle,
|
2264
2456
|
"type" => "html")
|
2265
2457
|
end
|
@@ -2272,12 +2464,15 @@ module FeedTools
|
|
2272
2464
|
else
|
2273
2465
|
xml_builder.updated(Time.now.gmtime.iso8601)
|
2274
2466
|
end
|
2467
|
+
unless self.rights.blank?
|
2468
|
+
xml_builder.rights(self.rights)
|
2469
|
+
end
|
2275
2470
|
xml_builder.generator(FeedTools.configurations[:generator_name] +
|
2276
2471
|
" - " + FeedTools.configurations[:generator_href])
|
2277
2472
|
if self.id != nil
|
2278
|
-
unless FeedTools.is_uri? self.id
|
2473
|
+
unless FeedTools::UriHelper.is_uri? self.id
|
2279
2474
|
if self.link != nil
|
2280
|
-
xml_builder.id(FeedTools.build_urn_uri(self.link))
|
2475
|
+
xml_builder.id(FeedTools::UriHelper.build_urn_uri(self.link))
|
2281
2476
|
else
|
2282
2477
|
raise "The unique id must be a valid URI."
|
2283
2478
|
end
|
@@ -2285,14 +2480,14 @@ module FeedTools
|
|
2285
2480
|
xml_builder.id(self.id)
|
2286
2481
|
end
|
2287
2482
|
elsif self.link != nil
|
2288
|
-
xml_builder.id(FeedTools.build_urn_uri(self.link))
|
2483
|
+
xml_builder.id(FeedTools::UriHelper.build_urn_uri(self.link))
|
2289
2484
|
else
|
2290
2485
|
raise "Cannot build feed, missing feed unique id."
|
2291
2486
|
end
|
2292
|
-
build_xml_hook(feed_type,
|
2487
|
+
build_xml_hook(feed_type, feed_version, xml_builder)
|
2293
2488
|
unless items.nil?
|
2294
2489
|
for item in items
|
2295
|
-
item.build_xml(feed_type,
|
2490
|
+
item.build_xml(feed_type, feed_version, xml_builder)
|
2296
2491
|
end
|
2297
2492
|
end
|
2298
2493
|
end
|
@@ -2303,15 +2498,15 @@ module FeedTools
|
|
2303
2498
|
|
2304
2499
|
# Persists the current feed state to the cache.
|
2305
2500
|
def save
|
2306
|
-
unless self.
|
2501
|
+
unless self.href =~ /^file:\/\//
|
2307
2502
|
if FeedTools.feed_cache.nil?
|
2308
2503
|
raise "Caching is currently disabled. Cannot save to cache."
|
2309
|
-
elsif self.
|
2504
|
+
elsif self.href.nil?
|
2310
2505
|
raise "The url field must be set to save to the cache."
|
2311
2506
|
elsif self.cache_object.nil?
|
2312
2507
|
raise "The cache_object is currently nil. Cannot save to cache."
|
2313
2508
|
else
|
2314
|
-
self.cache_object.
|
2509
|
+
self.cache_object.href = self.href
|
2315
2510
|
unless self.feed_data.nil?
|
2316
2511
|
self.cache_object.title = self.title
|
2317
2512
|
self.cache_object.link = self.link
|
@@ -2324,15 +2519,17 @@ module FeedTools
|
|
2324
2519
|
end
|
2325
2520
|
end
|
2326
2521
|
end
|
2327
|
-
|
2522
|
+
|
2523
|
+
alias_method :url, :href
|
2524
|
+
alias_method :url=, :href=
|
2328
2525
|
alias_method :tagline, :subtitle
|
2329
2526
|
alias_method :tagline=, :subtitle=
|
2330
2527
|
alias_method :description, :subtitle
|
2331
2528
|
alias_method :description=, :subtitle=
|
2332
2529
|
alias_method :abstract, :subtitle
|
2333
2530
|
alias_method :abstract=, :subtitle=
|
2334
|
-
alias_method :
|
2335
|
-
alias_method :
|
2531
|
+
alias_method :copyright, :rights
|
2532
|
+
alias_method :copyright=, :rights=
|
2336
2533
|
alias_method :ttl, :time_to_live
|
2337
2534
|
alias_method :ttl=, :time_to_live=
|
2338
2535
|
alias_method :guid, :id
|
@@ -2362,7 +2559,12 @@ module FeedTools
|
|
2362
2559
|
|
2363
2560
|
# Returns a simple representation of the feed object's state.
|
2364
2561
|
def inspect
|
2365
|
-
return "#<FeedTools::Feed:0x#{self.object_id.to_s(16)} URL:#{self.
|
2562
|
+
return "#<FeedTools::Feed:0x#{self.object_id.to_s(16)} URL:#{self.href}>"
|
2563
|
+
end
|
2564
|
+
|
2565
|
+
# Allows sorting feeds by title
|
2566
|
+
def <=>(other_feed)
|
2567
|
+
return self.title.to_s <=> other_feed.title.to_s
|
2366
2568
|
end
|
2367
2569
|
end
|
2368
2570
|
end
|