feedtools 0.2.18 → 0.2.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +28 -0
- data/lib/feed_tools.rb +328 -63
- data/lib/feed_tools/feed.rb +767 -764
- data/lib/feed_tools/feed_item.rb +684 -625
- data/lib/feed_tools/helpers/debug_helper.rb +37 -0
- data/lib/feed_tools/helpers/feed_tools_helper.rb +45 -41
- data/lib/feed_tools/helpers/generic_helper.rb +164 -0
- data/lib/feed_tools/helpers/retrieval_helper.rb +36 -0
- data/rakefile +298 -2
- data/test/unit/amp_test.rb +70 -69
- data/test/unit/atom_test.rb +91 -9
- data/test/unit/cache_test.rb +30 -11
- data/test/unit/cdf_test.rb +6 -4
- data/test/unit/encoding_test.rb +99 -0
- data/test/unit/generation_test.rb +3 -40
- data/test/unit/helper_test.rb +66 -6
- data/test/unit/interface_test.rb +34 -0
- data/test/unit/itunes_test.rb +19 -0
- data/test/unit/nonstandard_test.rb +22 -4
- data/test/unit/rdf_test.rb +19 -0
- data/test/unit/rss_test.rb +137 -43
- metadata +18 -8
- data/lib/feed_tools/vendor/builder.rb +0 -15
- data/lib/feed_tools/vendor/builder/blankslate.rb +0 -55
- data/lib/feed_tools/vendor/builder/xmlbase.rb +0 -144
- data/lib/feed_tools/vendor/builder/xmlevents.rb +0 -65
- data/lib/feed_tools/vendor/builder/xmlmarkup.rb +0 -299
data/lib/feed_tools/feed.rb
CHANGED
@@ -29,9 +29,11 @@ module FeedTools
|
|
29
29
|
# :stopdoc:
|
30
30
|
include REXML
|
31
31
|
class << self
|
32
|
-
include GenericHelper
|
32
|
+
include FeedTools::GenericHelper
|
33
33
|
private :validate_options
|
34
34
|
end
|
35
|
+
include FeedTools::GenericHelper
|
36
|
+
private :validate_options
|
35
37
|
# :startdoc:
|
36
38
|
|
37
39
|
# Represents a feed/feed item's category
|
@@ -143,7 +145,7 @@ module FeedTools
|
|
143
145
|
@link = nil
|
144
146
|
@last_retrieved = nil
|
145
147
|
@time_to_live = nil
|
146
|
-
@
|
148
|
+
@entries = nil
|
147
149
|
@live = false
|
148
150
|
end
|
149
151
|
|
@@ -175,10 +177,12 @@ module FeedTools
|
|
175
177
|
# Loads the feed from the remote url if the feed has expired from the cache or cannot be
|
176
178
|
# retrieved from the cache for some reason.
|
177
179
|
def update!
|
178
|
-
if self.http_headers.
|
180
|
+
if self.http_headers.blank? && !(self.cache_object.nil?) &&
|
179
181
|
!(self.cache_object.http_headers.nil?)
|
180
182
|
@http_headers = YAML.load(self.cache_object.http_headers)
|
181
183
|
@http_headers = {} unless @http_headers.kind_of? Hash
|
184
|
+
elsif self.http_headers.blank?
|
185
|
+
@http_headers = {}
|
182
186
|
end
|
183
187
|
if self.expired? == false
|
184
188
|
@live = false
|
@@ -240,11 +244,12 @@ module FeedTools
|
|
240
244
|
self.http_headers['last-modified'] unless
|
241
245
|
self.http_headers['last-modified'].nil?
|
242
246
|
end
|
243
|
-
|
244
|
-
|
247
|
+
unless FeedTools.configurations[:user_agent].nil?
|
248
|
+
headers["User-Agent"] = FeedTools.configurations[:user_agent]
|
249
|
+
end
|
245
250
|
|
246
251
|
# The http feed access method
|
247
|
-
http_fetch = lambda do |feed_url,
|
252
|
+
http_fetch = lambda do |feed_url, request_headers, redirect_limit,
|
248
253
|
response_chain, no_headers|
|
249
254
|
raise FeedAccessError, 'Redirect too deep' if redirect_limit == 0
|
250
255
|
feed_uri = nil
|
@@ -256,11 +261,13 @@ module FeedTools
|
|
256
261
|
end
|
257
262
|
|
258
263
|
begin
|
259
|
-
|
264
|
+
# TODO: Proxy host and proxy port would go here if implemented
|
265
|
+
http = Net::HTTP.new(feed_uri.host, (feed_uri.port or 80))
|
266
|
+
http.start do
|
260
267
|
final_uri = feed_uri.path
|
261
268
|
final_uri += ('?' + feed_uri.query) if feed_uri.query
|
262
|
-
|
263
|
-
response = http.request_get(final_uri,
|
269
|
+
request_headers = {} if no_headers
|
270
|
+
response = http.request_get(final_uri, request_headers)
|
264
271
|
|
265
272
|
case response
|
266
273
|
when Net::HTTPSuccess
|
@@ -289,7 +296,8 @@ module FeedTools
|
|
289
296
|
response_chain << [feed_url, response]
|
290
297
|
new_location = response['location']
|
291
298
|
if response_chain.assoc(new_location) != nil
|
292
|
-
raise FeedAccessError,
|
299
|
+
raise FeedAccessError,
|
300
|
+
"Redirection loop detected: #{new_location}"
|
293
301
|
end
|
294
302
|
|
295
303
|
# Find out if we've already seen the url we've been
|
@@ -300,13 +308,21 @@ module FeedTools
|
|
300
308
|
:cache_only => true)
|
301
309
|
if cached_feed.cache_object != nil &&
|
302
310
|
cached_feed.cache_object.new_record? != true
|
303
|
-
|
304
|
-
|
311
|
+
if !cached_feed.expired? &&
|
312
|
+
!cached_feed.http_headers.blank?
|
313
|
+
# Copy the cached state
|
305
314
|
self.url = cached_feed.url
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
315
|
+
|
316
|
+
@feed_data = cached_feed.feed_data
|
317
|
+
@feed_data_type = cached_feed.feed_data_type
|
318
|
+
|
319
|
+
if @feed_data.blank?
|
320
|
+
raise "Invalid cache data."
|
321
|
+
end
|
322
|
+
|
323
|
+
@title = nil; self.title
|
324
|
+
@link = nil; self.link
|
325
|
+
|
310
326
|
self.last_retrieved = cached_feed.last_retrieved
|
311
327
|
self.http_headers = cached_feed.http_headers
|
312
328
|
self.cache_object = cached_feed.cache_object
|
@@ -342,6 +358,10 @@ module FeedTools
|
|
342
358
|
raise FeedAccessError, 'Socket error prevented feed retrieval'
|
343
359
|
rescue Timeout::Error
|
344
360
|
raise FeedAccessError, 'Timeout while attempting to retrieve feed'
|
361
|
+
rescue Errno::ENETUNREACH
|
362
|
+
raise FeedAccessError, 'Network was unreachable'
|
363
|
+
rescue Errno::ECONNRESET
|
364
|
+
raise FeedAccessError, 'Connection was reset by peer'
|
345
365
|
end
|
346
366
|
end
|
347
367
|
|
@@ -375,12 +395,12 @@ module FeedTools
|
|
375
395
|
end
|
376
396
|
end
|
377
397
|
unless @http_response.kind_of? Net::HTTPRedirection
|
398
|
+
@feed_data = self.http_response.body
|
378
399
|
@http_headers = {}
|
379
400
|
self.http_response.each_header do |key, value|
|
380
401
|
self.http_headers[key.downcase] = value
|
381
402
|
end
|
382
403
|
self.last_retrieved = Time.now.gmtime
|
383
|
-
self.feed_data = self.http_response.body
|
384
404
|
end
|
385
405
|
rescue FeedAccessError
|
386
406
|
@live = false
|
@@ -451,9 +471,9 @@ module FeedTools
|
|
451
471
|
open(file_name) do |file|
|
452
472
|
@http_response = nil
|
453
473
|
@http_headers = {}
|
474
|
+
@feed_data = file.read
|
475
|
+
@feed_data_type = :xml
|
454
476
|
self.last_retrieved = Time.now.gmtime
|
455
|
-
self.feed_data = file.read
|
456
|
-
self.feed_data_type = :xml
|
457
477
|
end
|
458
478
|
rescue
|
459
479
|
@live = false
|
@@ -478,8 +498,71 @@ module FeedTools
|
|
478
498
|
|
479
499
|
# Returns a hash of the http headers from the response.
|
480
500
|
def http_headers
|
501
|
+
if @http_headers.blank?
|
502
|
+
if !self.cache_object.nil? && !self.cache_object.http_headers.nil?
|
503
|
+
@http_headers = YAML.load(self.cache_object.http_headers)
|
504
|
+
@http_headers = {} unless @http_headers.kind_of? Hash
|
505
|
+
else
|
506
|
+
@http_headers = {}
|
507
|
+
end
|
508
|
+
end
|
481
509
|
return @http_headers
|
482
510
|
end
|
511
|
+
|
512
|
+
# Returns the encoding that the feed was parsed with
|
513
|
+
def encoding
|
514
|
+
if @encoding.nil?
|
515
|
+
unless self.http_headers.blank?
|
516
|
+
@encoding = "utf-8"
|
517
|
+
else
|
518
|
+
@encoding = self.encoding_from_xml_data
|
519
|
+
end
|
520
|
+
end
|
521
|
+
return @encoding
|
522
|
+
end
|
523
|
+
|
524
|
+
# Returns the encoding of feed calculated only from the xml data.
|
525
|
+
# I.e., the encoding we would come up with if we ignore RFC 3023.
|
526
|
+
def encoding_from_xml_data
|
527
|
+
if @encoding_from_xml_data.nil?
|
528
|
+
raw_data = self.feed_data
|
529
|
+
encoding_from_xml_instruct =
|
530
|
+
raw_data.scan(
|
531
|
+
/^<\?xml [^>]*encoding="([\w]*)"[^>]*\?>/
|
532
|
+
).flatten.first
|
533
|
+
unless encoding_from_xml_instruct.blank?
|
534
|
+
encoding_from_xml_instruct.downcase!
|
535
|
+
end
|
536
|
+
if encoding_from_xml_instruct.blank?
|
537
|
+
doc = Document.new(raw_data)
|
538
|
+
encoding_from_xml_instruct = doc.encoding.downcase
|
539
|
+
if encoding_from_xml_instruct == "utf-8"
|
540
|
+
# REXML has a tendency to report utf-8 overzealously, take with
|
541
|
+
# grain of salt
|
542
|
+
encoding_from_xml_instruct = nil
|
543
|
+
end
|
544
|
+
else
|
545
|
+
@encoding_from_xml_data = encoding_from_xml_instruct
|
546
|
+
end
|
547
|
+
if encoding_from_xml_instruct.blank?
|
548
|
+
sniff_table = {
|
549
|
+
"Lo\247\224" => "ebcdic-cp-us",
|
550
|
+
"<?xm" => "utf-8"
|
551
|
+
}
|
552
|
+
sniff = self.feed_data[0..3]
|
553
|
+
if sniff_table[sniff] != nil
|
554
|
+
@encoding_from_xml_data = sniff_table[sniff].downcase
|
555
|
+
end
|
556
|
+
else
|
557
|
+
@encoding_from_xml_data = encoding_from_xml_instruct
|
558
|
+
end
|
559
|
+
if @encoding_from_xml_data.blank?
|
560
|
+
# Safest assumption
|
561
|
+
@encoding_from_xml_data = "utf-8"
|
562
|
+
end
|
563
|
+
end
|
564
|
+
return @encoding_from_xml_data
|
565
|
+
end
|
483
566
|
|
484
567
|
# Returns the feed's raw data.
|
485
568
|
def feed_data
|
@@ -493,12 +576,40 @@ module FeedTools
|
|
493
576
|
|
494
577
|
# Sets the feed's data.
|
495
578
|
def feed_data=(new_feed_data)
|
579
|
+
@http_headers = {}
|
580
|
+
@cache_object = nil
|
581
|
+
@url = nil
|
582
|
+
@id = nil
|
583
|
+
@encoding = nil
|
496
584
|
@feed_data = new_feed_data
|
497
585
|
unless self.cache_object.nil?
|
498
586
|
self.cache_object.feed_data = new_feed_data
|
499
587
|
end
|
500
588
|
end
|
501
589
|
|
590
|
+
# Returns the feed's raw data as utf-8.
|
591
|
+
def feed_data_utf_8(force_encoding=nil)
|
592
|
+
if @feed_data_utf_8.nil?
|
593
|
+
raw_data = self.feed_data
|
594
|
+
if force_encoding.nil?
|
595
|
+
use_encoding = self.encoding
|
596
|
+
else
|
597
|
+
use_encoding = force_encoding
|
598
|
+
end
|
599
|
+
if use_encoding != "utf-8"
|
600
|
+
begin
|
601
|
+
@feed_data_utf_8 =
|
602
|
+
Iconv.new('utf-8', use_encoding).iconv(raw_data)
|
603
|
+
rescue
|
604
|
+
return raw_data
|
605
|
+
end
|
606
|
+
else
|
607
|
+
return self.feed_data
|
608
|
+
end
|
609
|
+
end
|
610
|
+
return @feed_data_utf_8
|
611
|
+
end
|
612
|
+
|
502
613
|
# Returns the data type of the feed
|
503
614
|
# Possible values:
|
504
615
|
# * :xml
|
@@ -526,24 +637,15 @@ module FeedTools
|
|
526
637
|
@xml_doc = nil
|
527
638
|
else
|
528
639
|
if @xml_doc.nil?
|
529
|
-
# INQUIRY: Is there any way of saying "dude, rescue *everything*"?
|
530
640
|
begin
|
531
641
|
begin
|
532
|
-
|
533
|
-
|
534
|
-
|
535
|
-
@xml_doc = Document.new(feed_data)
|
536
|
-
rescue Exception
|
537
|
-
# Something failed especially badly, attempt to repair the
|
538
|
-
# xml with htree.
|
539
|
-
@xml_doc = HTree.parse(feed_data).to_rexml
|
540
|
-
rescue
|
642
|
+
@xml_doc = Document.new(self.feed_data_utf_8,
|
643
|
+
:ignore_whitespace_nodes => :all)
|
644
|
+
rescue Object
|
541
645
|
# Something failed, attempt to repair the xml with htree.
|
542
|
-
@xml_doc = HTree.parse(
|
646
|
+
@xml_doc = HTree.parse(self.feed_data_utf_8).to_rexml
|
543
647
|
end
|
544
|
-
rescue
|
545
|
-
@xml_doc = nil
|
546
|
-
rescue
|
648
|
+
rescue Object
|
547
649
|
@xml_doc = nil
|
548
650
|
end
|
549
651
|
end
|
@@ -551,14 +653,23 @@ module FeedTools
|
|
551
653
|
return @xml_doc
|
552
654
|
end
|
553
655
|
|
554
|
-
# Returns the first node within the channel_node that matches the xpath
|
555
|
-
|
556
|
-
|
656
|
+
# Returns the first node within the channel_node that matches the xpath
|
657
|
+
# query.
|
658
|
+
def find_node(xpath, select_result_value=false)
|
659
|
+
if self.feed_data_type != :xml
|
660
|
+
raise "The feed data type is not xml."
|
661
|
+
end
|
662
|
+
return try_xpaths(self.channel_node, [xpath],
|
663
|
+
:select_result_value => select_result_value)
|
557
664
|
end
|
558
665
|
|
559
666
|
# Returns all nodes within the channel_node that match the xpath query.
|
560
|
-
def find_all_nodes(xpath)
|
561
|
-
|
667
|
+
def find_all_nodes(xpath, select_result_value=false)
|
668
|
+
if self.feed_data_type != :xml
|
669
|
+
raise "The feed data type is not xml."
|
670
|
+
end
|
671
|
+
return try_xpaths_all(self.channel_node, [xpath],
|
672
|
+
:select_result_value => select_result_value)
|
562
673
|
end
|
563
674
|
|
564
675
|
# Returns the root node of the feed.
|
@@ -568,7 +679,15 @@ module FeedTools
|
|
568
679
|
# break this stuff.
|
569
680
|
# E.g.: http://smogzer.tripod.com/smog.rdf
|
570
681
|
# ===================================================================
|
571
|
-
|
682
|
+
begin
|
683
|
+
if xml.nil?
|
684
|
+
return nil
|
685
|
+
else
|
686
|
+
@root_node = xml.root
|
687
|
+
end
|
688
|
+
rescue
|
689
|
+
return nil
|
690
|
+
end
|
572
691
|
end
|
573
692
|
return @root_node
|
574
693
|
end
|
@@ -576,13 +695,11 @@ module FeedTools
|
|
576
695
|
# Returns the channel node of the feed.
|
577
696
|
def channel_node
|
578
697
|
if @channel_node.nil? && root_node != nil
|
579
|
-
@channel_node =
|
580
|
-
|
581
|
-
|
582
|
-
|
583
|
-
|
584
|
-
@channel_node = XPath.first(root_node, "feedinfo")
|
585
|
-
end
|
698
|
+
@channel_node = try_xpaths(root_node, [
|
699
|
+
"channel",
|
700
|
+
"CHANNEL",
|
701
|
+
"feedinfo"
|
702
|
+
])
|
586
703
|
if @channel_node == nil
|
587
704
|
@channel_node = root_node
|
588
705
|
end
|
@@ -592,12 +709,13 @@ module FeedTools
|
|
592
709
|
|
593
710
|
# The cache object that handles the feed persistence.
|
594
711
|
def cache_object
|
712
|
+
if !@url.nil? && @url =~ /^file:\/\//
|
713
|
+
return nil
|
714
|
+
end
|
595
715
|
unless FeedTools.feed_cache.nil?
|
596
716
|
if @cache_object.nil?
|
597
717
|
begin
|
598
|
-
if @
|
599
|
-
@cache_object = FeedTools.feed_cache.find_by_id(@id)
|
600
|
-
elsif @url != nil
|
718
|
+
if @url != nil
|
601
719
|
@cache_object = FeedTools.feed_cache.find_by_url(@url)
|
602
720
|
end
|
603
721
|
if @cache_object.nil?
|
@@ -719,43 +837,22 @@ module FeedTools
|
|
719
837
|
# Returns the feed's unique id
|
720
838
|
def id
|
721
839
|
if @id.nil?
|
722
|
-
|
723
|
-
|
724
|
-
|
725
|
-
|
726
|
-
|
727
|
-
|
728
|
-
|
729
|
-
|
730
|
-
|
731
|
-
|
732
|
-
|
733
|
-
|
734
|
-
|
735
|
-
|
736
|
-
|
737
|
-
|
738
|
-
end
|
739
|
-
unless root_node.nil?
|
740
|
-
if @id == "" || @id.nil?
|
741
|
-
@id = XPath.first(root_node, "id/text()").to_s
|
742
|
-
end
|
743
|
-
if @id == ""
|
744
|
-
@id = XPath.first(channel_node, "atom10:id/text()",
|
745
|
-
FEED_TOOLS_NAMESPACES).to_s
|
746
|
-
end
|
747
|
-
if @id == ""
|
748
|
-
@id = XPath.first(channel_node, "atom03:id/text()",
|
749
|
-
FEED_TOOLS_NAMESPACES).to_s
|
750
|
-
end
|
751
|
-
if @id == ""
|
752
|
-
@id = XPath.first(channel_node, "atom:id/text()").to_s
|
753
|
-
end
|
754
|
-
if @id == ""
|
755
|
-
@id = XPath.first(root_node, "guid/text()").to_s
|
756
|
-
end
|
757
|
-
end
|
758
|
-
@id = nil if @id == ""
|
840
|
+
@id = select_not_blank([
|
841
|
+
try_xpaths(self.channel_node, [
|
842
|
+
"atom10:id/text()",
|
843
|
+
"atom03:id/text()",
|
844
|
+
"atom:id/text()",
|
845
|
+
"id/text()",
|
846
|
+
"guid/text()"
|
847
|
+
], :select_result_value => true),
|
848
|
+
try_xpaths(self.root_node, [
|
849
|
+
"atom10:id/text()",
|
850
|
+
"atom03:id/text()",
|
851
|
+
"atom:id/text()",
|
852
|
+
"id/text()",
|
853
|
+
"guid/text()"
|
854
|
+
], :select_result_value => true)
|
855
|
+
])
|
759
856
|
end
|
760
857
|
return @id
|
761
858
|
end
|
@@ -768,12 +865,12 @@ module FeedTools
|
|
768
865
|
# Returns the feed url.
|
769
866
|
def url
|
770
867
|
original_url = @url
|
771
|
-
override_url = lambda do
|
868
|
+
override_url = lambda do |result|
|
772
869
|
begin
|
773
|
-
if
|
870
|
+
if result.nil? && self.feed_data != nil
|
774
871
|
true
|
775
|
-
elsif
|
776
|
-
!(["http", "https"].include?(URI.parse(
|
872
|
+
elsif result != nil &&
|
873
|
+
!(["http", "https"].include?(URI.parse(result.to_s).scheme))
|
777
874
|
if self.feed_data != nil
|
778
875
|
true
|
779
876
|
else
|
@@ -786,47 +883,32 @@ module FeedTools
|
|
786
883
|
true
|
787
884
|
end
|
788
885
|
end
|
789
|
-
if override_url.call
|
790
|
-
|
791
|
-
|
792
|
-
|
793
|
-
|
794
|
-
|
795
|
-
|
796
|
-
|
797
|
-
|
798
|
-
|
799
|
-
|
800
|
-
|
801
|
-
|
802
|
-
|
803
|
-
|
804
|
-
@
|
805
|
-
|
806
|
-
|
807
|
-
|
808
|
-
|
809
|
-
end
|
810
|
-
if override_url.call
|
811
|
-
@url = XPath.first(channel_node, "admin:feed/@rdf:resource",
|
812
|
-
FEED_TOOLS_NAMESPACES).to_s
|
813
|
-
@url = nil if @url == ""
|
814
|
-
end
|
815
|
-
if override_url.call
|
816
|
-
@url = XPath.first(channel_node, "admin:feed/@resource").to_s
|
817
|
-
@url = nil if @url == ""
|
818
|
-
end
|
819
|
-
if override_url.call
|
820
|
-
@url = XPath.first(channel_node, "feed/@rdf:resource").to_s
|
821
|
-
@url = nil if @url == ""
|
822
|
-
end
|
823
|
-
if override_url.call
|
824
|
-
@url = XPath.first(channel_node, "feed/@resource").to_s
|
825
|
-
@url = nil if @url == ""
|
826
|
-
end
|
886
|
+
if override_url.call(@url)
|
887
|
+
# rdf:about is ordered last because a lot of people accidentally
|
888
|
+
# put the link in that field instead of the url to the feed.
|
889
|
+
# Ordering it last gives them as many chances as humanly possible
|
890
|
+
# for them to redeem themselves. If the link turns out to be the
|
891
|
+
@url = try_xpaths(self.channel_node, [
|
892
|
+
"link[@rel='self']/@href",
|
893
|
+
"atom10:link[@rel='self']/@href",
|
894
|
+
"atom03:link[@rel='self']/@href",
|
895
|
+
"atom:link[@rel='self']/@href",
|
896
|
+
"admin:feed/@rdf:resource",
|
897
|
+
"admin:feed/@resource",
|
898
|
+
"feed/@rdf:resource",
|
899
|
+
"feed/@resource",
|
900
|
+
"@rdf:about",
|
901
|
+
"@about"
|
902
|
+
], :select_result_value => true) do |result|
|
903
|
+
override_url.call(FeedTools.normalize_url(result))
|
904
|
+
end
|
905
|
+
@url = FeedTools.normalize_url(@url)
|
827
906
|
if @url == nil
|
828
907
|
@url = original_url
|
829
908
|
end
|
909
|
+
if @url == self.link
|
910
|
+
@url = original_url
|
911
|
+
end
|
830
912
|
end
|
831
913
|
return @url
|
832
914
|
end
|
@@ -840,37 +922,23 @@ module FeedTools
|
|
840
922
|
# Returns the feed title
|
841
923
|
def title
|
842
924
|
if @title.nil?
|
843
|
-
|
844
|
-
|
845
|
-
|
846
|
-
|
847
|
-
|
848
|
-
|
849
|
-
|
850
|
-
|
851
|
-
title_node = XPath.first(channel_node, "atom03:title",
|
852
|
-
FEED_TOOLS_NAMESPACES)
|
853
|
-
end
|
854
|
-
if title_node.nil?
|
855
|
-
title_node = XPath.first(channel_node, "atom:title")
|
856
|
-
end
|
857
|
-
if title_node.nil?
|
858
|
-
title_node = XPath.first(channel_node, "dc:title",
|
859
|
-
FEED_TOOLS_NAMESPACES)
|
860
|
-
end
|
861
|
-
if title_node.nil?
|
862
|
-
title_node = XPath.first(channel_node, "dc:title")
|
863
|
-
end
|
864
|
-
if title_node.nil?
|
865
|
-
title_node = XPath.first(channel_node, "TITLE")
|
866
|
-
end
|
867
|
-
end
|
925
|
+
repair_entities = false
|
926
|
+
title_node = try_xpaths(self.channel_node, [
|
927
|
+
"atom10:title",
|
928
|
+
"atom03:title",
|
929
|
+
"atom:title",
|
930
|
+
"title",
|
931
|
+
"dc:title"
|
932
|
+
])
|
868
933
|
if title_node.nil?
|
869
934
|
return nil
|
870
935
|
end
|
871
|
-
title_type =
|
872
|
-
|
873
|
-
|
936
|
+
title_type = try_xpaths(title_node, "@type",
|
937
|
+
:select_result_value => true)
|
938
|
+
title_mode = try_xpaths(title_node, "@mode",
|
939
|
+
:select_result_value => true)
|
940
|
+
title_encoding = try_xpaths(title_node, "@encoding",
|
941
|
+
:select_result_value => true)
|
874
942
|
|
875
943
|
# Note that we're checking for misuse of type, mode and encoding here
|
876
944
|
if title_type == "base64" || title_mode == "base64" ||
|
@@ -895,7 +963,7 @@ module FeedTools
|
|
895
963
|
@title.gsub!(/>\n</, "><")
|
896
964
|
@title.gsub!(/\n/, " ")
|
897
965
|
@title.strip!
|
898
|
-
@title = nil if @title
|
966
|
+
@title = nil if @title.blank?
|
899
967
|
self.cache_object.title = @title unless self.cache_object.nil?
|
900
968
|
end
|
901
969
|
return @title
|
@@ -907,124 +975,98 @@ module FeedTools
|
|
907
975
|
self.cache_object.title = new_title unless self.cache_object.nil?
|
908
976
|
end
|
909
977
|
|
910
|
-
# Returns the feed
|
911
|
-
def
|
912
|
-
if @
|
913
|
-
|
914
|
-
|
915
|
-
|
916
|
-
|
917
|
-
|
918
|
-
|
919
|
-
|
920
|
-
|
921
|
-
|
922
|
-
|
923
|
-
|
924
|
-
|
925
|
-
|
926
|
-
|
927
|
-
|
928
|
-
|
929
|
-
|
930
|
-
|
931
|
-
|
932
|
-
description_node = XPath.first(channel_node, "info")
|
933
|
-
end
|
934
|
-
if description_node.nil?
|
935
|
-
description_node = XPath.first(channel_node, "content:encoded")
|
936
|
-
end
|
937
|
-
if description_node.nil?
|
938
|
-
description_node = XPath.first(channel_node, "content:encoded",
|
939
|
-
FEED_TOOLS_NAMESPACES)
|
940
|
-
end
|
941
|
-
if description_node.nil?
|
942
|
-
description_node = XPath.first(root_node, "encoded")
|
943
|
-
end
|
944
|
-
if description_node.nil?
|
945
|
-
description_node = XPath.first(channel_node, "content")
|
946
|
-
end
|
947
|
-
if description_node.nil?
|
948
|
-
description_node = XPath.first(channel_node, "xhtml:body")
|
949
|
-
end
|
950
|
-
if description_node.nil?
|
951
|
-
description_node = XPath.first(channel_node, "body")
|
952
|
-
end
|
953
|
-
if description_node.nil?
|
954
|
-
description_node = XPath.first(channel_node, "blurb")
|
955
|
-
end
|
956
|
-
end
|
957
|
-
if description_node.nil?
|
978
|
+
# Returns the feed subtitle
|
979
|
+
def subtitle
|
980
|
+
if @subtitle.nil?
|
981
|
+
repair_entities = false
|
982
|
+
subtitle_node = try_xpaths(self.channel_node, [
|
983
|
+
"atom10:subtitle",
|
984
|
+
"subtitle",
|
985
|
+
"atom03:tagline",
|
986
|
+
"tagline",
|
987
|
+
"description",
|
988
|
+
"summary",
|
989
|
+
"abstract",
|
990
|
+
"ABSTRACT",
|
991
|
+
"content:encoded",
|
992
|
+
"encoded",
|
993
|
+
"content",
|
994
|
+
"xhtml:body",
|
995
|
+
"body",
|
996
|
+
"blurb",
|
997
|
+
"info"
|
998
|
+
])
|
999
|
+
if subtitle_node.nil?
|
958
1000
|
return nil
|
959
1001
|
end
|
960
|
-
|
961
|
-
|
962
|
-
|
1002
|
+
subtitle_type = try_xpaths(subtitle_node, "@type",
|
1003
|
+
:select_result_value => true)
|
1004
|
+
subtitle_mode = try_xpaths(subtitle_node, "@mode",
|
1005
|
+
:select_result_value => true)
|
1006
|
+
subtitle_encoding = try_xpaths(subtitle_node, "@encoding",
|
1007
|
+
:select_result_value => true)
|
963
1008
|
|
964
1009
|
# Note that we're checking for misuse of type, mode and encoding here
|
965
|
-
if
|
966
|
-
@
|
1010
|
+
if !subtitle_encoding.blank?
|
1011
|
+
@subtitle =
|
967
1012
|
"[Embedded data objects are not currently supported.]"
|
968
|
-
elsif
|
969
|
-
@
|
970
|
-
elsif
|
971
|
-
|
972
|
-
@
|
973
|
-
elsif
|
974
|
-
|
975
|
-
|
976
|
-
@
|
977
|
-
elsif
|
978
|
-
@
|
979
|
-
|
1013
|
+
elsif subtitle_node.cdatas.size > 0
|
1014
|
+
@subtitle = subtitle_node.cdatas.first.value
|
1015
|
+
elsif subtitle_type == "base64" || subtitle_mode == "base64" ||
|
1016
|
+
subtitle_encoding == "base64"
|
1017
|
+
@subtitle = Base64.decode64(subtitle_node.inner_xml.strip)
|
1018
|
+
elsif subtitle_type == "xhtml" || subtitle_mode == "xhtml" ||
|
1019
|
+
subtitle_type == "xml" || subtitle_mode == "xml" ||
|
1020
|
+
subtitle_type == "application/xhtml+xml"
|
1021
|
+
@subtitle = subtitle_node.inner_xml
|
1022
|
+
elsif subtitle_type == "escaped" || subtitle_mode == "escaped"
|
1023
|
+
@subtitle = FeedTools.unescape_entities(
|
1024
|
+
subtitle_node.inner_xml)
|
980
1025
|
else
|
981
|
-
@
|
1026
|
+
@subtitle = subtitle_node.inner_xml
|
982
1027
|
repair_entities = true
|
983
1028
|
end
|
984
|
-
if @
|
985
|
-
@
|
986
|
-
@description = "" if @description.nil?
|
1029
|
+
if @subtitle.blank?
|
1030
|
+
@subtitle = self.itunes_summary
|
987
1031
|
end
|
988
|
-
if @
|
989
|
-
@
|
990
|
-
@description = "" if @description.nil?
|
1032
|
+
if @subtitle.blank?
|
1033
|
+
@subtitle = self.itunes_subtitle
|
991
1034
|
end
|
992
1035
|
|
993
|
-
unless @
|
994
|
-
@
|
995
|
-
@
|
996
|
-
@
|
1036
|
+
unless @subtitle.blank?
|
1037
|
+
@subtitle = FeedTools.sanitize_html(@subtitle, :strip)
|
1038
|
+
@subtitle = FeedTools.unescape_entities(@subtitle) if repair_entities
|
1039
|
+
@subtitle = FeedTools.tidy_html(@subtitle)
|
997
1040
|
end
|
998
1041
|
|
999
|
-
@
|
1000
|
-
@
|
1042
|
+
@subtitle = @subtitle.strip unless @subtitle.nil?
|
1043
|
+
@subtitle = nil if @subtitle.blank?
|
1001
1044
|
end
|
1002
|
-
return @
|
1045
|
+
return @subtitle
|
1003
1046
|
end
|
1004
1047
|
|
1005
|
-
# Sets the feed
|
1006
|
-
def
|
1007
|
-
@
|
1048
|
+
# Sets the feed subtitle
|
1049
|
+
def subtitle=(new_subtitle)
|
1050
|
+
@subtitle = new_subtitle
|
1008
1051
|
end
|
1009
1052
|
|
1010
1053
|
# Returns the contents of the itunes:summary element
|
1011
1054
|
def itunes_summary
|
1012
1055
|
if @itunes_summary.nil?
|
1013
|
-
|
1014
|
-
|
1015
|
-
"itunes:summary/text()"
|
1016
|
-
|
1017
|
-
|
1018
|
-
|
1019
|
-
|
1020
|
-
|
1021
|
-
|
1022
|
-
|
1023
|
-
|
1056
|
+
@itunes_summary = select_not_blank([
|
1057
|
+
try_xpaths(self.channel_node, [
|
1058
|
+
"itunes:summary/text()"
|
1059
|
+
]),
|
1060
|
+
try_xpaths(self.root_node, [
|
1061
|
+
"itunes:summary/text()"
|
1062
|
+
])
|
1063
|
+
])
|
1064
|
+
unless @itunes_summary.blank?
|
1065
|
+
@itunes_summary = FeedTools.unescape_entities(@itunes_summary)
|
1066
|
+
@itunes_summary = FeedTools.sanitize_html(@itunes_summary)
|
1067
|
+
else
|
1024
1068
|
@itunes_summary = nil
|
1025
1069
|
end
|
1026
|
-
@itunes_summary =
|
1027
|
-
FeedTools.sanitize_html(@itunes_summary) unless @itunes_summary.nil?
|
1028
1070
|
end
|
1029
1071
|
return @itunes_summary
|
1030
1072
|
end
|
@@ -1037,21 +1079,19 @@ module FeedTools
|
|
1037
1079
|
# Returns the contents of the itunes:subtitle element
|
1038
1080
|
def itunes_subtitle
|
1039
1081
|
if @itunes_subtitle.nil?
|
1040
|
-
|
1041
|
-
|
1042
|
-
"itunes:subtitle/text()"
|
1043
|
-
|
1044
|
-
|
1045
|
-
|
1046
|
-
|
1047
|
-
|
1048
|
-
|
1049
|
-
|
1050
|
-
if @itunes_subtitle == ""
|
1051
|
-
@itunes_subtitle = nil
|
1052
|
-
end
|
1053
|
-
unless @itunes_subtitle.nil?
|
1082
|
+
@itunes_subtitle = select_not_blank([
|
1083
|
+
try_xpaths(self.channel_node, [
|
1084
|
+
"itunes:subtitle/text()"
|
1085
|
+
]),
|
1086
|
+
try_xpaths(self.root_node, [
|
1087
|
+
"itunes:subtitle/text()"
|
1088
|
+
])
|
1089
|
+
])
|
1090
|
+
unless @itunes_subtitle.blank?
|
1091
|
+
@itunes_subtitle = FeedTools.unescape_entities(@itunes_subtitle)
|
1054
1092
|
@itunes_subtitle = FeedTools.sanitize_html(@itunes_subtitle)
|
1093
|
+
else
|
1094
|
+
@itunes_subtitle = nil
|
1055
1095
|
end
|
1056
1096
|
end
|
1057
1097
|
return @itunes_subtitle
|
@@ -1065,43 +1105,80 @@ module FeedTools
|
|
1065
1105
|
# Returns the feed link
|
1066
1106
|
def link
|
1067
1107
|
if @link.nil?
|
1068
|
-
|
1069
|
-
|
1070
|
-
|
1071
|
-
|
1072
|
-
|
1073
|
-
|
1074
|
-
|
1075
|
-
|
1076
|
-
|
1077
|
-
|
1078
|
-
|
1079
|
-
|
1080
|
-
|
1081
|
-
|
1082
|
-
|
1083
|
-
|
1084
|
-
|
1085
|
-
|
1086
|
-
if
|
1087
|
-
@link = XPath.first(channel_node, "a/@href").to_s
|
1088
|
-
end
|
1089
|
-
if @link == ""
|
1090
|
-
@link = XPath.first(channel_node, "A/@HREF").to_s
|
1091
|
-
end
|
1092
|
-
end
|
1093
|
-
if @link == "" || @link.nil?
|
1094
|
-
if FeedTools.is_uri? self.guid
|
1108
|
+
@link = try_xpaths(self.channel_node, [
|
1109
|
+
"atom10:link[@type='application/xhtml+xml']/@href",
|
1110
|
+
"atom10:link[@type='text/html']/@href",
|
1111
|
+
"atom10:link[@rel='alternate']/@href",
|
1112
|
+
"atom03:link[@type='application/xhtml+xml']/@href",
|
1113
|
+
"atom03:link[@type='text/html']/@href",
|
1114
|
+
"atom03:link[@rel='alternate']/@href",
|
1115
|
+
"atom:link[@type='application/xhtml+xml']/@href",
|
1116
|
+
"atom:link[@type='text/html']/@href",
|
1117
|
+
"atom:link[@rel='alternate']/@href",
|
1118
|
+
"link[@type='application/xhtml+xml']/@href",
|
1119
|
+
"link[@type='text/html']/@href",
|
1120
|
+
"link[@rel='alternate']/@href",
|
1121
|
+
"link/text()",
|
1122
|
+
"@href",
|
1123
|
+
"a/@href"
|
1124
|
+
], :select_result_value => true)
|
1125
|
+
if @link.blank?
|
1126
|
+
if FeedTools.is_uri?(self.guid)
|
1095
1127
|
@link = self.guid
|
1096
1128
|
end
|
1097
1129
|
end
|
1098
|
-
if @link
|
1099
|
-
# Technically, we shouldn't use the base attribute for this, but
|
1100
|
-
# is missing, it's already a given that we're
|
1101
|
-
# always pray it's correct.
|
1130
|
+
if @link.blank? && channel_node != nil
|
1131
|
+
# Technically, we shouldn't use the base attribute for this, but
|
1132
|
+
# if the href attribute is missing, it's already a given that we're
|
1133
|
+
# looking at a messed up CDF file. We can always pray it's correct.
|
1102
1134
|
@link = XPath.first(channel_node, "@base").to_s
|
1103
1135
|
end
|
1104
|
-
|
1136
|
+
if !@link.blank?
|
1137
|
+
@link = FeedTools.unescape_entities(@link)
|
1138
|
+
end
|
1139
|
+
if @link.blank?
|
1140
|
+
link_node = try_xpaths(self.channel_node, [
|
1141
|
+
"atom10:link",
|
1142
|
+
"atom03:link",
|
1143
|
+
"atom:link",
|
1144
|
+
"link"
|
1145
|
+
])
|
1146
|
+
if link_node != nil
|
1147
|
+
if link_node.attributes['type'].to_s =~ /^image/ ||
|
1148
|
+
link_node.attributes['type'].to_s =~ /^application/ ||
|
1149
|
+
link_node.attributes['type'].to_s =~ /xml/ ||
|
1150
|
+
link_node.attributes['rel'].to_s =~ /self/
|
1151
|
+
for child in self.channel_node
|
1152
|
+
if child.class == REXML::Element
|
1153
|
+
if child.name.downcase == "link"
|
1154
|
+
if child.attributes['type'].to_s =~ /^image/ ||
|
1155
|
+
child.attributes['type'].to_s =~ /^application/ ||
|
1156
|
+
child.attributes['type'].to_s =~ /xml/ ||
|
1157
|
+
child.attributes['rel'].to_s =~ /self/
|
1158
|
+
@link = nil
|
1159
|
+
next
|
1160
|
+
else
|
1161
|
+
@link = child.attributes['href'].to_s
|
1162
|
+
if @link.blank?
|
1163
|
+
@link = child.inner_xml
|
1164
|
+
end
|
1165
|
+
if @link.blank?
|
1166
|
+
next
|
1167
|
+
end
|
1168
|
+
break
|
1169
|
+
end
|
1170
|
+
end
|
1171
|
+
end
|
1172
|
+
end
|
1173
|
+
else
|
1174
|
+
@link = link_node.attributes['href'].to_s
|
1175
|
+
end
|
1176
|
+
end
|
1177
|
+
end
|
1178
|
+
@link = nil if @link.blank?
|
1179
|
+
if FeedTools.configurations[:url_normalization_enabled]
|
1180
|
+
@link = FeedTools.normalize_url(@link)
|
1181
|
+
end
|
1105
1182
|
unless self.cache_object.nil?
|
1106
1183
|
self.cache_object.link = @link
|
1107
1184
|
end
|
@@ -1118,87 +1195,83 @@ module FeedTools
|
|
1118
1195
|
end
|
1119
1196
|
|
1120
1197
|
# Returns the url to the icon file for this feed.
|
1121
|
-
#
|
1122
|
-
# This method uses the url from the link field in order to avoid grabbing
|
1123
|
-
# the favicon for services like feedburner.
|
1124
1198
|
def icon
|
1125
1199
|
if @icon.nil?
|
1126
|
-
icon_node =
|
1127
|
-
|
1128
|
-
|
1129
|
-
|
1130
|
-
|
1131
|
-
|
1132
|
-
|
1133
|
-
|
1134
|
-
icon_node = XPath.first(channel_node, "icon")
|
1135
|
-
end
|
1136
|
-
if icon_node.nil?
|
1137
|
-
icon_node = XPath.first(channel_node, "logo[@style='icon']")
|
1138
|
-
end
|
1139
|
-
if icon_node.nil?
|
1140
|
-
icon_node = XPath.first(channel_node, "LOGO[@STYLE='ICON']")
|
1141
|
-
end
|
1200
|
+
icon_node = try_xpaths(self.channel_node, [
|
1201
|
+
"link[@rel='icon']",
|
1202
|
+
"link[@rel='shortcut icon']",
|
1203
|
+
"link[@type='image/x-icon']",
|
1204
|
+
"icon",
|
1205
|
+
"logo[@style='icon']",
|
1206
|
+
"LOGO[@STYLE='ICON']"
|
1207
|
+
])
|
1142
1208
|
unless icon_node.nil?
|
1143
1209
|
@icon = FeedTools.unescape_entities(
|
1144
1210
|
XPath.first(icon_node, "@href").to_s)
|
1145
|
-
if @icon
|
1211
|
+
if @icon.blank?
|
1146
1212
|
@icon = FeedTools.unescape_entities(
|
1147
1213
|
XPath.first(icon_node, "text()").to_s)
|
1148
1214
|
unless FeedTools.is_uri? @icon
|
1149
|
-
@icon =
|
1215
|
+
@icon = nil
|
1150
1216
|
end
|
1151
1217
|
end
|
1152
|
-
|
1218
|
+
@icon = nil if @icon.blank?
|
1219
|
+
end
|
1220
|
+
end
|
1221
|
+
return @icon
|
1222
|
+
end
|
1223
|
+
|
1224
|
+
# Returns the favicon url for this feed.
|
1225
|
+
# This method first tries to use the url from the link field instead of
|
1226
|
+
# the feed url, in order to avoid grabbing the favicon for services like
|
1227
|
+
# feedburner.
|
1228
|
+
def favicon
|
1229
|
+
if @favicon.nil?
|
1230
|
+
if !self.link.blank?
|
1231
|
+
begin
|
1153
1232
|
link_uri = URI.parse(FeedTools.normalize_url(self.link))
|
1154
|
-
|
1155
|
-
|
1233
|
+
if link_uri.scheme == "http"
|
1234
|
+
@favicon =
|
1235
|
+
"http://" + link_uri.host + "/favicon.ico"
|
1236
|
+
end
|
1237
|
+
rescue
|
1238
|
+
@favicon = nil
|
1239
|
+
end
|
1240
|
+
if @favicon.nil? && !self.url.blank?
|
1241
|
+
begin
|
1242
|
+
feed_uri = URI.parse(FeedTools.normalize_url(self.url))
|
1243
|
+
if feed_uri.scheme == "http"
|
1244
|
+
@favicon =
|
1245
|
+
"http://" + feed_uri.host + "/favicon.ico"
|
1246
|
+
end
|
1247
|
+
rescue
|
1248
|
+
@favicon = nil
|
1249
|
+
end
|
1156
1250
|
end
|
1157
|
-
|
1251
|
+
else
|
1252
|
+
@favicon = nil
|
1158
1253
|
end
|
1159
1254
|
end
|
1160
|
-
return @
|
1255
|
+
return @favicon
|
1161
1256
|
end
|
1162
1257
|
|
1163
1258
|
# Returns the feed author
|
1164
1259
|
def author
|
1165
1260
|
if @author.nil?
|
1166
1261
|
@author = FeedTools::Feed::Author.new
|
1167
|
-
|
1168
|
-
|
1169
|
-
|
1170
|
-
|
1171
|
-
|
1172
|
-
|
1173
|
-
|
1174
|
-
|
1175
|
-
|
1176
|
-
end
|
1177
|
-
if author_node.nil?
|
1178
|
-
author_node = XPath.first(channel_node, "author")
|
1179
|
-
end
|
1180
|
-
if author_node.nil?
|
1181
|
-
author_node = XPath.first(channel_node, "managingEditor")
|
1182
|
-
end
|
1183
|
-
if author_node.nil?
|
1184
|
-
author_node = XPath.first(channel_node, "dc:author",
|
1185
|
-
FEED_TOOLS_NAMESPACES)
|
1186
|
-
end
|
1187
|
-
if author_node.nil?
|
1188
|
-
author_node = XPath.first(channel_node, "dc:author")
|
1189
|
-
end
|
1190
|
-
if author_node.nil?
|
1191
|
-
author_node = XPath.first(channel_node, "dc:creator",
|
1192
|
-
FEED_TOOLS_NAMESPACES)
|
1193
|
-
end
|
1194
|
-
if author_node.nil?
|
1195
|
-
author_node = XPath.first(channel_node, "dc:creator")
|
1196
|
-
end
|
1197
|
-
end
|
1262
|
+
author_node = try_xpaths(self.channel_node, [
|
1263
|
+
"atom10:author",
|
1264
|
+
"atom03:author",
|
1265
|
+
"atom:author",
|
1266
|
+
"author",
|
1267
|
+
"managingEditor",
|
1268
|
+
"dc:author",
|
1269
|
+
"dc:creator"
|
1270
|
+
])
|
1198
1271
|
unless author_node.nil?
|
1199
1272
|
@author.raw = FeedTools.unescape_entities(
|
1200
|
-
XPath.first(author_node, "text()").to_s)
|
1201
|
-
@author.raw = nil if @author.raw
|
1273
|
+
XPath.first(author_node, "text()").to_s).strip
|
1274
|
+
@author.raw = nil if @author.raw.blank?
|
1202
1275
|
unless @author.raw.nil?
|
1203
1276
|
raw_scan = @author.raw.scan(
|
1204
1277
|
/(.*)\((\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\)/i)
|
@@ -1229,35 +1302,37 @@ module FeedTools
|
|
1229
1302
|
end
|
1230
1303
|
end
|
1231
1304
|
end
|
1232
|
-
|
1233
|
-
if @author.name == ""
|
1305
|
+
if @author.name.blank?
|
1234
1306
|
@author.name = FeedTools.unescape_entities(
|
1235
|
-
|
1236
|
-
|
1237
|
-
|
1238
|
-
|
1239
|
-
|
1240
|
-
end
|
1241
|
-
if @author.email == ""
|
1242
|
-
@author.email = FeedTools.unescape_entities(
|
1243
|
-
XPath.first(author_node, "email/text()").to_s)
|
1307
|
+
try_xpaths(author_node, [
|
1308
|
+
"name/text()",
|
1309
|
+
"@name"
|
1310
|
+
], :select_result_value => true)
|
1311
|
+
)
|
1244
1312
|
end
|
1245
|
-
if @author.email
|
1313
|
+
if @author.email.blank?
|
1246
1314
|
@author.email = FeedTools.unescape_entities(
|
1247
|
-
|
1315
|
+
try_xpaths(author_node, [
|
1316
|
+
"email/text()",
|
1317
|
+
"@email"
|
1318
|
+
], :select_result_value => true)
|
1319
|
+
)
|
1248
1320
|
end
|
1249
|
-
if @author.url
|
1321
|
+
if @author.url.blank?
|
1250
1322
|
@author.url = FeedTools.unescape_entities(
|
1251
|
-
|
1252
|
-
|
1253
|
-
|
1254
|
-
|
1255
|
-
|
1256
|
-
|
1257
|
-
|
1258
|
-
|
1259
|
-
|
1260
|
-
@author.
|
1323
|
+
try_xpaths(author_node, [
|
1324
|
+
"url/text()",
|
1325
|
+
"uri/text()",
|
1326
|
+
"@url",
|
1327
|
+
"@uri",
|
1328
|
+
"@href"
|
1329
|
+
], :select_result_value => true)
|
1330
|
+
)
|
1331
|
+
end
|
1332
|
+
@author.name = nil if @author.name.blank?
|
1333
|
+
@author.raw = nil if @author.raw.blank?
|
1334
|
+
@author.email = nil if @author.email.blank?
|
1335
|
+
@author.url = nil if @author.url.blank?
|
1261
1336
|
end
|
1262
1337
|
# Fallback on the itunes module if we didn't find an author name
|
1263
1338
|
begin
|
@@ -1290,15 +1365,14 @@ module FeedTools
|
|
1290
1365
|
def publisher
|
1291
1366
|
if @publisher.nil?
|
1292
1367
|
@publisher = FeedTools::Feed::Author.new
|
1368
|
+
publisher_node = try_xpaths(self.channel_node, [
|
1369
|
+
"webMaster/text()",
|
1370
|
+
"dc:publisher/text()"
|
1371
|
+
])
|
1293
1372
|
|
1294
1373
|
# Set the author name
|
1295
|
-
@publisher.raw = FeedTools.unescape_entities(
|
1296
|
-
|
1297
|
-
if @publisher.raw == ""
|
1298
|
-
@publisher.raw = FeedTools.unescape_entities(
|
1299
|
-
XPath.first(channel_node, "webMaster/text()").to_s)
|
1300
|
-
end
|
1301
|
-
unless @publisher.raw == ""
|
1374
|
+
@publisher.raw = FeedTools.unescape_entities(publisher_node.to_s)
|
1375
|
+
unless @publisher.raw.blank?
|
1302
1376
|
raw_scan = @publisher.raw.scan(
|
1303
1377
|
/(.*)\((\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\)/i)
|
1304
1378
|
if raw_scan.nil? || raw_scan.size == 0
|
@@ -1331,10 +1405,10 @@ module FeedTools
|
|
1331
1405
|
end
|
1332
1406
|
end
|
1333
1407
|
|
1334
|
-
@publisher.name = nil if @publisher.name
|
1335
|
-
@publisher.raw = nil if @publisher.raw
|
1336
|
-
@publisher.email = nil if @publisher.email
|
1337
|
-
@publisher.url = nil if @publisher.url
|
1408
|
+
@publisher.name = nil if @publisher.name.blank?
|
1409
|
+
@publisher.raw = nil if @publisher.raw.blank?
|
1410
|
+
@publisher.email = nil if @publisher.email.blank?
|
1411
|
+
@publisher.url = nil if @publisher.url.blank?
|
1338
1412
|
end
|
1339
1413
|
return @publisher
|
1340
1414
|
end
|
@@ -1364,9 +1438,12 @@ module FeedTools
|
|
1364
1438
|
# attribute.
|
1365
1439
|
def itunes_author
|
1366
1440
|
if @itunes_author.nil?
|
1367
|
-
@itunes_author = FeedTools.unescape_entities(
|
1368
|
-
|
1369
|
-
|
1441
|
+
@itunes_author = FeedTools.unescape_entities(
|
1442
|
+
try_xpaths(self.channel_node, [
|
1443
|
+
"itunes:author/text()"
|
1444
|
+
], :select_result_value => true)
|
1445
|
+
)
|
1446
|
+
@itunes_author = nil if @itunes_author.blank?
|
1370
1447
|
end
|
1371
1448
|
return @itunes_author
|
1372
1449
|
end
|
@@ -1374,29 +1451,40 @@ module FeedTools
|
|
1374
1451
|
# Returns the feed time
|
1375
1452
|
def time
|
1376
1453
|
if @time.nil?
|
1377
|
-
|
1378
|
-
|
1379
|
-
|
1380
|
-
|
1381
|
-
|
1382
|
-
|
1383
|
-
|
1384
|
-
|
1385
|
-
|
1386
|
-
|
1387
|
-
|
1388
|
-
|
1389
|
-
|
1390
|
-
|
1391
|
-
|
1454
|
+
time_string = try_xpaths(self.channel_node, [
|
1455
|
+
"atom10:updated/text()",
|
1456
|
+
"atom03:updated/text()",
|
1457
|
+
"atom:updated/text()",
|
1458
|
+
"updated/text()",
|
1459
|
+
"atom10:modified/text()",
|
1460
|
+
"atom03:modified/text()",
|
1461
|
+
"atom:modified/text()",
|
1462
|
+
"modified/text()",
|
1463
|
+
"time/text()",
|
1464
|
+
"atom10:issued/text()",
|
1465
|
+
"atom03:issued/text()",
|
1466
|
+
"atom:issued/text()",
|
1467
|
+
"issued/text()",
|
1468
|
+
"atom10:published/text()",
|
1469
|
+
"atom03:published/text()",
|
1470
|
+
"atom:published/text()",
|
1471
|
+
"published/text()",
|
1472
|
+
"pubDate/text()",
|
1473
|
+
"dc:date/text()",
|
1474
|
+
"date/text()"
|
1475
|
+
], :select_result_value => true)
|
1392
1476
|
begin
|
1393
|
-
|
1477
|
+
unless time_string.blank?
|
1394
1478
|
@time = Time.parse(time_string).gmtime
|
1395
1479
|
else
|
1396
|
-
|
1480
|
+
if FeedTools.configurations[:timestamp_estimation_enabled]
|
1481
|
+
@time = Time.now.gmtime
|
1482
|
+
end
|
1397
1483
|
end
|
1398
1484
|
rescue
|
1399
|
-
|
1485
|
+
if FeedTools.configurations[:timestamp_estimation_enabled]
|
1486
|
+
@time = Time.now.gmtime
|
1487
|
+
end
|
1400
1488
|
end
|
1401
1489
|
end
|
1402
1490
|
return @time
|
@@ -1410,13 +1498,11 @@ module FeedTools
|
|
1410
1498
|
# Returns the feed item updated time
|
1411
1499
|
def updated
|
1412
1500
|
if @updated.nil?
|
1413
|
-
|
1414
|
-
|
1415
|
-
|
1416
|
-
|
1417
|
-
|
1418
|
-
end
|
1419
|
-
if updated_string != nil && updated_string != ""
|
1501
|
+
updated_string = try_xpaths(self.channel_node, [
|
1502
|
+
"updated/text()",
|
1503
|
+
"modified/text()"
|
1504
|
+
], :select_result_value => true)
|
1505
|
+
unless updated_string.blank?
|
1420
1506
|
@updated = Time.parse(updated_string).gmtime rescue nil
|
1421
1507
|
else
|
1422
1508
|
@updated = nil
|
@@ -1430,51 +1516,16 @@ module FeedTools
|
|
1430
1516
|
@updated = new_updated
|
1431
1517
|
end
|
1432
1518
|
|
1433
|
-
# Returns the feed item issued time
|
1434
|
-
def issued
|
1435
|
-
if @issued.nil?
|
1436
|
-
unless channel_node.nil?
|
1437
|
-
issued_string = XPath.first(channel_node, "issued/text()").to_s
|
1438
|
-
if issued_string == ""
|
1439
|
-
issued_string = XPath.first(channel_node, "pubDate/text()").to_s
|
1440
|
-
end
|
1441
|
-
if issued_string == ""
|
1442
|
-
issued_string = XPath.first(channel_node, "dc:date/text()").to_s
|
1443
|
-
end
|
1444
|
-
if issued_string == ""
|
1445
|
-
issued_string = XPath.first(channel_node, "published/text()").to_s
|
1446
|
-
end
|
1447
|
-
end
|
1448
|
-
if issued_string != nil && issued_string != ""
|
1449
|
-
@issued = Time.parse(issued_string).gmtime rescue nil
|
1450
|
-
else
|
1451
|
-
@issued = nil
|
1452
|
-
end
|
1453
|
-
end
|
1454
|
-
return @issued
|
1455
|
-
end
|
1456
|
-
|
1457
|
-
# Sets the feed item issued time
|
1458
|
-
def issued=(new_issued)
|
1459
|
-
@issued = new_issued
|
1460
|
-
end
|
1461
|
-
|
1462
1519
|
# Returns the feed item published time
|
1463
1520
|
def published
|
1464
1521
|
if @published.nil?
|
1465
|
-
|
1466
|
-
|
1467
|
-
|
1468
|
-
|
1469
|
-
|
1470
|
-
|
1471
|
-
|
1472
|
-
end
|
1473
|
-
if published_string == ""
|
1474
|
-
published_string = XPath.first(channel_node, "issued/text()").to_s
|
1475
|
-
end
|
1476
|
-
end
|
1477
|
-
if published_string != nil && published_string != ""
|
1522
|
+
published_string = try_xpaths(self.channel_node, [
|
1523
|
+
"published/text()",
|
1524
|
+
"pubDate/text()",
|
1525
|
+
"issued/text()",
|
1526
|
+
"dc:date/text()"
|
1527
|
+
], :select_result_value => true)
|
1528
|
+
unless published_string.blank?
|
1478
1529
|
@published = Time.parse(published_string).gmtime rescue nil
|
1479
1530
|
else
|
1480
1531
|
@published = nil
|
@@ -1492,28 +1543,26 @@ module FeedTools
|
|
1492
1543
|
def categories
|
1493
1544
|
if @categories.nil?
|
1494
1545
|
@categories = []
|
1495
|
-
category_nodes =
|
1496
|
-
|
1497
|
-
|
1498
|
-
|
1546
|
+
category_nodes = try_xpaths_all(self.channel_node, [
|
1547
|
+
"category",
|
1548
|
+
"dc:subject"
|
1549
|
+
])
|
1499
1550
|
unless category_nodes.nil?
|
1500
1551
|
for category_node in category_nodes
|
1501
1552
|
category = FeedTools::Feed::Category.new
|
1502
|
-
category.term =
|
1503
|
-
|
1504
|
-
|
1505
|
-
|
1506
|
-
category.term.strip! unless category.term.
|
1507
|
-
category.
|
1508
|
-
|
1509
|
-
category.label.strip! unless category.label.
|
1510
|
-
category.
|
1511
|
-
|
1512
|
-
|
1513
|
-
|
1514
|
-
|
1515
|
-
category.scheme.strip! unless category.scheme.nil?
|
1516
|
-
category.scheme = nil if category.scheme == ""
|
1553
|
+
category.term = try_xpaths(category_node, [
|
1554
|
+
"@term",
|
1555
|
+
"text()"
|
1556
|
+
], :select_result_value => true)
|
1557
|
+
category.term.strip! unless category.term.blank?
|
1558
|
+
category.label = try_xpaths(category_node, ["@label"],
|
1559
|
+
:select_result_value => true)
|
1560
|
+
category.label.strip! unless category.label.blank?
|
1561
|
+
category.scheme = try_xpaths(category_node, [
|
1562
|
+
"@scheme",
|
1563
|
+
"@domain"
|
1564
|
+
], :select_result_value => true)
|
1565
|
+
category.scheme.strip! unless category.scheme.blank?
|
1517
1566
|
@categories << category
|
1518
1567
|
end
|
1519
1568
|
end
|
@@ -1525,55 +1574,61 @@ module FeedTools
|
|
1525
1574
|
def images
|
1526
1575
|
if @images.nil?
|
1527
1576
|
@images = []
|
1528
|
-
|
1529
|
-
|
1530
|
-
|
1531
|
-
|
1532
|
-
|
1533
|
-
|
1534
|
-
|
1535
|
-
|
1536
|
-
|
1537
|
-
|
1538
|
-
|
1539
|
-
|
1540
|
-
|
1541
|
-
|
1542
|
-
|
1543
|
-
|
1544
|
-
|
1545
|
-
|
1546
|
-
|
1547
|
-
|
1548
|
-
|
1577
|
+
image_nodes = try_xpaths_all(self.channel_node, [
|
1578
|
+
"image",
|
1579
|
+
"logo",
|
1580
|
+
"atom10:link",
|
1581
|
+
"atom03:link",
|
1582
|
+
"atom:link",
|
1583
|
+
"link"
|
1584
|
+
])
|
1585
|
+
unless image_nodes.blank?
|
1586
|
+
for image_node in image_nodes
|
1587
|
+
image = FeedTools::Feed::Image.new
|
1588
|
+
image.url = try_xpaths(image_node, [
|
1589
|
+
"url/text()",
|
1590
|
+
"@rdf:resource"
|
1591
|
+
], :select_result_value => true)
|
1592
|
+
if image.url.blank? && (image_node.name == "logo" ||
|
1593
|
+
(image_node.attributes['type'].to_s =~ /^image/) == 0)
|
1594
|
+
image.url = try_xpaths(image_node, [
|
1595
|
+
"@atom10:href",
|
1596
|
+
"@atom03:href",
|
1597
|
+
"@atom:href",
|
1598
|
+
"@href"
|
1599
|
+
], :select_result_value => true)
|
1600
|
+
if image.url == self.link && image.url != nil
|
1601
|
+
image.url = nil
|
1549
1602
|
end
|
1550
|
-
if image.url == "" && image_node.name == "LOGO"
|
1551
|
-
image.url = XPath.first(image_node, "@HREF").to_s
|
1552
|
-
end
|
1553
|
-
image.url.strip! unless image.url.nil?
|
1554
|
-
image.url = nil if image.url == ""
|
1555
|
-
image.title = XPath.first(image_node, "title/text()").to_s
|
1556
|
-
image.title.strip! unless image.title.nil?
|
1557
|
-
image.title = nil if image.title == ""
|
1558
|
-
image.description =
|
1559
|
-
XPath.first(image_node, "description/text()").to_s
|
1560
|
-
image.description.strip! unless image.description.nil?
|
1561
|
-
image.description = nil if image.description == ""
|
1562
|
-
image.link = XPath.first(image_node, "link/text()").to_s
|
1563
|
-
image.link.strip! unless image.link.nil?
|
1564
|
-
image.link = nil if image.link == ""
|
1565
|
-
image.height = XPath.first(image_node, "height/text()").to_s.to_i
|
1566
|
-
image.height = nil if image.height <= 0
|
1567
|
-
image.width = XPath.first(image_node, "width/text()").to_s.to_i
|
1568
|
-
image.width = nil if image.width <= 0
|
1569
|
-
image.style = XPath.first(image_node, "@style").to_s.downcase
|
1570
|
-
if image.style == ""
|
1571
|
-
image.style = XPath.first(image_node, "@STYLE").to_s.downcase
|
1572
|
-
end
|
1573
|
-
image.style.strip! unless image.style.nil?
|
1574
|
-
image.style = nil if image.style == ""
|
1575
|
-
@images << image
|
1576
1603
|
end
|
1604
|
+
if image.url.blank? && image_node.name == "LOGO"
|
1605
|
+
image.url = try_xpaths(image_node, [
|
1606
|
+
"@href"
|
1607
|
+
], :select_result_value => true)
|
1608
|
+
end
|
1609
|
+
image.url.strip! unless image.url.nil?
|
1610
|
+
image.title = try_xpaths(image_node,
|
1611
|
+
["title/text()"], :select_result_value => true)
|
1612
|
+
image.title.strip! unless image.title.nil?
|
1613
|
+
image.description = try_xpaths(image_node,
|
1614
|
+
["description/text()"], :select_result_value => true)
|
1615
|
+
image.description.strip! unless image.description.nil?
|
1616
|
+
image.link = try_xpaths(image_node,
|
1617
|
+
["link/text()"], :select_result_value => true)
|
1618
|
+
image.link.strip! unless image.link.nil?
|
1619
|
+
image.height = try_xpaths(image_node,
|
1620
|
+
["height/text()"], :select_result_value => true).to_i
|
1621
|
+
image.height = nil if image.height <= 0
|
1622
|
+
image.width = try_xpaths(image_node,
|
1623
|
+
["width/text()"], :select_result_value => true).to_i
|
1624
|
+
image.width = nil if image.width <= 0
|
1625
|
+
image.style = try_xpaths(image_node, [
|
1626
|
+
"style/text()",
|
1627
|
+
"@style"
|
1628
|
+
], :select_result_value => true)
|
1629
|
+
image.style.strip! unless image.style.nil?
|
1630
|
+
image.style.downcase! unless image.style.nil?
|
1631
|
+
@images << image unless image.url.nil?
|
1577
1632
|
end
|
1578
1633
|
end
|
1579
1634
|
end
|
@@ -1584,20 +1639,20 @@ module FeedTools
|
|
1584
1639
|
def text_input
|
1585
1640
|
if @text_input.nil?
|
1586
1641
|
@text_input = FeedTools::Feed::TextInput.new
|
1587
|
-
text_input_node =
|
1642
|
+
text_input_node = try_xpaths(self.channel_node, ["textInput"])
|
1588
1643
|
unless text_input_node.nil?
|
1589
1644
|
@text_input.title =
|
1590
|
-
|
1591
|
-
|
1645
|
+
try_xpaths(text_input_node, ["title/text()"],
|
1646
|
+
:select_result_value => true)
|
1592
1647
|
@text_input.description =
|
1593
|
-
|
1594
|
-
|
1648
|
+
try_xpaths(text_input_node, ["description/text()"],
|
1649
|
+
:select_result_value => true)
|
1595
1650
|
@text_input.link =
|
1596
|
-
|
1597
|
-
|
1651
|
+
try_xpaths(text_input_node, ["link/text()"],
|
1652
|
+
:select_result_value => true)
|
1598
1653
|
@text_input.name =
|
1599
|
-
|
1600
|
-
|
1654
|
+
try_xpaths(text_input_node, ["name/text()"],
|
1655
|
+
:select_result_value => true)
|
1601
1656
|
end
|
1602
1657
|
end
|
1603
1658
|
return @text_input
|
@@ -1606,43 +1661,28 @@ module FeedTools
|
|
1606
1661
|
# Returns the feed's copyright information
|
1607
1662
|
def copyright
|
1608
1663
|
if @copyright.nil?
|
1609
|
-
|
1610
|
-
|
1611
|
-
|
1612
|
-
|
1613
|
-
|
1614
|
-
|
1615
|
-
|
1616
|
-
|
1617
|
-
|
1618
|
-
|
1619
|
-
end
|
1620
|
-
if copyright_node.nil?
|
1621
|
-
copyright_node = XPath.first(channel_node, "copyright",
|
1622
|
-
FEED_TOOLS_NAMESPACES)
|
1623
|
-
end
|
1624
|
-
if copyright_node.nil?
|
1625
|
-
copyright_node = XPath.first(channel_node, "atom03:copyright",
|
1626
|
-
FEED_TOOLS_NAMESPACES)
|
1627
|
-
end
|
1628
|
-
if copyright_node.nil?
|
1629
|
-
copyright_node = XPath.first(channel_node, "atom10:copyright",
|
1630
|
-
FEED_TOOLS_NAMESPACES)
|
1631
|
-
end
|
1632
|
-
if copyright_node.nil?
|
1633
|
-
copyright_node = XPath.first(channel_node, "copyrights",
|
1634
|
-
FEED_TOOLS_NAMESPACES)
|
1635
|
-
end
|
1636
|
-
end
|
1664
|
+
repair_entities = false
|
1665
|
+
copyright_node = try_xpaths(self.channel_node, [
|
1666
|
+
"atom10:copyright",
|
1667
|
+
"atom03:copyright",
|
1668
|
+
"atom:copyright",
|
1669
|
+
"copyright",
|
1670
|
+
"copyrights",
|
1671
|
+
"dc:rights",
|
1672
|
+
"rights"
|
1673
|
+
])
|
1637
1674
|
if copyright_node.nil?
|
1638
1675
|
return nil
|
1639
1676
|
end
|
1640
|
-
copyright_type =
|
1641
|
-
|
1642
|
-
|
1677
|
+
copyright_type = try_xpaths(copyright_node, "@type",
|
1678
|
+
:select_result_value => true)
|
1679
|
+
copyright_mode = try_xpaths(copyright_node, "@mode",
|
1680
|
+
:select_result_value => true)
|
1681
|
+
copyright_encoding = try_xpaths(copyright_node, "@encoding",
|
1682
|
+
:select_result_value => true)
|
1643
1683
|
|
1644
1684
|
# Note that we're checking for misuse of type, mode and encoding here
|
1645
|
-
if copyright_encoding
|
1685
|
+
if !copyright_encoding.blank?
|
1646
1686
|
@copyright =
|
1647
1687
|
"[Embedded data objects are not currently supported.]"
|
1648
1688
|
elsif copyright_node.cdatas.size > 0
|
@@ -1669,7 +1709,7 @@ module FeedTools
|
|
1669
1709
|
end
|
1670
1710
|
|
1671
1711
|
@copyright = @copyright.strip unless @copyright.nil?
|
1672
|
-
@copyright = nil if @copyright
|
1712
|
+
@copyright = nil if @copyright.blank?
|
1673
1713
|
end
|
1674
1714
|
return @copyright
|
1675
1715
|
end
|
@@ -1684,9 +1724,11 @@ module FeedTools
|
|
1684
1724
|
if @time_to_live.nil?
|
1685
1725
|
unless channel_node.nil?
|
1686
1726
|
# get the feed time to live from the xml document
|
1687
|
-
update_frequency =
|
1688
|
-
|
1689
|
-
|
1727
|
+
update_frequency = try_xpaths(self.channel_node,
|
1728
|
+
["syn:updateFrequency/text()"], :select_result_value => true)
|
1729
|
+
if !update_frequency.blank?
|
1730
|
+
update_period = try_xpaths(self.channel_node,
|
1731
|
+
["syn:updatePeriod/text()"], :select_result_value => true)
|
1690
1732
|
if update_period == "daily"
|
1691
1733
|
@time_to_live = update_frequency.to_i.day
|
1692
1734
|
elsif update_period == "weekly"
|
@@ -1702,9 +1744,11 @@ module FeedTools
|
|
1702
1744
|
end
|
1703
1745
|
if @time_to_live.nil?
|
1704
1746
|
# usually expressed in minutes
|
1705
|
-
update_frequency =
|
1706
|
-
|
1707
|
-
|
1747
|
+
update_frequency = try_xpaths(self.channel_node, ["ttl/text()"],
|
1748
|
+
:select_result_value => true)
|
1749
|
+
if !update_frequency.blank?
|
1750
|
+
update_span = try_xpaths(self.channel_node, ["ttl/@span"],
|
1751
|
+
:select_result_value => true)
|
1708
1752
|
if update_span == "seconds"
|
1709
1753
|
@time_to_live = update_frequency.to_i
|
1710
1754
|
elsif update_span == "minutes"
|
@@ -1719,19 +1763,6 @@ module FeedTools
|
|
1719
1763
|
@time_to_live = update_frequency.to_i.month
|
1720
1764
|
elsif update_span == "years"
|
1721
1765
|
@time_to_live = update_frequency.to_i.year
|
1722
|
-
elsif update_frequency.to_i >= 3000
|
1723
|
-
# Normally, this should default to minutes, but realistically,
|
1724
|
-
# if they meant minutes, you're rarely going to see a value
|
1725
|
-
# higher than 120. If we see >= 3000, we're either dealing
|
1726
|
-
# with a stupid pseudo-spec that decided to use seconds, or
|
1727
|
-
# we're looking at someone who only has weekly updated
|
1728
|
-
# content. Worst case, we misreport the time, and we update
|
1729
|
-
# too often. Best case, we avoid accidentally updating the
|
1730
|
-
# feed only once a year. In the interests of being pragmatic,
|
1731
|
-
# and since the problem we avoid is a far greater one than
|
1732
|
-
# the one we cause, just run the check and hope no one
|
1733
|
-
# actually gets hurt.
|
1734
|
-
@time_to_live = update_frequency.to_i
|
1735
1766
|
else
|
1736
1767
|
@time_to_live = update_frequency.to_i.minute
|
1737
1768
|
end
|
@@ -1740,7 +1771,7 @@ module FeedTools
|
|
1740
1771
|
if @time_to_live.nil?
|
1741
1772
|
@time_to_live = 0
|
1742
1773
|
update_frequency_days =
|
1743
|
-
XPath.first(channel_node, "
|
1774
|
+
XPath.first(channel_node, "SCHEDULE/INTERVALTIME/@DAY").to_s
|
1744
1775
|
update_frequency_hours =
|
1745
1776
|
XPath.first(channel_node, "schedule/intervaltime/@hour").to_s
|
1746
1777
|
update_frequency_minutes =
|
@@ -1769,6 +1800,10 @@ module FeedTools
|
|
1769
1800
|
if @time_to_live.nil? || @time_to_live == 0
|
1770
1801
|
# Default to one hour
|
1771
1802
|
@time_to_live = 1.hour
|
1803
|
+
elsif FeedTools.configurations[:max_ttl] != nil &&
|
1804
|
+
FeedTools.configurations[:max_ttl] != 0 &&
|
1805
|
+
@time_to_live >= FeedTools.configurations[:max_ttl].to_i
|
1806
|
+
@time_to_live = FeedTools.configurations[:max_ttl].to_i
|
1772
1807
|
end
|
1773
1808
|
@time_to_live = @time_to_live.round
|
1774
1809
|
return @time_to_live
|
@@ -1784,20 +1819,21 @@ module FeedTools
|
|
1784
1819
|
def cloud
|
1785
1820
|
if @cloud.nil?
|
1786
1821
|
@cloud = FeedTools::Feed::Cloud.new
|
1787
|
-
@cloud.domain =
|
1788
|
-
|
1789
|
-
@cloud.
|
1822
|
+
@cloud.domain = try_xpaths(self.channel_node, ["cloud/@domain"],
|
1823
|
+
:select_result_value => true)
|
1824
|
+
@cloud.port = try_xpaths(self.channel_node, ["cloud/@port"],
|
1825
|
+
:select_result_value => true)
|
1826
|
+
@cloud.path = try_xpaths(self.channel_node, ["cloud/@path"],
|
1827
|
+
:select_result_value => true)
|
1790
1828
|
@cloud.register_procedure =
|
1791
|
-
|
1829
|
+
try_xpaths(self.channel_node, ["cloud/@registerProcedure"],
|
1830
|
+
:select_result_value => true)
|
1792
1831
|
@cloud.protocol =
|
1793
|
-
|
1794
|
-
|
1795
|
-
@cloud.
|
1796
|
-
@cloud.port = @cloud.port.to_i
|
1832
|
+
try_xpaths(self.channel_node, ["cloud/@protocol"],
|
1833
|
+
:select_result_value => true)
|
1834
|
+
@cloud.protocol.downcase unless @cloud.protocol.nil?
|
1835
|
+
@cloud.port = @cloud.port.to_s.to_i
|
1797
1836
|
@cloud.port = nil if @cloud.port == 0
|
1798
|
-
@cloud.path = nil if @cloud.path == ""
|
1799
|
-
@cloud.register_procedure = nil if @cloud.register_procedure == ""
|
1800
|
-
@cloud.protocol = nil if @cloud.protocol == ""
|
1801
1837
|
end
|
1802
1838
|
return @cloud
|
1803
1839
|
end
|
@@ -1810,9 +1846,9 @@ module FeedTools
|
|
1810
1846
|
# Returns the feed generator
|
1811
1847
|
def generator
|
1812
1848
|
if @generator.nil?
|
1813
|
-
@generator =
|
1814
|
-
|
1815
|
-
@generator =
|
1849
|
+
@generator = try_xpaths(self.channel_node, ["generator/text()"],
|
1850
|
+
:select_result_value => true)
|
1851
|
+
@generator = FeedTools.strip_html(@generator) unless @generator.nil?
|
1816
1852
|
end
|
1817
1853
|
return @generator
|
1818
1854
|
end
|
@@ -1825,9 +1861,9 @@ module FeedTools
|
|
1825
1861
|
# Returns the feed docs
|
1826
1862
|
def docs
|
1827
1863
|
if @docs.nil?
|
1828
|
-
@docs =
|
1829
|
-
|
1830
|
-
@docs =
|
1864
|
+
@docs = try_xpaths(self.channel_node, ["docs/text()"],
|
1865
|
+
:select_result_value => true)
|
1866
|
+
@docs = FeedTools.strip_html(@docs) unless @docs.nil?
|
1831
1867
|
end
|
1832
1868
|
return @docs
|
1833
1869
|
end
|
@@ -1840,23 +1876,23 @@ module FeedTools
|
|
1840
1876
|
# Returns the feed language
|
1841
1877
|
def language
|
1842
1878
|
if @language.nil?
|
1843
|
-
|
1844
|
-
|
1845
|
-
|
1846
|
-
|
1847
|
-
|
1848
|
-
|
1849
|
-
|
1850
|
-
|
1851
|
-
|
1852
|
-
@
|
1853
|
-
|
1854
|
-
|
1855
|
-
|
1879
|
+
@language = select_not_blank([
|
1880
|
+
try_xpaths(self.channel_node, [
|
1881
|
+
"language/text()",
|
1882
|
+
"dc:language/text()",
|
1883
|
+
"@dc:language",
|
1884
|
+
"@xml:lang",
|
1885
|
+
"xml:lang/text()"
|
1886
|
+
], :select_result_value => true),
|
1887
|
+
try_xpaths(self.root_node, [
|
1888
|
+
"@xml:lang",
|
1889
|
+
"xml:lang/text()"
|
1890
|
+
], :select_result_value => true)
|
1891
|
+
])
|
1892
|
+
if @language.blank?
|
1856
1893
|
@language = "en-us"
|
1857
1894
|
end
|
1858
1895
|
@language = @language.downcase
|
1859
|
-
@language = nil if @language == ""
|
1860
1896
|
end
|
1861
1897
|
return @language
|
1862
1898
|
end
|
@@ -1869,12 +1905,11 @@ module FeedTools
|
|
1869
1905
|
# Returns true if this feed contains explicit material.
|
1870
1906
|
def explicit?
|
1871
1907
|
if @explicit.nil?
|
1872
|
-
|
1873
|
-
|
1874
|
-
|
1875
|
-
|
1876
|
-
|
1877
|
-
"itunes:explicit/text()").to_s.downcase == "true"
|
1908
|
+
explicit_string = try_xpaths(self.channel_node, [
|
1909
|
+
"media:adult/text()",
|
1910
|
+
"itunes:explicit/text()"
|
1911
|
+
], :select_result_value => true)
|
1912
|
+
if explicit_string == "true" || explicit_string == "yes"
|
1878
1913
|
@explicit = true
|
1879
1914
|
else
|
1880
1915
|
@explicit = false
|
@@ -1888,66 +1923,68 @@ module FeedTools
|
|
1888
1923
|
@explicit = (new_explicit ? true : false)
|
1889
1924
|
end
|
1890
1925
|
|
1891
|
-
# Returns the feed
|
1892
|
-
def
|
1893
|
-
if @
|
1894
|
-
|
1895
|
-
|
1896
|
-
|
1897
|
-
|
1898
|
-
|
1899
|
-
|
1900
|
-
|
1901
|
-
|
1902
|
-
|
1903
|
-
|
1904
|
-
|
1905
|
-
|
1906
|
-
|
1907
|
-
|
1908
|
-
|
1909
|
-
|
1910
|
-
|
1911
|
-
|
1926
|
+
# Returns the feed entries
|
1927
|
+
def entries
|
1928
|
+
if @entries.blank?
|
1929
|
+
raw_entries = select_not_blank([
|
1930
|
+
try_xpaths_all(self.channel_node, [
|
1931
|
+
"atom10:entry",
|
1932
|
+
"atom03:entry",
|
1933
|
+
"atom:entry",
|
1934
|
+
"entry"
|
1935
|
+
]),
|
1936
|
+
try_xpaths_all(self.root_node, [
|
1937
|
+
"rss10:item",
|
1938
|
+
"item",
|
1939
|
+
"atom10:entry",
|
1940
|
+
"atom03:entry",
|
1941
|
+
"atom:entry",
|
1942
|
+
"entry"
|
1943
|
+
]),
|
1944
|
+
try_xpaths_all(self.channel_node, [
|
1945
|
+
"rss10:item",
|
1946
|
+
"item"
|
1947
|
+
])
|
1948
|
+
])
|
1912
1949
|
|
1913
1950
|
# create the individual feed items
|
1914
|
-
@
|
1915
|
-
|
1916
|
-
for
|
1917
|
-
|
1918
|
-
|
1919
|
-
|
1920
|
-
@
|
1951
|
+
@entries = []
|
1952
|
+
unless raw_entries.blank?
|
1953
|
+
for entry_node in raw_entries.reverse
|
1954
|
+
new_entry = FeedItem.new
|
1955
|
+
new_entry.feed_data = entry_node.to_s
|
1956
|
+
new_entry.feed_data_type = self.feed_data_type
|
1957
|
+
@entries << new_entry
|
1921
1958
|
end
|
1922
1959
|
end
|
1923
1960
|
end
|
1924
1961
|
|
1925
1962
|
# Sort the items
|
1926
|
-
@
|
1927
|
-
(b.time or Time.
|
1963
|
+
@entries = @entries.sort do |a, b|
|
1964
|
+
(b.time or Time.utc(1970)) <=> (a.time or Time.utc(1970))
|
1928
1965
|
end
|
1929
|
-
return @
|
1966
|
+
return @entries
|
1930
1967
|
end
|
1931
1968
|
|
1932
|
-
# Sets the
|
1933
|
-
def
|
1934
|
-
for
|
1935
|
-
unless
|
1969
|
+
# Sets the entries array to a new array.
|
1970
|
+
def entries=(new_entries)
|
1971
|
+
for entry in new_entries
|
1972
|
+
unless entry.kind_of? FeedTools::FeedItem
|
1936
1973
|
raise ArgumentError,
|
1937
|
-
"You should only add FeedItem objects to the
|
1974
|
+
"You should only add FeedItem objects to the entries array."
|
1938
1975
|
end
|
1939
1976
|
end
|
1940
|
-
@
|
1977
|
+
@entries = new_entries
|
1941
1978
|
end
|
1942
1979
|
|
1943
1980
|
# Syntactic sugar for appending feed items to a feed.
|
1944
|
-
def <<(
|
1945
|
-
@
|
1946
|
-
unless
|
1981
|
+
def <<(new_entry)
|
1982
|
+
@entries ||= []
|
1983
|
+
unless new_entry.kind_of? FeedTools::FeedItem
|
1947
1984
|
raise ArgumentError,
|
1948
|
-
"You should only add FeedItem objects to the
|
1985
|
+
"You should only add FeedItem objects to the entries array."
|
1949
1986
|
end
|
1950
|
-
@
|
1987
|
+
@entries << new_entry
|
1951
1988
|
end
|
1952
1989
|
|
1953
1990
|
# The time that the feed was last requested from the remote server. Nil
|
@@ -2020,11 +2057,14 @@ module FeedTools
|
|
2020
2057
|
end
|
2021
2058
|
|
2022
2059
|
# Generates xml based on the content of the feed
|
2023
|
-
def build_xml(feed_type=(self.feed_type or "
|
2024
|
-
xml_builder=Builder::XmlMarkup.new(
|
2025
|
-
|
2060
|
+
def build_xml(feed_type=(self.feed_type or "atom"), version=nil,
|
2061
|
+
xml_builder=Builder::XmlMarkup.new(
|
2062
|
+
:indent => 2, :escape_attrs => false))
|
2063
|
+
xml_builder.instruct! :xml, :version => "1.0",
|
2064
|
+
:encoding => (FeedTools.configurations[:output_encoding] or "utf-8")
|
2065
|
+
if feed_type == "rss" && (version == nil || version <= 0.0)
|
2026
2066
|
version = 1.0
|
2027
|
-
elsif feed_type == "atom" && (version == nil || version
|
2067
|
+
elsif feed_type == "atom" && (version == nil || version <= 0.0)
|
2028
2068
|
version = 1.0
|
2029
2069
|
end
|
2030
2070
|
if feed_type == "rss" && (version == 0.9 || version == 1.0 ||
|
@@ -2040,7 +2080,8 @@ module FeedTools
|
|
2040
2080
|
"xmlns:media" => FEED_TOOLS_NAMESPACES['media']) do
|
2041
2081
|
channel_attributes = {}
|
2042
2082
|
unless self.link.nil?
|
2043
|
-
channel_attributes["rdf:about"] =
|
2083
|
+
channel_attributes["rdf:about"] =
|
2084
|
+
FeedTools.escape_entities(self.link)
|
2044
2085
|
end
|
2045
2086
|
xml_builder.channel(channel_attributes) do
|
2046
2087
|
unless title.nil? || title == ""
|
@@ -2054,7 +2095,7 @@ module FeedTools
|
|
2054
2095
|
xml_builder.link
|
2055
2096
|
end
|
2056
2097
|
unless images.nil? || images.empty?
|
2057
|
-
xml_builder.image("rdf:resource" =>
|
2098
|
+
xml_builder.image("rdf:resource" => FeedTools.escape_entities(
|
2058
2099
|
images.first.url))
|
2059
2100
|
end
|
2060
2101
|
unless description.nil? || description == ""
|
@@ -2078,7 +2119,7 @@ module FeedTools
|
|
2078
2119
|
"item link field."
|
2079
2120
|
end
|
2080
2121
|
xml_builder.tag!("rdf:li", "rdf:resource" =>
|
2081
|
-
|
2122
|
+
FeedTools.escape_entities(item.link))
|
2082
2123
|
end
|
2083
2124
|
end
|
2084
2125
|
end
|
@@ -2095,20 +2136,20 @@ module FeedTools
|
|
2095
2136
|
end
|
2096
2137
|
best_image = images.first if best_image.nil?
|
2097
2138
|
xml_builder.image(
|
2098
|
-
"rdf:about" =>
|
2099
|
-
if best_image.title
|
2139
|
+
"rdf:about" => FeedTools.escape_entities(best_image.url)) do
|
2140
|
+
if !best_image.title.blank?
|
2100
2141
|
xml_builder.title(best_image.title)
|
2101
|
-
elsif self.title
|
2142
|
+
elsif !self.title.blank?
|
2102
2143
|
xml_builder.title(self.title)
|
2103
2144
|
else
|
2104
2145
|
xml_builder.title
|
2105
2146
|
end
|
2106
|
-
unless best_image.url.
|
2147
|
+
unless best_image.url.blank?
|
2107
2148
|
xml_builder.url(best_image.url)
|
2108
2149
|
end
|
2109
|
-
if best_image.link
|
2150
|
+
if !best_image.link.blank?
|
2110
2151
|
xml_builder.link(best_image.link)
|
2111
|
-
elsif self.link
|
2152
|
+
elsif !self.link.blank?
|
2112
2153
|
xml_builder.link(self.link)
|
2113
2154
|
else
|
2114
2155
|
xml_builder.link
|
@@ -2131,18 +2172,18 @@ module FeedTools
|
|
2131
2172
|
"xmlns:itunes" => FEED_TOOLS_NAMESPACES['itunes'],
|
2132
2173
|
"xmlns:media" => FEED_TOOLS_NAMESPACES['media']) do
|
2133
2174
|
xml_builder.channel do
|
2134
|
-
unless title.
|
2175
|
+
unless title.blank?
|
2135
2176
|
xml_builder.title(title)
|
2136
2177
|
end
|
2137
|
-
unless link.
|
2178
|
+
unless link.blank?
|
2138
2179
|
xml_builder.link(link)
|
2139
2180
|
end
|
2140
|
-
unless description.
|
2181
|
+
unless description.blank?
|
2141
2182
|
xml_builder.description(description)
|
2142
2183
|
end
|
2143
2184
|
xml_builder.ttl((time_to_live / 1.minute).to_s)
|
2144
2185
|
xml_builder.generator(
|
2145
|
-
|
2186
|
+
FeedTools.configurations[:generator_href])
|
2146
2187
|
build_xml_hook(feed_type, version, xml_builder)
|
2147
2188
|
unless items.nil?
|
2148
2189
|
for item in items
|
@@ -2152,53 +2193,12 @@ module FeedTools
|
|
2152
2193
|
end
|
2153
2194
|
end
|
2154
2195
|
elsif feed_type == "atom" && version == 0.3
|
2155
|
-
|
2156
|
-
return xml_builder.feed("xmlns" => FEED_TOOLS_NAMESPACES['atom03'],
|
2157
|
-
"version" => version,
|
2158
|
-
"xml:lang" => language) do
|
2159
|
-
unless title.nil? || title == ""
|
2160
|
-
xml_builder.title(title,
|
2161
|
-
"mode" => "escaped",
|
2162
|
-
"type" => "text/html")
|
2163
|
-
end
|
2164
|
-
xml_builder.author do
|
2165
|
-
unless self.author.nil? || self.author.name.nil?
|
2166
|
-
xml_builder.name(self.author.name)
|
2167
|
-
else
|
2168
|
-
xml_builder.name("n/a")
|
2169
|
-
end
|
2170
|
-
unless self.author.nil? || self.author.email.nil?
|
2171
|
-
xml_builder.email(self.author.email)
|
2172
|
-
end
|
2173
|
-
unless self.author.nil? || self.author.url.nil?
|
2174
|
-
xml_builder.url(self.author.url)
|
2175
|
-
end
|
2176
|
-
end
|
2177
|
-
unless link.nil? || link == ""
|
2178
|
-
xml_builder.link("href" => link,
|
2179
|
-
"rel" => "alternate",
|
2180
|
-
"type" => "text/html",
|
2181
|
-
"title" => title)
|
2182
|
-
end
|
2183
|
-
unless description.nil? || description == ""
|
2184
|
-
xml_builder.tagline(description,
|
2185
|
-
"mode" => "escaped",
|
2186
|
-
"type" => "text/html")
|
2187
|
-
end
|
2188
|
-
xml_builder.generator("FeedTools",
|
2189
|
-
"url" => "http://www.sporkmonger.com/projects/feedtools")
|
2190
|
-
build_xml_hook(feed_type, version, xml_builder)
|
2191
|
-
unless items.nil?
|
2192
|
-
for item in items
|
2193
|
-
item.build_xml(feed_type, version, xml_builder)
|
2194
|
-
end
|
2195
|
-
end
|
2196
|
-
end
|
2196
|
+
raise "Atom 0.3 is obsolete."
|
2197
2197
|
elsif feed_type == "atom" && version == 1.0
|
2198
2198
|
# normal atom format
|
2199
2199
|
return xml_builder.feed("xmlns" => FEED_TOOLS_NAMESPACES['atom10'],
|
2200
2200
|
"xml:lang" => language) do
|
2201
|
-
unless title.
|
2201
|
+
unless title.blank?
|
2202
2202
|
xml_builder.title(title,
|
2203
2203
|
"type" => "html")
|
2204
2204
|
end
|
@@ -2212,22 +2212,22 @@ module FeedTools
|
|
2212
2212
|
xml_builder.email(self.author.email)
|
2213
2213
|
end
|
2214
2214
|
unless self.author.nil? || self.author.url.nil?
|
2215
|
-
xml_builder.
|
2215
|
+
xml_builder.uri(self.author.url)
|
2216
2216
|
end
|
2217
2217
|
end
|
2218
|
-
unless self.url.
|
2218
|
+
unless self.url.blank?
|
2219
2219
|
xml_builder.link("href" => self.url,
|
2220
2220
|
"rel" => "self",
|
2221
2221
|
"type" => "application/atom+xml")
|
2222
2222
|
end
|
2223
|
-
unless self.link.
|
2224
|
-
xml_builder.link("href" => self.link,
|
2223
|
+
unless self.link.blank?
|
2224
|
+
xml_builder.link("href" => FeedTools.escape_entities(self.link),
|
2225
2225
|
"rel" => "alternate",
|
2226
2226
|
"type" => "text/html",
|
2227
|
-
"title" => self.title)
|
2227
|
+
"title" => FeedTools.escape_entities(self.title))
|
2228
2228
|
end
|
2229
|
-
unless description.
|
2230
|
-
xml_builder.subtitle(
|
2229
|
+
unless description.blank?
|
2230
|
+
xml_builder.subtitle(self.subtitle,
|
2231
2231
|
"type" => "html")
|
2232
2232
|
else
|
2233
2233
|
xml_builder.subtitle(FeedTools.no_content_string,
|
@@ -2242,8 +2242,8 @@ module FeedTools
|
|
2242
2242
|
else
|
2243
2243
|
xml_builder.updated(Time.now.gmtime.iso8601)
|
2244
2244
|
end
|
2245
|
-
xml_builder.generator(
|
2246
|
-
"
|
2245
|
+
xml_builder.generator(FeedTools.configurations[:generator_name] +
|
2246
|
+
" - " + FeedTools.configurations[:generator_href])
|
2247
2247
|
if self.id != nil
|
2248
2248
|
unless FeedTools.is_uri? self.id
|
2249
2249
|
if self.link != nil
|
@@ -2266,46 +2266,49 @@ module FeedTools
|
|
2266
2266
|
end
|
2267
2267
|
end
|
2268
2268
|
end
|
2269
|
+
else
|
2270
|
+
raise "Unsupported feed format/version."
|
2269
2271
|
end
|
2270
2272
|
end
|
2271
2273
|
|
2272
2274
|
# Persists the current feed state to the cache.
|
2273
2275
|
def save
|
2274
|
-
|
2275
|
-
|
2276
|
-
|
2277
|
-
|
2278
|
-
|
2279
|
-
|
2280
|
-
|
2281
|
-
|
2282
|
-
|
2283
|
-
self.
|
2284
|
-
|
2285
|
-
|
2286
|
-
|
2287
|
-
|
2288
|
-
|
2276
|
+
unless self.url =~ /^file:\/\//
|
2277
|
+
if FeedTools.feed_cache.nil?
|
2278
|
+
raise "Caching is currently disabled. Cannot save to cache."
|
2279
|
+
elsif self.url.nil?
|
2280
|
+
raise "The url field must be set to save to the cache."
|
2281
|
+
elsif self.cache_object.nil?
|
2282
|
+
raise "The cache_object is currently nil. Cannot save to cache."
|
2283
|
+
else
|
2284
|
+
self.cache_object.url = self.url
|
2285
|
+
unless self.feed_data.nil?
|
2286
|
+
self.cache_object.title = self.title
|
2287
|
+
self.cache_object.link = self.link
|
2288
|
+
self.cache_object.feed_data = self.feed_data
|
2289
|
+
self.cache_object.feed_data_type = self.feed_data_type.to_s
|
2290
|
+
end
|
2289
2291
|
self.cache_object.http_headers = self.http_headers.to_yaml
|
2292
|
+
self.cache_object.last_retrieved = self.last_retrieved
|
2293
|
+
self.cache_object.save
|
2290
2294
|
end
|
2291
|
-
self.cache_object.last_retrieved = self.last_retrieved
|
2292
|
-
self.cache_object.save
|
2293
2295
|
end
|
2294
2296
|
end
|
2295
2297
|
|
2296
|
-
alias_method :tagline, :
|
2297
|
-
alias_method :tagline=, :
|
2298
|
-
alias_method :
|
2299
|
-
alias_method :
|
2300
|
-
alias_method :abstract, :
|
2301
|
-
alias_method :abstract=, :
|
2302
|
-
alias_method :content, :
|
2303
|
-
alias_method :content=, :
|
2298
|
+
alias_method :tagline, :subtitle
|
2299
|
+
alias_method :tagline=, :subtitle=
|
2300
|
+
alias_method :description, :subtitle
|
2301
|
+
alias_method :description=, :subtitle=
|
2302
|
+
alias_method :abstract, :subtitle
|
2303
|
+
alias_method :abstract=, :subtitle=
|
2304
|
+
alias_method :content, :subtitle
|
2305
|
+
alias_method :content=, :subtitle=
|
2304
2306
|
alias_method :ttl, :time_to_live
|
2305
2307
|
alias_method :ttl=, :time_to_live=
|
2306
2308
|
alias_method :guid, :id
|
2307
2309
|
alias_method :guid=, :id=
|
2308
|
-
alias_method :
|
2310
|
+
alias_method :items, :entries
|
2311
|
+
alias_method :items=, :entries=
|
2309
2312
|
|
2310
2313
|
# passes missing methods to the cache_object
|
2311
2314
|
def method_missing(msg, *params)
|