feedtools 0.2.18 → 0.2.19
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +28 -0
- data/lib/feed_tools.rb +328 -63
- data/lib/feed_tools/feed.rb +767 -764
- data/lib/feed_tools/feed_item.rb +684 -625
- data/lib/feed_tools/helpers/debug_helper.rb +37 -0
- data/lib/feed_tools/helpers/feed_tools_helper.rb +45 -41
- data/lib/feed_tools/helpers/generic_helper.rb +164 -0
- data/lib/feed_tools/helpers/retrieval_helper.rb +36 -0
- data/rakefile +298 -2
- data/test/unit/amp_test.rb +70 -69
- data/test/unit/atom_test.rb +91 -9
- data/test/unit/cache_test.rb +30 -11
- data/test/unit/cdf_test.rb +6 -4
- data/test/unit/encoding_test.rb +99 -0
- data/test/unit/generation_test.rb +3 -40
- data/test/unit/helper_test.rb +66 -6
- data/test/unit/interface_test.rb +34 -0
- data/test/unit/itunes_test.rb +19 -0
- data/test/unit/nonstandard_test.rb +22 -4
- data/test/unit/rdf_test.rb +19 -0
- data/test/unit/rss_test.rb +137 -43
- metadata +18 -8
- data/lib/feed_tools/vendor/builder.rb +0 -15
- data/lib/feed_tools/vendor/builder/blankslate.rb +0 -55
- data/lib/feed_tools/vendor/builder/xmlbase.rb +0 -144
- data/lib/feed_tools/vendor/builder/xmlevents.rb +0 -65
- data/lib/feed_tools/vendor/builder/xmlmarkup.rb +0 -299
data/lib/feed_tools/feed.rb
CHANGED
@@ -29,9 +29,11 @@ module FeedTools
|
|
29
29
|
# :stopdoc:
|
30
30
|
include REXML
|
31
31
|
class << self
|
32
|
-
include GenericHelper
|
32
|
+
include FeedTools::GenericHelper
|
33
33
|
private :validate_options
|
34
34
|
end
|
35
|
+
include FeedTools::GenericHelper
|
36
|
+
private :validate_options
|
35
37
|
# :startdoc:
|
36
38
|
|
37
39
|
# Represents a feed/feed item's category
|
@@ -143,7 +145,7 @@ module FeedTools
|
|
143
145
|
@link = nil
|
144
146
|
@last_retrieved = nil
|
145
147
|
@time_to_live = nil
|
146
|
-
@
|
148
|
+
@entries = nil
|
147
149
|
@live = false
|
148
150
|
end
|
149
151
|
|
@@ -175,10 +177,12 @@ module FeedTools
|
|
175
177
|
# Loads the feed from the remote url if the feed has expired from the cache or cannot be
|
176
178
|
# retrieved from the cache for some reason.
|
177
179
|
def update!
|
178
|
-
if self.http_headers.
|
180
|
+
if self.http_headers.blank? && !(self.cache_object.nil?) &&
|
179
181
|
!(self.cache_object.http_headers.nil?)
|
180
182
|
@http_headers = YAML.load(self.cache_object.http_headers)
|
181
183
|
@http_headers = {} unless @http_headers.kind_of? Hash
|
184
|
+
elsif self.http_headers.blank?
|
185
|
+
@http_headers = {}
|
182
186
|
end
|
183
187
|
if self.expired? == false
|
184
188
|
@live = false
|
@@ -240,11 +244,12 @@ module FeedTools
|
|
240
244
|
self.http_headers['last-modified'] unless
|
241
245
|
self.http_headers['last-modified'].nil?
|
242
246
|
end
|
243
|
-
|
244
|
-
|
247
|
+
unless FeedTools.configurations[:user_agent].nil?
|
248
|
+
headers["User-Agent"] = FeedTools.configurations[:user_agent]
|
249
|
+
end
|
245
250
|
|
246
251
|
# The http feed access method
|
247
|
-
http_fetch = lambda do |feed_url,
|
252
|
+
http_fetch = lambda do |feed_url, request_headers, redirect_limit,
|
248
253
|
response_chain, no_headers|
|
249
254
|
raise FeedAccessError, 'Redirect too deep' if redirect_limit == 0
|
250
255
|
feed_uri = nil
|
@@ -256,11 +261,13 @@ module FeedTools
|
|
256
261
|
end
|
257
262
|
|
258
263
|
begin
|
259
|
-
|
264
|
+
# TODO: Proxy host and proxy port would go here if implemented
|
265
|
+
http = Net::HTTP.new(feed_uri.host, (feed_uri.port or 80))
|
266
|
+
http.start do
|
260
267
|
final_uri = feed_uri.path
|
261
268
|
final_uri += ('?' + feed_uri.query) if feed_uri.query
|
262
|
-
|
263
|
-
response = http.request_get(final_uri,
|
269
|
+
request_headers = {} if no_headers
|
270
|
+
response = http.request_get(final_uri, request_headers)
|
264
271
|
|
265
272
|
case response
|
266
273
|
when Net::HTTPSuccess
|
@@ -289,7 +296,8 @@ module FeedTools
|
|
289
296
|
response_chain << [feed_url, response]
|
290
297
|
new_location = response['location']
|
291
298
|
if response_chain.assoc(new_location) != nil
|
292
|
-
raise FeedAccessError,
|
299
|
+
raise FeedAccessError,
|
300
|
+
"Redirection loop detected: #{new_location}"
|
293
301
|
end
|
294
302
|
|
295
303
|
# Find out if we've already seen the url we've been
|
@@ -300,13 +308,21 @@ module FeedTools
|
|
300
308
|
:cache_only => true)
|
301
309
|
if cached_feed.cache_object != nil &&
|
302
310
|
cached_feed.cache_object.new_record? != true
|
303
|
-
|
304
|
-
|
311
|
+
if !cached_feed.expired? &&
|
312
|
+
!cached_feed.http_headers.blank?
|
313
|
+
# Copy the cached state
|
305
314
|
self.url = cached_feed.url
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
315
|
+
|
316
|
+
@feed_data = cached_feed.feed_data
|
317
|
+
@feed_data_type = cached_feed.feed_data_type
|
318
|
+
|
319
|
+
if @feed_data.blank?
|
320
|
+
raise "Invalid cache data."
|
321
|
+
end
|
322
|
+
|
323
|
+
@title = nil; self.title
|
324
|
+
@link = nil; self.link
|
325
|
+
|
310
326
|
self.last_retrieved = cached_feed.last_retrieved
|
311
327
|
self.http_headers = cached_feed.http_headers
|
312
328
|
self.cache_object = cached_feed.cache_object
|
@@ -342,6 +358,10 @@ module FeedTools
|
|
342
358
|
raise FeedAccessError, 'Socket error prevented feed retrieval'
|
343
359
|
rescue Timeout::Error
|
344
360
|
raise FeedAccessError, 'Timeout while attempting to retrieve feed'
|
361
|
+
rescue Errno::ENETUNREACH
|
362
|
+
raise FeedAccessError, 'Network was unreachable'
|
363
|
+
rescue Errno::ECONNRESET
|
364
|
+
raise FeedAccessError, 'Connection was reset by peer'
|
345
365
|
end
|
346
366
|
end
|
347
367
|
|
@@ -375,12 +395,12 @@ module FeedTools
|
|
375
395
|
end
|
376
396
|
end
|
377
397
|
unless @http_response.kind_of? Net::HTTPRedirection
|
398
|
+
@feed_data = self.http_response.body
|
378
399
|
@http_headers = {}
|
379
400
|
self.http_response.each_header do |key, value|
|
380
401
|
self.http_headers[key.downcase] = value
|
381
402
|
end
|
382
403
|
self.last_retrieved = Time.now.gmtime
|
383
|
-
self.feed_data = self.http_response.body
|
384
404
|
end
|
385
405
|
rescue FeedAccessError
|
386
406
|
@live = false
|
@@ -451,9 +471,9 @@ module FeedTools
|
|
451
471
|
open(file_name) do |file|
|
452
472
|
@http_response = nil
|
453
473
|
@http_headers = {}
|
474
|
+
@feed_data = file.read
|
475
|
+
@feed_data_type = :xml
|
454
476
|
self.last_retrieved = Time.now.gmtime
|
455
|
-
self.feed_data = file.read
|
456
|
-
self.feed_data_type = :xml
|
457
477
|
end
|
458
478
|
rescue
|
459
479
|
@live = false
|
@@ -478,8 +498,71 @@ module FeedTools
|
|
478
498
|
|
479
499
|
# Returns a hash of the http headers from the response.
|
480
500
|
def http_headers
|
501
|
+
if @http_headers.blank?
|
502
|
+
if !self.cache_object.nil? && !self.cache_object.http_headers.nil?
|
503
|
+
@http_headers = YAML.load(self.cache_object.http_headers)
|
504
|
+
@http_headers = {} unless @http_headers.kind_of? Hash
|
505
|
+
else
|
506
|
+
@http_headers = {}
|
507
|
+
end
|
508
|
+
end
|
481
509
|
return @http_headers
|
482
510
|
end
|
511
|
+
|
512
|
+
# Returns the encoding that the feed was parsed with
|
513
|
+
def encoding
|
514
|
+
if @encoding.nil?
|
515
|
+
unless self.http_headers.blank?
|
516
|
+
@encoding = "utf-8"
|
517
|
+
else
|
518
|
+
@encoding = self.encoding_from_xml_data
|
519
|
+
end
|
520
|
+
end
|
521
|
+
return @encoding
|
522
|
+
end
|
523
|
+
|
524
|
+
# Returns the encoding of feed calculated only from the xml data.
|
525
|
+
# I.e., the encoding we would come up with if we ignore RFC 3023.
|
526
|
+
def encoding_from_xml_data
|
527
|
+
if @encoding_from_xml_data.nil?
|
528
|
+
raw_data = self.feed_data
|
529
|
+
encoding_from_xml_instruct =
|
530
|
+
raw_data.scan(
|
531
|
+
/^<\?xml [^>]*encoding="([\w]*)"[^>]*\?>/
|
532
|
+
).flatten.first
|
533
|
+
unless encoding_from_xml_instruct.blank?
|
534
|
+
encoding_from_xml_instruct.downcase!
|
535
|
+
end
|
536
|
+
if encoding_from_xml_instruct.blank?
|
537
|
+
doc = Document.new(raw_data)
|
538
|
+
encoding_from_xml_instruct = doc.encoding.downcase
|
539
|
+
if encoding_from_xml_instruct == "utf-8"
|
540
|
+
# REXML has a tendency to report utf-8 overzealously, take with
|
541
|
+
# grain of salt
|
542
|
+
encoding_from_xml_instruct = nil
|
543
|
+
end
|
544
|
+
else
|
545
|
+
@encoding_from_xml_data = encoding_from_xml_instruct
|
546
|
+
end
|
547
|
+
if encoding_from_xml_instruct.blank?
|
548
|
+
sniff_table = {
|
549
|
+
"Lo\247\224" => "ebcdic-cp-us",
|
550
|
+
"<?xm" => "utf-8"
|
551
|
+
}
|
552
|
+
sniff = self.feed_data[0..3]
|
553
|
+
if sniff_table[sniff] != nil
|
554
|
+
@encoding_from_xml_data = sniff_table[sniff].downcase
|
555
|
+
end
|
556
|
+
else
|
557
|
+
@encoding_from_xml_data = encoding_from_xml_instruct
|
558
|
+
end
|
559
|
+
if @encoding_from_xml_data.blank?
|
560
|
+
# Safest assumption
|
561
|
+
@encoding_from_xml_data = "utf-8"
|
562
|
+
end
|
563
|
+
end
|
564
|
+
return @encoding_from_xml_data
|
565
|
+
end
|
483
566
|
|
484
567
|
# Returns the feed's raw data.
|
485
568
|
def feed_data
|
@@ -493,12 +576,40 @@ module FeedTools
|
|
493
576
|
|
494
577
|
# Sets the feed's data.
|
495
578
|
def feed_data=(new_feed_data)
|
579
|
+
@http_headers = {}
|
580
|
+
@cache_object = nil
|
581
|
+
@url = nil
|
582
|
+
@id = nil
|
583
|
+
@encoding = nil
|
496
584
|
@feed_data = new_feed_data
|
497
585
|
unless self.cache_object.nil?
|
498
586
|
self.cache_object.feed_data = new_feed_data
|
499
587
|
end
|
500
588
|
end
|
501
589
|
|
590
|
+
# Returns the feed's raw data as utf-8.
|
591
|
+
def feed_data_utf_8(force_encoding=nil)
|
592
|
+
if @feed_data_utf_8.nil?
|
593
|
+
raw_data = self.feed_data
|
594
|
+
if force_encoding.nil?
|
595
|
+
use_encoding = self.encoding
|
596
|
+
else
|
597
|
+
use_encoding = force_encoding
|
598
|
+
end
|
599
|
+
if use_encoding != "utf-8"
|
600
|
+
begin
|
601
|
+
@feed_data_utf_8 =
|
602
|
+
Iconv.new('utf-8', use_encoding).iconv(raw_data)
|
603
|
+
rescue
|
604
|
+
return raw_data
|
605
|
+
end
|
606
|
+
else
|
607
|
+
return self.feed_data
|
608
|
+
end
|
609
|
+
end
|
610
|
+
return @feed_data_utf_8
|
611
|
+
end
|
612
|
+
|
502
613
|
# Returns the data type of the feed
|
503
614
|
# Possible values:
|
504
615
|
# * :xml
|
@@ -526,24 +637,15 @@ module FeedTools
|
|
526
637
|
@xml_doc = nil
|
527
638
|
else
|
528
639
|
if @xml_doc.nil?
|
529
|
-
# INQUIRY: Is there any way of saying "dude, rescue *everything*"?
|
530
640
|
begin
|
531
641
|
begin
|
532
|
-
|
533
|
-
|
534
|
-
|
535
|
-
@xml_doc = Document.new(feed_data)
|
536
|
-
rescue Exception
|
537
|
-
# Something failed especially badly, attempt to repair the
|
538
|
-
# xml with htree.
|
539
|
-
@xml_doc = HTree.parse(feed_data).to_rexml
|
540
|
-
rescue
|
642
|
+
@xml_doc = Document.new(self.feed_data_utf_8,
|
643
|
+
:ignore_whitespace_nodes => :all)
|
644
|
+
rescue Object
|
541
645
|
# Something failed, attempt to repair the xml with htree.
|
542
|
-
@xml_doc = HTree.parse(
|
646
|
+
@xml_doc = HTree.parse(self.feed_data_utf_8).to_rexml
|
543
647
|
end
|
544
|
-
rescue
|
545
|
-
@xml_doc = nil
|
546
|
-
rescue
|
648
|
+
rescue Object
|
547
649
|
@xml_doc = nil
|
548
650
|
end
|
549
651
|
end
|
@@ -551,14 +653,23 @@ module FeedTools
|
|
551
653
|
return @xml_doc
|
552
654
|
end
|
553
655
|
|
554
|
-
# Returns the first node within the channel_node that matches the xpath
|
555
|
-
|
556
|
-
|
656
|
+
# Returns the first node within the channel_node that matches the xpath
|
657
|
+
# query.
|
658
|
+
def find_node(xpath, select_result_value=false)
|
659
|
+
if self.feed_data_type != :xml
|
660
|
+
raise "The feed data type is not xml."
|
661
|
+
end
|
662
|
+
return try_xpaths(self.channel_node, [xpath],
|
663
|
+
:select_result_value => select_result_value)
|
557
664
|
end
|
558
665
|
|
559
666
|
# Returns all nodes within the channel_node that match the xpath query.
|
560
|
-
def find_all_nodes(xpath)
|
561
|
-
|
667
|
+
def find_all_nodes(xpath, select_result_value=false)
|
668
|
+
if self.feed_data_type != :xml
|
669
|
+
raise "The feed data type is not xml."
|
670
|
+
end
|
671
|
+
return try_xpaths_all(self.channel_node, [xpath],
|
672
|
+
:select_result_value => select_result_value)
|
562
673
|
end
|
563
674
|
|
564
675
|
# Returns the root node of the feed.
|
@@ -568,7 +679,15 @@ module FeedTools
|
|
568
679
|
# break this stuff.
|
569
680
|
# E.g.: http://smogzer.tripod.com/smog.rdf
|
570
681
|
# ===================================================================
|
571
|
-
|
682
|
+
begin
|
683
|
+
if xml.nil?
|
684
|
+
return nil
|
685
|
+
else
|
686
|
+
@root_node = xml.root
|
687
|
+
end
|
688
|
+
rescue
|
689
|
+
return nil
|
690
|
+
end
|
572
691
|
end
|
573
692
|
return @root_node
|
574
693
|
end
|
@@ -576,13 +695,11 @@ module FeedTools
|
|
576
695
|
# Returns the channel node of the feed.
|
577
696
|
def channel_node
|
578
697
|
if @channel_node.nil? && root_node != nil
|
579
|
-
@channel_node =
|
580
|
-
|
581
|
-
|
582
|
-
|
583
|
-
|
584
|
-
@channel_node = XPath.first(root_node, "feedinfo")
|
585
|
-
end
|
698
|
+
@channel_node = try_xpaths(root_node, [
|
699
|
+
"channel",
|
700
|
+
"CHANNEL",
|
701
|
+
"feedinfo"
|
702
|
+
])
|
586
703
|
if @channel_node == nil
|
587
704
|
@channel_node = root_node
|
588
705
|
end
|
@@ -592,12 +709,13 @@ module FeedTools
|
|
592
709
|
|
593
710
|
# The cache object that handles the feed persistence.
|
594
711
|
def cache_object
|
712
|
+
if !@url.nil? && @url =~ /^file:\/\//
|
713
|
+
return nil
|
714
|
+
end
|
595
715
|
unless FeedTools.feed_cache.nil?
|
596
716
|
if @cache_object.nil?
|
597
717
|
begin
|
598
|
-
if @
|
599
|
-
@cache_object = FeedTools.feed_cache.find_by_id(@id)
|
600
|
-
elsif @url != nil
|
718
|
+
if @url != nil
|
601
719
|
@cache_object = FeedTools.feed_cache.find_by_url(@url)
|
602
720
|
end
|
603
721
|
if @cache_object.nil?
|
@@ -719,43 +837,22 @@ module FeedTools
|
|
719
837
|
# Returns the feed's unique id
|
720
838
|
def id
|
721
839
|
if @id.nil?
|
722
|
-
|
723
|
-
|
724
|
-
|
725
|
-
|
726
|
-
|
727
|
-
|
728
|
-
|
729
|
-
|
730
|
-
|
731
|
-
|
732
|
-
|
733
|
-
|
734
|
-
|
735
|
-
|
736
|
-
|
737
|
-
|
738
|
-
end
|
739
|
-
unless root_node.nil?
|
740
|
-
if @id == "" || @id.nil?
|
741
|
-
@id = XPath.first(root_node, "id/text()").to_s
|
742
|
-
end
|
743
|
-
if @id == ""
|
744
|
-
@id = XPath.first(channel_node, "atom10:id/text()",
|
745
|
-
FEED_TOOLS_NAMESPACES).to_s
|
746
|
-
end
|
747
|
-
if @id == ""
|
748
|
-
@id = XPath.first(channel_node, "atom03:id/text()",
|
749
|
-
FEED_TOOLS_NAMESPACES).to_s
|
750
|
-
end
|
751
|
-
if @id == ""
|
752
|
-
@id = XPath.first(channel_node, "atom:id/text()").to_s
|
753
|
-
end
|
754
|
-
if @id == ""
|
755
|
-
@id = XPath.first(root_node, "guid/text()").to_s
|
756
|
-
end
|
757
|
-
end
|
758
|
-
@id = nil if @id == ""
|
840
|
+
@id = select_not_blank([
|
841
|
+
try_xpaths(self.channel_node, [
|
842
|
+
"atom10:id/text()",
|
843
|
+
"atom03:id/text()",
|
844
|
+
"atom:id/text()",
|
845
|
+
"id/text()",
|
846
|
+
"guid/text()"
|
847
|
+
], :select_result_value => true),
|
848
|
+
try_xpaths(self.root_node, [
|
849
|
+
"atom10:id/text()",
|
850
|
+
"atom03:id/text()",
|
851
|
+
"atom:id/text()",
|
852
|
+
"id/text()",
|
853
|
+
"guid/text()"
|
854
|
+
], :select_result_value => true)
|
855
|
+
])
|
759
856
|
end
|
760
857
|
return @id
|
761
858
|
end
|
@@ -768,12 +865,12 @@ module FeedTools
|
|
768
865
|
# Returns the feed url.
|
769
866
|
def url
|
770
867
|
original_url = @url
|
771
|
-
override_url = lambda do
|
868
|
+
override_url = lambda do |result|
|
772
869
|
begin
|
773
|
-
if
|
870
|
+
if result.nil? && self.feed_data != nil
|
774
871
|
true
|
775
|
-
elsif
|
776
|
-
!(["http", "https"].include?(URI.parse(
|
872
|
+
elsif result != nil &&
|
873
|
+
!(["http", "https"].include?(URI.parse(result.to_s).scheme))
|
777
874
|
if self.feed_data != nil
|
778
875
|
true
|
779
876
|
else
|
@@ -786,47 +883,32 @@ module FeedTools
|
|
786
883
|
true
|
787
884
|
end
|
788
885
|
end
|
789
|
-
if override_url.call
|
790
|
-
|
791
|
-
|
792
|
-
|
793
|
-
|
794
|
-
|
795
|
-
|
796
|
-
|
797
|
-
|
798
|
-
|
799
|
-
|
800
|
-
|
801
|
-
|
802
|
-
|
803
|
-
|
804
|
-
@
|
805
|
-
|
806
|
-
|
807
|
-
|
808
|
-
|
809
|
-
end
|
810
|
-
if override_url.call
|
811
|
-
@url = XPath.first(channel_node, "admin:feed/@rdf:resource",
|
812
|
-
FEED_TOOLS_NAMESPACES).to_s
|
813
|
-
@url = nil if @url == ""
|
814
|
-
end
|
815
|
-
if override_url.call
|
816
|
-
@url = XPath.first(channel_node, "admin:feed/@resource").to_s
|
817
|
-
@url = nil if @url == ""
|
818
|
-
end
|
819
|
-
if override_url.call
|
820
|
-
@url = XPath.first(channel_node, "feed/@rdf:resource").to_s
|
821
|
-
@url = nil if @url == ""
|
822
|
-
end
|
823
|
-
if override_url.call
|
824
|
-
@url = XPath.first(channel_node, "feed/@resource").to_s
|
825
|
-
@url = nil if @url == ""
|
826
|
-
end
|
886
|
+
if override_url.call(@url)
|
887
|
+
# rdf:about is ordered last because a lot of people accidentally
|
888
|
+
# put the link in that field instead of the url to the feed.
|
889
|
+
# Ordering it last gives them as many chances as humanly possible
|
890
|
+
# for them to redeem themselves. If the link turns out to be the
|
891
|
+
@url = try_xpaths(self.channel_node, [
|
892
|
+
"link[@rel='self']/@href",
|
893
|
+
"atom10:link[@rel='self']/@href",
|
894
|
+
"atom03:link[@rel='self']/@href",
|
895
|
+
"atom:link[@rel='self']/@href",
|
896
|
+
"admin:feed/@rdf:resource",
|
897
|
+
"admin:feed/@resource",
|
898
|
+
"feed/@rdf:resource",
|
899
|
+
"feed/@resource",
|
900
|
+
"@rdf:about",
|
901
|
+
"@about"
|
902
|
+
], :select_result_value => true) do |result|
|
903
|
+
override_url.call(FeedTools.normalize_url(result))
|
904
|
+
end
|
905
|
+
@url = FeedTools.normalize_url(@url)
|
827
906
|
if @url == nil
|
828
907
|
@url = original_url
|
829
908
|
end
|
909
|
+
if @url == self.link
|
910
|
+
@url = original_url
|
911
|
+
end
|
830
912
|
end
|
831
913
|
return @url
|
832
914
|
end
|
@@ -840,37 +922,23 @@ module FeedTools
|
|
840
922
|
# Returns the feed title
|
841
923
|
def title
|
842
924
|
if @title.nil?
|
843
|
-
|
844
|
-
|
845
|
-
|
846
|
-
|
847
|
-
|
848
|
-
|
849
|
-
|
850
|
-
|
851
|
-
title_node = XPath.first(channel_node, "atom03:title",
|
852
|
-
FEED_TOOLS_NAMESPACES)
|
853
|
-
end
|
854
|
-
if title_node.nil?
|
855
|
-
title_node = XPath.first(channel_node, "atom:title")
|
856
|
-
end
|
857
|
-
if title_node.nil?
|
858
|
-
title_node = XPath.first(channel_node, "dc:title",
|
859
|
-
FEED_TOOLS_NAMESPACES)
|
860
|
-
end
|
861
|
-
if title_node.nil?
|
862
|
-
title_node = XPath.first(channel_node, "dc:title")
|
863
|
-
end
|
864
|
-
if title_node.nil?
|
865
|
-
title_node = XPath.first(channel_node, "TITLE")
|
866
|
-
end
|
867
|
-
end
|
925
|
+
repair_entities = false
|
926
|
+
title_node = try_xpaths(self.channel_node, [
|
927
|
+
"atom10:title",
|
928
|
+
"atom03:title",
|
929
|
+
"atom:title",
|
930
|
+
"title",
|
931
|
+
"dc:title"
|
932
|
+
])
|
868
933
|
if title_node.nil?
|
869
934
|
return nil
|
870
935
|
end
|
871
|
-
title_type =
|
872
|
-
|
873
|
-
|
936
|
+
title_type = try_xpaths(title_node, "@type",
|
937
|
+
:select_result_value => true)
|
938
|
+
title_mode = try_xpaths(title_node, "@mode",
|
939
|
+
:select_result_value => true)
|
940
|
+
title_encoding = try_xpaths(title_node, "@encoding",
|
941
|
+
:select_result_value => true)
|
874
942
|
|
875
943
|
# Note that we're checking for misuse of type, mode and encoding here
|
876
944
|
if title_type == "base64" || title_mode == "base64" ||
|
@@ -895,7 +963,7 @@ module FeedTools
|
|
895
963
|
@title.gsub!(/>\n</, "><")
|
896
964
|
@title.gsub!(/\n/, " ")
|
897
965
|
@title.strip!
|
898
|
-
@title = nil if @title
|
966
|
+
@title = nil if @title.blank?
|
899
967
|
self.cache_object.title = @title unless self.cache_object.nil?
|
900
968
|
end
|
901
969
|
return @title
|
@@ -907,124 +975,98 @@ module FeedTools
|
|
907
975
|
self.cache_object.title = new_title unless self.cache_object.nil?
|
908
976
|
end
|
909
977
|
|
910
|
-
# Returns the feed
|
911
|
-
def
|
912
|
-
if @
|
913
|
-
|
914
|
-
|
915
|
-
|
916
|
-
|
917
|
-
|
918
|
-
|
919
|
-
|
920
|
-
|
921
|
-
|
922
|
-
|
923
|
-
|
924
|
-
|
925
|
-
|
926
|
-
|
927
|
-
|
928
|
-
|
929
|
-
|
930
|
-
|
931
|
-
|
932
|
-
description_node = XPath.first(channel_node, "info")
|
933
|
-
end
|
934
|
-
if description_node.nil?
|
935
|
-
description_node = XPath.first(channel_node, "content:encoded")
|
936
|
-
end
|
937
|
-
if description_node.nil?
|
938
|
-
description_node = XPath.first(channel_node, "content:encoded",
|
939
|
-
FEED_TOOLS_NAMESPACES)
|
940
|
-
end
|
941
|
-
if description_node.nil?
|
942
|
-
description_node = XPath.first(root_node, "encoded")
|
943
|
-
end
|
944
|
-
if description_node.nil?
|
945
|
-
description_node = XPath.first(channel_node, "content")
|
946
|
-
end
|
947
|
-
if description_node.nil?
|
948
|
-
description_node = XPath.first(channel_node, "xhtml:body")
|
949
|
-
end
|
950
|
-
if description_node.nil?
|
951
|
-
description_node = XPath.first(channel_node, "body")
|
952
|
-
end
|
953
|
-
if description_node.nil?
|
954
|
-
description_node = XPath.first(channel_node, "blurb")
|
955
|
-
end
|
956
|
-
end
|
957
|
-
if description_node.nil?
|
978
|
+
# Returns the feed subtitle
|
979
|
+
def subtitle
|
980
|
+
if @subtitle.nil?
|
981
|
+
repair_entities = false
|
982
|
+
subtitle_node = try_xpaths(self.channel_node, [
|
983
|
+
"atom10:subtitle",
|
984
|
+
"subtitle",
|
985
|
+
"atom03:tagline",
|
986
|
+
"tagline",
|
987
|
+
"description",
|
988
|
+
"summary",
|
989
|
+
"abstract",
|
990
|
+
"ABSTRACT",
|
991
|
+
"content:encoded",
|
992
|
+
"encoded",
|
993
|
+
"content",
|
994
|
+
"xhtml:body",
|
995
|
+
"body",
|
996
|
+
"blurb",
|
997
|
+
"info"
|
998
|
+
])
|
999
|
+
if subtitle_node.nil?
|
958
1000
|
return nil
|
959
1001
|
end
|
960
|
-
|
961
|
-
|
962
|
-
|
1002
|
+
subtitle_type = try_xpaths(subtitle_node, "@type",
|
1003
|
+
:select_result_value => true)
|
1004
|
+
subtitle_mode = try_xpaths(subtitle_node, "@mode",
|
1005
|
+
:select_result_value => true)
|
1006
|
+
subtitle_encoding = try_xpaths(subtitle_node, "@encoding",
|
1007
|
+
:select_result_value => true)
|
963
1008
|
|
964
1009
|
# Note that we're checking for misuse of type, mode and encoding here
|
965
|
-
if
|
966
|
-
@
|
1010
|
+
if !subtitle_encoding.blank?
|
1011
|
+
@subtitle =
|
967
1012
|
"[Embedded data objects are not currently supported.]"
|
968
|
-
elsif
|
969
|
-
@
|
970
|
-
elsif
|
971
|
-
|
972
|
-
@
|
973
|
-
elsif
|
974
|
-
|
975
|
-
|
976
|
-
@
|
977
|
-
elsif
|
978
|
-
@
|
979
|
-
|
1013
|
+
elsif subtitle_node.cdatas.size > 0
|
1014
|
+
@subtitle = subtitle_node.cdatas.first.value
|
1015
|
+
elsif subtitle_type == "base64" || subtitle_mode == "base64" ||
|
1016
|
+
subtitle_encoding == "base64"
|
1017
|
+
@subtitle = Base64.decode64(subtitle_node.inner_xml.strip)
|
1018
|
+
elsif subtitle_type == "xhtml" || subtitle_mode == "xhtml" ||
|
1019
|
+
subtitle_type == "xml" || subtitle_mode == "xml" ||
|
1020
|
+
subtitle_type == "application/xhtml+xml"
|
1021
|
+
@subtitle = subtitle_node.inner_xml
|
1022
|
+
elsif subtitle_type == "escaped" || subtitle_mode == "escaped"
|
1023
|
+
@subtitle = FeedTools.unescape_entities(
|
1024
|
+
subtitle_node.inner_xml)
|
980
1025
|
else
|
981
|
-
@
|
1026
|
+
@subtitle = subtitle_node.inner_xml
|
982
1027
|
repair_entities = true
|
983
1028
|
end
|
984
|
-
if @
|
985
|
-
@
|
986
|
-
@description = "" if @description.nil?
|
1029
|
+
if @subtitle.blank?
|
1030
|
+
@subtitle = self.itunes_summary
|
987
1031
|
end
|
988
|
-
if @
|
989
|
-
@
|
990
|
-
@description = "" if @description.nil?
|
1032
|
+
if @subtitle.blank?
|
1033
|
+
@subtitle = self.itunes_subtitle
|
991
1034
|
end
|
992
1035
|
|
993
|
-
unless @
|
994
|
-
@
|
995
|
-
@
|
996
|
-
@
|
1036
|
+
unless @subtitle.blank?
|
1037
|
+
@subtitle = FeedTools.sanitize_html(@subtitle, :strip)
|
1038
|
+
@subtitle = FeedTools.unescape_entities(@subtitle) if repair_entities
|
1039
|
+
@subtitle = FeedTools.tidy_html(@subtitle)
|
997
1040
|
end
|
998
1041
|
|
999
|
-
@
|
1000
|
-
@
|
1042
|
+
@subtitle = @subtitle.strip unless @subtitle.nil?
|
1043
|
+
@subtitle = nil if @subtitle.blank?
|
1001
1044
|
end
|
1002
|
-
return @
|
1045
|
+
return @subtitle
|
1003
1046
|
end
|
1004
1047
|
|
1005
|
-
# Sets the feed
|
1006
|
-
def
|
1007
|
-
@
|
1048
|
+
# Sets the feed subtitle
|
1049
|
+
def subtitle=(new_subtitle)
|
1050
|
+
@subtitle = new_subtitle
|
1008
1051
|
end
|
1009
1052
|
|
1010
1053
|
# Returns the contents of the itunes:summary element
|
1011
1054
|
def itunes_summary
|
1012
1055
|
if @itunes_summary.nil?
|
1013
|
-
|
1014
|
-
|
1015
|
-
"itunes:summary/text()"
|
1016
|
-
|
1017
|
-
|
1018
|
-
|
1019
|
-
|
1020
|
-
|
1021
|
-
|
1022
|
-
|
1023
|
-
|
1056
|
+
@itunes_summary = select_not_blank([
|
1057
|
+
try_xpaths(self.channel_node, [
|
1058
|
+
"itunes:summary/text()"
|
1059
|
+
]),
|
1060
|
+
try_xpaths(self.root_node, [
|
1061
|
+
"itunes:summary/text()"
|
1062
|
+
])
|
1063
|
+
])
|
1064
|
+
unless @itunes_summary.blank?
|
1065
|
+
@itunes_summary = FeedTools.unescape_entities(@itunes_summary)
|
1066
|
+
@itunes_summary = FeedTools.sanitize_html(@itunes_summary)
|
1067
|
+
else
|
1024
1068
|
@itunes_summary = nil
|
1025
1069
|
end
|
1026
|
-
@itunes_summary =
|
1027
|
-
FeedTools.sanitize_html(@itunes_summary) unless @itunes_summary.nil?
|
1028
1070
|
end
|
1029
1071
|
return @itunes_summary
|
1030
1072
|
end
|
@@ -1037,21 +1079,19 @@ module FeedTools
|
|
1037
1079
|
# Returns the contents of the itunes:subtitle element
|
1038
1080
|
def itunes_subtitle
|
1039
1081
|
if @itunes_subtitle.nil?
|
1040
|
-
|
1041
|
-
|
1042
|
-
"itunes:subtitle/text()"
|
1043
|
-
|
1044
|
-
|
1045
|
-
|
1046
|
-
|
1047
|
-
|
1048
|
-
|
1049
|
-
|
1050
|
-
if @itunes_subtitle == ""
|
1051
|
-
@itunes_subtitle = nil
|
1052
|
-
end
|
1053
|
-
unless @itunes_subtitle.nil?
|
1082
|
+
@itunes_subtitle = select_not_blank([
|
1083
|
+
try_xpaths(self.channel_node, [
|
1084
|
+
"itunes:subtitle/text()"
|
1085
|
+
]),
|
1086
|
+
try_xpaths(self.root_node, [
|
1087
|
+
"itunes:subtitle/text()"
|
1088
|
+
])
|
1089
|
+
])
|
1090
|
+
unless @itunes_subtitle.blank?
|
1091
|
+
@itunes_subtitle = FeedTools.unescape_entities(@itunes_subtitle)
|
1054
1092
|
@itunes_subtitle = FeedTools.sanitize_html(@itunes_subtitle)
|
1093
|
+
else
|
1094
|
+
@itunes_subtitle = nil
|
1055
1095
|
end
|
1056
1096
|
end
|
1057
1097
|
return @itunes_subtitle
|
@@ -1065,43 +1105,80 @@ module FeedTools
|
|
1065
1105
|
# Returns the feed link
|
1066
1106
|
def link
|
1067
1107
|
if @link.nil?
|
1068
|
-
|
1069
|
-
|
1070
|
-
|
1071
|
-
|
1072
|
-
|
1073
|
-
|
1074
|
-
|
1075
|
-
|
1076
|
-
|
1077
|
-
|
1078
|
-
|
1079
|
-
|
1080
|
-
|
1081
|
-
|
1082
|
-
|
1083
|
-
|
1084
|
-
|
1085
|
-
|
1086
|
-
if
|
1087
|
-
@link = XPath.first(channel_node, "a/@href").to_s
|
1088
|
-
end
|
1089
|
-
if @link == ""
|
1090
|
-
@link = XPath.first(channel_node, "A/@HREF").to_s
|
1091
|
-
end
|
1092
|
-
end
|
1093
|
-
if @link == "" || @link.nil?
|
1094
|
-
if FeedTools.is_uri? self.guid
|
1108
|
+
@link = try_xpaths(self.channel_node, [
|
1109
|
+
"atom10:link[@type='application/xhtml+xml']/@href",
|
1110
|
+
"atom10:link[@type='text/html']/@href",
|
1111
|
+
"atom10:link[@rel='alternate']/@href",
|
1112
|
+
"atom03:link[@type='application/xhtml+xml']/@href",
|
1113
|
+
"atom03:link[@type='text/html']/@href",
|
1114
|
+
"atom03:link[@rel='alternate']/@href",
|
1115
|
+
"atom:link[@type='application/xhtml+xml']/@href",
|
1116
|
+
"atom:link[@type='text/html']/@href",
|
1117
|
+
"atom:link[@rel='alternate']/@href",
|
1118
|
+
"link[@type='application/xhtml+xml']/@href",
|
1119
|
+
"link[@type='text/html']/@href",
|
1120
|
+
"link[@rel='alternate']/@href",
|
1121
|
+
"link/text()",
|
1122
|
+
"@href",
|
1123
|
+
"a/@href"
|
1124
|
+
], :select_result_value => true)
|
1125
|
+
if @link.blank?
|
1126
|
+
if FeedTools.is_uri?(self.guid)
|
1095
1127
|
@link = self.guid
|
1096
1128
|
end
|
1097
1129
|
end
|
1098
|
-
if @link
|
1099
|
-
# Technically, we shouldn't use the base attribute for this, but
|
1100
|
-
# is missing, it's already a given that we're
|
1101
|
-
# always pray it's correct.
|
1130
|
+
if @link.blank? && channel_node != nil
|
1131
|
+
# Technically, we shouldn't use the base attribute for this, but
|
1132
|
+
# if the href attribute is missing, it's already a given that we're
|
1133
|
+
# looking at a messed up CDF file. We can always pray it's correct.
|
1102
1134
|
@link = XPath.first(channel_node, "@base").to_s
|
1103
1135
|
end
|
1104
|
-
|
1136
|
+
if !@link.blank?
|
1137
|
+
@link = FeedTools.unescape_entities(@link)
|
1138
|
+
end
|
1139
|
+
if @link.blank?
|
1140
|
+
link_node = try_xpaths(self.channel_node, [
|
1141
|
+
"atom10:link",
|
1142
|
+
"atom03:link",
|
1143
|
+
"atom:link",
|
1144
|
+
"link"
|
1145
|
+
])
|
1146
|
+
if link_node != nil
|
1147
|
+
if link_node.attributes['type'].to_s =~ /^image/ ||
|
1148
|
+
link_node.attributes['type'].to_s =~ /^application/ ||
|
1149
|
+
link_node.attributes['type'].to_s =~ /xml/ ||
|
1150
|
+
link_node.attributes['rel'].to_s =~ /self/
|
1151
|
+
for child in self.channel_node
|
1152
|
+
if child.class == REXML::Element
|
1153
|
+
if child.name.downcase == "link"
|
1154
|
+
if child.attributes['type'].to_s =~ /^image/ ||
|
1155
|
+
child.attributes['type'].to_s =~ /^application/ ||
|
1156
|
+
child.attributes['type'].to_s =~ /xml/ ||
|
1157
|
+
child.attributes['rel'].to_s =~ /self/
|
1158
|
+
@link = nil
|
1159
|
+
next
|
1160
|
+
else
|
1161
|
+
@link = child.attributes['href'].to_s
|
1162
|
+
if @link.blank?
|
1163
|
+
@link = child.inner_xml
|
1164
|
+
end
|
1165
|
+
if @link.blank?
|
1166
|
+
next
|
1167
|
+
end
|
1168
|
+
break
|
1169
|
+
end
|
1170
|
+
end
|
1171
|
+
end
|
1172
|
+
end
|
1173
|
+
else
|
1174
|
+
@link = link_node.attributes['href'].to_s
|
1175
|
+
end
|
1176
|
+
end
|
1177
|
+
end
|
1178
|
+
@link = nil if @link.blank?
|
1179
|
+
if FeedTools.configurations[:url_normalization_enabled]
|
1180
|
+
@link = FeedTools.normalize_url(@link)
|
1181
|
+
end
|
1105
1182
|
unless self.cache_object.nil?
|
1106
1183
|
self.cache_object.link = @link
|
1107
1184
|
end
|
@@ -1118,87 +1195,83 @@ module FeedTools
|
|
1118
1195
|
end
|
1119
1196
|
|
1120
1197
|
# Returns the url to the icon file for this feed.
|
1121
|
-
#
|
1122
|
-
# This method uses the url from the link field in order to avoid grabbing
|
1123
|
-
# the favicon for services like feedburner.
|
1124
1198
|
def icon
|
1125
1199
|
if @icon.nil?
|
1126
|
-
icon_node =
|
1127
|
-
|
1128
|
-
|
1129
|
-
|
1130
|
-
|
1131
|
-
|
1132
|
-
|
1133
|
-
|
1134
|
-
icon_node = XPath.first(channel_node, "icon")
|
1135
|
-
end
|
1136
|
-
if icon_node.nil?
|
1137
|
-
icon_node = XPath.first(channel_node, "logo[@style='icon']")
|
1138
|
-
end
|
1139
|
-
if icon_node.nil?
|
1140
|
-
icon_node = XPath.first(channel_node, "LOGO[@STYLE='ICON']")
|
1141
|
-
end
|
1200
|
+
icon_node = try_xpaths(self.channel_node, [
|
1201
|
+
"link[@rel='icon']",
|
1202
|
+
"link[@rel='shortcut icon']",
|
1203
|
+
"link[@type='image/x-icon']",
|
1204
|
+
"icon",
|
1205
|
+
"logo[@style='icon']",
|
1206
|
+
"LOGO[@STYLE='ICON']"
|
1207
|
+
])
|
1142
1208
|
unless icon_node.nil?
|
1143
1209
|
@icon = FeedTools.unescape_entities(
|
1144
1210
|
XPath.first(icon_node, "@href").to_s)
|
1145
|
-
if @icon
|
1211
|
+
if @icon.blank?
|
1146
1212
|
@icon = FeedTools.unescape_entities(
|
1147
1213
|
XPath.first(icon_node, "text()").to_s)
|
1148
1214
|
unless FeedTools.is_uri? @icon
|
1149
|
-
@icon =
|
1215
|
+
@icon = nil
|
1150
1216
|
end
|
1151
1217
|
end
|
1152
|
-
|
1218
|
+
@icon = nil if @icon.blank?
|
1219
|
+
end
|
1220
|
+
end
|
1221
|
+
return @icon
|
1222
|
+
end
|
1223
|
+
|
1224
|
+
# Returns the favicon url for this feed.
|
1225
|
+
# This method first tries to use the url from the link field instead of
|
1226
|
+
# the feed url, in order to avoid grabbing the favicon for services like
|
1227
|
+
# feedburner.
|
1228
|
+
def favicon
|
1229
|
+
if @favicon.nil?
|
1230
|
+
if !self.link.blank?
|
1231
|
+
begin
|
1153
1232
|
link_uri = URI.parse(FeedTools.normalize_url(self.link))
|
1154
|
-
|
1155
|
-
|
1233
|
+
if link_uri.scheme == "http"
|
1234
|
+
@favicon =
|
1235
|
+
"http://" + link_uri.host + "/favicon.ico"
|
1236
|
+
end
|
1237
|
+
rescue
|
1238
|
+
@favicon = nil
|
1239
|
+
end
|
1240
|
+
if @favicon.nil? && !self.url.blank?
|
1241
|
+
begin
|
1242
|
+
feed_uri = URI.parse(FeedTools.normalize_url(self.url))
|
1243
|
+
if feed_uri.scheme == "http"
|
1244
|
+
@favicon =
|
1245
|
+
"http://" + feed_uri.host + "/favicon.ico"
|
1246
|
+
end
|
1247
|
+
rescue
|
1248
|
+
@favicon = nil
|
1249
|
+
end
|
1156
1250
|
end
|
1157
|
-
|
1251
|
+
else
|
1252
|
+
@favicon = nil
|
1158
1253
|
end
|
1159
1254
|
end
|
1160
|
-
return @
|
1255
|
+
return @favicon
|
1161
1256
|
end
|
1162
1257
|
|
1163
1258
|
# Returns the feed author
|
1164
1259
|
def author
|
1165
1260
|
if @author.nil?
|
1166
1261
|
@author = FeedTools::Feed::Author.new
|
1167
|
-
|
1168
|
-
|
1169
|
-
|
1170
|
-
|
1171
|
-
|
1172
|
-
|
1173
|
-
|
1174
|
-
|
1175
|
-
|
1176
|
-
end
|
1177
|
-
if author_node.nil?
|
1178
|
-
author_node = XPath.first(channel_node, "author")
|
1179
|
-
end
|
1180
|
-
if author_node.nil?
|
1181
|
-
author_node = XPath.first(channel_node, "managingEditor")
|
1182
|
-
end
|
1183
|
-
if author_node.nil?
|
1184
|
-
author_node = XPath.first(channel_node, "dc:author",
|
1185
|
-
FEED_TOOLS_NAMESPACES)
|
1186
|
-
end
|
1187
|
-
if author_node.nil?
|
1188
|
-
author_node = XPath.first(channel_node, "dc:author")
|
1189
|
-
end
|
1190
|
-
if author_node.nil?
|
1191
|
-
author_node = XPath.first(channel_node, "dc:creator",
|
1192
|
-
FEED_TOOLS_NAMESPACES)
|
1193
|
-
end
|
1194
|
-
if author_node.nil?
|
1195
|
-
author_node = XPath.first(channel_node, "dc:creator")
|
1196
|
-
end
|
1197
|
-
end
|
1262
|
+
author_node = try_xpaths(self.channel_node, [
|
1263
|
+
"atom10:author",
|
1264
|
+
"atom03:author",
|
1265
|
+
"atom:author",
|
1266
|
+
"author",
|
1267
|
+
"managingEditor",
|
1268
|
+
"dc:author",
|
1269
|
+
"dc:creator"
|
1270
|
+
])
|
1198
1271
|
unless author_node.nil?
|
1199
1272
|
@author.raw = FeedTools.unescape_entities(
|
1200
|
-
XPath.first(author_node, "text()").to_s)
|
1201
|
-
@author.raw = nil if @author.raw
|
1273
|
+
XPath.first(author_node, "text()").to_s).strip
|
1274
|
+
@author.raw = nil if @author.raw.blank?
|
1202
1275
|
unless @author.raw.nil?
|
1203
1276
|
raw_scan = @author.raw.scan(
|
1204
1277
|
/(.*)\((\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\)/i)
|
@@ -1229,35 +1302,37 @@ module FeedTools
|
|
1229
1302
|
end
|
1230
1303
|
end
|
1231
1304
|
end
|
1232
|
-
|
1233
|
-
if @author.name == ""
|
1305
|
+
if @author.name.blank?
|
1234
1306
|
@author.name = FeedTools.unescape_entities(
|
1235
|
-
|
1236
|
-
|
1237
|
-
|
1238
|
-
|
1239
|
-
|
1240
|
-
end
|
1241
|
-
if @author.email == ""
|
1242
|
-
@author.email = FeedTools.unescape_entities(
|
1243
|
-
XPath.first(author_node, "email/text()").to_s)
|
1307
|
+
try_xpaths(author_node, [
|
1308
|
+
"name/text()",
|
1309
|
+
"@name"
|
1310
|
+
], :select_result_value => true)
|
1311
|
+
)
|
1244
1312
|
end
|
1245
|
-
if @author.email
|
1313
|
+
if @author.email.blank?
|
1246
1314
|
@author.email = FeedTools.unescape_entities(
|
1247
|
-
|
1315
|
+
try_xpaths(author_node, [
|
1316
|
+
"email/text()",
|
1317
|
+
"@email"
|
1318
|
+
], :select_result_value => true)
|
1319
|
+
)
|
1248
1320
|
end
|
1249
|
-
if @author.url
|
1321
|
+
if @author.url.blank?
|
1250
1322
|
@author.url = FeedTools.unescape_entities(
|
1251
|
-
|
1252
|
-
|
1253
|
-
|
1254
|
-
|
1255
|
-
|
1256
|
-
|
1257
|
-
|
1258
|
-
|
1259
|
-
|
1260
|
-
@author.
|
1323
|
+
try_xpaths(author_node, [
|
1324
|
+
"url/text()",
|
1325
|
+
"uri/text()",
|
1326
|
+
"@url",
|
1327
|
+
"@uri",
|
1328
|
+
"@href"
|
1329
|
+
], :select_result_value => true)
|
1330
|
+
)
|
1331
|
+
end
|
1332
|
+
@author.name = nil if @author.name.blank?
|
1333
|
+
@author.raw = nil if @author.raw.blank?
|
1334
|
+
@author.email = nil if @author.email.blank?
|
1335
|
+
@author.url = nil if @author.url.blank?
|
1261
1336
|
end
|
1262
1337
|
# Fallback on the itunes module if we didn't find an author name
|
1263
1338
|
begin
|
@@ -1290,15 +1365,14 @@ module FeedTools
|
|
1290
1365
|
def publisher
|
1291
1366
|
if @publisher.nil?
|
1292
1367
|
@publisher = FeedTools::Feed::Author.new
|
1368
|
+
publisher_node = try_xpaths(self.channel_node, [
|
1369
|
+
"webMaster/text()",
|
1370
|
+
"dc:publisher/text()"
|
1371
|
+
])
|
1293
1372
|
|
1294
1373
|
# Set the author name
|
1295
|
-
@publisher.raw = FeedTools.unescape_entities(
|
1296
|
-
|
1297
|
-
if @publisher.raw == ""
|
1298
|
-
@publisher.raw = FeedTools.unescape_entities(
|
1299
|
-
XPath.first(channel_node, "webMaster/text()").to_s)
|
1300
|
-
end
|
1301
|
-
unless @publisher.raw == ""
|
1374
|
+
@publisher.raw = FeedTools.unescape_entities(publisher_node.to_s)
|
1375
|
+
unless @publisher.raw.blank?
|
1302
1376
|
raw_scan = @publisher.raw.scan(
|
1303
1377
|
/(.*)\((\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\)/i)
|
1304
1378
|
if raw_scan.nil? || raw_scan.size == 0
|
@@ -1331,10 +1405,10 @@ module FeedTools
|
|
1331
1405
|
end
|
1332
1406
|
end
|
1333
1407
|
|
1334
|
-
@publisher.name = nil if @publisher.name
|
1335
|
-
@publisher.raw = nil if @publisher.raw
|
1336
|
-
@publisher.email = nil if @publisher.email
|
1337
|
-
@publisher.url = nil if @publisher.url
|
1408
|
+
@publisher.name = nil if @publisher.name.blank?
|
1409
|
+
@publisher.raw = nil if @publisher.raw.blank?
|
1410
|
+
@publisher.email = nil if @publisher.email.blank?
|
1411
|
+
@publisher.url = nil if @publisher.url.blank?
|
1338
1412
|
end
|
1339
1413
|
return @publisher
|
1340
1414
|
end
|
@@ -1364,9 +1438,12 @@ module FeedTools
|
|
1364
1438
|
# attribute.
|
1365
1439
|
def itunes_author
|
1366
1440
|
if @itunes_author.nil?
|
1367
|
-
@itunes_author = FeedTools.unescape_entities(
|
1368
|
-
|
1369
|
-
|
1441
|
+
@itunes_author = FeedTools.unescape_entities(
|
1442
|
+
try_xpaths(self.channel_node, [
|
1443
|
+
"itunes:author/text()"
|
1444
|
+
], :select_result_value => true)
|
1445
|
+
)
|
1446
|
+
@itunes_author = nil if @itunes_author.blank?
|
1370
1447
|
end
|
1371
1448
|
return @itunes_author
|
1372
1449
|
end
|
@@ -1374,29 +1451,40 @@ module FeedTools
|
|
1374
1451
|
# Returns the feed time
|
1375
1452
|
def time
|
1376
1453
|
if @time.nil?
|
1377
|
-
|
1378
|
-
|
1379
|
-
|
1380
|
-
|
1381
|
-
|
1382
|
-
|
1383
|
-
|
1384
|
-
|
1385
|
-
|
1386
|
-
|
1387
|
-
|
1388
|
-
|
1389
|
-
|
1390
|
-
|
1391
|
-
|
1454
|
+
time_string = try_xpaths(self.channel_node, [
|
1455
|
+
"atom10:updated/text()",
|
1456
|
+
"atom03:updated/text()",
|
1457
|
+
"atom:updated/text()",
|
1458
|
+
"updated/text()",
|
1459
|
+
"atom10:modified/text()",
|
1460
|
+
"atom03:modified/text()",
|
1461
|
+
"atom:modified/text()",
|
1462
|
+
"modified/text()",
|
1463
|
+
"time/text()",
|
1464
|
+
"atom10:issued/text()",
|
1465
|
+
"atom03:issued/text()",
|
1466
|
+
"atom:issued/text()",
|
1467
|
+
"issued/text()",
|
1468
|
+
"atom10:published/text()",
|
1469
|
+
"atom03:published/text()",
|
1470
|
+
"atom:published/text()",
|
1471
|
+
"published/text()",
|
1472
|
+
"pubDate/text()",
|
1473
|
+
"dc:date/text()",
|
1474
|
+
"date/text()"
|
1475
|
+
], :select_result_value => true)
|
1392
1476
|
begin
|
1393
|
-
|
1477
|
+
unless time_string.blank?
|
1394
1478
|
@time = Time.parse(time_string).gmtime
|
1395
1479
|
else
|
1396
|
-
|
1480
|
+
if FeedTools.configurations[:timestamp_estimation_enabled]
|
1481
|
+
@time = Time.now.gmtime
|
1482
|
+
end
|
1397
1483
|
end
|
1398
1484
|
rescue
|
1399
|
-
|
1485
|
+
if FeedTools.configurations[:timestamp_estimation_enabled]
|
1486
|
+
@time = Time.now.gmtime
|
1487
|
+
end
|
1400
1488
|
end
|
1401
1489
|
end
|
1402
1490
|
return @time
|
@@ -1410,13 +1498,11 @@ module FeedTools
|
|
1410
1498
|
# Returns the feed item updated time
|
1411
1499
|
def updated
|
1412
1500
|
if @updated.nil?
|
1413
|
-
|
1414
|
-
|
1415
|
-
|
1416
|
-
|
1417
|
-
|
1418
|
-
end
|
1419
|
-
if updated_string != nil && updated_string != ""
|
1501
|
+
updated_string = try_xpaths(self.channel_node, [
|
1502
|
+
"updated/text()",
|
1503
|
+
"modified/text()"
|
1504
|
+
], :select_result_value => true)
|
1505
|
+
unless updated_string.blank?
|
1420
1506
|
@updated = Time.parse(updated_string).gmtime rescue nil
|
1421
1507
|
else
|
1422
1508
|
@updated = nil
|
@@ -1430,51 +1516,16 @@ module FeedTools
|
|
1430
1516
|
@updated = new_updated
|
1431
1517
|
end
|
1432
1518
|
|
1433
|
-
# Returns the feed item issued time
|
1434
|
-
def issued
|
1435
|
-
if @issued.nil?
|
1436
|
-
unless channel_node.nil?
|
1437
|
-
issued_string = XPath.first(channel_node, "issued/text()").to_s
|
1438
|
-
if issued_string == ""
|
1439
|
-
issued_string = XPath.first(channel_node, "pubDate/text()").to_s
|
1440
|
-
end
|
1441
|
-
if issued_string == ""
|
1442
|
-
issued_string = XPath.first(channel_node, "dc:date/text()").to_s
|
1443
|
-
end
|
1444
|
-
if issued_string == ""
|
1445
|
-
issued_string = XPath.first(channel_node, "published/text()").to_s
|
1446
|
-
end
|
1447
|
-
end
|
1448
|
-
if issued_string != nil && issued_string != ""
|
1449
|
-
@issued = Time.parse(issued_string).gmtime rescue nil
|
1450
|
-
else
|
1451
|
-
@issued = nil
|
1452
|
-
end
|
1453
|
-
end
|
1454
|
-
return @issued
|
1455
|
-
end
|
1456
|
-
|
1457
|
-
# Sets the feed item issued time
|
1458
|
-
def issued=(new_issued)
|
1459
|
-
@issued = new_issued
|
1460
|
-
end
|
1461
|
-
|
1462
1519
|
# Returns the feed item published time
|
1463
1520
|
def published
|
1464
1521
|
if @published.nil?
|
1465
|
-
|
1466
|
-
|
1467
|
-
|
1468
|
-
|
1469
|
-
|
1470
|
-
|
1471
|
-
|
1472
|
-
end
|
1473
|
-
if published_string == ""
|
1474
|
-
published_string = XPath.first(channel_node, "issued/text()").to_s
|
1475
|
-
end
|
1476
|
-
end
|
1477
|
-
if published_string != nil && published_string != ""
|
1522
|
+
published_string = try_xpaths(self.channel_node, [
|
1523
|
+
"published/text()",
|
1524
|
+
"pubDate/text()",
|
1525
|
+
"issued/text()",
|
1526
|
+
"dc:date/text()"
|
1527
|
+
], :select_result_value => true)
|
1528
|
+
unless published_string.blank?
|
1478
1529
|
@published = Time.parse(published_string).gmtime rescue nil
|
1479
1530
|
else
|
1480
1531
|
@published = nil
|
@@ -1492,28 +1543,26 @@ module FeedTools
|
|
1492
1543
|
def categories
|
1493
1544
|
if @categories.nil?
|
1494
1545
|
@categories = []
|
1495
|
-
category_nodes =
|
1496
|
-
|
1497
|
-
|
1498
|
-
|
1546
|
+
category_nodes = try_xpaths_all(self.channel_node, [
|
1547
|
+
"category",
|
1548
|
+
"dc:subject"
|
1549
|
+
])
|
1499
1550
|
unless category_nodes.nil?
|
1500
1551
|
for category_node in category_nodes
|
1501
1552
|
category = FeedTools::Feed::Category.new
|
1502
|
-
category.term =
|
1503
|
-
|
1504
|
-
|
1505
|
-
|
1506
|
-
category.term.strip! unless category.term.
|
1507
|
-
category.
|
1508
|
-
|
1509
|
-
category.label.strip! unless category.label.
|
1510
|
-
category.
|
1511
|
-
|
1512
|
-
|
1513
|
-
|
1514
|
-
|
1515
|
-
category.scheme.strip! unless category.scheme.nil?
|
1516
|
-
category.scheme = nil if category.scheme == ""
|
1553
|
+
category.term = try_xpaths(category_node, [
|
1554
|
+
"@term",
|
1555
|
+
"text()"
|
1556
|
+
], :select_result_value => true)
|
1557
|
+
category.term.strip! unless category.term.blank?
|
1558
|
+
category.label = try_xpaths(category_node, ["@label"],
|
1559
|
+
:select_result_value => true)
|
1560
|
+
category.label.strip! unless category.label.blank?
|
1561
|
+
category.scheme = try_xpaths(category_node, [
|
1562
|
+
"@scheme",
|
1563
|
+
"@domain"
|
1564
|
+
], :select_result_value => true)
|
1565
|
+
category.scheme.strip! unless category.scheme.blank?
|
1517
1566
|
@categories << category
|
1518
1567
|
end
|
1519
1568
|
end
|
@@ -1525,55 +1574,61 @@ module FeedTools
|
|
1525
1574
|
def images
|
1526
1575
|
if @images.nil?
|
1527
1576
|
@images = []
|
1528
|
-
|
1529
|
-
|
1530
|
-
|
1531
|
-
|
1532
|
-
|
1533
|
-
|
1534
|
-
|
1535
|
-
|
1536
|
-
|
1537
|
-
|
1538
|
-
|
1539
|
-
|
1540
|
-
|
1541
|
-
|
1542
|
-
|
1543
|
-
|
1544
|
-
|
1545
|
-
|
1546
|
-
|
1547
|
-
|
1548
|
-
|
1577
|
+
image_nodes = try_xpaths_all(self.channel_node, [
|
1578
|
+
"image",
|
1579
|
+
"logo",
|
1580
|
+
"atom10:link",
|
1581
|
+
"atom03:link",
|
1582
|
+
"atom:link",
|
1583
|
+
"link"
|
1584
|
+
])
|
1585
|
+
unless image_nodes.blank?
|
1586
|
+
for image_node in image_nodes
|
1587
|
+
image = FeedTools::Feed::Image.new
|
1588
|
+
image.url = try_xpaths(image_node, [
|
1589
|
+
"url/text()",
|
1590
|
+
"@rdf:resource"
|
1591
|
+
], :select_result_value => true)
|
1592
|
+
if image.url.blank? && (image_node.name == "logo" ||
|
1593
|
+
(image_node.attributes['type'].to_s =~ /^image/) == 0)
|
1594
|
+
image.url = try_xpaths(image_node, [
|
1595
|
+
"@atom10:href",
|
1596
|
+
"@atom03:href",
|
1597
|
+
"@atom:href",
|
1598
|
+
"@href"
|
1599
|
+
], :select_result_value => true)
|
1600
|
+
if image.url == self.link && image.url != nil
|
1601
|
+
image.url = nil
|
1549
1602
|
end
|
1550
|
-
if image.url == "" && image_node.name == "LOGO"
|
1551
|
-
image.url = XPath.first(image_node, "@HREF").to_s
|
1552
|
-
end
|
1553
|
-
image.url.strip! unless image.url.nil?
|
1554
|
-
image.url = nil if image.url == ""
|
1555
|
-
image.title = XPath.first(image_node, "title/text()").to_s
|
1556
|
-
image.title.strip! unless image.title.nil?
|
1557
|
-
image.title = nil if image.title == ""
|
1558
|
-
image.description =
|
1559
|
-
XPath.first(image_node, "description/text()").to_s
|
1560
|
-
image.description.strip! unless image.description.nil?
|
1561
|
-
image.description = nil if image.description == ""
|
1562
|
-
image.link = XPath.first(image_node, "link/text()").to_s
|
1563
|
-
image.link.strip! unless image.link.nil?
|
1564
|
-
image.link = nil if image.link == ""
|
1565
|
-
image.height = XPath.first(image_node, "height/text()").to_s.to_i
|
1566
|
-
image.height = nil if image.height <= 0
|
1567
|
-
image.width = XPath.first(image_node, "width/text()").to_s.to_i
|
1568
|
-
image.width = nil if image.width <= 0
|
1569
|
-
image.style = XPath.first(image_node, "@style").to_s.downcase
|
1570
|
-
if image.style == ""
|
1571
|
-
image.style = XPath.first(image_node, "@STYLE").to_s.downcase
|
1572
|
-
end
|
1573
|
-
image.style.strip! unless image.style.nil?
|
1574
|
-
image.style = nil if image.style == ""
|
1575
|
-
@images << image
|
1576
1603
|
end
|
1604
|
+
if image.url.blank? && image_node.name == "LOGO"
|
1605
|
+
image.url = try_xpaths(image_node, [
|
1606
|
+
"@href"
|
1607
|
+
], :select_result_value => true)
|
1608
|
+
end
|
1609
|
+
image.url.strip! unless image.url.nil?
|
1610
|
+
image.title = try_xpaths(image_node,
|
1611
|
+
["title/text()"], :select_result_value => true)
|
1612
|
+
image.title.strip! unless image.title.nil?
|
1613
|
+
image.description = try_xpaths(image_node,
|
1614
|
+
["description/text()"], :select_result_value => true)
|
1615
|
+
image.description.strip! unless image.description.nil?
|
1616
|
+
image.link = try_xpaths(image_node,
|
1617
|
+
["link/text()"], :select_result_value => true)
|
1618
|
+
image.link.strip! unless image.link.nil?
|
1619
|
+
image.height = try_xpaths(image_node,
|
1620
|
+
["height/text()"], :select_result_value => true).to_i
|
1621
|
+
image.height = nil if image.height <= 0
|
1622
|
+
image.width = try_xpaths(image_node,
|
1623
|
+
["width/text()"], :select_result_value => true).to_i
|
1624
|
+
image.width = nil if image.width <= 0
|
1625
|
+
image.style = try_xpaths(image_node, [
|
1626
|
+
"style/text()",
|
1627
|
+
"@style"
|
1628
|
+
], :select_result_value => true)
|
1629
|
+
image.style.strip! unless image.style.nil?
|
1630
|
+
image.style.downcase! unless image.style.nil?
|
1631
|
+
@images << image unless image.url.nil?
|
1577
1632
|
end
|
1578
1633
|
end
|
1579
1634
|
end
|
@@ -1584,20 +1639,20 @@ module FeedTools
|
|
1584
1639
|
def text_input
|
1585
1640
|
if @text_input.nil?
|
1586
1641
|
@text_input = FeedTools::Feed::TextInput.new
|
1587
|
-
text_input_node =
|
1642
|
+
text_input_node = try_xpaths(self.channel_node, ["textInput"])
|
1588
1643
|
unless text_input_node.nil?
|
1589
1644
|
@text_input.title =
|
1590
|
-
|
1591
|
-
|
1645
|
+
try_xpaths(text_input_node, ["title/text()"],
|
1646
|
+
:select_result_value => true)
|
1592
1647
|
@text_input.description =
|
1593
|
-
|
1594
|
-
|
1648
|
+
try_xpaths(text_input_node, ["description/text()"],
|
1649
|
+
:select_result_value => true)
|
1595
1650
|
@text_input.link =
|
1596
|
-
|
1597
|
-
|
1651
|
+
try_xpaths(text_input_node, ["link/text()"],
|
1652
|
+
:select_result_value => true)
|
1598
1653
|
@text_input.name =
|
1599
|
-
|
1600
|
-
|
1654
|
+
try_xpaths(text_input_node, ["name/text()"],
|
1655
|
+
:select_result_value => true)
|
1601
1656
|
end
|
1602
1657
|
end
|
1603
1658
|
return @text_input
|
@@ -1606,43 +1661,28 @@ module FeedTools
|
|
1606
1661
|
# Returns the feed's copyright information
|
1607
1662
|
def copyright
|
1608
1663
|
if @copyright.nil?
|
1609
|
-
|
1610
|
-
|
1611
|
-
|
1612
|
-
|
1613
|
-
|
1614
|
-
|
1615
|
-
|
1616
|
-
|
1617
|
-
|
1618
|
-
|
1619
|
-
end
|
1620
|
-
if copyright_node.nil?
|
1621
|
-
copyright_node = XPath.first(channel_node, "copyright",
|
1622
|
-
FEED_TOOLS_NAMESPACES)
|
1623
|
-
end
|
1624
|
-
if copyright_node.nil?
|
1625
|
-
copyright_node = XPath.first(channel_node, "atom03:copyright",
|
1626
|
-
FEED_TOOLS_NAMESPACES)
|
1627
|
-
end
|
1628
|
-
if copyright_node.nil?
|
1629
|
-
copyright_node = XPath.first(channel_node, "atom10:copyright",
|
1630
|
-
FEED_TOOLS_NAMESPACES)
|
1631
|
-
end
|
1632
|
-
if copyright_node.nil?
|
1633
|
-
copyright_node = XPath.first(channel_node, "copyrights",
|
1634
|
-
FEED_TOOLS_NAMESPACES)
|
1635
|
-
end
|
1636
|
-
end
|
1664
|
+
repair_entities = false
|
1665
|
+
copyright_node = try_xpaths(self.channel_node, [
|
1666
|
+
"atom10:copyright",
|
1667
|
+
"atom03:copyright",
|
1668
|
+
"atom:copyright",
|
1669
|
+
"copyright",
|
1670
|
+
"copyrights",
|
1671
|
+
"dc:rights",
|
1672
|
+
"rights"
|
1673
|
+
])
|
1637
1674
|
if copyright_node.nil?
|
1638
1675
|
return nil
|
1639
1676
|
end
|
1640
|
-
copyright_type =
|
1641
|
-
|
1642
|
-
|
1677
|
+
copyright_type = try_xpaths(copyright_node, "@type",
|
1678
|
+
:select_result_value => true)
|
1679
|
+
copyright_mode = try_xpaths(copyright_node, "@mode",
|
1680
|
+
:select_result_value => true)
|
1681
|
+
copyright_encoding = try_xpaths(copyright_node, "@encoding",
|
1682
|
+
:select_result_value => true)
|
1643
1683
|
|
1644
1684
|
# Note that we're checking for misuse of type, mode and encoding here
|
1645
|
-
if copyright_encoding
|
1685
|
+
if !copyright_encoding.blank?
|
1646
1686
|
@copyright =
|
1647
1687
|
"[Embedded data objects are not currently supported.]"
|
1648
1688
|
elsif copyright_node.cdatas.size > 0
|
@@ -1669,7 +1709,7 @@ module FeedTools
|
|
1669
1709
|
end
|
1670
1710
|
|
1671
1711
|
@copyright = @copyright.strip unless @copyright.nil?
|
1672
|
-
@copyright = nil if @copyright
|
1712
|
+
@copyright = nil if @copyright.blank?
|
1673
1713
|
end
|
1674
1714
|
return @copyright
|
1675
1715
|
end
|
@@ -1684,9 +1724,11 @@ module FeedTools
|
|
1684
1724
|
if @time_to_live.nil?
|
1685
1725
|
unless channel_node.nil?
|
1686
1726
|
# get the feed time to live from the xml document
|
1687
|
-
update_frequency =
|
1688
|
-
|
1689
|
-
|
1727
|
+
update_frequency = try_xpaths(self.channel_node,
|
1728
|
+
["syn:updateFrequency/text()"], :select_result_value => true)
|
1729
|
+
if !update_frequency.blank?
|
1730
|
+
update_period = try_xpaths(self.channel_node,
|
1731
|
+
["syn:updatePeriod/text()"], :select_result_value => true)
|
1690
1732
|
if update_period == "daily"
|
1691
1733
|
@time_to_live = update_frequency.to_i.day
|
1692
1734
|
elsif update_period == "weekly"
|
@@ -1702,9 +1744,11 @@ module FeedTools
|
|
1702
1744
|
end
|
1703
1745
|
if @time_to_live.nil?
|
1704
1746
|
# usually expressed in minutes
|
1705
|
-
update_frequency =
|
1706
|
-
|
1707
|
-
|
1747
|
+
update_frequency = try_xpaths(self.channel_node, ["ttl/text()"],
|
1748
|
+
:select_result_value => true)
|
1749
|
+
if !update_frequency.blank?
|
1750
|
+
update_span = try_xpaths(self.channel_node, ["ttl/@span"],
|
1751
|
+
:select_result_value => true)
|
1708
1752
|
if update_span == "seconds"
|
1709
1753
|
@time_to_live = update_frequency.to_i
|
1710
1754
|
elsif update_span == "minutes"
|
@@ -1719,19 +1763,6 @@ module FeedTools
|
|
1719
1763
|
@time_to_live = update_frequency.to_i.month
|
1720
1764
|
elsif update_span == "years"
|
1721
1765
|
@time_to_live = update_frequency.to_i.year
|
1722
|
-
elsif update_frequency.to_i >= 3000
|
1723
|
-
# Normally, this should default to minutes, but realistically,
|
1724
|
-
# if they meant minutes, you're rarely going to see a value
|
1725
|
-
# higher than 120. If we see >= 3000, we're either dealing
|
1726
|
-
# with a stupid pseudo-spec that decided to use seconds, or
|
1727
|
-
# we're looking at someone who only has weekly updated
|
1728
|
-
# content. Worst case, we misreport the time, and we update
|
1729
|
-
# too often. Best case, we avoid accidentally updating the
|
1730
|
-
# feed only once a year. In the interests of being pragmatic,
|
1731
|
-
# and since the problem we avoid is a far greater one than
|
1732
|
-
# the one we cause, just run the check and hope no one
|
1733
|
-
# actually gets hurt.
|
1734
|
-
@time_to_live = update_frequency.to_i
|
1735
1766
|
else
|
1736
1767
|
@time_to_live = update_frequency.to_i.minute
|
1737
1768
|
end
|
@@ -1740,7 +1771,7 @@ module FeedTools
|
|
1740
1771
|
if @time_to_live.nil?
|
1741
1772
|
@time_to_live = 0
|
1742
1773
|
update_frequency_days =
|
1743
|
-
XPath.first(channel_node, "
|
1774
|
+
XPath.first(channel_node, "SCHEDULE/INTERVALTIME/@DAY").to_s
|
1744
1775
|
update_frequency_hours =
|
1745
1776
|
XPath.first(channel_node, "schedule/intervaltime/@hour").to_s
|
1746
1777
|
update_frequency_minutes =
|
@@ -1769,6 +1800,10 @@ module FeedTools
|
|
1769
1800
|
if @time_to_live.nil? || @time_to_live == 0
|
1770
1801
|
# Default to one hour
|
1771
1802
|
@time_to_live = 1.hour
|
1803
|
+
elsif FeedTools.configurations[:max_ttl] != nil &&
|
1804
|
+
FeedTools.configurations[:max_ttl] != 0 &&
|
1805
|
+
@time_to_live >= FeedTools.configurations[:max_ttl].to_i
|
1806
|
+
@time_to_live = FeedTools.configurations[:max_ttl].to_i
|
1772
1807
|
end
|
1773
1808
|
@time_to_live = @time_to_live.round
|
1774
1809
|
return @time_to_live
|
@@ -1784,20 +1819,21 @@ module FeedTools
|
|
1784
1819
|
def cloud
|
1785
1820
|
if @cloud.nil?
|
1786
1821
|
@cloud = FeedTools::Feed::Cloud.new
|
1787
|
-
@cloud.domain =
|
1788
|
-
|
1789
|
-
@cloud.
|
1822
|
+
@cloud.domain = try_xpaths(self.channel_node, ["cloud/@domain"],
|
1823
|
+
:select_result_value => true)
|
1824
|
+
@cloud.port = try_xpaths(self.channel_node, ["cloud/@port"],
|
1825
|
+
:select_result_value => true)
|
1826
|
+
@cloud.path = try_xpaths(self.channel_node, ["cloud/@path"],
|
1827
|
+
:select_result_value => true)
|
1790
1828
|
@cloud.register_procedure =
|
1791
|
-
|
1829
|
+
try_xpaths(self.channel_node, ["cloud/@registerProcedure"],
|
1830
|
+
:select_result_value => true)
|
1792
1831
|
@cloud.protocol =
|
1793
|
-
|
1794
|
-
|
1795
|
-
@cloud.
|
1796
|
-
@cloud.port = @cloud.port.to_i
|
1832
|
+
try_xpaths(self.channel_node, ["cloud/@protocol"],
|
1833
|
+
:select_result_value => true)
|
1834
|
+
@cloud.protocol.downcase unless @cloud.protocol.nil?
|
1835
|
+
@cloud.port = @cloud.port.to_s.to_i
|
1797
1836
|
@cloud.port = nil if @cloud.port == 0
|
1798
|
-
@cloud.path = nil if @cloud.path == ""
|
1799
|
-
@cloud.register_procedure = nil if @cloud.register_procedure == ""
|
1800
|
-
@cloud.protocol = nil if @cloud.protocol == ""
|
1801
1837
|
end
|
1802
1838
|
return @cloud
|
1803
1839
|
end
|
@@ -1810,9 +1846,9 @@ module FeedTools
|
|
1810
1846
|
# Returns the feed generator
|
1811
1847
|
def generator
|
1812
1848
|
if @generator.nil?
|
1813
|
-
@generator =
|
1814
|
-
|
1815
|
-
@generator =
|
1849
|
+
@generator = try_xpaths(self.channel_node, ["generator/text()"],
|
1850
|
+
:select_result_value => true)
|
1851
|
+
@generator = FeedTools.strip_html(@generator) unless @generator.nil?
|
1816
1852
|
end
|
1817
1853
|
return @generator
|
1818
1854
|
end
|
@@ -1825,9 +1861,9 @@ module FeedTools
|
|
1825
1861
|
# Returns the feed docs
|
1826
1862
|
def docs
|
1827
1863
|
if @docs.nil?
|
1828
|
-
@docs =
|
1829
|
-
|
1830
|
-
@docs =
|
1864
|
+
@docs = try_xpaths(self.channel_node, ["docs/text()"],
|
1865
|
+
:select_result_value => true)
|
1866
|
+
@docs = FeedTools.strip_html(@docs) unless @docs.nil?
|
1831
1867
|
end
|
1832
1868
|
return @docs
|
1833
1869
|
end
|
@@ -1840,23 +1876,23 @@ module FeedTools
|
|
1840
1876
|
# Returns the feed language
|
1841
1877
|
def language
|
1842
1878
|
if @language.nil?
|
1843
|
-
|
1844
|
-
|
1845
|
-
|
1846
|
-
|
1847
|
-
|
1848
|
-
|
1849
|
-
|
1850
|
-
|
1851
|
-
|
1852
|
-
@
|
1853
|
-
|
1854
|
-
|
1855
|
-
|
1879
|
+
@language = select_not_blank([
|
1880
|
+
try_xpaths(self.channel_node, [
|
1881
|
+
"language/text()",
|
1882
|
+
"dc:language/text()",
|
1883
|
+
"@dc:language",
|
1884
|
+
"@xml:lang",
|
1885
|
+
"xml:lang/text()"
|
1886
|
+
], :select_result_value => true),
|
1887
|
+
try_xpaths(self.root_node, [
|
1888
|
+
"@xml:lang",
|
1889
|
+
"xml:lang/text()"
|
1890
|
+
], :select_result_value => true)
|
1891
|
+
])
|
1892
|
+
if @language.blank?
|
1856
1893
|
@language = "en-us"
|
1857
1894
|
end
|
1858
1895
|
@language = @language.downcase
|
1859
|
-
@language = nil if @language == ""
|
1860
1896
|
end
|
1861
1897
|
return @language
|
1862
1898
|
end
|
@@ -1869,12 +1905,11 @@ module FeedTools
|
|
1869
1905
|
# Returns true if this feed contains explicit material.
|
1870
1906
|
def explicit?
|
1871
1907
|
if @explicit.nil?
|
1872
|
-
|
1873
|
-
|
1874
|
-
|
1875
|
-
|
1876
|
-
|
1877
|
-
"itunes:explicit/text()").to_s.downcase == "true"
|
1908
|
+
explicit_string = try_xpaths(self.channel_node, [
|
1909
|
+
"media:adult/text()",
|
1910
|
+
"itunes:explicit/text()"
|
1911
|
+
], :select_result_value => true)
|
1912
|
+
if explicit_string == "true" || explicit_string == "yes"
|
1878
1913
|
@explicit = true
|
1879
1914
|
else
|
1880
1915
|
@explicit = false
|
@@ -1888,66 +1923,68 @@ module FeedTools
|
|
1888
1923
|
@explicit = (new_explicit ? true : false)
|
1889
1924
|
end
|
1890
1925
|
|
1891
|
-
# Returns the feed
|
1892
|
-
def
|
1893
|
-
if @
|
1894
|
-
|
1895
|
-
|
1896
|
-
|
1897
|
-
|
1898
|
-
|
1899
|
-
|
1900
|
-
|
1901
|
-
|
1902
|
-
|
1903
|
-
|
1904
|
-
|
1905
|
-
|
1906
|
-
|
1907
|
-
|
1908
|
-
|
1909
|
-
|
1910
|
-
|
1911
|
-
|
1926
|
+
# Returns the feed entries
|
1927
|
+
def entries
|
1928
|
+
if @entries.blank?
|
1929
|
+
raw_entries = select_not_blank([
|
1930
|
+
try_xpaths_all(self.channel_node, [
|
1931
|
+
"atom10:entry",
|
1932
|
+
"atom03:entry",
|
1933
|
+
"atom:entry",
|
1934
|
+
"entry"
|
1935
|
+
]),
|
1936
|
+
try_xpaths_all(self.root_node, [
|
1937
|
+
"rss10:item",
|
1938
|
+
"item",
|
1939
|
+
"atom10:entry",
|
1940
|
+
"atom03:entry",
|
1941
|
+
"atom:entry",
|
1942
|
+
"entry"
|
1943
|
+
]),
|
1944
|
+
try_xpaths_all(self.channel_node, [
|
1945
|
+
"rss10:item",
|
1946
|
+
"item"
|
1947
|
+
])
|
1948
|
+
])
|
1912
1949
|
|
1913
1950
|
# create the individual feed items
|
1914
|
-
@
|
1915
|
-
|
1916
|
-
for
|
1917
|
-
|
1918
|
-
|
1919
|
-
|
1920
|
-
@
|
1951
|
+
@entries = []
|
1952
|
+
unless raw_entries.blank?
|
1953
|
+
for entry_node in raw_entries.reverse
|
1954
|
+
new_entry = FeedItem.new
|
1955
|
+
new_entry.feed_data = entry_node.to_s
|
1956
|
+
new_entry.feed_data_type = self.feed_data_type
|
1957
|
+
@entries << new_entry
|
1921
1958
|
end
|
1922
1959
|
end
|
1923
1960
|
end
|
1924
1961
|
|
1925
1962
|
# Sort the items
|
1926
|
-
@
|
1927
|
-
(b.time or Time.
|
1963
|
+
@entries = @entries.sort do |a, b|
|
1964
|
+
(b.time or Time.utc(1970)) <=> (a.time or Time.utc(1970))
|
1928
1965
|
end
|
1929
|
-
return @
|
1966
|
+
return @entries
|
1930
1967
|
end
|
1931
1968
|
|
1932
|
-
# Sets the
|
1933
|
-
def
|
1934
|
-
for
|
1935
|
-
unless
|
1969
|
+
# Sets the entries array to a new array.
|
1970
|
+
def entries=(new_entries)
|
1971
|
+
for entry in new_entries
|
1972
|
+
unless entry.kind_of? FeedTools::FeedItem
|
1936
1973
|
raise ArgumentError,
|
1937
|
-
"You should only add FeedItem objects to the
|
1974
|
+
"You should only add FeedItem objects to the entries array."
|
1938
1975
|
end
|
1939
1976
|
end
|
1940
|
-
@
|
1977
|
+
@entries = new_entries
|
1941
1978
|
end
|
1942
1979
|
|
1943
1980
|
# Syntactic sugar for appending feed items to a feed.
|
1944
|
-
def <<(
|
1945
|
-
@
|
1946
|
-
unless
|
1981
|
+
def <<(new_entry)
|
1982
|
+
@entries ||= []
|
1983
|
+
unless new_entry.kind_of? FeedTools::FeedItem
|
1947
1984
|
raise ArgumentError,
|
1948
|
-
"You should only add FeedItem objects to the
|
1985
|
+
"You should only add FeedItem objects to the entries array."
|
1949
1986
|
end
|
1950
|
-
@
|
1987
|
+
@entries << new_entry
|
1951
1988
|
end
|
1952
1989
|
|
1953
1990
|
# The time that the feed was last requested from the remote server. Nil
|
@@ -2020,11 +2057,14 @@ module FeedTools
|
|
2020
2057
|
end
|
2021
2058
|
|
2022
2059
|
# Generates xml based on the content of the feed
|
2023
|
-
def build_xml(feed_type=(self.feed_type or "
|
2024
|
-
xml_builder=Builder::XmlMarkup.new(
|
2025
|
-
|
2060
|
+
def build_xml(feed_type=(self.feed_type or "atom"), version=nil,
|
2061
|
+
xml_builder=Builder::XmlMarkup.new(
|
2062
|
+
:indent => 2, :escape_attrs => false))
|
2063
|
+
xml_builder.instruct! :xml, :version => "1.0",
|
2064
|
+
:encoding => (FeedTools.configurations[:output_encoding] or "utf-8")
|
2065
|
+
if feed_type == "rss" && (version == nil || version <= 0.0)
|
2026
2066
|
version = 1.0
|
2027
|
-
elsif feed_type == "atom" && (version == nil || version
|
2067
|
+
elsif feed_type == "atom" && (version == nil || version <= 0.0)
|
2028
2068
|
version = 1.0
|
2029
2069
|
end
|
2030
2070
|
if feed_type == "rss" && (version == 0.9 || version == 1.0 ||
|
@@ -2040,7 +2080,8 @@ module FeedTools
|
|
2040
2080
|
"xmlns:media" => FEED_TOOLS_NAMESPACES['media']) do
|
2041
2081
|
channel_attributes = {}
|
2042
2082
|
unless self.link.nil?
|
2043
|
-
channel_attributes["rdf:about"] =
|
2083
|
+
channel_attributes["rdf:about"] =
|
2084
|
+
FeedTools.escape_entities(self.link)
|
2044
2085
|
end
|
2045
2086
|
xml_builder.channel(channel_attributes) do
|
2046
2087
|
unless title.nil? || title == ""
|
@@ -2054,7 +2095,7 @@ module FeedTools
|
|
2054
2095
|
xml_builder.link
|
2055
2096
|
end
|
2056
2097
|
unless images.nil? || images.empty?
|
2057
|
-
xml_builder.image("rdf:resource" =>
|
2098
|
+
xml_builder.image("rdf:resource" => FeedTools.escape_entities(
|
2058
2099
|
images.first.url))
|
2059
2100
|
end
|
2060
2101
|
unless description.nil? || description == ""
|
@@ -2078,7 +2119,7 @@ module FeedTools
|
|
2078
2119
|
"item link field."
|
2079
2120
|
end
|
2080
2121
|
xml_builder.tag!("rdf:li", "rdf:resource" =>
|
2081
|
-
|
2122
|
+
FeedTools.escape_entities(item.link))
|
2082
2123
|
end
|
2083
2124
|
end
|
2084
2125
|
end
|
@@ -2095,20 +2136,20 @@ module FeedTools
|
|
2095
2136
|
end
|
2096
2137
|
best_image = images.first if best_image.nil?
|
2097
2138
|
xml_builder.image(
|
2098
|
-
"rdf:about" =>
|
2099
|
-
if best_image.title
|
2139
|
+
"rdf:about" => FeedTools.escape_entities(best_image.url)) do
|
2140
|
+
if !best_image.title.blank?
|
2100
2141
|
xml_builder.title(best_image.title)
|
2101
|
-
elsif self.title
|
2142
|
+
elsif !self.title.blank?
|
2102
2143
|
xml_builder.title(self.title)
|
2103
2144
|
else
|
2104
2145
|
xml_builder.title
|
2105
2146
|
end
|
2106
|
-
unless best_image.url.
|
2147
|
+
unless best_image.url.blank?
|
2107
2148
|
xml_builder.url(best_image.url)
|
2108
2149
|
end
|
2109
|
-
if best_image.link
|
2150
|
+
if !best_image.link.blank?
|
2110
2151
|
xml_builder.link(best_image.link)
|
2111
|
-
elsif self.link
|
2152
|
+
elsif !self.link.blank?
|
2112
2153
|
xml_builder.link(self.link)
|
2113
2154
|
else
|
2114
2155
|
xml_builder.link
|
@@ -2131,18 +2172,18 @@ module FeedTools
|
|
2131
2172
|
"xmlns:itunes" => FEED_TOOLS_NAMESPACES['itunes'],
|
2132
2173
|
"xmlns:media" => FEED_TOOLS_NAMESPACES['media']) do
|
2133
2174
|
xml_builder.channel do
|
2134
|
-
unless title.
|
2175
|
+
unless title.blank?
|
2135
2176
|
xml_builder.title(title)
|
2136
2177
|
end
|
2137
|
-
unless link.
|
2178
|
+
unless link.blank?
|
2138
2179
|
xml_builder.link(link)
|
2139
2180
|
end
|
2140
|
-
unless description.
|
2181
|
+
unless description.blank?
|
2141
2182
|
xml_builder.description(description)
|
2142
2183
|
end
|
2143
2184
|
xml_builder.ttl((time_to_live / 1.minute).to_s)
|
2144
2185
|
xml_builder.generator(
|
2145
|
-
|
2186
|
+
FeedTools.configurations[:generator_href])
|
2146
2187
|
build_xml_hook(feed_type, version, xml_builder)
|
2147
2188
|
unless items.nil?
|
2148
2189
|
for item in items
|
@@ -2152,53 +2193,12 @@ module FeedTools
|
|
2152
2193
|
end
|
2153
2194
|
end
|
2154
2195
|
elsif feed_type == "atom" && version == 0.3
|
2155
|
-
|
2156
|
-
return xml_builder.feed("xmlns" => FEED_TOOLS_NAMESPACES['atom03'],
|
2157
|
-
"version" => version,
|
2158
|
-
"xml:lang" => language) do
|
2159
|
-
unless title.nil? || title == ""
|
2160
|
-
xml_builder.title(title,
|
2161
|
-
"mode" => "escaped",
|
2162
|
-
"type" => "text/html")
|
2163
|
-
end
|
2164
|
-
xml_builder.author do
|
2165
|
-
unless self.author.nil? || self.author.name.nil?
|
2166
|
-
xml_builder.name(self.author.name)
|
2167
|
-
else
|
2168
|
-
xml_builder.name("n/a")
|
2169
|
-
end
|
2170
|
-
unless self.author.nil? || self.author.email.nil?
|
2171
|
-
xml_builder.email(self.author.email)
|
2172
|
-
end
|
2173
|
-
unless self.author.nil? || self.author.url.nil?
|
2174
|
-
xml_builder.url(self.author.url)
|
2175
|
-
end
|
2176
|
-
end
|
2177
|
-
unless link.nil? || link == ""
|
2178
|
-
xml_builder.link("href" => link,
|
2179
|
-
"rel" => "alternate",
|
2180
|
-
"type" => "text/html",
|
2181
|
-
"title" => title)
|
2182
|
-
end
|
2183
|
-
unless description.nil? || description == ""
|
2184
|
-
xml_builder.tagline(description,
|
2185
|
-
"mode" => "escaped",
|
2186
|
-
"type" => "text/html")
|
2187
|
-
end
|
2188
|
-
xml_builder.generator("FeedTools",
|
2189
|
-
"url" => "http://www.sporkmonger.com/projects/feedtools")
|
2190
|
-
build_xml_hook(feed_type, version, xml_builder)
|
2191
|
-
unless items.nil?
|
2192
|
-
for item in items
|
2193
|
-
item.build_xml(feed_type, version, xml_builder)
|
2194
|
-
end
|
2195
|
-
end
|
2196
|
-
end
|
2196
|
+
raise "Atom 0.3 is obsolete."
|
2197
2197
|
elsif feed_type == "atom" && version == 1.0
|
2198
2198
|
# normal atom format
|
2199
2199
|
return xml_builder.feed("xmlns" => FEED_TOOLS_NAMESPACES['atom10'],
|
2200
2200
|
"xml:lang" => language) do
|
2201
|
-
unless title.
|
2201
|
+
unless title.blank?
|
2202
2202
|
xml_builder.title(title,
|
2203
2203
|
"type" => "html")
|
2204
2204
|
end
|
@@ -2212,22 +2212,22 @@ module FeedTools
|
|
2212
2212
|
xml_builder.email(self.author.email)
|
2213
2213
|
end
|
2214
2214
|
unless self.author.nil? || self.author.url.nil?
|
2215
|
-
xml_builder.
|
2215
|
+
xml_builder.uri(self.author.url)
|
2216
2216
|
end
|
2217
2217
|
end
|
2218
|
-
unless self.url.
|
2218
|
+
unless self.url.blank?
|
2219
2219
|
xml_builder.link("href" => self.url,
|
2220
2220
|
"rel" => "self",
|
2221
2221
|
"type" => "application/atom+xml")
|
2222
2222
|
end
|
2223
|
-
unless self.link.
|
2224
|
-
xml_builder.link("href" => self.link,
|
2223
|
+
unless self.link.blank?
|
2224
|
+
xml_builder.link("href" => FeedTools.escape_entities(self.link),
|
2225
2225
|
"rel" => "alternate",
|
2226
2226
|
"type" => "text/html",
|
2227
|
-
"title" => self.title)
|
2227
|
+
"title" => FeedTools.escape_entities(self.title))
|
2228
2228
|
end
|
2229
|
-
unless description.
|
2230
|
-
xml_builder.subtitle(
|
2229
|
+
unless description.blank?
|
2230
|
+
xml_builder.subtitle(self.subtitle,
|
2231
2231
|
"type" => "html")
|
2232
2232
|
else
|
2233
2233
|
xml_builder.subtitle(FeedTools.no_content_string,
|
@@ -2242,8 +2242,8 @@ module FeedTools
|
|
2242
2242
|
else
|
2243
2243
|
xml_builder.updated(Time.now.gmtime.iso8601)
|
2244
2244
|
end
|
2245
|
-
xml_builder.generator(
|
2246
|
-
"
|
2245
|
+
xml_builder.generator(FeedTools.configurations[:generator_name] +
|
2246
|
+
" - " + FeedTools.configurations[:generator_href])
|
2247
2247
|
if self.id != nil
|
2248
2248
|
unless FeedTools.is_uri? self.id
|
2249
2249
|
if self.link != nil
|
@@ -2266,46 +2266,49 @@ module FeedTools
|
|
2266
2266
|
end
|
2267
2267
|
end
|
2268
2268
|
end
|
2269
|
+
else
|
2270
|
+
raise "Unsupported feed format/version."
|
2269
2271
|
end
|
2270
2272
|
end
|
2271
2273
|
|
2272
2274
|
# Persists the current feed state to the cache.
|
2273
2275
|
def save
|
2274
|
-
|
2275
|
-
|
2276
|
-
|
2277
|
-
|
2278
|
-
|
2279
|
-
|
2280
|
-
|
2281
|
-
|
2282
|
-
|
2283
|
-
self.
|
2284
|
-
|
2285
|
-
|
2286
|
-
|
2287
|
-
|
2288
|
-
|
2276
|
+
unless self.url =~ /^file:\/\//
|
2277
|
+
if FeedTools.feed_cache.nil?
|
2278
|
+
raise "Caching is currently disabled. Cannot save to cache."
|
2279
|
+
elsif self.url.nil?
|
2280
|
+
raise "The url field must be set to save to the cache."
|
2281
|
+
elsif self.cache_object.nil?
|
2282
|
+
raise "The cache_object is currently nil. Cannot save to cache."
|
2283
|
+
else
|
2284
|
+
self.cache_object.url = self.url
|
2285
|
+
unless self.feed_data.nil?
|
2286
|
+
self.cache_object.title = self.title
|
2287
|
+
self.cache_object.link = self.link
|
2288
|
+
self.cache_object.feed_data = self.feed_data
|
2289
|
+
self.cache_object.feed_data_type = self.feed_data_type.to_s
|
2290
|
+
end
|
2289
2291
|
self.cache_object.http_headers = self.http_headers.to_yaml
|
2292
|
+
self.cache_object.last_retrieved = self.last_retrieved
|
2293
|
+
self.cache_object.save
|
2290
2294
|
end
|
2291
|
-
self.cache_object.last_retrieved = self.last_retrieved
|
2292
|
-
self.cache_object.save
|
2293
2295
|
end
|
2294
2296
|
end
|
2295
2297
|
|
2296
|
-
alias_method :tagline, :
|
2297
|
-
alias_method :tagline=, :
|
2298
|
-
alias_method :
|
2299
|
-
alias_method :
|
2300
|
-
alias_method :abstract, :
|
2301
|
-
alias_method :abstract=, :
|
2302
|
-
alias_method :content, :
|
2303
|
-
alias_method :content=, :
|
2298
|
+
alias_method :tagline, :subtitle
|
2299
|
+
alias_method :tagline=, :subtitle=
|
2300
|
+
alias_method :description, :subtitle
|
2301
|
+
alias_method :description=, :subtitle=
|
2302
|
+
alias_method :abstract, :subtitle
|
2303
|
+
alias_method :abstract=, :subtitle=
|
2304
|
+
alias_method :content, :subtitle
|
2305
|
+
alias_method :content=, :subtitle=
|
2304
2306
|
alias_method :ttl, :time_to_live
|
2305
2307
|
alias_method :ttl=, :time_to_live=
|
2306
2308
|
alias_method :guid, :id
|
2307
2309
|
alias_method :guid=, :id=
|
2308
|
-
alias_method :
|
2310
|
+
alias_method :items, :entries
|
2311
|
+
alias_method :items=, :entries=
|
2309
2312
|
|
2310
2313
|
# passes missing methods to the cache_object
|
2311
2314
|
def method_missing(msg, *params)
|