feedtools 0.2.18 → 0.2.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -29,9 +29,11 @@ module FeedTools
29
29
  # :stopdoc:
30
30
  include REXML
31
31
  class << self
32
- include GenericHelper
32
+ include FeedTools::GenericHelper
33
33
  private :validate_options
34
34
  end
35
+ include FeedTools::GenericHelper
36
+ private :validate_options
35
37
  # :startdoc:
36
38
 
37
39
  # Represents a feed/feed item's category
@@ -143,7 +145,7 @@ module FeedTools
143
145
  @link = nil
144
146
  @last_retrieved = nil
145
147
  @time_to_live = nil
146
- @items = nil
148
+ @entries = nil
147
149
  @live = false
148
150
  end
149
151
 
@@ -175,10 +177,12 @@ module FeedTools
175
177
  # Loads the feed from the remote url if the feed has expired from the cache or cannot be
176
178
  # retrieved from the cache for some reason.
177
179
  def update!
178
- if self.http_headers.nil? && !(self.cache_object.nil?) &&
180
+ if self.http_headers.blank? && !(self.cache_object.nil?) &&
179
181
  !(self.cache_object.http_headers.nil?)
180
182
  @http_headers = YAML.load(self.cache_object.http_headers)
181
183
  @http_headers = {} unless @http_headers.kind_of? Hash
184
+ elsif self.http_headers.blank?
185
+ @http_headers = {}
182
186
  end
183
187
  if self.expired? == false
184
188
  @live = false
@@ -240,11 +244,12 @@ module FeedTools
240
244
  self.http_headers['last-modified'] unless
241
245
  self.http_headers['last-modified'].nil?
242
246
  end
243
- headers["User-Agent"] =
244
- FeedTools.user_agent unless FeedTools.user_agent.nil?
247
+ unless FeedTools.configurations[:user_agent].nil?
248
+ headers["User-Agent"] = FeedTools.configurations[:user_agent]
249
+ end
245
250
 
246
251
  # The http feed access method
247
- http_fetch = lambda do |feed_url, http_headers, redirect_limit,
252
+ http_fetch = lambda do |feed_url, request_headers, redirect_limit,
248
253
  response_chain, no_headers|
249
254
  raise FeedAccessError, 'Redirect too deep' if redirect_limit == 0
250
255
  feed_uri = nil
@@ -256,11 +261,13 @@ module FeedTools
256
261
  end
257
262
 
258
263
  begin
259
- Net::HTTP.start(feed_uri.host, (feed_uri.port or 80)) do |http|
264
+ # TODO: Proxy host and proxy port would go here if implemented
265
+ http = Net::HTTP.new(feed_uri.host, (feed_uri.port or 80))
266
+ http.start do
260
267
  final_uri = feed_uri.path
261
268
  final_uri += ('?' + feed_uri.query) if feed_uri.query
262
- http_headers = {} if no_headers
263
- response = http.request_get(final_uri, http_headers)
269
+ request_headers = {} if no_headers
270
+ response = http.request_get(final_uri, request_headers)
264
271
 
265
272
  case response
266
273
  when Net::HTTPSuccess
@@ -289,7 +296,8 @@ module FeedTools
289
296
  response_chain << [feed_url, response]
290
297
  new_location = response['location']
291
298
  if response_chain.assoc(new_location) != nil
292
- raise FeedAccessError, "Redirection loop detected: #{new_location}"
299
+ raise FeedAccessError,
300
+ "Redirection loop detected: #{new_location}"
293
301
  end
294
302
 
295
303
  # Find out if we've already seen the url we've been
@@ -300,13 +308,21 @@ module FeedTools
300
308
  :cache_only => true)
301
309
  if cached_feed.cache_object != nil &&
302
310
  cached_feed.cache_object.new_record? != true
303
- unless cached_feed.expired?
304
- # Copy the cached state, starting with the url
311
+ if !cached_feed.expired? &&
312
+ !cached_feed.http_headers.blank?
313
+ # Copy the cached state
305
314
  self.url = cached_feed.url
306
- self.title = cached_feed.title
307
- self.link = cached_feed.link
308
- self.feed_data = cached_feed.feed_data
309
- self.feed_data_type = cached_feed.feed_data_type
315
+
316
+ @feed_data = cached_feed.feed_data
317
+ @feed_data_type = cached_feed.feed_data_type
318
+
319
+ if @feed_data.blank?
320
+ raise "Invalid cache data."
321
+ end
322
+
323
+ @title = nil; self.title
324
+ @link = nil; self.link
325
+
310
326
  self.last_retrieved = cached_feed.last_retrieved
311
327
  self.http_headers = cached_feed.http_headers
312
328
  self.cache_object = cached_feed.cache_object
@@ -342,6 +358,10 @@ module FeedTools
342
358
  raise FeedAccessError, 'Socket error prevented feed retrieval'
343
359
  rescue Timeout::Error
344
360
  raise FeedAccessError, 'Timeout while attempting to retrieve feed'
361
+ rescue Errno::ENETUNREACH
362
+ raise FeedAccessError, 'Network was unreachable'
363
+ rescue Errno::ECONNRESET
364
+ raise FeedAccessError, 'Connection was reset by peer'
345
365
  end
346
366
  end
347
367
 
@@ -375,12 +395,12 @@ module FeedTools
375
395
  end
376
396
  end
377
397
  unless @http_response.kind_of? Net::HTTPRedirection
398
+ @feed_data = self.http_response.body
378
399
  @http_headers = {}
379
400
  self.http_response.each_header do |key, value|
380
401
  self.http_headers[key.downcase] = value
381
402
  end
382
403
  self.last_retrieved = Time.now.gmtime
383
- self.feed_data = self.http_response.body
384
404
  end
385
405
  rescue FeedAccessError
386
406
  @live = false
@@ -451,9 +471,9 @@ module FeedTools
451
471
  open(file_name) do |file|
452
472
  @http_response = nil
453
473
  @http_headers = {}
474
+ @feed_data = file.read
475
+ @feed_data_type = :xml
454
476
  self.last_retrieved = Time.now.gmtime
455
- self.feed_data = file.read
456
- self.feed_data_type = :xml
457
477
  end
458
478
  rescue
459
479
  @live = false
@@ -478,8 +498,71 @@ module FeedTools
478
498
 
479
499
  # Returns a hash of the http headers from the response.
480
500
  def http_headers
501
+ if @http_headers.blank?
502
+ if !self.cache_object.nil? && !self.cache_object.http_headers.nil?
503
+ @http_headers = YAML.load(self.cache_object.http_headers)
504
+ @http_headers = {} unless @http_headers.kind_of? Hash
505
+ else
506
+ @http_headers = {}
507
+ end
508
+ end
481
509
  return @http_headers
482
510
  end
511
+
512
+ # Returns the encoding that the feed was parsed with
513
+ def encoding
514
+ if @encoding.nil?
515
+ unless self.http_headers.blank?
516
+ @encoding = "utf-8"
517
+ else
518
+ @encoding = self.encoding_from_xml_data
519
+ end
520
+ end
521
+ return @encoding
522
+ end
523
+
524
+ # Returns the encoding of the feed calculated only from the xml data.
525
+ # I.e., the encoding we would come up with if we ignore RFC 3023.
526
+ def encoding_from_xml_data
527
+ if @encoding_from_xml_data.nil?
528
+ raw_data = self.feed_data
529
+ encoding_from_xml_instruct =
530
+ raw_data.scan(
531
+ /^<\?xml [^>]*encoding="([\w]*)"[^>]*\?>/
532
+ ).flatten.first
533
+ unless encoding_from_xml_instruct.blank?
534
+ encoding_from_xml_instruct.downcase!
535
+ end
536
+ if encoding_from_xml_instruct.blank?
537
+ doc = Document.new(raw_data)
538
+ encoding_from_xml_instruct = doc.encoding.downcase
539
+ if encoding_from_xml_instruct == "utf-8"
540
+ # REXML has a tendency to report utf-8 overzealously, so take it
541
+ # with a grain of salt.
542
+ encoding_from_xml_instruct = nil
543
+ end
544
+ else
545
+ @encoding_from_xml_data = encoding_from_xml_instruct
546
+ end
547
+ if encoding_from_xml_instruct.blank?
548
+ sniff_table = {
549
+ "Lo\247\224" => "ebcdic-cp-us",
550
+ "<?xm" => "utf-8"
551
+ }
552
+ sniff = self.feed_data[0..3]
553
+ if sniff_table[sniff] != nil
554
+ @encoding_from_xml_data = sniff_table[sniff].downcase
555
+ end
556
+ else
557
+ @encoding_from_xml_data = encoding_from_xml_instruct
558
+ end
559
+ if @encoding_from_xml_data.blank?
560
+ # Safest assumption
561
+ @encoding_from_xml_data = "utf-8"
562
+ end
563
+ end
564
+ return @encoding_from_xml_data
565
+ end
483
566
 
484
567
  # Returns the feed's raw data.
485
568
  def feed_data
@@ -493,12 +576,40 @@ module FeedTools
493
576
 
494
577
  # Sets the feed's data.
495
578
  def feed_data=(new_feed_data)
579
+ @http_headers = {}
580
+ @cache_object = nil
581
+ @url = nil
582
+ @id = nil
583
+ @encoding = nil
496
584
  @feed_data = new_feed_data
497
585
  unless self.cache_object.nil?
498
586
  self.cache_object.feed_data = new_feed_data
499
587
  end
500
588
  end
501
589
 
590
+ # Returns the feed's raw data as utf-8.
591
+ def feed_data_utf_8(force_encoding=nil)
592
+ if @feed_data_utf_8.nil?
593
+ raw_data = self.feed_data
594
+ if force_encoding.nil?
595
+ use_encoding = self.encoding
596
+ else
597
+ use_encoding = force_encoding
598
+ end
599
+ if use_encoding != "utf-8"
600
+ begin
601
+ @feed_data_utf_8 =
602
+ Iconv.new('utf-8', use_encoding).iconv(raw_data)
603
+ rescue
604
+ return raw_data
605
+ end
606
+ else
607
+ return self.feed_data
608
+ end
609
+ end
610
+ return @feed_data_utf_8
611
+ end
612
+
502
613
  # Returns the data type of the feed
503
614
  # Possible values:
504
615
  # * :xml
@@ -526,24 +637,15 @@ module FeedTools
526
637
  @xml_doc = nil
527
638
  else
528
639
  if @xml_doc.nil?
529
- # INQUIRY: Is there any way of saying "dude, rescue *everything*"?
530
640
  begin
531
641
  begin
532
- # TODO: :ignore_whitespace_nodes => :all
533
- # Add that?
534
- # ======================================
535
- @xml_doc = Document.new(feed_data)
536
- rescue Exception
537
- # Something failed especially badly, attempt to repair the
538
- # xml with htree.
539
- @xml_doc = HTree.parse(feed_data).to_rexml
540
- rescue
642
+ @xml_doc = Document.new(self.feed_data_utf_8,
643
+ :ignore_whitespace_nodes => :all)
644
+ rescue Object
541
645
  # Something failed, attempt to repair the xml with htree.
542
- @xml_doc = HTree.parse(feed_data).to_rexml
646
+ @xml_doc = HTree.parse(self.feed_data_utf_8).to_rexml
543
647
  end
544
- rescue Exception
545
- @xml_doc = nil
546
- rescue
648
+ rescue Object
547
649
  @xml_doc = nil
548
650
  end
549
651
  end
@@ -551,14 +653,23 @@ module FeedTools
551
653
  return @xml_doc
552
654
  end
553
655
 
554
- # Returns the first node within the channel_node that matches the xpath query.
555
- def find_node(xpath)
556
- return XPath.first(channel_node, xpath)
656
+ # Returns the first node within the channel_node that matches the xpath
657
+ # query.
658
+ def find_node(xpath, select_result_value=false)
659
+ if self.feed_data_type != :xml
660
+ raise "The feed data type is not xml."
661
+ end
662
+ return try_xpaths(self.channel_node, [xpath],
663
+ :select_result_value => select_result_value)
557
664
  end
558
665
 
559
666
  # Returns all nodes within the channel_node that match the xpath query.
560
- def find_all_nodes(xpath)
561
- return XPath.match(channel_node, xpath)
667
+ def find_all_nodes(xpath, select_result_value=false)
668
+ if self.feed_data_type != :xml
669
+ raise "The feed data type is not xml."
670
+ end
671
+ return try_xpaths_all(self.channel_node, [xpath],
672
+ :select_result_value => select_result_value)
562
673
  end
563
674
 
564
675
  # Returns the root node of the feed.
@@ -568,7 +679,15 @@ module FeedTools
568
679
  # break this stuff.
569
680
  # E.g.: http://smogzer.tripod.com/smog.rdf
570
681
  # ===================================================================
571
- @root_node = xml.root
682
+ begin
683
+ if xml.nil?
684
+ return nil
685
+ else
686
+ @root_node = xml.root
687
+ end
688
+ rescue
689
+ return nil
690
+ end
572
691
  end
573
692
  return @root_node
574
693
  end
@@ -576,13 +695,11 @@ module FeedTools
576
695
  # Returns the channel node of the feed.
577
696
  def channel_node
578
697
  if @channel_node.nil? && root_node != nil
579
- @channel_node = XPath.first(root_node, "channel")
580
- if @channel_node == nil
581
- @channel_node = XPath.first(root_node, "CHANNEL")
582
- end
583
- if @channel_node == nil
584
- @channel_node = XPath.first(root_node, "feedinfo")
585
- end
698
+ @channel_node = try_xpaths(root_node, [
699
+ "channel",
700
+ "CHANNEL",
701
+ "feedinfo"
702
+ ])
586
703
  if @channel_node == nil
587
704
  @channel_node = root_node
588
705
  end
@@ -592,12 +709,13 @@ module FeedTools
592
709
 
593
710
  # The cache object that handles the feed persistence.
594
711
  def cache_object
712
+ if !@url.nil? && @url =~ /^file:\/\//
713
+ return nil
714
+ end
595
715
  unless FeedTools.feed_cache.nil?
596
716
  if @cache_object.nil?
597
717
  begin
598
- if @id != nil
599
- @cache_object = FeedTools.feed_cache.find_by_id(@id)
600
- elsif @url != nil
718
+ if @url != nil
601
719
  @cache_object = FeedTools.feed_cache.find_by_url(@url)
602
720
  end
603
721
  if @cache_object.nil?
@@ -719,43 +837,22 @@ module FeedTools
719
837
  # Returns the feed's unique id
720
838
  def id
721
839
  if @id.nil?
722
- unless channel_node.nil?
723
- @id = XPath.first(channel_node, "id/text()").to_s
724
- if @id == ""
725
- @id = XPath.first(channel_node, "atom10:id/text()",
726
- FEED_TOOLS_NAMESPACES).to_s
727
- end
728
- if @id == ""
729
- @id = XPath.first(channel_node, "atom03:id/text()",
730
- FEED_TOOLS_NAMESPACES).to_s
731
- end
732
- if @id == ""
733
- @id = XPath.first(channel_node, "atom:id/text()").to_s
734
- end
735
- if @id == ""
736
- @id = XPath.first(channel_node, "guid/text()").to_s
737
- end
738
- end
739
- unless root_node.nil?
740
- if @id == "" || @id.nil?
741
- @id = XPath.first(root_node, "id/text()").to_s
742
- end
743
- if @id == ""
744
- @id = XPath.first(channel_node, "atom10:id/text()",
745
- FEED_TOOLS_NAMESPACES).to_s
746
- end
747
- if @id == ""
748
- @id = XPath.first(channel_node, "atom03:id/text()",
749
- FEED_TOOLS_NAMESPACES).to_s
750
- end
751
- if @id == ""
752
- @id = XPath.first(channel_node, "atom:id/text()").to_s
753
- end
754
- if @id == ""
755
- @id = XPath.first(root_node, "guid/text()").to_s
756
- end
757
- end
758
- @id = nil if @id == ""
840
+ @id = select_not_blank([
841
+ try_xpaths(self.channel_node, [
842
+ "atom10:id/text()",
843
+ "atom03:id/text()",
844
+ "atom:id/text()",
845
+ "id/text()",
846
+ "guid/text()"
847
+ ], :select_result_value => true),
848
+ try_xpaths(self.root_node, [
849
+ "atom10:id/text()",
850
+ "atom03:id/text()",
851
+ "atom:id/text()",
852
+ "id/text()",
853
+ "guid/text()"
854
+ ], :select_result_value => true)
855
+ ])
759
856
  end
760
857
  return @id
761
858
  end
@@ -768,12 +865,12 @@ module FeedTools
768
865
  # Returns the feed url.
769
866
  def url
770
867
  original_url = @url
771
- override_url = lambda do
868
+ override_url = lambda do |result|
772
869
  begin
773
- if @url == nil && self.feed_data != nil
870
+ if result.nil? && self.feed_data != nil
774
871
  true
775
- elsif @url != nil &&
776
- !(["http", "https"].include?(URI.parse(@url).scheme))
872
+ elsif result != nil &&
873
+ !(["http", "https"].include?(URI.parse(result.to_s).scheme))
777
874
  if self.feed_data != nil
778
875
  true
779
876
  else
@@ -786,47 +883,32 @@ module FeedTools
786
883
  true
787
884
  end
788
885
  end
789
- if override_url.call
790
- @url = XPath.first(channel_node, "link[@rel='self']/@href").to_s
791
- @url = nil if @url == ""
792
- if override_url.call
793
- @url = XPath.first(channel_node, "atom:link[@rel='self']/@href").to_s
794
- @url = nil if @url == ""
795
- end
796
- if override_url.call
797
- @url = XPath.first(channel_node, "atom10:link[@rel='self']/@href",
798
- FEED_TOOLS_NAMESPACES).to_s
799
- @url = nil if @url == ""
800
- end
801
- if override_url.call
802
- @url = XPath.first(channel_node, "atom03:link[@rel='self']/@href",
803
- FEED_TOOLS_NAMESPACES).to_s
804
- @url = nil if @url == ""
805
- end
806
- if override_url.call
807
- @url = XPath.first(channel_node, "admin:feed/@rdf:resource").to_s
808
- @url = nil if @url == ""
809
- end
810
- if override_url.call
811
- @url = XPath.first(channel_node, "admin:feed/@rdf:resource",
812
- FEED_TOOLS_NAMESPACES).to_s
813
- @url = nil if @url == ""
814
- end
815
- if override_url.call
816
- @url = XPath.first(channel_node, "admin:feed/@resource").to_s
817
- @url = nil if @url == ""
818
- end
819
- if override_url.call
820
- @url = XPath.first(channel_node, "feed/@rdf:resource").to_s
821
- @url = nil if @url == ""
822
- end
823
- if override_url.call
824
- @url = XPath.first(channel_node, "feed/@resource").to_s
825
- @url = nil if @url == ""
826
- end
886
+ if override_url.call(@url)
887
+ # rdf:about is ordered last because a lot of people accidentally
888
+ # put the link in that field instead of the url to the feed.
889
+ # Ordering it last gives them as many chances as humanly possible
890
+ # for them to redeem themselves. If the url turns out to be the same as the link, the original url is kept.
891
+ @url = try_xpaths(self.channel_node, [
892
+ "link[@rel='self']/@href",
893
+ "atom10:link[@rel='self']/@href",
894
+ "atom03:link[@rel='self']/@href",
895
+ "atom:link[@rel='self']/@href",
896
+ "admin:feed/@rdf:resource",
897
+ "admin:feed/@resource",
898
+ "feed/@rdf:resource",
899
+ "feed/@resource",
900
+ "@rdf:about",
901
+ "@about"
902
+ ], :select_result_value => true) do |result|
903
+ override_url.call(FeedTools.normalize_url(result))
904
+ end
905
+ @url = FeedTools.normalize_url(@url)
827
906
  if @url == nil
828
907
  @url = original_url
829
908
  end
909
+ if @url == self.link
910
+ @url = original_url
911
+ end
830
912
  end
831
913
  return @url
832
914
  end
@@ -840,37 +922,23 @@ module FeedTools
840
922
  # Returns the feed title
841
923
  def title
842
924
  if @title.nil?
843
- unless channel_node.nil?
844
- repair_entities = false
845
- title_node = XPath.first(channel_node, "atom10:title",
846
- FEED_TOOLS_NAMESPACES)
847
- if title_node.nil?
848
- title_node = XPath.first(channel_node, "title")
849
- end
850
- if title_node.nil?
851
- title_node = XPath.first(channel_node, "atom03:title",
852
- FEED_TOOLS_NAMESPACES)
853
- end
854
- if title_node.nil?
855
- title_node = XPath.first(channel_node, "atom:title")
856
- end
857
- if title_node.nil?
858
- title_node = XPath.first(channel_node, "dc:title",
859
- FEED_TOOLS_NAMESPACES)
860
- end
861
- if title_node.nil?
862
- title_node = XPath.first(channel_node, "dc:title")
863
- end
864
- if title_node.nil?
865
- title_node = XPath.first(channel_node, "TITLE")
866
- end
867
- end
925
+ repair_entities = false
926
+ title_node = try_xpaths(self.channel_node, [
927
+ "atom10:title",
928
+ "atom03:title",
929
+ "atom:title",
930
+ "title",
931
+ "dc:title"
932
+ ])
868
933
  if title_node.nil?
869
934
  return nil
870
935
  end
871
- title_type = XPath.first(title_node, "@type").to_s
872
- title_mode = XPath.first(title_node, "@mode").to_s
873
- title_encoding = XPath.first(title_node, "@encoding").to_s
936
+ title_type = try_xpaths(title_node, "@type",
937
+ :select_result_value => true)
938
+ title_mode = try_xpaths(title_node, "@mode",
939
+ :select_result_value => true)
940
+ title_encoding = try_xpaths(title_node, "@encoding",
941
+ :select_result_value => true)
874
942
 
875
943
  # Note that we're checking for misuse of type, mode and encoding here
876
944
  if title_type == "base64" || title_mode == "base64" ||
@@ -895,7 +963,7 @@ module FeedTools
895
963
  @title.gsub!(/>\n</, "><")
896
964
  @title.gsub!(/\n/, " ")
897
965
  @title.strip!
898
- @title = nil if @title == ""
966
+ @title = nil if @title.blank?
899
967
  self.cache_object.title = @title unless self.cache_object.nil?
900
968
  end
901
969
  return @title
@@ -907,124 +975,98 @@ module FeedTools
907
975
  self.cache_object.title = new_title unless self.cache_object.nil?
908
976
  end
909
977
 
910
- # Returns the feed description
911
- def description
912
- if @description.nil?
913
- unless channel_node.nil?
914
- repair_entities = false
915
- description_node = XPath.first(channel_node, "description")
916
- if description_node.nil?
917
- description_node = XPath.first(channel_node, "tagline")
918
- end
919
- if description_node.nil?
920
- description_node = XPath.first(channel_node, "subtitle")
921
- end
922
- if description_node.nil?
923
- description_node = XPath.first(channel_node, "summary")
924
- end
925
- if description_node.nil?
926
- description_node = XPath.first(channel_node, "abstract")
927
- end
928
- if description_node.nil?
929
- description_node = XPath.first(channel_node, "ABSTRACT")
930
- end
931
- if description_node.nil?
932
- description_node = XPath.first(channel_node, "info")
933
- end
934
- if description_node.nil?
935
- description_node = XPath.first(channel_node, "content:encoded")
936
- end
937
- if description_node.nil?
938
- description_node = XPath.first(channel_node, "content:encoded",
939
- FEED_TOOLS_NAMESPACES)
940
- end
941
- if description_node.nil?
942
- description_node = XPath.first(root_node, "encoded")
943
- end
944
- if description_node.nil?
945
- description_node = XPath.first(channel_node, "content")
946
- end
947
- if description_node.nil?
948
- description_node = XPath.first(channel_node, "xhtml:body")
949
- end
950
- if description_node.nil?
951
- description_node = XPath.first(channel_node, "body")
952
- end
953
- if description_node.nil?
954
- description_node = XPath.first(channel_node, "blurb")
955
- end
956
- end
957
- if description_node.nil?
978
+ # Returns the feed subtitle
979
+ def subtitle
980
+ if @subtitle.nil?
981
+ repair_entities = false
982
+ subtitle_node = try_xpaths(self.channel_node, [
983
+ "atom10:subtitle",
984
+ "subtitle",
985
+ "atom03:tagline",
986
+ "tagline",
987
+ "description",
988
+ "summary",
989
+ "abstract",
990
+ "ABSTRACT",
991
+ "content:encoded",
992
+ "encoded",
993
+ "content",
994
+ "xhtml:body",
995
+ "body",
996
+ "blurb",
997
+ "info"
998
+ ])
999
+ if subtitle_node.nil?
958
1000
  return nil
959
1001
  end
960
- description_type = XPath.first(description_node, "@type").to_s
961
- description_mode = XPath.first(description_node, "@mode").to_s
962
- description_encoding = XPath.first(description_node, "@encoding").to_s
1002
+ subtitle_type = try_xpaths(subtitle_node, "@type",
1003
+ :select_result_value => true)
1004
+ subtitle_mode = try_xpaths(subtitle_node, "@mode",
1005
+ :select_result_value => true)
1006
+ subtitle_encoding = try_xpaths(subtitle_node, "@encoding",
1007
+ :select_result_value => true)
963
1008
 
964
1009
  # Note that we're checking for misuse of type, mode and encoding here
965
- if description_encoding != ""
966
- @description =
1010
+ if !subtitle_encoding.blank?
1011
+ @subtitle =
967
1012
  "[Embedded data objects are not currently supported.]"
968
- elsif description_node.cdatas.size > 0
969
- @description = description_node.cdatas.first.value
970
- elsif description_type == "base64" || description_mode == "base64" ||
971
- description_encoding == "base64"
972
- @description = Base64.decode64(description_node.inner_xml.strip)
973
- elsif description_type == "xhtml" || description_mode == "xhtml" ||
974
- description_type == "xml" || description_mode == "xml" ||
975
- description_type == "application/xhtml+xml"
976
- @description = description_node.inner_xml
977
- elsif description_type == "escaped" || description_mode == "escaped"
978
- @description = FeedTools.unescape_entities(
979
- description_node.inner_xml)
1013
+ elsif subtitle_node.cdatas.size > 0
1014
+ @subtitle = subtitle_node.cdatas.first.value
1015
+ elsif subtitle_type == "base64" || subtitle_mode == "base64" ||
1016
+ subtitle_encoding == "base64"
1017
+ @subtitle = Base64.decode64(subtitle_node.inner_xml.strip)
1018
+ elsif subtitle_type == "xhtml" || subtitle_mode == "xhtml" ||
1019
+ subtitle_type == "xml" || subtitle_mode == "xml" ||
1020
+ subtitle_type == "application/xhtml+xml"
1021
+ @subtitle = subtitle_node.inner_xml
1022
+ elsif subtitle_type == "escaped" || subtitle_mode == "escaped"
1023
+ @subtitle = FeedTools.unescape_entities(
1024
+ subtitle_node.inner_xml)
980
1025
  else
981
- @description = description_node.inner_xml
1026
+ @subtitle = subtitle_node.inner_xml
982
1027
  repair_entities = true
983
1028
  end
984
- if @description == ""
985
- @description = self.itunes_summary
986
- @description = "" if @description.nil?
1029
+ if @subtitle.blank?
1030
+ @subtitle = self.itunes_summary
987
1031
  end
988
- if @description == ""
989
- @description = self.itunes_subtitle
990
- @description = "" if @description.nil?
1032
+ if @subtitle.blank?
1033
+ @subtitle = self.itunes_subtitle
991
1034
  end
992
1035
 
993
- unless @description.nil?
994
- @description = FeedTools.sanitize_html(@description, :strip)
995
- @description = FeedTools.unescape_entities(@description) if repair_entities
996
- @description = FeedTools.tidy_html(@description)
1036
+ unless @subtitle.blank?
1037
+ @subtitle = FeedTools.sanitize_html(@subtitle, :strip)
1038
+ @subtitle = FeedTools.unescape_entities(@subtitle) if repair_entities
1039
+ @subtitle = FeedTools.tidy_html(@subtitle)
997
1040
  end
998
1041
 
999
- @description = @description.strip unless @description.nil?
1000
- @description = nil if @description == ""
1042
+ @subtitle = @subtitle.strip unless @subtitle.nil?
1043
+ @subtitle = nil if @subtitle.blank?
1001
1044
  end
1002
- return @description
1045
+ return @subtitle
1003
1046
  end
1004
1047
 
1005
- # Sets the feed description
1006
- def description=(new_description)
1007
- @description = new_description
1048
+ # Sets the feed subtitle
1049
+ def subtitle=(new_subtitle)
1050
+ @subtitle = new_subtitle
1008
1051
  end
1009
1052
 
1010
1053
  # Returns the contents of the itunes:summary element
1011
1054
  def itunes_summary
1012
1055
  if @itunes_summary.nil?
1013
- unless channel_node.nil?
1014
- @itunes_summary = FeedTools.unescape_entities(XPath.first(channel_node,
1015
- "itunes:summary/text()").to_s)
1016
- end
1017
- unless root_node.nil?
1018
- if @itunes_summary == "" || @itunes_summary.nil?
1019
- @itunes_summary = FeedTools.unescape_entities(XPath.first(root_node,
1020
- "itunes:summary/text()").to_s)
1021
- end
1022
- end
1023
- if @itunes_summary == ""
1056
+ @itunes_summary = select_not_blank([
1057
+ try_xpaths(self.channel_node, [
1058
+ "itunes:summary/text()"
1059
+ ]),
1060
+ try_xpaths(self.root_node, [
1061
+ "itunes:summary/text()"
1062
+ ])
1063
+ ])
1064
+ unless @itunes_summary.blank?
1065
+ @itunes_summary = FeedTools.unescape_entities(@itunes_summary)
1066
+ @itunes_summary = FeedTools.sanitize_html(@itunes_summary)
1067
+ else
1024
1068
  @itunes_summary = nil
1025
1069
  end
1026
- @itunes_summary =
1027
- FeedTools.sanitize_html(@itunes_summary) unless @itunes_summary.nil?
1028
1070
  end
1029
1071
  return @itunes_summary
1030
1072
  end
@@ -1037,21 +1079,19 @@ module FeedTools
1037
1079
  # Returns the contents of the itunes:subtitle element
1038
1080
  def itunes_subtitle
1039
1081
  if @itunes_subtitle.nil?
1040
- unless channel_node.nil?
1041
- @itunes_subtitle = FeedTools.unescape_entities(XPath.first(channel_node,
1042
- "itunes:subtitle/text()").to_s)
1043
- end
1044
- unless root_node.nil?
1045
- if @itunes_subtitle == "" || @itunes_subtitle.nil?
1046
- @itunes_subtitle = FeedTools.unescape_entities(XPath.first(root_node,
1047
- "itunes:subtitle/text()").to_s)
1048
- end
1049
- end
1050
- if @itunes_subtitle == ""
1051
- @itunes_subtitle = nil
1052
- end
1053
- unless @itunes_subtitle.nil?
1082
+ @itunes_subtitle = select_not_blank([
1083
+ try_xpaths(self.channel_node, [
1084
+ "itunes:subtitle/text()"
1085
+ ]),
1086
+ try_xpaths(self.root_node, [
1087
+ "itunes:subtitle/text()"
1088
+ ])
1089
+ ])
1090
+ unless @itunes_subtitle.blank?
1091
+ @itunes_subtitle = FeedTools.unescape_entities(@itunes_subtitle)
1054
1092
  @itunes_subtitle = FeedTools.sanitize_html(@itunes_subtitle)
1093
+ else
1094
+ @itunes_subtitle = nil
1055
1095
  end
1056
1096
  end
1057
1097
  return @itunes_subtitle
@@ -1065,43 +1105,80 @@ module FeedTools
1065
1105
  # Returns the feed link
1066
1106
  def link
1067
1107
  if @link.nil?
1068
- unless channel_node.nil?
1069
- # get the feed link from the xml document
1070
- @link = XPath.first(channel_node, "link[@rel='alternate' @type='text/html']/@href").to_s
1071
- if @link == ""
1072
- @link = XPath.first(channel_node, "link[@rel='alternate']/@href").to_s
1073
- end
1074
- if @link == ""
1075
- @link = XPath.first(channel_node, "link/@href").to_s
1076
- end
1077
- if @link == ""
1078
- @link = XPath.first(channel_node, "link/text()").to_s
1079
- end
1080
- if @link == ""
1081
- @link = XPath.first(channel_node, "@href").to_s
1082
- end
1083
- if @link == ""
1084
- @link = XPath.first(channel_node, "@HREF").to_s
1085
- end
1086
- if @link == ""
1087
- @link = XPath.first(channel_node, "a/@href").to_s
1088
- end
1089
- if @link == ""
1090
- @link = XPath.first(channel_node, "A/@HREF").to_s
1091
- end
1092
- end
1093
- if @link == "" || @link.nil?
1094
- if FeedTools.is_uri? self.guid
1108
+ @link = try_xpaths(self.channel_node, [
1109
+ "atom10:link[@type='application/xhtml+xml']/@href",
1110
+ "atom10:link[@type='text/html']/@href",
1111
+ "atom10:link[@rel='alternate']/@href",
1112
+ "atom03:link[@type='application/xhtml+xml']/@href",
1113
+ "atom03:link[@type='text/html']/@href",
1114
+ "atom03:link[@rel='alternate']/@href",
1115
+ "atom:link[@type='application/xhtml+xml']/@href",
1116
+ "atom:link[@type='text/html']/@href",
1117
+ "atom:link[@rel='alternate']/@href",
1118
+ "link[@type='application/xhtml+xml']/@href",
1119
+ "link[@type='text/html']/@href",
1120
+ "link[@rel='alternate']/@href",
1121
+ "link/text()",
1122
+ "@href",
1123
+ "a/@href"
1124
+ ], :select_result_value => true)
1125
+ if @link.blank?
1126
+ if FeedTools.is_uri?(self.guid)
1095
1127
  @link = self.guid
1096
1128
  end
1097
1129
  end
1098
- if @link == "" && channel_node != nil
1099
- # Technically, we shouldn't use the base attribute for this, but if the href attribute
1100
- # is missing, it's already a given that we're looking at a messed up CDF file. We can
1101
- # always pray it's correct.
1130
+ if @link.blank? && channel_node != nil
1131
+ # Technically, we shouldn't use the base attribute for this, but
1132
+ # if the href attribute is missing, it's already a given that we're
1133
+ # looking at a messed up CDF file. We can always pray it's correct.
1102
1134
  @link = XPath.first(channel_node, "@base").to_s
1103
1135
  end
1104
- @link = FeedTools.normalize_url(@link)
1136
+ if !@link.blank?
1137
+ @link = FeedTools.unescape_entities(@link)
1138
+ end
1139
+ if @link.blank?
1140
+ link_node = try_xpaths(self.channel_node, [
1141
+ "atom10:link",
1142
+ "atom03:link",
1143
+ "atom:link",
1144
+ "link"
1145
+ ])
1146
+ if link_node != nil
1147
+ if link_node.attributes['type'].to_s =~ /^image/ ||
1148
+ link_node.attributes['type'].to_s =~ /^application/ ||
1149
+ link_node.attributes['type'].to_s =~ /xml/ ||
1150
+ link_node.attributes['rel'].to_s =~ /self/
1151
+ for child in self.channel_node
1152
+ if child.class == REXML::Element
1153
+ if child.name.downcase == "link"
1154
+ if child.attributes['type'].to_s =~ /^image/ ||
1155
+ child.attributes['type'].to_s =~ /^application/ ||
1156
+ child.attributes['type'].to_s =~ /xml/ ||
1157
+ child.attributes['rel'].to_s =~ /self/
1158
+ @link = nil
1159
+ next
1160
+ else
1161
+ @link = child.attributes['href'].to_s
1162
+ if @link.blank?
1163
+ @link = child.inner_xml
1164
+ end
1165
+ if @link.blank?
1166
+ next
1167
+ end
1168
+ break
1169
+ end
1170
+ end
1171
+ end
1172
+ end
1173
+ else
1174
+ @link = link_node.attributes['href'].to_s
1175
+ end
1176
+ end
1177
+ end
1178
+ @link = nil if @link.blank?
1179
+ if FeedTools.configurations[:url_normalization_enabled]
1180
+ @link = FeedTools.normalize_url(@link)
1181
+ end
1105
1182
  unless self.cache_object.nil?
1106
1183
  self.cache_object.link = @link
1107
1184
  end
@@ -1118,87 +1195,83 @@ module FeedTools
1118
1195
  end
1119
1196
 
1120
1197
  # Returns the url to the icon file for this feed.
1121
- #
1122
- # This method uses the url from the link field in order to avoid grabbing
1123
- # the favicon for services like feedburner.
1124
1198
  def icon
1125
1199
  if @icon.nil?
1126
- icon_node = XPath.first(channel_node, "link[@rel='icon']")
1127
- if icon_node.nil?
1128
- icon_node = XPath.first(channel_node, "link[@rel='shortcut icon']")
1129
- end
1130
- if icon_node.nil?
1131
- icon_node = XPath.first(channel_node, "link[@type='image/x-icon']")
1132
- end
1133
- if icon_node.nil?
1134
- icon_node = XPath.first(channel_node, "icon")
1135
- end
1136
- if icon_node.nil?
1137
- icon_node = XPath.first(channel_node, "logo[@style='icon']")
1138
- end
1139
- if icon_node.nil?
1140
- icon_node = XPath.first(channel_node, "LOGO[@STYLE='ICON']")
1141
- end
1200
+ icon_node = try_xpaths(self.channel_node, [
1201
+ "link[@rel='icon']",
1202
+ "link[@rel='shortcut icon']",
1203
+ "link[@type='image/x-icon']",
1204
+ "icon",
1205
+ "logo[@style='icon']",
1206
+ "LOGO[@STYLE='ICON']"
1207
+ ])
1142
1208
  unless icon_node.nil?
1143
1209
  @icon = FeedTools.unescape_entities(
1144
1210
  XPath.first(icon_node, "@href").to_s)
1145
- if @icon == ""
1211
+ if @icon.blank?
1146
1212
  @icon = FeedTools.unescape_entities(
1147
1213
  XPath.first(icon_node, "text()").to_s)
1148
1214
  unless FeedTools.is_uri? @icon
1149
- @icon = ""
1215
+ @icon = nil
1150
1216
  end
1151
1217
  end
1152
- if @icon == "" && self.link != nil && self.link != ""
1218
+ @icon = nil if @icon.blank?
1219
+ end
1220
+ end
1221
+ return @icon
1222
+ end
1223
+
1224
+ # Returns the favicon url for this feed.
1225
+ # This method first tries to use the url from the link field instead of
1226
+ # the feed url, in order to avoid grabbing the favicon for services like
1227
+ # feedburner.
1228
+ def favicon
1229
+ if @favicon.nil?
1230
+ if !self.link.blank?
1231
+ begin
1153
1232
  link_uri = URI.parse(FeedTools.normalize_url(self.link))
1154
- @icon =
1155
- link_uri.scheme + "://" + link_uri.host + "/favicon.ico"
1233
+ if link_uri.scheme == "http"
1234
+ @favicon =
1235
+ "http://" + link_uri.host + "/favicon.ico"
1236
+ end
1237
+ rescue
1238
+ @favicon = nil
1239
+ end
1240
+ if @favicon.nil? && !self.url.blank?
1241
+ begin
1242
+ feed_uri = URI.parse(FeedTools.normalize_url(self.url))
1243
+ if feed_uri.scheme == "http"
1244
+ @favicon =
1245
+ "http://" + feed_uri.host + "/favicon.ico"
1246
+ end
1247
+ rescue
1248
+ @favicon = nil
1249
+ end
1156
1250
  end
1157
- @icon = nil if @icon == ""
1251
+ else
1252
+ @favicon = nil
1158
1253
  end
1159
1254
  end
1160
- return @icon
1255
+ return @favicon
1161
1256
  end
1162
1257
 
1163
1258
  # Returns the feed author
1164
1259
  def author
1165
1260
  if @author.nil?
1166
1261
  @author = FeedTools::Feed::Author.new
1167
- unless channel_node.nil?
1168
- author_node = XPath.first(channel_node, "atom10:author",
1169
- FEED_TOOLS_NAMESPACES)
1170
- if author_node.nil?
1171
- author_node = XPath.first(channel_node, "atom03:author",
1172
- FEED_TOOLS_NAMESPACES)
1173
- end
1174
- if author_node.nil?
1175
- author_node = XPath.first(channel_node, "atom:author")
1176
- end
1177
- if author_node.nil?
1178
- author_node = XPath.first(channel_node, "author")
1179
- end
1180
- if author_node.nil?
1181
- author_node = XPath.first(channel_node, "managingEditor")
1182
- end
1183
- if author_node.nil?
1184
- author_node = XPath.first(channel_node, "dc:author",
1185
- FEED_TOOLS_NAMESPACES)
1186
- end
1187
- if author_node.nil?
1188
- author_node = XPath.first(channel_node, "dc:author")
1189
- end
1190
- if author_node.nil?
1191
- author_node = XPath.first(channel_node, "dc:creator",
1192
- FEED_TOOLS_NAMESPACES)
1193
- end
1194
- if author_node.nil?
1195
- author_node = XPath.first(channel_node, "dc:creator")
1196
- end
1197
- end
1262
+ author_node = try_xpaths(self.channel_node, [
1263
+ "atom10:author",
1264
+ "atom03:author",
1265
+ "atom:author",
1266
+ "author",
1267
+ "managingEditor",
1268
+ "dc:author",
1269
+ "dc:creator"
1270
+ ])
1198
1271
  unless author_node.nil?
1199
1272
  @author.raw = FeedTools.unescape_entities(
1200
- XPath.first(author_node, "text()").to_s)
1201
- @author.raw = nil if @author.raw == ""
1273
+ XPath.first(author_node, "text()").to_s).strip
1274
+ @author.raw = nil if @author.raw.blank?
1202
1275
  unless @author.raw.nil?
1203
1276
  raw_scan = @author.raw.scan(
1204
1277
  /(.*)\((\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\)/i)
@@ -1229,35 +1302,37 @@ module FeedTools
1229
1302
  end
1230
1303
  end
1231
1304
  end
1232
- @author.name = "" if @author.name.nil?
1233
- if @author.name == ""
1305
+ if @author.name.blank?
1234
1306
  @author.name = FeedTools.unescape_entities(
1235
- XPath.first(author_node, "name/text()").to_s)
1236
- end
1237
- if @author.name == ""
1238
- @author.name = FeedTools.unescape_entities(
1239
- XPath.first(author_node, "@name").to_s)
1240
- end
1241
- if @author.email == ""
1242
- @author.email = FeedTools.unescape_entities(
1243
- XPath.first(author_node, "email/text()").to_s)
1307
+ try_xpaths(author_node, [
1308
+ "name/text()",
1309
+ "@name"
1310
+ ], :select_result_value => true)
1311
+ )
1244
1312
  end
1245
- if @author.email == ""
1313
+ if @author.email.blank?
1246
1314
  @author.email = FeedTools.unescape_entities(
1247
- XPath.first(author_node, "@email").to_s)
1315
+ try_xpaths(author_node, [
1316
+ "email/text()",
1317
+ "@email"
1318
+ ], :select_result_value => true)
1319
+ )
1248
1320
  end
1249
- if @author.url == ""
1321
+ if @author.url.blank?
1250
1322
  @author.url = FeedTools.unescape_entities(
1251
- XPath.first(author_node, "url/text()").to_s)
1252
- end
1253
- if @author.url == ""
1254
- @author.url = FeedTools.unescape_entities(
1255
- XPath.first(author_node, "@url").to_s)
1256
- end
1257
- @author.name = nil if @author.name == ""
1258
- @author.raw = nil if @author.raw == ""
1259
- @author.email = nil if @author.email == ""
1260
- @author.url = nil if @author.url == ""
1323
+ try_xpaths(author_node, [
1324
+ "url/text()",
1325
+ "uri/text()",
1326
+ "@url",
1327
+ "@uri",
1328
+ "@href"
1329
+ ], :select_result_value => true)
1330
+ )
1331
+ end
1332
+ @author.name = nil if @author.name.blank?
1333
+ @author.raw = nil if @author.raw.blank?
1334
+ @author.email = nil if @author.email.blank?
1335
+ @author.url = nil if @author.url.blank?
1261
1336
  end
1262
1337
  # Fallback on the itunes module if we didn't find an author name
1263
1338
  begin
@@ -1290,15 +1365,14 @@ module FeedTools
1290
1365
  def publisher
1291
1366
  if @publisher.nil?
1292
1367
  @publisher = FeedTools::Feed::Author.new
1368
+ publisher_node = try_xpaths(self.channel_node, [
1369
+ "webMaster/text()",
1370
+ "dc:publisher/text()"
1371
+ ])
1293
1372
 
1294
1373
  # Set the author name
1295
- @publisher.raw = FeedTools.unescape_entities(
1296
- XPath.first(channel_node, "dc:publisher/text()").to_s)
1297
- if @publisher.raw == ""
1298
- @publisher.raw = FeedTools.unescape_entities(
1299
- XPath.first(channel_node, "webMaster/text()").to_s)
1300
- end
1301
- unless @publisher.raw == ""
1374
+ @publisher.raw = FeedTools.unescape_entities(publisher_node.to_s)
1375
+ unless @publisher.raw.blank?
1302
1376
  raw_scan = @publisher.raw.scan(
1303
1377
  /(.*)\((\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\)/i)
1304
1378
  if raw_scan.nil? || raw_scan.size == 0
@@ -1331,10 +1405,10 @@ module FeedTools
1331
1405
  end
1332
1406
  end
1333
1407
 
1334
- @publisher.name = nil if @publisher.name == ""
1335
- @publisher.raw = nil if @publisher.raw == ""
1336
- @publisher.email = nil if @publisher.email == ""
1337
- @publisher.url = nil if @publisher.url == ""
1408
+ @publisher.name = nil if @publisher.name.blank?
1409
+ @publisher.raw = nil if @publisher.raw.blank?
1410
+ @publisher.email = nil if @publisher.email.blank?
1411
+ @publisher.url = nil if @publisher.url.blank?
1338
1412
  end
1339
1413
  return @publisher
1340
1414
  end
@@ -1364,9 +1438,12 @@ module FeedTools
1364
1438
  # attribute.
1365
1439
  def itunes_author
1366
1440
  if @itunes_author.nil?
1367
- @itunes_author = FeedTools.unescape_entities(XPath.first(channel_node,
1368
- "itunes:author/text()").to_s)
1369
- @itunes_author = nil if @itunes_author == ""
1441
+ @itunes_author = FeedTools.unescape_entities(
1442
+ try_xpaths(self.channel_node, [
1443
+ "itunes:author/text()"
1444
+ ], :select_result_value => true)
1445
+ )
1446
+ @itunes_author = nil if @itunes_author.blank?
1370
1447
  end
1371
1448
  return @itunes_author
1372
1449
  end
@@ -1374,29 +1451,40 @@ module FeedTools
1374
1451
  # Returns the feed time
1375
1452
  def time
1376
1453
  if @time.nil?
1377
- unless channel_node.nil?
1378
- time_string = XPath.first(channel_node, "pubDate/text()").to_s
1379
- if time_string == ""
1380
- time_string = XPath.first(channel_node, "dc:date/text()").to_s
1381
- end
1382
- if time_string == ""
1383
- time_string = XPath.first(channel_node, "issued/text()").to_s
1384
- end
1385
- if time_string == ""
1386
- time_string = XPath.first(channel_node, "updated/text()").to_s
1387
- end
1388
- if time_string == ""
1389
- time_string = XPath.first(channel_node, "time/text()").to_s
1390
- end
1391
- end
1454
+ time_string = try_xpaths(self.channel_node, [
1455
+ "atom10:updated/text()",
1456
+ "atom03:updated/text()",
1457
+ "atom:updated/text()",
1458
+ "updated/text()",
1459
+ "atom10:modified/text()",
1460
+ "atom03:modified/text()",
1461
+ "atom:modified/text()",
1462
+ "modified/text()",
1463
+ "time/text()",
1464
+ "atom10:issued/text()",
1465
+ "atom03:issued/text()",
1466
+ "atom:issued/text()",
1467
+ "issued/text()",
1468
+ "atom10:published/text()",
1469
+ "atom03:published/text()",
1470
+ "atom:published/text()",
1471
+ "published/text()",
1472
+ "pubDate/text()",
1473
+ "dc:date/text()",
1474
+ "date/text()"
1475
+ ], :select_result_value => true)
1392
1476
  begin
1393
- if time_string != nil && time_string != ""
1477
+ unless time_string.blank?
1394
1478
  @time = Time.parse(time_string).gmtime
1395
1479
  else
1396
- @time = Time.now.gmtime
1480
+ if FeedTools.configurations[:timestamp_estimation_enabled]
1481
+ @time = Time.now.gmtime
1482
+ end
1397
1483
  end
1398
1484
  rescue
1399
- @time = Time.now.gmtime
1485
+ if FeedTools.configurations[:timestamp_estimation_enabled]
1486
+ @time = Time.now.gmtime
1487
+ end
1400
1488
  end
1401
1489
  end
1402
1490
  return @time
@@ -1410,13 +1498,11 @@ module FeedTools
1410
1498
  # Returns the feed item updated time
1411
1499
  def updated
1412
1500
  if @updated.nil?
1413
- unless channel_node.nil?
1414
- updated_string = XPath.first(channel_node, "updated/text()").to_s
1415
- if updated_string == ""
1416
- updated_string = XPath.first(channel_node, "modified/text()").to_s
1417
- end
1418
- end
1419
- if updated_string != nil && updated_string != ""
1501
+ updated_string = try_xpaths(self.channel_node, [
1502
+ "updated/text()",
1503
+ "modified/text()"
1504
+ ], :select_result_value => true)
1505
+ unless updated_string.blank?
1420
1506
  @updated = Time.parse(updated_string).gmtime rescue nil
1421
1507
  else
1422
1508
  @updated = nil
@@ -1430,51 +1516,16 @@ module FeedTools
1430
1516
  @updated = new_updated
1431
1517
  end
1432
1518
 
1433
- # Returns the feed item issued time
1434
- def issued
1435
- if @issued.nil?
1436
- unless channel_node.nil?
1437
- issued_string = XPath.first(channel_node, "issued/text()").to_s
1438
- if issued_string == ""
1439
- issued_string = XPath.first(channel_node, "pubDate/text()").to_s
1440
- end
1441
- if issued_string == ""
1442
- issued_string = XPath.first(channel_node, "dc:date/text()").to_s
1443
- end
1444
- if issued_string == ""
1445
- issued_string = XPath.first(channel_node, "published/text()").to_s
1446
- end
1447
- end
1448
- if issued_string != nil && issued_string != ""
1449
- @issued = Time.parse(issued_string).gmtime rescue nil
1450
- else
1451
- @issued = nil
1452
- end
1453
- end
1454
- return @issued
1455
- end
1456
-
1457
- # Sets the feed item issued time
1458
- def issued=(new_issued)
1459
- @issued = new_issued
1460
- end
1461
-
1462
1519
  # Returns the feed item published time
1463
1520
  def published
1464
1521
  if @published.nil?
1465
- unless channel_node.nil?
1466
- published_string = XPath.first(channel_node, "published/text()").to_s
1467
- if published_string == ""
1468
- published_string = XPath.first(channel_node, "pubDate/text()").to_s
1469
- end
1470
- if published_string == ""
1471
- published_string = XPath.first(channel_node, "dc:date/text()").to_s
1472
- end
1473
- if published_string == ""
1474
- published_string = XPath.first(channel_node, "issued/text()").to_s
1475
- end
1476
- end
1477
- if published_string != nil && published_string != ""
1522
+ published_string = try_xpaths(self.channel_node, [
1523
+ "published/text()",
1524
+ "pubDate/text()",
1525
+ "issued/text()",
1526
+ "dc:date/text()"
1527
+ ], :select_result_value => true)
1528
+ unless published_string.blank?
1478
1529
  @published = Time.parse(published_string).gmtime rescue nil
1479
1530
  else
1480
1531
  @published = nil
@@ -1492,28 +1543,26 @@ module FeedTools
1492
1543
  def categories
1493
1544
  if @categories.nil?
1494
1545
  @categories = []
1495
- category_nodes = XPath.match(channel_node, "category")
1496
- if category_nodes.nil? || category_nodes.empty?
1497
- category_nodes = XPath.match(channel_node, "dc:subject")
1498
- end
1546
+ category_nodes = try_xpaths_all(self.channel_node, [
1547
+ "category",
1548
+ "dc:subject"
1549
+ ])
1499
1550
  unless category_nodes.nil?
1500
1551
  for category_node in category_nodes
1501
1552
  category = FeedTools::Feed::Category.new
1502
- category.term = XPath.first(category_node, "@term").to_s
1503
- if category.term == ""
1504
- category.term = XPath.first(category_node, "text()").to_s
1505
- end
1506
- category.term.strip! unless category.term.nil?
1507
- category.term = nil if category.term == ""
1508
- category.label = XPath.first(category_node, "@label").to_s
1509
- category.label.strip! unless category.label.nil?
1510
- category.label = nil if category.label == ""
1511
- category.scheme = XPath.first(category_node, "@scheme").to_s
1512
- if category.scheme == ""
1513
- category.scheme = XPath.first(category_node, "@domain").to_s
1514
- end
1515
- category.scheme.strip! unless category.scheme.nil?
1516
- category.scheme = nil if category.scheme == ""
1553
+ category.term = try_xpaths(category_node, [
1554
+ "@term",
1555
+ "text()"
1556
+ ], :select_result_value => true)
1557
+ category.term.strip! unless category.term.blank?
1558
+ category.label = try_xpaths(category_node, ["@label"],
1559
+ :select_result_value => true)
1560
+ category.label.strip! unless category.label.blank?
1561
+ category.scheme = try_xpaths(category_node, [
1562
+ "@scheme",
1563
+ "@domain"
1564
+ ], :select_result_value => true)
1565
+ category.scheme.strip! unless category.scheme.blank?
1517
1566
  @categories << category
1518
1567
  end
1519
1568
  end
@@ -1525,55 +1574,61 @@ module FeedTools
1525
1574
  def images
1526
1575
  if @images.nil?
1527
1576
  @images = []
1528
- unless channel_node.nil?
1529
- image_nodes = XPath.match(channel_node, "image")
1530
- if image_nodes.nil? || image_nodes.empty?
1531
- image_nodes = XPath.match(channel_node, "link")
1532
- end
1533
- if image_nodes.nil? || image_nodes.empty?
1534
- image_nodes = XPath.match(channel_node, "logo")
1535
- end
1536
- if image_nodes.nil? || image_nodes.empty?
1537
- image_nodes = XPath.match(channel_node, "LOGO")
1538
- end
1539
- unless image_nodes.nil?
1540
- for image_node in image_nodes
1541
- image = FeedTools::Feed::Image.new
1542
- image.url = XPath.first(image_node, "url/text()").to_s
1543
- if image.url == ""
1544
- image.url = XPath.first(image_node, "@rdf:resource").to_s
1545
- end
1546
- if image.url == "" && (image_node.name == "logo" ||
1547
- (image_node.attributes['type'] =~ /^image/) == 0)
1548
- image.url = XPath.first(image_node, "@href").to_s
1577
+ image_nodes = try_xpaths_all(self.channel_node, [
1578
+ "image",
1579
+ "logo",
1580
+ "atom10:link",
1581
+ "atom03:link",
1582
+ "atom:link",
1583
+ "link"
1584
+ ])
1585
+ unless image_nodes.blank?
1586
+ for image_node in image_nodes
1587
+ image = FeedTools::Feed::Image.new
1588
+ image.url = try_xpaths(image_node, [
1589
+ "url/text()",
1590
+ "@rdf:resource"
1591
+ ], :select_result_value => true)
1592
+ if image.url.blank? && (image_node.name == "logo" ||
1593
+ (image_node.attributes['type'].to_s =~ /^image/) == 0)
1594
+ image.url = try_xpaths(image_node, [
1595
+ "@atom10:href",
1596
+ "@atom03:href",
1597
+ "@atom:href",
1598
+ "@href"
1599
+ ], :select_result_value => true)
1600
+ if image.url == self.link && image.url != nil
1601
+ image.url = nil
1549
1602
  end
1550
- if image.url == "" && image_node.name == "LOGO"
1551
- image.url = XPath.first(image_node, "@HREF").to_s
1552
- end
1553
- image.url.strip! unless image.url.nil?
1554
- image.url = nil if image.url == ""
1555
- image.title = XPath.first(image_node, "title/text()").to_s
1556
- image.title.strip! unless image.title.nil?
1557
- image.title = nil if image.title == ""
1558
- image.description =
1559
- XPath.first(image_node, "description/text()").to_s
1560
- image.description.strip! unless image.description.nil?
1561
- image.description = nil if image.description == ""
1562
- image.link = XPath.first(image_node, "link/text()").to_s
1563
- image.link.strip! unless image.link.nil?
1564
- image.link = nil if image.link == ""
1565
- image.height = XPath.first(image_node, "height/text()").to_s.to_i
1566
- image.height = nil if image.height <= 0
1567
- image.width = XPath.first(image_node, "width/text()").to_s.to_i
1568
- image.width = nil if image.width <= 0
1569
- image.style = XPath.first(image_node, "@style").to_s.downcase
1570
- if image.style == ""
1571
- image.style = XPath.first(image_node, "@STYLE").to_s.downcase
1572
- end
1573
- image.style.strip! unless image.style.nil?
1574
- image.style = nil if image.style == ""
1575
- @images << image
1576
1603
  end
1604
+ if image.url.blank? && image_node.name == "LOGO"
1605
+ image.url = try_xpaths(image_node, [
1606
+ "@href"
1607
+ ], :select_result_value => true)
1608
+ end
1609
+ image.url.strip! unless image.url.nil?
1610
+ image.title = try_xpaths(image_node,
1611
+ ["title/text()"], :select_result_value => true)
1612
+ image.title.strip! unless image.title.nil?
1613
+ image.description = try_xpaths(image_node,
1614
+ ["description/text()"], :select_result_value => true)
1615
+ image.description.strip! unless image.description.nil?
1616
+ image.link = try_xpaths(image_node,
1617
+ ["link/text()"], :select_result_value => true)
1618
+ image.link.strip! unless image.link.nil?
1619
+ image.height = try_xpaths(image_node,
1620
+ ["height/text()"], :select_result_value => true).to_i
1621
+ image.height = nil if image.height <= 0
1622
+ image.width = try_xpaths(image_node,
1623
+ ["width/text()"], :select_result_value => true).to_i
1624
+ image.width = nil if image.width <= 0
1625
+ image.style = try_xpaths(image_node, [
1626
+ "style/text()",
1627
+ "@style"
1628
+ ], :select_result_value => true)
1629
+ image.style.strip! unless image.style.nil?
1630
+ image.style.downcase! unless image.style.nil?
1631
+ @images << image unless image.url.nil?
1577
1632
  end
1578
1633
  end
1579
1634
  end
@@ -1584,20 +1639,20 @@ module FeedTools
1584
1639
  def text_input
1585
1640
  if @text_input.nil?
1586
1641
  @text_input = FeedTools::Feed::TextInput.new
1587
- text_input_node = XPath.first(channel_node, "textInput")
1642
+ text_input_node = try_xpaths(self.channel_node, ["textInput"])
1588
1643
  unless text_input_node.nil?
1589
1644
  @text_input.title =
1590
- XPath.first(text_input_node, "title/text()").to_s
1591
- @text_input.title = nil if @text_input.title == ""
1645
+ try_xpaths(text_input_node, ["title/text()"],
1646
+ :select_result_value => true)
1592
1647
  @text_input.description =
1593
- XPath.first(text_input_node, "description/text()").to_s
1594
- @text_input.description = nil if @text_input.description == ""
1648
+ try_xpaths(text_input_node, ["description/text()"],
1649
+ :select_result_value => true)
1595
1650
  @text_input.link =
1596
- XPath.first(text_input_node, "link/text()").to_s
1597
- @text_input.link = nil if @text_input.link == ""
1651
+ try_xpaths(text_input_node, ["link/text()"],
1652
+ :select_result_value => true)
1598
1653
  @text_input.name =
1599
- XPath.first(text_input_node, "name/text()").to_s
1600
- @text_input.name = nil if @text_input.name == ""
1654
+ try_xpaths(text_input_node, ["name/text()"],
1655
+ :select_result_value => true)
1601
1656
  end
1602
1657
  end
1603
1658
  return @text_input
@@ -1606,43 +1661,28 @@ module FeedTools
1606
1661
  # Returns the feed's copyright information
1607
1662
  def copyright
1608
1663
  if @copyright.nil?
1609
- unless root_node.nil?
1610
- repair_entities = false
1611
- copyright_node = XPath.first(channel_node, "dc:rights")
1612
- if copyright_node.nil?
1613
- copyright_node = XPath.first(channel_node, "dc:rights",
1614
- FEED_TOOLS_NAMESPACES)
1615
- end
1616
- if copyright_node.nil?
1617
- copyright_node = XPath.first(channel_node, "rights",
1618
- FEED_TOOLS_NAMESPACES)
1619
- end
1620
- if copyright_node.nil?
1621
- copyright_node = XPath.first(channel_node, "copyright",
1622
- FEED_TOOLS_NAMESPACES)
1623
- end
1624
- if copyright_node.nil?
1625
- copyright_node = XPath.first(channel_node, "atom03:copyright",
1626
- FEED_TOOLS_NAMESPACES)
1627
- end
1628
- if copyright_node.nil?
1629
- copyright_node = XPath.first(channel_node, "atom10:copyright",
1630
- FEED_TOOLS_NAMESPACES)
1631
- end
1632
- if copyright_node.nil?
1633
- copyright_node = XPath.first(channel_node, "copyrights",
1634
- FEED_TOOLS_NAMESPACES)
1635
- end
1636
- end
1664
+ repair_entities = false
1665
+ copyright_node = try_xpaths(self.channel_node, [
1666
+ "atom10:copyright",
1667
+ "atom03:copyright",
1668
+ "atom:copyright",
1669
+ "copyright",
1670
+ "copyrights",
1671
+ "dc:rights",
1672
+ "rights"
1673
+ ])
1637
1674
  if copyright_node.nil?
1638
1675
  return nil
1639
1676
  end
1640
- copyright_type = XPath.first(copyright_node, "@type").to_s
1641
- copyright_mode = XPath.first(copyright_node, "@mode").to_s
1642
- copyright_encoding = XPath.first(copyright_node, "@encoding").to_s
1677
+ copyright_type = try_xpaths(copyright_node, "@type",
1678
+ :select_result_value => true)
1679
+ copyright_mode = try_xpaths(copyright_node, "@mode",
1680
+ :select_result_value => true)
1681
+ copyright_encoding = try_xpaths(copyright_node, "@encoding",
1682
+ :select_result_value => true)
1643
1683
 
1644
1684
  # Note that we're checking for misuse of type, mode and encoding here
1645
- if copyright_encoding != ""
1685
+ if !copyright_encoding.blank?
1646
1686
  @copyright =
1647
1687
  "[Embedded data objects are not currently supported.]"
1648
1688
  elsif copyright_node.cdatas.size > 0
@@ -1669,7 +1709,7 @@ module FeedTools
1669
1709
  end
1670
1710
 
1671
1711
  @copyright = @copyright.strip unless @copyright.nil?
1672
- @copyright = nil if @copyright == ""
1712
+ @copyright = nil if @copyright.blank?
1673
1713
  end
1674
1714
  return @copyright
1675
1715
  end
@@ -1684,9 +1724,11 @@ module FeedTools
1684
1724
  if @time_to_live.nil?
1685
1725
  unless channel_node.nil?
1686
1726
  # get the feed time to live from the xml document
1687
- update_frequency = XPath.first(channel_node, "syn:updateFrequency/text()").to_s
1688
- if update_frequency != ""
1689
- update_period = XPath.first(channel_node, "syn:updatePeriod/text()").to_s
1727
+ update_frequency = try_xpaths(self.channel_node,
1728
+ ["syn:updateFrequency/text()"], :select_result_value => true)
1729
+ if !update_frequency.blank?
1730
+ update_period = try_xpaths(self.channel_node,
1731
+ ["syn:updatePeriod/text()"], :select_result_value => true)
1690
1732
  if update_period == "daily"
1691
1733
  @time_to_live = update_frequency.to_i.day
1692
1734
  elsif update_period == "weekly"
@@ -1702,9 +1744,11 @@ module FeedTools
1702
1744
  end
1703
1745
  if @time_to_live.nil?
1704
1746
  # usually expressed in minutes
1705
- update_frequency = XPath.first(channel_node, "ttl/text()").to_s
1706
- if update_frequency != ""
1707
- update_span = XPath.first(channel_node, "ttl/@span").to_s
1747
+ update_frequency = try_xpaths(self.channel_node, ["ttl/text()"],
1748
+ :select_result_value => true)
1749
+ if !update_frequency.blank?
1750
+ update_span = try_xpaths(self.channel_node, ["ttl/@span"],
1751
+ :select_result_value => true)
1708
1752
  if update_span == "seconds"
1709
1753
  @time_to_live = update_frequency.to_i
1710
1754
  elsif update_span == "minutes"
@@ -1719,19 +1763,6 @@ module FeedTools
1719
1763
  @time_to_live = update_frequency.to_i.month
1720
1764
  elsif update_span == "years"
1721
1765
  @time_to_live = update_frequency.to_i.year
1722
- elsif update_frequency.to_i >= 3000
1723
- # Normally, this should default to minutes, but realistically,
1724
- # if they meant minutes, you're rarely going to see a value
1725
- # higher than 120. If we see >= 3000, we're either dealing
1726
- # with a stupid pseudo-spec that decided to use seconds, or
1727
- # we're looking at someone who only has weekly updated
1728
- # content. Worst case, we misreport the time, and we update
1729
- # too often. Best case, we avoid accidentally updating the
1730
- # feed only once a year. In the interests of being pragmatic,
1731
- # and since the problem we avoid is a far greater one than
1732
- # the one we cause, just run the check and hope no one
1733
- # actually gets hurt.
1734
- @time_to_live = update_frequency.to_i
1735
1766
  else
1736
1767
  @time_to_live = update_frequency.to_i.minute
1737
1768
  end
@@ -1740,7 +1771,7 @@ module FeedTools
1740
1771
  if @time_to_live.nil?
1741
1772
  @time_to_live = 0
1742
1773
  update_frequency_days =
1743
- XPath.first(channel_node, "schedule/intervaltime/@days").to_s
1774
+ XPath.first(channel_node, "SCHEDULE/INTERVALTIME/@DAY").to_s
1744
1775
  update_frequency_hours =
1745
1776
  XPath.first(channel_node, "schedule/intervaltime/@hour").to_s
1746
1777
  update_frequency_minutes =
@@ -1769,6 +1800,10 @@ module FeedTools
1769
1800
  if @time_to_live.nil? || @time_to_live == 0
1770
1801
  # Default to one hour
1771
1802
  @time_to_live = 1.hour
1803
+ elsif FeedTools.configurations[:max_ttl] != nil &&
1804
+ FeedTools.configurations[:max_ttl] != 0 &&
1805
+ @time_to_live >= FeedTools.configurations[:max_ttl].to_i
1806
+ @time_to_live = FeedTools.configurations[:max_ttl].to_i
1772
1807
  end
1773
1808
  @time_to_live = @time_to_live.round
1774
1809
  return @time_to_live
@@ -1784,20 +1819,21 @@ module FeedTools
1784
1819
  def cloud
1785
1820
  if @cloud.nil?
1786
1821
  @cloud = FeedTools::Feed::Cloud.new
1787
- @cloud.domain = XPath.first(channel_node, "cloud/@domain").to_s
1788
- @cloud.port = XPath.first(channel_node, "cloud/@port").to_s
1789
- @cloud.path = XPath.first(channel_node, "cloud/@path").to_s
1822
+ @cloud.domain = try_xpaths(self.channel_node, ["cloud/@domain"],
1823
+ :select_result_value => true)
1824
+ @cloud.port = try_xpaths(self.channel_node, ["cloud/@port"],
1825
+ :select_result_value => true)
1826
+ @cloud.path = try_xpaths(self.channel_node, ["cloud/@path"],
1827
+ :select_result_value => true)
1790
1828
  @cloud.register_procedure =
1791
- XPath.first(channel_node, "cloud/@registerProcedure").to_s
1829
+ try_xpaths(self.channel_node, ["cloud/@registerProcedure"],
1830
+ :select_result_value => true)
1792
1831
  @cloud.protocol =
1793
- XPath.first(channel_node, "cloud/@protocol").to_s.downcase
1794
- @cloud.domain = nil if @cloud.domain == ""
1795
- @cloud.port = nil if @cloud.port == ""
1796
- @cloud.port = @cloud.port.to_i unless @cloud.port.nil?
1832
+ try_xpaths(self.channel_node, ["cloud/@protocol"],
1833
+ :select_result_value => true)
1834
+ @cloud.protocol.downcase unless @cloud.protocol.nil?
1835
+ @cloud.port = @cloud.port.to_s.to_i
1797
1836
  @cloud.port = nil if @cloud.port == 0
1798
- @cloud.path = nil if @cloud.path == ""
1799
- @cloud.register_procedure = nil if @cloud.register_procedure == ""
1800
- @cloud.protocol = nil if @cloud.protocol == ""
1801
1837
  end
1802
1838
  return @cloud
1803
1839
  end
@@ -1810,9 +1846,9 @@ module FeedTools
1810
1846
  # Returns the feed generator
1811
1847
  def generator
1812
1848
  if @generator.nil?
1813
- @generator = XPath.first(channel_node, "generator/text()").to_s
1814
- @generator = FeedTools.strip_html(@generator)
1815
- @generator = nil if @generator == ""
1849
+ @generator = try_xpaths(self.channel_node, ["generator/text()"],
1850
+ :select_result_value => true)
1851
+ @generator = FeedTools.strip_html(@generator) unless @generator.nil?
1816
1852
  end
1817
1853
  return @generator
1818
1854
  end
@@ -1825,9 +1861,9 @@ module FeedTools
1825
1861
  # Returns the feed docs
1826
1862
  def docs
1827
1863
  if @docs.nil?
1828
- @docs = XPath.first(channel_node, "docs/text()").to_s
1829
- @docs = FeedTools.strip_html(@docs)
1830
- @docs = nil if @docs == ""
1864
+ @docs = try_xpaths(self.channel_node, ["docs/text()"],
1865
+ :select_result_value => true)
1866
+ @docs = FeedTools.strip_html(@docs) unless @docs.nil?
1831
1867
  end
1832
1868
  return @docs
1833
1869
  end
@@ -1840,23 +1876,23 @@ module FeedTools
1840
1876
  # Returns the feed language
1841
1877
  def language
1842
1878
  if @language.nil?
1843
- unless channel_node.nil?
1844
- @language = XPath.first(channel_node, "language/text()").to_s
1845
- if @language == ""
1846
- @language = XPath.first(channel_node, "dc:language/text()").to_s
1847
- end
1848
- if @language == ""
1849
- @language = XPath.first(channel_node, "xml:lang/text()").to_s
1850
- end
1851
- if @language == ""
1852
- @language = XPath.first(root_node, "xml:lang/text()").to_s
1853
- end
1854
- end
1855
- if @language == "" || @language.nil?
1879
+ @language = select_not_blank([
1880
+ try_xpaths(self.channel_node, [
1881
+ "language/text()",
1882
+ "dc:language/text()",
1883
+ "@dc:language",
1884
+ "@xml:lang",
1885
+ "xml:lang/text()"
1886
+ ], :select_result_value => true),
1887
+ try_xpaths(self.root_node, [
1888
+ "@xml:lang",
1889
+ "xml:lang/text()"
1890
+ ], :select_result_value => true)
1891
+ ])
1892
+ if @language.blank?
1856
1893
  @language = "en-us"
1857
1894
  end
1858
1895
  @language = @language.downcase
1859
- @language = nil if @language == ""
1860
1896
  end
1861
1897
  return @language
1862
1898
  end
@@ -1869,12 +1905,11 @@ module FeedTools
1869
1905
  # Returns true if this feed contains explicit material.
1870
1906
  def explicit?
1871
1907
  if @explicit.nil?
1872
- if XPath.first(channel_node,
1873
- "media:adult/text()").to_s.downcase == "true" ||
1874
- XPath.first(channel_node,
1875
- "itunes:explicit/text()").to_s.downcase == "yes" ||
1876
- XPath.first(channel_node,
1877
- "itunes:explicit/text()").to_s.downcase == "true"
1908
+ explicit_string = try_xpaths(self.channel_node, [
1909
+ "media:adult/text()",
1910
+ "itunes:explicit/text()"
1911
+ ], :select_result_value => true)
1912
+ if explicit_string == "true" || explicit_string == "yes"
1878
1913
  @explicit = true
1879
1914
  else
1880
1915
  @explicit = false
@@ -1888,66 +1923,68 @@ module FeedTools
1888
1923
  @explicit = (new_explicit ? true : false)
1889
1924
  end
1890
1925
 
1891
- # Returns the feed items
1892
- def items
1893
- if @items.nil?
1894
- unless root_node.nil?
1895
- raw_items = XPath.match(root_node, "item")
1896
- if raw_items == nil || raw_items == []
1897
- raw_items = XPath.match(channel_node, "item")
1898
- end
1899
- if raw_items == nil || raw_items == []
1900
- raw_items = XPath.match(channel_node, "ITEM")
1901
- end
1902
- if raw_items == nil || raw_items == []
1903
- raw_items = XPath.match(root_node, "ITEM")
1904
- end
1905
- if raw_items == nil || raw_items == []
1906
- raw_items = XPath.match(channel_node, "entry")
1907
- end
1908
- if raw_items == nil || raw_items == []
1909
- raw_items = XPath.match(root_node, "entry")
1910
- end
1911
- end
1926
+ # Returns the feed entries
1927
+ def entries
1928
+ if @entries.blank?
1929
+ raw_entries = select_not_blank([
1930
+ try_xpaths_all(self.channel_node, [
1931
+ "atom10:entry",
1932
+ "atom03:entry",
1933
+ "atom:entry",
1934
+ "entry"
1935
+ ]),
1936
+ try_xpaths_all(self.root_node, [
1937
+ "rss10:item",
1938
+ "item",
1939
+ "atom10:entry",
1940
+ "atom03:entry",
1941
+ "atom:entry",
1942
+ "entry"
1943
+ ]),
1944
+ try_xpaths_all(self.channel_node, [
1945
+ "rss10:item",
1946
+ "item"
1947
+ ])
1948
+ ])
1912
1949
 
1913
1950
  # create the individual feed items
1914
- @items = []
1915
- if raw_items != nil
1916
- for item_node in raw_items.reverse
1917
- new_item = FeedItem.new
1918
- new_item.feed_data = item_node.to_s
1919
- new_item.feed_data_type = self.feed_data_type
1920
- @items << new_item
1951
+ @entries = []
1952
+ unless raw_entries.blank?
1953
+ for entry_node in raw_entries.reverse
1954
+ new_entry = FeedItem.new
1955
+ new_entry.feed_data = entry_node.to_s
1956
+ new_entry.feed_data_type = self.feed_data_type
1957
+ @entries << new_entry
1921
1958
  end
1922
1959
  end
1923
1960
  end
1924
1961
 
1925
1962
  # Sort the items
1926
- @items = @items.sort do |a,b|
1927
- (b.time or Time.mktime(1970)) <=> (a.time or Time.mktime(1970))
1963
+ @entries = @entries.sort do |a, b|
1964
+ (b.time or Time.utc(1970)) <=> (a.time or Time.utc(1970))
1928
1965
  end
1929
- return @items
1966
+ return @entries
1930
1967
  end
1931
1968
 
1932
- # Sets the items array to a new array.
1933
- def items=(new_items)
1934
- for item in new_items
1935
- unless item.kind_of? FeedTools::FeedItem
1969
+ # Sets the entries array to a new array.
1970
+ def entries=(new_entries)
1971
+ for entry in new_entries
1972
+ unless entry.kind_of? FeedTools::FeedItem
1936
1973
  raise ArgumentError,
1937
- "You should only add FeedItem objects to the items array."
1974
+ "You should only add FeedItem objects to the entries array."
1938
1975
  end
1939
1976
  end
1940
- @items = new_items
1977
+ @entries = new_entries
1941
1978
  end
1942
1979
 
1943
1980
  # Syntactic sugar for appending feed items to a feed.
1944
- def <<(new_item)
1945
- @items ||= []
1946
- unless new_item.kind_of? FeedTools::FeedItem
1981
+ def <<(new_entry)
1982
+ @entries ||= []
1983
+ unless new_entry.kind_of? FeedTools::FeedItem
1947
1984
  raise ArgumentError,
1948
- "You should only add FeedItem objects to the items array."
1985
+ "You should only add FeedItem objects to the entries array."
1949
1986
  end
1950
- @items << new_item
1987
+ @entries << new_entry
1951
1988
  end
1952
1989
 
1953
1990
  # The time that the feed was last requested from the remote server. Nil
@@ -2020,11 +2057,14 @@ module FeedTools
2020
2057
  end
2021
2058
 
2022
2059
  # Generates xml based on the content of the feed
2023
- def build_xml(feed_type=(self.feed_type or "rss"), version=nil,
2024
- xml_builder=Builder::XmlMarkup.new(:indent => 2))
2025
- if feed_type == "rss" && (version == nil || version == 0.0)
2060
+ def build_xml(feed_type=(self.feed_type or "atom"), version=nil,
2061
+ xml_builder=Builder::XmlMarkup.new(
2062
+ :indent => 2, :escape_attrs => false))
2063
+ xml_builder.instruct! :xml, :version => "1.0",
2064
+ :encoding => (FeedTools.configurations[:output_encoding] or "utf-8")
2065
+ if feed_type == "rss" && (version == nil || version <= 0.0)
2026
2066
  version = 1.0
2027
- elsif feed_type == "atom" && (version == nil || version == 0.0)
2067
+ elsif feed_type == "atom" && (version == nil || version <= 0.0)
2028
2068
  version = 1.0
2029
2069
  end
2030
2070
  if feed_type == "rss" && (version == 0.9 || version == 1.0 ||
@@ -2040,7 +2080,8 @@ module FeedTools
2040
2080
  "xmlns:media" => FEED_TOOLS_NAMESPACES['media']) do
2041
2081
  channel_attributes = {}
2042
2082
  unless self.link.nil?
2043
- channel_attributes["rdf:about"] = CGI.escapeHTML(self.link)
2083
+ channel_attributes["rdf:about"] =
2084
+ FeedTools.escape_entities(self.link)
2044
2085
  end
2045
2086
  xml_builder.channel(channel_attributes) do
2046
2087
  unless title.nil? || title == ""
@@ -2054,7 +2095,7 @@ module FeedTools
2054
2095
  xml_builder.link
2055
2096
  end
2056
2097
  unless images.nil? || images.empty?
2057
- xml_builder.image("rdf:resource" => CGI.escapeHTML(
2098
+ xml_builder.image("rdf:resource" => FeedTools.escape_entities(
2058
2099
  images.first.url))
2059
2100
  end
2060
2101
  unless description.nil? || description == ""
@@ -2078,7 +2119,7 @@ module FeedTools
2078
2119
  "item link field."
2079
2120
  end
2080
2121
  xml_builder.tag!("rdf:li", "rdf:resource" =>
2081
- CGI.escapeHTML(item.link))
2122
+ FeedTools.escape_entities(item.link))
2082
2123
  end
2083
2124
  end
2084
2125
  end
@@ -2095,20 +2136,20 @@ module FeedTools
2095
2136
  end
2096
2137
  best_image = images.first if best_image.nil?
2097
2138
  xml_builder.image(
2098
- "rdf:about" => CGI.escapeHTML(best_image.url)) do
2099
- if best_image.title != nil && best_image.title != ""
2139
+ "rdf:about" => FeedTools.escape_entities(best_image.url)) do
2140
+ if !best_image.title.blank?
2100
2141
  xml_builder.title(best_image.title)
2101
- elsif self.title != nil && self.title != ""
2142
+ elsif !self.title.blank?
2102
2143
  xml_builder.title(self.title)
2103
2144
  else
2104
2145
  xml_builder.title
2105
2146
  end
2106
- unless best_image.url.nil? || best_image.url == ""
2147
+ unless best_image.url.blank?
2107
2148
  xml_builder.url(best_image.url)
2108
2149
  end
2109
- if best_image.link != nil && best_image.link != ""
2150
+ if !best_image.link.blank?
2110
2151
  xml_builder.link(best_image.link)
2111
- elsif self.link != nil && self.link != ""
2152
+ elsif !self.link.blank?
2112
2153
  xml_builder.link(self.link)
2113
2154
  else
2114
2155
  xml_builder.link
@@ -2131,18 +2172,18 @@ module FeedTools
2131
2172
  "xmlns:itunes" => FEED_TOOLS_NAMESPACES['itunes'],
2132
2173
  "xmlns:media" => FEED_TOOLS_NAMESPACES['media']) do
2133
2174
  xml_builder.channel do
2134
- unless title.nil? || title == ""
2175
+ unless title.blank?
2135
2176
  xml_builder.title(title)
2136
2177
  end
2137
- unless link.nil? || link == ""
2178
+ unless link.blank?
2138
2179
  xml_builder.link(link)
2139
2180
  end
2140
- unless description.nil? || description == ""
2181
+ unless description.blank?
2141
2182
  xml_builder.description(description)
2142
2183
  end
2143
2184
  xml_builder.ttl((time_to_live / 1.minute).to_s)
2144
2185
  xml_builder.generator(
2145
- "http://www.sporkmonger.com/projects/feedtools")
2186
+ FeedTools.configurations[:generator_href])
2146
2187
  build_xml_hook(feed_type, version, xml_builder)
2147
2188
  unless items.nil?
2148
2189
  for item in items
@@ -2152,53 +2193,12 @@ module FeedTools
2152
2193
  end
2153
2194
  end
2154
2195
  elsif feed_type == "atom" && version == 0.3
2155
- # normal atom format
2156
- return xml_builder.feed("xmlns" => FEED_TOOLS_NAMESPACES['atom03'],
2157
- "version" => version,
2158
- "xml:lang" => language) do
2159
- unless title.nil? || title == ""
2160
- xml_builder.title(title,
2161
- "mode" => "escaped",
2162
- "type" => "text/html")
2163
- end
2164
- xml_builder.author do
2165
- unless self.author.nil? || self.author.name.nil?
2166
- xml_builder.name(self.author.name)
2167
- else
2168
- xml_builder.name("n/a")
2169
- end
2170
- unless self.author.nil? || self.author.email.nil?
2171
- xml_builder.email(self.author.email)
2172
- end
2173
- unless self.author.nil? || self.author.url.nil?
2174
- xml_builder.url(self.author.url)
2175
- end
2176
- end
2177
- unless link.nil? || link == ""
2178
- xml_builder.link("href" => link,
2179
- "rel" => "alternate",
2180
- "type" => "text/html",
2181
- "title" => title)
2182
- end
2183
- unless description.nil? || description == ""
2184
- xml_builder.tagline(description,
2185
- "mode" => "escaped",
2186
- "type" => "text/html")
2187
- end
2188
- xml_builder.generator("FeedTools",
2189
- "url" => "http://www.sporkmonger.com/projects/feedtools")
2190
- build_xml_hook(feed_type, version, xml_builder)
2191
- unless items.nil?
2192
- for item in items
2193
- item.build_xml(feed_type, version, xml_builder)
2194
- end
2195
- end
2196
- end
2196
+ raise "Atom 0.3 is obsolete."
2197
2197
  elsif feed_type == "atom" && version == 1.0
2198
2198
  # normal atom format
2199
2199
  return xml_builder.feed("xmlns" => FEED_TOOLS_NAMESPACES['atom10'],
2200
2200
  "xml:lang" => language) do
2201
- unless title.nil? || title == ""
2201
+ unless title.blank?
2202
2202
  xml_builder.title(title,
2203
2203
  "type" => "html")
2204
2204
  end
@@ -2212,22 +2212,22 @@ module FeedTools
2212
2212
  xml_builder.email(self.author.email)
2213
2213
  end
2214
2214
  unless self.author.nil? || self.author.url.nil?
2215
- xml_builder.url(self.author.url)
2215
+ xml_builder.uri(self.author.url)
2216
2216
  end
2217
2217
  end
2218
- unless self.url.nil? || self.url == ""
2218
+ unless self.url.blank?
2219
2219
  xml_builder.link("href" => self.url,
2220
2220
  "rel" => "self",
2221
2221
  "type" => "application/atom+xml")
2222
2222
  end
2223
- unless self.link.nil? || self.link == ""
2224
- xml_builder.link("href" => self.link,
2223
+ unless self.link.blank?
2224
+ xml_builder.link("href" => FeedTools.escape_entities(self.link),
2225
2225
  "rel" => "alternate",
2226
2226
  "type" => "text/html",
2227
- "title" => self.title)
2227
+ "title" => FeedTools.escape_entities(self.title))
2228
2228
  end
2229
- unless description.nil? || description == ""
2230
- xml_builder.subtitle(description,
2229
+ unless description.blank?
2230
+ xml_builder.subtitle(self.subtitle,
2231
2231
  "type" => "html")
2232
2232
  else
2233
2233
  xml_builder.subtitle(FeedTools.no_content_string,
@@ -2242,8 +2242,8 @@ module FeedTools
2242
2242
  else
2243
2243
  xml_builder.updated(Time.now.gmtime.iso8601)
2244
2244
  end
2245
- xml_builder.generator("FeedTools - " +
2246
- "http://www.sporkmonger.com/projects/feedtools")
2245
+ xml_builder.generator(FeedTools.configurations[:generator_name] +
2246
+ " - " + FeedTools.configurations[:generator_href])
2247
2247
  if self.id != nil
2248
2248
  unless FeedTools.is_uri? self.id
2249
2249
  if self.link != nil
@@ -2266,46 +2266,49 @@ module FeedTools
2266
2266
  end
2267
2267
  end
2268
2268
  end
2269
+ else
2270
+ raise "Unsupported feed format/version."
2269
2271
  end
2270
2272
  end
2271
2273
 
2272
2274
  # Persists the current feed state to the cache.
2273
2275
  def save
2274
- if FeedTools.feed_cache.nil?
2275
- raise "Caching is currently disabled. Cannot save to cache."
2276
- elsif self.url.nil?
2277
- raise "The url field must be set to save to the cache."
2278
- elsif self.cache_object.nil?
2279
- raise "The cache_object is currently nil. Cannot save to cache."
2280
- else
2281
- self.cache_object.url = self.url
2282
- unless self.feed_data.nil?
2283
- self.cache_object.title = self.title
2284
- self.cache_object.link = self.link
2285
- self.cache_object.feed_data = self.feed_data
2286
- self.cache_object.feed_data_type = self.feed_data_type.to_s
2287
- end
2288
- unless self.http_response.nil?
2276
+ unless self.url =~ /^file:\/\//
2277
+ if FeedTools.feed_cache.nil?
2278
+ raise "Caching is currently disabled. Cannot save to cache."
2279
+ elsif self.url.nil?
2280
+ raise "The url field must be set to save to the cache."
2281
+ elsif self.cache_object.nil?
2282
+ raise "The cache_object is currently nil. Cannot save to cache."
2283
+ else
2284
+ self.cache_object.url = self.url
2285
+ unless self.feed_data.nil?
2286
+ self.cache_object.title = self.title
2287
+ self.cache_object.link = self.link
2288
+ self.cache_object.feed_data = self.feed_data
2289
+ self.cache_object.feed_data_type = self.feed_data_type.to_s
2290
+ end
2289
2291
  self.cache_object.http_headers = self.http_headers.to_yaml
2292
+ self.cache_object.last_retrieved = self.last_retrieved
2293
+ self.cache_object.save
2290
2294
  end
2291
- self.cache_object.last_retrieved = self.last_retrieved
2292
- self.cache_object.save
2293
2295
  end
2294
2296
  end
2295
2297
 
2296
- alias_method :tagline, :description
2297
- alias_method :tagline=, :description=
2298
- alias_method :subtitle, :description
2299
- alias_method :subtitle=, :description=
2300
- alias_method :abstract, :description
2301
- alias_method :abstract=, :description=
2302
- alias_method :content, :description
2303
- alias_method :content=, :description=
2298
+ alias_method :tagline, :subtitle
2299
+ alias_method :tagline=, :subtitle=
2300
+ alias_method :description, :subtitle
2301
+ alias_method :description=, :subtitle=
2302
+ alias_method :abstract, :subtitle
2303
+ alias_method :abstract=, :subtitle=
2304
+ alias_method :content, :subtitle
2305
+ alias_method :content=, :subtitle=
2304
2306
  alias_method :ttl, :time_to_live
2305
2307
  alias_method :ttl=, :time_to_live=
2306
2308
  alias_method :guid, :id
2307
2309
  alias_method :guid=, :id=
2308
- alias_method :entries, :items
2310
+ alias_method :items, :entries
2311
+ alias_method :items=, :entries=
2309
2312
 
2310
2313
  # passes missing methods to the cache_object
2311
2314
  def method_missing(msg, *params)