feedtools 0.2.18 → 0.2.19

Sign up to get free protection for your applications and to get access to all the features.
@@ -29,9 +29,11 @@ module FeedTools
29
29
  # :stopdoc:
30
30
  include REXML
31
31
  class << self
32
- include GenericHelper
32
+ include FeedTools::GenericHelper
33
33
  private :validate_options
34
34
  end
35
+ include FeedTools::GenericHelper
36
+ private :validate_options
35
37
  # :startdoc:
36
38
 
37
39
  # Represents a feed/feed item's category
@@ -143,7 +145,7 @@ module FeedTools
143
145
  @link = nil
144
146
  @last_retrieved = nil
145
147
  @time_to_live = nil
146
- @items = nil
148
+ @entries = nil
147
149
  @live = false
148
150
  end
149
151
 
@@ -175,10 +177,12 @@ module FeedTools
175
177
  # Loads the feed from the remote url if the feed has expired from the cache or cannot be
176
178
  # retrieved from the cache for some reason.
177
179
  def update!
178
- if self.http_headers.nil? && !(self.cache_object.nil?) &&
180
+ if self.http_headers.blank? && !(self.cache_object.nil?) &&
179
181
  !(self.cache_object.http_headers.nil?)
180
182
  @http_headers = YAML.load(self.cache_object.http_headers)
181
183
  @http_headers = {} unless @http_headers.kind_of? Hash
184
+ elsif self.http_headers.blank?
185
+ @http_headers = {}
182
186
  end
183
187
  if self.expired? == false
184
188
  @live = false
@@ -240,11 +244,12 @@ module FeedTools
240
244
  self.http_headers['last-modified'] unless
241
245
  self.http_headers['last-modified'].nil?
242
246
  end
243
- headers["User-Agent"] =
244
- FeedTools.user_agent unless FeedTools.user_agent.nil?
247
+ unless FeedTools.configurations[:user_agent].nil?
248
+ headers["User-Agent"] = FeedTools.configurations[:user_agent]
249
+ end
245
250
 
246
251
  # The http feed access method
247
- http_fetch = lambda do |feed_url, http_headers, redirect_limit,
252
+ http_fetch = lambda do |feed_url, request_headers, redirect_limit,
248
253
  response_chain, no_headers|
249
254
  raise FeedAccessError, 'Redirect too deep' if redirect_limit == 0
250
255
  feed_uri = nil
@@ -256,11 +261,13 @@ module FeedTools
256
261
  end
257
262
 
258
263
  begin
259
- Net::HTTP.start(feed_uri.host, (feed_uri.port or 80)) do |http|
264
+ # TODO: Proxy host and proxy port would go here if implemented
265
+ http = Net::HTTP.new(feed_uri.host, (feed_uri.port or 80))
266
+ http.start do
260
267
  final_uri = feed_uri.path
261
268
  final_uri += ('?' + feed_uri.query) if feed_uri.query
262
- http_headers = {} if no_headers
263
- response = http.request_get(final_uri, http_headers)
269
+ request_headers = {} if no_headers
270
+ response = http.request_get(final_uri, request_headers)
264
271
 
265
272
  case response
266
273
  when Net::HTTPSuccess
@@ -289,7 +296,8 @@ module FeedTools
289
296
  response_chain << [feed_url, response]
290
297
  new_location = response['location']
291
298
  if response_chain.assoc(new_location) != nil
292
- raise FeedAccessError, "Redirection loop detected: #{new_location}"
299
+ raise FeedAccessError,
300
+ "Redirection loop detected: #{new_location}"
293
301
  end
294
302
 
295
303
  # Find out if we've already seen the url we've been
@@ -300,13 +308,21 @@ module FeedTools
300
308
  :cache_only => true)
301
309
  if cached_feed.cache_object != nil &&
302
310
  cached_feed.cache_object.new_record? != true
303
- unless cached_feed.expired?
304
- # Copy the cached state, starting with the url
311
+ if !cached_feed.expired? &&
312
+ !cached_feed.http_headers.blank?
313
+ # Copy the cached state
305
314
  self.url = cached_feed.url
306
- self.title = cached_feed.title
307
- self.link = cached_feed.link
308
- self.feed_data = cached_feed.feed_data
309
- self.feed_data_type = cached_feed.feed_data_type
315
+
316
+ @feed_data = cached_feed.feed_data
317
+ @feed_data_type = cached_feed.feed_data_type
318
+
319
+ if @feed_data.blank?
320
+ raise "Invalid cache data."
321
+ end
322
+
323
+ @title = nil; self.title
324
+ @link = nil; self.link
325
+
310
326
  self.last_retrieved = cached_feed.last_retrieved
311
327
  self.http_headers = cached_feed.http_headers
312
328
  self.cache_object = cached_feed.cache_object
@@ -342,6 +358,10 @@ module FeedTools
342
358
  raise FeedAccessError, 'Socket error prevented feed retrieval'
343
359
  rescue Timeout::Error
344
360
  raise FeedAccessError, 'Timeout while attempting to retrieve feed'
361
+ rescue Errno::ENETUNREACH
362
+ raise FeedAccessError, 'Network was unreachable'
363
+ rescue Errno::ECONNRESET
364
+ raise FeedAccessError, 'Connection was reset by peer'
345
365
  end
346
366
  end
347
367
 
@@ -375,12 +395,12 @@ module FeedTools
375
395
  end
376
396
  end
377
397
  unless @http_response.kind_of? Net::HTTPRedirection
398
+ @feed_data = self.http_response.body
378
399
  @http_headers = {}
379
400
  self.http_response.each_header do |key, value|
380
401
  self.http_headers[key.downcase] = value
381
402
  end
382
403
  self.last_retrieved = Time.now.gmtime
383
- self.feed_data = self.http_response.body
384
404
  end
385
405
  rescue FeedAccessError
386
406
  @live = false
@@ -451,9 +471,9 @@ module FeedTools
451
471
  open(file_name) do |file|
452
472
  @http_response = nil
453
473
  @http_headers = {}
474
+ @feed_data = file.read
475
+ @feed_data_type = :xml
454
476
  self.last_retrieved = Time.now.gmtime
455
- self.feed_data = file.read
456
- self.feed_data_type = :xml
457
477
  end
458
478
  rescue
459
479
  @live = false
@@ -478,8 +498,71 @@ module FeedTools
478
498
 
479
499
  # Returns a hash of the http headers from the response.
480
500
  def http_headers
501
+ if @http_headers.blank?
502
+ if !self.cache_object.nil? && !self.cache_object.http_headers.nil?
503
+ @http_headers = YAML.load(self.cache_object.http_headers)
504
+ @http_headers = {} unless @http_headers.kind_of? Hash
505
+ else
506
+ @http_headers = {}
507
+ end
508
+ end
481
509
  return @http_headers
482
510
  end
511
+
512
+ # Returns the encoding that the feed was parsed with
513
+ def encoding
514
+ if @encoding.nil?
515
+ unless self.http_headers.blank?
516
+ @encoding = "utf-8"
517
+ else
518
+ @encoding = self.encoding_from_xml_data
519
+ end
520
+ end
521
+ return @encoding
522
+ end
523
+
524
+ # Returns the encoding of feed calculated only from the xml data.
525
+ # I.e., the encoding we would come up with if we ignore RFC 3023.
526
+ def encoding_from_xml_data
527
+ if @encoding_from_xml_data.nil?
528
+ raw_data = self.feed_data
529
+ encoding_from_xml_instruct =
530
+ raw_data.scan(
531
+ /^<\?xml [^>]*encoding="([\w]*)"[^>]*\?>/
532
+ ).flatten.first
533
+ unless encoding_from_xml_instruct.blank?
534
+ encoding_from_xml_instruct.downcase!
535
+ end
536
+ if encoding_from_xml_instruct.blank?
537
+ doc = Document.new(raw_data)
538
+ encoding_from_xml_instruct = doc.encoding.downcase
539
+ if encoding_from_xml_instruct == "utf-8"
540
+ # REXML has a tendency to report utf-8 overzealously, take with
541
+ # grain of salt
542
+ encoding_from_xml_instruct = nil
543
+ end
544
+ else
545
+ @encoding_from_xml_data = encoding_from_xml_instruct
546
+ end
547
+ if encoding_from_xml_instruct.blank?
548
+ sniff_table = {
549
+ "Lo\247\224" => "ebcdic-cp-us",
550
+ "<?xm" => "utf-8"
551
+ }
552
+ sniff = self.feed_data[0..3]
553
+ if sniff_table[sniff] != nil
554
+ @encoding_from_xml_data = sniff_table[sniff].downcase
555
+ end
556
+ else
557
+ @encoding_from_xml_data = encoding_from_xml_instruct
558
+ end
559
+ if @encoding_from_xml_data.blank?
560
+ # Safest assumption
561
+ @encoding_from_xml_data = "utf-8"
562
+ end
563
+ end
564
+ return @encoding_from_xml_data
565
+ end
483
566
 
484
567
  # Returns the feed's raw data.
485
568
  def feed_data
@@ -493,12 +576,40 @@ module FeedTools
493
576
 
494
577
  # Sets the feed's data.
495
578
  def feed_data=(new_feed_data)
579
+ @http_headers = {}
580
+ @cache_object = nil
581
+ @url = nil
582
+ @id = nil
583
+ @encoding = nil
496
584
  @feed_data = new_feed_data
497
585
  unless self.cache_object.nil?
498
586
  self.cache_object.feed_data = new_feed_data
499
587
  end
500
588
  end
501
589
 
590
+ # Returns the feed's raw data as utf-8.
591
+ def feed_data_utf_8(force_encoding=nil)
592
+ if @feed_data_utf_8.nil?
593
+ raw_data = self.feed_data
594
+ if force_encoding.nil?
595
+ use_encoding = self.encoding
596
+ else
597
+ use_encoding = force_encoding
598
+ end
599
+ if use_encoding != "utf-8"
600
+ begin
601
+ @feed_data_utf_8 =
602
+ Iconv.new('utf-8', use_encoding).iconv(raw_data)
603
+ rescue
604
+ return raw_data
605
+ end
606
+ else
607
+ return self.feed_data
608
+ end
609
+ end
610
+ return @feed_data_utf_8
611
+ end
612
+
502
613
  # Returns the data type of the feed
503
614
  # Possible values:
504
615
  # * :xml
@@ -526,24 +637,15 @@ module FeedTools
526
637
  @xml_doc = nil
527
638
  else
528
639
  if @xml_doc.nil?
529
- # INQUIRY: Is there any way of saying "dude, rescue *everything*"?
530
640
  begin
531
641
  begin
532
- # TODO: :ignore_whitespace_nodes => :all
533
- # Add that?
534
- # ======================================
535
- @xml_doc = Document.new(feed_data)
536
- rescue Exception
537
- # Something failed especially badly, attempt to repair the
538
- # xml with htree.
539
- @xml_doc = HTree.parse(feed_data).to_rexml
540
- rescue
642
+ @xml_doc = Document.new(self.feed_data_utf_8,
643
+ :ignore_whitespace_nodes => :all)
644
+ rescue Object
541
645
  # Something failed, attempt to repair the xml with htree.
542
- @xml_doc = HTree.parse(feed_data).to_rexml
646
+ @xml_doc = HTree.parse(self.feed_data_utf_8).to_rexml
543
647
  end
544
- rescue Exception
545
- @xml_doc = nil
546
- rescue
648
+ rescue Object
547
649
  @xml_doc = nil
548
650
  end
549
651
  end
@@ -551,14 +653,23 @@ module FeedTools
551
653
  return @xml_doc
552
654
  end
553
655
 
554
- # Returns the first node within the channel_node that matches the xpath query.
555
- def find_node(xpath)
556
- return XPath.first(channel_node, xpath)
656
+ # Returns the first node within the channel_node that matches the xpath
657
+ # query.
658
+ def find_node(xpath, select_result_value=false)
659
+ if self.feed_data_type != :xml
660
+ raise "The feed data type is not xml."
661
+ end
662
+ return try_xpaths(self.channel_node, [xpath],
663
+ :select_result_value => select_result_value)
557
664
  end
558
665
 
559
666
  # Returns all nodes within the channel_node that match the xpath query.
560
- def find_all_nodes(xpath)
561
- return XPath.match(channel_node, xpath)
667
+ def find_all_nodes(xpath, select_result_value=false)
668
+ if self.feed_data_type != :xml
669
+ raise "The feed data type is not xml."
670
+ end
671
+ return try_xpaths_all(self.channel_node, [xpath],
672
+ :select_result_value => select_result_value)
562
673
  end
563
674
 
564
675
  # Returns the root node of the feed.
@@ -568,7 +679,15 @@ module FeedTools
568
679
  # break this stuff.
569
680
  # E.g.: http://smogzer.tripod.com/smog.rdf
570
681
  # ===================================================================
571
- @root_node = xml.root
682
+ begin
683
+ if xml.nil?
684
+ return nil
685
+ else
686
+ @root_node = xml.root
687
+ end
688
+ rescue
689
+ return nil
690
+ end
572
691
  end
573
692
  return @root_node
574
693
  end
@@ -576,13 +695,11 @@ module FeedTools
576
695
  # Returns the channel node of the feed.
577
696
  def channel_node
578
697
  if @channel_node.nil? && root_node != nil
579
- @channel_node = XPath.first(root_node, "channel")
580
- if @channel_node == nil
581
- @channel_node = XPath.first(root_node, "CHANNEL")
582
- end
583
- if @channel_node == nil
584
- @channel_node = XPath.first(root_node, "feedinfo")
585
- end
698
+ @channel_node = try_xpaths(root_node, [
699
+ "channel",
700
+ "CHANNEL",
701
+ "feedinfo"
702
+ ])
586
703
  if @channel_node == nil
587
704
  @channel_node = root_node
588
705
  end
@@ -592,12 +709,13 @@ module FeedTools
592
709
 
593
710
  # The cache object that handles the feed persistence.
594
711
  def cache_object
712
+ if !@url.nil? && @url =~ /^file:\/\//
713
+ return nil
714
+ end
595
715
  unless FeedTools.feed_cache.nil?
596
716
  if @cache_object.nil?
597
717
  begin
598
- if @id != nil
599
- @cache_object = FeedTools.feed_cache.find_by_id(@id)
600
- elsif @url != nil
718
+ if @url != nil
601
719
  @cache_object = FeedTools.feed_cache.find_by_url(@url)
602
720
  end
603
721
  if @cache_object.nil?
@@ -719,43 +837,22 @@ module FeedTools
719
837
  # Returns the feed's unique id
720
838
  def id
721
839
  if @id.nil?
722
- unless channel_node.nil?
723
- @id = XPath.first(channel_node, "id/text()").to_s
724
- if @id == ""
725
- @id = XPath.first(channel_node, "atom10:id/text()",
726
- FEED_TOOLS_NAMESPACES).to_s
727
- end
728
- if @id == ""
729
- @id = XPath.first(channel_node, "atom03:id/text()",
730
- FEED_TOOLS_NAMESPACES).to_s
731
- end
732
- if @id == ""
733
- @id = XPath.first(channel_node, "atom:id/text()").to_s
734
- end
735
- if @id == ""
736
- @id = XPath.first(channel_node, "guid/text()").to_s
737
- end
738
- end
739
- unless root_node.nil?
740
- if @id == "" || @id.nil?
741
- @id = XPath.first(root_node, "id/text()").to_s
742
- end
743
- if @id == ""
744
- @id = XPath.first(channel_node, "atom10:id/text()",
745
- FEED_TOOLS_NAMESPACES).to_s
746
- end
747
- if @id == ""
748
- @id = XPath.first(channel_node, "atom03:id/text()",
749
- FEED_TOOLS_NAMESPACES).to_s
750
- end
751
- if @id == ""
752
- @id = XPath.first(channel_node, "atom:id/text()").to_s
753
- end
754
- if @id == ""
755
- @id = XPath.first(root_node, "guid/text()").to_s
756
- end
757
- end
758
- @id = nil if @id == ""
840
+ @id = select_not_blank([
841
+ try_xpaths(self.channel_node, [
842
+ "atom10:id/text()",
843
+ "atom03:id/text()",
844
+ "atom:id/text()",
845
+ "id/text()",
846
+ "guid/text()"
847
+ ], :select_result_value => true),
848
+ try_xpaths(self.root_node, [
849
+ "atom10:id/text()",
850
+ "atom03:id/text()",
851
+ "atom:id/text()",
852
+ "id/text()",
853
+ "guid/text()"
854
+ ], :select_result_value => true)
855
+ ])
759
856
  end
760
857
  return @id
761
858
  end
@@ -768,12 +865,12 @@ module FeedTools
768
865
  # Returns the feed url.
769
866
  def url
770
867
  original_url = @url
771
- override_url = lambda do
868
+ override_url = lambda do |result|
772
869
  begin
773
- if @url == nil && self.feed_data != nil
870
+ if result.nil? && self.feed_data != nil
774
871
  true
775
- elsif @url != nil &&
776
- !(["http", "https"].include?(URI.parse(@url).scheme))
872
+ elsif result != nil &&
873
+ !(["http", "https"].include?(URI.parse(result.to_s).scheme))
777
874
  if self.feed_data != nil
778
875
  true
779
876
  else
@@ -786,47 +883,32 @@ module FeedTools
786
883
  true
787
884
  end
788
885
  end
789
- if override_url.call
790
- @url = XPath.first(channel_node, "link[@rel='self']/@href").to_s
791
- @url = nil if @url == ""
792
- if override_url.call
793
- @url = XPath.first(channel_node, "atom:link[@rel='self']/@href").to_s
794
- @url = nil if @url == ""
795
- end
796
- if override_url.call
797
- @url = XPath.first(channel_node, "atom10:link[@rel='self']/@href",
798
- FEED_TOOLS_NAMESPACES).to_s
799
- @url = nil if @url == ""
800
- end
801
- if override_url.call
802
- @url = XPath.first(channel_node, "atom03:link[@rel='self']/@href",
803
- FEED_TOOLS_NAMESPACES).to_s
804
- @url = nil if @url == ""
805
- end
806
- if override_url.call
807
- @url = XPath.first(channel_node, "admin:feed/@rdf:resource").to_s
808
- @url = nil if @url == ""
809
- end
810
- if override_url.call
811
- @url = XPath.first(channel_node, "admin:feed/@rdf:resource",
812
- FEED_TOOLS_NAMESPACES).to_s
813
- @url = nil if @url == ""
814
- end
815
- if override_url.call
816
- @url = XPath.first(channel_node, "admin:feed/@resource").to_s
817
- @url = nil if @url == ""
818
- end
819
- if override_url.call
820
- @url = XPath.first(channel_node, "feed/@rdf:resource").to_s
821
- @url = nil if @url == ""
822
- end
823
- if override_url.call
824
- @url = XPath.first(channel_node, "feed/@resource").to_s
825
- @url = nil if @url == ""
826
- end
886
+ if override_url.call(@url)
887
+ # rdf:about is ordered last because a lot of people accidentally
888
+ # put the link in that field instead of the url to the feed.
889
+ # Ordering it last gives them as many chances as humanly possible
890
+ # for them to redeem themselves. If the link turns out to be the
891
+ @url = try_xpaths(self.channel_node, [
892
+ "link[@rel='self']/@href",
893
+ "atom10:link[@rel='self']/@href",
894
+ "atom03:link[@rel='self']/@href",
895
+ "atom:link[@rel='self']/@href",
896
+ "admin:feed/@rdf:resource",
897
+ "admin:feed/@resource",
898
+ "feed/@rdf:resource",
899
+ "feed/@resource",
900
+ "@rdf:about",
901
+ "@about"
902
+ ], :select_result_value => true) do |result|
903
+ override_url.call(FeedTools.normalize_url(result))
904
+ end
905
+ @url = FeedTools.normalize_url(@url)
827
906
  if @url == nil
828
907
  @url = original_url
829
908
  end
909
+ if @url == self.link
910
+ @url = original_url
911
+ end
830
912
  end
831
913
  return @url
832
914
  end
@@ -840,37 +922,23 @@ module FeedTools
840
922
  # Returns the feed title
841
923
  def title
842
924
  if @title.nil?
843
- unless channel_node.nil?
844
- repair_entities = false
845
- title_node = XPath.first(channel_node, "atom10:title",
846
- FEED_TOOLS_NAMESPACES)
847
- if title_node.nil?
848
- title_node = XPath.first(channel_node, "title")
849
- end
850
- if title_node.nil?
851
- title_node = XPath.first(channel_node, "atom03:title",
852
- FEED_TOOLS_NAMESPACES)
853
- end
854
- if title_node.nil?
855
- title_node = XPath.first(channel_node, "atom:title")
856
- end
857
- if title_node.nil?
858
- title_node = XPath.first(channel_node, "dc:title",
859
- FEED_TOOLS_NAMESPACES)
860
- end
861
- if title_node.nil?
862
- title_node = XPath.first(channel_node, "dc:title")
863
- end
864
- if title_node.nil?
865
- title_node = XPath.first(channel_node, "TITLE")
866
- end
867
- end
925
+ repair_entities = false
926
+ title_node = try_xpaths(self.channel_node, [
927
+ "atom10:title",
928
+ "atom03:title",
929
+ "atom:title",
930
+ "title",
931
+ "dc:title"
932
+ ])
868
933
  if title_node.nil?
869
934
  return nil
870
935
  end
871
- title_type = XPath.first(title_node, "@type").to_s
872
- title_mode = XPath.first(title_node, "@mode").to_s
873
- title_encoding = XPath.first(title_node, "@encoding").to_s
936
+ title_type = try_xpaths(title_node, "@type",
937
+ :select_result_value => true)
938
+ title_mode = try_xpaths(title_node, "@mode",
939
+ :select_result_value => true)
940
+ title_encoding = try_xpaths(title_node, "@encoding",
941
+ :select_result_value => true)
874
942
 
875
943
  # Note that we're checking for misuse of type, mode and encoding here
876
944
  if title_type == "base64" || title_mode == "base64" ||
@@ -895,7 +963,7 @@ module FeedTools
895
963
  @title.gsub!(/>\n</, "><")
896
964
  @title.gsub!(/\n/, " ")
897
965
  @title.strip!
898
- @title = nil if @title == ""
966
+ @title = nil if @title.blank?
899
967
  self.cache_object.title = @title unless self.cache_object.nil?
900
968
  end
901
969
  return @title
@@ -907,124 +975,98 @@ module FeedTools
907
975
  self.cache_object.title = new_title unless self.cache_object.nil?
908
976
  end
909
977
 
910
- # Returns the feed description
911
- def description
912
- if @description.nil?
913
- unless channel_node.nil?
914
- repair_entities = false
915
- description_node = XPath.first(channel_node, "description")
916
- if description_node.nil?
917
- description_node = XPath.first(channel_node, "tagline")
918
- end
919
- if description_node.nil?
920
- description_node = XPath.first(channel_node, "subtitle")
921
- end
922
- if description_node.nil?
923
- description_node = XPath.first(channel_node, "summary")
924
- end
925
- if description_node.nil?
926
- description_node = XPath.first(channel_node, "abstract")
927
- end
928
- if description_node.nil?
929
- description_node = XPath.first(channel_node, "ABSTRACT")
930
- end
931
- if description_node.nil?
932
- description_node = XPath.first(channel_node, "info")
933
- end
934
- if description_node.nil?
935
- description_node = XPath.first(channel_node, "content:encoded")
936
- end
937
- if description_node.nil?
938
- description_node = XPath.first(channel_node, "content:encoded",
939
- FEED_TOOLS_NAMESPACES)
940
- end
941
- if description_node.nil?
942
- description_node = XPath.first(root_node, "encoded")
943
- end
944
- if description_node.nil?
945
- description_node = XPath.first(channel_node, "content")
946
- end
947
- if description_node.nil?
948
- description_node = XPath.first(channel_node, "xhtml:body")
949
- end
950
- if description_node.nil?
951
- description_node = XPath.first(channel_node, "body")
952
- end
953
- if description_node.nil?
954
- description_node = XPath.first(channel_node, "blurb")
955
- end
956
- end
957
- if description_node.nil?
978
+ # Returns the feed subtitle
979
+ def subtitle
980
+ if @subtitle.nil?
981
+ repair_entities = false
982
+ subtitle_node = try_xpaths(self.channel_node, [
983
+ "atom10:subtitle",
984
+ "subtitle",
985
+ "atom03:tagline",
986
+ "tagline",
987
+ "description",
988
+ "summary",
989
+ "abstract",
990
+ "ABSTRACT",
991
+ "content:encoded",
992
+ "encoded",
993
+ "content",
994
+ "xhtml:body",
995
+ "body",
996
+ "blurb",
997
+ "info"
998
+ ])
999
+ if subtitle_node.nil?
958
1000
  return nil
959
1001
  end
960
- description_type = XPath.first(description_node, "@type").to_s
961
- description_mode = XPath.first(description_node, "@mode").to_s
962
- description_encoding = XPath.first(description_node, "@encoding").to_s
1002
+ subtitle_type = try_xpaths(subtitle_node, "@type",
1003
+ :select_result_value => true)
1004
+ subtitle_mode = try_xpaths(subtitle_node, "@mode",
1005
+ :select_result_value => true)
1006
+ subtitle_encoding = try_xpaths(subtitle_node, "@encoding",
1007
+ :select_result_value => true)
963
1008
 
964
1009
  # Note that we're checking for misuse of type, mode and encoding here
965
- if description_encoding != ""
966
- @description =
1010
+ if !subtitle_encoding.blank?
1011
+ @subtitle =
967
1012
  "[Embedded data objects are not currently supported.]"
968
- elsif description_node.cdatas.size > 0
969
- @description = description_node.cdatas.first.value
970
- elsif description_type == "base64" || description_mode == "base64" ||
971
- description_encoding == "base64"
972
- @description = Base64.decode64(description_node.inner_xml.strip)
973
- elsif description_type == "xhtml" || description_mode == "xhtml" ||
974
- description_type == "xml" || description_mode == "xml" ||
975
- description_type == "application/xhtml+xml"
976
- @description = description_node.inner_xml
977
- elsif description_type == "escaped" || description_mode == "escaped"
978
- @description = FeedTools.unescape_entities(
979
- description_node.inner_xml)
1013
+ elsif subtitle_node.cdatas.size > 0
1014
+ @subtitle = subtitle_node.cdatas.first.value
1015
+ elsif subtitle_type == "base64" || subtitle_mode == "base64" ||
1016
+ subtitle_encoding == "base64"
1017
+ @subtitle = Base64.decode64(subtitle_node.inner_xml.strip)
1018
+ elsif subtitle_type == "xhtml" || subtitle_mode == "xhtml" ||
1019
+ subtitle_type == "xml" || subtitle_mode == "xml" ||
1020
+ subtitle_type == "application/xhtml+xml"
1021
+ @subtitle = subtitle_node.inner_xml
1022
+ elsif subtitle_type == "escaped" || subtitle_mode == "escaped"
1023
+ @subtitle = FeedTools.unescape_entities(
1024
+ subtitle_node.inner_xml)
980
1025
  else
981
- @description = description_node.inner_xml
1026
+ @subtitle = subtitle_node.inner_xml
982
1027
  repair_entities = true
983
1028
  end
984
- if @description == ""
985
- @description = self.itunes_summary
986
- @description = "" if @description.nil?
1029
+ if @subtitle.blank?
1030
+ @subtitle = self.itunes_summary
987
1031
  end
988
- if @description == ""
989
- @description = self.itunes_subtitle
990
- @description = "" if @description.nil?
1032
+ if @subtitle.blank?
1033
+ @subtitle = self.itunes_subtitle
991
1034
  end
992
1035
 
993
- unless @description.nil?
994
- @description = FeedTools.sanitize_html(@description, :strip)
995
- @description = FeedTools.unescape_entities(@description) if repair_entities
996
- @description = FeedTools.tidy_html(@description)
1036
+ unless @subtitle.blank?
1037
+ @subtitle = FeedTools.sanitize_html(@subtitle, :strip)
1038
+ @subtitle = FeedTools.unescape_entities(@subtitle) if repair_entities
1039
+ @subtitle = FeedTools.tidy_html(@subtitle)
997
1040
  end
998
1041
 
999
- @description = @description.strip unless @description.nil?
1000
- @description = nil if @description == ""
1042
+ @subtitle = @subtitle.strip unless @subtitle.nil?
1043
+ @subtitle = nil if @subtitle.blank?
1001
1044
  end
1002
- return @description
1045
+ return @subtitle
1003
1046
  end
1004
1047
 
1005
- # Sets the feed description
1006
- def description=(new_description)
1007
- @description = new_description
1048
+ # Sets the feed subtitle
1049
+ def subtitle=(new_subtitle)
1050
+ @subtitle = new_subtitle
1008
1051
  end
1009
1052
 
1010
1053
  # Returns the contents of the itunes:summary element
1011
1054
  def itunes_summary
1012
1055
  if @itunes_summary.nil?
1013
- unless channel_node.nil?
1014
- @itunes_summary = FeedTools.unescape_entities(XPath.first(channel_node,
1015
- "itunes:summary/text()").to_s)
1016
- end
1017
- unless root_node.nil?
1018
- if @itunes_summary == "" || @itunes_summary.nil?
1019
- @itunes_summary = FeedTools.unescape_entities(XPath.first(root_node,
1020
- "itunes:summary/text()").to_s)
1021
- end
1022
- end
1023
- if @itunes_summary == ""
1056
+ @itunes_summary = select_not_blank([
1057
+ try_xpaths(self.channel_node, [
1058
+ "itunes:summary/text()"
1059
+ ]),
1060
+ try_xpaths(self.root_node, [
1061
+ "itunes:summary/text()"
1062
+ ])
1063
+ ])
1064
+ unless @itunes_summary.blank?
1065
+ @itunes_summary = FeedTools.unescape_entities(@itunes_summary)
1066
+ @itunes_summary = FeedTools.sanitize_html(@itunes_summary)
1067
+ else
1024
1068
  @itunes_summary = nil
1025
1069
  end
1026
- @itunes_summary =
1027
- FeedTools.sanitize_html(@itunes_summary) unless @itunes_summary.nil?
1028
1070
  end
1029
1071
  return @itunes_summary
1030
1072
  end
@@ -1037,21 +1079,19 @@ module FeedTools
1037
1079
  # Returns the contents of the itunes:subtitle element
1038
1080
  def itunes_subtitle
1039
1081
  if @itunes_subtitle.nil?
1040
- unless channel_node.nil?
1041
- @itunes_subtitle = FeedTools.unescape_entities(XPath.first(channel_node,
1042
- "itunes:subtitle/text()").to_s)
1043
- end
1044
- unless root_node.nil?
1045
- if @itunes_subtitle == "" || @itunes_subtitle.nil?
1046
- @itunes_subtitle = FeedTools.unescape_entities(XPath.first(root_node,
1047
- "itunes:subtitle/text()").to_s)
1048
- end
1049
- end
1050
- if @itunes_subtitle == ""
1051
- @itunes_subtitle = nil
1052
- end
1053
- unless @itunes_subtitle.nil?
1082
+ @itunes_subtitle = select_not_blank([
1083
+ try_xpaths(self.channel_node, [
1084
+ "itunes:subtitle/text()"
1085
+ ]),
1086
+ try_xpaths(self.root_node, [
1087
+ "itunes:subtitle/text()"
1088
+ ])
1089
+ ])
1090
+ unless @itunes_subtitle.blank?
1091
+ @itunes_subtitle = FeedTools.unescape_entities(@itunes_subtitle)
1054
1092
  @itunes_subtitle = FeedTools.sanitize_html(@itunes_subtitle)
1093
+ else
1094
+ @itunes_subtitle = nil
1055
1095
  end
1056
1096
  end
1057
1097
  return @itunes_subtitle
@@ -1065,43 +1105,80 @@ module FeedTools
1065
1105
  # Returns the feed link
1066
1106
  def link
1067
1107
  if @link.nil?
1068
- unless channel_node.nil?
1069
- # get the feed link from the xml document
1070
- @link = XPath.first(channel_node, "link[@rel='alternate' @type='text/html']/@href").to_s
1071
- if @link == ""
1072
- @link = XPath.first(channel_node, "link[@rel='alternate']/@href").to_s
1073
- end
1074
- if @link == ""
1075
- @link = XPath.first(channel_node, "link/@href").to_s
1076
- end
1077
- if @link == ""
1078
- @link = XPath.first(channel_node, "link/text()").to_s
1079
- end
1080
- if @link == ""
1081
- @link = XPath.first(channel_node, "@href").to_s
1082
- end
1083
- if @link == ""
1084
- @link = XPath.first(channel_node, "@HREF").to_s
1085
- end
1086
- if @link == ""
1087
- @link = XPath.first(channel_node, "a/@href").to_s
1088
- end
1089
- if @link == ""
1090
- @link = XPath.first(channel_node, "A/@HREF").to_s
1091
- end
1092
- end
1093
- if @link == "" || @link.nil?
1094
- if FeedTools.is_uri? self.guid
1108
+ @link = try_xpaths(self.channel_node, [
1109
+ "atom10:link[@type='application/xhtml+xml']/@href",
1110
+ "atom10:link[@type='text/html']/@href",
1111
+ "atom10:link[@rel='alternate']/@href",
1112
+ "atom03:link[@type='application/xhtml+xml']/@href",
1113
+ "atom03:link[@type='text/html']/@href",
1114
+ "atom03:link[@rel='alternate']/@href",
1115
+ "atom:link[@type='application/xhtml+xml']/@href",
1116
+ "atom:link[@type='text/html']/@href",
1117
+ "atom:link[@rel='alternate']/@href",
1118
+ "link[@type='application/xhtml+xml']/@href",
1119
+ "link[@type='text/html']/@href",
1120
+ "link[@rel='alternate']/@href",
1121
+ "link/text()",
1122
+ "@href",
1123
+ "a/@href"
1124
+ ], :select_result_value => true)
1125
+ if @link.blank?
1126
+ if FeedTools.is_uri?(self.guid)
1095
1127
  @link = self.guid
1096
1128
  end
1097
1129
  end
1098
- if @link == "" && channel_node != nil
1099
- # Technically, we shouldn't use the base attribute for this, but if the href attribute
1100
- # is missing, it's already a given that we're looking at a messed up CDF file. We can
1101
- # always pray it's correct.
1130
+ if @link.blank? && channel_node != nil
1131
+ # Technically, we shouldn't use the base attribute for this, but
1132
+ # if the href attribute is missing, it's already a given that we're
1133
+ # looking at a messed up CDF file. We can always pray it's correct.
1102
1134
  @link = XPath.first(channel_node, "@base").to_s
1103
1135
  end
1104
- @link = FeedTools.normalize_url(@link)
1136
+ if !@link.blank?
1137
+ @link = FeedTools.unescape_entities(@link)
1138
+ end
1139
+ if @link.blank?
1140
+ link_node = try_xpaths(self.channel_node, [
1141
+ "atom10:link",
1142
+ "atom03:link",
1143
+ "atom:link",
1144
+ "link"
1145
+ ])
1146
+ if link_node != nil
1147
+ if link_node.attributes['type'].to_s =~ /^image/ ||
1148
+ link_node.attributes['type'].to_s =~ /^application/ ||
1149
+ link_node.attributes['type'].to_s =~ /xml/ ||
1150
+ link_node.attributes['rel'].to_s =~ /self/
1151
+ for child in self.channel_node
1152
+ if child.class == REXML::Element
1153
+ if child.name.downcase == "link"
1154
+ if child.attributes['type'].to_s =~ /^image/ ||
1155
+ child.attributes['type'].to_s =~ /^application/ ||
1156
+ child.attributes['type'].to_s =~ /xml/ ||
1157
+ child.attributes['rel'].to_s =~ /self/
1158
+ @link = nil
1159
+ next
1160
+ else
1161
+ @link = child.attributes['href'].to_s
1162
+ if @link.blank?
1163
+ @link = child.inner_xml
1164
+ end
1165
+ if @link.blank?
1166
+ next
1167
+ end
1168
+ break
1169
+ end
1170
+ end
1171
+ end
1172
+ end
1173
+ else
1174
+ @link = link_node.attributes['href'].to_s
1175
+ end
1176
+ end
1177
+ end
1178
+ @link = nil if @link.blank?
1179
+ if FeedTools.configurations[:url_normalization_enabled]
1180
+ @link = FeedTools.normalize_url(@link)
1181
+ end
1105
1182
  unless self.cache_object.nil?
1106
1183
  self.cache_object.link = @link
1107
1184
  end
@@ -1118,87 +1195,83 @@ module FeedTools
1118
1195
  end
1119
1196
 
1120
1197
  # Returns the url to the icon file for this feed.
1121
- #
1122
- # This method uses the url from the link field in order to avoid grabbing
1123
- # the favicon for services like feedburner.
1124
1198
  def icon
1125
1199
  if @icon.nil?
1126
- icon_node = XPath.first(channel_node, "link[@rel='icon']")
1127
- if icon_node.nil?
1128
- icon_node = XPath.first(channel_node, "link[@rel='shortcut icon']")
1129
- end
1130
- if icon_node.nil?
1131
- icon_node = XPath.first(channel_node, "link[@type='image/x-icon']")
1132
- end
1133
- if icon_node.nil?
1134
- icon_node = XPath.first(channel_node, "icon")
1135
- end
1136
- if icon_node.nil?
1137
- icon_node = XPath.first(channel_node, "logo[@style='icon']")
1138
- end
1139
- if icon_node.nil?
1140
- icon_node = XPath.first(channel_node, "LOGO[@STYLE='ICON']")
1141
- end
1200
+ icon_node = try_xpaths(self.channel_node, [
1201
+ "link[@rel='icon']",
1202
+ "link[@rel='shortcut icon']",
1203
+ "link[@type='image/x-icon']",
1204
+ "icon",
1205
+ "logo[@style='icon']",
1206
+ "LOGO[@STYLE='ICON']"
1207
+ ])
1142
1208
  unless icon_node.nil?
1143
1209
  @icon = FeedTools.unescape_entities(
1144
1210
  XPath.first(icon_node, "@href").to_s)
1145
- if @icon == ""
1211
+ if @icon.blank?
1146
1212
  @icon = FeedTools.unescape_entities(
1147
1213
  XPath.first(icon_node, "text()").to_s)
1148
1214
  unless FeedTools.is_uri? @icon
1149
- @icon = ""
1215
+ @icon = nil
1150
1216
  end
1151
1217
  end
1152
- if @icon == "" && self.link != nil && self.link != ""
1218
+ @icon = nil if @icon.blank?
1219
+ end
1220
+ end
1221
+ return @icon
1222
+ end
1223
+
1224
+ # Returns the favicon url for this feed.
1225
+ # This method first tries to use the url from the link field instead of
1226
+ # the feed url, in order to avoid grabbing the favicon for services like
1227
+ # feedburner.
1228
+ def favicon
1229
+ if @favicon.nil?
1230
+ if !self.link.blank?
1231
+ begin
1153
1232
  link_uri = URI.parse(FeedTools.normalize_url(self.link))
1154
- @icon =
1155
- link_uri.scheme + "://" + link_uri.host + "/favicon.ico"
1233
+ if link_uri.scheme == "http"
1234
+ @favicon =
1235
+ "http://" + link_uri.host + "/favicon.ico"
1236
+ end
1237
+ rescue
1238
+ @favicon = nil
1239
+ end
1240
+ if @favicon.nil? && !self.url.blank?
1241
+ begin
1242
+ feed_uri = URI.parse(FeedTools.normalize_url(self.url))
1243
+ if feed_uri.scheme == "http"
1244
+ @favicon =
1245
+ "http://" + feed_uri.host + "/favicon.ico"
1246
+ end
1247
+ rescue
1248
+ @favicon = nil
1249
+ end
1156
1250
  end
1157
- @icon = nil if @icon == ""
1251
+ else
1252
+ @favicon = nil
1158
1253
  end
1159
1254
  end
1160
- return @icon
1255
+ return @favicon
1161
1256
  end
1162
1257
 
1163
1258
  # Returns the feed author
1164
1259
  def author
1165
1260
  if @author.nil?
1166
1261
  @author = FeedTools::Feed::Author.new
1167
- unless channel_node.nil?
1168
- author_node = XPath.first(channel_node, "atom10:author",
1169
- FEED_TOOLS_NAMESPACES)
1170
- if author_node.nil?
1171
- author_node = XPath.first(channel_node, "atom03:author",
1172
- FEED_TOOLS_NAMESPACES)
1173
- end
1174
- if author_node.nil?
1175
- author_node = XPath.first(channel_node, "atom:author")
1176
- end
1177
- if author_node.nil?
1178
- author_node = XPath.first(channel_node, "author")
1179
- end
1180
- if author_node.nil?
1181
- author_node = XPath.first(channel_node, "managingEditor")
1182
- end
1183
- if author_node.nil?
1184
- author_node = XPath.first(channel_node, "dc:author",
1185
- FEED_TOOLS_NAMESPACES)
1186
- end
1187
- if author_node.nil?
1188
- author_node = XPath.first(channel_node, "dc:author")
1189
- end
1190
- if author_node.nil?
1191
- author_node = XPath.first(channel_node, "dc:creator",
1192
- FEED_TOOLS_NAMESPACES)
1193
- end
1194
- if author_node.nil?
1195
- author_node = XPath.first(channel_node, "dc:creator")
1196
- end
1197
- end
1262
+ author_node = try_xpaths(self.channel_node, [
1263
+ "atom10:author",
1264
+ "atom03:author",
1265
+ "atom:author",
1266
+ "author",
1267
+ "managingEditor",
1268
+ "dc:author",
1269
+ "dc:creator"
1270
+ ])
1198
1271
  unless author_node.nil?
1199
1272
  @author.raw = FeedTools.unescape_entities(
1200
- XPath.first(author_node, "text()").to_s)
1201
- @author.raw = nil if @author.raw == ""
1273
+ XPath.first(author_node, "text()").to_s).strip
1274
+ @author.raw = nil if @author.raw.blank?
1202
1275
  unless @author.raw.nil?
1203
1276
  raw_scan = @author.raw.scan(
1204
1277
  /(.*)\((\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\)/i)
@@ -1229,35 +1302,37 @@ module FeedTools
1229
1302
  end
1230
1303
  end
1231
1304
  end
1232
- @author.name = "" if @author.name.nil?
1233
- if @author.name == ""
1305
+ if @author.name.blank?
1234
1306
  @author.name = FeedTools.unescape_entities(
1235
- XPath.first(author_node, "name/text()").to_s)
1236
- end
1237
- if @author.name == ""
1238
- @author.name = FeedTools.unescape_entities(
1239
- XPath.first(author_node, "@name").to_s)
1240
- end
1241
- if @author.email == ""
1242
- @author.email = FeedTools.unescape_entities(
1243
- XPath.first(author_node, "email/text()").to_s)
1307
+ try_xpaths(author_node, [
1308
+ "name/text()",
1309
+ "@name"
1310
+ ], :select_result_value => true)
1311
+ )
1244
1312
  end
1245
- if @author.email == ""
1313
+ if @author.email.blank?
1246
1314
  @author.email = FeedTools.unescape_entities(
1247
- XPath.first(author_node, "@email").to_s)
1315
+ try_xpaths(author_node, [
1316
+ "email/text()",
1317
+ "@email"
1318
+ ], :select_result_value => true)
1319
+ )
1248
1320
  end
1249
- if @author.url == ""
1321
+ if @author.url.blank?
1250
1322
  @author.url = FeedTools.unescape_entities(
1251
- XPath.first(author_node, "url/text()").to_s)
1252
- end
1253
- if @author.url == ""
1254
- @author.url = FeedTools.unescape_entities(
1255
- XPath.first(author_node, "@url").to_s)
1256
- end
1257
- @author.name = nil if @author.name == ""
1258
- @author.raw = nil if @author.raw == ""
1259
- @author.email = nil if @author.email == ""
1260
- @author.url = nil if @author.url == ""
1323
+ try_xpaths(author_node, [
1324
+ "url/text()",
1325
+ "uri/text()",
1326
+ "@url",
1327
+ "@uri",
1328
+ "@href"
1329
+ ], :select_result_value => true)
1330
+ )
1331
+ end
1332
+ @author.name = nil if @author.name.blank?
1333
+ @author.raw = nil if @author.raw.blank?
1334
+ @author.email = nil if @author.email.blank?
1335
+ @author.url = nil if @author.url.blank?
1261
1336
  end
1262
1337
  # Fallback on the itunes module if we didn't find an author name
1263
1338
  begin
@@ -1290,15 +1365,14 @@ module FeedTools
1290
1365
  def publisher
1291
1366
  if @publisher.nil?
1292
1367
  @publisher = FeedTools::Feed::Author.new
1368
+ publisher_node = try_xpaths(self.channel_node, [
1369
+ "webMaster/text()",
1370
+ "dc:publisher/text()"
1371
+ ])
1293
1372
 
1294
1373
  # Set the author name
1295
- @publisher.raw = FeedTools.unescape_entities(
1296
- XPath.first(channel_node, "dc:publisher/text()").to_s)
1297
- if @publisher.raw == ""
1298
- @publisher.raw = FeedTools.unescape_entities(
1299
- XPath.first(channel_node, "webMaster/text()").to_s)
1300
- end
1301
- unless @publisher.raw == ""
1374
+ @publisher.raw = FeedTools.unescape_entities(publisher_node.to_s)
1375
+ unless @publisher.raw.blank?
1302
1376
  raw_scan = @publisher.raw.scan(
1303
1377
  /(.*)\((\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\)/i)
1304
1378
  if raw_scan.nil? || raw_scan.size == 0
@@ -1331,10 +1405,10 @@ module FeedTools
1331
1405
  end
1332
1406
  end
1333
1407
 
1334
- @publisher.name = nil if @publisher.name == ""
1335
- @publisher.raw = nil if @publisher.raw == ""
1336
- @publisher.email = nil if @publisher.email == ""
1337
- @publisher.url = nil if @publisher.url == ""
1408
+ @publisher.name = nil if @publisher.name.blank?
1409
+ @publisher.raw = nil if @publisher.raw.blank?
1410
+ @publisher.email = nil if @publisher.email.blank?
1411
+ @publisher.url = nil if @publisher.url.blank?
1338
1412
  end
1339
1413
  return @publisher
1340
1414
  end
@@ -1364,9 +1438,12 @@ module FeedTools
1364
1438
  # attribute.
1365
1439
  def itunes_author
1366
1440
  if @itunes_author.nil?
1367
- @itunes_author = FeedTools.unescape_entities(XPath.first(channel_node,
1368
- "itunes:author/text()").to_s)
1369
- @itunes_author = nil if @itunes_author == ""
1441
+ @itunes_author = FeedTools.unescape_entities(
1442
+ try_xpaths(self.channel_node, [
1443
+ "itunes:author/text()"
1444
+ ], :select_result_value => true)
1445
+ )
1446
+ @itunes_author = nil if @itunes_author.blank?
1370
1447
  end
1371
1448
  return @itunes_author
1372
1449
  end
@@ -1374,29 +1451,40 @@ module FeedTools
1374
1451
  # Returns the feed time
1375
1452
  def time
1376
1453
  if @time.nil?
1377
- unless channel_node.nil?
1378
- time_string = XPath.first(channel_node, "pubDate/text()").to_s
1379
- if time_string == ""
1380
- time_string = XPath.first(channel_node, "dc:date/text()").to_s
1381
- end
1382
- if time_string == ""
1383
- time_string = XPath.first(channel_node, "issued/text()").to_s
1384
- end
1385
- if time_string == ""
1386
- time_string = XPath.first(channel_node, "updated/text()").to_s
1387
- end
1388
- if time_string == ""
1389
- time_string = XPath.first(channel_node, "time/text()").to_s
1390
- end
1391
- end
1454
+ time_string = try_xpaths(self.channel_node, [
1455
+ "atom10:updated/text()",
1456
+ "atom03:updated/text()",
1457
+ "atom:updated/text()",
1458
+ "updated/text()",
1459
+ "atom10:modified/text()",
1460
+ "atom03:modified/text()",
1461
+ "atom:modified/text()",
1462
+ "modified/text()",
1463
+ "time/text()",
1464
+ "atom10:issued/text()",
1465
+ "atom03:issued/text()",
1466
+ "atom:issued/text()",
1467
+ "issued/text()",
1468
+ "atom10:published/text()",
1469
+ "atom03:published/text()",
1470
+ "atom:published/text()",
1471
+ "published/text()",
1472
+ "pubDate/text()",
1473
+ "dc:date/text()",
1474
+ "date/text()"
1475
+ ], :select_result_value => true)
1392
1476
  begin
1393
- if time_string != nil && time_string != ""
1477
+ unless time_string.blank?
1394
1478
  @time = Time.parse(time_string).gmtime
1395
1479
  else
1396
- @time = Time.now.gmtime
1480
+ if FeedTools.configurations[:timestamp_estimation_enabled]
1481
+ @time = Time.now.gmtime
1482
+ end
1397
1483
  end
1398
1484
  rescue
1399
- @time = Time.now.gmtime
1485
+ if FeedTools.configurations[:timestamp_estimation_enabled]
1486
+ @time = Time.now.gmtime
1487
+ end
1400
1488
  end
1401
1489
  end
1402
1490
  return @time
@@ -1410,13 +1498,11 @@ module FeedTools
1410
1498
  # Returns the feed item updated time
1411
1499
  def updated
1412
1500
  if @updated.nil?
1413
- unless channel_node.nil?
1414
- updated_string = XPath.first(channel_node, "updated/text()").to_s
1415
- if updated_string == ""
1416
- updated_string = XPath.first(channel_node, "modified/text()").to_s
1417
- end
1418
- end
1419
- if updated_string != nil && updated_string != ""
1501
+ updated_string = try_xpaths(self.channel_node, [
1502
+ "updated/text()",
1503
+ "modified/text()"
1504
+ ], :select_result_value => true)
1505
+ unless updated_string.blank?
1420
1506
  @updated = Time.parse(updated_string).gmtime rescue nil
1421
1507
  else
1422
1508
  @updated = nil
@@ -1430,51 +1516,16 @@ module FeedTools
1430
1516
  @updated = new_updated
1431
1517
  end
1432
1518
 
1433
- # Returns the feed item issued time
1434
- def issued
1435
- if @issued.nil?
1436
- unless channel_node.nil?
1437
- issued_string = XPath.first(channel_node, "issued/text()").to_s
1438
- if issued_string == ""
1439
- issued_string = XPath.first(channel_node, "pubDate/text()").to_s
1440
- end
1441
- if issued_string == ""
1442
- issued_string = XPath.first(channel_node, "dc:date/text()").to_s
1443
- end
1444
- if issued_string == ""
1445
- issued_string = XPath.first(channel_node, "published/text()").to_s
1446
- end
1447
- end
1448
- if issued_string != nil && issued_string != ""
1449
- @issued = Time.parse(issued_string).gmtime rescue nil
1450
- else
1451
- @issued = nil
1452
- end
1453
- end
1454
- return @issued
1455
- end
1456
-
1457
- # Sets the feed item issued time
1458
- def issued=(new_issued)
1459
- @issued = new_issued
1460
- end
1461
-
1462
1519
  # Returns the feed item published time
1463
1520
  def published
1464
1521
  if @published.nil?
1465
- unless channel_node.nil?
1466
- published_string = XPath.first(channel_node, "published/text()").to_s
1467
- if published_string == ""
1468
- published_string = XPath.first(channel_node, "pubDate/text()").to_s
1469
- end
1470
- if published_string == ""
1471
- published_string = XPath.first(channel_node, "dc:date/text()").to_s
1472
- end
1473
- if published_string == ""
1474
- published_string = XPath.first(channel_node, "issued/text()").to_s
1475
- end
1476
- end
1477
- if published_string != nil && published_string != ""
1522
+ published_string = try_xpaths(self.channel_node, [
1523
+ "published/text()",
1524
+ "pubDate/text()",
1525
+ "issued/text()",
1526
+ "dc:date/text()"
1527
+ ], :select_result_value => true)
1528
+ unless published_string.blank?
1478
1529
  @published = Time.parse(published_string).gmtime rescue nil
1479
1530
  else
1480
1531
  @published = nil
@@ -1492,28 +1543,26 @@ module FeedTools
1492
1543
  def categories
1493
1544
  if @categories.nil?
1494
1545
  @categories = []
1495
- category_nodes = XPath.match(channel_node, "category")
1496
- if category_nodes.nil? || category_nodes.empty?
1497
- category_nodes = XPath.match(channel_node, "dc:subject")
1498
- end
1546
+ category_nodes = try_xpaths_all(self.channel_node, [
1547
+ "category",
1548
+ "dc:subject"
1549
+ ])
1499
1550
  unless category_nodes.nil?
1500
1551
  for category_node in category_nodes
1501
1552
  category = FeedTools::Feed::Category.new
1502
- category.term = XPath.first(category_node, "@term").to_s
1503
- if category.term == ""
1504
- category.term = XPath.first(category_node, "text()").to_s
1505
- end
1506
- category.term.strip! unless category.term.nil?
1507
- category.term = nil if category.term == ""
1508
- category.label = XPath.first(category_node, "@label").to_s
1509
- category.label.strip! unless category.label.nil?
1510
- category.label = nil if category.label == ""
1511
- category.scheme = XPath.first(category_node, "@scheme").to_s
1512
- if category.scheme == ""
1513
- category.scheme = XPath.first(category_node, "@domain").to_s
1514
- end
1515
- category.scheme.strip! unless category.scheme.nil?
1516
- category.scheme = nil if category.scheme == ""
1553
+ category.term = try_xpaths(category_node, [
1554
+ "@term",
1555
+ "text()"
1556
+ ], :select_result_value => true)
1557
+ category.term.strip! unless category.term.blank?
1558
+ category.label = try_xpaths(category_node, ["@label"],
1559
+ :select_result_value => true)
1560
+ category.label.strip! unless category.label.blank?
1561
+ category.scheme = try_xpaths(category_node, [
1562
+ "@scheme",
1563
+ "@domain"
1564
+ ], :select_result_value => true)
1565
+ category.scheme.strip! unless category.scheme.blank?
1517
1566
  @categories << category
1518
1567
  end
1519
1568
  end
@@ -1525,55 +1574,61 @@ module FeedTools
1525
1574
  def images
1526
1575
  if @images.nil?
1527
1576
  @images = []
1528
- unless channel_node.nil?
1529
- image_nodes = XPath.match(channel_node, "image")
1530
- if image_nodes.nil? || image_nodes.empty?
1531
- image_nodes = XPath.match(channel_node, "link")
1532
- end
1533
- if image_nodes.nil? || image_nodes.empty?
1534
- image_nodes = XPath.match(channel_node, "logo")
1535
- end
1536
- if image_nodes.nil? || image_nodes.empty?
1537
- image_nodes = XPath.match(channel_node, "LOGO")
1538
- end
1539
- unless image_nodes.nil?
1540
- for image_node in image_nodes
1541
- image = FeedTools::Feed::Image.new
1542
- image.url = XPath.first(image_node, "url/text()").to_s
1543
- if image.url == ""
1544
- image.url = XPath.first(image_node, "@rdf:resource").to_s
1545
- end
1546
- if image.url == "" && (image_node.name == "logo" ||
1547
- (image_node.attributes['type'] =~ /^image/) == 0)
1548
- image.url = XPath.first(image_node, "@href").to_s
1577
+ image_nodes = try_xpaths_all(self.channel_node, [
1578
+ "image",
1579
+ "logo",
1580
+ "atom10:link",
1581
+ "atom03:link",
1582
+ "atom:link",
1583
+ "link"
1584
+ ])
1585
+ unless image_nodes.blank?
1586
+ for image_node in image_nodes
1587
+ image = FeedTools::Feed::Image.new
1588
+ image.url = try_xpaths(image_node, [
1589
+ "url/text()",
1590
+ "@rdf:resource"
1591
+ ], :select_result_value => true)
1592
+ if image.url.blank? && (image_node.name == "logo" ||
1593
+ (image_node.attributes['type'].to_s =~ /^image/) == 0)
1594
+ image.url = try_xpaths(image_node, [
1595
+ "@atom10:href",
1596
+ "@atom03:href",
1597
+ "@atom:href",
1598
+ "@href"
1599
+ ], :select_result_value => true)
1600
+ if image.url == self.link && image.url != nil
1601
+ image.url = nil
1549
1602
  end
1550
- if image.url == "" && image_node.name == "LOGO"
1551
- image.url = XPath.first(image_node, "@HREF").to_s
1552
- end
1553
- image.url.strip! unless image.url.nil?
1554
- image.url = nil if image.url == ""
1555
- image.title = XPath.first(image_node, "title/text()").to_s
1556
- image.title.strip! unless image.title.nil?
1557
- image.title = nil if image.title == ""
1558
- image.description =
1559
- XPath.first(image_node, "description/text()").to_s
1560
- image.description.strip! unless image.description.nil?
1561
- image.description = nil if image.description == ""
1562
- image.link = XPath.first(image_node, "link/text()").to_s
1563
- image.link.strip! unless image.link.nil?
1564
- image.link = nil if image.link == ""
1565
- image.height = XPath.first(image_node, "height/text()").to_s.to_i
1566
- image.height = nil if image.height <= 0
1567
- image.width = XPath.first(image_node, "width/text()").to_s.to_i
1568
- image.width = nil if image.width <= 0
1569
- image.style = XPath.first(image_node, "@style").to_s.downcase
1570
- if image.style == ""
1571
- image.style = XPath.first(image_node, "@STYLE").to_s.downcase
1572
- end
1573
- image.style.strip! unless image.style.nil?
1574
- image.style = nil if image.style == ""
1575
- @images << image
1576
1603
  end
1604
+ if image.url.blank? && image_node.name == "LOGO"
1605
+ image.url = try_xpaths(image_node, [
1606
+ "@href"
1607
+ ], :select_result_value => true)
1608
+ end
1609
+ image.url.strip! unless image.url.nil?
1610
+ image.title = try_xpaths(image_node,
1611
+ ["title/text()"], :select_result_value => true)
1612
+ image.title.strip! unless image.title.nil?
1613
+ image.description = try_xpaths(image_node,
1614
+ ["description/text()"], :select_result_value => true)
1615
+ image.description.strip! unless image.description.nil?
1616
+ image.link = try_xpaths(image_node,
1617
+ ["link/text()"], :select_result_value => true)
1618
+ image.link.strip! unless image.link.nil?
1619
+ image.height = try_xpaths(image_node,
1620
+ ["height/text()"], :select_result_value => true).to_i
1621
+ image.height = nil if image.height <= 0
1622
+ image.width = try_xpaths(image_node,
1623
+ ["width/text()"], :select_result_value => true).to_i
1624
+ image.width = nil if image.width <= 0
1625
+ image.style = try_xpaths(image_node, [
1626
+ "style/text()",
1627
+ "@style"
1628
+ ], :select_result_value => true)
1629
+ image.style.strip! unless image.style.nil?
1630
+ image.style.downcase! unless image.style.nil?
1631
+ @images << image unless image.url.nil?
1577
1632
  end
1578
1633
  end
1579
1634
  end
@@ -1584,20 +1639,20 @@ module FeedTools
1584
1639
  def text_input
1585
1640
  if @text_input.nil?
1586
1641
  @text_input = FeedTools::Feed::TextInput.new
1587
- text_input_node = XPath.first(channel_node, "textInput")
1642
+ text_input_node = try_xpaths(self.channel_node, ["textInput"])
1588
1643
  unless text_input_node.nil?
1589
1644
  @text_input.title =
1590
- XPath.first(text_input_node, "title/text()").to_s
1591
- @text_input.title = nil if @text_input.title == ""
1645
+ try_xpaths(text_input_node, ["title/text()"],
1646
+ :select_result_value => true)
1592
1647
  @text_input.description =
1593
- XPath.first(text_input_node, "description/text()").to_s
1594
- @text_input.description = nil if @text_input.description == ""
1648
+ try_xpaths(text_input_node, ["description/text()"],
1649
+ :select_result_value => true)
1595
1650
  @text_input.link =
1596
- XPath.first(text_input_node, "link/text()").to_s
1597
- @text_input.link = nil if @text_input.link == ""
1651
+ try_xpaths(text_input_node, ["link/text()"],
1652
+ :select_result_value => true)
1598
1653
  @text_input.name =
1599
- XPath.first(text_input_node, "name/text()").to_s
1600
- @text_input.name = nil if @text_input.name == ""
1654
+ try_xpaths(text_input_node, ["name/text()"],
1655
+ :select_result_value => true)
1601
1656
  end
1602
1657
  end
1603
1658
  return @text_input
@@ -1606,43 +1661,28 @@ module FeedTools
1606
1661
  # Returns the feed's copyright information
1607
1662
  def copyright
1608
1663
  if @copyright.nil?
1609
- unless root_node.nil?
1610
- repair_entities = false
1611
- copyright_node = XPath.first(channel_node, "dc:rights")
1612
- if copyright_node.nil?
1613
- copyright_node = XPath.first(channel_node, "dc:rights",
1614
- FEED_TOOLS_NAMESPACES)
1615
- end
1616
- if copyright_node.nil?
1617
- copyright_node = XPath.first(channel_node, "rights",
1618
- FEED_TOOLS_NAMESPACES)
1619
- end
1620
- if copyright_node.nil?
1621
- copyright_node = XPath.first(channel_node, "copyright",
1622
- FEED_TOOLS_NAMESPACES)
1623
- end
1624
- if copyright_node.nil?
1625
- copyright_node = XPath.first(channel_node, "atom03:copyright",
1626
- FEED_TOOLS_NAMESPACES)
1627
- end
1628
- if copyright_node.nil?
1629
- copyright_node = XPath.first(channel_node, "atom10:copyright",
1630
- FEED_TOOLS_NAMESPACES)
1631
- end
1632
- if copyright_node.nil?
1633
- copyright_node = XPath.first(channel_node, "copyrights",
1634
- FEED_TOOLS_NAMESPACES)
1635
- end
1636
- end
1664
+ repair_entities = false
1665
+ copyright_node = try_xpaths(self.channel_node, [
1666
+ "atom10:copyright",
1667
+ "atom03:copyright",
1668
+ "atom:copyright",
1669
+ "copyright",
1670
+ "copyrights",
1671
+ "dc:rights",
1672
+ "rights"
1673
+ ])
1637
1674
  if copyright_node.nil?
1638
1675
  return nil
1639
1676
  end
1640
- copyright_type = XPath.first(copyright_node, "@type").to_s
1641
- copyright_mode = XPath.first(copyright_node, "@mode").to_s
1642
- copyright_encoding = XPath.first(copyright_node, "@encoding").to_s
1677
+ copyright_type = try_xpaths(copyright_node, "@type",
1678
+ :select_result_value => true)
1679
+ copyright_mode = try_xpaths(copyright_node, "@mode",
1680
+ :select_result_value => true)
1681
+ copyright_encoding = try_xpaths(copyright_node, "@encoding",
1682
+ :select_result_value => true)
1643
1683
 
1644
1684
  # Note that we're checking for misuse of type, mode and encoding here
1645
- if copyright_encoding != ""
1685
+ if !copyright_encoding.blank?
1646
1686
  @copyright =
1647
1687
  "[Embedded data objects are not currently supported.]"
1648
1688
  elsif copyright_node.cdatas.size > 0
@@ -1669,7 +1709,7 @@ module FeedTools
1669
1709
  end
1670
1710
 
1671
1711
  @copyright = @copyright.strip unless @copyright.nil?
1672
- @copyright = nil if @copyright == ""
1712
+ @copyright = nil if @copyright.blank?
1673
1713
  end
1674
1714
  return @copyright
1675
1715
  end
@@ -1684,9 +1724,11 @@ module FeedTools
1684
1724
  if @time_to_live.nil?
1685
1725
  unless channel_node.nil?
1686
1726
  # get the feed time to live from the xml document
1687
- update_frequency = XPath.first(channel_node, "syn:updateFrequency/text()").to_s
1688
- if update_frequency != ""
1689
- update_period = XPath.first(channel_node, "syn:updatePeriod/text()").to_s
1727
+ update_frequency = try_xpaths(self.channel_node,
1728
+ ["syn:updateFrequency/text()"], :select_result_value => true)
1729
+ if !update_frequency.blank?
1730
+ update_period = try_xpaths(self.channel_node,
1731
+ ["syn:updatePeriod/text()"], :select_result_value => true)
1690
1732
  if update_period == "daily"
1691
1733
  @time_to_live = update_frequency.to_i.day
1692
1734
  elsif update_period == "weekly"
@@ -1702,9 +1744,11 @@ module FeedTools
1702
1744
  end
1703
1745
  if @time_to_live.nil?
1704
1746
  # usually expressed in minutes
1705
- update_frequency = XPath.first(channel_node, "ttl/text()").to_s
1706
- if update_frequency != ""
1707
- update_span = XPath.first(channel_node, "ttl/@span").to_s
1747
+ update_frequency = try_xpaths(self.channel_node, ["ttl/text()"],
1748
+ :select_result_value => true)
1749
+ if !update_frequency.blank?
1750
+ update_span = try_xpaths(self.channel_node, ["ttl/@span"],
1751
+ :select_result_value => true)
1708
1752
  if update_span == "seconds"
1709
1753
  @time_to_live = update_frequency.to_i
1710
1754
  elsif update_span == "minutes"
@@ -1719,19 +1763,6 @@ module FeedTools
1719
1763
  @time_to_live = update_frequency.to_i.month
1720
1764
  elsif update_span == "years"
1721
1765
  @time_to_live = update_frequency.to_i.year
1722
- elsif update_frequency.to_i >= 3000
1723
- # Normally, this should default to minutes, but realistically,
1724
- # if they meant minutes, you're rarely going to see a value
1725
- # higher than 120. If we see >= 3000, we're either dealing
1726
- # with a stupid pseudo-spec that decided to use seconds, or
1727
- # we're looking at someone who only has weekly updated
1728
- # content. Worst case, we misreport the time, and we update
1729
- # too often. Best case, we avoid accidentally updating the
1730
- # feed only once a year. In the interests of being pragmatic,
1731
- # and since the problem we avoid is a far greater one than
1732
- # the one we cause, just run the check and hope no one
1733
- # actually gets hurt.
1734
- @time_to_live = update_frequency.to_i
1735
1766
  else
1736
1767
  @time_to_live = update_frequency.to_i.minute
1737
1768
  end
@@ -1740,7 +1771,7 @@ module FeedTools
1740
1771
  if @time_to_live.nil?
1741
1772
  @time_to_live = 0
1742
1773
  update_frequency_days =
1743
- XPath.first(channel_node, "schedule/intervaltime/@days").to_s
1774
+ XPath.first(channel_node, "SCHEDULE/INTERVALTIME/@DAY").to_s
1744
1775
  update_frequency_hours =
1745
1776
  XPath.first(channel_node, "schedule/intervaltime/@hour").to_s
1746
1777
  update_frequency_minutes =
@@ -1769,6 +1800,10 @@ module FeedTools
1769
1800
  if @time_to_live.nil? || @time_to_live == 0
1770
1801
  # Default to one hour
1771
1802
  @time_to_live = 1.hour
1803
+ elsif FeedTools.configurations[:max_ttl] != nil &&
1804
+ FeedTools.configurations[:max_ttl] != 0 &&
1805
+ @time_to_live >= FeedTools.configurations[:max_ttl].to_i
1806
+ @time_to_live = FeedTools.configurations[:max_ttl].to_i
1772
1807
  end
1773
1808
  @time_to_live = @time_to_live.round
1774
1809
  return @time_to_live
@@ -1784,20 +1819,21 @@ module FeedTools
1784
1819
  def cloud
1785
1820
  if @cloud.nil?
1786
1821
  @cloud = FeedTools::Feed::Cloud.new
1787
- @cloud.domain = XPath.first(channel_node, "cloud/@domain").to_s
1788
- @cloud.port = XPath.first(channel_node, "cloud/@port").to_s
1789
- @cloud.path = XPath.first(channel_node, "cloud/@path").to_s
1822
+ @cloud.domain = try_xpaths(self.channel_node, ["cloud/@domain"],
1823
+ :select_result_value => true)
1824
+ @cloud.port = try_xpaths(self.channel_node, ["cloud/@port"],
1825
+ :select_result_value => true)
1826
+ @cloud.path = try_xpaths(self.channel_node, ["cloud/@path"],
1827
+ :select_result_value => true)
1790
1828
  @cloud.register_procedure =
1791
- XPath.first(channel_node, "cloud/@registerProcedure").to_s
1829
+ try_xpaths(self.channel_node, ["cloud/@registerProcedure"],
1830
+ :select_result_value => true)
1792
1831
  @cloud.protocol =
1793
- XPath.first(channel_node, "cloud/@protocol").to_s.downcase
1794
- @cloud.domain = nil if @cloud.domain == ""
1795
- @cloud.port = nil if @cloud.port == ""
1796
- @cloud.port = @cloud.port.to_i unless @cloud.port.nil?
1832
+ try_xpaths(self.channel_node, ["cloud/@protocol"],
1833
+ :select_result_value => true)
1834
+ @cloud.protocol.downcase unless @cloud.protocol.nil?
1835
+ @cloud.port = @cloud.port.to_s.to_i
1797
1836
  @cloud.port = nil if @cloud.port == 0
1798
- @cloud.path = nil if @cloud.path == ""
1799
- @cloud.register_procedure = nil if @cloud.register_procedure == ""
1800
- @cloud.protocol = nil if @cloud.protocol == ""
1801
1837
  end
1802
1838
  return @cloud
1803
1839
  end
@@ -1810,9 +1846,9 @@ module FeedTools
1810
1846
  # Returns the feed generator
1811
1847
  def generator
1812
1848
  if @generator.nil?
1813
- @generator = XPath.first(channel_node, "generator/text()").to_s
1814
- @generator = FeedTools.strip_html(@generator)
1815
- @generator = nil if @generator == ""
1849
+ @generator = try_xpaths(self.channel_node, ["generator/text()"],
1850
+ :select_result_value => true)
1851
+ @generator = FeedTools.strip_html(@generator) unless @generator.nil?
1816
1852
  end
1817
1853
  return @generator
1818
1854
  end
@@ -1825,9 +1861,9 @@ module FeedTools
1825
1861
  # Returns the feed docs
1826
1862
  def docs
1827
1863
  if @docs.nil?
1828
- @docs = XPath.first(channel_node, "docs/text()").to_s
1829
- @docs = FeedTools.strip_html(@docs)
1830
- @docs = nil if @docs == ""
1864
+ @docs = try_xpaths(self.channel_node, ["docs/text()"],
1865
+ :select_result_value => true)
1866
+ @docs = FeedTools.strip_html(@docs) unless @docs.nil?
1831
1867
  end
1832
1868
  return @docs
1833
1869
  end
@@ -1840,23 +1876,23 @@ module FeedTools
1840
1876
  # Returns the feed language
1841
1877
  def language
1842
1878
  if @language.nil?
1843
- unless channel_node.nil?
1844
- @language = XPath.first(channel_node, "language/text()").to_s
1845
- if @language == ""
1846
- @language = XPath.first(channel_node, "dc:language/text()").to_s
1847
- end
1848
- if @language == ""
1849
- @language = XPath.first(channel_node, "xml:lang/text()").to_s
1850
- end
1851
- if @language == ""
1852
- @language = XPath.first(root_node, "xml:lang/text()").to_s
1853
- end
1854
- end
1855
- if @language == "" || @language.nil?
1879
+ @language = select_not_blank([
1880
+ try_xpaths(self.channel_node, [
1881
+ "language/text()",
1882
+ "dc:language/text()",
1883
+ "@dc:language",
1884
+ "@xml:lang",
1885
+ "xml:lang/text()"
1886
+ ], :select_result_value => true),
1887
+ try_xpaths(self.root_node, [
1888
+ "@xml:lang",
1889
+ "xml:lang/text()"
1890
+ ], :select_result_value => true)
1891
+ ])
1892
+ if @language.blank?
1856
1893
  @language = "en-us"
1857
1894
  end
1858
1895
  @language = @language.downcase
1859
- @language = nil if @language == ""
1860
1896
  end
1861
1897
  return @language
1862
1898
  end
@@ -1869,12 +1905,11 @@ module FeedTools
1869
1905
  # Returns true if this feed contains explicit material.
1870
1906
  def explicit?
1871
1907
  if @explicit.nil?
1872
- if XPath.first(channel_node,
1873
- "media:adult/text()").to_s.downcase == "true" ||
1874
- XPath.first(channel_node,
1875
- "itunes:explicit/text()").to_s.downcase == "yes" ||
1876
- XPath.first(channel_node,
1877
- "itunes:explicit/text()").to_s.downcase == "true"
1908
+ explicit_string = try_xpaths(self.channel_node, [
1909
+ "media:adult/text()",
1910
+ "itunes:explicit/text()"
1911
+ ], :select_result_value => true)
1912
+ if explicit_string == "true" || explicit_string == "yes"
1878
1913
  @explicit = true
1879
1914
  else
1880
1915
  @explicit = false
@@ -1888,66 +1923,68 @@ module FeedTools
1888
1923
  @explicit = (new_explicit ? true : false)
1889
1924
  end
1890
1925
 
1891
- # Returns the feed items
1892
- def items
1893
- if @items.nil?
1894
- unless root_node.nil?
1895
- raw_items = XPath.match(root_node, "item")
1896
- if raw_items == nil || raw_items == []
1897
- raw_items = XPath.match(channel_node, "item")
1898
- end
1899
- if raw_items == nil || raw_items == []
1900
- raw_items = XPath.match(channel_node, "ITEM")
1901
- end
1902
- if raw_items == nil || raw_items == []
1903
- raw_items = XPath.match(root_node, "ITEM")
1904
- end
1905
- if raw_items == nil || raw_items == []
1906
- raw_items = XPath.match(channel_node, "entry")
1907
- end
1908
- if raw_items == nil || raw_items == []
1909
- raw_items = XPath.match(root_node, "entry")
1910
- end
1911
- end
1926
+ # Returns the feed entries
1927
+ def entries
1928
+ if @entries.blank?
1929
+ raw_entries = select_not_blank([
1930
+ try_xpaths_all(self.channel_node, [
1931
+ "atom10:entry",
1932
+ "atom03:entry",
1933
+ "atom:entry",
1934
+ "entry"
1935
+ ]),
1936
+ try_xpaths_all(self.root_node, [
1937
+ "rss10:item",
1938
+ "item",
1939
+ "atom10:entry",
1940
+ "atom03:entry",
1941
+ "atom:entry",
1942
+ "entry"
1943
+ ]),
1944
+ try_xpaths_all(self.channel_node, [
1945
+ "rss10:item",
1946
+ "item"
1947
+ ])
1948
+ ])
1912
1949
 
1913
1950
  # create the individual feed items
1914
- @items = []
1915
- if raw_items != nil
1916
- for item_node in raw_items.reverse
1917
- new_item = FeedItem.new
1918
- new_item.feed_data = item_node.to_s
1919
- new_item.feed_data_type = self.feed_data_type
1920
- @items << new_item
1951
+ @entries = []
1952
+ unless raw_entries.blank?
1953
+ for entry_node in raw_entries.reverse
1954
+ new_entry = FeedItem.new
1955
+ new_entry.feed_data = entry_node.to_s
1956
+ new_entry.feed_data_type = self.feed_data_type
1957
+ @entries << new_entry
1921
1958
  end
1922
1959
  end
1923
1960
  end
1924
1961
 
1925
1962
  # Sort the items
1926
- @items = @items.sort do |a,b|
1927
- (b.time or Time.mktime(1970)) <=> (a.time or Time.mktime(1970))
1963
+ @entries = @entries.sort do |a, b|
1964
+ (b.time or Time.utc(1970)) <=> (a.time or Time.utc(1970))
1928
1965
  end
1929
- return @items
1966
+ return @entries
1930
1967
  end
1931
1968
 
1932
- # Sets the items array to a new array.
1933
- def items=(new_items)
1934
- for item in new_items
1935
- unless item.kind_of? FeedTools::FeedItem
1969
+ # Sets the entries array to a new array.
1970
+ def entries=(new_entries)
1971
+ for entry in new_entries
1972
+ unless entry.kind_of? FeedTools::FeedItem
1936
1973
  raise ArgumentError,
1937
- "You should only add FeedItem objects to the items array."
1974
+ "You should only add FeedItem objects to the entries array."
1938
1975
  end
1939
1976
  end
1940
- @items = new_items
1977
+ @entries = new_entries
1941
1978
  end
1942
1979
 
1943
1980
  # Syntactic sugar for appending feed items to a feed.
1944
- def <<(new_item)
1945
- @items ||= []
1946
- unless new_item.kind_of? FeedTools::FeedItem
1981
+ def <<(new_entry)
1982
+ @entries ||= []
1983
+ unless new_entry.kind_of? FeedTools::FeedItem
1947
1984
  raise ArgumentError,
1948
- "You should only add FeedItem objects to the items array."
1985
+ "You should only add FeedItem objects to the entries array."
1949
1986
  end
1950
- @items << new_item
1987
+ @entries << new_entry
1951
1988
  end
1952
1989
 
1953
1990
  # The time that the feed was last requested from the remote server. Nil
@@ -2020,11 +2057,14 @@ module FeedTools
2020
2057
  end
2021
2058
 
2022
2059
  # Generates xml based on the content of the feed
2023
- def build_xml(feed_type=(self.feed_type or "rss"), version=nil,
2024
- xml_builder=Builder::XmlMarkup.new(:indent => 2))
2025
- if feed_type == "rss" && (version == nil || version == 0.0)
2060
+ def build_xml(feed_type=(self.feed_type or "atom"), version=nil,
2061
+ xml_builder=Builder::XmlMarkup.new(
2062
+ :indent => 2, :escape_attrs => false))
2063
+ xml_builder.instruct! :xml, :version => "1.0",
2064
+ :encoding => (FeedTools.configurations[:output_encoding] or "utf-8")
2065
+ if feed_type == "rss" && (version == nil || version <= 0.0)
2026
2066
  version = 1.0
2027
- elsif feed_type == "atom" && (version == nil || version == 0.0)
2067
+ elsif feed_type == "atom" && (version == nil || version <= 0.0)
2028
2068
  version = 1.0
2029
2069
  end
2030
2070
  if feed_type == "rss" && (version == 0.9 || version == 1.0 ||
@@ -2040,7 +2080,8 @@ module FeedTools
2040
2080
  "xmlns:media" => FEED_TOOLS_NAMESPACES['media']) do
2041
2081
  channel_attributes = {}
2042
2082
  unless self.link.nil?
2043
- channel_attributes["rdf:about"] = CGI.escapeHTML(self.link)
2083
+ channel_attributes["rdf:about"] =
2084
+ FeedTools.escape_entities(self.link)
2044
2085
  end
2045
2086
  xml_builder.channel(channel_attributes) do
2046
2087
  unless title.nil? || title == ""
@@ -2054,7 +2095,7 @@ module FeedTools
2054
2095
  xml_builder.link
2055
2096
  end
2056
2097
  unless images.nil? || images.empty?
2057
- xml_builder.image("rdf:resource" => CGI.escapeHTML(
2098
+ xml_builder.image("rdf:resource" => FeedTools.escape_entities(
2058
2099
  images.first.url))
2059
2100
  end
2060
2101
  unless description.nil? || description == ""
@@ -2078,7 +2119,7 @@ module FeedTools
2078
2119
  "item link field."
2079
2120
  end
2080
2121
  xml_builder.tag!("rdf:li", "rdf:resource" =>
2081
- CGI.escapeHTML(item.link))
2122
+ FeedTools.escape_entities(item.link))
2082
2123
  end
2083
2124
  end
2084
2125
  end
@@ -2095,20 +2136,20 @@ module FeedTools
2095
2136
  end
2096
2137
  best_image = images.first if best_image.nil?
2097
2138
  xml_builder.image(
2098
- "rdf:about" => CGI.escapeHTML(best_image.url)) do
2099
- if best_image.title != nil && best_image.title != ""
2139
+ "rdf:about" => FeedTools.escape_entities(best_image.url)) do
2140
+ if !best_image.title.blank?
2100
2141
  xml_builder.title(best_image.title)
2101
- elsif self.title != nil && self.title != ""
2142
+ elsif !self.title.blank?
2102
2143
  xml_builder.title(self.title)
2103
2144
  else
2104
2145
  xml_builder.title
2105
2146
  end
2106
- unless best_image.url.nil? || best_image.url == ""
2147
+ unless best_image.url.blank?
2107
2148
  xml_builder.url(best_image.url)
2108
2149
  end
2109
- if best_image.link != nil && best_image.link != ""
2150
+ if !best_image.link.blank?
2110
2151
  xml_builder.link(best_image.link)
2111
- elsif self.link != nil && self.link != ""
2152
+ elsif !self.link.blank?
2112
2153
  xml_builder.link(self.link)
2113
2154
  else
2114
2155
  xml_builder.link
@@ -2131,18 +2172,18 @@ module FeedTools
2131
2172
  "xmlns:itunes" => FEED_TOOLS_NAMESPACES['itunes'],
2132
2173
  "xmlns:media" => FEED_TOOLS_NAMESPACES['media']) do
2133
2174
  xml_builder.channel do
2134
- unless title.nil? || title == ""
2175
+ unless title.blank?
2135
2176
  xml_builder.title(title)
2136
2177
  end
2137
- unless link.nil? || link == ""
2178
+ unless link.blank?
2138
2179
  xml_builder.link(link)
2139
2180
  end
2140
- unless description.nil? || description == ""
2181
+ unless description.blank?
2141
2182
  xml_builder.description(description)
2142
2183
  end
2143
2184
  xml_builder.ttl((time_to_live / 1.minute).to_s)
2144
2185
  xml_builder.generator(
2145
- "http://www.sporkmonger.com/projects/feedtools")
2186
+ FeedTools.configurations[:generator_href])
2146
2187
  build_xml_hook(feed_type, version, xml_builder)
2147
2188
  unless items.nil?
2148
2189
  for item in items
@@ -2152,53 +2193,12 @@ module FeedTools
2152
2193
  end
2153
2194
  end
2154
2195
  elsif feed_type == "atom" && version == 0.3
2155
- # normal atom format
2156
- return xml_builder.feed("xmlns" => FEED_TOOLS_NAMESPACES['atom03'],
2157
- "version" => version,
2158
- "xml:lang" => language) do
2159
- unless title.nil? || title == ""
2160
- xml_builder.title(title,
2161
- "mode" => "escaped",
2162
- "type" => "text/html")
2163
- end
2164
- xml_builder.author do
2165
- unless self.author.nil? || self.author.name.nil?
2166
- xml_builder.name(self.author.name)
2167
- else
2168
- xml_builder.name("n/a")
2169
- end
2170
- unless self.author.nil? || self.author.email.nil?
2171
- xml_builder.email(self.author.email)
2172
- end
2173
- unless self.author.nil? || self.author.url.nil?
2174
- xml_builder.url(self.author.url)
2175
- end
2176
- end
2177
- unless link.nil? || link == ""
2178
- xml_builder.link("href" => link,
2179
- "rel" => "alternate",
2180
- "type" => "text/html",
2181
- "title" => title)
2182
- end
2183
- unless description.nil? || description == ""
2184
- xml_builder.tagline(description,
2185
- "mode" => "escaped",
2186
- "type" => "text/html")
2187
- end
2188
- xml_builder.generator("FeedTools",
2189
- "url" => "http://www.sporkmonger.com/projects/feedtools")
2190
- build_xml_hook(feed_type, version, xml_builder)
2191
- unless items.nil?
2192
- for item in items
2193
- item.build_xml(feed_type, version, xml_builder)
2194
- end
2195
- end
2196
- end
2196
+ raise "Atom 0.3 is obsolete."
2197
2197
  elsif feed_type == "atom" && version == 1.0
2198
2198
  # normal atom format
2199
2199
  return xml_builder.feed("xmlns" => FEED_TOOLS_NAMESPACES['atom10'],
2200
2200
  "xml:lang" => language) do
2201
- unless title.nil? || title == ""
2201
+ unless title.blank?
2202
2202
  xml_builder.title(title,
2203
2203
  "type" => "html")
2204
2204
  end
@@ -2212,22 +2212,22 @@ module FeedTools
2212
2212
  xml_builder.email(self.author.email)
2213
2213
  end
2214
2214
  unless self.author.nil? || self.author.url.nil?
2215
- xml_builder.url(self.author.url)
2215
+ xml_builder.uri(self.author.url)
2216
2216
  end
2217
2217
  end
2218
- unless self.url.nil? || self.url == ""
2218
+ unless self.url.blank?
2219
2219
  xml_builder.link("href" => self.url,
2220
2220
  "rel" => "self",
2221
2221
  "type" => "application/atom+xml")
2222
2222
  end
2223
- unless self.link.nil? || self.link == ""
2224
- xml_builder.link("href" => self.link,
2223
+ unless self.link.blank?
2224
+ xml_builder.link("href" => FeedTools.escape_entities(self.link),
2225
2225
  "rel" => "alternate",
2226
2226
  "type" => "text/html",
2227
- "title" => self.title)
2227
+ "title" => FeedTools.escape_entities(self.title))
2228
2228
  end
2229
- unless description.nil? || description == ""
2230
- xml_builder.subtitle(description,
2229
+ unless description.blank?
2230
+ xml_builder.subtitle(self.subtitle,
2231
2231
  "type" => "html")
2232
2232
  else
2233
2233
  xml_builder.subtitle(FeedTools.no_content_string,
@@ -2242,8 +2242,8 @@ module FeedTools
2242
2242
  else
2243
2243
  xml_builder.updated(Time.now.gmtime.iso8601)
2244
2244
  end
2245
- xml_builder.generator("FeedTools - " +
2246
- "http://www.sporkmonger.com/projects/feedtools")
2245
+ xml_builder.generator(FeedTools.configurations[:generator_name] +
2246
+ " - " + FeedTools.configurations[:generator_href])
2247
2247
  if self.id != nil
2248
2248
  unless FeedTools.is_uri? self.id
2249
2249
  if self.link != nil
@@ -2266,46 +2266,49 @@ module FeedTools
2266
2266
  end
2267
2267
  end
2268
2268
  end
2269
+ else
2270
+ raise "Unsupported feed format/version."
2269
2271
  end
2270
2272
  end
2271
2273
 
2272
2274
  # Persists the current feed state to the cache.
2273
2275
  def save
2274
- if FeedTools.feed_cache.nil?
2275
- raise "Caching is currently disabled. Cannot save to cache."
2276
- elsif self.url.nil?
2277
- raise "The url field must be set to save to the cache."
2278
- elsif self.cache_object.nil?
2279
- raise "The cache_object is currently nil. Cannot save to cache."
2280
- else
2281
- self.cache_object.url = self.url
2282
- unless self.feed_data.nil?
2283
- self.cache_object.title = self.title
2284
- self.cache_object.link = self.link
2285
- self.cache_object.feed_data = self.feed_data
2286
- self.cache_object.feed_data_type = self.feed_data_type.to_s
2287
- end
2288
- unless self.http_response.nil?
2276
+ unless self.url =~ /^file:\/\//
2277
+ if FeedTools.feed_cache.nil?
2278
+ raise "Caching is currently disabled. Cannot save to cache."
2279
+ elsif self.url.nil?
2280
+ raise "The url field must be set to save to the cache."
2281
+ elsif self.cache_object.nil?
2282
+ raise "The cache_object is currently nil. Cannot save to cache."
2283
+ else
2284
+ self.cache_object.url = self.url
2285
+ unless self.feed_data.nil?
2286
+ self.cache_object.title = self.title
2287
+ self.cache_object.link = self.link
2288
+ self.cache_object.feed_data = self.feed_data
2289
+ self.cache_object.feed_data_type = self.feed_data_type.to_s
2290
+ end
2289
2291
  self.cache_object.http_headers = self.http_headers.to_yaml
2292
+ self.cache_object.last_retrieved = self.last_retrieved
2293
+ self.cache_object.save
2290
2294
  end
2291
- self.cache_object.last_retrieved = self.last_retrieved
2292
- self.cache_object.save
2293
2295
  end
2294
2296
  end
2295
2297
 
2296
- alias_method :tagline, :description
2297
- alias_method :tagline=, :description=
2298
- alias_method :subtitle, :description
2299
- alias_method :subtitle=, :description=
2300
- alias_method :abstract, :description
2301
- alias_method :abstract=, :description=
2302
- alias_method :content, :description
2303
- alias_method :content=, :description=
2298
+ alias_method :tagline, :subtitle
2299
+ alias_method :tagline=, :subtitle=
2300
+ alias_method :description, :subtitle
2301
+ alias_method :description=, :subtitle=
2302
+ alias_method :abstract, :subtitle
2303
+ alias_method :abstract=, :subtitle=
2304
+ alias_method :content, :subtitle
2305
+ alias_method :content=, :subtitle=
2304
2306
  alias_method :ttl, :time_to_live
2305
2307
  alias_method :ttl=, :time_to_live=
2306
2308
  alias_method :guid, :id
2307
2309
  alias_method :guid=, :id=
2308
- alias_method :entries, :items
2310
+ alias_method :items, :entries
2311
+ alias_method :items=, :entries=
2309
2312
 
2310
2313
  # passes missing methods to the cache_object
2311
2314
  def method_missing(msg, *params)