feedtools 0.2.24 → 0.2.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/CHANGELOG CHANGED
@@ -1,3 +1,15 @@
1
+ == FeedTools 0.2.25
2
+ * fixed self reference href bug
3
+ * fixed primary link assignment heuristic
4
+ * link rel values now default to 'alternate' for atom feeds
5
+ * better handling of language attributes
6
+ * better support for threaded applications
7
+ * generated rss 2.0 now includes author, managingEditor, webMaster elements
8
+ * improved handling of author elements
9
+ * improved handling of http timeouts
10
+ * fixed issue with relative paths in the location header for http redirection
11
+ * monkey patches now work with Ruby 1.8.4's version of REXML
12
+ * REXML monkey patches moved into their own file
1
13
  == FeedTools 0.2.24
2
14
  * fixed autodiscovery bugs
3
15
  * updated autodiscovery to handle relative uris
@@ -151,6 +151,8 @@ begin
151
151
  end
152
152
  end
153
153
 
154
+ require 'feed_tools/monkey_patch'
155
+
154
156
  require 'feed_tools/feed'
155
157
  require 'feed_tools/feed_item'
156
158
  require 'feed_tools/feed_structures'
@@ -348,301 +350,6 @@ module FeedTools
348
350
  end
349
351
  end
350
352
 
351
- module REXML # :nodoc:
352
- class LiberalXPathParser < XPathParser # :nodoc:
353
- private
354
- def internal_parse(path_stack, nodeset) # :nodoc:
355
- return nodeset if nodeset.size == 0 or path_stack.size == 0
356
- case path_stack.shift
357
- when :document
358
- return [ nodeset[0].root.parent ]
359
-
360
- when :qname
361
- prefix = path_stack.shift.downcase
362
- name = path_stack.shift.downcase
363
- n = nodeset.clone
364
- ns = @namespaces[prefix]
365
- ns = ns ? ns : ''
366
- n.delete_if do |node|
367
- if node.node_type == :element and ns == ''
368
- ns = node.namespace( prefix )
369
- end
370
- !(node.node_type == :element and
371
- node.name.downcase == name.downcase and node.namespace == ns )
372
- end
373
- return n
374
-
375
- when :any
376
- n = nodeset.clone
377
- n.delete_if { |node| node.node_type != :element }
378
- return n
379
-
380
- when :self
381
- # THIS SPACE LEFT INTENTIONALLY BLANK
382
-
383
- when :processing_instruction
384
- target = path_stack.shift
385
- n = nodeset.clone
386
- n.delete_if do |node|
387
- (node.node_type != :processing_instruction) or
388
- ( !target.nil? and ( node.target != target ) )
389
- end
390
- return n
391
-
392
- when :text
393
- n = nodeset.clone
394
- n.delete_if do |node|
395
- node.node_type != :text
396
- end
397
- return n
398
-
399
- when :comment
400
- n = nodeset.clone
401
- n.delete_if do |node|
402
- node.node_type != :comment
403
- end
404
- return n
405
-
406
- when :node
407
- return nodeset
408
-
409
- when :child
410
- new_nodeset = []
411
- nt = nil
412
- for node in nodeset
413
- nt = node.node_type
414
- new_nodeset += node.children if nt == :element or nt == :document
415
- end
416
- return new_nodeset
417
-
418
- when :literal
419
- literal = path_stack.shift
420
- if literal =~ /^\d+(\.\d+)?$/
421
- return ($1 ? literal.to_f : literal.to_i)
422
- end
423
- return literal
424
-
425
- when :attribute
426
- new_nodeset = []
427
- case path_stack.shift
428
- when :qname
429
- prefix = path_stack.shift
430
- name = path_stack.shift.downcase
431
- for element in nodeset
432
- if element.node_type == :element
433
- for attribute_name in element.attributes.keys
434
- if attribute_name.downcase == name.downcase
435
- attrib = element.attribute( attribute_name,
436
- @namespaces[prefix] )
437
- new_nodeset << attrib if attrib
438
- end
439
- end
440
- end
441
- end
442
- when :any
443
- for element in nodeset
444
- if element.node_type == :element
445
- new_nodeset += element.attributes.to_a
446
- end
447
- end
448
- end
449
- return new_nodeset
450
-
451
- when :parent
452
- return internal_parse( path_stack,
453
- nodeset.collect{|n| n.parent}.compact )
454
-
455
- when :ancestor
456
- new_nodeset = []
457
- for node in nodeset
458
- while node.parent
459
- node = node.parent
460
- new_nodeset << node unless new_nodeset.include? node
461
- end
462
- end
463
- return new_nodeset
464
-
465
- when :ancestor_or_self
466
- new_nodeset = []
467
- for node in nodeset
468
- if node.node_type == :element
469
- new_nodeset << node
470
- while ( node.parent )
471
- node = node.parent
472
- new_nodeset << node unless new_nodeset.include? node
473
- end
474
- end
475
- end
476
- return new_nodeset
477
-
478
- when :predicate
479
- predicate = path_stack.shift
480
- new_nodeset = []
481
- Functions::size = nodeset.size
482
- nodeset.size.times do |index|
483
- node = nodeset[index]
484
- Functions::node = node
485
- Functions::index = index+1
486
- result = Predicate( predicate, node )
487
- if result.kind_of? Numeric
488
- new_nodeset << node if result == (index+1)
489
- elsif result.instance_of? Array
490
- new_nodeset << node if result.size > 0
491
- else
492
- new_nodeset << node if result
493
- end
494
- end
495
- return new_nodeset
496
-
497
- when :descendant_or_self
498
- rv = descendant_or_self( path_stack, nodeset )
499
- path_stack.clear
500
- return rv
501
-
502
- when :descendant
503
- results = []
504
- nt = nil
505
- for node in nodeset
506
- nt = node.node_type
507
- if nt == :element or nt == :document
508
- results += internal_parse(
509
- path_stack.clone.unshift( :descendant_or_self ),
510
- node.children )
511
- end
512
- end
513
- return results
514
-
515
- when :following_sibling
516
- results = []
517
- for node in nodeset
518
- all_siblings = node.parent.children
519
- current_index = all_siblings.index( node )
520
- following_siblings = all_siblings[ current_index+1 .. -1 ]
521
- results += internal_parse( path_stack.clone, following_siblings )
522
- end
523
- return results
524
-
525
- when :preceding_sibling
526
- results = []
527
- for node in nodeset
528
- all_siblings = node.parent.children
529
- current_index = all_siblings.index( node )
530
- preceding_siblings = all_siblings[ 0 .. current_index-1 ]
531
- results += internal_parse( path_stack.clone, preceding_siblings )
532
- end
533
- return results
534
-
535
- when :preceding
536
- new_nodeset = []
537
- for node in nodeset
538
- new_nodeset += preceding( node )
539
- end
540
- return new_nodeset
541
-
542
- when :following
543
- new_nodeset = []
544
- for node in nodeset
545
- new_nodeset += following( node )
546
- end
547
- return new_nodeset
548
-
549
- when :namespace
550
- new_set = []
551
- for node in nodeset
552
- if node.node_type == :element or node.node_type == :attribute
553
- new_nodeset << node.namespace
554
- end
555
- end
556
- return new_nodeset
557
-
558
- when :variable
559
- var_name = path_stack.shift
560
- return @variables[ var_name ]
561
-
562
- end
563
- nodeset
564
- end
565
- end
566
-
567
- class XPath # :nodoc:
568
- def self.liberal_match(element, path=nil, namespaces={},
569
- variables={}) # :nodoc:
570
- parser = LiberalXPathParser.new
571
- parser.namespaces = namespaces
572
- parser.variables = variables
573
- path = "*" unless path
574
- element = [element] unless element.kind_of? Array
575
- parser.parse(path, element)
576
- end
577
-
578
- def self.liberal_first(element, path=nil, namespaces={},
579
- variables={}) # :nodoc:
580
- parser = LiberalXPathParser.new
581
- parser.namespaces = namespaces
582
- parser.variables = variables
583
- path = "*" unless path
584
- element = [element] unless element.kind_of? Array
585
- parser.parse(path, element)[0]
586
- end
587
-
588
- def self.liberal_each(element, path=nil, namespaces={},
589
- variables={}, &block) # :nodoc:
590
- parser = LiberalXPathParser.new
591
- parser.namespaces = namespaces
592
- parser.variables = variables
593
- path = "*" unless path
594
- element = [element] unless element.kind_of? Array
595
- parser.parse(path, element).each( &block )
596
- end
597
- end
598
-
599
- class Element # :nodoc:
600
- unless REXML::Element.public_instance_methods.include? :inner_xml
601
- def inner_xml # :nodoc:
602
- result = ""
603
- self.each_child do |child|
604
- if child.kind_of? REXML::Comment
605
- result << "<!--" + child.to_s + "-->"
606
- else
607
- result << child.to_s
608
- end
609
- end
610
- return result.strip
611
- end
612
- else
613
- warn("inner_xml method already exists.")
614
- end
615
-
616
- def base_uri # :nodoc:
617
- begin
618
- base_attribute = FeedTools::XmlHelper.try_xpaths(self, [
619
- '@xml:base'
620
- ])
621
- if parent == nil || parent.kind_of?(REXML::Document)
622
- return nil if base_attribute == nil
623
- return base_attribute.value
624
- end
625
- if base_attribute != nil && parent == nil
626
- return base_attribute.value
627
- elsif parent != nil && base_attribute == nil
628
- return parent.base_uri
629
- elsif parent != nil && base_attribute != nil
630
- parent_base_uri = parent.base_uri
631
- if parent_base_uri != nil
632
- uri = URI.parse(parent_base_uri)
633
- return (uri + base_attribute.value).to_s
634
- else
635
- return base_attribute.value
636
- end
637
- end
638
- return nil
639
- rescue
640
- return nil
641
- end
642
- end
643
- end
644
- end
645
-
646
353
  begin
647
354
  unless FeedTools.feed_cache.nil?
648
355
  FeedTools.feed_cache.initialize_cache
@@ -78,6 +78,8 @@ module FeedTools
78
78
  # load the new feed
79
79
  feed.href = url
80
80
  feed.update! unless feed.configurations[:disable_update_from_remote]
81
+ Thread.pass
82
+
81
83
  return feed
82
84
  end
83
85
 
@@ -101,6 +103,10 @@ module FeedTools
101
103
  # Don't do anything if this option is set
102
104
  return
103
105
  end
106
+ if !FeedTools.feed_cache.nil? &&
107
+ !FeedTools.feed_cache.set_up_correctly?
108
+ FeedTools.feed_cache.initialize_cache()
109
+ end
104
110
  if !FeedTools.feed_cache.nil? &&
105
111
  !FeedTools.feed_cache.set_up_correctly?
106
112
  raise "Your feed cache system is incorrectly set up. " +
@@ -225,6 +231,7 @@ module FeedTools
225
231
  end
226
232
 
227
233
  @title = nil; self.title
234
+ self.href
228
235
  @link = nil; self.link
229
236
 
230
237
  self.last_retrieved = cached_feed.last_retrieved
@@ -254,7 +261,7 @@ module FeedTools
254
261
  self.http_headers[key.downcase] = value
255
262
  end
256
263
  self.last_retrieved = Time.now.gmtime
257
- @live = true
264
+ @live = false
258
265
  else
259
266
  @live = false
260
267
  end
@@ -724,20 +731,24 @@ module FeedTools
724
731
  end
725
732
  end
726
733
  if override_href.call(@href) && self.feed_data != nil
727
- # rdf:about is ordered last because a lot of people put the url to
728
- # the feed inside it instead of a link to their blog.
729
- # Ordering it last gives them as many chances as humanly possible
730
- # for them to redeem themselves. If the link turns out to be the
731
- # same as the blog link, it will be reset to the original value.
732
734
  for link_object in self.links
733
735
  if link_object.rel == 'self'
734
- if link_object.href != self.link
736
+ if link_object.href != self.link ||
737
+ (link_object.href =~ /xml/ ||
738
+ link_object.href =~ /atom/ ||
739
+ link_object.href =~ /feed/)
735
740
  @href = link_object.href
736
741
  @href_overridden = true
742
+ @link = nil
737
743
  return @href
738
744
  end
739
745
  end
740
746
  end
747
+ # rdf:about is ordered last because a lot of people put the url to
748
+ # the feed inside it instead of a link to their blog.
749
+ # Ordering it last gives them as many chances as humanly possible
750
+ # for them to redeem themselves. If the link turns out to be the
751
+ # same as the blog link, it will be reset to the original value.
741
752
  @href = FeedTools::XmlHelper.try_xpaths(self.channel_node, [
742
753
  "admin:feed/@rdf:resource",
743
754
  "admin:feed/@resource",
@@ -770,6 +781,9 @@ module FeedTools
770
781
  @href = original_href
771
782
  @href_overridden = false
772
783
  end
784
+ if @href_overridden == true
785
+ @link = nil
786
+ end
773
787
  end
774
788
  end
775
789
  return @href
@@ -791,7 +805,8 @@ module FeedTools
791
805
  "atom:title",
792
806
  "title",
793
807
  "dc:title",
794
- "channelTitle"
808
+ "channelTitle",
809
+ "TITLE"
795
810
  ])
796
811
  @title = FeedTools::HtmlHelper.process_text_construct(title_node,
797
812
  self.feed_type, self.feed_version)
@@ -822,6 +837,7 @@ module FeedTools
822
837
  "description",
823
838
  "summary",
824
839
  "abstract",
840
+ "ABSTRACT",
825
841
  "content:encoded",
826
842
  "encoded",
827
843
  "content",
@@ -947,19 +963,36 @@ module FeedTools
947
963
  max_score = 0
948
964
  for link_object in self.links.reverse
949
965
  score = 0
950
- if FeedTools::HtmlHelper.html_type?(link_object.type)
951
- score = score + 2
952
- elsif link_object.type != nil
953
- score = score - 1
966
+ next if link_object.href.nil?
967
+ if @href != nil && link_object.href == @href
968
+ score = score - 2
954
969
  end
955
- if FeedTools::HtmlHelper.xml_type?(link_object.type)
956
- score = score + 1
970
+ if link_object.type != nil
971
+ if (link_object.type =~ /image/ || link_object.type =~ /video/)
972
+ score = score - 2
973
+ end
974
+ if FeedTools::HtmlHelper.xml_type?(link_object.type)
975
+ score = score + 1
976
+ end
977
+ if FeedTools::HtmlHelper.html_type?(link_object.type)
978
+ score = score + 2
979
+ elsif link_object.type != nil
980
+ score = score - 1
981
+ end
982
+ end
983
+ if link_object.rel == "enclosure"
984
+ score = score - 2
957
985
  end
958
986
  if link_object.rel == "alternate"
959
987
  score = score + 1
960
988
  end
961
989
  if link_object.rel == "self"
962
990
  score = score - 1
991
+ if (link_object.href =~ /xml/ ||
992
+ link_object.href =~ /atom/ ||
993
+ link_object.href =~ /feed/)
994
+ score = score - 1
995
+ end
963
996
  end
964
997
  if score >= max_score
965
998
  max_score = score
@@ -1073,6 +1106,9 @@ module FeedTools
1073
1106
  unless link_object.rel.nil?
1074
1107
  link_object.rel = link_object.rel.downcase
1075
1108
  end
1109
+ if link_object.rel.nil? && self.feed_type == "atom"
1110
+ link_object.rel = "alternate"
1111
+ end
1076
1112
  link_object.type = FeedTools::XmlHelper.try_xpaths(link_node, [
1077
1113
  "@atom10:type",
1078
1114
  "@atom03:type",
@@ -1110,6 +1146,7 @@ module FeedTools
1110
1146
  link_object.length = nil
1111
1147
  end
1112
1148
  end
1149
+ @links = [] if @links.nil?
1113
1150
  @links << link_object
1114
1151
  end
1115
1152
  end
@@ -1128,10 +1165,13 @@ module FeedTools
1128
1165
  "@base"
1129
1166
  ], :select_result_value => true)
1130
1167
  if @base_uri.blank?
1131
- @base_uri =
1132
- FeedTools::GenericHelper.recursion_trap(:feed_base_uri) do
1133
- self.href
1134
- end
1168
+ begin
1169
+ @base_uri =
1170
+ FeedTools::GenericHelper.recursion_trap(:feed_base_uri) do
1171
+ self.href
1172
+ end
1173
+ rescue Exception
1174
+ end
1135
1175
  end
1136
1176
  if !@base_uri.blank?
1137
1177
  @base_uri = FeedTools::UriHelper.normalize_url(@base_uri)
@@ -1307,6 +1347,21 @@ module FeedTools
1307
1347
  ], :select_result_value => true)
1308
1348
  )
1309
1349
  end
1350
+ if @author.name.blank? && !@author.raw.blank? &&
1351
+ !@author.email.blank?
1352
+ name_scan = @author.raw.scan(
1353
+ /"?([^"]*)"? ?[\(<].*#{@author.email}.*[\)>].*/)
1354
+ if name_scan.flatten.size == 1
1355
+ @author.name = name_scan.flatten[0].strip
1356
+ end
1357
+ if @author.name.blank?
1358
+ name_scan = @author.raw.scan(
1359
+ /.*#{@author.email} ?[\(<]"?([^"]*)"?[\)>].*/)
1360
+ if name_scan.flatten.size == 1
1361
+ @author.name = name_scan.flatten[0].strip
1362
+ end
1363
+ end
1364
+ end
1310
1365
  @author.name = nil if @author.name.blank?
1311
1366
  @author.raw = nil if @author.raw.blank?
1312
1367
  @author.email = nil if @author.email.blank?
@@ -1935,7 +1990,13 @@ module FeedTools
1935
1990
  if @language.blank?
1936
1991
  @language = "en-us"
1937
1992
  end
1993
+ @language.gsub!(/_/, "-")
1938
1994
  @language = @language.downcase
1995
+ if @language.split('-').size > 1
1996
+ @language =
1997
+ "#{@language.split('-').first}-" +
1998
+ "#{@language.split('-').last.upcase}"
1999
+ end
1939
2000
  end
1940
2001
  return @language
1941
2002
  end
@@ -2267,6 +2328,12 @@ module FeedTools
2267
2328
  else
2268
2329
  xml_builder.description
2269
2330
  end
2331
+ unless self.author.email.blank?
2332
+ xml_builder.managingEditor(self.author.email)
2333
+ end
2334
+ unless self.publisher.email.blank?
2335
+ xml_builder.webMaster(self.publisher.email)
2336
+ end
2270
2337
  unless self.published.blank?
2271
2338
  xml_builder.pubDate(self.published.rfc822)
2272
2339
  end
@@ -2397,6 +2464,7 @@ module FeedTools
2397
2464
  end
2398
2465
  self.cache_object.http_headers = self.http_headers.to_yaml
2399
2466
  self.cache_object.last_retrieved = self.last_retrieved
2467
+ Thread.pass
2400
2468
  self.cache_object.save
2401
2469
  end
2402
2470
  end