feedtools 0.2.24 → 0.2.25

Sign up to get free protection for your applications and to get access to all the features.
data/CHANGELOG CHANGED
@@ -1,3 +1,15 @@
1
+ == FeedTools 0.2.25
2
+ * fixed self reference href bug
3
+ * fixed primary link assignment heuristic
4
+ * link rel's now default to 'alternate' for atom feeds
5
+ * better handling of language attributes
6
+ * better support for threaded applications
7
+ * generated rss 2.0 now includes author, managingEditor, webMaster elements
8
+ * improved handling of author elements
9
+ * improved handling of http timeouts
10
+ * fixed issue with relative paths in the location header for http redirection
11
+ * monkey patches now work with Ruby 1.8.4's version of REXML
12
+ * REXML monkey patches moved into their own file
1
13
  == FeedTools 0.2.24
2
14
  * fixed autodiscovery bugs
3
15
  * updated autodiscovery to handle relative uris
@@ -151,6 +151,8 @@ begin
151
151
  end
152
152
  end
153
153
 
154
+ require 'feed_tools/monkey_patch'
155
+
154
156
  require 'feed_tools/feed'
155
157
  require 'feed_tools/feed_item'
156
158
  require 'feed_tools/feed_structures'
@@ -348,301 +350,6 @@ module FeedTools
348
350
  end
349
351
  end
350
352
 
351
- module REXML # :nodoc:
352
- class LiberalXPathParser < XPathParser # :nodoc:
353
- private
354
- def internal_parse(path_stack, nodeset) # :nodoc:
355
- return nodeset if nodeset.size == 0 or path_stack.size == 0
356
- case path_stack.shift
357
- when :document
358
- return [ nodeset[0].root.parent ]
359
-
360
- when :qname
361
- prefix = path_stack.shift.downcase
362
- name = path_stack.shift.downcase
363
- n = nodeset.clone
364
- ns = @namespaces[prefix]
365
- ns = ns ? ns : ''
366
- n.delete_if do |node|
367
- if node.node_type == :element and ns == ''
368
- ns = node.namespace( prefix )
369
- end
370
- !(node.node_type == :element and
371
- node.name.downcase == name.downcase and node.namespace == ns )
372
- end
373
- return n
374
-
375
- when :any
376
- n = nodeset.clone
377
- n.delete_if { |node| node.node_type != :element }
378
- return n
379
-
380
- when :self
381
- # THIS SPACE LEFT INTENTIONALLY BLANK
382
-
383
- when :processing_instruction
384
- target = path_stack.shift
385
- n = nodeset.clone
386
- n.delete_if do |node|
387
- (node.node_type != :processing_instruction) or
388
- ( !target.nil? and ( node.target != target ) )
389
- end
390
- return n
391
-
392
- when :text
393
- n = nodeset.clone
394
- n.delete_if do |node|
395
- node.node_type != :text
396
- end
397
- return n
398
-
399
- when :comment
400
- n = nodeset.clone
401
- n.delete_if do |node|
402
- node.node_type != :comment
403
- end
404
- return n
405
-
406
- when :node
407
- return nodeset
408
-
409
- when :child
410
- new_nodeset = []
411
- nt = nil
412
- for node in nodeset
413
- nt = node.node_type
414
- new_nodeset += node.children if nt == :element or nt == :document
415
- end
416
- return new_nodeset
417
-
418
- when :literal
419
- literal = path_stack.shift
420
- if literal =~ /^\d+(\.\d+)?$/
421
- return ($1 ? literal.to_f : literal.to_i)
422
- end
423
- return literal
424
-
425
- when :attribute
426
- new_nodeset = []
427
- case path_stack.shift
428
- when :qname
429
- prefix = path_stack.shift
430
- name = path_stack.shift.downcase
431
- for element in nodeset
432
- if element.node_type == :element
433
- for attribute_name in element.attributes.keys
434
- if attribute_name.downcase == name.downcase
435
- attrib = element.attribute( attribute_name,
436
- @namespaces[prefix] )
437
- new_nodeset << attrib if attrib
438
- end
439
- end
440
- end
441
- end
442
- when :any
443
- for element in nodeset
444
- if element.node_type == :element
445
- new_nodeset += element.attributes.to_a
446
- end
447
- end
448
- end
449
- return new_nodeset
450
-
451
- when :parent
452
- return internal_parse( path_stack,
453
- nodeset.collect{|n| n.parent}.compact )
454
-
455
- when :ancestor
456
- new_nodeset = []
457
- for node in nodeset
458
- while node.parent
459
- node = node.parent
460
- new_nodeset << node unless new_nodeset.include? node
461
- end
462
- end
463
- return new_nodeset
464
-
465
- when :ancestor_or_self
466
- new_nodeset = []
467
- for node in nodeset
468
- if node.node_type == :element
469
- new_nodeset << node
470
- while ( node.parent )
471
- node = node.parent
472
- new_nodeset << node unless new_nodeset.include? node
473
- end
474
- end
475
- end
476
- return new_nodeset
477
-
478
- when :predicate
479
- predicate = path_stack.shift
480
- new_nodeset = []
481
- Functions::size = nodeset.size
482
- nodeset.size.times do |index|
483
- node = nodeset[index]
484
- Functions::node = node
485
- Functions::index = index+1
486
- result = Predicate( predicate, node )
487
- if result.kind_of? Numeric
488
- new_nodeset << node if result == (index+1)
489
- elsif result.instance_of? Array
490
- new_nodeset << node if result.size > 0
491
- else
492
- new_nodeset << node if result
493
- end
494
- end
495
- return new_nodeset
496
-
497
- when :descendant_or_self
498
- rv = descendant_or_self( path_stack, nodeset )
499
- path_stack.clear
500
- return rv
501
-
502
- when :descendant
503
- results = []
504
- nt = nil
505
- for node in nodeset
506
- nt = node.node_type
507
- if nt == :element or nt == :document
508
- results += internal_parse(
509
- path_stack.clone.unshift( :descendant_or_self ),
510
- node.children )
511
- end
512
- end
513
- return results
514
-
515
- when :following_sibling
516
- results = []
517
- for node in nodeset
518
- all_siblings = node.parent.children
519
- current_index = all_siblings.index( node )
520
- following_siblings = all_siblings[ current_index+1 .. -1 ]
521
- results += internal_parse( path_stack.clone, following_siblings )
522
- end
523
- return results
524
-
525
- when :preceding_sibling
526
- results = []
527
- for node in nodeset
528
- all_siblings = node.parent.children
529
- current_index = all_siblings.index( node )
530
- preceding_siblings = all_siblings[ 0 .. current_index-1 ]
531
- results += internal_parse( path_stack.clone, preceding_siblings )
532
- end
533
- return results
534
-
535
- when :preceding
536
- new_nodeset = []
537
- for node in nodeset
538
- new_nodeset += preceding( node )
539
- end
540
- return new_nodeset
541
-
542
- when :following
543
- new_nodeset = []
544
- for node in nodeset
545
- new_nodeset += following( node )
546
- end
547
- return new_nodeset
548
-
549
- when :namespace
550
- new_set = []
551
- for node in nodeset
552
- if node.node_type == :element or node.node_type == :attribute
553
- new_nodeset << node.namespace
554
- end
555
- end
556
- return new_nodeset
557
-
558
- when :variable
559
- var_name = path_stack.shift
560
- return @variables[ var_name ]
561
-
562
- end
563
- nodeset
564
- end
565
- end
566
-
567
- class XPath # :nodoc:
568
- def self.liberal_match(element, path=nil, namespaces={},
569
- variables={}) # :nodoc:
570
- parser = LiberalXPathParser.new
571
- parser.namespaces = namespaces
572
- parser.variables = variables
573
- path = "*" unless path
574
- element = [element] unless element.kind_of? Array
575
- parser.parse(path, element)
576
- end
577
-
578
- def self.liberal_first(element, path=nil, namespaces={},
579
- variables={}) # :nodoc:
580
- parser = LiberalXPathParser.new
581
- parser.namespaces = namespaces
582
- parser.variables = variables
583
- path = "*" unless path
584
- element = [element] unless element.kind_of? Array
585
- parser.parse(path, element)[0]
586
- end
587
-
588
- def self.liberal_each(element, path=nil, namespaces={},
589
- variables={}, &block) # :nodoc:
590
- parser = LiberalXPathParser.new
591
- parser.namespaces = namespaces
592
- parser.variables = variables
593
- path = "*" unless path
594
- element = [element] unless element.kind_of? Array
595
- parser.parse(path, element).each( &block )
596
- end
597
- end
598
-
599
- class Element # :nodoc:
600
- unless REXML::Element.public_instance_methods.include? :inner_xml
601
- def inner_xml # :nodoc:
602
- result = ""
603
- self.each_child do |child|
604
- if child.kind_of? REXML::Comment
605
- result << "<!--" + child.to_s + "-->"
606
- else
607
- result << child.to_s
608
- end
609
- end
610
- return result.strip
611
- end
612
- else
613
- warn("inner_xml method already exists.")
614
- end
615
-
616
- def base_uri # :nodoc:
617
- begin
618
- base_attribute = FeedTools::XmlHelper.try_xpaths(self, [
619
- '@xml:base'
620
- ])
621
- if parent == nil || parent.kind_of?(REXML::Document)
622
- return nil if base_attribute == nil
623
- return base_attribute.value
624
- end
625
- if base_attribute != nil && parent == nil
626
- return base_attribute.value
627
- elsif parent != nil && base_attribute == nil
628
- return parent.base_uri
629
- elsif parent != nil && base_attribute != nil
630
- parent_base_uri = parent.base_uri
631
- if parent_base_uri != nil
632
- uri = URI.parse(parent_base_uri)
633
- return (uri + base_attribute.value).to_s
634
- else
635
- return base_attribute.value
636
- end
637
- end
638
- return nil
639
- rescue
640
- return nil
641
- end
642
- end
643
- end
644
- end
645
-
646
353
  begin
647
354
  unless FeedTools.feed_cache.nil?
648
355
  FeedTools.feed_cache.initialize_cache
@@ -78,6 +78,8 @@ module FeedTools
78
78
  # load the new feed
79
79
  feed.href = url
80
80
  feed.update! unless feed.configurations[:disable_update_from_remote]
81
+ Thread.pass
82
+
81
83
  return feed
82
84
  end
83
85
 
@@ -101,6 +103,10 @@ module FeedTools
101
103
  # Don't do anything if this option is set
102
104
  return
103
105
  end
106
+ if !FeedTools.feed_cache.nil? &&
107
+ !FeedTools.feed_cache.set_up_correctly?
108
+ FeedTools.feed_cache.initialize_cache()
109
+ end
104
110
  if !FeedTools.feed_cache.nil? &&
105
111
  !FeedTools.feed_cache.set_up_correctly?
106
112
  raise "Your feed cache system is incorrectly set up. " +
@@ -225,6 +231,7 @@ module FeedTools
225
231
  end
226
232
 
227
233
  @title = nil; self.title
234
+ self.href
228
235
  @link = nil; self.link
229
236
 
230
237
  self.last_retrieved = cached_feed.last_retrieved
@@ -254,7 +261,7 @@ module FeedTools
254
261
  self.http_headers[key.downcase] = value
255
262
  end
256
263
  self.last_retrieved = Time.now.gmtime
257
- @live = true
264
+ @live = false
258
265
  else
259
266
  @live = false
260
267
  end
@@ -724,20 +731,24 @@ module FeedTools
724
731
  end
725
732
  end
726
733
  if override_href.call(@href) && self.feed_data != nil
727
- # rdf:about is ordered last because a lot of people put the url to
728
- # the feed inside it instead of a link to their blog.
729
- # Ordering it last gives them as many chances as humanly possible
730
- # for them to redeem themselves. If the link turns out to be the
731
- # same as the blog link, it will be reset to the original value.
732
734
  for link_object in self.links
733
735
  if link_object.rel == 'self'
734
- if link_object.href != self.link
736
+ if link_object.href != self.link ||
737
+ (link_object.href =~ /xml/ ||
738
+ link_object.href =~ /atom/ ||
739
+ link_object.href =~ /feed/)
735
740
  @href = link_object.href
736
741
  @href_overridden = true
742
+ @link = nil
737
743
  return @href
738
744
  end
739
745
  end
740
746
  end
747
+ # rdf:about is ordered last because a lot of people put the url to
748
+ # the feed inside it instead of a link to their blog.
749
+ # Ordering it last gives them as many chances as humanly possible
750
+ # for them to redeem themselves. If the link turns out to be the
751
+ # same as the blog link, it will be reset to the original value.
741
752
  @href = FeedTools::XmlHelper.try_xpaths(self.channel_node, [
742
753
  "admin:feed/@rdf:resource",
743
754
  "admin:feed/@resource",
@@ -770,6 +781,9 @@ module FeedTools
770
781
  @href = original_href
771
782
  @href_overridden = false
772
783
  end
784
+ if @href_overridden == true
785
+ @link = nil
786
+ end
773
787
  end
774
788
  end
775
789
  return @href
@@ -791,7 +805,8 @@ module FeedTools
791
805
  "atom:title",
792
806
  "title",
793
807
  "dc:title",
794
- "channelTitle"
808
+ "channelTitle",
809
+ "TITLE"
795
810
  ])
796
811
  @title = FeedTools::HtmlHelper.process_text_construct(title_node,
797
812
  self.feed_type, self.feed_version)
@@ -822,6 +837,7 @@ module FeedTools
822
837
  "description",
823
838
  "summary",
824
839
  "abstract",
840
+ "ABSTRACT",
825
841
  "content:encoded",
826
842
  "encoded",
827
843
  "content",
@@ -947,19 +963,36 @@ module FeedTools
947
963
  max_score = 0
948
964
  for link_object in self.links.reverse
949
965
  score = 0
950
- if FeedTools::HtmlHelper.html_type?(link_object.type)
951
- score = score + 2
952
- elsif link_object.type != nil
953
- score = score - 1
966
+ next if link_object.href.nil?
967
+ if @href != nil && link_object.href == @href
968
+ score = score - 2
954
969
  end
955
- if FeedTools::HtmlHelper.xml_type?(link_object.type)
956
- score = score + 1
970
+ if link_object.type != nil
971
+ if (link_object.type =~ /image/ || link_object.type =~ /video/)
972
+ score = score - 2
973
+ end
974
+ if FeedTools::HtmlHelper.xml_type?(link_object.type)
975
+ score = score + 1
976
+ end
977
+ if FeedTools::HtmlHelper.html_type?(link_object.type)
978
+ score = score + 2
979
+ elsif link_object.type != nil
980
+ score = score - 1
981
+ end
982
+ end
983
+ if link_object.rel == "enclosure"
984
+ score = score - 2
957
985
  end
958
986
  if link_object.rel == "alternate"
959
987
  score = score + 1
960
988
  end
961
989
  if link_object.rel == "self"
962
990
  score = score - 1
991
+ if (link_object.href =~ /xml/ ||
992
+ link_object.href =~ /atom/ ||
993
+ link_object.href =~ /feed/)
994
+ score = score - 1
995
+ end
963
996
  end
964
997
  if score >= max_score
965
998
  max_score = score
@@ -1073,6 +1106,9 @@ module FeedTools
1073
1106
  unless link_object.rel.nil?
1074
1107
  link_object.rel = link_object.rel.downcase
1075
1108
  end
1109
+ if link_object.rel.nil? && self.feed_type == "atom"
1110
+ link_object.rel = "alternate"
1111
+ end
1076
1112
  link_object.type = FeedTools::XmlHelper.try_xpaths(link_node, [
1077
1113
  "@atom10:type",
1078
1114
  "@atom03:type",
@@ -1110,6 +1146,7 @@ module FeedTools
1110
1146
  link_object.length = nil
1111
1147
  end
1112
1148
  end
1149
+ @links = [] if @links.nil?
1113
1150
  @links << link_object
1114
1151
  end
1115
1152
  end
@@ -1128,10 +1165,13 @@ module FeedTools
1128
1165
  "@base"
1129
1166
  ], :select_result_value => true)
1130
1167
  if @base_uri.blank?
1131
- @base_uri =
1132
- FeedTools::GenericHelper.recursion_trap(:feed_base_uri) do
1133
- self.href
1134
- end
1168
+ begin
1169
+ @base_uri =
1170
+ FeedTools::GenericHelper.recursion_trap(:feed_base_uri) do
1171
+ self.href
1172
+ end
1173
+ rescue Exception
1174
+ end
1135
1175
  end
1136
1176
  if !@base_uri.blank?
1137
1177
  @base_uri = FeedTools::UriHelper.normalize_url(@base_uri)
@@ -1307,6 +1347,21 @@ module FeedTools
1307
1347
  ], :select_result_value => true)
1308
1348
  )
1309
1349
  end
1350
+ if @author.name.blank? && !@author.raw.blank? &&
1351
+ !@author.email.blank?
1352
+ name_scan = @author.raw.scan(
1353
+ /"?([^"]*)"? ?[\(<].*#{@author.email}.*[\)>].*/)
1354
+ if name_scan.flatten.size == 1
1355
+ @author.name = name_scan.flatten[0].strip
1356
+ end
1357
+ if @author.name.blank?
1358
+ name_scan = @author.raw.scan(
1359
+ /.*#{@author.email} ?[\(<]"?([^"]*)"?[\)>].*/)
1360
+ if name_scan.flatten.size == 1
1361
+ @author.name = name_scan.flatten[0].strip
1362
+ end
1363
+ end
1364
+ end
1310
1365
  @author.name = nil if @author.name.blank?
1311
1366
  @author.raw = nil if @author.raw.blank?
1312
1367
  @author.email = nil if @author.email.blank?
@@ -1935,7 +1990,13 @@ module FeedTools
1935
1990
  if @language.blank?
1936
1991
  @language = "en-us"
1937
1992
  end
1993
+ @language.gsub!(/_/, "-")
1938
1994
  @language = @language.downcase
1995
+ if @language.split('-').size > 1
1996
+ @language =
1997
+ "#{@language.split('-').first}-" +
1998
+ "#{@language.split('-').last.upcase}"
1999
+ end
1939
2000
  end
1940
2001
  return @language
1941
2002
  end
@@ -2267,6 +2328,12 @@ module FeedTools
2267
2328
  else
2268
2329
  xml_builder.description
2269
2330
  end
2331
+ unless self.author.email.blank?
2332
+ xml_builder.managingEditor(self.author.email)
2333
+ end
2334
+ unless self.publisher.email.blank?
2335
+ xml_builder.webMaster(self.publisher.email)
2336
+ end
2270
2337
  unless self.published.blank?
2271
2338
  xml_builder.pubDate(self.published.rfc822)
2272
2339
  end
@@ -2397,6 +2464,7 @@ module FeedTools
2397
2464
  end
2398
2465
  self.cache_object.http_headers = self.http_headers.to_yaml
2399
2466
  self.cache_object.last_retrieved = self.last_retrieved
2467
+ Thread.pass
2400
2468
  self.cache_object.save
2401
2469
  end
2402
2470
  end