feedtools 0.2.18 → 0.2.19

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
data/CHANGELOG CHANGED
@@ -1,3 +1,29 @@
1
+ == FeedTools 0.2.19
2
+ * lousy encoding support (as opposed to none at all)
3
+ * xml processing instruction now correctly prefixes generated feeds
4
+ * attributes are escaped properly when generating feeds
5
+ * uppercase html is no longer sanitized for not being in the whitelist
6
+ * added alias method for assigning to entries
7
+ * changed the xpath querying to be much, much more DRY
8
+ * find_node and find_all_nodes are actually useful now
9
+ * full case-insensitivity implemented for the xpath helper methods
10
+ * fixed bug in tests where some assertion failures could affect other tests
11
+ * fixed bug where the feed item author would sometimes be parsed incorrectly
12
+ * fixed bug where the convertLineBreaks element would break feed entries
13
+ * default (i.e. preferred) methods will be Atom-style instead of RSS-style
14
+ * default feed output format changed to Atom 1.0
15
+ * itunes namespace corrected
16
+ * fixed images property when dealing with atom
17
+ * fixed atom link property
18
+ * improved timestamp handling
19
+ * whitespace nodes now ignored by REXML
20
+ * added option to disable timestamp estimation
21
+ * added option to limit time-to-live to some upper maximum
22
+ * enclosures included in feed generation
23
+ * no longer uses the cache at all for file:/// urls
24
+ * changed itunes:keywords to use commas
25
+ * testing now excludes cache testing by default, use "test_all" to include it
26
+ * more tests
1
27
  == FeedTools 0.2.18
2
28
  * no longer ever polls more often than once every 30 minutes
3
29
  * fixed overlooked improperly refactored enclosure code
@@ -6,6 +32,8 @@
6
32
  * test cases now implemented using helpers
7
33
  * fixed issue with timeouts
8
34
  * fixed stack overflow while estimating timestamps
35
+ * fixed some namespace issue with atom
36
+ * added base64 decoding support
9
37
  == FeedTools 0.2.17
10
38
  * more fixes for timestamping of feed items
11
39
  * fixed nil bug in root_node, feed_type, feed_version, build_xml
data/lib/feed_tools.rb CHANGED
@@ -30,9 +30,9 @@ end
30
30
 
31
31
  FEED_TOOLS_ENV = ENV['FEED_TOOLS_ENV'] ||
32
32
  ENV['RAILS_ENV'] ||
33
- 'production' # :nodoc:
33
+ 'development' # :nodoc:
34
34
 
35
- FEED_TOOLS_VERSION = "0.2.18"
35
+ FEED_TOOLS_VERSION = "0.2.19"
36
36
 
37
37
  FEED_TOOLS_NAMESPACES = {
38
38
  "admin" => "http://webns.net/mvcb/",
@@ -40,8 +40,10 @@ FEED_TOOLS_NAMESPACES = {
40
40
  "annotate" => "http://purl.org/rss/1.0/modules/annotate/",
41
41
  "atom10" => "http://www.w3.org/2005/Atom",
42
42
  "atom03" => "http://purl.org/atom/ns#",
43
+ # "atom-blog" => "http://purl.org/atom-blog/ns#",
43
44
  "audio" => "http://media.tangent.org/rss/1.0/",
44
45
  "blogChannel" => "http://backend.userland.com/blogChannelModule",
46
+ "blogger" => "http://www.blogger.com/atom/ns#",
45
47
  "cc" => "http://web.resource.org/cc/",
46
48
  "creativeCommons" => "http://backend.userland.com/creativeCommonsRssModule",
47
49
  "co" => "http://purl.org/rss/1.0/modules/company",
@@ -56,7 +58,7 @@ FEED_TOOLS_NAMESPACES = {
56
58
  "feedburner" => "http://rssnamespace.org/feedburner/ext/1.0",
57
59
  "foaf" => "http://xmlns.com/foaf/0.1/",
58
60
  "fm" => "http://freshmeat.net/rss/fm/",
59
- "itunes" => "http://www.itunes.com/DTDs/Podcast-1.0.dtd",
61
+ "itunes" => "http://www.itunes.com/dtds/podcast-1.0.dtd",
60
62
  "l" => "http://purl.org/rss/1.0/modules/link/",
61
63
  "media" => "http://search.yahoo.com/mrss",
62
64
  "pingback" => "http://madskills.com/public/xml/rss/module/pingback/",
@@ -97,12 +99,7 @@ begin
97
99
 
98
100
  require 'rubygems'
99
101
 
100
- begin
101
- require 'builder'
102
- rescue LoadError
103
- # RubyGems version is not available, use included Builder
104
- require 'feed_tools/vendor/builder'
105
- end
102
+ require_gem('builder', '>= 1.2.4')
106
103
 
107
104
  begin
108
105
  require 'tidy'
@@ -113,8 +110,10 @@ begin
113
110
  require 'feed_tools/vendor/htree'
114
111
 
115
112
  require 'net/http'
116
- require 'net/https'
117
- require 'net/ftp'
113
+
114
+ # TODO: Not used yet, don't load since it'll only be a performance hit
115
+ # require 'net/https'
116
+ # require 'net/ftp'
118
117
 
119
118
  require 'rexml/document'
120
119
 
@@ -125,7 +124,8 @@ begin
125
124
  require 'yaml'
126
125
  require 'base64'
127
126
 
128
- require_gem('activerecord', '>= 1.10.1')
127
+ require_gem('activesupport', '>= 1.1.1')
128
+ require_gem('activerecord', '>= 1.11.1')
129
129
  require_gem('uuidtools', '>= 0.1.2')
130
130
 
131
131
  require 'feed_tools/feed'
@@ -155,25 +155,55 @@ end
155
155
  # slashdot_feed.items.first.find_node("slash:hitparade/text()").value
156
156
  # => "43,37,28,23,11,3,1"
157
157
  module FeedTools
158
+ @configurations = {}
159
+
160
+ def FeedTools.load_configurations
161
+ if @configurations.blank?
162
+ config_hash = {}
163
+ @configurations = {
164
+ :feed_cache => "FeedTools::DatabaseFeedCache",
165
+ :user_agent => "FeedTools/#{FEED_TOOLS_VERSION} " +
166
+ "+http://www.sporkmonger.com/projects/feedtools/",
167
+ :generator_name => "FeedTools/#{FEED_TOOLS_VERSION}",
168
+ :generator_href => "http://www.sporkmonger.com/projects/feedtools/",
169
+ :tidy_enabled => false,
170
+ :tidy_options => {},
171
+ :sanitize_with_nofollow => true,
172
+ :timestamp_estimation_enabled => true,
173
+ :url_normalization_enabled => true,
174
+ :strip_comment_count => false,
175
+ :max_ttl => 3.days.to_s,
176
+ :output_encoding => "utf-8",
177
+ :no_content_value => "[no description]"
178
+ }.merge(config_hash)
179
+ end
180
+ return @configurations
181
+ end
158
182
 
159
- @force_tidy_enabled = true
160
- @tidy_enabled = false
161
- @feed_cache = DatabaseFeedCache
162
- @user_agent = "FeedTools/#{FEED_TOOLS_VERSION} " +
163
- "+http://www.sporkmonger.com/projects/feedtools/"
164
- @no_content_string = "[no description]"
183
+ # Resets configuration to a clean load
184
+ def FeedTools.reset_configurations
185
+ @configurations = nil
186
+ FeedTools.load_configurations
187
+ end
188
+
189
+ # Returns the configuration hash for FeedTools
190
+ def FeedTools.configurations
191
+ if @configurations.blank?
192
+ FeedTools.load_configurations()
193
+ end
194
+ return @configurations
195
+ end
196
+
197
+ # Sets the configuration hash for FeedTools
198
+ def FeedTools.configurations=(new_configurations)
199
+ @configurations = new_configurations
200
+ end
165
201
 
166
202
  # Error raised when a feed cannot be retrieved
167
203
  class FeedAccessError < StandardError
168
204
  end
169
205
 
170
206
  # Returns the current caching mechanism.
171
- def FeedTools.feed_cache
172
- return @feed_cache
173
- end
174
-
175
- # Sets the current caching mechanism. If set to nil, disables caching.
176
- # Default is the DatabaseFeedCache class.
177
207
  #
178
208
  # Objects of this class must accept the following messages:
179
209
  # id
@@ -199,12 +229,26 @@ module FeedTools
199
229
  # find_by_url
200
230
  # initialize_cache
201
231
  # connected?
202
- def FeedTools.feed_cache=(new_feed_cache)
203
- # TODO: ensure that the feed cache class actually does those things.
204
- # ==================================================================
205
- @feed_cache = new_feed_cache
232
+ def FeedTools.feed_cache
233
+ return nil if FeedTools.configurations[:feed_cache].blank?
234
+ class_name = FeedTools.configurations[:feed_cache].to_s
235
+ if @feed_cache.nil? || @feed_cache.to_s != class_name
236
+ begin
237
+ cache_class = eval(class_name)
238
+ if cache_class.kind_of?(Class)
239
+ @feed_cache = cache_class
240
+ return cache_class
241
+ else
242
+ return nil
243
+ end
244
+ rescue
245
+ return nil
246
+ end
247
+ else
248
+ return @feed_cache
249
+ end
206
250
  end
207
-
251
+
208
252
  # Returns true if FeedTools.feed_cache is not nil and a connection with
209
253
  # the cache has been successfully established. Also returns false if an
210
254
  # error is raised while trying to determine the status of the cache.
@@ -216,28 +260,7 @@ module FeedTools
216
260
  return false
217
261
  end
218
262
  end
219
-
220
- # Returns the currently used user agent string.
221
- def FeedTools.user_agent
222
- return @user_agent
223
- end
224
-
225
- # Sets the user agent string to send in the http headers.
226
- def FeedTools.user_agent=(new_user_agent)
227
- @user_agent = new_user_agent
228
- end
229
-
230
- # Returns the currently used no content string.
231
- def FeedTools.no_content_string
232
- return @no_content_string
233
- end
234
-
235
- # Sets the no content string to use when a feed is missing a content element.
236
- # Used only for xml output.
237
- def FeedTools.no_content_string=(new_no_content_string)
238
- @no_content_string = new_no_content_string
239
- end
240
-
263
+
241
264
  # Returns true if the html tidy module can be used.
242
265
  #
243
266
  # Obviously, you need the tidy gem installed in order to run with html
@@ -255,7 +278,7 @@ module FeedTools
255
278
  def FeedTools.tidy_enabled?
256
279
  # This is an override variable to keep tidy from being used even if it
257
280
  # is available.
258
- if @force_tidy_enabled == false
281
+ if FeedTools.configurations[:tidy_enabled] == false
259
282
  return false
260
283
  end
261
284
  if @tidy_enabled.nil? || @tidy_enabled == false
@@ -337,13 +360,6 @@ module FeedTools
337
360
  end
338
361
  return @tidy_enabled
339
362
  end
340
-
341
- # Turns html tidy support on or off. Be aware, that setting this to true
342
- # does not mean tidy will be enabled. It simply means that tidy will be
343
- # enabled if it is available to be enabled.
344
- def FeedTools.tidy_enabled=(new_tidy_enabled)
345
- @force_tidy_enabled = new_tidy_enabled
346
- end
347
363
 
348
364
  # Attempts to ensure that the passed url is valid and sane. Accepts very, very ugly urls
349
365
  # and makes every effort to figure out what it was supposed to be. Also translates from
@@ -470,8 +486,8 @@ module FeedTools
470
486
  def FeedTools.escape_entities(html)
471
487
  return nil if html.nil?
472
488
  escaped_html = CGI.escapeHTML(html)
473
- unescaped_html.gsub!(/'/, "&apos;")
474
- unescaped_html.gsub!(/"/, "&quot;")
489
+ escaped_html.gsub!(/'/, "&apos;")
490
+ escaped_html.gsub!(/"/, "&quot;")
475
491
  return escaped_html
476
492
  end
477
493
 
@@ -540,6 +556,9 @@ module FeedTools
540
556
  else
541
557
  tidy_html = html
542
558
  end
559
+ if tidy_html.blank? && !html.blank?
560
+ tidy_html = html.strip
561
+ end
543
562
  return tidy_html
544
563
  end
545
564
 
@@ -586,7 +605,7 @@ module FeedTools
586
605
  if html_node.respond_to? :children
587
606
  for child in html_node.children
588
607
  if child.kind_of? REXML::Element
589
- unless acceptable_elements.include? child.name
608
+ unless acceptable_elements.include? child.name.downcase
590
609
  if mode == :strip
591
610
  html_node.delete_element(child)
592
611
  else
@@ -596,7 +615,7 @@ module FeedTools
596
615
  end
597
616
  end
598
617
  for attribute in child.attributes.keys
599
- unless acceptable_attributes.include? attribute
618
+ unless acceptable_attributes.include? attribute.downcase
600
619
  child.delete_attribute(attribute)
601
620
  end
602
621
  end
@@ -655,6 +674,252 @@ module FeedTools
655
674
  end
656
675
 
657
676
  module REXML # :nodoc:
677
+ class LiberalXPathParser < XPathParser # :nodoc:
678
+ private
679
+ def internal_parse(path_stack, nodeset) # :nodoc:
680
+ return nodeset if nodeset.size == 0 or path_stack.size == 0
681
+ case path_stack.shift
682
+ when :document
683
+ return [ nodeset[0].root.parent ]
684
+
685
+ when :qname
686
+ prefix = path_stack.shift.downcase
687
+ name = path_stack.shift.downcase
688
+ n = nodeset.clone
689
+ ns = @namespaces[prefix]
690
+ ns = ns ? ns : ''
691
+ n.delete_if do |node|
692
+ if node.node_type == :element and ns == ''
693
+ ns = node.namespace( prefix )
694
+ end
695
+ !(node.node_type == :element and
696
+ node.name.downcase == name and node.namespace == ns )
697
+ end
698
+ return n
699
+
700
+ when :any
701
+ n = nodeset.clone
702
+ n.delete_if { |node| node.node_type != :element }
703
+ return n
704
+
705
+ when :self
706
+ # THIS SPACE LEFT INTENTIONALLY BLANK
707
+
708
+ when :processing_instruction
709
+ target = path_stack.shift
710
+ n = nodeset.clone
711
+ n.delete_if do |node|
712
+ (node.node_type != :processing_instruction) or
713
+ ( !target.nil? and ( node.target != target ) )
714
+ end
715
+ return n
716
+
717
+ when :text
718
+ n = nodeset.clone
719
+ n.delete_if do |node|
720
+ node.node_type != :text
721
+ end
722
+ return n
723
+
724
+ when :comment
725
+ n = nodeset.clone
726
+ n.delete_if do |node|
727
+ node.node_type != :comment
728
+ end
729
+ return n
730
+
731
+ when :node
732
+ return nodeset
733
+
734
+ when :child
735
+ new_nodeset = []
736
+ nt = nil
737
+ for node in nodeset
738
+ nt = node.node_type
739
+ new_nodeset += node.children if nt == :element or nt == :document
740
+ end
741
+ return new_nodeset
742
+
743
+ when :literal
744
+ literal = path_stack.shift
745
+ if literal =~ /^\d+(\.\d+)?$/
746
+ return ($1 ? literal.to_f : literal.to_i)
747
+ end
748
+ return literal
749
+
750
+ when :attribute
751
+ new_nodeset = []
752
+ case path_stack.shift
753
+ when :qname
754
+ prefix = path_stack.shift
755
+ name = path_stack.shift.downcase
756
+ for element in nodeset
757
+ if element.node_type == :element
758
+ for attribute_name in element.attributes.keys
759
+ if attribute_name.downcase == name
760
+ attrib = element.attribute( attribute_name,
761
+ @namespaces[prefix] )
762
+ new_nodeset << attrib if attrib
763
+ end
764
+ end
765
+ end
766
+ end
767
+ when :any
768
+ for element in nodeset
769
+ if element.node_type == :element
770
+ new_nodeset += element.attributes.to_a
771
+ end
772
+ end
773
+ end
774
+ return new_nodeset
775
+
776
+ when :parent
777
+ return internal_parse( path_stack, nodeset.collect{|n| n.parent}.compact )
778
+
779
+ when :ancestor
780
+ new_nodeset = []
781
+ for node in nodeset
782
+ while node.parent
783
+ node = node.parent
784
+ new_nodeset << node unless new_nodeset.include? node
785
+ end
786
+ end
787
+ return new_nodeset
788
+
789
+ when :ancestor_or_self
790
+ new_nodeset = []
791
+ for node in nodeset
792
+ if node.node_type == :element
793
+ new_nodeset << node
794
+ while ( node.parent )
795
+ node = node.parent
796
+ new_nodeset << node unless new_nodeset.include? node
797
+ end
798
+ end
799
+ end
800
+ return new_nodeset
801
+
802
+ when :predicate
803
+ predicate = path_stack.shift
804
+ new_nodeset = []
805
+ Functions::size = nodeset.size
806
+ nodeset.size.times do |index|
807
+ node = nodeset[index]
808
+ Functions::node = node
809
+ Functions::index = index+1
810
+ result = Predicate( predicate, node )
811
+ if result.kind_of? Numeric
812
+ new_nodeset << node if result == (index+1)
813
+ elsif result.instance_of? Array
814
+ new_nodeset << node if result.size > 0
815
+ else
816
+ new_nodeset << node if result
817
+ end
818
+ end
819
+ return new_nodeset
820
+
821
+ when :descendant_or_self
822
+ rv = descendant_or_self( path_stack, nodeset )
823
+ path_stack.clear
824
+ return rv
825
+
826
+ when :descendant
827
+ results = []
828
+ nt = nil
829
+ for node in nodeset
830
+ nt = node.node_type
831
+ if nt == :element or nt == :document
832
+ results += internal_parse(
833
+ path_stack.clone.unshift( :descendant_or_self ),
834
+ node.children )
835
+ end
836
+ end
837
+ return results
838
+
839
+ when :following_sibling
840
+ results = []
841
+ for node in nodeset
842
+ all_siblings = node.parent.children
843
+ current_index = all_siblings.index( node )
844
+ following_siblings = all_siblings[ current_index+1 .. -1 ]
845
+ results += internal_parse( path_stack.clone, following_siblings )
846
+ end
847
+ return results
848
+
849
+ when :preceding_sibling
850
+ results = []
851
+ for node in nodeset
852
+ all_siblings = node.parent.children
853
+ current_index = all_siblings.index( node )
854
+ preceding_siblings = all_siblings[ 0 .. current_index-1 ]
855
+ results += internal_parse( path_stack.clone, preceding_siblings )
856
+ end
857
+ return results
858
+
859
+ when :preceding
860
+ new_nodeset = []
861
+ for node in nodeset
862
+ new_nodeset += preceding( node )
863
+ end
864
+ return new_nodeset
865
+
866
+ when :following
867
+ new_nodeset = []
868
+ for node in nodeset
869
+ new_nodeset += following( node )
870
+ end
871
+ return new_nodeset
872
+
873
+ when :namespace
874
+ new_set = []
875
+ for node in nodeset
876
+ if node.node_type == :element or node.node_type == :attribute
877
+ new_nodeset << node.namespace
878
+ end
879
+ end
880
+ return new_nodeset
881
+
882
+ when :variable
883
+ var_name = path_stack.shift
884
+ return @variables[ var_name ]
885
+
886
+ end
887
+ nodeset
888
+ end
889
+ end
890
+
891
+ class XPath # :nodoc:
892
+ def self.liberal_match(element, path=nil, namespaces={},
893
+ variables={}) # :nodoc:
894
+ parser = LiberalXPathParser.new
895
+ parser.namespaces = namespaces
896
+ parser.variables = variables
897
+ path = "*" unless path
898
+ element = [element] unless element.kind_of? Array
899
+ parser.parse(path, element)
900
+ end
901
+
902
+ def self.liberal_first(element, path=nil, namespaces={},
903
+ variables={}) # :nodoc:
904
+ parser = LiberalXPathParser.new
905
+ parser.namespaces = namespaces
906
+ parser.variables = variables
907
+ path = "*" unless path
908
+ element = [element] unless element.kind_of? Array
909
+ parser.parse(path, element)[0]
910
+ end
911
+
912
+ def self.liberal_each(element, path=nil, namespaces={},
913
+ variables={}, &block) # :nodoc:
914
+ parser = LiberalXPathParser.new
915
+ parser.namespaces = namespaces
916
+ parser.variables = variables
917
+ path = "*" unless path
918
+ element = [element] unless element.kind_of? Array
919
+ parser.parse(path, element).each( &block )
920
+ end
921
+ end
922
+
658
923
  class Element # :nodoc:
659
924
  unless REXML::Element.public_instance_methods.include? :inner_xml
660
925
  def inner_xml # :nodoc: