RubyGems - feedme - Versions diffs - 0.8.0 → 0.8.1 - Mend

feedme 0.8.0 → 0.8.1

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (4)

data/lib/feedme.rb CHANGED

@@ -153,6 +153,10 @@ module FeedMe
           match = Regexp.new(regexp).match(str)
           match.nil? ? nil : match[1]
         end,
+        # this shouldn't be necessary since all text is automatically
+        # unescaped, but some feeds double-escape HTML
+        :esc => proc {|str| CGI.unescapeHTML(str) }
     	}
     end
@@ -528,7 +532,7 @@ module FeedMe
   end
   class Parser < FeedData
-    attr_reader :fm_source, :fm_options, :fm_type, :fm_tags, :fm_unparsed
+    attr_reader :fm_source, :fm_options, :fm_type, :fm_tags, :fm_parsed, :fm_unparsed
     def initialize(builder, source, options={})
       super(nil, nil, builder)
@@ -672,13 +676,13 @@ module FeedMe
     end
   	def clean_content(tag, attrs, content, parent)
-  		content = content.to_s
-  		if fm_builder.date_tags.include? tag
+  	  content = content.to_s
+  	  if fm_builder.date_tags.include? tag
   			content = Time.parse(content) rescue unescape(content)
   		else
   		  content = unescape(content)
   		end
       unless attrs.empty?
         hash = FeedData.new(tag, parent, fm_builder)
         attrs.each_pair {|key, value| hash[key] = unescape(value) }
@@ -706,7 +710,7 @@ module FeedMe
     def unescape(content)
       content = CGI.unescapeHTML(content)
       query = content.match(/^(http:.*\?)(.*)$/)
       content = query[1] + CGI.unescape(query[2]) if query

data/lib/hpricot-util.rb CHANGED

@@ -30,7 +30,7 @@ module FeedMe
     # sanitize HTML
     # todo: dup code to fix bugs
     def clean_html(html)
-      FeedMe::HtmlCleaner.clean(html)
+      FeedMe::HtmlCleaner.clean(html)
     end
   end

data/lib/html-cleaner.rb CHANGED

@@ -59,17 +59,17 @@ module FeedMe
       # Extra (i.e. unmatched) ending tags and comments are removed.
       def clean(str)
         str = unescapeHTML(str)
         doc = Hpricot(str, :fixup_tags => true)
         doc = subtree(doc, :body)
         # get all the tags in the document
         # Somewhere near hpricot 0.4.92 "*" starting to return all elements,
         # including text nodes instead of just tagged elements.
         tags = (doc/"*").inject([]) { |m,e| m << e.name if(e.respond_to?(:name) && e.name =~ /^\w+$/) ; m }.uniq
         # Remove tags that aren't whitelisted.
-        remove_tags!(doc, tags - HTML_ELEMENTS)
+        diff = tags - HTML_ELEMENTS
+        remove_tags!(doc, diff)
         remaining_tags = tags & HTML_ELEMENTS
         # Remove attributes that aren't on the whitelist, or are suspicious URLs.
@@ -80,9 +80,9 @@ module FeedMe
           end
           element.raw_attributes = element.raw_attributes.build_hash {|a,v| [a, add_entities(v)]}
         end unless remaining_tags.empty?
         doc.traverse_text {|t| t.set(add_entities(t.to_html))}
         # Return the tree, without comments. Ugly way of removing comments,
         # but can't see a way to do this in Hpricot yet.
         doc.to_s.gsub(/<\!--.*?-->/mi, '')

metadata CHANGED

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: feedme
 version: !ruby/object:Gem::Version
-  version: 0.8.0
+  version: 0.8.1
 platform: ruby
 authors:
 - John Didion
@@ -9,7 +9,7 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2009-12-28 00:00:00 -05:00
+date: 2010-01-02 00:00:00 -05:00
 default_executable:
 dependencies: []