feed-normalizer 1.3.0 → 1.3.1

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.
data/History.txt CHANGED
@@ -1,6 +1,11 @@
1
+ 1.3.1
2
+
3
+ * Small changes to work with hpricot 0.6. This release depends on hpricot 0.6.
4
+ * Reduced the greediness of a regexp that was removing html comments.
5
+
1
6
  1.3.0
2
7
 
3
- * Small changes to work with hpricot 0.5.
8
+ * Small changes to work with hpricot 0.5.
4
9
 
5
10
  1.2.0
6
11
 
data/Rakefile CHANGED
@@ -1,6 +1,6 @@
1
1
  require 'hoe'
2
2
 
3
- Hoe.new("feed-normalizer", "1.3.0") do |s|
3
+ Hoe.new("feed-normalizer", "1.3.1") do |s|
4
4
  s.author = "Andrew A. Smith"
5
5
  s.email = "andy@tinnedfruit.org"
6
6
  s.url = "http://feed-normalizer.rubyforge.org/"
@@ -8,7 +8,7 @@ Hoe.new("feed-normalizer", "1.3.0") do |s|
8
8
  s.description = s.paragraphs_of('Readme.txt', 1..2).join("\n\n")
9
9
  s.changes = s.paragraphs_of('History.txt', 0..1).join("\n\n")
10
10
  s.extra_deps << ["simple-rss", ">= 1.1"]
11
- s.extra_deps << ["hpricot", ">= 0.4"]
11
+ s.extra_deps << ["hpricot", ">= 0.6"]
12
12
  s.need_zip = true
13
13
  s.need_tar = false
14
14
  end
data/lib/html-cleaner.rb CHANGED
@@ -73,18 +73,18 @@ module FeedNormalizer
73
73
 
74
74
  # Remove attributes that aren't on the whitelist, or are suspicious URLs.
75
75
  (doc/remaining_tags.join(",")).each do |element|
76
- element.attributes.reject! do |attr,val|
76
+ element.raw_attributes.reject! do |attr,val|
77
77
  !HTML_ATTRS.include?(attr) || (HTML_URI_ATTRS.include?(attr) && dodgy_uri?(val))
78
78
  end
79
79
 
80
- element.attributes = element.attributes.build_hash {|a,v| [a, add_entities(v)]}
80
+ element.raw_attributes = element.raw_attributes.build_hash {|a,v| [a, add_entities(v)]}
81
81
  end unless remaining_tags.empty?
82
82
 
83
- doc.traverse_text {|t| t.set(add_entities(t.to_s))}
83
+ doc.traverse_text {|t| t.set(add_entities(t.to_html))}
84
84
 
85
85
  # Return the tree, without comments. Ugly way of removing comments,
86
86
  # but can't see a way to do this in Hpricot yet.
87
- doc.to_s.gsub(/<\!--.*-->/mi, '')
87
+ doc.to_s.gsub(/<\!--.*?-->/mi, '')
88
88
  end
89
89
 
90
90
  # For all other feed elements:
@@ -100,7 +100,7 @@ module FeedNormalizer
100
100
  doc = subtree(doc, :body)
101
101
 
102
102
  out = ""
103
- doc.traverse_text {|t| out << add_entities(t.to_s)}
103
+ doc.traverse_text {|t| out << add_entities(t.to_html)}
104
104
 
105
105
  return out
106
106
  end
@@ -47,7 +47,7 @@ class HtmlCleanerTest < Test::Unit::TestCase
47
47
  assert_equal "<p>two</p>", HtmlCleaner.clean("<p>para</p><body><p>two</p>")
48
48
  assert_equal "<p>para</p>&lt;bo /dy&gt;<p>two</p>", HtmlCleaner.clean("<p>para</p><bo /dy><p>two</p></body>")
49
49
  assert_equal "<p>para</p>&lt;bo\\/dy&gt;<p>two</p>", HtmlCleaner.clean("<p>para</p><bo\\/dy><p>two</p></body>")
50
- assert_equal "<p>para</p><p>two</p>", HtmlCleaner.clean("<p>para</p><body/><p>two</p></body>")
50
+ assert_equal "<p>two</p>", HtmlCleaner.clean("<p>para</p><body/><p>two</p></body>")
51
51
 
52
52
  assert_equal "<p>one &amp; two</p>", HtmlCleaner.clean(HtmlCleaner.clean("<p>one & two</p>"))
53
53
 
@@ -87,6 +87,8 @@ class HtmlCleanerTest < Test::Unit::TestCase
87
87
  assert_equal "", HtmlCleaner.clean("<!--[if gte IE 4]><SCRIPT>alert('XSS');</SCRIPT><![endif]-->")
88
88
  assert_equal "<p></p>", HtmlCleaner.clean("<p><!--[if gte IE 4]><SCRIPT>alert('XSS');</SCRIPT><![endif]--></p>")
89
89
  assert_equal "<p>hi</p><p></p>", HtmlCleaner.clean("<p>hi</p><p><!--[if gte IE 4]><SCRIPT>alert('XSS');</SCRIPT><![endif]--></p>")
90
+
91
+ assert_equal "<p>hello</p>", HtmlCleaner.clean("<p>h<!-- hoho -->ell<!-- hoho -->o</p>")
90
92
  end
91
93
 
92
94
  def test_html_flatten
metadata CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.2
3
3
  specification_version: 1
4
4
  name: feed-normalizer
5
5
  version: !ruby/object:Gem::Version
6
- version: 1.3.0
7
- date: 2007-05-22 00:00:00 -07:00
6
+ version: 1.3.1
7
+ date: 2007-06-18 00:00:00 -07:00
8
8
  summary: Extensible Ruby wrapper for Atom and RSS parsers
9
9
  require_paths:
10
10
  - lib
@@ -77,7 +77,7 @@ dependencies:
77
77
  requirements:
78
78
  - - ">="
79
79
  - !ruby/object:Gem::Version
80
- version: "0.4"
80
+ version: "0.6"
81
81
  version:
82
82
  - !ruby/object:Gem::Dependency
83
83
  name: hoe