feed-normalizer 1.3.0 → 1.3.1

Sign up to get free protection for your applications and to get access to all the features.
data/History.txt CHANGED
@@ -1,6 +1,11 @@
1
+ 1.3.1
2
+
3
+ * Small changes to work with hpricot 0.6. This release depends on hpricot 0.6.
4
+ * Reduced the greediness of a regexp that was removing html comments.
5
+
1
6
  1.3.0
2
7
 
3
- * Small changes to work with hpricot 0.5.
8
+ * Small changes to work with hpricot 0.5.
4
9
 
5
10
  1.2.0
6
11
 
data/Rakefile CHANGED
@@ -1,6 +1,6 @@
1
1
  require 'hoe'
2
2
 
3
- Hoe.new("feed-normalizer", "1.3.0") do |s|
3
+ Hoe.new("feed-normalizer", "1.3.1") do |s|
4
4
  s.author = "Andrew A. Smith"
5
5
  s.email = "andy@tinnedfruit.org"
6
6
  s.url = "http://feed-normalizer.rubyforge.org/"
@@ -8,7 +8,7 @@ Hoe.new("feed-normalizer", "1.3.0") do |s|
8
8
  s.description = s.paragraphs_of('Readme.txt', 1..2).join("\n\n")
9
9
  s.changes = s.paragraphs_of('History.txt', 0..1).join("\n\n")
10
10
  s.extra_deps << ["simple-rss", ">= 1.1"]
11
- s.extra_deps << ["hpricot", ">= 0.4"]
11
+ s.extra_deps << ["hpricot", ">= 0.6"]
12
12
  s.need_zip = true
13
13
  s.need_tar = false
14
14
  end
data/lib/html-cleaner.rb CHANGED
@@ -73,18 +73,18 @@ module FeedNormalizer
73
73
 
74
74
  # Remove attributes that aren't on the whitelist, or are suspicious URLs.
75
75
  (doc/remaining_tags.join(",")).each do |element|
76
- element.attributes.reject! do |attr,val|
76
+ element.raw_attributes.reject! do |attr,val|
77
77
  !HTML_ATTRS.include?(attr) || (HTML_URI_ATTRS.include?(attr) && dodgy_uri?(val))
78
78
  end
79
79
 
80
- element.attributes = element.attributes.build_hash {|a,v| [a, add_entities(v)]}
80
+ element.raw_attributes = element.raw_attributes.build_hash {|a,v| [a, add_entities(v)]}
81
81
  end unless remaining_tags.empty?
82
82
 
83
- doc.traverse_text {|t| t.set(add_entities(t.to_s))}
83
+ doc.traverse_text {|t| t.set(add_entities(t.to_html))}
84
84
 
85
85
  # Return the tree, without comments. Ugly way of removing comments,
86
86
  # but can't see a way to do this in Hpricot yet.
87
- doc.to_s.gsub(/<\!--.*-->/mi, '')
87
+ doc.to_s.gsub(/<\!--.*?-->/mi, '')
88
88
  end
89
89
 
90
90
  # For all other feed elements:
@@ -100,7 +100,7 @@ module FeedNormalizer
100
100
  doc = subtree(doc, :body)
101
101
 
102
102
  out = ""
103
- doc.traverse_text {|t| out << add_entities(t.to_s)}
103
+ doc.traverse_text {|t| out << add_entities(t.to_html)}
104
104
 
105
105
  return out
106
106
  end
@@ -47,7 +47,7 @@ class HtmlCleanerTest < Test::Unit::TestCase
47
47
  assert_equal "<p>two</p>", HtmlCleaner.clean("<p>para</p><body><p>two</p>")
48
48
  assert_equal "<p>para</p>&lt;bo /dy&gt;<p>two</p>", HtmlCleaner.clean("<p>para</p><bo /dy><p>two</p></body>")
49
49
  assert_equal "<p>para</p>&lt;bo\\/dy&gt;<p>two</p>", HtmlCleaner.clean("<p>para</p><bo\\/dy><p>two</p></body>")
50
- assert_equal "<p>para</p><p>two</p>", HtmlCleaner.clean("<p>para</p><body/><p>two</p></body>")
50
+ assert_equal "<p>two</p>", HtmlCleaner.clean("<p>para</p><body/><p>two</p></body>")
51
51
 
52
52
  assert_equal "<p>one &amp; two</p>", HtmlCleaner.clean(HtmlCleaner.clean("<p>one & two</p>"))
53
53
 
@@ -87,6 +87,8 @@ class HtmlCleanerTest < Test::Unit::TestCase
87
87
  assert_equal "", HtmlCleaner.clean("<!--[if gte IE 4]><SCRIPT>alert('XSS');</SCRIPT><![endif]-->")
88
88
  assert_equal "<p></p>", HtmlCleaner.clean("<p><!--[if gte IE 4]><SCRIPT>alert('XSS');</SCRIPT><![endif]--></p>")
89
89
  assert_equal "<p>hi</p><p></p>", HtmlCleaner.clean("<p>hi</p><p><!--[if gte IE 4]><SCRIPT>alert('XSS');</SCRIPT><![endif]--></p>")
90
+
91
+ assert_equal "<p>hello</p>", HtmlCleaner.clean("<p>h<!-- hoho -->ell<!-- hoho -->o</p>")
90
92
  end
91
93
 
92
94
  def test_html_flatten
metadata CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.2
3
3
  specification_version: 1
4
4
  name: feed-normalizer
5
5
  version: !ruby/object:Gem::Version
6
- version: 1.3.0
7
- date: 2007-05-22 00:00:00 -07:00
6
+ version: 1.3.1
7
+ date: 2007-06-18 00:00:00 -07:00
8
8
  summary: Extensible Ruby wrapper for Atom and RSS parsers
9
9
  require_paths:
10
10
  - lib
@@ -77,7 +77,7 @@ dependencies:
77
77
  requirements:
78
78
  - - ">="
79
79
  - !ruby/object:Gem::Version
80
- version: "0.4"
80
+ version: "0.6"
81
81
  version:
82
82
  - !ruby/object:Gem::Dependency
83
83
  name: hoe