feed-normalizer 1.3.0 → 1.3.1
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +6 -1
- data/Rakefile +2 -2
- data/lib/html-cleaner.rb +5 -5
- data/test/test_htmlcleaner.rb +3 -1
- metadata +3 -3
data/History.txt
CHANGED
@@ -1,6 +1,11 @@
|
|
1
|
+
1.3.1
|
2
|
+
|
3
|
+
* Small changes to work with hpricot 0.6. This release depends on hpricot 0.6.
|
4
|
+
* Reduced the greediness of a regexp that was removing html comments.
|
5
|
+
|
1
6
|
1.3.0
|
2
7
|
|
3
|
-
* Small changes to work with hpricot 0.5.
|
8
|
+
* Small changes to work with hpricot 0.5.
|
4
9
|
|
5
10
|
1.2.0
|
6
11
|
|
data/Rakefile
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
require 'hoe'
|
2
2
|
|
3
|
-
Hoe.new("feed-normalizer", "1.3.0") do |s|
|
3
|
+
Hoe.new("feed-normalizer", "1.3.1") do |s|
|
4
4
|
s.author = "Andrew A. Smith"
|
5
5
|
s.email = "andy@tinnedfruit.org"
|
6
6
|
s.url = "http://feed-normalizer.rubyforge.org/"
|
@@ -8,7 +8,7 @@ Hoe.new("feed-normalizer", "1.3.0") do |s|
|
|
8
8
|
s.description = s.paragraphs_of('Readme.txt', 1..2).join("\n\n")
|
9
9
|
s.changes = s.paragraphs_of('History.txt', 0..1).join("\n\n")
|
10
10
|
s.extra_deps << ["simple-rss", ">= 1.1"]
|
11
|
-
s.extra_deps << ["hpricot", ">= 0.5"]
|
11
|
+
s.extra_deps << ["hpricot", ">= 0.6"]
|
12
12
|
s.need_zip = true
|
13
13
|
s.need_tar = false
|
14
14
|
end
|
data/lib/html-cleaner.rb
CHANGED
@@ -73,18 +73,18 @@ module FeedNormalizer
|
|
73
73
|
|
74
74
|
# Remove attributes that aren't on the whitelist, or are suspicious URLs.
|
75
75
|
(doc/remaining_tags.join(",")).each do |element|
|
76
|
-
element.
|
76
|
+
element.raw_attributes.reject! do |attr,val|
|
77
77
|
!HTML_ATTRS.include?(attr) || (HTML_URI_ATTRS.include?(attr) && dodgy_uri?(val))
|
78
78
|
end
|
79
79
|
|
80
|
-
element.
|
80
|
+
element.raw_attributes = element.raw_attributes.build_hash {|a,v| [a, add_entities(v)]}
|
81
81
|
end unless remaining_tags.empty?
|
82
82
|
|
83
|
-
doc.traverse_text {|t| t.set(add_entities(t.
|
83
|
+
doc.traverse_text {|t| t.set(add_entities(t.to_html))}
|
84
84
|
|
85
85
|
# Return the tree, without comments. Ugly way of removing comments,
|
86
86
|
# but can't see a way to do this in Hpricot yet.
|
87
|
-
doc.to_s.gsub(/<\!--.*-->/mi, '')
|
87
|
+
doc.to_s.gsub(/<\!--.*?-->/mi, '')
|
88
88
|
end
|
89
89
|
|
90
90
|
# For all other feed elements:
|
@@ -100,7 +100,7 @@ module FeedNormalizer
|
|
100
100
|
doc = subtree(doc, :body)
|
101
101
|
|
102
102
|
out = ""
|
103
|
-
doc.traverse_text {|t| out << add_entities(t.
|
103
|
+
doc.traverse_text {|t| out << add_entities(t.to_html)}
|
104
104
|
|
105
105
|
return out
|
106
106
|
end
|
data/test/test_htmlcleaner.rb
CHANGED
@@ -47,7 +47,7 @@ class HtmlCleanerTest < Test::Unit::TestCase
|
|
47
47
|
assert_equal "<p>two</p>", HtmlCleaner.clean("<p>para</p><body><p>two</p>")
|
48
48
|
assert_equal "<p>para</p><bo /dy><p>two</p>", HtmlCleaner.clean("<p>para</p><bo /dy><p>two</p></body>")
|
49
49
|
assert_equal "<p>para</p><bo\\/dy><p>two</p>", HtmlCleaner.clean("<p>para</p><bo\\/dy><p>two</p></body>")
|
50
|
-
assert_equal "<p>
|
50
|
+
assert_equal "<p>two</p>", HtmlCleaner.clean("<p>para</p><body/><p>two</p></body>")
|
51
51
|
|
52
52
|
assert_equal "<p>one & two</p>", HtmlCleaner.clean(HtmlCleaner.clean("<p>one & two</p>"))
|
53
53
|
|
@@ -87,6 +87,8 @@ class HtmlCleanerTest < Test::Unit::TestCase
|
|
87
87
|
assert_equal "", HtmlCleaner.clean("<!--[if gte IE 4]><SCRIPT>alert('XSS');</SCRIPT><![endif]-->")
|
88
88
|
assert_equal "<p></p>", HtmlCleaner.clean("<p><!--[if gte IE 4]><SCRIPT>alert('XSS');</SCRIPT><![endif]--></p>")
|
89
89
|
assert_equal "<p>hi</p><p></p>", HtmlCleaner.clean("<p>hi</p><p><!--[if gte IE 4]><SCRIPT>alert('XSS');</SCRIPT><![endif]--></p>")
|
90
|
+
|
91
|
+
assert_equal "<p>hello</p>", HtmlCleaner.clean("<p>h<!-- hoho -->ell<!-- hoho -->o</p>")
|
90
92
|
end
|
91
93
|
|
92
94
|
def test_html_flatten
|
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.2
|
|
3
3
|
specification_version: 1
|
4
4
|
name: feed-normalizer
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 1.3.0
|
7
|
-
date: 2007-
|
6
|
+
version: 1.3.1
|
7
|
+
date: 2007-06-18 00:00:00 -07:00
|
8
8
|
summary: Extensible Ruby wrapper for Atom and RSS parsers
|
9
9
|
require_paths:
|
10
10
|
- lib
|
@@ -77,7 +77,7 @@ dependencies:
|
|
77
77
|
requirements:
|
78
78
|
- - ">="
|
79
79
|
- !ruby/object:Gem::Version
|
80
|
-
version: "0.5"
|
80
|
+
version: "0.6"
|
81
81
|
version:
|
82
82
|
- !ruby/object:Gem::Dependency
|
83
83
|
name: hoe
|