feed-normalizer 1.3.0 → 1.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +6 -1
- data/Rakefile +2 -2
- data/lib/html-cleaner.rb +5 -5
- data/test/test_htmlcleaner.rb +3 -1
- metadata +3 -3
data/History.txt
CHANGED
@@ -1,6 +1,11 @@
|
|
1
|
+
1.3.1
|
2
|
+
|
3
|
+
* Small changes to work with hpricot 0.6. This release depends on hpricot 0.6.
|
4
|
+
* Reduced the greediness of a regexp that was removing html comments.
|
5
|
+
|
1
6
|
1.3.0
|
2
7
|
|
3
|
-
* Small changes to work with hpricot 0.5.
|
8
|
+
* Small changes to work with hpricot 0.5.
|
4
9
|
|
5
10
|
1.2.0
|
6
11
|
|
data/Rakefile
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
require 'hoe'
|
2
2
|
|
3
|
-
Hoe.new("feed-normalizer", "1.3.0") do |s|
|
3
|
+
Hoe.new("feed-normalizer", "1.3.1") do |s|
|
4
4
|
s.author = "Andrew A. Smith"
|
5
5
|
s.email = "andy@tinnedfruit.org"
|
6
6
|
s.url = "http://feed-normalizer.rubyforge.org/"
|
@@ -8,7 +8,7 @@ Hoe.new("feed-normalizer", "1.3.0") do |s|
|
|
8
8
|
s.description = s.paragraphs_of('Readme.txt', 1..2).join("\n\n")
|
9
9
|
s.changes = s.paragraphs_of('History.txt', 0..1).join("\n\n")
|
10
10
|
s.extra_deps << ["simple-rss", ">= 1.1"]
|
11
|
-
s.extra_deps << ["hpricot", ">= 0.5"]
|
11
|
+
s.extra_deps << ["hpricot", ">= 0.6"]
|
12
12
|
s.need_zip = true
|
13
13
|
s.need_tar = false
|
14
14
|
end
|
data/lib/html-cleaner.rb
CHANGED
@@ -73,18 +73,18 @@ module FeedNormalizer
|
|
73
73
|
|
74
74
|
# Remove attributes that aren't on the whitelist, or are suspicious URLs.
|
75
75
|
(doc/remaining_tags.join(",")).each do |element|
|
76
|
-
element.
|
76
|
+
element.raw_attributes.reject! do |attr,val|
|
77
77
|
!HTML_ATTRS.include?(attr) || (HTML_URI_ATTRS.include?(attr) && dodgy_uri?(val))
|
78
78
|
end
|
79
79
|
|
80
|
-
element.
|
80
|
+
element.raw_attributes = element.raw_attributes.build_hash {|a,v| [a, add_entities(v)]}
|
81
81
|
end unless remaining_tags.empty?
|
82
82
|
|
83
|
-
doc.traverse_text {|t| t.set(add_entities(t.
|
83
|
+
doc.traverse_text {|t| t.set(add_entities(t.to_html))}
|
84
84
|
|
85
85
|
# Return the tree, without comments. Ugly way of removing comments,
|
86
86
|
# but can't see a way to do this in Hpricot yet.
|
87
|
-
doc.to_s.gsub(
|
87
|
+
doc.to_s.gsub(/<\!--.*?-->/mi, '')
|
88
88
|
end
|
89
89
|
|
90
90
|
# For all other feed elements:
|
@@ -100,7 +100,7 @@ module FeedNormalizer
|
|
100
100
|
doc = subtree(doc, :body)
|
101
101
|
|
102
102
|
out = ""
|
103
|
-
doc.traverse_text {|t| out << add_entities(t.
|
103
|
+
doc.traverse_text {|t| out << add_entities(t.to_html)}
|
104
104
|
|
105
105
|
return out
|
106
106
|
end
|
data/test/test_htmlcleaner.rb
CHANGED
@@ -47,7 +47,7 @@ class HtmlCleanerTest < Test::Unit::TestCase
|
|
47
47
|
assert_equal "<p>two</p>", HtmlCleaner.clean("<p>para</p><body><p>two</p>")
|
48
48
|
assert_equal "<p>para</p><bo /dy><p>two</p>", HtmlCleaner.clean("<p>para</p><bo /dy><p>two</p></body>")
|
49
49
|
assert_equal "<p>para</p><bo\\/dy><p>two</p>", HtmlCleaner.clean("<p>para</p><bo\\/dy><p>two</p></body>")
|
50
|
-
assert_equal "<p>
|
50
|
+
assert_equal "<p>two</p>", HtmlCleaner.clean("<p>para</p><body/><p>two</p></body>")
|
51
51
|
|
52
52
|
assert_equal "<p>one & two</p>", HtmlCleaner.clean(HtmlCleaner.clean("<p>one & two</p>"))
|
53
53
|
|
@@ -87,6 +87,8 @@ class HtmlCleanerTest < Test::Unit::TestCase
|
|
87
87
|
assert_equal "", HtmlCleaner.clean("<!--[if gte IE 4]><SCRIPT>alert('XSS');</SCRIPT><![endif]-->")
|
88
88
|
assert_equal "<p></p>", HtmlCleaner.clean("<p><!--[if gte IE 4]><SCRIPT>alert('XSS');</SCRIPT><![endif]--></p>")
|
89
89
|
assert_equal "<p>hi</p><p></p>", HtmlCleaner.clean("<p>hi</p><p><!--[if gte IE 4]><SCRIPT>alert('XSS');</SCRIPT><![endif]--></p>")
|
90
|
+
|
91
|
+
assert_equal "<p>hello</p>", HtmlCleaner.clean("<p>h<!-- hoho -->ell<!-- hoho -->o</p>")
|
90
92
|
end
|
91
93
|
|
92
94
|
def test_html_flatten
|
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.2
|
|
3
3
|
specification_version: 1
|
4
4
|
name: feed-normalizer
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 1.3.0
|
7
|
-
date: 2007-
|
6
|
+
version: 1.3.1
|
7
|
+
date: 2007-06-18 00:00:00 -07:00
|
8
8
|
summary: Extensible Ruby wrapper for Atom and RSS parsers
|
9
9
|
require_paths:
|
10
10
|
- lib
|
@@ -77,7 +77,7 @@ dependencies:
|
|
77
77
|
requirements:
|
78
78
|
- - ">="
|
79
79
|
- !ruby/object:Gem::Version
|
80
|
-
version: "0.5"
|
80
|
+
version: "0.6"
|
81
81
|
version:
|
82
82
|
- !ruby/object:Gem::Dependency
|
83
83
|
name: hoe
|