feed-normalizer 1.2.0 → 1.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +3 -0
- data/Rakefile +1 -1
- data/lib/html-cleaner.rb +6 -3
- data/lib/structures.rb +1 -1
- data/test/test_htmlcleaner.rb +2 -0
- metadata +10 -9
data/History.txt
CHANGED
data/Rakefile
CHANGED
data/lib/html-cleaner.rb
CHANGED
@@ -59,11 +59,13 @@ module FeedNormalizer
|
|
59
59
|
def clean(str)
|
60
60
|
str = unescapeHTML(str)
|
61
61
|
|
62
|
-
doc = Hpricot(str, :
|
62
|
+
doc = Hpricot(str, :fixup_tags => true)
|
63
63
|
doc = subtree(doc, :body)
|
64
64
|
|
65
65
|
# get all the tags in the document
|
66
|
-
|
66
|
+
# Somewhere near hpricot 0.4.92, "*" started to return all elements,
|
67
|
+
# including text nodes instead of just tagged elements.
|
68
|
+
tags = (doc/"*").inject([]) { |m,e| m << e.name if(e.respond_to?(:name) && e.name =~ /^\w+$/) ; m }.uniq
|
67
69
|
|
68
70
|
# Remove tags that aren't whitelisted.
|
69
71
|
remove_tags!(doc, tags - HTML_ELEMENTS)
|
@@ -109,6 +111,7 @@ module FeedNormalizer
|
|
109
111
|
# This method rejects javascript, vbscript, livescript, mocha and data URLs.
|
110
112
|
# It *could* be refined to only deny dangerous data URLs, however.
|
111
113
|
def dodgy_uri?(uri)
|
114
|
+
uri = uri.to_s
|
112
115
|
|
113
116
|
# special case for poorly-formed entities (missing ';')
|
114
117
|
# if these occur *anywhere* within the string, then throw it out.
|
@@ -143,7 +146,7 @@ module FeedNormalizer
|
|
143
146
|
#
|
144
147
|
# This method could be improved by adding a whitelist of html entities.
|
145
148
|
def add_entities(str)
|
146
|
-
str.gsub(/\"/n, '&quot;').gsub(/>/n, '&gt;').gsub(/</n, '&lt;').gsub(/&(?!(\#\d+|\#x([0-9a-f]+)|\w{2,8});)/nmi, '&amp;')
|
149
|
+
str.to_s.gsub(/\"/n, '&quot;').gsub(/>/n, '&gt;').gsub(/</n, '&lt;').gsub(/&(?!(\#\d+|\#x([0-9a-f]+)|\w{2,8});)/nmi, '&amp;')
|
147
150
|
end
|
148
151
|
|
149
152
|
private
|
data/lib/structures.rb
CHANGED
@@ -29,7 +29,7 @@ module FeedNormalizer
|
|
29
29
|
def ==(other)
|
30
30
|
other.equal?(self) ||
|
31
31
|
(other.instance_of?(self.class) &&
|
32
|
-
self.class::ELEMENTS.
|
32
|
+
self.class::ELEMENTS.all?{ |el| self.send(el) == other.send(el)} )
|
33
33
|
end
|
34
34
|
|
35
35
|
# Returns the difference between two Feed instances as a hash.
|
data/test/test_htmlcleaner.rb
CHANGED
@@ -13,6 +13,7 @@ class HtmlCleanerTest < Test::Unit::TestCase
|
|
13
13
|
end
|
14
14
|
|
15
15
|
def test_add_entities
|
16
|
+
assert_equal "", HtmlCleaner.add_entities(nil)
|
16
17
|
assert_equal "x &gt; y", HtmlCleaner.add_entities("x > y")
|
17
18
|
assert_equal "1 &amp; 2", HtmlCleaner.add_entities("1 & 2")
|
18
19
|
assert_equal "&amp; &#123; &acute; &#x123;", HtmlCleaner.add_entities("& &#123; &acute; &#x123;")
|
@@ -140,6 +141,7 @@ class HtmlCleanerTest < Test::Unit::TestCase
|
|
140
141
|
assert HtmlCleaner.dodgy_uri?("jav\tascript:foo()")
|
141
142
|
|
142
143
|
# The Good
|
144
|
+
assert_nil HtmlCleaner.dodgy_uri?(nil)
|
143
145
|
assert_nil HtmlCleaner.dodgy_uri?("http://example.org")
|
144
146
|
assert_nil HtmlCleaner.dodgy_uri?("http://example.org/foo.html")
|
145
147
|
assert_nil HtmlCleaner.dodgy_uri?("http://example.org/foo.cgi?x=y&a=b")
|
metadata
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
|
-
rubygems_version: 0.
|
2
|
+
rubygems_version: 0.9.2
|
3
3
|
specification_version: 1
|
4
4
|
name: feed-normalizer
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 1.
|
7
|
-
date:
|
6
|
+
version: 1.3.0
|
7
|
+
date: 2007-05-22 00:00:00 -07:00
|
8
8
|
summary: Extensible Ruby wrapper for Atom and RSS parsers
|
9
9
|
require_paths:
|
10
10
|
- lib
|
@@ -25,6 +25,7 @@ required_ruby_version: !ruby/object:Gem::Version::Requirement
|
|
25
25
|
platform: ruby
|
26
26
|
signing_key:
|
27
27
|
cert_chain:
|
28
|
+
post_install_message:
|
28
29
|
authors:
|
29
30
|
- Andrew A. Smith
|
30
31
|
files:
|
@@ -61,29 +62,29 @@ requirements: []
|
|
61
62
|
|
62
63
|
dependencies:
|
63
64
|
- !ruby/object:Gem::Dependency
|
64
|
-
name:
|
65
|
+
name: simple-rss
|
65
66
|
version_requirement:
|
66
67
|
version_requirements: !ruby/object:Gem::Version::Requirement
|
67
68
|
requirements:
|
68
69
|
- - ">="
|
69
70
|
- !ruby/object:Gem::Version
|
70
|
-
version: 1.1
|
71
|
+
version: "1.1"
|
71
72
|
version:
|
72
73
|
- !ruby/object:Gem::Dependency
|
73
|
-
name:
|
74
|
+
name: hpricot
|
74
75
|
version_requirement:
|
75
76
|
version_requirements: !ruby/object:Gem::Version::Requirement
|
76
77
|
requirements:
|
77
78
|
- - ">="
|
78
79
|
- !ruby/object:Gem::Version
|
79
|
-
version: "
|
80
|
+
version: "0.4"
|
80
81
|
version:
|
81
82
|
- !ruby/object:Gem::Dependency
|
82
|
-
name:
|
83
|
+
name: hoe
|
83
84
|
version_requirement:
|
84
85
|
version_requirements: !ruby/object:Gem::Version::Requirement
|
85
86
|
requirements:
|
86
87
|
- - ">="
|
87
88
|
- !ruby/object:Gem::Version
|
88
|
-
version:
|
89
|
+
version: 1.2.0
|
89
90
|
version:
|