feed-normalizer 1.2.0 → 1.3.0

Sign up to get free protection for your applications and to get access to all the features.
data/History.txt CHANGED
@@ -1,3 +1,6 @@
1
+ 1.3.0
2
+
3
+ * Small changes to work with hpricot 0.5.
1
4
 
2
5
  1.2.0
3
6
 
data/Rakefile CHANGED
@@ -1,6 +1,6 @@
1
1
  require 'hoe'
2
2
 
3
- Hoe.new("feed-normalizer", "1.2.0") do |s|
3
+ Hoe.new("feed-normalizer", "1.3.0") do |s|
4
4
  s.author = "Andrew A. Smith"
5
5
  s.email = "andy@tinnedfruit.org"
6
6
  s.url = "http://feed-normalizer.rubyforge.org/"
data/lib/html-cleaner.rb CHANGED
@@ -59,11 +59,13 @@ module FeedNormalizer
59
59
  def clean(str)
60
60
  str = unescapeHTML(str)
61
61
 
62
- doc = Hpricot(str, :xhtml_strict => true)
62
+ doc = Hpricot(str, :fixup_tags => true)
63
63
  doc = subtree(doc, :body)
64
64
 
65
65
  # get all the tags in the document
66
- tags = (doc/"*").collect {|e| e.name}
66
+ # Somewhere near hpricot 0.4.92, "*" started returning all elements,
67
+ # including text nodes instead of just tagged elements.
68
+ tags = (doc/"*").inject([]) { |m,e| m << e.name if(e.respond_to?(:name) && e.name =~ /^\w+$/) ; m }.uniq
67
69
 
68
70
  # Remove tags that aren't whitelisted.
69
71
  remove_tags!(doc, tags - HTML_ELEMENTS)
@@ -109,6 +111,7 @@ module FeedNormalizer
109
111
  # This method rejects javascript, vbscript, livescript, mocha and data URLs.
110
112
  # It *could* be refined to only deny dangerous data URLs, however.
111
113
  def dodgy_uri?(uri)
114
+ uri = uri.to_s
112
115
 
113
116
  # special case for poorly-formed entities (missing ';')
114
117
  # if these occur *anywhere* within the string, then throw it out.
@@ -143,7 +146,7 @@ module FeedNormalizer
143
146
  #
144
147
  # This method could be improved by adding a whitelist of html entities.
145
148
  def add_entities(str)
146
- str.gsub(/\"/n, '&quot;').gsub(/>/n, '&gt;').gsub(/</n, '&lt;').gsub(/&(?!(\#\d+|\#x([0-9a-f]+)|\w{2,8});)/nmi, '&amp;')
149
+ str.to_s.gsub(/\"/n, '&quot;').gsub(/>/n, '&gt;').gsub(/</n, '&lt;').gsub(/&(?!(\#\d+|\#x([0-9a-f]+)|\w{2,8});)/nmi, '&amp;')
147
150
  end
148
151
 
149
152
  private
data/lib/structures.rb CHANGED
@@ -29,7 +29,7 @@ module FeedNormalizer
29
29
  def ==(other)
30
30
  other.equal?(self) ||
31
31
  (other.instance_of?(self.class) &&
32
- self.class::ELEMENTS.collect{|el| self.instance_variable_get("@#{el}")==other.instance_variable_get("@#{el}")}.all?)
32
+ self.class::ELEMENTS.all?{ |el| self.send(el) == other.send(el)} )
33
33
  end
34
34
 
35
35
  # Returns the difference between two Feed instances as a hash.
@@ -13,6 +13,7 @@ class HtmlCleanerTest < Test::Unit::TestCase
13
13
  end
14
14
 
15
15
  def test_add_entities
16
+ assert_equal "", HtmlCleaner.add_entities(nil)
16
17
  assert_equal "x &gt; y", HtmlCleaner.add_entities("x > y")
17
18
  assert_equal "1 &amp; 2", HtmlCleaner.add_entities("1 & 2")
18
19
  assert_equal "&amp; &#123; &acute; &#x123;", HtmlCleaner.add_entities("& &#123; &acute; &#x123;")
@@ -140,6 +141,7 @@ class HtmlCleanerTest < Test::Unit::TestCase
140
141
  assert HtmlCleaner.dodgy_uri?("jav\tascript:foo()")
141
142
 
142
143
  # The Good
144
+ assert_nil HtmlCleaner.dodgy_uri?(nil)
143
145
  assert_nil HtmlCleaner.dodgy_uri?("http://example.org")
144
146
  assert_nil HtmlCleaner.dodgy_uri?("http://example.org/foo.html")
145
147
  assert_nil HtmlCleaner.dodgy_uri?("http://example.org/foo.cgi?x=y&a=b")
metadata CHANGED
@@ -1,10 +1,10 @@
1
1
  --- !ruby/object:Gem::Specification
2
- rubygems_version: 0.8.11
2
+ rubygems_version: 0.9.2
3
3
  specification_version: 1
4
4
  name: feed-normalizer
5
5
  version: !ruby/object:Gem::Version
6
- version: 1.2.0
7
- date: 2006-11-29 00:00:00 -08:00
6
+ version: 1.3.0
7
+ date: 2007-05-22 00:00:00 -07:00
8
8
  summary: Extensible Ruby wrapper for Atom and RSS parsers
9
9
  require_paths:
10
10
  - lib
@@ -25,6 +25,7 @@ required_ruby_version: !ruby/object:Gem::Version::Requirement
25
25
  platform: ruby
26
26
  signing_key:
27
27
  cert_chain:
28
+ post_install_message:
28
29
  authors:
29
30
  - Andrew A. Smith
30
31
  files:
@@ -61,29 +62,29 @@ requirements: []
61
62
 
62
63
  dependencies:
63
64
  - !ruby/object:Gem::Dependency
64
- name: hoe
65
+ name: simple-rss
65
66
  version_requirement:
66
67
  version_requirements: !ruby/object:Gem::Version::Requirement
67
68
  requirements:
68
69
  - - ">="
69
70
  - !ruby/object:Gem::Version
70
- version: 1.1.6
71
+ version: "1.1"
71
72
  version:
72
73
  - !ruby/object:Gem::Dependency
73
- name: simple-rss
74
+ name: hpricot
74
75
  version_requirement:
75
76
  version_requirements: !ruby/object:Gem::Version::Requirement
76
77
  requirements:
77
78
  - - ">="
78
79
  - !ruby/object:Gem::Version
79
- version: "1.1"
80
+ version: "0.4"
80
81
  version:
81
82
  - !ruby/object:Gem::Dependency
82
- name: hpricot
83
+ name: hoe
83
84
  version_requirement:
84
85
  version_requirements: !ruby/object:Gem::Version::Requirement
85
86
  requirements:
86
87
  - - ">="
87
88
  - !ruby/object:Gem::Version
88
- version: "0.4"
89
+ version: 1.2.0
89
90
  version: