feed-normalizer 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/History.txt CHANGED
@@ -1,3 +1,6 @@
1
+ 1.3.0
2
+
3
+ * Small changes to work with hpricot 0.5.
1
4
 
2
5
  1.2.0
3
6
 
data/Rakefile CHANGED
@@ -1,6 +1,6 @@
1
1
  require 'hoe'
2
2
 
3
- Hoe.new("feed-normalizer", "1.2.0") do |s|
3
+ Hoe.new("feed-normalizer", "1.3.0") do |s|
4
4
  s.author = "Andrew A. Smith"
5
5
  s.email = "andy@tinnedfruit.org"
6
6
  s.url = "http://feed-normalizer.rubyforge.org/"
data/lib/html-cleaner.rb CHANGED
@@ -59,11 +59,13 @@ module FeedNormalizer
59
59
  def clean(str)
60
60
  str = unescapeHTML(str)
61
61
 
62
- doc = Hpricot(str, :xhtml_strict => true)
62
+ doc = Hpricot(str, :fixup_tags => true)
63
63
  doc = subtree(doc, :body)
64
64
 
65
65
  # get all the tags in the document
66
- tags = (doc/"*").collect {|e| e.name}
66
+ # Somewhere near hpricot 0.4.92, "*" started returning all elements,
67
+ # including text nodes instead of just tagged elements.
68
+ tags = (doc/"*").inject([]) { |m,e| m << e.name if(e.respond_to?(:name) && e.name =~ /^\w+$/) ; m }.uniq
67
69
 
68
70
  # Remove tags that aren't whitelisted.
69
71
  remove_tags!(doc, tags - HTML_ELEMENTS)
@@ -109,6 +111,7 @@ module FeedNormalizer
109
111
  # This method rejects javascript, vbscript, livescript, mocha and data URLs.
110
112
  # It *could* be refined to only deny dangerous data URLs, however.
111
113
  def dodgy_uri?(uri)
114
+ uri = uri.to_s
112
115
 
113
116
  # special case for poorly-formed entities (missing ';')
114
117
  # if these occur *anywhere* within the string, then throw it out.
@@ -143,7 +146,7 @@ module FeedNormalizer
143
146
  #
144
147
  # This method could be improved by adding a whitelist of html entities.
145
148
  def add_entities(str)
146
- str.gsub(/\"/n, '&quot;').gsub(/>/n, '&gt;').gsub(/</n, '&lt;').gsub(/&(?!(\#\d+|\#x([0-9a-f]+)|\w{2,8});)/nmi, '&amp;')
149
+ str.to_s.gsub(/\"/n, '&quot;').gsub(/>/n, '&gt;').gsub(/</n, '&lt;').gsub(/&(?!(\#\d+|\#x([0-9a-f]+)|\w{2,8});)/nmi, '&amp;')
147
150
  end
148
151
 
149
152
  private
data/lib/structures.rb CHANGED
@@ -29,7 +29,7 @@ module FeedNormalizer
29
29
  def ==(other)
30
30
  other.equal?(self) ||
31
31
  (other.instance_of?(self.class) &&
32
- self.class::ELEMENTS.collect{|el| self.instance_variable_get("@#{el}")==other.instance_variable_get("@#{el}")}.all?)
32
+ self.class::ELEMENTS.all?{ |el| self.send(el) == other.send(el)} )
33
33
  end
34
34
 
35
35
  # Returns the difference between two Feed instances as a hash.
@@ -13,6 +13,7 @@ class HtmlCleanerTest < Test::Unit::TestCase
13
13
  end
14
14
 
15
15
  def test_add_entities
16
+ assert_equal "", HtmlCleaner.add_entities(nil)
16
17
  assert_equal "x &gt; y", HtmlCleaner.add_entities("x > y")
17
18
  assert_equal "1 &amp; 2", HtmlCleaner.add_entities("1 & 2")
18
19
  assert_equal "&amp; &#123; &acute; &#x123;", HtmlCleaner.add_entities("& &#123; &acute; &#x123;")
@@ -140,6 +141,7 @@ class HtmlCleanerTest < Test::Unit::TestCase
140
141
  assert HtmlCleaner.dodgy_uri?("jav\tascript:foo()")
141
142
 
142
143
  # The Good
144
+ assert_nil HtmlCleaner.dodgy_uri?(nil)
143
145
  assert_nil HtmlCleaner.dodgy_uri?("http://example.org")
144
146
  assert_nil HtmlCleaner.dodgy_uri?("http://example.org/foo.html")
145
147
  assert_nil HtmlCleaner.dodgy_uri?("http://example.org/foo.cgi?x=y&a=b")
metadata CHANGED
@@ -1,10 +1,10 @@
1
1
  --- !ruby/object:Gem::Specification
2
- rubygems_version: 0.8.11
2
+ rubygems_version: 0.9.2
3
3
  specification_version: 1
4
4
  name: feed-normalizer
5
5
  version: !ruby/object:Gem::Version
6
- version: 1.2.0
7
- date: 2006-11-29 00:00:00 -08:00
6
+ version: 1.3.0
7
+ date: 2007-05-22 00:00:00 -07:00
8
8
  summary: Extensible Ruby wrapper for Atom and RSS parsers
9
9
  require_paths:
10
10
  - lib
@@ -25,6 +25,7 @@ required_ruby_version: !ruby/object:Gem::Version::Requirement
25
25
  platform: ruby
26
26
  signing_key:
27
27
  cert_chain:
28
+ post_install_message:
28
29
  authors:
29
30
  - Andrew A. Smith
30
31
  files:
@@ -61,29 +62,29 @@ requirements: []
61
62
 
62
63
  dependencies:
63
64
  - !ruby/object:Gem::Dependency
64
- name: hoe
65
+ name: simple-rss
65
66
  version_requirement:
66
67
  version_requirements: !ruby/object:Gem::Version::Requirement
67
68
  requirements:
68
69
  - - ">="
69
70
  - !ruby/object:Gem::Version
70
- version: 1.1.6
71
+ version: "1.1"
71
72
  version:
72
73
  - !ruby/object:Gem::Dependency
73
- name: simple-rss
74
+ name: hpricot
74
75
  version_requirement:
75
76
  version_requirements: !ruby/object:Gem::Version::Requirement
76
77
  requirements:
77
78
  - - ">="
78
79
  - !ruby/object:Gem::Version
79
- version: "1.1"
80
+ version: "0.4"
80
81
  version:
81
82
  - !ruby/object:Gem::Dependency
82
- name: hpricot
83
+ name: hoe
83
84
  version_requirement:
84
85
  version_requirements: !ruby/object:Gem::Version::Requirement
85
86
  requirements:
86
87
  - - ">="
87
88
  - !ruby/object:Gem::Version
88
- version: "0.4"
89
+ version: 1.2.0
89
90
  version: