hpricot_scrub 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG.txt +9 -0
- data/README.txt +1 -4
- data/Rakefile +1 -2
- data/lib/hpricot_scrub/version.rb +1 -1
- data/lib/hpricot_scrub.rb +1 -1
- data/test/hpricot_scrub_test.rb +34 -7
- data/test/scrubber_data.rb +3 -1
- data/test/test_helper.rb +6 -0
- metadata +2 -2
data/CHANGELOG.txt
CHANGED
@@ -0,0 +1,9 @@
|
|
1
|
+
2007-03-04 Michael <michael@underpantsgnome.com>
|
2
|
+
Release 0.2.0
|
3
|
+
Add String methods for scrub and scrub!
|
4
|
+
|
5
|
+
Fixed a bug where nested elements were not being scrubbed when using a
|
6
|
+
config hash
|
7
|
+
|
8
|
+
2007-03-03 Michael <michael@underpantsgnome.com>
|
9
|
+
Release 0.1.0, Initial Gem version
|
data/README.txt
CHANGED
data/Rakefile
CHANGED
@@ -15,10 +15,9 @@ AUTHOR = "UnderpantsGnome" # can also be an array of Authors
|
|
15
15
|
EMAIL = "michael@underpantsgnome.com"
|
16
16
|
DESCRIPTION = "Scrub HTML with Hpricot"
|
17
17
|
GEM_NAME = "hpricot_scrub" # what ppl will type to install your gem
|
18
|
-
RUBYFORGE_PROJECT = "
|
18
|
+
RUBYFORGE_PROJECT = "hpricot-scrub" # The unix name for your project
|
19
19
|
HOMEPATH = "http://trac.underpantsgnome.com/hpricot_scrub/"
|
20
20
|
|
21
|
-
|
22
21
|
NAME = "hpricot_scrub"
|
23
22
|
REV = nil # UNCOMMENT IF REQUIRED: File.read(".svn/entries")[/committed-rev="(d+)"/, 1] rescue nil
|
24
23
|
VERS = ENV['VERSION'] || (HpricotScrub::VERSION::STRING + (REV ? ".#{REV}" : ""))
|
data/lib/hpricot_scrub.rb
CHANGED
@@ -1 +1 @@
|
|
1
|
-
Dir[File.join(File.dirname(__FILE__), 'hpricot_scrub/**/*.rb')].sort.each { |lib| require lib }
|
1
|
+
Dir[File.join(File.dirname(__FILE__), 'hpricot_scrub/**/*.rb')].sort.each { |lib| require lib }
|
data/test/hpricot_scrub_test.rb
CHANGED
@@ -4,15 +4,42 @@ require File.dirname(__FILE__) + '/scrubber_data.rb'
|
|
4
4
|
class HpricotScrubTest < Test::Unit::TestCase
|
5
5
|
|
6
6
|
def setup
|
7
|
+
@clean = Hpricot(MARKUP).scrub.inner_html
|
8
|
+
@config = YAML.load_file('examples/config.yml')
|
7
9
|
end
|
8
|
-
|
10
|
+
|
9
11
|
def test_full_scrub
|
10
|
-
# using the divisor search throws warnings in test
|
11
12
|
doc = Hpricot(MARKUP).scrub
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
13
|
+
# using the divisor search throws warnings in test
|
14
|
+
assert_tag_count(doc, 'a', 0)
|
15
|
+
assert_tag_count(doc, 'p', 0)
|
16
|
+
assert_tag_count(doc, 'img', 0)
|
17
|
+
assert_tag_count(doc, 'br', 0)
|
18
|
+
assert_tag_count(doc, 'div', 0)
|
19
|
+
assert_tag_count(doc, 'script', 0)
|
20
|
+
end
|
21
|
+
|
22
|
+
def test_partial_scrub
|
23
|
+
full = Hpricot(MARKUP)
|
24
|
+
doc = Hpricot(MARKUP).scrub(@config)
|
25
|
+
# using the divisor search throws warnings in test
|
26
|
+
assert_tag_count(doc, 'a', 0)
|
27
|
+
assert_tag_count(doc, 'p', full.search('//p').size)
|
28
|
+
assert_tag_count(doc, 'div', full.search('//div').size)
|
29
|
+
assert_tag_count(doc, 'img', full.search('//img').size)
|
30
|
+
assert_tag_count(doc, 'br', full.search('//br').size)
|
31
|
+
assert_tag_count(doc, 'script', 0)
|
32
|
+
end
|
33
|
+
|
34
|
+
def test_string_scrub
|
35
|
+
formatted = MARKUP
|
36
|
+
assert formatted.scrub == @clean
|
37
|
+
assert formatted == MARKUP
|
38
|
+
end
|
39
|
+
|
40
|
+
def test_string_scrub!
|
41
|
+
formatted = MARKUP
|
42
|
+
assert formatted.scrub! == @clean
|
43
|
+
assert formatted == @clean
|
17
44
|
end
|
18
45
|
end
|
data/test/scrubber_data.rb
CHANGED
@@ -4,9 +4,11 @@ MARKUP = <<-EOS
|
|
4
4
|
<p>Still more junk <u>here</u>... </p>
|
5
5
|
<p><img title="nothing to see here" alt="nothing to see here" mce_src="http://example.com/imgtest.png" src="http://example.com/imgtest.png" align="middle" border="1" height="240" hspace="5" vspace="5" width="320"> </p>
|
6
6
|
<p> And a <a title="Just a link" target="_blank" mce_href="http://example.com/nothing.html" href="http://example.com/nothing.html">link</a> just because</p>
|
7
|
+
<p><div>some stuff in here</div><img title="nothing to see here" alt="nothing to see here" mce_src="http://example.com/imgtest.png" src="http://example.com/imgtest.png" align="middle" border="1" height="240" hspace="5" vspace="5" width="320"></p>
|
7
8
|
<a name="junk"></a>
|
8
|
-
<script type="text/javascript">//nasty bits go here
|
9
|
+
<p><script type="text/javascript">//nasty bits go here
|
9
10
|
alert("gotcha");</script><img src="http://content.example.com/content/3587a2f6ee641074fec4e7534c01655326c218ec">how about an <a href="javascript:alert('gotcha')">inline script</a>
|
11
|
+
</p>
|
10
12
|
<span>some random unclosed span
|
11
13
|
<style type="text/css">.foo {color:blue}</style>
|
12
14
|
EOS
|
data/test/test_helper.rb
CHANGED
metadata
CHANGED
@@ -3,14 +3,14 @@ rubygems_version: 0.9.1
|
|
3
3
|
specification_version: 1
|
4
4
|
name: hpricot_scrub
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.1.0
|
6
|
+
version: 0.2.0
|
7
7
|
date: 2007-03-04 00:00:00 -08:00
|
8
8
|
summary: Scrub HTML with Hpricot
|
9
9
|
require_paths:
|
10
10
|
- lib
|
11
11
|
email: michael@underpantsgnome.com
|
12
12
|
homepage: http://trac.underpantsgnome.com/hpricot_scrub/
|
13
|
-
rubyforge_project:
|
13
|
+
rubyforge_project: hpricot-scrub
|
14
14
|
description: Scrub HTML with Hpricot
|
15
15
|
autorequire:
|
16
16
|
default_executable:
|