hpricot_scrub 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG.txt +9 -0
- data/README.txt +1 -4
- data/Rakefile +1 -2
- data/lib/hpricot_scrub/version.rb +1 -1
- data/lib/hpricot_scrub.rb +1 -1
- data/test/hpricot_scrub_test.rb +34 -7
- data/test/scrubber_data.rb +3 -1
- data/test/test_helper.rb +6 -0
- metadata +2 -2
data/CHANGELOG.txt
CHANGED
@@ -0,0 +1,9 @@
|
|
1
|
+
2007-03-04 Michael <michael@underpantsgnome.com>
|
2
|
+
Release 0.2.0
|
3
|
+
Add String methods for scrub and scrub!
|
4
|
+
|
5
|
+
Fixed a bug where nested elements were not being scrubbed when using a
|
6
|
+
config hash
|
7
|
+
|
8
|
+
2007-03-03 Michael <michael@underpantsgnome.com>
|
9
|
+
Release 0.1.0, Initial Gem version
|
data/README.txt
CHANGED
data/Rakefile
CHANGED
@@ -15,10 +15,9 @@ AUTHOR = "UnderpantsGnome" # can also be an array of Authors
|
|
15
15
|
EMAIL = "michael@underpantsgnome.com"
|
16
16
|
DESCRIPTION = "Scrub HTML with Hpricot"
|
17
17
|
GEM_NAME = "hpricot_scrub" # what ppl will type to install your gem
|
18
|
-
RUBYFORGE_PROJECT = "
|
18
|
+
RUBYFORGE_PROJECT = "hpricot-scrub" # The unix name for your project
|
19
19
|
HOMEPATH = "http://trac.underpantsgnome.com/hpricot_scrub/"
|
20
20
|
|
21
|
-
|
22
21
|
NAME = "hpricot_scrub"
|
23
22
|
REV = nil # UNCOMMENT IF REQUIRED: File.read(".svn/entries")[/committed-rev="(d+)"/, 1] rescue nil
|
24
23
|
VERS = ENV['VERSION'] || (HpricotScrub::VERSION::STRING + (REV ? ".#{REV}" : ""))
|
data/lib/hpricot_scrub.rb
CHANGED
@@ -1 +1 @@
|
|
1
|
-
Dir[File.join(File.dirname(__FILE__), 'hpricot_scrub/**/*.rb')].sort.each { |lib| require lib }
|
1
|
+
Dir[File.join(File.dirname(__FILE__), 'hpricot_scrub/**/*.rb')].sort.each { |lib| require lib }
|
data/test/hpricot_scrub_test.rb
CHANGED
@@ -4,15 +4,42 @@ require File.dirname(__FILE__) + '/scrubber_data.rb'
|
|
4
4
|
class HpricotScrubTest < Test::Unit::TestCase
|
5
5
|
|
6
6
|
def setup
|
7
|
+
@clean = Hpricot(MARKUP).scrub.inner_html
|
8
|
+
@config = YAML.load_file('examples/config.yml')
|
7
9
|
end
|
8
|
-
|
10
|
+
|
9
11
|
def test_full_scrub
|
10
|
-
# using the divisor search throws warnings in test
|
11
12
|
doc = Hpricot(MARKUP).scrub
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
13
|
+
# using the divisor search throws warnings in test
|
14
|
+
assert_tag_count(doc, 'a', 0)
|
15
|
+
assert_tag_count(doc, 'p', 0)
|
16
|
+
assert_tag_count(doc, 'img', 0)
|
17
|
+
assert_tag_count(doc, 'br', 0)
|
18
|
+
assert_tag_count(doc, 'div', 0)
|
19
|
+
assert_tag_count(doc, 'script', 0)
|
20
|
+
end
|
21
|
+
|
22
|
+
def test_partial_scrub
|
23
|
+
full = Hpricot(MARKUP)
|
24
|
+
doc = Hpricot(MARKUP).scrub(@config)
|
25
|
+
# using the divisor search throws warnings in test
|
26
|
+
assert_tag_count(doc, 'a', 0)
|
27
|
+
assert_tag_count(doc, 'p', full.search('//p').size)
|
28
|
+
assert_tag_count(doc, 'div', full.search('//div').size)
|
29
|
+
assert_tag_count(doc, 'img', full.search('//img').size)
|
30
|
+
assert_tag_count(doc, 'br', full.search('//br').size)
|
31
|
+
assert_tag_count(doc, 'script', 0)
|
32
|
+
end
|
33
|
+
|
34
|
+
def test_string_scrub
|
35
|
+
formatted = MARKUP
|
36
|
+
assert formatted.scrub == @clean
|
37
|
+
assert formatted == MARKUP
|
38
|
+
end
|
39
|
+
|
40
|
+
def test_string_scrub!
|
41
|
+
formatted = MARKUP
|
42
|
+
assert formatted.scrub! == @clean
|
43
|
+
assert formatted == @clean
|
17
44
|
end
|
18
45
|
end
|
data/test/scrubber_data.rb
CHANGED
@@ -4,9 +4,11 @@ MARKUP = <<-EOS
|
|
4
4
|
<p>Still more junk <u>here</u>... </p>
|
5
5
|
<p><img title="nothing to see here" alt="nothing to see here" mce_src="http://example.com/imgtest.png" src="http://example.com/imgtest.png" align="middle" border="1" height="240" hspace="5" vspace="5" width="320"> </p>
|
6
6
|
<p> And a <a title="Just a link" target="_blank" mce_href="http://example.com/nothing.html" href="http://example.com/nothing.html">link</a> just because</p>
|
7
|
+
<p><div>some stuff in here</div><img title="nothing to see here" alt="nothing to see here" mce_src="http://example.com/imgtest.png" src="http://example.com/imgtest.png" align="middle" border="1" height="240" hspace="5" vspace="5" width="320"></p>
|
7
8
|
<a name="junk"></a>
|
8
|
-
<script type="text/javascript">//nasty bits go here
|
9
|
+
<p><script type="text/javascript">//nasty bits go here
|
9
10
|
alert("gotcha");</script><img src="http://content.example.com/content/3587a2f6ee641074fec4e7534c01655326c218ec">how about an <a href="javascript:alert('gotcha')">inline script</a>
|
11
|
+
</p>
|
10
12
|
<span>some random unclosed span
|
11
13
|
<style type="text/css">.foo {color:blue}</style>
|
12
14
|
EOS
|
data/test/test_helper.rb
CHANGED
metadata
CHANGED
@@ -3,14 +3,14 @@ rubygems_version: 0.9.1
|
|
3
3
|
specification_version: 1
|
4
4
|
name: hpricot_scrub
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.1.0
|
6
|
+
version: 0.2.0
|
7
7
|
date: 2007-03-04 00:00:00 -08:00
|
8
8
|
summary: Scrub HTML with Hpricot
|
9
9
|
require_paths:
|
10
10
|
- lib
|
11
11
|
email: michael@underpantsgnome.com
|
12
12
|
homepage: http://trac.underpantsgnome.com/hpricot_scrub/
|
13
|
-
rubyforge_project:
|
13
|
+
rubyforge_project: hpricot-scrub
|
14
14
|
description: Scrub HTML with Hpricot
|
15
15
|
autorequire:
|
16
16
|
default_executable:
|