hpricot_scrub 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
data/CHANGELOG.txt CHANGED
@@ -0,0 +1,9 @@
1
+ 2007-03-04 Michael <michael@underpantsgnome.com>
2
+ Release 0.2.0
3
+ Add String methods for scrub and scrub!
4
+
5
+ Fixed a bug where nested elements were not being scrubbed when using a
6
+ config hash
7
+
8
+ 2007-03-03 Michael <michael@underpantsgnome.com>
9
+ Release 0.1.0, Initial Gem version
data/README.txt CHANGED
@@ -6,7 +6,4 @@ of tags and attributes you don't want in the final output.
6
6
 
7
7
  See examples/config.yml for a sample config file or
8
8
 
9
- http://underpantsgnome.com/2007/01/20/hpricot-scrub/
10
-
11
- for more info.
12
-
9
+ http://trac.underpantsgnome.com/hpricot_scrub
data/Rakefile CHANGED
@@ -15,10 +15,9 @@ AUTHOR = "UnderpantsGnome" # can also be an array of Authors
15
15
  EMAIL = "michael@underpantsgnome.com"
16
16
  DESCRIPTION = "Scrub HTML with Hpricot"
17
17
  GEM_NAME = "hpricot_scrub" # what ppl will type to install your gem
18
- RUBYFORGE_PROJECT = "hpricot_scrub" # The unix name for your project
18
+ RUBYFORGE_PROJECT = "hpricot-scrub" # The unix name for your project
19
19
  HOMEPATH = "http://trac.underpantsgnome.com/hpricot_scrub/"
20
20
 
21
-
22
21
  NAME = "hpricot_scrub"
23
22
  REV = nil # UNCOMMENT IF REQUIRED: File.read(".svn/entries")[/committed-rev="(\d+)"/, 1] rescue nil
24
23
  VERS = ENV['VERSION'] || (HpricotScrub::VERSION::STRING + (REV ? ".#{REV}" : ""))
@@ -1,7 +1,7 @@
1
1
  module HpricotScrub #:nodoc:
2
2
  module VERSION #:nodoc:
3
3
  MAJOR = 0
4
- MINOR = 1
4
+ MINOR = 2
5
5
  TINY = 0
6
6
 
7
7
  STRING = [MAJOR, MINOR, TINY].join('.')
data/lib/hpricot_scrub.rb CHANGED
@@ -1 +1 @@
1
- Dir[File.join(File.dirname(__FILE__), 'hpricot_scrub/**/*.rb')].sort.each { |lib| require lib }
1
+ Dir[File.join(File.dirname(__FILE__), 'hpricot_scrub/**/*.rb')].sort.each { |lib| require lib }
@@ -4,15 +4,42 @@ require File.dirname(__FILE__) + '/scrubber_data.rb'
4
4
  class HpricotScrubTest < Test::Unit::TestCase
5
5
 
6
6
  def setup
7
+ @clean = Hpricot(MARKUP).scrub.inner_html
8
+ @config = YAML.load_file('examples/config.yml')
7
9
  end
8
-
10
+
9
11
  def test_full_scrub
10
- # using the divisor search throws warnings in test
11
12
  doc = Hpricot(MARKUP).scrub
12
- assert doc.search('//a').size == 0
13
- assert doc.search('//p').size == 0
14
- assert doc.search('//img').size == 0
15
- assert doc.search('//br').size == 0
16
- assert doc.search('//script').size == 0
13
+ # using the divisor search throws warnings in test
14
+ assert_tag_count(doc, 'a', 0)
15
+ assert_tag_count(doc, 'p', 0)
16
+ assert_tag_count(doc, 'img', 0)
17
+ assert_tag_count(doc, 'br', 0)
18
+ assert_tag_count(doc, 'div', 0)
19
+ assert_tag_count(doc, 'script', 0)
20
+ end
21
+
22
+ def test_partial_scrub
23
+ full = Hpricot(MARKUP)
24
+ doc = Hpricot(MARKUP).scrub(@config)
25
+ # using the divisor search throws warnings in test
26
+ assert_tag_count(doc, 'a', 0)
27
+ assert_tag_count(doc, 'p', full.search('//p').size)
28
+ assert_tag_count(doc, 'div', full.search('//div').size)
29
+ assert_tag_count(doc, 'img', full.search('//img').size)
30
+ assert_tag_count(doc, 'br', full.search('//br').size)
31
+ assert_tag_count(doc, 'script', 0)
32
+ end
33
+
34
+ def test_string_scrub
35
+ formatted = MARKUP
36
+ assert formatted.scrub == @clean
37
+ assert formatted == MARKUP
38
+ end
39
+
40
+ def test_string_scrub!
41
+ formatted = MARKUP
42
+ assert formatted.scrub! == @clean
43
+ assert formatted == @clean
17
44
  end
18
45
  end
@@ -4,9 +4,11 @@ MARKUP = <<-EOS
4
4
  <p>Still more junk <u>here</u>... </p>
5
5
  <p><img title="nothing to see here" alt="nothing to see here" mce_src="http://example.com/imgtest.png" src="http://example.com/imgtest.png" align="middle" border="1" height="240" hspace="5" vspace="5" width="320">&nbsp;</p>
6
6
  <p>&nbsp;And a <a title="Just a link" target="_blank" mce_href="http://example.com/nothing.html" href="http://example.com/nothing.html">link</a> just because</p>
7
+ <p><div>some stuff in here</div><img title="nothing to see here" alt="nothing to see here" mce_src="http://example.com/imgtest.png" src="http://example.com/imgtest.png" align="middle" border="1" height="240" hspace="5" vspace="5" width="320"></p>
7
8
  <a name="junk"></a>
8
- <script type="text/javascript">//nasty bits go here
9
+ <p><script type="text/javascript">//nasty bits go here
9
10
  alert("gotcha");</script><img src="http://content.example.com/content/3587a2f6ee641074fec4e7534c01655326c218ec">how about an <a href="javascript:alert('gotcha')">inline script</a>
11
+ </p>
10
12
  <span>some random unclosed span
11
13
  <style type="text/css">.foo {color:blue}</style>
12
14
  EOS
data/test/test_helper.rb CHANGED
@@ -1,2 +1,8 @@
1
1
  require 'test/unit'
2
2
  require File.dirname(__FILE__) + '/../lib/hpricot_scrub'
3
+
4
+ def assert_tag_count(doc, tag, expected)
5
+ found = doc.search("//#{tag}").size
6
+ assert found == expected,
7
+ "Expected to find #{expected} '#{tag}' tag(s), found #{found}"
8
+ end
metadata CHANGED
@@ -3,14 +3,14 @@ rubygems_version: 0.9.1
3
3
  specification_version: 1
4
4
  name: hpricot_scrub
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.1.0
6
+ version: 0.2.0
7
7
  date: 2007-03-04 00:00:00 -08:00
8
8
  summary: Scrub HTML with Hpricot
9
9
  require_paths:
10
10
  - lib
11
11
  email: michael@underpantsgnome.com
12
12
  homepage: http://trac.underpantsgnome.com/hpricot_scrub/
13
- rubyforge_project: hpricot_scrub
13
+ rubyforge_project: hpricot-scrub
14
14
  description: Scrub HTML with Hpricot
15
15
  autorequire:
16
16
  default_executable: