hpricot_scrub 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/CHANGELOG.txt CHANGED
@@ -0,0 +1,9 @@
1
+ 2007-03-04 Michael <michael@underpantsgnome.com>
2
+ Release 0.2.0
3
+ Add String methods for scrub and scrub!
4
+
5
+ Fixed a bug where nested elements were not being scrubbed when using a
6
+ config hash
7
+
8
+ 2007-03-03 Michael <michael@underpantsgnome.com>
9
+ Release 0.1.0, Initial Gem version
data/README.txt CHANGED
@@ -6,7 +6,4 @@ of tags and attributes you don't want in the final output.
6
6
 
7
7
  See examples/config.yml for a sample config file or
8
8
 
9
- http://underpantsgnome.com/2007/01/20/hpricot-scrub/
10
-
11
- for more info.
12
-
9
+ http://trac.underpantsgnome.com/hpricot_scrub
data/Rakefile CHANGED
@@ -15,10 +15,9 @@ AUTHOR = "UnderpantsGnome" # can also be an array of Authors
15
15
  EMAIL = "michael@underpantsgnome.com"
16
16
  DESCRIPTION = "Scrub HTML with Hpricot"
17
17
  GEM_NAME = "hpricot_scrub" # what ppl will type to install your gem
18
- RUBYFORGE_PROJECT = "hpricot_scrub" # The unix name for your project
18
+ RUBYFORGE_PROJECT = "hpricot-scrub" # The unix name for your project
19
19
  HOMEPATH = "http://trac.underpantsgnome.com/hpricot_scrub/"
20
20
 
21
-
22
21
  NAME = "hpricot_scrub"
23
22
  REV = nil # UNCOMMENT IF REQUIRED: File.read(".svn/entries")[/committed-rev="(\d+)"/, 1] rescue nil
24
23
  VERS = ENV['VERSION'] || (HpricotScrub::VERSION::STRING + (REV ? ".#{REV}" : ""))
@@ -1,7 +1,7 @@
1
1
  module HpricotScrub #:nodoc:
2
2
  module VERSION #:nodoc:
3
3
  MAJOR = 0
4
- MINOR = 1
4
+ MINOR = 2
5
5
  TINY = 0
6
6
 
7
7
  STRING = [MAJOR, MINOR, TINY].join('.')
data/lib/hpricot_scrub.rb CHANGED
@@ -1 +1 @@
1
- Dir[File.join(File.dirname(__FILE__), 'hpricot_scrub/**/*.rb')].sort.each { |lib| require lib }
1
+ Dir[File.join(File.dirname(__FILE__), 'hpricot_scrub/**/*.rb')].sort.each { |lib| require lib }
@@ -4,15 +4,42 @@ require File.dirname(__FILE__) + '/scrubber_data.rb'
4
4
  class HpricotScrubTest < Test::Unit::TestCase
5
5
 
6
6
  def setup
7
+ @clean = Hpricot(MARKUP).scrub.inner_html
8
+ @config = YAML.load_file('examples/config.yml')
7
9
  end
8
-
10
+
9
11
  def test_full_scrub
10
- # using the divisor search throws warnings in test
11
12
  doc = Hpricot(MARKUP).scrub
12
- assert doc.search('//a').size == 0
13
- assert doc.search('//p').size == 0
14
- assert doc.search('//img').size == 0
15
- assert doc.search('//br').size == 0
16
- assert doc.search('//script').size == 0
13
+ # using the divisor search throws warnings in test
14
+ assert_tag_count(doc, 'a', 0)
15
+ assert_tag_count(doc, 'p', 0)
16
+ assert_tag_count(doc, 'img', 0)
17
+ assert_tag_count(doc, 'br', 0)
18
+ assert_tag_count(doc, 'div', 0)
19
+ assert_tag_count(doc, 'script', 0)
20
+ end
21
+
22
+ def test_partial_scrub
23
+ full = Hpricot(MARKUP)
24
+ doc = Hpricot(MARKUP).scrub(@config)
25
+ # using the divisor search throws warnings in test
26
+ assert_tag_count(doc, 'a', 0)
27
+ assert_tag_count(doc, 'p', full.search('//p').size)
28
+ assert_tag_count(doc, 'div', full.search('//div').size)
29
+ assert_tag_count(doc, 'img', full.search('//img').size)
30
+ assert_tag_count(doc, 'br', full.search('//br').size)
31
+ assert_tag_count(doc, 'script', 0)
32
+ end
33
+
34
+ def test_string_scrub
35
+ formatted = MARKUP
36
+ assert formatted.scrub == @clean
37
+ assert formatted == MARKUP
38
+ end
39
+
40
+ def test_string_scrub!
41
+ formatted = MARKUP
42
+ assert formatted.scrub! == @clean
43
+ assert formatted == @clean
17
44
  end
18
45
  end
@@ -4,9 +4,11 @@ MARKUP = <<-EOS
4
4
  <p>Still more junk <u>here</u>... </p>
5
5
  <p><img title="nothing to see here" alt="nothing to see here" mce_src="http://example.com/imgtest.png" src="http://example.com/imgtest.png" align="middle" border="1" height="240" hspace="5" vspace="5" width="320">&nbsp;</p>
6
6
  <p>&nbsp;And a <a title="Just a link" target="_blank" mce_href="http://example.com/nothing.html" href="http://example.com/nothing.html">link</a> just because</p>
7
+ <p><div>some stuff in here</div><img title="nothing to see here" alt="nothing to see here" mce_src="http://example.com/imgtest.png" src="http://example.com/imgtest.png" align="middle" border="1" height="240" hspace="5" vspace="5" width="320"></p>
7
8
  <a name="junk"></a>
8
- <script type="text/javascript">//nasty bits go here
9
+ <p><script type="text/javascript">//nasty bits go here
9
10
  alert("gotcha");</script><img src="http://content.example.com/content/3587a2f6ee641074fec4e7534c01655326c218ec">how about an <a href="javascript:alert('gotcha')">inline script</a>
11
+ </p>
10
12
  <span>some random unclosed span
11
13
  <style type="text/css">.foo {color:blue}</style>
12
14
  EOS
data/test/test_helper.rb CHANGED
@@ -1,2 +1,8 @@
1
1
  require 'test/unit'
2
2
  require File.dirname(__FILE__) + '/../lib/hpricot_scrub'
3
+
4
+ def assert_tag_count(doc, tag, expected)
5
+ found = doc.search("//#{tag}").size
6
+ assert found == expected,
7
+ "Expected to find #{expected} '#{tag}' tag(s), found #{found}"
8
+ end
metadata CHANGED
@@ -3,14 +3,14 @@ rubygems_version: 0.9.1
3
3
  specification_version: 1
4
4
  name: hpricot_scrub
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.1.0
6
+ version: 0.2.0
7
7
  date: 2007-03-04 00:00:00 -08:00
8
8
  summary: Scrub HTML with Hpricot
9
9
  require_paths:
10
10
  - lib
11
11
  email: michael@underpantsgnome.com
12
12
  homepage: http://trac.underpantsgnome.com/hpricot_scrub/
13
- rubyforge_project: hpricot_scrub
13
+ rubyforge_project: hpricot-scrub
14
14
  description: Scrub HTML with Hpricot
15
15
  autorequire:
16
16
  default_executable: