hpricot_scrub 0.2.2 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG.txt +10 -4
 - data/examples/config.yml +47 -45
 - data/lib/hpricot_scrub/hpricot_scrub.rb +30 -6
 - data/lib/hpricot_scrub/version.rb +1 -1
 - data/test/hpricot_scrub_test.rb +45 -6
 - data/test/scrubber_data.rb +20 -0
 - metadata +3 -3
 
    
        data/CHANGELOG.txt
    CHANGED
    
    | 
         @@ -1,3 +1,9 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            2007-04-05 Michael <michael@underpantsgnome.com>
         
     | 
| 
      
 2 
     | 
    
         
            +
            	Release 0.2.3
         
     | 
| 
      
 3 
     | 
    
         
            +
            	Add patches from Eric Wong
         
     | 
| 
      
 4 
     | 
    
         
            +
            		- Recursive scrubbing wasn't scrubbing if parent was in allow
         
     | 
| 
      
 5 
     | 
    
         
            +
            		- Add optional use of HTMLEntities
         
     | 
| 
      
 6 
     | 
    
         
            +
             
     | 
| 
       1 
7 
     | 
    
         
             
            2007-03-04 Michael <michael@underpantsgnome.com>
         
     | 
| 
       2 
8 
     | 
    
         
             
            	Release 0.2.2
         
     | 
| 
       3 
9 
     | 
    
         
             
            	Add patches from Eric Wong 
         
     | 
| 
         @@ -6,10 +12,10 @@ 
     | 
|
| 
       6 
12 
     | 
    
         | 
| 
       7 
13 
     | 
    
         
             
            2007-03-04 Michael <michael@underpantsgnome.com>
         
     | 
| 
       8 
14 
     | 
    
         
             
            	Release 0.2.0
         
     | 
| 
       9 
     | 
    
         
            -
             
     | 
| 
      
 15 
     | 
    
         
            +
            		- Add String methods for scrub and scrub!
         
     | 
| 
       10 
16 
     | 
    
         | 
| 
       11 
     | 
    
         
            -
             
     | 
| 
       12 
     | 
    
         
            -
             
     | 
| 
      
 17 
     | 
    
         
            +
            		- Fixed a bug where nested elements were not being scrubbed when using a 
         
     | 
| 
      
 18 
     | 
    
         
            +
            			config hash
         
     | 
| 
       13 
19 
     | 
    
         | 
| 
       14 
20 
     | 
    
         
             
            2007-03-03 Michael <michael@underpantsgnome.com>
         
     | 
| 
       15 
     | 
    
         
            -
             
     | 
| 
      
 21 
     | 
    
         
            +
            		- Release 0.1.0, Initial Gem version
         
     | 
    
        data/examples/config.yml
    CHANGED
    
    | 
         @@ -1,47 +1,49 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
             
     | 
| 
       2 
1 
     | 
    
         
             
            ---
         
     | 
| 
       3 
     | 
    
         
            -
             
     | 
| 
       4 
     | 
    
         
            -
             
     | 
| 
       5 
     | 
    
         
            -
             
     | 
| 
       6 
     | 
    
         
            -
             
     | 
| 
       7 
     | 
    
         
            -
             
     | 
| 
       8 
     | 
    
         
            -
             
     | 
| 
       9 
     | 
    
         
            -
             
     | 
| 
       10 
     | 
    
         
            -
             
     | 
| 
       11 
     | 
    
         
            -
             
     | 
| 
       12 
     | 
    
         
            -
             
     | 
| 
       13 
     | 
    
         
            -
             
     | 
| 
       14 
     | 
    
         
            -
             
     | 
| 
       15 
     | 
    
         
            -
             
     | 
| 
       16 
     | 
    
         
            -
             
     | 
| 
       17 
     | 
    
         
            -
             
     | 
| 
       18 
     | 
    
         
            -
             
     | 
| 
       19 
     | 
    
         
            -
             
     | 
| 
       20 
     | 
    
         
            -
             
     | 
| 
       21 
     | 
    
         
            -
             
     | 
| 
       22 
     | 
    
         
            -
             
     | 
| 
       23 
     | 
    
         
            -
             
     | 
| 
       24 
     | 
    
         
            -
             
     | 
| 
       25 
     | 
    
         
            -
             
     | 
| 
       26 
     | 
    
         
            -
             
     | 
| 
       27 
     | 
    
         
            -
             
     | 
| 
       28 
     | 
    
         
            -
             
     | 
| 
       29 
     | 
    
         
            -
             
     | 
| 
       30 
     | 
    
         
            -
             
     | 
| 
       31 
     | 
    
         
            -
             
     | 
| 
       32 
     | 
    
         
            -
             
     | 
| 
       33 
     | 
    
         
            -
             
     | 
| 
       34 
     | 
    
         
            -
             
     | 
| 
       35 
     | 
    
         
            -
             
     | 
| 
       36 
     | 
    
         
            -
             
     | 
| 
      
 2 
     | 
    
         
            +
              :allow_tags: # let these tags stay, but will strip attributes
         
     | 
| 
      
 3 
     | 
    
         
            +
                - 'html'
         
     | 
| 
      
 4 
     | 
    
         
            +
                - 'head'
         
     | 
| 
      
 5 
     | 
    
         
            +
                - 'body'
         
     | 
| 
      
 6 
     | 
    
         
            +
                - 'b'
         
     | 
| 
      
 7 
     | 
    
         
            +
                - 'blockquote'
         
     | 
| 
      
 8 
     | 
    
         
            +
                - 'br'
         
     | 
| 
      
 9 
     | 
    
         
            +
                - 'div'
         
     | 
| 
      
 10 
     | 
    
         
            +
                - 'h1'
         
     | 
| 
      
 11 
     | 
    
         
            +
                - 'h2'
         
     | 
| 
      
 12 
     | 
    
         
            +
                - 'h3'
         
     | 
| 
      
 13 
     | 
    
         
            +
                - 'h4'
         
     | 
| 
      
 14 
     | 
    
         
            +
                - 'h5'
         
     | 
| 
      
 15 
     | 
    
         
            +
                - 'h6'
         
     | 
| 
      
 16 
     | 
    
         
            +
                - 'hr'
         
     | 
| 
      
 17 
     | 
    
         
            +
                - 'i'
         
     | 
| 
      
 18 
     | 
    
         
            +
                - 'em'
         
     | 
| 
      
 19 
     | 
    
         
            +
                - 'img'
         
     | 
| 
      
 20 
     | 
    
         
            +
                - 'li'
         
     | 
| 
      
 21 
     | 
    
         
            +
                - 'ol'
         
     | 
| 
      
 22 
     | 
    
         
            +
                - 'p'
         
     | 
| 
      
 23 
     | 
    
         
            +
                - 'pre'
         
     | 
| 
      
 24 
     | 
    
         
            +
                - 'small'
         
     | 
| 
      
 25 
     | 
    
         
            +
                - 'span'
         
     | 
| 
      
 26 
     | 
    
         
            +
                - 'span'
         
     | 
| 
      
 27 
     | 
    
         
            +
                - 'strike'
         
     | 
| 
      
 28 
     | 
    
         
            +
                - 'strong'
         
     | 
| 
      
 29 
     | 
    
         
            +
                - 'sub'
         
     | 
| 
      
 30 
     | 
    
         
            +
                - 'sup'
         
     | 
| 
      
 31 
     | 
    
         
            +
                - 'table'
         
     | 
| 
      
 32 
     | 
    
         
            +
                - 'tbody'
         
     | 
| 
      
 33 
     | 
    
         
            +
                - 'td'
         
     | 
| 
      
 34 
     | 
    
         
            +
                - 'tfoot'
         
     | 
| 
      
 35 
     | 
    
         
            +
                - 'thead'
         
     | 
| 
      
 36 
     | 
    
         
            +
                - 'tr'
         
     | 
| 
      
 37 
     | 
    
         
            +
                - 'u'
         
     | 
| 
      
 38 
     | 
    
         
            +
                - 'ul'
         
     | 
| 
       37 
39 
     | 
    
         | 
| 
       38 
     | 
    
         
            -
             
     | 
| 
       39 
     | 
    
         
            -
             
     | 
| 
       40 
     | 
    
         
            -
             
     | 
| 
       41 
     | 
    
         
            -
             
     | 
| 
       42 
     | 
    
         
            -
             
     | 
| 
       43 
     | 
    
         
            -
             
     | 
| 
       44 
     | 
    
         
            -
             
     | 
| 
       45 
     | 
    
         
            -
             
     | 
| 
       46 
     | 
    
         
            -
             
     | 
| 
       47 
     | 
    
         
            -
             
     | 
| 
      
 40 
     | 
    
         
            +
              :remove_tags: # completely removes everything between open and close tag
         
     | 
| 
      
 41 
     | 
    
         
            +
                - 'form'
         
     | 
| 
      
 42 
     | 
    
         
            +
                - 'script'
         
     | 
| 
      
 43 
     | 
    
         
            +
                
         
     | 
| 
      
 44 
     | 
    
         
            +
              :allow_attributes: # let these attributes stay, strip all others
         
     | 
| 
      
 45 
     | 
    
         
            +
                - 'src'
         
     | 
| 
      
 46 
     | 
    
         
            +
                - 'font'
         
     | 
| 
      
 47 
     | 
    
         
            +
                - 'alt'
         
     | 
| 
      
 48 
     | 
    
         
            +
                - 'style'
         
     | 
| 
      
 49 
     | 
    
         
            +
                - 'align'
         
     | 
    
        data/lib/hpricot_scrub/hpricot_scrub.rb
    CHANGED
    
    | 
         @@ -10,9 +10,12 @@ require 'hpricot' 
     | 
|
| 
       10 
10 
     | 
    
         | 
| 
       11 
11 
     | 
    
         
             
            module Hpricot
         
     | 
| 
       12 
12 
     | 
    
         
             
              module Scrubable
         
     | 
| 
      
 13 
     | 
    
         
            +
                # TODO: figure out how to handle comments
         
     | 
| 
       13 
14 
     | 
    
         
             
                def scrubable?
         
     | 
| 
       14 
     | 
    
         
            -
                  ! [Hpricot::Text,  
     | 
| 
       15 
     | 
    
         
            -
             
     | 
| 
      
 15 
     | 
    
         
            +
                  ! [ Hpricot::Text, 
         
     | 
| 
      
 16 
     | 
    
         
            +
                      Hpricot::BogusETag, 
         
     | 
| 
      
 17 
     | 
    
         
            +
                      Hpricot::Comment
         
     | 
| 
      
 18 
     | 
    
         
            +
                    ].include?(self.class) && self.respond_to?(:scrub)
         
     | 
| 
       16 
19 
     | 
    
         
             
                end
         
     | 
| 
       17 
20 
     | 
    
         
             
              end
         
     | 
| 
       18 
21 
     | 
    
         | 
| 
         @@ -34,9 +37,7 @@ module Hpricot 
     | 
|
| 
       34 
37 
     | 
    
         
             
                include Scrubable
         
     | 
| 
       35 
38 
     | 
    
         | 
| 
       36 
39 
     | 
    
         
             
                def scrub(config)
         
     | 
| 
       37 
     | 
    
         
            -
                  children.reverse.each { |c| 
         
     | 
| 
       38 
     | 
    
         
            -
                    c.scrub(config) if c.scrubable? && ! config[:allow_tags].include?(c.name)
         
     | 
| 
       39 
     | 
    
         
            -
                  }
         
     | 
| 
      
 40 
     | 
    
         
            +
                  children.reverse.each { |c| c.scrub(config) if c.scrubable? }
         
     | 
| 
       40 
41 
     | 
    
         
             
                  strip unless config[:allow_tags].include?(name)
         
     | 
| 
       41 
42 
     | 
    
         
             
                end
         
     | 
| 
       42 
43 
     | 
    
         | 
| 
         @@ -61,7 +62,7 @@ module Hpricot 
     | 
|
| 
       61 
62 
     | 
    
         
             
                end
         
     | 
| 
       62 
63 
     | 
    
         | 
| 
       63 
64 
     | 
    
         
             
                def strip_removes?
         
     | 
| 
       64 
     | 
    
         
            -
                  #  
     | 
| 
      
 65 
     | 
    
         
            +
                  # TODO: find other elements that should be removed instead of stripped
         
     | 
| 
       65 
66 
     | 
    
         
             
                  attributes && attributes['type'] =~ /script|css/
         
     | 
| 
       66 
67 
     | 
    
         
             
                end
         
     | 
| 
       67 
68 
     | 
    
         
             
              end
         
     | 
| 
         @@ -93,3 +94,26 @@ class String 
     | 
|
| 
       93 
94 
     | 
    
         
             
                dup.scrub!
         
     | 
| 
       94 
95 
     | 
    
         
             
              end
         
     | 
| 
       95 
96 
     | 
    
         
             
            end
         
     | 
| 
      
 97 
     | 
    
         
            +
             
     | 
| 
      
 98 
     | 
    
         
            +
            begin
         
     | 
| 
      
 99 
     | 
    
         
            +
              require 'htmlentities'
         
     | 
| 
      
 100 
     | 
    
         
            +
              
         
     | 
| 
      
 101 
     | 
    
         
            +
              module Hpricot
         
     | 
| 
      
 102 
     | 
    
         
            +
                class Scrub
         
     | 
| 
      
 103 
     | 
    
         
            +
                  @coder = HTMLEntities.new
         
     | 
| 
      
 104 
     | 
    
         
            +
                  class << self
         
     | 
| 
      
 105 
     | 
    
         
            +
                    def entifier; @coder end
         
     | 
| 
      
 106 
     | 
    
         
            +
                  end
         
     | 
| 
      
 107 
     | 
    
         
            +
                end
         
     | 
| 
      
 108 
     | 
    
         
            +
              end
         
     | 
| 
      
 109 
     | 
    
         
            +
              
         
     | 
| 
      
 110 
     | 
    
         
            +
              class String
         
     | 
| 
      
 111 
     | 
    
         
            +
                def decode!
         
     | 
| 
      
 112 
     | 
    
         
            +
                  self.gsub!(/^(\n|.)*$/, Hpricot::Scrub.entifier.decode(self))
         
     | 
| 
      
 113 
     | 
    
         
            +
                end
         
     | 
| 
      
 114 
     | 
    
         
            +
             
     | 
| 
      
 115 
     | 
    
         
            +
                def decode
         
     | 
| 
      
 116 
     | 
    
         
            +
                  dup.decode!
         
     | 
| 
      
 117 
     | 
    
         
            +
                end
         
     | 
| 
      
 118 
     | 
    
         
            +
              end
         
     | 
| 
      
 119 
     | 
    
         
            +
            rescue LoadError; end
         
     | 
    
        data/test/hpricot_scrub_test.rb
    CHANGED
    
    | 
         @@ -6,6 +6,17 @@ class HpricotScrubTest < Test::Unit::TestCase 
     | 
|
| 
       6 
6 
     | 
    
         
             
              def setup
         
     | 
| 
       7 
7 
     | 
    
         
             
                @clean = Hpricot(MARKUP).scrub.inner_html
         
     | 
| 
       8 
8 
     | 
    
         
             
                @config = YAML.load_file('examples/config.yml')
         
     | 
| 
      
 9 
     | 
    
         
            +
             
     | 
| 
      
 10 
     | 
    
         
            +
                # add some tags that most users will probably want
         
     | 
| 
      
 11 
     | 
    
         
            +
                @config_full = @config.dup
         
     | 
| 
      
 12 
     | 
    
         
            +
                %w(body head html).each { |x| @config_full[:allow_tags].push(x) }
         
     | 
| 
      
 13 
     | 
    
         
            +
              end
         
     | 
| 
      
 14 
     | 
    
         
            +
             
     | 
| 
      
 15 
     | 
    
         
            +
              def test_full_markup_partial_scrub
         
     | 
| 
      
 16 
     | 
    
         
            +
                full = Hpricot(MARKUP)
         
     | 
| 
      
 17 
     | 
    
         
            +
                full_markup = '<html><head></head><body>' + MARKUP + '</body></html>'
         
     | 
| 
      
 18 
     | 
    
         
            +
                doc = Hpricot(full_markup).scrub(@config_full)
         
     | 
| 
      
 19 
     | 
    
         
            +
                partial_scrub_common(doc, full)
         
     | 
| 
       9 
20 
     | 
    
         
             
              end
         
     | 
| 
       10 
21 
     | 
    
         | 
| 
       11 
22 
     | 
    
         
             
              def test_full_scrub
         
     | 
| 
         @@ -22,12 +33,16 @@ class HpricotScrubTest < Test::Unit::TestCase 
     | 
|
| 
       22 
33 
     | 
    
         
             
              def test_partial_scrub
         
     | 
| 
       23 
34 
     | 
    
         
             
                full = Hpricot(MARKUP)
         
     | 
| 
       24 
35 
     | 
    
         
             
                doc = Hpricot(MARKUP).scrub(@config)
         
     | 
| 
       25 
     | 
    
         
            -
                 
     | 
| 
      
 36 
     | 
    
         
            +
                partial_scrub_common(doc, full)
         
     | 
| 
      
 37 
     | 
    
         
            +
              end
         
     | 
| 
      
 38 
     | 
    
         
            +
             
     | 
| 
      
 39 
     | 
    
         
            +
              def test_full_doc
         
     | 
| 
      
 40 
     | 
    
         
            +
                doc = Hpricot(GOOGLE).scrub
         
     | 
| 
       26 
41 
     | 
    
         
             
                assert_tag_count(doc, 'a', 0)
         
     | 
| 
       27 
     | 
    
         
            -
                assert_tag_count(doc, 'p',  
     | 
| 
       28 
     | 
    
         
            -
                assert_tag_count(doc, ' 
     | 
| 
       29 
     | 
    
         
            -
                assert_tag_count(doc, ' 
     | 
| 
       30 
     | 
    
         
            -
                assert_tag_count(doc, ' 
     | 
| 
      
 42 
     | 
    
         
            +
                assert_tag_count(doc, 'p', 0)
         
     | 
| 
      
 43 
     | 
    
         
            +
                assert_tag_count(doc, 'img', 0)
         
     | 
| 
      
 44 
     | 
    
         
            +
                assert_tag_count(doc, 'br', 0)
         
     | 
| 
      
 45 
     | 
    
         
            +
                assert_tag_count(doc, 'div', 0)
         
     | 
| 
       31 
46 
     | 
    
         
             
                assert_tag_count(doc, 'script', 0)
         
     | 
| 
       32 
47 
     | 
    
         
             
              end
         
     | 
| 
       33 
48 
     | 
    
         | 
| 
         @@ -36,10 +51,34 @@ class HpricotScrubTest < Test::Unit::TestCase 
     | 
|
| 
       36 
51 
     | 
    
         
             
                assert formatted.scrub == @clean
         
     | 
| 
       37 
52 
     | 
    
         
             
                assert formatted == MARKUP
         
     | 
| 
       38 
53 
     | 
    
         
             
              end
         
     | 
| 
       39 
     | 
    
         
            -
             
     | 
| 
      
 54 
     | 
    
         
            +
             
     | 
| 
       40 
55 
     | 
    
         
             
              def test_string_scrub!
         
     | 
| 
       41 
56 
     | 
    
         
             
                formatted = MARKUP
         
     | 
| 
       42 
57 
     | 
    
         
             
                assert formatted.scrub! == @clean
         
     | 
| 
       43 
58 
     | 
    
         
             
                assert formatted == @clean
         
     | 
| 
       44 
59 
     | 
    
         
             
              end
         
     | 
| 
      
 60 
     | 
    
         
            +
             
     | 
| 
      
 61 
     | 
    
         
            +
              def test_decoder
         
     | 
| 
      
 62 
     | 
    
         
            +
                str = 'some <a href="http://example.com/">example link</a> to nowhere'
         
     | 
| 
      
 63 
     | 
    
         
            +
                scrubbed_str = str.scrub
         
     | 
| 
      
 64 
     | 
    
         
            +
                assert scrubbed_str.include?(' ')
         
     | 
| 
      
 65 
     | 
    
         
            +
             
     | 
| 
      
 66 
     | 
    
         
            +
                if defined?(HTMLEntities)
         
     | 
| 
      
 67 
     | 
    
         
            +
                  assert ! scrubbed_str.decode.include?(' ')
         
     | 
| 
      
 68 
     | 
    
         
            +
             
     | 
| 
      
 69 
     | 
    
         
            +
                  scrubbed_str.decode!
         
     | 
| 
      
 70 
     | 
    
         
            +
                  assert ! scrubbed_str.include?(' ')
         
     | 
| 
      
 71 
     | 
    
         
            +
                end
         
     | 
| 
      
 72 
     | 
    
         
            +
              end
         
     | 
| 
      
 73 
     | 
    
         
            +
             
     | 
| 
      
 74 
     | 
    
         
            +
            private
         
     | 
| 
      
 75 
     | 
    
         
            +
              def partial_scrub_common(doc, full)
         
     | 
| 
      
 76 
     | 
    
         
            +
                # using the divisor search throws warnings in test
         
     | 
| 
      
 77 
     | 
    
         
            +
                assert_tag_count(doc, 'a', 0)
         
     | 
| 
      
 78 
     | 
    
         
            +
                assert_tag_count(doc, 'p', full.search('//p').size)
         
     | 
| 
      
 79 
     | 
    
         
            +
                assert_tag_count(doc, 'div', full.search('//div').size)
         
     | 
| 
      
 80 
     | 
    
         
            +
                assert_tag_count(doc, 'img', full.search('//img').size)
         
     | 
| 
      
 81 
     | 
    
         
            +
                assert_tag_count(doc, 'br', full.search('//br').size)
         
     | 
| 
      
 82 
     | 
    
         
            +
                assert_tag_count(doc, 'script', 0)
         
     | 
| 
      
 83 
     | 
    
         
            +
              end
         
     | 
| 
       45 
84 
     | 
    
         
             
            end
         
     | 
    
        data/test/scrubber_data.rb
    CHANGED
    
    | 
         @@ -12,3 +12,23 @@ alert("gotcha");</script><img src="http://content.example.com/content/3587a2f6ee 
     | 
|
| 
       12 
12 
     | 
    
         
             
            <span>some random unclosed span
         
     | 
| 
       13 
13 
     | 
    
         
             
            <style type="text/css">.foo {color:blue}</style>
         
     | 
| 
       14 
14 
     | 
    
         
             
            EOS
         
     | 
| 
      
 15 
     | 
    
         
            +
             
     | 
| 
      
 16 
     | 
    
         
            +
            GOOGLE = <<-EOS
         
     | 
| 
      
 17 
     | 
    
         
            +
            <html><head><meta http-equiv="content-type" content="text/html; charset=ISO-8859-1"><title>Google</title><style><!--
         
     | 
| 
      
 18 
     | 
    
         
            +
            body,td,a,p,.h{font-family:arial,sans-serif}
         
     | 
| 
      
 19 
     | 
    
         
            +
            .h{font-size:20px}
         
     | 
| 
      
 20 
     | 
    
         
            +
            .h{color:#3366cc}
         
     | 
| 
      
 21 
     | 
    
         
            +
            .q{color:#00c}
         
     | 
| 
      
 22 
     | 
    
         
            +
            --></style>
         
     | 
| 
      
 23 
     | 
    
         
            +
            <script>
         
     | 
| 
      
 24 
     | 
    
         
            +
            <!--
         
     | 
| 
      
 25 
     | 
    
         
            +
            function sf(){document.f.q.focus();}
         
     | 
| 
      
 26 
     | 
    
         
            +
            // -->
         
     | 
| 
      
 27 
     | 
    
         
            +
            </script>
         
     | 
| 
      
 28 
     | 
    
         
            +
            </head><body bgcolor=#ffffff text=#000000 link=#0000cc vlink=#551a8b alink=#ff0000 onload="sf();if(document.images){new Image().src='/images/nav_logo2.png'}" topmargin=3 marginheight=3><center><div align=right nowrap style="padding-bottom:4px" width=100%><font size=-1><a href="/url?sa=p&pref=ig&pval=3&q=http://www.google.com/ig%3Fhl%3Den&usg=__yvmOvIrk79QYmDkrJAeuYO8jTmo=">Personalize this page</a> | <a href="https://www.google.com/accounts/Login?continue=http://www.google.com/&hl=en">Sign in</a></font></div><img alt="Google" height=110 src="/intl/en_ALL/images/logo.gif" width=276><br><br><form action="/search" name=f><script defer><!--
         
     | 
| 
      
 29 
     | 
    
         
            +
            function togDisp(e){stopB(e);var elems=document.getElementsByName('more');for(var i=0;i<elems.length;i++){var obj=elems[i],dp="";if(obj.style.display==""){dp="none";}obj.style.display=dp;}return false;}
         
     | 
| 
      
 30 
     | 
    
         
            +
            function stopB(e){if(!e)e=window.event;e.cancelBubble=true;}
         
     | 
| 
      
 31 
     | 
    
         
            +
            document.onclick=function(event){var elems=document.getElementsByName('more');if(elems[0].style.display==""){togDisp(event);}}
         
     | 
| 
      
 32 
     | 
    
         
            +
            //-->
         
     | 
| 
      
 33 
     | 
    
         
            +
            </script><table border=0 cellspacing=0 cellpadding=4><tr><td nowrap><font size=-1><b>Web</b>    <a class=q href="http://images.google.com/imghp?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=wi">Images</a>    <a class=q href="http://video.google.com/?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=wv">Video</a>    <a class=q href="http://news.google.com/nwshp?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=wn">News</a>    <a class=q href="http://maps.google.com/maps?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=wl">Maps</a>    <b><a href="/intl/en/options/" class=q onclick="this.blur();return togDisp(event)">more »</a></b><span name=more id=more style="display:none;position:absolute;background:#fff;border:1px solid #369;margin:-.5ex 2ex;padding:0 0 .5ex .8ex;width:16ex;line-height:1.9;z-index:1000" onclick="stopB(event)"><a href=# onclick="return togDisp(event)"><img border=0 src=/images/x2.gif width=12 height=12 alt="Close menu" align=right hspace=4 vspace=4></a><a class=q href="http://blogsearch.google.com/?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=wb">Blogs</a><br><a class=q href="http://books.google.com/bkshp?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=wp">Books</a><br><a class=q href="http://froogle.google.com/frghp?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=wf">Froogle</a><br><a class=q href="http://groups.google.com/grphp?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=wg">Groups</a><br><a class=q href="http://www.google.com/ptshp?ie=ISO-8859-1&oe=ISO-8859-1&hl=en&tab=wt">Patents</a><br><a href="/intl/en/options/" class=q><b>even more »</b></a></span></font></td></tr></table><table cellpadding=0 cellspacing=0><tr valign=top><td width=25%> </td><td align=center nowrap><input name=hl type=hidden value=en><input type=hidden name=ie value="ISO-8859-1"><input maxlength=2048 name=q size=55 title="Google Search" value=""><br><input name=btnG type=submit value="Google Search"><input name=btnI type=submit value="I'm Feeling Lucky"></td><td nowrap width=25%><font size=-2>  <a href=/advanced_search?hl=en>Advanced 
Search</a><br>  <a href=/preferences?hl=en>Preferences</a><br>  <a href=/language_tools?hl=en>Language Tools</a></font></td></tr></table></form><br><br><font size=-1><a href="/intl/en/ads/">Advertising Programs</a> - <a href="/services/">Business Solutions</a> - <a href=/intl/en/about.html>About Google</a></font><p><font size=-2>©2007 Google</font></p></center></body></html>
         
     | 
| 
      
 34 
     | 
    
         
            +
            EOS
         
     | 
    
        metadata
    CHANGED
    
    | 
         @@ -1,10 +1,10 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            --- !ruby/object:Gem::Specification 
         
     | 
| 
       2 
     | 
    
         
            -
            rubygems_version: 0.9.1
         
     | 
| 
      
 2 
     | 
    
         
            +
            rubygems_version: 0.9.2.1
         
     | 
| 
       3 
3 
     | 
    
         
             
            specification_version: 1
         
     | 
| 
       4 
4 
     | 
    
         
             
            name: hpricot_scrub
         
     | 
| 
       5 
5 
     | 
    
         
             
            version: !ruby/object:Gem::Version 
         
     | 
| 
       6 
     | 
    
         
            -
              version: 0.2.2
     | 
| 
       7 
     | 
    
         
            -
            date: 2007- 
     | 
| 
      
 6 
     | 
    
         
            +
              version: 0.2.3
         
     | 
| 
      
 7 
     | 
    
         
            +
            date: 2007-04-05 00:00:00 -07:00
         
     | 
| 
       8 
8 
     | 
    
         
             
            summary: Scrub HTML with Hpricot
         
     | 
| 
       9 
9 
     | 
    
         
             
            require_paths: 
         
     | 
| 
       10 
10 
     | 
    
         
             
            - lib
         
     |