hpricot 0.4-mswin32 → 0.5-mswin32
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +16 -0
 - data/README +279 -4
 - data/Rakefile +12 -3
 - data/ext/hpricot_scan/hpricot_scan.c +3106 -3348
 - data/ext/hpricot_scan/hpricot_scan.rl +78 -38
 - data/lib/hpricot.rb +19 -0
 - data/lib/hpricot/elements.rb +194 -87
 - data/lib/hpricot/inspect.rb +13 -0
 - data/lib/hpricot/parse.rb +83 -99
 - data/lib/hpricot/tag.rb +114 -40
 - data/lib/hpricot/traverse.rb +311 -61
 - data/lib/hpricot_scan.so +0 -0
 - data/test/files/cy0.html +3653 -0
 - data/test/files/utf8.html +1054 -0
 - data/test/files/week9.html +1723 -0
 - data/test/test_parser.rb +160 -10
 - data/test/test_paths.rb +16 -0
 - data/test/test_preserved.rb +46 -0
 - data/test/test_xml.rb +15 -0
 - metadata +41 -35
 
    
        data/test/test_parser.rb
    CHANGED
    
    | 
         @@ -5,49 +5,57 @@ require 'hpricot' 
     | 
|
| 
       5 
5 
     | 
    
         
             
            require 'load_files'
         
     | 
| 
       6 
6 
     | 
    
         | 
| 
       7 
7 
     | 
    
         
             
            class TestParser < Test::Unit::TestCase
         
     | 
| 
       8 
     | 
    
         
            -
              def  
     | 
| 
      
 8 
     | 
    
         
            +
              def test_set_attr
         
     | 
| 
       9 
9 
     | 
    
         
             
                @basic = Hpricot.parse(TestFiles::BASIC)
         
     | 
| 
       10 
     | 
    
         
            -
                @ 
     | 
| 
       11 
     | 
    
         
            -
                 
     | 
| 
       12 
     | 
    
         
            -
                 
     | 
| 
       13 
     | 
    
         
            -
                # @utf8 = Hpricot.parse(TestFiles::UTF8)
         
     | 
| 
      
 10 
     | 
    
         
            +
                @basic.search('//p').set('class', 'para')
         
     | 
| 
      
 11 
     | 
    
         
            +
                assert_equal 4, @basic.search('//p').length
         
     | 
| 
      
 12 
     | 
    
         
            +
                assert_equal 4, @basic.search('//p').find_all { |x| x['class'] == 'para' }.length
         
     | 
| 
       14 
13 
     | 
    
         
             
              end
         
     | 
| 
       15 
14 
     | 
    
         | 
| 
       16 
     | 
    
         
            -
              #  
     | 
| 
       17 
     | 
    
         
            -
               
     | 
| 
       18 
     | 
    
         
            -
             
     | 
| 
       19 
     | 
    
         
            -
             
     | 
| 
      
 15 
     | 
    
         
            +
              # Test creating a new element 
         
     | 
| 
      
 16 
     | 
    
         
            +
              def test_new_element 
         
     | 
| 
      
 17 
     | 
    
         
            +
                elem = Hpricot::Elem.new(Hpricot::STag.new('form')) 
         
     | 
| 
      
 18 
     | 
    
         
            +
                assert_not_nil(elem) 
         
     | 
| 
      
 19 
     | 
    
         
            +
                assert_not_nil(elem.attributes) 
         
     | 
| 
      
 20 
     | 
    
         
            +
              end 
         
     | 
| 
       20 
21 
     | 
    
         | 
| 
       21 
22 
     | 
    
         
             
              def test_scan_text
         
     | 
| 
       22 
23 
     | 
    
         
             
                assert_equal 'FOO', Hpricot.make("FOO").first.content
         
     | 
| 
       23 
24 
     | 
    
         
             
              end
         
     | 
| 
       24 
25 
     | 
    
         | 
| 
       25 
26 
     | 
    
         
             
              def test_get_element_by_id
         
     | 
| 
      
 27 
     | 
    
         
            +
                @basic = Hpricot.parse(TestFiles::BASIC)
         
     | 
| 
       26 
28 
     | 
    
         
             
                assert_equal 'link1', @basic.get_element_by_id('link1')['id']
         
     | 
| 
       27 
29 
     | 
    
         
             
                assert_equal 'link1', @basic.get_element_by_id('body1').get_element_by_id('link1').get_attribute('id')
         
     | 
| 
       28 
30 
     | 
    
         
             
              end
         
     | 
| 
       29 
31 
     | 
    
         | 
| 
       30 
32 
     | 
    
         
             
              def test_get_element_by_tag_name
         
     | 
| 
      
 33 
     | 
    
         
            +
                @basic = Hpricot.parse(TestFiles::BASIC)
         
     | 
| 
       31 
34 
     | 
    
         
             
                assert_equal 'link1', @basic.get_elements_by_tag_name('a')[0].get_attribute('id')
         
     | 
| 
       32 
35 
     | 
    
         
             
                assert_equal 'link1', @basic.get_elements_by_tag_name('body')[0].get_element_by_id('link1').get_attribute('id')
         
     | 
| 
       33 
36 
     | 
    
         
             
              end
         
     | 
| 
       34 
37 
     | 
    
         | 
| 
       35 
38 
     | 
    
         
             
              def test_output_basic
         
     | 
| 
      
 39 
     | 
    
         
            +
                @basic = Hpricot.parse(TestFiles::BASIC)
         
     | 
| 
       36 
40 
     | 
    
         
             
                @basic2 = Hpricot.parse(@basic.inner_html)
         
     | 
| 
       37 
41 
     | 
    
         
             
                scan_basic @basic2
         
     | 
| 
       38 
42 
     | 
    
         
             
              end
         
     | 
| 
       39 
43 
     | 
    
         | 
| 
       40 
44 
     | 
    
         
             
              def test_scan_basic
         
     | 
| 
      
 45 
     | 
    
         
            +
                @basic = Hpricot.parse(TestFiles::BASIC)
         
     | 
| 
       41 
46 
     | 
    
         
             
                scan_basic @basic
         
     | 
| 
       42 
47 
     | 
    
         
             
              end
         
     | 
| 
       43 
48 
     | 
    
         | 
| 
       44 
49 
     | 
    
         
             
              def scan_basic doc
         
     | 
| 
      
 50 
     | 
    
         
            +
                assert_kind_of Hpricot::XMLDecl, doc.children.first 
         
     | 
| 
      
 51 
     | 
    
         
            +
                assert_not_equal doc.children.first.to_s, doc.children[1].to_s 
         
     | 
| 
       45 
52 
     | 
    
         
             
                assert_equal 'link1', doc.at('#link1')['id']
         
     | 
| 
       46 
53 
     | 
    
         
             
                assert_equal 'link1', doc.at("p a")['id']
         
     | 
| 
       47 
54 
     | 
    
         
             
                assert_equal 'link1', (doc/:p/:a).first['id']
         
     | 
| 
       48 
55 
     | 
    
         
             
                assert_equal 'link1', doc.search('p').at('a').get_attribute('id')
         
     | 
| 
       49 
56 
     | 
    
         
             
                assert_equal 'link2', (doc/'p').filter('.ohmy').search('a').first.get_attribute('id')
         
     | 
| 
       50 
57 
     | 
    
         
             
                assert_equal (doc/'p')[2], (doc/'p').filter(':nth(2)')[0]
         
     | 
| 
      
 58 
     | 
    
         
            +
                assert_equal (doc/'p')[2], (doc/'p').filter('[3]')[0]
         
     | 
| 
       51 
59 
     | 
    
         
             
                assert_equal 4, (doc/'p').filter('*').length
         
     | 
| 
       52 
60 
     | 
    
         
             
                assert_equal 4, (doc/'p').filter('* *').length
         
     | 
| 
       53 
61 
     | 
    
         
             
                eles = (doc/'p').filter('.ohmy')
         
     | 
| 
         @@ -64,23 +72,65 @@ class TestParser < Test::Unit::TestCase 
     | 
|
| 
       64 
72 
     | 
    
         
             
                assert_equal 2, (doc/'p / a').length
         
     | 
| 
       65 
73 
     | 
    
         
             
                assert_equal 2, (doc/'link ~ link').length
         
     | 
| 
       66 
74 
     | 
    
         
             
                assert_equal 3, (doc/'title ~ link').length
         
     | 
| 
      
 75 
     | 
    
         
            +
                assert_equal 5, (doc/"//p/text()").length
         
     | 
| 
      
 76 
     | 
    
         
            +
                assert_equal 6, (doc/"//p[a]//text()").length
         
     | 
| 
      
 77 
     | 
    
         
            +
                assert_equal 2, (doc/"//p/a/text()").length
         
     | 
| 
      
 78 
     | 
    
         
            +
              end
         
     | 
| 
      
 79 
     | 
    
         
            +
             
     | 
| 
      
 80 
     | 
    
         
            +
              def test_positional
         
     | 
| 
      
 81 
     | 
    
         
            +
                h = Hpricot( "<div><br/><p>one</p><p>two</p></div>" )
         
     | 
| 
      
 82 
     | 
    
         
            +
                assert_equal "<p>one</p>", h.search("//div/p:eq(0)").to_s
         
     | 
| 
      
 83 
     | 
    
         
            +
                assert_equal "<p>one</p>", h.search("//div/p:first").to_s
         
     | 
| 
      
 84 
     | 
    
         
            +
                assert_equal "<p>one</p>", h.search("//div/p:first()").to_s
         
     | 
| 
       67 
85 
     | 
    
         
             
              end
         
     | 
| 
       68 
86 
     | 
    
         | 
| 
       69 
87 
     | 
    
         
             
              def test_scan_boingboing
         
     | 
| 
      
 88 
     | 
    
         
            +
                @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
         
     | 
| 
       70 
89 
     | 
    
         
             
                assert_equal 60, (@boingboing/'p.posted').length
         
     | 
| 
       71 
90 
     | 
    
         
             
                assert_equal 1, @boingboing.search("//a[@name='027906']").length
         
     | 
| 
      
 91 
     | 
    
         
            +
                assert_equal 10, @boingboing.search("script comment()").length
         
     | 
| 
      
 92 
     | 
    
         
            +
                assert_equal 3, @boingboing.search("a[text()*='Boing']").length
         
     | 
| 
      
 93 
     | 
    
         
            +
                assert_equal 1, @boingboing.search("h3[text()='College kids reportedly taking more smart drugs']").length
         
     | 
| 
      
 94 
     | 
    
         
            +
                assert_equal 0, @boingboing.search("h3[text()='College']").length
         
     | 
| 
      
 95 
     | 
    
         
            +
                assert_equal 60, @boingboing.search("h3").length
         
     | 
| 
      
 96 
     | 
    
         
            +
                assert_equal 59, @boingboing.search("h3[text()!='College kids reportedly taking more smart drugs']").length
         
     | 
| 
      
 97 
     | 
    
         
            +
                assert_equal 17, @boingboing.search("h3[text()$='s']").length
         
     | 
| 
      
 98 
     | 
    
         
            +
                assert_equal 128, @boingboing.search("p[text()]").length
         
     | 
| 
      
 99 
     | 
    
         
            +
                assert_equal 211, @boingboing.search("p").length
         
     | 
| 
      
 100 
     | 
    
         
            +
              end
         
     | 
| 
      
 101 
     | 
    
         
            +
             
     | 
| 
      
 102 
     | 
    
         
            +
              def test_reparent
         
     | 
| 
      
 103 
     | 
    
         
            +
                doc = Hpricot(%{<div id="blurb_1"></div>})
         
     | 
| 
      
 104 
     | 
    
         
            +
                div1 = doc.search('#blurb_1')
         
     | 
| 
      
 105 
     | 
    
         
            +
                div1.before('<div id="blurb_0"></div>')
         
     | 
| 
      
 106 
     | 
    
         
            +
             
     | 
| 
      
 107 
     | 
    
         
            +
                div0 = doc.search('#blurb_0')
         
     | 
| 
      
 108 
     | 
    
         
            +
                div0.before('<div id="blurb_a"></div>')
         
     | 
| 
      
 109 
     | 
    
         
            +
             
     | 
| 
      
 110 
     | 
    
         
            +
                assert_equal 'div', doc.at('#blurb_1').name
         
     | 
| 
      
 111 
     | 
    
         
            +
              end
         
     | 
| 
      
 112 
     | 
    
         
            +
             
     | 
| 
      
 113 
     | 
    
         
            +
              def test_siblings
         
     | 
| 
      
 114 
     | 
    
         
            +
                @basic = Hpricot.parse(TestFiles::BASIC)
         
     | 
| 
      
 115 
     | 
    
         
            +
                t = @basic.at(:title)
         
     | 
| 
      
 116 
     | 
    
         
            +
                e = t.next_sibling
         
     | 
| 
      
 117 
     | 
    
         
            +
                assert_equal 'test1.css', e['href']
         
     | 
| 
      
 118 
     | 
    
         
            +
                assert_equal 'title', e.previous_sibling.name
         
     | 
| 
       72 
119 
     | 
    
         
             
              end
         
     | 
| 
       73 
120 
     | 
    
         | 
| 
       74 
121 
     | 
    
         
             
              def test_css_negation
         
     | 
| 
      
 122 
     | 
    
         
            +
                @basic = Hpricot.parse(TestFiles::BASIC)
         
     | 
| 
       75 
123 
     | 
    
         
             
                assert_equal 3, (@basic/'p:not(.final)').length
         
     | 
| 
       76 
124 
     | 
    
         
             
              end
         
     | 
| 
       77 
125 
     | 
    
         | 
| 
       78 
126 
     | 
    
         
             
              def test_remove_attribute
         
     | 
| 
      
 127 
     | 
    
         
            +
                @basic = Hpricot.parse(TestFiles::BASIC)
         
     | 
| 
       79 
128 
     | 
    
         
             
                (@basic/:p).each { |ele| ele.remove_attribute('class') }
         
     | 
| 
       80 
129 
     | 
    
         
             
                assert_equal 0, (@basic/'p[@class]').length
         
     | 
| 
       81 
130 
     | 
    
         
             
              end
         
     | 
| 
       82 
131 
     | 
    
         | 
| 
       83 
132 
     | 
    
         
             
              def test_abs_xpath
         
     | 
| 
      
 133 
     | 
    
         
            +
                @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
         
     | 
| 
       84 
134 
     | 
    
         
             
                assert_equal 60, @boingboing.search("/html/body//p[@class='posted']").length
         
     | 
| 
       85 
135 
     | 
    
         
             
                assert_equal 60, @boingboing.search("/*/body//p[@class='posted']").length
         
     | 
| 
       86 
136 
     | 
    
         
             
                assert_equal 18, @boingboing.search("//script").length
         
     | 
| 
         @@ -94,6 +144,7 @@ class TestParser < Test::Unit::TestCase 
     | 
|
| 
       94 
144 
     | 
    
         
             
              end
         
     | 
| 
       95 
145 
     | 
    
         | 
| 
       96 
146 
     | 
    
         
             
              def test_predicates
         
     | 
| 
      
 147 
     | 
    
         
            +
                @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
         
     | 
| 
       97 
148 
     | 
    
         
             
                assert_equal 2, @boingboing.search('//link[@rel="alternate"]').length
         
     | 
| 
       98 
149 
     | 
    
         
             
                p_imgs = @boingboing.search('//div/p[/a/img]')
         
     | 
| 
       99 
150 
     | 
    
         
             
                assert_equal 15, p_imgs.length
         
     | 
| 
         @@ -105,7 +156,10 @@ class TestParser < Test::Unit::TestCase 
     | 
|
| 
       105 
156 
     | 
    
         
             
              end
         
     | 
| 
       106 
157 
     | 
    
         | 
| 
       107 
158 
     | 
    
         
             
              def test_alt_predicates
         
     | 
| 
      
 159 
     | 
    
         
            +
                @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
         
     | 
| 
       108 
160 
     | 
    
         
             
                assert_equal 2, @boingboing.search('//table/tr:last').length
         
     | 
| 
      
 161 
     | 
    
         
            +
             
     | 
| 
      
 162 
     | 
    
         
            +
                @basic = Hpricot.parse(TestFiles::BASIC)
         
     | 
| 
       109 
163 
     | 
    
         
             
                assert_equal "<p>The third paragraph</p>",
         
     | 
| 
       110 
164 
     | 
    
         
             
                    @basic.search('p:eq(2)').to_html
         
     | 
| 
       111 
165 
     | 
    
         
             
                assert_equal '<p class="last final"><b>THE FINAL PARAGRAPH</b></p>',
         
     | 
| 
         @@ -114,11 +168,43 @@ class TestParser < Test::Unit::TestCase 
     | 
|
| 
       114 
168 
     | 
    
         
             
              end
         
     | 
| 
       115 
169 
     | 
    
         | 
| 
       116 
170 
     | 
    
         
             
              def test_many_paths
         
     | 
| 
      
 171 
     | 
    
         
            +
                @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
         
     | 
| 
       117 
172 
     | 
    
         
             
                assert_equal 62, @boingboing.search('p.posted, link[@rel="alternate"]').length
         
     | 
| 
       118 
173 
     | 
    
         
             
                assert_equal 20, @boingboing.search('//div/p[a/img]|//link[@rel="alternate"]').length
         
     | 
| 
       119 
174 
     | 
    
         
             
              end
         
     | 
| 
       120 
175 
     | 
    
         | 
| 
      
 176 
     | 
    
         
            +
              def test_stacked_search
         
     | 
| 
      
 177 
     | 
    
         
            +
                @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
         
     | 
| 
      
 178 
     | 
    
         
            +
                assert_kind_of Hpricot::Elements, @boingboing.search('//div/p').search('a img')
         
     | 
| 
      
 179 
     | 
    
         
            +
              end
         
     | 
| 
      
 180 
     | 
    
         
            +
             
     | 
| 
      
 181 
     | 
    
         
            +
              def test_class_search
         
     | 
| 
      
 182 
     | 
    
         
            +
                # test case sent by Chih-Chao Lam
         
     | 
| 
      
 183 
     | 
    
         
            +
                doc = Hpricot("<div class=xyz'>abc</div>")
         
     | 
| 
      
 184 
     | 
    
         
            +
                assert_equal 1, doc.search(".xyz").length
         
     | 
| 
      
 185 
     | 
    
         
            +
                doc = Hpricot("<div class=xyz>abc</div><div class=abc>xyz</div>")
         
     | 
| 
      
 186 
     | 
    
         
            +
                assert_equal 1, doc.search(".xyz").length
         
     | 
| 
      
 187 
     | 
    
         
            +
                assert_equal 4, doc.search("*").length
         
     | 
| 
      
 188 
     | 
    
         
            +
              end
         
     | 
| 
      
 189 
     | 
    
         
            +
             
     | 
| 
      
 190 
     | 
    
         
            +
              def test_kleene_star
         
     | 
| 
      
 191 
     | 
    
         
            +
                # bug noticed by raja bhatia
         
     | 
| 
      
 192 
     | 
    
         
            +
                doc = Hpricot("<span class='small'>1</span><div class='large'>2</div><div class='small'>3</div><span class='blue large'>4</span>")
         
     | 
| 
      
 193 
     | 
    
         
            +
                assert_equal 2, doc.search("*[@class*='small']").length
         
     | 
| 
      
 194 
     | 
    
         
            +
                assert_equal 2, doc.search("*.small").length
         
     | 
| 
      
 195 
     | 
    
         
            +
                assert_equal 2, doc.search(".small").length
         
     | 
| 
      
 196 
     | 
    
         
            +
                assert_equal 2, doc.search(".large").length
         
     | 
| 
      
 197 
     | 
    
         
            +
              end
         
     | 
| 
      
 198 
     | 
    
         
            +
             
     | 
| 
      
 199 
     | 
    
         
            +
              def test_empty_comment
         
     | 
| 
      
 200 
     | 
    
         
            +
                doc = Hpricot("<p><!----></p>")
         
     | 
| 
      
 201 
     | 
    
         
            +
                assert doc.children[0].children[0].comment?
         
     | 
| 
      
 202 
     | 
    
         
            +
                doc = Hpricot("<p><!-- --></p>")
         
     | 
| 
      
 203 
     | 
    
         
            +
                assert doc.children[0].children[0].comment?
         
     | 
| 
      
 204 
     | 
    
         
            +
              end
         
     | 
| 
      
 205 
     | 
    
         
            +
             
     | 
| 
       121 
206 
     | 
    
         
             
              def test_body_newlines
         
     | 
| 
      
 207 
     | 
    
         
            +
                @immob = Hpricot.parse(TestFiles::IMMOB)
         
     | 
| 
       122 
208 
     | 
    
         
             
                body = @immob.at(:body)
         
     | 
| 
       123 
209 
     | 
    
         
             
                {'background' => '', 'bgcolor' => '#ffffff', 'text' => '#000000', 'marginheight' => '10',
         
     | 
| 
       124 
210 
     | 
    
         
             
                 'marginwidth' => '10', 'leftmargin' => '10', 'topmargin' => '10', 'link' => '#000066',
         
     | 
| 
         @@ -127,15 +213,79 @@ class TestParser < Test::Unit::TestCase 
     | 
|
| 
       127 
213 
     | 
    
         
             
                end
         
     | 
| 
       128 
214 
     | 
    
         
             
              end
         
     | 
| 
       129 
215 
     | 
    
         | 
| 
      
 216 
     | 
    
         
            +
              def test_nested_twins
         
     | 
| 
      
 217 
     | 
    
         
            +
                @doc = Hpricot("<div>Hi<div>there</div></div>")
         
     | 
| 
      
 218 
     | 
    
         
            +
                assert_equal 1, (@doc/"div div").length
         
     | 
| 
      
 219 
     | 
    
         
            +
              end
         
     | 
| 
      
 220 
     | 
    
         
            +
             
     | 
| 
      
 221 
     | 
    
         
            +
              def test_wildcard
         
     | 
| 
      
 222 
     | 
    
         
            +
                @basic = Hpricot.parse(TestFiles::BASIC)
         
     | 
| 
      
 223 
     | 
    
         
            +
                assert_equal 3, (@basic/"*[@id]").length
         
     | 
| 
      
 224 
     | 
    
         
            +
                assert_equal 3, (@basic/"//*[@id]").length
         
     | 
| 
      
 225 
     | 
    
         
            +
              end
         
     | 
| 
      
 226 
     | 
    
         
            +
             
     | 
| 
       130 
227 
     | 
    
         
             
              def test_javascripts
         
     | 
| 
      
 228 
     | 
    
         
            +
                @immob = Hpricot.parse(TestFiles::IMMOB)
         
     | 
| 
       131 
229 
     | 
    
         
             
                assert_equal 3, (@immob/:script)[0].inner_html.scan(/<LINK/).length
         
     | 
| 
       132 
230 
     | 
    
         
             
              end
         
     | 
| 
       133 
231 
     | 
    
         | 
| 
      
 232 
     | 
    
         
            +
              def test_nested_scripts
         
     | 
| 
      
 233 
     | 
    
         
            +
                @week9 = Hpricot.parse(TestFiles::WEEK9)
         
     | 
| 
      
 234 
     | 
    
         
            +
                assert_equal 14, (@week9/"a").find_all { |x| x.inner_html.include? "GameCenter" }.length
         
     | 
| 
      
 235 
     | 
    
         
            +
              end
         
     | 
| 
      
 236 
     | 
    
         
            +
             
     | 
| 
       134 
237 
     | 
    
         
             
              def test_uswebgen
         
     | 
| 
      
 238 
     | 
    
         
            +
                @uswebgen = Hpricot.parse(TestFiles::USWEBGEN)
         
     | 
| 
       135 
239 
     | 
    
         
             
                # sent by brent beardsley, hpricot 0.3 had problems with all the links.
         
     | 
| 
       136 
240 
     | 
    
         
             
                assert_equal 67, (@uswebgen/:a).length
         
     | 
| 
       137 
241 
     | 
    
         
             
              end
         
     | 
| 
       138 
242 
     | 
    
         | 
| 
       139 
     | 
    
         
            -
              def  
     | 
| 
      
 243 
     | 
    
         
            +
              def test_mangled_tags
         
     | 
| 
      
 244 
     | 
    
         
            +
                [%{<html><form name='loginForm' method='post' action='/units/a/login/1,13088,779-1,00.html'?URL=></form></html>},
         
     | 
| 
      
 245 
     | 
    
         
            +
                 %{<html><form name='loginForm' ?URL= method='post' action='/units/a/login/1,13088,779-1,00.html'></form></html>},
         
     | 
| 
      
 246 
     | 
    
         
            +
                 %{<html><form name='loginForm'?URL= ?URL= method='post' action='/units/a/login/1,13088,779-1,00.html'?URL=></form></html>},
         
     | 
| 
      
 247 
     | 
    
         
            +
                 %{<html><form name='loginForm' method='post' action='/units/a/login/1,13088,779-1,00.html' ?URL=></form></html>}].
         
     | 
| 
      
 248 
     | 
    
         
            +
                each do |str|
         
     | 
| 
      
 249 
     | 
    
         
            +
                  doc = Hpricot(str)
         
     | 
| 
      
 250 
     | 
    
         
            +
                  assert_equal 1, (doc/:form).length
         
     | 
| 
      
 251 
     | 
    
         
            +
                  assert_equal '/units/a/login/1,13088,779-1,00.html', doc.at("form")['action']
         
     | 
| 
      
 252 
     | 
    
         
            +
                end
         
     | 
| 
      
 253 
     | 
    
         
            +
              end
         
     | 
| 
      
 254 
     | 
    
         
            +
             
     | 
| 
      
 255 
     | 
    
         
            +
              def test_procins
         
     | 
| 
      
 256 
     | 
    
         
            +
                doc = Hpricot("<?php print('hello') ?>\n<?xml blah='blah'?>")
         
     | 
| 
      
 257 
     | 
    
         
            +
                assert_equal "php", doc.children[0].target
         
     | 
| 
      
 258 
     | 
    
         
            +
                assert_equal "blah='blah'", doc.children[2].content
         
     | 
| 
      
 259 
     | 
    
         
            +
              end
         
     | 
| 
      
 260 
     | 
    
         
            +
             
     | 
| 
      
 261 
     | 
    
         
            +
              def test_buffer_error
         
     | 
| 
      
 262 
     | 
    
         
            +
                assert_raise Hpricot::ParseError, "ran out of buffer space on element <input>, starting on line 3." do
         
     | 
| 
      
 263 
     | 
    
         
            +
                  Hpricot(%{<p>\n\n<input type="hidden" name="__VIEWSTATE"  value="#{(("X" * 2000) + "\n") * 22}" />\n\n</p>})
         
     | 
| 
      
 264 
     | 
    
         
            +
                end
         
     | 
| 
      
 265 
     | 
    
         
            +
              end
         
     | 
| 
      
 266 
     | 
    
         
            +
             
     | 
| 
      
 267 
     | 
    
         
            +
              def test_filters
         
     | 
| 
      
 268 
     | 
    
         
            +
                @basic = Hpricot.parse(TestFiles::BASIC)
         
     | 
| 
      
 269 
     | 
    
         
            +
                assert_equal 0, (@basic/"title:parent").size
         
     | 
| 
      
 270 
     | 
    
         
            +
                assert_equal 3, (@basic/"p:parent").size
         
     | 
| 
      
 271 
     | 
    
         
            +
                assert_equal 1, (@basic/"title:empty").size
         
     | 
| 
      
 272 
     | 
    
         
            +
                assert_equal 1, (@basic/"p:empty").size
         
     | 
| 
      
 273 
     | 
    
         
            +
              end
         
     | 
| 
      
 274 
     | 
    
         
            +
             
     | 
| 
      
 275 
     | 
    
         
            +
              def test_keep_cdata
         
     | 
| 
      
 276 
     | 
    
         
            +
                str = %{<script> /*<![CDATA[*/
         
     | 
| 
      
 277 
     | 
    
         
            +
                /*]]>*/ </script>}
         
     | 
| 
      
 278 
     | 
    
         
            +
                assert_equal str, Hpricot(str).to_html
         
     | 
| 
      
 279 
     | 
    
         
            +
              end
         
     | 
| 
      
 280 
     | 
    
         
            +
             
     | 
| 
      
 281 
     | 
    
         
            +
              def test_namespace
         
     | 
| 
      
 282 
     | 
    
         
            +
                chunk = <<-END
         
     | 
| 
      
 283 
     | 
    
         
            +
                <a xmlns:t="http://www.nexopia.com/dev/template">
         
     | 
| 
      
 284 
     | 
    
         
            +
                  <t:sam>hi </t:sam>
         
     | 
| 
      
 285 
     | 
    
         
            +
                </a>
         
     | 
| 
      
 286 
     | 
    
         
            +
                END
         
     | 
| 
      
 287 
     | 
    
         
            +
                doc = Hpricot::XML(chunk)
         
     | 
| 
      
 288 
     | 
    
         
            +
                assert (doc/"//t:sam").size > 0 # at least this should probably work
         
     | 
| 
      
 289 
     | 
    
         
            +
                # assert (doc/"//sam").size > 0  # this would be nice 
         
     | 
| 
       140 
290 
     | 
    
         
             
              end
         
     | 
| 
       141 
291 
     | 
    
         
             
            end
         
     | 
    
        data/test/test_paths.rb
    ADDED
    
    | 
         @@ -0,0 +1,16 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            #!/usr/bin/env ruby
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            require 'test/unit'
         
     | 
| 
      
 4 
     | 
    
         
            +
            require 'hpricot'
         
     | 
| 
      
 5 
     | 
    
         
            +
            require 'load_files'
         
     | 
| 
      
 6 
     | 
    
         
            +
             
     | 
| 
      
 7 
     | 
    
         
            +
            class TestParser < Test::Unit::TestCase
         
     | 
| 
      
 8 
     | 
    
         
            +
              def test_roundtrip
         
     | 
| 
      
 9 
     | 
    
         
            +
                @basic = Hpricot.parse(TestFiles::BASIC)
         
     | 
| 
      
 10 
     | 
    
         
            +
                %w[link link[2] body #link1 a p.ohmy].each do |css_sel|
         
     | 
| 
      
 11 
     | 
    
         
            +
                  ele = @basic.at(css_sel)
         
     | 
| 
      
 12 
     | 
    
         
            +
                  assert_equal ele, @basic.at(ele.css_path)
         
     | 
| 
      
 13 
     | 
    
         
            +
                  assert_equal ele, @basic.at(ele.xpath)
         
     | 
| 
      
 14 
     | 
    
         
            +
                end
         
     | 
| 
      
 15 
     | 
    
         
            +
              end
         
     | 
| 
      
 16 
     | 
    
         
            +
            end
         
     | 
    
        data/test/test_preserved.rb
    ADDED
    
    | 
         @@ -0,0 +1,46 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            #!/usr/bin/env ruby
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            require 'test/unit'
         
     | 
| 
      
 4 
     | 
    
         
            +
            require 'hpricot'
         
     | 
| 
      
 5 
     | 
    
         
            +
            require 'load_files'
         
     | 
| 
      
 6 
     | 
    
         
            +
             
     | 
| 
      
 7 
     | 
    
         
            +
            class TestPreserved < Test::Unit::TestCase
         
     | 
| 
      
 8 
     | 
    
         
            +
              def assert_roundtrip str
         
     | 
| 
      
 9 
     | 
    
         
            +
                doc = Hpricot(str)
         
     | 
| 
      
 10 
     | 
    
         
            +
                yield doc if block_given?
         
     | 
| 
      
 11 
     | 
    
         
            +
                str2 = doc.to_original_html
         
     | 
| 
      
 12 
     | 
    
         
            +
                [*str].zip([*str2]).each do |s1, s2|
         
     | 
| 
      
 13 
     | 
    
         
            +
                  assert_equal s1, s2
         
     | 
| 
      
 14 
     | 
    
         
            +
                end
         
     | 
| 
      
 15 
     | 
    
         
            +
              end
         
     | 
| 
      
 16 
     | 
    
         
            +
             
     | 
| 
      
 17 
     | 
    
         
            +
              def assert_html str1, str2
         
     | 
| 
      
 18 
     | 
    
         
            +
                doc = Hpricot(str2)
         
     | 
| 
      
 19 
     | 
    
         
            +
                yield doc if block_given?
         
     | 
| 
      
 20 
     | 
    
         
            +
                assert_equal str1, doc.to_original_html
         
     | 
| 
      
 21 
     | 
    
         
            +
              end
         
     | 
| 
      
 22 
     | 
    
         
            +
             
     | 
| 
      
 23 
     | 
    
         
            +
              def test_simple
         
     | 
| 
      
 24 
     | 
    
         
            +
                str = "<p>Hpricot is a <b>you know <i>uh</b> fine thing.</p>"
         
     | 
| 
      
 25 
     | 
    
         
            +
                assert_html str, str
         
     | 
| 
      
 26 
     | 
    
         
            +
                assert_html "<p class=\"new\">Hpricot is a <b>you know <i>uh</b> fine thing.</p>", str do |doc|
         
     | 
| 
      
 27 
     | 
    
         
            +
                  (doc/:p).set('class', 'new')
         
     | 
| 
      
 28 
     | 
    
         
            +
                end
         
     | 
| 
      
 29 
     | 
    
         
            +
              end
         
     | 
| 
      
 30 
     | 
    
         
            +
             
     | 
| 
      
 31 
     | 
    
         
            +
              def test_parent
         
     | 
| 
      
 32 
     | 
    
         
            +
                str = "<html><base href='/'><head><title>Test</title></head><body><div id='wrap'><p>Paragraph one.</p><p>Paragraph two.</p></div></body></html>"
         
     | 
| 
      
 33 
     | 
    
         
            +
                assert_html str, str
         
     | 
| 
      
 34 
     | 
    
         
            +
                assert_html "<html><base href='/'><body><div id=\"all\"><div><p>Paragraph one.</p></div><div><p>Paragraph two.</p></div></div></body></html>", str do |doc|
         
     | 
| 
      
 35 
     | 
    
         
            +
                  (doc/:head).remove
         
     | 
| 
      
 36 
     | 
    
         
            +
                  (doc/:div).set('id', 'all')
         
     | 
| 
      
 37 
     | 
    
         
            +
                  (doc/:p).wrap('<div></div>')
         
     | 
| 
      
 38 
     | 
    
         
            +
                end
         
     | 
| 
      
 39 
     | 
    
         
            +
              end
         
     | 
| 
      
 40 
     | 
    
         
            +
             
     | 
| 
      
 41 
     | 
    
         
            +
              def test_files
         
     | 
| 
      
 42 
     | 
    
         
            +
                assert_roundtrip TestFiles::BASIC
         
     | 
| 
      
 43 
     | 
    
         
            +
                assert_roundtrip TestFiles::BOINGBOING
         
     | 
| 
      
 44 
     | 
    
         
            +
                assert_roundtrip TestFiles::CY0
         
     | 
| 
      
 45 
     | 
    
         
            +
              end
         
     | 
| 
      
 46 
     | 
    
         
            +
            end
         
     | 
    
        data/test/test_xml.rb
    ADDED
    
    | 
         @@ -0,0 +1,15 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            #!/usr/bin/env ruby
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            require 'test/unit'
         
     | 
| 
      
 4 
     | 
    
         
            +
            require 'hpricot'
         
     | 
| 
      
 5 
     | 
    
         
            +
            require 'load_files'
         
     | 
| 
      
 6 
     | 
    
         
            +
             
     | 
| 
      
 7 
     | 
    
         
            +
            class TestParser < Test::Unit::TestCase
         
     | 
| 
      
 8 
     | 
    
         
            +
              # normally, the link tags are empty HTML tags.
         
     | 
| 
      
 9 
     | 
    
         
            +
              # contributed by laudney.
         
     | 
| 
      
 10 
     | 
    
         
            +
              def test_normally_empty
         
     | 
| 
      
 11 
     | 
    
         
            +
                doc = Hpricot::XML("<rss><channel><title>this is title</title><link>http://fake.com</link></channel></rss>")
         
     | 
| 
      
 12 
     | 
    
         
            +
                assert_equal "this is title", (doc/:rss/:channel/:title).text
         
     | 
| 
      
 13 
     | 
    
         
            +
                assert_equal "http://fake.com", (doc/:rss/:channel/:link).text
         
     | 
| 
      
 14 
     | 
    
         
            +
              end
         
     | 
| 
      
 15 
     | 
    
         
            +
            end
         
     | 
    
        metadata
    CHANGED
    
    | 
         @@ -3,11 +3,11 @@ rubygems_version: 0.9.0 
     | 
|
| 
       3 
3 
     | 
    
         
             
            specification_version: 1
         
     | 
| 
       4 
4 
     | 
    
         
             
            name: hpricot
         
     | 
| 
       5 
5 
     | 
    
         
             
            version: !ruby/object:Gem::Version 
         
     | 
| 
       6 
     | 
    
         
            -
              version: "0.4"
     | 
| 
       7 
     | 
    
         
            -
            date:  
     | 
| 
      
 6 
     | 
    
         
            +
              version: "0.5"
         
     | 
| 
      
 7 
     | 
    
         
            +
            date: 2007-01-31 00:00:00 -08:00
         
     | 
| 
       8 
8 
     | 
    
         
             
            summary: a swift, liberal HTML parser with a fantastic library
         
     | 
| 
       9 
9 
     | 
    
         
             
            require_paths: 
         
     | 
| 
       10 
     | 
    
         
            -
             
     | 
| 
      
 10 
     | 
    
         
            +
            - lib
         
     | 
| 
       11 
11 
     | 
    
         
             
            email: why@ruby-lang.org
         
     | 
| 
       12 
12 
     | 
    
         
             
            homepage: http://code.whytheluckystiff.net/hpricot/
         
     | 
| 
       13 
13 
     | 
    
         
             
            rubyforge_project: 
         
     | 
| 
         @@ -18,50 +18,56 @@ bindir: bin 
     | 
|
| 
       18 
18 
     | 
    
         
             
            has_rdoc: false
         
     | 
| 
       19 
19 
     | 
    
         
             
            required_ruby_version: !ruby/object:Gem::Version::Requirement 
         
     | 
| 
       20 
20 
     | 
    
         
             
              requirements: 
         
     | 
| 
       21 
     | 
    
         
            -
             
     | 
| 
       22 
     | 
    
         
            -
             
     | 
| 
       23 
     | 
    
         
            -
             
     | 
| 
      
 21 
     | 
    
         
            +
              - - ">"
         
     | 
| 
      
 22 
     | 
    
         
            +
                - !ruby/object:Gem::Version 
         
     | 
| 
      
 23 
     | 
    
         
            +
                  version: 0.0.0
         
     | 
| 
       24 
24 
     | 
    
         
             
              version: 
         
     | 
| 
       25 
25 
     | 
    
         
             
            platform: mswin32
         
     | 
| 
       26 
26 
     | 
    
         
             
            signing_key: 
         
     | 
| 
       27 
27 
     | 
    
         
             
            cert_chain: 
         
     | 
| 
       28 
28 
     | 
    
         
             
            post_install_message: 
         
     | 
| 
       29 
29 
     | 
    
         
             
            authors: 
         
     | 
| 
       30 
     | 
    
         
            -
             
     | 
| 
      
 30 
     | 
    
         
            +
            - why the lucky stiff
         
     | 
| 
       31 
31 
     | 
    
         
             
            files: 
         
     | 
| 
       32 
     | 
    
         
            -
             
     | 
| 
       33 
     | 
    
         
            -
             
     | 
| 
       34 
     | 
    
         
            -
             
     | 
| 
       35 
     | 
    
         
            -
             
     | 
| 
       36 
     | 
    
         
            -
             
     | 
| 
       37 
     | 
    
         
            -
             
     | 
| 
       38 
     | 
    
         
            -
             
     | 
| 
       39 
     | 
    
         
            -
             
     | 
| 
       40 
     | 
    
         
            -
             
     | 
| 
       41 
     | 
    
         
            -
             
     | 
| 
       42 
     | 
    
         
            -
             
     | 
| 
       43 
     | 
    
         
            -
             
     | 
| 
       44 
     | 
    
         
            -
             
     | 
| 
       45 
     | 
    
         
            -
             
     | 
| 
       46 
     | 
    
         
            -
             
     | 
| 
       47 
     | 
    
         
            -
             
     | 
| 
       48 
     | 
    
         
            -
             
     | 
| 
       49 
     | 
    
         
            -
             
     | 
| 
       50 
     | 
    
         
            -
             
     | 
| 
       51 
     | 
    
         
            -
             
     | 
| 
       52 
     | 
    
         
            -
             
     | 
| 
       53 
     | 
    
         
            -
             
     | 
| 
       54 
     | 
    
         
            -
             
     | 
| 
       55 
     | 
    
         
            -
             
     | 
| 
       56 
     | 
    
         
            -
             
     | 
| 
      
 32 
     | 
    
         
            +
            - test/test_preserved.rb
         
     | 
| 
      
 33 
     | 
    
         
            +
            - test/test_paths.rb
         
     | 
| 
      
 34 
     | 
    
         
            +
            - test/load_files.rb
         
     | 
| 
      
 35 
     | 
    
         
            +
            - test/test_xml.rb
         
     | 
| 
      
 36 
     | 
    
         
            +
            - test/test_parser.rb
         
     | 
| 
      
 37 
     | 
    
         
            +
            - test/files/boingboing.html
         
     | 
| 
      
 38 
     | 
    
         
            +
            - test/files/uswebgen.html
         
     | 
| 
      
 39 
     | 
    
         
            +
            - test/files/immob.html
         
     | 
| 
      
 40 
     | 
    
         
            +
            - test/files/week9.html
         
     | 
| 
      
 41 
     | 
    
         
            +
            - test/files/utf8.html
         
     | 
| 
      
 42 
     | 
    
         
            +
            - test/files/cy0.html
         
     | 
| 
      
 43 
     | 
    
         
            +
            - test/files/basic.xhtml
         
     | 
| 
      
 44 
     | 
    
         
            +
            - lib/hpricot.rb
         
     | 
| 
      
 45 
     | 
    
         
            +
            - lib/hpricot/htmlinfo.rb
         
     | 
| 
      
 46 
     | 
    
         
            +
            - lib/hpricot/text.rb
         
     | 
| 
      
 47 
     | 
    
         
            +
            - lib/hpricot/inspect.rb
         
     | 
| 
      
 48 
     | 
    
         
            +
            - lib/hpricot/modules.rb
         
     | 
| 
      
 49 
     | 
    
         
            +
            - lib/hpricot/parse.rb
         
     | 
| 
      
 50 
     | 
    
         
            +
            - lib/hpricot/tag.rb
         
     | 
| 
      
 51 
     | 
    
         
            +
            - lib/hpricot/traverse.rb
         
     | 
| 
      
 52 
     | 
    
         
            +
            - lib/hpricot/elements.rb
         
     | 
| 
      
 53 
     | 
    
         
            +
            - ext/hpricot_scan/hpricot_scan.c
         
     | 
| 
      
 54 
     | 
    
         
            +
            - ext/hpricot_scan/extconf.rb
         
     | 
| 
      
 55 
     | 
    
         
            +
            - ext/hpricot_scan/hpricot_scan.h
         
     | 
| 
      
 56 
     | 
    
         
            +
            - ext/hpricot_scan/hpricot_scan.rl
         
     | 
| 
      
 57 
     | 
    
         
            +
            - CHANGELOG
         
     | 
| 
      
 58 
     | 
    
         
            +
            - README
         
     | 
| 
      
 59 
     | 
    
         
            +
            - Rakefile
         
     | 
| 
      
 60 
     | 
    
         
            +
            - COPYING
         
     | 
| 
      
 61 
     | 
    
         
            +
            - extras/mingw-rbconfig.rb
         
     | 
| 
      
 62 
     | 
    
         
            +
            - lib/hpricot_scan.so
         
     | 
| 
       57 
63 
     | 
    
         
             
            test_files: []
         
     | 
| 
       58 
64 
     | 
    
         | 
| 
       59 
65 
     | 
    
         
             
            rdoc_options: []
         
     | 
| 
       60 
66 
     | 
    
         | 
| 
       61 
67 
     | 
    
         
             
            extra_rdoc_files: 
         
     | 
| 
       62 
     | 
    
         
            -
             
     | 
| 
       63 
     | 
    
         
            -
             
     | 
| 
       64 
     | 
    
         
            -
             
     | 
| 
      
 68 
     | 
    
         
            +
            - README
         
     | 
| 
      
 69 
     | 
    
         
            +
            - CHANGELOG
         
     | 
| 
      
 70 
     | 
    
         
            +
            - COPYING
         
     | 
| 
       65 
71 
     | 
    
         
             
            executables: []
         
     | 
| 
       66 
72 
     | 
    
         | 
| 
       67 
73 
     | 
    
         
             
            extensions: []
         
     |