hpricot 0.4-mswin32 → 0.5-mswin32

Sign up to get free protection for your applications and to get access to all the features.
@@ -5,49 +5,57 @@ require 'hpricot'
5
5
  require 'load_files'
6
6
 
7
7
  class TestParser < Test::Unit::TestCase
8
- def setup
8
+ def test_set_attr
9
9
  @basic = Hpricot.parse(TestFiles::BASIC)
10
- @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
11
- @immob = Hpricot.parse(TestFiles::IMMOB)
12
- @uswebgen = Hpricot.parse(TestFiles::USWEBGEN)
13
- # @utf8 = Hpricot.parse(TestFiles::UTF8)
10
+ @basic.search('//p').set('class', 'para')
11
+ assert_equal 4, @basic.search('//p').length
12
+ assert_equal 4, @basic.search('//p').find_all { |x| x['class'] == 'para' }.length
14
13
  end
15
14
 
16
- # def test_set_attr
17
- # @basic.search('//p').set('class', 'para')
18
- # assert_equal '', @basic.search('//p').map { |x| x.attributes }
19
- # end
15
+ # Test creating a new element
16
+ def test_new_element
17
+ elem = Hpricot::Elem.new(Hpricot::STag.new('form'))
18
+ assert_not_nil(elem)
19
+ assert_not_nil(elem.attributes)
20
+ end
20
21
 
21
22
  def test_scan_text
22
23
  assert_equal 'FOO', Hpricot.make("FOO").first.content
23
24
  end
24
25
 
25
26
  def test_get_element_by_id
27
+ @basic = Hpricot.parse(TestFiles::BASIC)
26
28
  assert_equal 'link1', @basic.get_element_by_id('link1')['id']
27
29
  assert_equal 'link1', @basic.get_element_by_id('body1').get_element_by_id('link1').get_attribute('id')
28
30
  end
29
31
 
30
32
  def test_get_element_by_tag_name
33
+ @basic = Hpricot.parse(TestFiles::BASIC)
31
34
  assert_equal 'link1', @basic.get_elements_by_tag_name('a')[0].get_attribute('id')
32
35
  assert_equal 'link1', @basic.get_elements_by_tag_name('body')[0].get_element_by_id('link1').get_attribute('id')
33
36
  end
34
37
 
35
38
  def test_output_basic
39
+ @basic = Hpricot.parse(TestFiles::BASIC)
36
40
  @basic2 = Hpricot.parse(@basic.inner_html)
37
41
  scan_basic @basic2
38
42
  end
39
43
 
40
44
  def test_scan_basic
45
+ @basic = Hpricot.parse(TestFiles::BASIC)
41
46
  scan_basic @basic
42
47
  end
43
48
 
44
49
  def scan_basic doc
50
+ assert_kind_of Hpricot::XMLDecl, doc.children.first
51
+ assert_not_equal doc.children.first.to_s, doc.children[1].to_s
45
52
  assert_equal 'link1', doc.at('#link1')['id']
46
53
  assert_equal 'link1', doc.at("p a")['id']
47
54
  assert_equal 'link1', (doc/:p/:a).first['id']
48
55
  assert_equal 'link1', doc.search('p').at('a').get_attribute('id')
49
56
  assert_equal 'link2', (doc/'p').filter('.ohmy').search('a').first.get_attribute('id')
50
57
  assert_equal (doc/'p')[2], (doc/'p').filter(':nth(2)')[0]
58
+ assert_equal (doc/'p')[2], (doc/'p').filter('[3]')[0]
51
59
  assert_equal 4, (doc/'p').filter('*').length
52
60
  assert_equal 4, (doc/'p').filter('* *').length
53
61
  eles = (doc/'p').filter('.ohmy')
@@ -64,23 +72,65 @@ class TestParser < Test::Unit::TestCase
64
72
  assert_equal 2, (doc/'p / a').length
65
73
  assert_equal 2, (doc/'link ~ link').length
66
74
  assert_equal 3, (doc/'title ~ link').length
75
+ assert_equal 5, (doc/"//p/text()").length
76
+ assert_equal 6, (doc/"//p[a]//text()").length
77
+ assert_equal 2, (doc/"//p/a/text()").length
78
+ end
79
+
80
+ def test_positional
81
+ h = Hpricot( "<div><br/><p>one</p><p>two</p></div>" )
82
+ assert_equal "<p>one</p>", h.search("//div/p:eq(0)").to_s
83
+ assert_equal "<p>one</p>", h.search("//div/p:first").to_s
84
+ assert_equal "<p>one</p>", h.search("//div/p:first()").to_s
67
85
  end
68
86
 
69
87
  def test_scan_boingboing
88
+ @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
70
89
  assert_equal 60, (@boingboing/'p.posted').length
71
90
  assert_equal 1, @boingboing.search("//a[@name='027906']").length
91
+ assert_equal 10, @boingboing.search("script comment()").length
92
+ assert_equal 3, @boingboing.search("a[text()*='Boing']").length
93
+ assert_equal 1, @boingboing.search("h3[text()='College kids reportedly taking more smart drugs']").length
94
+ assert_equal 0, @boingboing.search("h3[text()='College']").length
95
+ assert_equal 60, @boingboing.search("h3").length
96
+ assert_equal 59, @boingboing.search("h3[text()!='College kids reportedly taking more smart drugs']").length
97
+ assert_equal 17, @boingboing.search("h3[text()$='s']").length
98
+ assert_equal 128, @boingboing.search("p[text()]").length
99
+ assert_equal 211, @boingboing.search("p").length
100
+ end
101
+
102
+ def test_reparent
103
+ doc = Hpricot(%{<div id="blurb_1"></div>})
104
+ div1 = doc.search('#blurb_1')
105
+ div1.before('<div id="blurb_0"></div>')
106
+
107
+ div0 = doc.search('#blurb_0')
108
+ div0.before('<div id="blurb_a"></div>')
109
+
110
+ assert_equal 'div', doc.at('#blurb_1').name
111
+ end
112
+
113
+ def test_siblings
114
+ @basic = Hpricot.parse(TestFiles::BASIC)
115
+ t = @basic.at(:title)
116
+ e = t.next_sibling
117
+ assert_equal 'test1.css', e['href']
118
+ assert_equal 'title', e.previous_sibling.name
72
119
  end
73
120
 
74
121
  def test_css_negation
122
+ @basic = Hpricot.parse(TestFiles::BASIC)
75
123
  assert_equal 3, (@basic/'p:not(.final)').length
76
124
  end
77
125
 
78
126
  def test_remove_attribute
127
+ @basic = Hpricot.parse(TestFiles::BASIC)
79
128
  (@basic/:p).each { |ele| ele.remove_attribute('class') }
80
129
  assert_equal 0, (@basic/'p[@class]').length
81
130
  end
82
131
 
83
132
  def test_abs_xpath
133
+ @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
84
134
  assert_equal 60, @boingboing.search("/html/body//p[@class='posted']").length
85
135
  assert_equal 60, @boingboing.search("/*/body//p[@class='posted']").length
86
136
  assert_equal 18, @boingboing.search("//script").length
@@ -94,6 +144,7 @@ class TestParser < Test::Unit::TestCase
94
144
  end
95
145
 
96
146
  def test_predicates
147
+ @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
97
148
  assert_equal 2, @boingboing.search('//link[@rel="alternate"]').length
98
149
  p_imgs = @boingboing.search('//div/p[/a/img]')
99
150
  assert_equal 15, p_imgs.length
@@ -105,7 +156,10 @@ class TestParser < Test::Unit::TestCase
105
156
  end
106
157
 
107
158
  def test_alt_predicates
159
+ @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
108
160
  assert_equal 2, @boingboing.search('//table/tr:last').length
161
+
162
+ @basic = Hpricot.parse(TestFiles::BASIC)
109
163
  assert_equal "<p>The third paragraph</p>",
110
164
  @basic.search('p:eq(2)').to_html
111
165
  assert_equal '<p class="last final"><b>THE FINAL PARAGRAPH</b></p>',
@@ -114,11 +168,43 @@ class TestParser < Test::Unit::TestCase
114
168
  end
115
169
 
116
170
  def test_many_paths
171
+ @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
117
172
  assert_equal 62, @boingboing.search('p.posted, link[@rel="alternate"]').length
118
173
  assert_equal 20, @boingboing.search('//div/p[a/img]|//link[@rel="alternate"]').length
119
174
  end
120
175
 
176
+ def test_stacked_search
177
+ @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
178
+ assert_kind_of Hpricot::Elements, @boingboing.search('//div/p').search('a img')
179
+ end
180
+
181
+ def test_class_search
182
+ # test case sent by Chih-Chao Lam
183
+ doc = Hpricot("<div class=xyz'>abc</div>")
184
+ assert_equal 1, doc.search(".xyz").length
185
+ doc = Hpricot("<div class=xyz>abc</div><div class=abc>xyz</div>")
186
+ assert_equal 1, doc.search(".xyz").length
187
+ assert_equal 4, doc.search("*").length
188
+ end
189
+
190
+ def test_kleene_star
191
+ # bug noticed by raja bhatia
192
+ doc = Hpricot("<span class='small'>1</span><div class='large'>2</div><div class='small'>3</div><span class='blue large'>4</span>")
193
+ assert_equal 2, doc.search("*[@class*='small']").length
194
+ assert_equal 2, doc.search("*.small").length
195
+ assert_equal 2, doc.search(".small").length
196
+ assert_equal 2, doc.search(".large").length
197
+ end
198
+
199
+ def test_empty_comment
200
+ doc = Hpricot("<p><!----></p>")
201
+ assert doc.children[0].children[0].comment?
202
+ doc = Hpricot("<p><!-- --></p>")
203
+ assert doc.children[0].children[0].comment?
204
+ end
205
+
121
206
  def test_body_newlines
207
+ @immob = Hpricot.parse(TestFiles::IMMOB)
122
208
  body = @immob.at(:body)
123
209
  {'background' => '', 'bgcolor' => '#ffffff', 'text' => '#000000', 'marginheight' => '10',
124
210
  'marginwidth' => '10', 'leftmargin' => '10', 'topmargin' => '10', 'link' => '#000066',
@@ -127,15 +213,79 @@ class TestParser < Test::Unit::TestCase
127
213
  end
128
214
  end
129
215
 
216
+ def test_nested_twins
217
+ @doc = Hpricot("<div>Hi<div>there</div></div>")
218
+ assert_equal 1, (@doc/"div div").length
219
+ end
220
+
221
+ def test_wildcard
222
+ @basic = Hpricot.parse(TestFiles::BASIC)
223
+ assert_equal 3, (@basic/"*[@id]").length
224
+ assert_equal 3, (@basic/"//*[@id]").length
225
+ end
226
+
130
227
  def test_javascripts
228
+ @immob = Hpricot.parse(TestFiles::IMMOB)
131
229
  assert_equal 3, (@immob/:script)[0].inner_html.scan(/<LINK/).length
132
230
  end
133
231
 
232
+ def test_nested_scripts
233
+ @week9 = Hpricot.parse(TestFiles::WEEK9)
234
+ assert_equal 14, (@week9/"a").find_all { |x| x.inner_html.include? "GameCenter" }.length
235
+ end
236
+
134
237
  def test_uswebgen
238
+ @uswebgen = Hpricot.parse(TestFiles::USWEBGEN)
135
239
  # sent by brent beardsley, hpricot 0.3 had problems with all the links.
136
240
  assert_equal 67, (@uswebgen/:a).length
137
241
  end
138
242
 
139
- def test_unicode
243
+ def test_mangled_tags
244
+ [%{<html><form name='loginForm' method='post' action='/units/a/login/1,13088,779-1,00.html'?URL=></form></html>},
245
+ %{<html><form name='loginForm' ?URL= method='post' action='/units/a/login/1,13088,779-1,00.html'></form></html>},
246
+ %{<html><form name='loginForm'?URL= ?URL= method='post' action='/units/a/login/1,13088,779-1,00.html'?URL=></form></html>},
247
+ %{<html><form name='loginForm' method='post' action='/units/a/login/1,13088,779-1,00.html' ?URL=></form></html>}].
248
+ each do |str|
249
+ doc = Hpricot(str)
250
+ assert_equal 1, (doc/:form).length
251
+ assert_equal '/units/a/login/1,13088,779-1,00.html', doc.at("form")['action']
252
+ end
253
+ end
254
+
255
+ def test_procins
256
+ doc = Hpricot("<?php print('hello') ?>\n<?xml blah='blah'?>")
257
+ assert_equal "php", doc.children[0].target
258
+ assert_equal "blah='blah'", doc.children[2].content
259
+ end
260
+
261
+ def test_buffer_error
262
+ assert_raise Hpricot::ParseError, "ran out of buffer space on element <input>, starting on line 3." do
263
+ Hpricot(%{<p>\n\n<input type="hidden" name="__VIEWSTATE" value="#{(("X" * 2000) + "\n") * 22}" />\n\n</p>})
264
+ end
265
+ end
266
+
267
+ def test_filters
268
+ @basic = Hpricot.parse(TestFiles::BASIC)
269
+ assert_equal 0, (@basic/"title:parent").size
270
+ assert_equal 3, (@basic/"p:parent").size
271
+ assert_equal 1, (@basic/"title:empty").size
272
+ assert_equal 1, (@basic/"p:empty").size
273
+ end
274
+
275
+ def test_keep_cdata
276
+ str = %{<script> /*<![CDATA[*/
277
+ /*]]>*/ </script>}
278
+ assert_equal str, Hpricot(str).to_html
279
+ end
280
+
281
+ def test_namespace
282
+ chunk = <<-END
283
+ <a xmlns:t="http://www.nexopia.com/dev/template">
284
+ <t:sam>hi </t:sam>
285
+ </a>
286
+ END
287
+ doc = Hpricot::XML(chunk)
288
+ assert (doc/"//t:sam").size > 0 # at least this should probably work
289
+ # assert (doc/"//sam").size > 0 # this would be nice
140
290
  end
141
291
  end
@@ -0,0 +1,16 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'test/unit'
4
+ require 'hpricot'
5
+ require 'load_files'
6
+
7
+ class TestParser < Test::Unit::TestCase
8
+ def test_roundtrip
9
+ @basic = Hpricot.parse(TestFiles::BASIC)
10
+ %w[link link[2] body #link1 a p.ohmy].each do |css_sel|
11
+ ele = @basic.at(css_sel)
12
+ assert_equal ele, @basic.at(ele.css_path)
13
+ assert_equal ele, @basic.at(ele.xpath)
14
+ end
15
+ end
16
+ end
@@ -0,0 +1,46 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'test/unit'
4
+ require 'hpricot'
5
+ require 'load_files'
6
+
7
+ class TestPreserved < Test::Unit::TestCase
8
+ def assert_roundtrip str
9
+ doc = Hpricot(str)
10
+ yield doc if block_given?
11
+ str2 = doc.to_original_html
12
+ [*str].zip([*str2]).each do |s1, s2|
13
+ assert_equal s1, s2
14
+ end
15
+ end
16
+
17
+ def assert_html str1, str2
18
+ doc = Hpricot(str2)
19
+ yield doc if block_given?
20
+ assert_equal str1, doc.to_original_html
21
+ end
22
+
23
+ def test_simple
24
+ str = "<p>Hpricot is a <b>you know <i>uh</b> fine thing.</p>"
25
+ assert_html str, str
26
+ assert_html "<p class=\"new\">Hpricot is a <b>you know <i>uh</b> fine thing.</p>", str do |doc|
27
+ (doc/:p).set('class', 'new')
28
+ end
29
+ end
30
+
31
+ def test_parent
32
+ str = "<html><base href='/'><head><title>Test</title></head><body><div id='wrap'><p>Paragraph one.</p><p>Paragraph two.</p></div></body></html>"
33
+ assert_html str, str
34
+ assert_html "<html><base href='/'><body><div id=\"all\"><div><p>Paragraph one.</p></div><div><p>Paragraph two.</p></div></div></body></html>", str do |doc|
35
+ (doc/:head).remove
36
+ (doc/:div).set('id', 'all')
37
+ (doc/:p).wrap('<div></div>')
38
+ end
39
+ end
40
+
41
+ def test_files
42
+ assert_roundtrip TestFiles::BASIC
43
+ assert_roundtrip TestFiles::BOINGBOING
44
+ assert_roundtrip TestFiles::CY0
45
+ end
46
+ end
@@ -0,0 +1,15 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'test/unit'
4
+ require 'hpricot'
5
+ require 'load_files'
6
+
7
+ class TestParser < Test::Unit::TestCase
8
+ # normally, the link tags are empty HTML tags.
9
+ # contributed by laudney.
10
+ def test_normally_empty
11
+ doc = Hpricot::XML("<rss><channel><title>this is title</title><link>http://fake.com</link></channel></rss>")
12
+ assert_equal "this is title", (doc/:rss/:channel/:title).text
13
+ assert_equal "http://fake.com", (doc/:rss/:channel/:link).text
14
+ end
15
+ end
metadata CHANGED
@@ -3,11 +3,11 @@ rubygems_version: 0.9.0
3
3
  specification_version: 1
4
4
  name: hpricot
5
5
  version: !ruby/object:Gem::Version
6
- version: "0.4"
7
- date: 2006-08-11 00:00:00 -06:00
6
+ version: "0.5"
7
+ date: 2007-01-31 00:00:00 -08:00
8
8
  summary: a swift, liberal HTML parser with a fantastic library
9
9
  require_paths:
10
- - lib
10
+ - lib
11
11
  email: why@ruby-lang.org
12
12
  homepage: http://code.whytheluckystiff.net/hpricot/
13
13
  rubyforge_project:
@@ -18,50 +18,56 @@ bindir: bin
18
18
  has_rdoc: false
19
19
  required_ruby_version: !ruby/object:Gem::Version::Requirement
20
20
  requirements:
21
- - - ">"
22
- - !ruby/object:Gem::Version
23
- version: 0.0.0
21
+ - - ">"
22
+ - !ruby/object:Gem::Version
23
+ version: 0.0.0
24
24
  version:
25
25
  platform: mswin32
26
26
  signing_key:
27
27
  cert_chain:
28
28
  post_install_message:
29
29
  authors:
30
- - why the lucky stiff
30
+ - why the lucky stiff
31
31
  files:
32
- - test/load_files.rb
33
- - test/test_parser.rb
34
- - test/files/uswebgen.html
35
- - test/files/boingboing.html
36
- - test/files/immob.html
37
- - test/files/basic.xhtml
38
- - lib/hpricot.rb
39
- - lib/hpricot/htmlinfo.rb
40
- - lib/hpricot/text.rb
41
- - lib/hpricot/inspect.rb
42
- - lib/hpricot/modules.rb
43
- - lib/hpricot/parse.rb
44
- - lib/hpricot/tag.rb
45
- - lib/hpricot/traverse.rb
46
- - lib/hpricot/elements.rb
47
- - ext/hpricot_scan/hpricot_scan.c
48
- - ext/hpricot_scan/extconf.rb
49
- - ext/hpricot_scan/hpricot_scan.h
50
- - ext/hpricot_scan/hpricot_scan.rl
51
- - CHANGELOG
52
- - README
53
- - Rakefile
54
- - COPYING
55
- - extras/mingw-rbconfig.rb
56
- - lib/hpricot_scan.so
32
+ - test/test_preserved.rb
33
+ - test/test_paths.rb
34
+ - test/load_files.rb
35
+ - test/test_xml.rb
36
+ - test/test_parser.rb
37
+ - test/files/boingboing.html
38
+ - test/files/uswebgen.html
39
+ - test/files/immob.html
40
+ - test/files/week9.html
41
+ - test/files/utf8.html
42
+ - test/files/cy0.html
43
+ - test/files/basic.xhtml
44
+ - lib/hpricot.rb
45
+ - lib/hpricot/htmlinfo.rb
46
+ - lib/hpricot/text.rb
47
+ - lib/hpricot/inspect.rb
48
+ - lib/hpricot/modules.rb
49
+ - lib/hpricot/parse.rb
50
+ - lib/hpricot/tag.rb
51
+ - lib/hpricot/traverse.rb
52
+ - lib/hpricot/elements.rb
53
+ - ext/hpricot_scan/hpricot_scan.c
54
+ - ext/hpricot_scan/extconf.rb
55
+ - ext/hpricot_scan/hpricot_scan.h
56
+ - ext/hpricot_scan/hpricot_scan.rl
57
+ - CHANGELOG
58
+ - README
59
+ - Rakefile
60
+ - COPYING
61
+ - extras/mingw-rbconfig.rb
62
+ - lib/hpricot_scan.so
57
63
  test_files: []
58
64
 
59
65
  rdoc_options: []
60
66
 
61
67
  extra_rdoc_files:
62
- - README
63
- - CHANGELOG
64
- - COPYING
68
+ - README
69
+ - CHANGELOG
70
+ - COPYING
65
71
  executables: []
66
72
 
67
73
  extensions: []