hpricot 0.4-mswin32 → 0.5-mswin32

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -5,49 +5,57 @@ require 'hpricot'
5
5
  require 'load_files'
6
6
 
7
7
  class TestParser < Test::Unit::TestCase
8
- def setup
8
+ def test_set_attr
9
9
  @basic = Hpricot.parse(TestFiles::BASIC)
10
- @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
11
- @immob = Hpricot.parse(TestFiles::IMMOB)
12
- @uswebgen = Hpricot.parse(TestFiles::USWEBGEN)
13
- # @utf8 = Hpricot.parse(TestFiles::UTF8)
10
+ @basic.search('//p').set('class', 'para')
11
+ assert_equal 4, @basic.search('//p').length
12
+ assert_equal 4, @basic.search('//p').find_all { |x| x['class'] == 'para' }.length
14
13
  end
15
14
 
16
- # def test_set_attr
17
- # @basic.search('//p').set('class', 'para')
18
- # assert_equal '', @basic.search('//p').map { |x| x.attributes }
19
- # end
15
+ # Test creating a new element
16
+ def test_new_element
17
+ elem = Hpricot::Elem.new(Hpricot::STag.new('form'))
18
+ assert_not_nil(elem)
19
+ assert_not_nil(elem.attributes)
20
+ end
20
21
 
21
22
  def test_scan_text
22
23
  assert_equal 'FOO', Hpricot.make("FOO").first.content
23
24
  end
24
25
 
25
26
  def test_get_element_by_id
27
+ @basic = Hpricot.parse(TestFiles::BASIC)
26
28
  assert_equal 'link1', @basic.get_element_by_id('link1')['id']
27
29
  assert_equal 'link1', @basic.get_element_by_id('body1').get_element_by_id('link1').get_attribute('id')
28
30
  end
29
31
 
30
32
  def test_get_element_by_tag_name
33
+ @basic = Hpricot.parse(TestFiles::BASIC)
31
34
  assert_equal 'link1', @basic.get_elements_by_tag_name('a')[0].get_attribute('id')
32
35
  assert_equal 'link1', @basic.get_elements_by_tag_name('body')[0].get_element_by_id('link1').get_attribute('id')
33
36
  end
34
37
 
35
38
  def test_output_basic
39
+ @basic = Hpricot.parse(TestFiles::BASIC)
36
40
  @basic2 = Hpricot.parse(@basic.inner_html)
37
41
  scan_basic @basic2
38
42
  end
39
43
 
40
44
  def test_scan_basic
45
+ @basic = Hpricot.parse(TestFiles::BASIC)
41
46
  scan_basic @basic
42
47
  end
43
48
 
44
49
  def scan_basic doc
50
+ assert_kind_of Hpricot::XMLDecl, doc.children.first
51
+ assert_not_equal doc.children.first.to_s, doc.children[1].to_s
45
52
  assert_equal 'link1', doc.at('#link1')['id']
46
53
  assert_equal 'link1', doc.at("p a")['id']
47
54
  assert_equal 'link1', (doc/:p/:a).first['id']
48
55
  assert_equal 'link1', doc.search('p').at('a').get_attribute('id')
49
56
  assert_equal 'link2', (doc/'p').filter('.ohmy').search('a').first.get_attribute('id')
50
57
  assert_equal (doc/'p')[2], (doc/'p').filter(':nth(2)')[0]
58
+ assert_equal (doc/'p')[2], (doc/'p').filter('[3]')[0]
51
59
  assert_equal 4, (doc/'p').filter('*').length
52
60
  assert_equal 4, (doc/'p').filter('* *').length
53
61
  eles = (doc/'p').filter('.ohmy')
@@ -64,23 +72,65 @@ class TestParser < Test::Unit::TestCase
64
72
  assert_equal 2, (doc/'p / a').length
65
73
  assert_equal 2, (doc/'link ~ link').length
66
74
  assert_equal 3, (doc/'title ~ link').length
75
+ assert_equal 5, (doc/"//p/text()").length
76
+ assert_equal 6, (doc/"//p[a]//text()").length
77
+ assert_equal 2, (doc/"//p/a/text()").length
78
+ end
79
+
80
+ def test_positional
81
+ h = Hpricot( "<div><br/><p>one</p><p>two</p></div>" )
82
+ assert_equal "<p>one</p>", h.search("//div/p:eq(0)").to_s
83
+ assert_equal "<p>one</p>", h.search("//div/p:first").to_s
84
+ assert_equal "<p>one</p>", h.search("//div/p:first()").to_s
67
85
  end
68
86
 
69
87
  def test_scan_boingboing
88
+ @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
70
89
  assert_equal 60, (@boingboing/'p.posted').length
71
90
  assert_equal 1, @boingboing.search("//a[@name='027906']").length
91
+ assert_equal 10, @boingboing.search("script comment()").length
92
+ assert_equal 3, @boingboing.search("a[text()*='Boing']").length
93
+ assert_equal 1, @boingboing.search("h3[text()='College kids reportedly taking more smart drugs']").length
94
+ assert_equal 0, @boingboing.search("h3[text()='College']").length
95
+ assert_equal 60, @boingboing.search("h3").length
96
+ assert_equal 59, @boingboing.search("h3[text()!='College kids reportedly taking more smart drugs']").length
97
+ assert_equal 17, @boingboing.search("h3[text()$='s']").length
98
+ assert_equal 128, @boingboing.search("p[text()]").length
99
+ assert_equal 211, @boingboing.search("p").length
100
+ end
101
+
102
+ def test_reparent
103
+ doc = Hpricot(%{<div id="blurb_1"></div>})
104
+ div1 = doc.search('#blurb_1')
105
+ div1.before('<div id="blurb_0"></div>')
106
+
107
+ div0 = doc.search('#blurb_0')
108
+ div0.before('<div id="blurb_a"></div>')
109
+
110
+ assert_equal 'div', doc.at('#blurb_1').name
111
+ end
112
+
113
+ def test_siblings
114
+ @basic = Hpricot.parse(TestFiles::BASIC)
115
+ t = @basic.at(:title)
116
+ e = t.next_sibling
117
+ assert_equal 'test1.css', e['href']
118
+ assert_equal 'title', e.previous_sibling.name
72
119
  end
73
120
 
74
121
  def test_css_negation
122
+ @basic = Hpricot.parse(TestFiles::BASIC)
75
123
  assert_equal 3, (@basic/'p:not(.final)').length
76
124
  end
77
125
 
78
126
  def test_remove_attribute
127
+ @basic = Hpricot.parse(TestFiles::BASIC)
79
128
  (@basic/:p).each { |ele| ele.remove_attribute('class') }
80
129
  assert_equal 0, (@basic/'p[@class]').length
81
130
  end
82
131
 
83
132
  def test_abs_xpath
133
+ @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
84
134
  assert_equal 60, @boingboing.search("/html/body//p[@class='posted']").length
85
135
  assert_equal 60, @boingboing.search("/*/body//p[@class='posted']").length
86
136
  assert_equal 18, @boingboing.search("//script").length
@@ -94,6 +144,7 @@ class TestParser < Test::Unit::TestCase
94
144
  end
95
145
 
96
146
  def test_predicates
147
+ @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
97
148
  assert_equal 2, @boingboing.search('//link[@rel="alternate"]').length
98
149
  p_imgs = @boingboing.search('//div/p[/a/img]')
99
150
  assert_equal 15, p_imgs.length
@@ -105,7 +156,10 @@ class TestParser < Test::Unit::TestCase
105
156
  end
106
157
 
107
158
  def test_alt_predicates
159
+ @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
108
160
  assert_equal 2, @boingboing.search('//table/tr:last').length
161
+
162
+ @basic = Hpricot.parse(TestFiles::BASIC)
109
163
  assert_equal "<p>The third paragraph</p>",
110
164
  @basic.search('p:eq(2)').to_html
111
165
  assert_equal '<p class="last final"><b>THE FINAL PARAGRAPH</b></p>',
@@ -114,11 +168,43 @@ class TestParser < Test::Unit::TestCase
114
168
  end
115
169
 
116
170
  def test_many_paths
171
+ @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
117
172
  assert_equal 62, @boingboing.search('p.posted, link[@rel="alternate"]').length
118
173
  assert_equal 20, @boingboing.search('//div/p[a/img]|//link[@rel="alternate"]').length
119
174
  end
120
175
 
176
+ def test_stacked_search
177
+ @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
178
+ assert_kind_of Hpricot::Elements, @boingboing.search('//div/p').search('a img')
179
+ end
180
+
181
+ def test_class_search
182
+ # test case sent by Chih-Chao Lam
183
+ doc = Hpricot("<div class=xyz'>abc</div>")
184
+ assert_equal 1, doc.search(".xyz").length
185
+ doc = Hpricot("<div class=xyz>abc</div><div class=abc>xyz</div>")
186
+ assert_equal 1, doc.search(".xyz").length
187
+ assert_equal 4, doc.search("*").length
188
+ end
189
+
190
+ def test_kleene_star
191
+ # bug noticed by raja bhatia
192
+ doc = Hpricot("<span class='small'>1</span><div class='large'>2</div><div class='small'>3</div><span class='blue large'>4</span>")
193
+ assert_equal 2, doc.search("*[@class*='small']").length
194
+ assert_equal 2, doc.search("*.small").length
195
+ assert_equal 2, doc.search(".small").length
196
+ assert_equal 2, doc.search(".large").length
197
+ end
198
+
199
+ def test_empty_comment
200
+ doc = Hpricot("<p><!----></p>")
201
+ assert doc.children[0].children[0].comment?
202
+ doc = Hpricot("<p><!-- --></p>")
203
+ assert doc.children[0].children[0].comment?
204
+ end
205
+
121
206
  def test_body_newlines
207
+ @immob = Hpricot.parse(TestFiles::IMMOB)
122
208
  body = @immob.at(:body)
123
209
  {'background' => '', 'bgcolor' => '#ffffff', 'text' => '#000000', 'marginheight' => '10',
124
210
  'marginwidth' => '10', 'leftmargin' => '10', 'topmargin' => '10', 'link' => '#000066',
@@ -127,15 +213,79 @@ class TestParser < Test::Unit::TestCase
127
213
  end
128
214
  end
129
215
 
216
+ def test_nested_twins
217
+ @doc = Hpricot("<div>Hi<div>there</div></div>")
218
+ assert_equal 1, (@doc/"div div").length
219
+ end
220
+
221
+ def test_wildcard
222
+ @basic = Hpricot.parse(TestFiles::BASIC)
223
+ assert_equal 3, (@basic/"*[@id]").length
224
+ assert_equal 3, (@basic/"//*[@id]").length
225
+ end
226
+
130
227
  def test_javascripts
228
+ @immob = Hpricot.parse(TestFiles::IMMOB)
131
229
  assert_equal 3, (@immob/:script)[0].inner_html.scan(/<LINK/).length
132
230
  end
133
231
 
232
+ def test_nested_scripts
233
+ @week9 = Hpricot.parse(TestFiles::WEEK9)
234
+ assert_equal 14, (@week9/"a").find_all { |x| x.inner_html.include? "GameCenter" }.length
235
+ end
236
+
134
237
  def test_uswebgen
238
+ @uswebgen = Hpricot.parse(TestFiles::USWEBGEN)
135
239
  # sent by brent beardsley, hpricot 0.3 had problems with all the links.
136
240
  assert_equal 67, (@uswebgen/:a).length
137
241
  end
138
242
 
139
- def test_unicode
243
+ def test_mangled_tags
244
+ [%{<html><form name='loginForm' method='post' action='/units/a/login/1,13088,779-1,00.html'?URL=></form></html>},
245
+ %{<html><form name='loginForm' ?URL= method='post' action='/units/a/login/1,13088,779-1,00.html'></form></html>},
246
+ %{<html><form name='loginForm'?URL= ?URL= method='post' action='/units/a/login/1,13088,779-1,00.html'?URL=></form></html>},
247
+ %{<html><form name='loginForm' method='post' action='/units/a/login/1,13088,779-1,00.html' ?URL=></form></html>}].
248
+ each do |str|
249
+ doc = Hpricot(str)
250
+ assert_equal 1, (doc/:form).length
251
+ assert_equal '/units/a/login/1,13088,779-1,00.html', doc.at("form")['action']
252
+ end
253
+ end
254
+
255
+ def test_procins
256
+ doc = Hpricot("<?php print('hello') ?>\n<?xml blah='blah'?>")
257
+ assert_equal "php", doc.children[0].target
258
+ assert_equal "blah='blah'", doc.children[2].content
259
+ end
260
+
261
+ def test_buffer_error
262
+ assert_raise Hpricot::ParseError, "ran out of buffer space on element <input>, starting on line 3." do
263
+ Hpricot(%{<p>\n\n<input type="hidden" name="__VIEWSTATE" value="#{(("X" * 2000) + "\n") * 22}" />\n\n</p>})
264
+ end
265
+ end
266
+
267
+ def test_filters
268
+ @basic = Hpricot.parse(TestFiles::BASIC)
269
+ assert_equal 0, (@basic/"title:parent").size
270
+ assert_equal 3, (@basic/"p:parent").size
271
+ assert_equal 1, (@basic/"title:empty").size
272
+ assert_equal 1, (@basic/"p:empty").size
273
+ end
274
+
275
+ def test_keep_cdata
276
+ str = %{<script> /*<![CDATA[*/
277
+ /*]]>*/ </script>}
278
+ assert_equal str, Hpricot(str).to_html
279
+ end
280
+
281
+ def test_namespace
282
+ chunk = <<-END
283
+ <a xmlns:t="http://www.nexopia.com/dev/template">
284
+ <t:sam>hi </t:sam>
285
+ </a>
286
+ END
287
+ doc = Hpricot::XML(chunk)
288
+ assert (doc/"//t:sam").size > 0 # at least this should probably work
289
+ # assert (doc/"//sam").size > 0 # this would be nice
140
290
  end
141
291
  end
@@ -0,0 +1,16 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'test/unit'
4
+ require 'hpricot'
5
+ require 'load_files'
6
+
7
+ class TestParser < Test::Unit::TestCase
8
+ def test_roundtrip
9
+ @basic = Hpricot.parse(TestFiles::BASIC)
10
+ %w[link link[2] body #link1 a p.ohmy].each do |css_sel|
11
+ ele = @basic.at(css_sel)
12
+ assert_equal ele, @basic.at(ele.css_path)
13
+ assert_equal ele, @basic.at(ele.xpath)
14
+ end
15
+ end
16
+ end
@@ -0,0 +1,46 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'test/unit'
4
+ require 'hpricot'
5
+ require 'load_files'
6
+
7
+ class TestPreserved < Test::Unit::TestCase
8
+ def assert_roundtrip str
9
+ doc = Hpricot(str)
10
+ yield doc if block_given?
11
+ str2 = doc.to_original_html
12
+ [*str].zip([*str2]).each do |s1, s2|
13
+ assert_equal s1, s2
14
+ end
15
+ end
16
+
17
+ def assert_html str1, str2
18
+ doc = Hpricot(str2)
19
+ yield doc if block_given?
20
+ assert_equal str1, doc.to_original_html
21
+ end
22
+
23
+ def test_simple
24
+ str = "<p>Hpricot is a <b>you know <i>uh</b> fine thing.</p>"
25
+ assert_html str, str
26
+ assert_html "<p class=\"new\">Hpricot is a <b>you know <i>uh</b> fine thing.</p>", str do |doc|
27
+ (doc/:p).set('class', 'new')
28
+ end
29
+ end
30
+
31
+ def test_parent
32
+ str = "<html><base href='/'><head><title>Test</title></head><body><div id='wrap'><p>Paragraph one.</p><p>Paragraph two.</p></div></body></html>"
33
+ assert_html str, str
34
+ assert_html "<html><base href='/'><body><div id=\"all\"><div><p>Paragraph one.</p></div><div><p>Paragraph two.</p></div></div></body></html>", str do |doc|
35
+ (doc/:head).remove
36
+ (doc/:div).set('id', 'all')
37
+ (doc/:p).wrap('<div></div>')
38
+ end
39
+ end
40
+
41
+ def test_files
42
+ assert_roundtrip TestFiles::BASIC
43
+ assert_roundtrip TestFiles::BOINGBOING
44
+ assert_roundtrip TestFiles::CY0
45
+ end
46
+ end
@@ -0,0 +1,15 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'test/unit'
4
+ require 'hpricot'
5
+ require 'load_files'
6
+
7
+ class TestParser < Test::Unit::TestCase
8
+ # normally, the link tags are empty HTML tags.
9
+ # contributed by laudney.
10
+ def test_normally_empty
11
+ doc = Hpricot::XML("<rss><channel><title>this is title</title><link>http://fake.com</link></channel></rss>")
12
+ assert_equal "this is title", (doc/:rss/:channel/:title).text
13
+ assert_equal "http://fake.com", (doc/:rss/:channel/:link).text
14
+ end
15
+ end
metadata CHANGED
@@ -3,11 +3,11 @@ rubygems_version: 0.9.0
3
3
  specification_version: 1
4
4
  name: hpricot
5
5
  version: !ruby/object:Gem::Version
6
- version: "0.4"
7
- date: 2006-08-11 00:00:00 -06:00
6
+ version: "0.5"
7
+ date: 2007-01-31 00:00:00 -08:00
8
8
  summary: a swift, liberal HTML parser with a fantastic library
9
9
  require_paths:
10
- - lib
10
+ - lib
11
11
  email: why@ruby-lang.org
12
12
  homepage: http://code.whytheluckystiff.net/hpricot/
13
13
  rubyforge_project:
@@ -18,50 +18,56 @@ bindir: bin
18
18
  has_rdoc: false
19
19
  required_ruby_version: !ruby/object:Gem::Version::Requirement
20
20
  requirements:
21
- - - ">"
22
- - !ruby/object:Gem::Version
23
- version: 0.0.0
21
+ - - ">"
22
+ - !ruby/object:Gem::Version
23
+ version: 0.0.0
24
24
  version:
25
25
  platform: mswin32
26
26
  signing_key:
27
27
  cert_chain:
28
28
  post_install_message:
29
29
  authors:
30
- - why the lucky stiff
30
+ - why the lucky stiff
31
31
  files:
32
- - test/load_files.rb
33
- - test/test_parser.rb
34
- - test/files/uswebgen.html
35
- - test/files/boingboing.html
36
- - test/files/immob.html
37
- - test/files/basic.xhtml
38
- - lib/hpricot.rb
39
- - lib/hpricot/htmlinfo.rb
40
- - lib/hpricot/text.rb
41
- - lib/hpricot/inspect.rb
42
- - lib/hpricot/modules.rb
43
- - lib/hpricot/parse.rb
44
- - lib/hpricot/tag.rb
45
- - lib/hpricot/traverse.rb
46
- - lib/hpricot/elements.rb
47
- - ext/hpricot_scan/hpricot_scan.c
48
- - ext/hpricot_scan/extconf.rb
49
- - ext/hpricot_scan/hpricot_scan.h
50
- - ext/hpricot_scan/hpricot_scan.rl
51
- - CHANGELOG
52
- - README
53
- - Rakefile
54
- - COPYING
55
- - extras/mingw-rbconfig.rb
56
- - lib/hpricot_scan.so
32
+ - test/test_preserved.rb
33
+ - test/test_paths.rb
34
+ - test/load_files.rb
35
+ - test/test_xml.rb
36
+ - test/test_parser.rb
37
+ - test/files/boingboing.html
38
+ - test/files/uswebgen.html
39
+ - test/files/immob.html
40
+ - test/files/week9.html
41
+ - test/files/utf8.html
42
+ - test/files/cy0.html
43
+ - test/files/basic.xhtml
44
+ - lib/hpricot.rb
45
+ - lib/hpricot/htmlinfo.rb
46
+ - lib/hpricot/text.rb
47
+ - lib/hpricot/inspect.rb
48
+ - lib/hpricot/modules.rb
49
+ - lib/hpricot/parse.rb
50
+ - lib/hpricot/tag.rb
51
+ - lib/hpricot/traverse.rb
52
+ - lib/hpricot/elements.rb
53
+ - ext/hpricot_scan/hpricot_scan.c
54
+ - ext/hpricot_scan/extconf.rb
55
+ - ext/hpricot_scan/hpricot_scan.h
56
+ - ext/hpricot_scan/hpricot_scan.rl
57
+ - CHANGELOG
58
+ - README
59
+ - Rakefile
60
+ - COPYING
61
+ - extras/mingw-rbconfig.rb
62
+ - lib/hpricot_scan.so
57
63
  test_files: []
58
64
 
59
65
  rdoc_options: []
60
66
 
61
67
  extra_rdoc_files:
62
- - README
63
- - CHANGELOG
64
- - COPYING
68
+ - README
69
+ - CHANGELOG
70
+ - COPYING
65
71
  executables: []
66
72
 
67
73
  extensions: []