hpricot 0.6-jruby

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. data/CHANGELOG +62 -0
  2. data/COPYING +18 -0
  3. data/README +284 -0
  4. data/Rakefile +211 -0
  5. data/ext/hpricot_scan/HpricotScanService.java +1340 -0
  6. data/ext/hpricot_scan/extconf.rb +6 -0
  7. data/ext/hpricot_scan/hpricot_common.rl +76 -0
  8. data/ext/hpricot_scan/hpricot_scan.c +5976 -0
  9. data/ext/hpricot_scan/hpricot_scan.h +79 -0
  10. data/ext/hpricot_scan/hpricot_scan.java.rl +363 -0
  11. data/ext/hpricot_scan/hpricot_scan.rl +273 -0
  12. data/extras/mingw-rbconfig.rb +176 -0
  13. data/lib/hpricot.rb +26 -0
  14. data/lib/hpricot/blankslate.rb +63 -0
  15. data/lib/hpricot/builder.rb +200 -0
  16. data/lib/hpricot/elements.rb +510 -0
  17. data/lib/hpricot/htmlinfo.rb +672 -0
  18. data/lib/hpricot/inspect.rb +107 -0
  19. data/lib/hpricot/modules.rb +37 -0
  20. data/lib/hpricot/parse.rb +297 -0
  21. data/lib/hpricot/tag.rb +228 -0
  22. data/lib/hpricot/tags.rb +164 -0
  23. data/lib/hpricot/traverse.rb +821 -0
  24. data/lib/hpricot/xchar.rb +94 -0
  25. data/lib/i686-linux/hpricot_scan.jar +0 -0
  26. data/test/files/basic.xhtml +17 -0
  27. data/test/files/boingboing.html +2266 -0
  28. data/test/files/cy0.html +3653 -0
  29. data/test/files/immob.html +400 -0
  30. data/test/files/pace_application.html +1320 -0
  31. data/test/files/tenderlove.html +16 -0
  32. data/test/files/uswebgen.html +220 -0
  33. data/test/files/utf8.html +1054 -0
  34. data/test/files/week9.html +1723 -0
  35. data/test/files/why.xml +19 -0
  36. data/test/load_files.rb +7 -0
  37. data/test/test_alter.rb +65 -0
  38. data/test/test_builder.rb +24 -0
  39. data/test/test_parser.rb +379 -0
  40. data/test/test_paths.rb +16 -0
  41. data/test/test_preserved.rb +66 -0
  42. data/test/test_xml.rb +28 -0
  43. metadata +98 -0
@@ -0,0 +1,19 @@
1
+ <?xml version='1.0'?><rss xmlns:admin='http://webns.net/mvcb/' version='2.0' xmlns:sy='http://purl.org/rss/1.0/modules/syndication/' xmlns:dc='http://purl.org/dc/elements/1.1/' xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'>
2
+ <channel>
3
+ <title>why the lucky stiff</title>
4
+ <link>http://whytheluckystiff.net</link>
5
+ <description>hex-editing reality to give us infinite grenades!!</description>
6
+ <dc:language>en-us</dc:language>
7
+ <dc:creator/>
8
+ <dc:date>2007-01-16T22:39:04+00:00</dc:date>
9
+ <admin:generatorAgent rdf:resource='http://hobix.com/?v=0.4'/>
10
+ <sy:updatePeriod>hourly</sy:updatePeriod>
11
+ <sy:updateFrequency>1</sy:updateFrequency>
12
+ <sy:updateBase>2000-01-01T12:00+00:00</sy:updateBase>
13
+ <item><title>1.3</title><link>http://whytheluckystiff.net/quatrains/1.3.html</link><guid isPermaLink='false'>quatrains/1.3@http://whytheluckystiff.net</guid><dc:subject>quatrains</dc:subject><dc:subject>quatrains</dc:subject><dc:creator>why the lucky stiff</dc:creator><dc:date>2007-01-14T08:47:05+00:00</dc:date><description>&lt;blockquote&gt;
14
+ &lt;p&gt;That cadillac of yours and that driver of yours!&lt;br /&gt;You and your teacups rattling away in the back seat!&lt;br /&gt;You always took the mike, oh, and all those cowboys you shot!&lt;br /&gt;I held your hand! And I&amp;#8217;ll shoot a cowboy one day!&lt;/p&gt;
15
+ &lt;/blockquote&gt;
16
+ &lt;blockquote&gt;
17
+ &lt;p&gt;You said, &amp;#8220;Let&amp;#8217;s run into the woods like kids!&amp;#8221; &lt;br /&gt;You said, &amp;#8220;Let&amp;#8217;s rub our hands together super-hot!&amp;#8221; &lt;br /&gt;And we scalded the trees and left octagons, I think that was you and&lt;br /&gt;You threw parties on the roof!&lt;/p&gt;
18
+ &lt;/blockquote&gt;</description></item></channel>
19
+ </rss>
@@ -0,0 +1,7 @@
1
# Loads the HTML/XHTML/XML fixtures used by the test suite and exposes each
# one as a String constant on this module. The constant name is the
# upper-cased file name without its extension, so files/basic.xhtml becomes
# TestFiles::BASIC.
module TestFiles
  # Defines one constant per fixture file found directly inside +dir+.
  #
  # dir - path of the directory to scan for *.html, *.xhtml and *.xml files.
  #
  # Returns the Array of fixture file names that were found; the Array is
  # empty when +dir+ does not exist.
  # Raises ArgumentError when a matching file name is not of the form
  # <word characters>.<extension>, because no constant name can be derived
  # from it.
  def self.load_fixtures(dir)
    return [] unless File.directory?(dir)

    # Glob relative to the directory itself so that characters in the
    # directory path are never interpreted as glob syntax.
    Dir.chdir(dir) do
      Dir['*.{html,xhtml,xml}'].each do |fname|
        const_name = fname[/\A(\w+)\.\w+\z/, 1]
        unless const_name
          raise ArgumentError, "cannot derive a constant name from fixture #{fname.inspect}"
        end
        const_set const_name.upcase, File.read(fname)
      end
    end
  end

  # Load the fixtures that ship next to this file.
  load_fixtures File.join(File.dirname(__FILE__), 'files')
end
@@ -0,0 +1,65 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'test/unit'
4
+ require 'hpricot'
5
+ require 'load_files'
6
+
7
# Exercises the document-altering API (before/after/wrap, class and attribute
# changes) against the BASIC fixture.
class TestAlter < Test::Unit::TestCase
  # Parses the BASIC fixture into @basic before each test.
  def setup
    @basic = Hpricot.parse(TestFiles::BASIC)
  end

  # Markup inserted before the first <link> becomes the new first <link>.
  def test_before
    markup = "<link rel='stylesheet' href='test0.css' />"
    @basic.at("link").before(markup)
    first_link = @basic.at("link")
    assert_equal 'test0.css', first_link.attributes['href']
  end

  # Markup inserted after the last <link> becomes the new last <link>.
  def test_after
    markup = "<link rel='stylesheet' href='test_inf.css' />"
    @basic.search("link")[-1].after(markup)
    last_link = @basic.search("link")[-1]
    assert_equal 'test_inf.css', last_link.attributes['href']
  end

  # Wrapping shows up both on the live nodes and after a to_html round trip.
  def test_wrap
    wrapped = (@basic/"p.ohmy").wrap("<div id='wrapper'></div>")
    assert_equal 'wrapper', wrapped[0].parent['id']

    reparsed = Hpricot(@basic.to_html)
    assert_equal 'ohmy', reparsed.at("#wrapper").children[0]['class']
  end

  # add_class touches the selected paragraph only.
  def test_add_class
    marked = (@basic/"p:first").add_class("testing123")
    assert marked[0].get_attribute("class").split(" ").include?("testing123")

    first_classes = (Hpricot(@basic.to_html)/"p:first")[0].attributes["class"].split(" ")
    assert first_classes.include?("testing123")

    later_classes = (Hpricot(@basic.to_html)/"p:gt(0)")[0].attributes["class"].split(" ")
    assert !later_classes.include?("testing123")
  end

  # attr accepts either a literal value or a block computing the value.
  def test_change_attributes
    paragraphs = (@basic/"p").attr("title", "Some Title")
    anchors = (@basic/"a").attr("href", "http://my_new_href.com")
    links = (@basic/"link").attr("href") { |e| e.name }

    assert_changed(@basic, "p", paragraphs) { |para| para.attributes["title"] == "Some Title" }
    assert_changed(@basic, "a", anchors) { |anchor| anchor.attributes["href"] == "http://my_new_href.com" }
    assert_changed(@basic, "link", links) { |node| node.attributes["href"] == "link" }
  end

  # remove_attr drops the attribute from every matched element.
  def test_remove_attr
    stripped = (@basic/"link").remove_attr("href")
    assert_changed(@basic, "link", stripped) { |link| link['href'].nil? }
  end

  # remove_class with a name removes just that class.
  def test_remove_class
    trimmed = (@basic/"p[@class*='last']").remove_class("last")
    assert_changed(@basic, "p[@class*='last']", trimmed) { |para| para['class'] == 'final' }
  end

  # remove_class without a name removes the class attribute altogether.
  def test_remove_all_classes
    cleared = (@basic/"p[@class]").remove_class
    assert_changed(@basic, "p[@class]", cleared) { |para| para['class'].nil? }
  end

  # Asserts that +block+ holds for every element of +set+ and also for every
  # element matched by +selector+ once +original+ is serialised with to_html
  # and parsed again.
  def assert_changed(original, selector, set, &block)
    assert set.all?(&block)

    reparsed = Hpricot(original.to_html)
    assert reparsed.search(selector).all?(&block)
  end
end
@@ -0,0 +1,24 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'test/unit'
4
+ require 'hpricot'
5
+
6
# Covers text escaping behaviour of the Hpricot builder DSL.
class TestBuilder < Test::Unit::TestCase
  # Text passed as a tag argument is escaped in the HTML output, while the
  # text node itself still yields the raw characters.
  def test_escaping_text
    built = Hpricot() do
      b "<a\"b>"
    end
    assert_equal "<b>&lt;a&quot;b&gt;</b>", built.to_html

    text_node = built.at("text()")
    assert_equal %{<a"b>}, text_node.to_s
  end

  # Text added through the `text` builder call is emitted without escaping.
  def test_no_escaping_text
    built = Hpricot() do
      div.test.me! { text "<a\"b>" }
    end
    assert_equal %{<div class="test" id="me"><a"b></div>}, built.to_html

    text_node = built.at("text()")
    assert_equal %{<a"b>}, text_node.to_s
  end

  # The bytes \200 and \225 come out as numeric entities in the HTML and as
  # multi-byte sequences from the text node.
  def test_latin1_entities
    built = Hpricot() do
      b "\200\225"
    end
    assert_equal "<b>&#8364;&#8226;</b>", built.to_html

    text_node = built.at("text()")
    assert_equal "\342\202\254\342\200\242", text_node.to_s
  end
end
@@ -0,0 +1,379 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'test/unit'
4
+ require 'hpricot'
5
+ require 'load_files'
6
+
7
# Parser and selector regression tests for Hpricot. Most tests run against
# the fixture documents exposed as TestFiles constants; the rest parse small
# inline snippets.
class TestParser < Test::Unit::TestCase
  # Setting an attribute on a result set reaches every matched element.
  def test_set_attr
    @basic = Hpricot.parse(TestFiles::BASIC)
    @basic.search('//p').set('class', 'para')
    assert_equal 4, @basic.search('//p').length
    assert_equal 4, @basic.search('//p').find_all { |x| x['class'] == 'para' }.length
  end

  # Test creating a new element
  def test_new_element
    elem = Hpricot::Elem.new(Hpricot::STag.new('form'))
    assert_not_nil(elem)
    assert_not_nil(elem.attributes)
  end

  # Plain text is returned as the content of the first node.
  def test_scan_text
    assert_equal 'FOO', Hpricot.make("FOO").first.content
  end

  # Attribute filters match on the unescaped attribute value.
  def test_filter_by_attr
    @boingboing = Hpricot.parse(TestFiles::BOINGBOING)

    # this link is escaped in the doc
    link = 'http://www.youtube.com/watch?v=TvSNXyNw26g&search=chris%20ware'
    assert_equal link, @boingboing.at("a[@href='#{link}']")['href']
  end

  # The :contains() filter selects elements by their text.
  def test_filter_contains
    @basic = Hpricot.parse(TestFiles::BASIC)
    assert_equal '<title>Sample XHTML</title>', @basic.search("title:contains('Sample')").to_s
  end

  # get_element_by_id works on the document and on a nested element.
  def test_get_element_by_id
    @basic = Hpricot.parse(TestFiles::BASIC)
    assert_equal 'link1', @basic.get_element_by_id('link1')['id']
    assert_equal 'link1', @basic.get_element_by_id('body1').get_element_by_id('link1').get_attribute('id')
  end

  # get_elements_by_tag_name returns elements that can be queried further.
  def test_get_element_by_tag_name
    @basic = Hpricot.parse(TestFiles::BASIC)
    assert_equal 'link1', @basic.get_elements_by_tag_name('a')[0].get_attribute('id')
    assert_equal 'link1', @basic.get_elements_by_tag_name('body')[0].get_element_by_id('link1').get_attribute('id')
  end

  # A document re-parsed from its own inner_html passes the same checks as
  # the original.
  def test_output_basic
    @basic = Hpricot.parse(TestFiles::BASIC)
    @basic2 = Hpricot.parse(@basic.inner_html)
    scan_basic @basic2
  end

  # Runs the shared selector checks against the BASIC fixture.
  def test_scan_basic
    @basic = Hpricot.parse(TestFiles::BASIC)
    scan_basic @basic
  end

  # Shared assertions for a parsed copy of the BASIC fixture.
  #
  # doc - the parsed document to inspect.
  def scan_basic(doc)
    assert_kind_of Hpricot::XMLDecl, doc.children.first
    assert_not_equal doc.children.first.to_s, doc.children[1].to_s
    assert_equal 'link1', doc.at('#link1')['id']
    assert_equal 'link1', doc.at("p a")['id']
    assert_equal 'link1', (doc/:p/:a).first['id']
    assert_equal 'link1', doc.search('p').at('a').get_attribute('id')
    assert_equal 'link2', (doc/'p').filter('.ohmy').search('a').first.get_attribute('id')
    # Explicit call parentheses: a bare `assert_equal (x)[2], y` makes Ruby
    # warn about an ambiguous grouped expression.
    assert_equal((doc/'p')[2], (doc/'p').filter(':nth(2)')[0])
    assert_equal((doc/'p')[2], (doc/'p').filter('[3]')[0])
    assert_equal 4, (doc/'p').filter('*').length
    assert_equal 4, (doc/'p').filter('* *').length
    eles = (doc/'p').filter('.ohmy')
    assert_equal 1, eles.length
    assert_equal 'ohmy', eles.first.get_attribute('class')
    assert_equal 3, (doc/'p:not(.ohmy)').length
    assert_equal 3, (doc/'p').not('.ohmy').length
    assert_equal 3, (doc/'p').not(eles.first).length
    assert_equal 2, (doc/'p').filter('[@class]').length
    assert_equal 'last final', (doc/'p[@class~="final"]').first.get_attribute('class')
    assert_equal 1, (doc/'p').filter('[@class~="final"]').length
    assert_equal 2, (doc/'p > a').length
    assert_equal 1, (doc/'p.ohmy > a').length
    assert_equal 2, (doc/'p / a').length
    assert_equal 2, (doc/'link ~ link').length
    assert_equal 3, (doc/'title ~ link').length
    assert_equal 5, (doc/"//p/text()").length
    assert_equal 6, (doc/"//p[a]//text()").length
    assert_equal 2, (doc/"//p/a/text()").length
  end

  # :eq(0), :first and :first() all select the first matching sibling.
  def test_positional
    h = Hpricot( "<div><br/><p>one</p><p>two</p></div>" )
    assert_equal "<p>one</p>", h.search("//div/p:eq(0)").to_s
    assert_equal "<p>one</p>", h.search("//div/p:first").to_s
    assert_equal "<p>one</p>", h.search("//div/p:first()").to_s
  end

  # Unquoted attribute values work inside an attribute filter.
  def test_pace
    doc = Hpricot(TestFiles::PACE_APPLICATION)
    assert_equal 'get', doc.at('form[@name=frmSect11]')['method']
    # assert_equal '2', doc.at('#hdnSpouse')['value']
  end

  # Element counts for a range of selectors on the BOINGBOING fixture.
  def test_scan_boingboing
    @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
    assert_equal 60, (@boingboing/'p.posted').length
    assert_equal 1, @boingboing.search("//a[@name='027906']").length
    assert_equal 10, @boingboing.search("script comment()").length
    assert_equal 3, @boingboing.search("a[text()*='Boing']").length
    assert_equal 1, @boingboing.search("h3[text()='College kids reportedly taking more smart drugs']").length
    assert_equal 0, @boingboing.search("h3[text()='College']").length
    assert_equal 60, @boingboing.search("h3").length
    assert_equal 59, @boingboing.search("h3[text()!='College kids reportedly taking more smart drugs']").length
    assert_equal 17, @boingboing.search("h3[text()$='s']").length
    assert_equal 129, @boingboing.search("p[text()]").length
    assert_equal 211, @boingboing.search("p").length
  end

  # Repeated `before` insertions leave the original element reachable.
  def test_reparent
    doc = Hpricot(%{<div id="blurb_1"></div>})
    div1 = doc.search('#blurb_1')
    div1.before('<div id="blurb_0"></div>')

    div0 = doc.search('#blurb_0')
    div0.before('<div id="blurb_a"></div>')

    assert_equal 'div', doc.at('#blurb_1').name
  end

  # next_sibling and previous_sibling navigate between neighbouring elements.
  def test_siblings
    @basic = Hpricot.parse(TestFiles::BASIC)
    t = @basic.at(:title)
    e = t.next_sibling
    assert_equal 'test1.css', e['href']
    assert_equal 'title', e.previous_sibling.name
  end

  # :not() excludes elements carrying the given class.
  def test_css_negation
    @basic = Hpricot.parse(TestFiles::BASIC)
    assert_equal 3, (@basic/'p:not(.final)').length
  end

  # remove_attribute deletes the attribute from each element.
  def test_remove_attribute
    @basic = Hpricot.parse(TestFiles::BASIC)
    (@basic/:p).each { |ele| ele.remove_attribute('class') }
    assert_equal 0, (@basic/'p[@class]').length
  end

  # Absolute XPath expressions, wildcards and parent steps.
  def test_abs_xpath
    @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
    assert_equal 60, @boingboing.search("/html/body//p[@class='posted']").length
    assert_equal 60, @boingboing.search("/*/body//p[@class='posted']").length
    assert_equal 18, @boingboing.search("//script").length
    divs = @boingboing.search("//script/../div")
    assert_equal 1, divs.length
    imgs = @boingboing.search('//div/p/a/img')
    assert_equal 15, imgs.length
    assert_equal 17, @boingboing.search('//div').search('p/a/img').length
    assert imgs.all? { |x| x.name == 'img' }
  end

  # XPath predicates on attributes and on child paths.
  def test_predicates
    @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
    assert_equal 2, @boingboing.search('//link[@rel="alternate"]').length
    p_imgs = @boingboing.search('//div/p[/a/img]')
    assert_equal 15, p_imgs.length
    assert p_imgs.all? { |x| x.name == 'p' }
    p_imgs = @boingboing.search('//div/p[a/img]')
    assert_equal 18, p_imgs.length
    assert p_imgs.all? { |x| x.name == 'p' }
    assert_equal 1, @boingboing.search('//input[@checked]').length
  end

  # Element counts per tag name on the TENDERLOVE fixture.
  def test_tag_case
    @tenderlove = Hpricot.parse(TestFiles::TENDERLOVE)
    assert_equal 2, @tenderlove.search('//a').length
    assert_equal 3, @tenderlove.search('//area').length
    assert_equal 2, @tenderlove.search('//meta').length
  end

  # The :last, :eq() and :last-of-type positional filters.
  def test_alt_predicates
    @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
    assert_equal 1, @boingboing.search('//table/tr:last').length

    @basic = Hpricot.parse(TestFiles::BASIC)
    assert_equal "<p>The third paragraph</p>",
      @basic.search('p:eq(2)').to_html
    assert_equal '<p class="last final"><b>THE FINAL PARAGRAPH</b></p>',
      @basic.search('p:last').to_html
    assert_equal 'last final', @basic.search('//p:last-of-type').first.get_attribute('class')
  end

  def test_insert_after # ticket #63
    doc = Hpricot('<html><body><div id="a-div"></div></body></html>')
    (doc/'div').each do |element|
      element.after('<p>Paragraph 1</p><p>Paragraph 2</p>')
    end
    # Expected value first, so a failure reports expected/actual the right
    # way round.
    assert_equal '<html><body><div id="a-div"></div><p>Paragraph 1</p><p>Paragraph 2</p></body></html>', doc.to_html
  end

  def test_insert_before # ticket #61
    doc = Hpricot('<html><body><div id="a-div"></div></body></html>')
    (doc/'div').each do |element|
      element.before('<p>Paragraph 1</p><p>Paragraph 2</p>')
    end
    # Expected value first, so a failure reports expected/actual the right
    # way round.
    assert_equal '<html><body><p>Paragraph 1</p><p>Paragraph 2</p><div id="a-div"></div></body></html>', doc.to_html
  end

  # Comma-separated CSS selectors and `|`-joined XPath expressions.
  def test_many_paths
    @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
    assert_equal 62, @boingboing.search('p.posted, link[@rel="alternate"]').length
    assert_equal 20, @boingboing.search('//div/p[a/img]|//link[@rel="alternate"]').length
  end

  # Searching a result set yields another Hpricot::Elements.
  def test_stacked_search
    @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
    assert_kind_of Hpricot::Elements, @boingboing.search('//div/p').search('a img')
  end

  def test_class_search
    # test case sent by Chih-Chao Lam
    # NOTE(review): the stray quote after xyz looks deliberate (malformed
    # input) -- confirm before "fixing" it.
    doc = Hpricot("<div class=xyz'>abc</div>")
    assert_equal 1, doc.search(".xyz").length
    doc = Hpricot("<div class=xyz>abc</div><div class=abc>xyz</div>")
    assert_equal 1, doc.search(".xyz").length
    assert_equal 4, doc.search("*").length
  end

  def test_kleene_star
    # bug noticed by raja bhatia
    doc = Hpricot("<span class='small'>1</span><div class='large'>2</div><div class='small'>3</div><span class='blue large'>4</span>")
    assert_equal 2, doc.search("*[@class*='small']").length
    assert_equal 2, doc.search("*.small").length
    assert_equal 2, doc.search(".small").length
    assert_equal 2, doc.search(".large").length
  end

  # Comments with an empty or blank body are still parsed as comments.
  def test_empty_comment
    doc = Hpricot("<p><!----></p>")
    assert doc.children[0].children[0].comment?
    doc = Hpricot("<p><!-- --></p>")
    assert doc.children[0].children[0].comment?
  end

  # Every attribute of the IMMOB fixture's <body> tag is read correctly.
  def test_body_newlines
    @immob = Hpricot.parse(TestFiles::IMMOB)
    body = @immob.at(:body)
    {'background' => '', 'bgcolor' => '#ffffff', 'text' => '#000000', 'marginheight' => '10',
     'marginwidth' => '10', 'leftmargin' => '10', 'topmargin' => '10', 'link' => '#000066',
     'alink' => '#ff6600', 'hlink' => "#ff6600", 'vlink' => "#000000"}.each do |k, v|
      assert_equal v, body[k]
    end
  end

  # A descendant selector counts the inner of two nested same-name elements
  # exactly once.
  def test_nested_twins
    @doc = Hpricot("<div>Hi<div>there</div></div>")
    assert_equal 1, (@doc/"div div").length
  end

  # The `*` wildcard combined with an attribute filter, in CSS and XPath form.
  def test_wildcard
    @basic = Hpricot.parse(TestFiles::BASIC)
    assert_equal 3, (@basic/"*[@id]").length
    assert_equal 3, (@basic/"//*[@id]").length
  end

  # Markup-looking text inside the first <script> is kept in its inner_html.
  def test_javascripts
    @immob = Hpricot.parse(TestFiles::IMMOB)
    assert_equal 3, (@immob/:script)[0].inner_html.scan(/<LINK/).length
  end

  # Link count on the WEEK9 fixture for anchors mentioning "GameCenter".
  def test_nested_scripts
    @week9 = Hpricot.parse(TestFiles::WEEK9)
    assert_equal 14, (@week9/"a").find_all { |x| x.inner_html.include? "GameCenter" }.length
  end

  def test_uswebgen
    @uswebgen = Hpricot.parse(TestFiles::USWEBGEN)
    # sent by brent beardsley, hpricot 0.3 had problems with all the links.
    assert_equal 67, (@uswebgen/:a).length
  end

  # Stray `?URL=` fragments inside a tag must not break attribute parsing.
  def test_mangled_tags
    [%{<html><form name='loginForm' method='post' action='/units/a/login/1,13088,779-1,00.html'?URL=></form></html>},
     %{<html><form name='loginForm' ?URL= method='post' action='/units/a/login/1,13088,779-1,00.html'></form></html>},
     %{<html><form name='loginForm'?URL= ?URL= method='post' action='/units/a/login/1,13088,779-1,00.html'?URL=></form></html>},
     %{<html><form name='loginForm' method='post' action='/units/a/login/1,13088,779-1,00.html' ?URL=></form></html>}].
    each do |str|
      doc = Hpricot(str)
      assert_equal 1, (doc/:form).length
      assert_equal '/units/a/login/1,13088,779-1,00.html', doc.at("form")['action']
    end
  end

  # Processing instructions expose their target and content.
  def test_procins
    doc = Hpricot("<?php print('hello') ?>\n<?xml blah='blah'?>")
    assert_equal "php", doc.children[0].target
    assert_equal "blah='blah'", doc.children[2].content
  end

  # A very large attribute value makes the parser raise Hpricot::ParseError.
  def test_buffer_error
    assert_raise Hpricot::ParseError, "ran out of buffer space on element <input>, starting on line 3." do
      Hpricot(%{<p>\n\n<input type="hidden" name="__VIEWSTATE" value="#{(("X" * 2000) + "\n") * 22}" />\n\n</p>})
    end
  end

  # Attribute-value predicates work on <param> elements nested in <object>.
  # NOTE(review): the unterminated `</html?` closing the snippet is kept
  # exactly as found; it may be intentional malformed input.
  def test_youtube_attr
    str = <<-edoc
<html><body>
Lorem ipsum. Jolly roger, ding-dong sing-a-long
<object width="425" height="350">
<param name="movie" value="http://www.youtube.com/v/NbDQ4M_cuwA"></param>
<param name="wmode" value="transparent"></param>
<embed src="http://www.youtube.com/v/NbDQ4M_cuwA"
type="application/x-shockwave-flash" wmode="transparent" width="425" height="350">
</embed>
</object>
Check out my posting, I have bright mice in large clown cars.
<object width="425" height="350">
<param name="movie" value="http://www.youtube.com/v/foobar"></param>
<param name="wmode" value="transparent"></param>
<embed src="http://www.youtube.com/v/foobar"
type="application/x-shockwave-flash" wmode="transparent" width="425" height="350">
</embed>
</object>
</body></html?
    edoc
    doc = Hpricot(str)
    assert_equal "http://www.youtube.com/v/NbDQ4M_cuwA",
      doc.at("//object/param[@value='http://www.youtube.com/v/NbDQ4M_cuwA']")['value']
  end

  # ticket #84 by jamezilla
  def test_screwed_xmlns
    doc = Hpricot(<<-edoc)
<?xml:namespace prefix = cwi />
<html><body>HAI</body></html>
    edoc
    assert_equal "HAI", doc.at("body").inner_text
  end

  # Reported by Jonathan Nichols on the Hpricot list (24 May 2007)
  def test_self_closed_form
    doc = Hpricot(<<-edoc)
<body>
<form action="/loginRegForm" name="regForm" method="POST" />
<input type="button">
</form>
</body>
    edoc
    assert_equal "button", doc.at("//form/input")['type']
  end

  # The :parent and :empty filters.
  def test_filters
    @basic = Hpricot.parse(TestFiles::BASIC)
    assert_equal 0, (@basic/"title:parent").size
    assert_equal 3, (@basic/"p:parent").size
    assert_equal 1, (@basic/"title:empty").size
    assert_equal 1, (@basic/"p:empty").size
  end

  # CDATA markers inside <script> survive a parse / to_html round trip.
  def test_keep_cdata
    str = %{<script> /*<![CDATA[*/
/*]]>*/ </script>}
    assert_equal str, Hpricot(str).to_html
  end

  # Namespaced element names are searchable with their prefix in XML mode.
  def test_namespace
    chunk = <<-END
<a xmlns:t="http://www.nexopia.com/dev/template">
<t:sam>hi </t:sam>
</a>
    END
    doc = Hpricot::XML(chunk)
    assert((doc/"//t:sam").size > 0) # at least this should probably work
    # assert (doc/"//sam").size > 0 # this would be nice
  end
end