why-hpricot 0.6.201

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. data/CHANGELOG +62 -0
  2. data/COPYING +18 -0
  3. data/README +284 -0
  4. data/Rakefile +259 -0
  5. data/ext/fast_xs/FastXsService.java +1018 -0
  6. data/ext/fast_xs/extconf.rb +4 -0
  7. data/ext/fast_xs/fast_xs.c +200 -0
  8. data/ext/hpricot_scan/HpricotScanService.java +1305 -0
  9. data/ext/hpricot_scan/extconf.rb +6 -0
  10. data/ext/hpricot_scan/hpricot_common.rl +76 -0
  11. data/ext/hpricot_scan/hpricot_css.c +3506 -0
  12. data/ext/hpricot_scan/hpricot_scan.c +6679 -0
  13. data/ext/hpricot_scan/hpricot_scan.h +79 -0
  14. data/ext/hpricot_scan/hpricot_scan.java.rl +373 -0
  15. data/ext/hpricot_scan/hpricot_scan.rl +697 -0
  16. data/extras/mingw-rbconfig.rb +176 -0
  17. data/lib/hpricot.rb +26 -0
  18. data/lib/hpricot/blankslate.rb +63 -0
  19. data/lib/hpricot/builder.rb +215 -0
  20. data/lib/hpricot/elements.rb +510 -0
  21. data/lib/hpricot/htmlinfo.rb +691 -0
  22. data/lib/hpricot/inspect.rb +103 -0
  23. data/lib/hpricot/modules.rb +38 -0
  24. data/lib/hpricot/parse.rb +38 -0
  25. data/lib/hpricot/tag.rb +198 -0
  26. data/lib/hpricot/tags.rb +164 -0
  27. data/lib/hpricot/traverse.rb +838 -0
  28. data/lib/hpricot/xchar.rb +94 -0
  29. data/test/files/basic.xhtml +17 -0
  30. data/test/files/boingboing.html +2266 -0
  31. data/test/files/cy0.html +3653 -0
  32. data/test/files/immob.html +400 -0
  33. data/test/files/pace_application.html +1320 -0
  34. data/test/files/tenderlove.html +16 -0
  35. data/test/files/uswebgen.html +220 -0
  36. data/test/files/utf8.html +1054 -0
  37. data/test/files/week9.html +1723 -0
  38. data/test/files/why.xml +19 -0
  39. data/test/load_files.rb +7 -0
  40. data/test/test_alter.rb +77 -0
  41. data/test/test_builder.rb +37 -0
  42. data/test/test_parser.rb +400 -0
  43. data/test/test_paths.rb +25 -0
  44. data/test/test_preserved.rb +66 -0
  45. data/test/test_xml.rb +28 -0
  46. metadata +107 -0
@@ -0,0 +1,19 @@
1
+ <?xml version='1.0'?><rss xmlns:admin='http://webns.net/mvcb/' version='2.0' xmlns:sy='http://purl.org/rss/1.0/modules/syndication/' xmlns:dc='http://purl.org/dc/elements/1.1/' xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'>
2
+ <channel>
3
+ <title>why the lucky stiff</title>
4
+ <link>http://whytheluckystiff.net</link>
5
+ <description>hex-editing reality to give us infinite grenades!!</description>
6
+ <dc:language>en-us</dc:language>
7
+ <dc:creator/>
8
+ <dc:date>2007-01-16T22:39:04+00:00</dc:date>
9
+ <admin:generatorAgent rdf:resource='http://hobix.com/?v=0.4'/>
10
+ <sy:updatePeriod>hourly</sy:updatePeriod>
11
+ <sy:updateFrequency>1</sy:updateFrequency>
12
+ <sy:updateBase>2000-01-01T12:00+00:00</sy:updateBase>
13
+ <item><title>1.3</title><link>http://whytheluckystiff.net/quatrains/1.3.html</link><guid isPermaLink='false'>quatrains/1.3@http://whytheluckystiff.net</guid><dc:subject>quatrains</dc:subject><dc:subject>quatrains</dc:subject><dc:creator>why the lucky stiff</dc:creator><dc:date>2007-01-14T08:47:05+00:00</dc:date><description>&lt;blockquote&gt;
14
+ &lt;p&gt;That cadillac of yours and that driver of yours!&lt;br /&gt;You and your teacups rattling away in the back seat!&lt;br /&gt;You always took the mike, oh, and all those cowboys you shot!&lt;br /&gt;I held your hand! And I&amp;#8217;ll shoot a cowboy one day!&lt;/p&gt;
15
+ &lt;/blockquote&gt;
16
+ &lt;blockquote&gt;
17
+ &lt;p&gt;You said, &amp;#8220;Let&amp;#8217;s run into the woods like kids!&amp;#8221; &lt;br /&gt;You said, &amp;#8220;Let&amp;#8217;s rub our hands together super-hot!&amp;#8221; &lt;br /&gt;And we scalded the trees and left octagons, I think that was you and&lt;br /&gt;You threw parties on the roof!&lt;/p&gt;
18
+ &lt;/blockquote&gt;</description></item></channel>
19
+ </rss>
@@ -0,0 +1,7 @@
1
+ module TestFiles # Exposes every fixture in files/ (next to this file) as an upper-cased constant holding the file's contents.
2
+ Dir.chdir(File.dirname(__FILE__)) do # block form: the glob below is resolved relative to this file's directory
3
+ Dir['files/*.{html,xhtml,xml}'].each do |fname|
4
+ const_set fname[%r!/(\w+)\.\w+$!, 1].upcase, IO.read(fname) # e.g. files/basic.xhtml becomes TestFiles::BASIC
5
+ end
6
+ end
7
+ end
@@ -0,0 +1,77 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'test/unit'
4
+ require 'hpricot'
5
+ require 'load_files'
6
+
7
+ class TestAlter < Test::Unit::TestCase
8
+ def setup
9
+ @basic = Hpricot.parse(TestFiles::BASIC)
10
+ end
11
+
12
+ def test_before
13
+ test0 = "<link rel='stylesheet' href='test0.css' />"
14
+ @basic.at("link").before(test0)
15
+ assert_equal 'test0.css', @basic.at("link").attributes['href']
16
+ end
17
+
18
+ def test_after
19
+ test_inf = "<link rel='stylesheet' href='test_inf.css' />"
20
+ @basic.search("link")[-1].after(test_inf)
21
+ assert_equal 'test_inf.css', @basic.search("link")[-1].attributes['href']
22
+ end
23
+
24
+ def test_wrap
25
+ ohmy = (@basic/"p.ohmy").wrap("<div id='wrapper'></div>")
26
+ assert_equal 'wrapper', ohmy[0].parent['id']
27
+ assert_equal 'ohmy', Hpricot(@basic.to_html).at("#wrapper").children[0]['class']
28
+ end
29
+
30
+ def test_add_class
31
+ first_p = (@basic/"p:first").add_class("testing123")
32
+ assert first_p[0].get_attribute("class").split(" ").include?("testing123")
33
+ assert (Hpricot(@basic.to_html)/"p:first")[0].attributes["class"].split(" ").include?("testing123")
34
+ assert !(Hpricot(@basic.to_html)/"p:gt(0)")[0].attributes["class"].split(" ").include?("testing123")
35
+ end
36
+
37
+ def test_change_attributes
38
+ all_ps = (@basic/"p").attr("title", "Some Title")
39
+ all_as = (@basic/"a").attr("href", "http://my_new_href.com")
40
+ all_lb = (@basic/"link").attr("href") { |e| e.name }
41
+ assert_changed(@basic, "p", all_ps) {|p| p.attributes["title"] == "Some Title"}
42
+ assert_changed(@basic, "a", all_as) {|a| a.attributes["href"] == "http://my_new_href.com"}
43
+ assert_changed(@basic, "link", all_lb) {|a| a.attributes["href"] == "link" }
44
+ end
45
+
46
+ def test_remove_attr
47
+ all_rl = (@basic/"link").remove_attr("href")
48
+ assert_changed(@basic, "link", all_rl) { |link| link['href'].nil? }
49
+ end
50
+
51
+ def test_remove_class
52
+ all_c1 = (@basic/"p[@class*='last']").remove_class("last")
53
+ assert_changed(@basic, "p[@class*='last']", all_c1) { |p| p['class'] == 'final' }
54
+ end
55
+
56
+ def test_remove_all_classes
57
+ all_c2 = (@basic/"p[@class]").remove_class
58
+ assert_changed(@basic, "p[@class]", all_c2) { |p| p['class'].nil? }
59
+ end
60
+
61
+ def test_xml_casing
62
+ doc = Hpricot.XML("<root><wildCat>text</wildCat></root>")
63
+ (doc/:root/:wildCat).after("<beanPole>gravity</beanPole>")
64
+ assert_equal doc.to_s, "<root><wildCat>text</wildCat><beanPole>gravity</beanPole></root>"
65
+
66
+ frag = Hpricot.XML do
67
+ b { i "A bit of HTML" }
68
+ end
69
+ (frag/:b).after("<beanPole>gravity</beanPole>")
70
+ assert_equal frag.to_s, "<b><i>A bit of HTML</i></b><beanPole>gravity</beanPole>"
71
+ end
72
+
73
+ def assert_changed original, selector, set, &block
74
+ assert set.all?(&block)
75
+ assert Hpricot(original.to_html).search(selector).all?(&block)
76
+ end
77
+ end
@@ -0,0 +1,37 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'test/unit'
4
+ require 'hpricot'
5
+
6
+ class TestBuilder < Test::Unit::TestCase # Tests for how text and attribute values are escaped when building or serializing documents.
7
+ def test_escaping_text # text passed to a tag method is escaped in to_html but read back unescaped from the text node
8
+ doc = Hpricot() { b "<a\"b>" }
9
+ assert_equal "<b>&lt;a&quot;b&gt;</b>", doc.to_html
10
+ assert_equal %{<a"b>}, doc.at("text()").to_s
11
+ end
12
+
13
+ def test_no_escaping_text # text added with `text` appears verbatim in to_html
14
+ doc = Hpricot() { div.test.me! { text "<a\"b>" } }
15
+ assert_equal %{<div class="test" id="me"><a"b></div>}, doc.to_html
16
+ assert_equal %{<a"b>}, doc.at("text()").to_s
17
+ end
18
+
19
+ def test_latin1_entities # bytes \200 and \225 come out as &#8364; and &#8226;, and as UTF-8 bytes via to_s
20
+ doc = Hpricot() { b "\200\225" }
21
+ assert_equal "<b>&#8364;&#8226;</b>", doc.to_html
22
+ assert_equal "\342\202\254\342\200\242", doc.at("text()").to_s
23
+ end
24
+
25
+ def test_escaping_attrs # double quotes inside a single-quoted attribute value are backslash-escaped on output
26
+ text = "<span style='font-family:\"MS Mincho\"'>Some text</span>"
27
+ assert_equal "<span style=\"font-family:\\\"MS Mincho\\\"\">Some text</span>",
28
+ Hpricot(text).to_html
29
+ end
30
+
31
+ def test_korean_utf8_entities # UTF-8 encoded characters are emitted as numeric entities
32
+ # a = '한글'
33
+ a = "\xed\x95\x9c\xea\xb8\x80"
34
+ doc = Hpricot() { b a }
35
+ assert_equal "<b>&#54620;&#44544;</b>", doc.to_html
36
+ end
37
+ end
@@ -0,0 +1,400 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'test/unit'
4
+ require 'hpricot'
5
+ require 'load_files'
6
+
7
+ class TestParser < Test::Unit::TestCase
8
+ def test_set_attr
9
+ @basic = Hpricot.parse(TestFiles::BASIC)
10
+ @basic.search('//p').set('class', 'para')
11
+ assert_equal 4, @basic.search('//p').length
12
+ assert_equal 4, @basic.search('//p').find_all { |x| x['class'] == 'para' }.length
13
+ end
14
+
15
+ # Test creating a new element
16
+ def test_new_element
17
+ elem = Hpricot::Elem.new('form')
18
+ assert_not_nil(elem)
19
+ assert_not_nil(elem.attributes)
20
+ end
21
+
22
+ def test_scan_text
23
+ assert_equal 'FOO', Hpricot.make("FOO").children.first.content
24
+ end
25
+
26
+ def test_filter_by_attr
27
+ @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
28
+
29
+ # this link is escaped in the doc
30
+ link = 'http://www.youtube.com/watch?v=TvSNXyNw26g&search=chris%20ware'
31
+ assert_equal link, @boingboing.at("a[@href='#{link}']")['href']
32
+ end
33
+
34
+ def test_filter_contains
35
+ @basic = Hpricot.parse(TestFiles::BASIC)
36
+ assert_equal '<title>Sample XHTML</title>', @basic.search("title:contains('Sample')").to_s
37
+ end
38
+
39
+ def test_get_element_by_id
40
+ @basic = Hpricot.parse(TestFiles::BASIC)
41
+ assert_equal 'link1', @basic.get_element_by_id('link1')['id']
42
+ assert_equal 'link1', @basic.get_element_by_id('body1').get_element_by_id('link1').get_attribute('id')
43
+ end
44
+
45
+ def test_get_element_by_tag_name
46
+ @basic = Hpricot.parse(TestFiles::BASIC)
47
+ assert_equal 'link1', @basic.get_elements_by_tag_name('a')[0].get_attribute('id')
48
+ assert_equal 'link1', @basic.get_elements_by_tag_name('body')[0].get_element_by_id('link1').get_attribute('id')
49
+ end
50
+
51
+ def test_get_elements_by_tag_name_star
52
+ simple = Hpricot.parse("<div><p id='first'>First</p><p id='second'>Second</p></div>")
53
+ assert_equal 3, simple.get_elements_by_tag_name("*").size
54
+ assert_equal 1, simple.get_elements_by_tag_name("div").size
55
+ assert_equal 2, simple.get_elements_by_tag_name("p").size
56
+ end
57
+
58
+ def test_output_basic
59
+ @basic = Hpricot.parse(TestFiles::BASIC)
60
+ @basic2 = Hpricot.parse(@basic.inner_html)
61
+ scan_basic @basic2
62
+ end
63
+
64
+ def test_scan_basic
65
+ @basic = Hpricot.parse(TestFiles::BASIC)
66
+ scan_basic @basic
67
+ end
68
+
69
+ def scan_basic doc
70
+ assert_kind_of Hpricot::XMLDecl, doc.children.first
71
+ assert_not_equal doc.children.first.to_s, doc.children[1].to_s
72
+ assert_equal 'link1', doc.at('#link1')['id']
73
+ assert_equal 'link1', doc.at("p a")['id']
74
+ assert_equal 'link1', (doc/:p/:a).first['id']
75
+ assert_equal 'link1', doc.search('p').at('a').get_attribute('id')
76
+ assert_equal 'link2', (doc/'p').filter('.ohmy').search('a').first.get_attribute('id')
77
+ assert_equal (doc/'p')[2], (doc/'p').filter(':nth(2)')[0]
78
+ assert_equal (doc/'p')[2], (doc/'p').filter('[3]')[0]
79
+ assert_equal 4, (doc/'p').filter('*').length
80
+ assert_equal 4, (doc/'p').filter('* *').length
81
+ eles = (doc/'p').filter('.ohmy')
82
+ assert_equal 1, eles.length
83
+ assert_equal 'ohmy', eles.first.get_attribute('class')
84
+ assert_equal 3, (doc/'p:not(.ohmy)').length
85
+ assert_equal 3, (doc/'p').not('.ohmy').length
86
+ assert_equal 3, (doc/'p').not(eles.first).length
87
+ assert_equal 2, (doc/'p').filter('[@class]').length
88
+ assert_equal 'last final', (doc/'p[@class~="final"]').first.get_attribute('class')
89
+ assert_equal 1, (doc/'p').filter('[@class~="final"]').length
90
+ assert_equal 2, (doc/'p > a').length
91
+ assert_equal 1, (doc/'p.ohmy > a').length
92
+ assert_equal 2, (doc/'p / a').length
93
+ assert_equal 2, (doc/'link ~ link').length
94
+ assert_equal 3, (doc/'title ~ link').length
95
+ assert_equal 5, (doc/"//p/text()").length
96
+ assert_equal 6, (doc/"//p[a]//text()").length
97
+ assert_equal 2, (doc/"//p/a/text()").length
98
+ end
99
+
100
+ def test_positional
101
+ h = Hpricot( "<div><br/><p>one</p><p>two</p></div>" )
102
+ assert_equal "<p>one</p>", h.search("//div/p:eq(0)").to_s
103
+ assert_equal "<p>one</p>", h.search("//div/p:first").to_s
104
+ assert_equal "<p>one</p>", h.search("//div/p:first()").to_s
105
+ end
106
+
107
+ def test_pace
108
+ doc = Hpricot(TestFiles::PACE_APPLICATION)
109
+ assert_equal 'get', doc.at('form[@name=frmSect11]')['method']
110
+ # assert_equal '2', doc.at('#hdnSpouse')['value']
111
+ end
112
+
113
+ def test_scan_boingboing
114
+ @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
115
+ assert_equal 60, (@boingboing/'p.posted').length
116
+ assert_equal 1, @boingboing.search("//a[@name='027906']").length
117
+ assert_equal 10, @boingboing.search("script comment()").length
118
+ assert_equal 3, @boingboing.search("a[text()*='Boing']").length
119
+ assert_equal 1, @boingboing.search("h3[text()='College kids reportedly taking more smart drugs']").length
120
+ assert_equal 0, @boingboing.search("h3[text()='College']").length
121
+ assert_equal 60, @boingboing.search("h3").length
122
+ assert_equal 59, @boingboing.search("h3[text()!='College kids reportedly taking more smart drugs']").length
123
+ assert_equal 17, @boingboing.search("h3[text()$='s']").length
124
+ assert_equal 129, @boingboing.search("p[text()]").length
125
+ assert_equal 211, @boingboing.search("p").length
126
+ end
127
+
128
+ def test_reparent
129
+ doc = Hpricot(%{<div id="blurb_1"></div>})
130
+ div1 = doc.search('#blurb_1')
131
+ div1.before('<div id="blurb_0"></div>')
132
+
133
+ div0 = doc.search('#blurb_0')
134
+ div0.before('<div id="blurb_a"></div>')
135
+
136
+ assert_equal 'div', doc.at('#blurb_1').name
137
+ end
138
+
139
+ def test_siblings
140
+ @basic = Hpricot.parse(TestFiles::BASIC)
141
+ t = @basic.at(:title)
142
+ e = t.next_sibling
143
+ assert_equal 'test1.css', e['href']
144
+ assert_equal 'title', e.previous_sibling.name
145
+ end
146
+
147
+ def test_css_negation
148
+ @basic = Hpricot.parse(TestFiles::BASIC)
149
+ assert_equal 3, (@basic/'p:not(.final)').length
150
+ end
151
+
152
+ def test_remove_attribute
153
+ @basic = Hpricot.parse(TestFiles::BASIC)
154
+ (@basic/:p).each { |ele| ele.remove_attribute('class') }
155
+ assert_equal 0, (@basic/'p[@class]').length
156
+ end
157
+
158
+ def test_abs_xpath
159
+ @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
160
+ assert_equal 60, @boingboing.search("/html/body//p[@class='posted']").length
161
+ assert_equal 60, @boingboing.search("/*/body//p[@class='posted']").length
162
+ assert_equal 18, @boingboing.search("//script").length
163
+ divs = @boingboing.search("//script/../div")
164
+ assert_equal 1, divs.length
165
+ imgs = @boingboing.search('//div/p/a/img')
166
+ assert_equal 15, imgs.length
167
+ assert_equal 17, @boingboing.search('//div').search('p/a/img').length
168
+ assert imgs.all? { |x| x.name == 'img' }
169
+ end
170
+
171
+ def test_predicates
172
+ @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
173
+ assert_equal 2, @boingboing.search('//link[@rel="alternate"]').length
174
+ p_imgs = @boingboing.search('//div/p[/a/img]')
175
+ assert_equal 15, p_imgs.length
176
+ assert p_imgs.all? { |x| x.name == 'p' }
177
+ p_imgs = @boingboing.search('//div/p[a/img]')
178
+ assert_equal 18, p_imgs.length
179
+ assert p_imgs.all? { |x| x.name == 'p' }
180
+ assert_equal 1, @boingboing.search('//input[@checked]').length
181
+ end
182
+
183
+ def test_tag_case
184
+ @tenderlove = Hpricot.parse(TestFiles::TENDERLOVE)
185
+ assert_equal 2, @tenderlove.search('//a').length
186
+ assert_equal 3, @tenderlove.search('//area').length
187
+ assert_equal 2, @tenderlove.search('//meta').length
188
+ end
189
+
190
+ def test_alt_predicates
191
+ @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
192
+ assert_equal 1, @boingboing.search('//table/tr:last').length
193
+
194
+ @basic = Hpricot.parse(TestFiles::BASIC)
195
+ assert_equal "<p>The third paragraph</p>",
196
+ @basic.search('p:eq(2)').to_html
197
+ assert_equal '<p class="last final"><b>THE FINAL PARAGRAPH</b></p>',
198
+ @basic.search('p:last').to_html
199
+ assert_equal 'last final', @basic.search('//p:last-of-type').first.get_attribute('class')
200
+ end
201
+
202
+ def test_insert_after # ticket #63
203
+ doc = Hpricot('<html><body><div id="a-div"></div></body></html>')
204
+ (doc/'div').each do |element|
205
+ element.after('<p>Paragraph 1</p><p>Paragraph 2</p>')
206
+ end
207
+ assert_equal doc.to_html, '<html><body><div id="a-div"></div><p>Paragraph 1</p><p>Paragraph 2</p></body></html>'
208
+ end
209
+
210
+ def test_insert_before # ticket #61
211
+ doc = Hpricot('<html><body><div id="a-div"></div></body></html>')
212
+ (doc/'div').each do |element|
213
+ element.before('<p>Paragraph 1</p><p>Paragraph 2</p>')
214
+ end
215
+ assert_equal doc.to_html, '<html><body><p>Paragraph 1</p><p>Paragraph 2</p><div id="a-div"></div></body></html>'
216
+ end
217
+
218
+ def test_many_paths
219
+ @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
220
+ assert_equal 62, @boingboing.search('p.posted, link[@rel="alternate"]').length
221
+ assert_equal 20, @boingboing.search('//div/p[a/img]|//link[@rel="alternate"]').length
222
+ end
223
+
224
+ def test_stacked_search
225
+ @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
226
+ assert_kind_of Hpricot::Elements, @boingboing.search('//div/p').search('a img')
227
+ end
228
+
229
+ def test_class_search
230
+ # test case sent by Chih-Chao Lam
231
+ doc = Hpricot("<div class=xyz'>abc</div>")
232
+ assert_equal 1, doc.search(".xyz").length
233
+ doc = Hpricot("<div class=xyz>abc</div><div class=abc>xyz</div>")
234
+ assert_equal 1, doc.search(".xyz").length
235
+ assert_equal 4, doc.search("*").length
236
+ end
237
+
238
+ def test_kleene_star
239
+ # bug noticed by raja bhatia
240
+ doc = Hpricot("<span class='small'>1</span><div class='large'>2</div><div class='small'>3</div><span class='blue large'>4</span>")
241
+ assert_equal 2, doc.search("*[@class*='small']").length
242
+ assert_equal 2, doc.search("*.small").length
243
+ assert_equal 2, doc.search(".small").length
244
+ assert_equal 2, doc.search(".large").length
245
+ end
246
+
247
+ def test_empty_comment
248
+ doc = Hpricot("<p><!----></p>")
249
+ assert doc.children[0].children[0].comment?
250
+ doc = Hpricot("<p><!-- --></p>")
251
+ assert doc.children[0].children[0].comment?
252
+ end
253
+
254
+ def test_body_newlines
255
+ @immob = Hpricot.parse(TestFiles::IMMOB)
256
+ body = @immob.at(:body)
257
+ {'background' => '', 'bgcolor' => '#ffffff', 'text' => '#000000', 'marginheight' => '10',
258
+ 'marginwidth' => '10', 'leftmargin' => '10', 'topmargin' => '10', 'link' => '#000066',
259
+ 'alink' => '#ff6600', 'hlink' => "#ff6600", 'vlink' => "#000000"}.each do |k, v|
260
+ assert_equal v, body[k]
261
+ end
262
+ end
263
+
264
+ def test_nested_twins
265
+ @doc = Hpricot("<div>Hi<div>there</div></div>")
266
+ assert_equal 1, (@doc/"div div").length
267
+ end
268
+
269
+ def test_wildcard
270
+ @basic = Hpricot.parse(TestFiles::BASIC)
271
+ assert_equal 3, (@basic/"*[@id]").length
272
+ assert_equal 3, (@basic/"//*[@id]").length
273
+ end
274
+
275
+ def test_javascripts
276
+ @immob = Hpricot.parse(TestFiles::IMMOB)
277
+ assert_equal 3, (@immob/:script)[0].inner_html.scan(/<LINK/).length
278
+ end
279
+
280
+ def test_nested_scripts
281
+ @week9 = Hpricot.parse(TestFiles::WEEK9)
282
+ assert_equal 14, (@week9/"a").find_all { |x| x.inner_html.include? "GameCenter" }.length
283
+ end
284
+
285
+ def test_uswebgen
286
+ @uswebgen = Hpricot.parse(TestFiles::USWEBGEN)
287
+ # sent by brent beardsley, hpricot 0.3 had problems with all the links.
288
+ assert_equal 67, (@uswebgen/:a).length
289
+ end
290
+
291
+ def test_mangled_tags
292
+ [%{<html><form name='loginForm' method='post' action='/units/a/login/1,13088,779-1,00.html'?URL=></form></html>},
293
+ %{<html><form name='loginForm' ?URL= method='post' action='/units/a/login/1,13088,779-1,00.html'></form></html>},
294
+ %{<html><form name='loginForm'?URL= ?URL= method='post' action='/units/a/login/1,13088,779-1,00.html'?URL=></form></html>},
295
+ %{<html><form name='loginForm' method='post' action='/units/a/login/1,13088,779-1,00.html' ?URL=></form></html>}].
296
+ each do |str|
297
+ doc = Hpricot(str)
298
+ assert_equal 1, (doc/:form).length
299
+ assert_equal '/units/a/login/1,13088,779-1,00.html', doc.at("form")['action']
300
+ end
301
+ end
302
+
303
+ def test_procins
304
+ doc = Hpricot("<?php print('hello') ?>\n<?xml blah='blah'?>")
305
+ assert_equal "php", doc.children[0].target
306
+ assert_equal "blah='blah'", doc.children[2].content
307
+ end
308
+
309
+ def test_no_buffer_error
310
+ Hpricot(%{<p>\n\n<input type="hidden" name="__VIEWSTATE" value="#{(("X" * 2000) + "\n") * 44}" />\n\n</p>})
311
+ end
312
+
313
+ def test_youtube_attr
314
+ str = <<-edoc
315
+ <html><body>
316
+ Lorem ipsum. Jolly roger, ding-dong sing-a-long
317
+ <object width="425" height="350">
318
+ <param name="movie" value="http://www.youtube.com/v/NbDQ4M_cuwA"></param>
319
+ <param name="wmode" value="transparent"></param>
320
+ <embed src="http://www.youtube.com/v/NbDQ4M_cuwA"
321
+ type="application/x-shockwave-flash" wmode="transparent" width="425" height="350">
322
+ </embed>
323
+ </object>
324
+ Check out my posting, I have bright mice in large clown cars.
325
+ <object width="425" height="350">
326
+ <param name="movie" value="http://www.youtube.com/v/foobar"></param>
327
+ <param name="wmode" value="transparent"></param>
328
+ <embed src="http://www.youtube.com/v/foobar"
329
+ type="application/x-shockwave-flash" wmode="transparent" width="425" height="350">
330
+ </embed>
331
+ </object>
332
+ </body></html?
333
+ edoc
334
+ doc = Hpricot(str)
335
+ assert_equal "http://www.youtube.com/v/NbDQ4M_cuwA",
336
+ doc.at("//object/param[@value='http://www.youtube.com/v/NbDQ4M_cuwA']")['value']
337
+ end
338
+
339
+ # ticket #84 by jamezilla
340
+ def test_screwed_xmlns
341
+ doc = Hpricot(<<-edoc)
342
+ <?xml:namespace prefix = cwi />
343
+ <html><body>HAI</body></html>
344
+ edoc
345
+ assert_equal "HAI", doc.at("body").inner_text
346
+ end
347
+
348
+ # Reported by Jonathan Nichols on the Hpricot list (24 May 2007)
349
+ def test_self_closed_form
350
+ doc = Hpricot(<<-edoc)
351
+ <body>
352
+ <form action="/loginRegForm" name="regForm" method="POST" />
353
+ <input type="button">
354
+ </form>
355
+ </body>
356
+ edoc
357
+ assert_equal "button", doc.at("//form/input")['type']
358
+ end
359
+
360
+ def test_filters
361
+ @basic = Hpricot.parse(TestFiles::BASIC)
362
+ assert_equal 0, (@basic/"title:parent").size
363
+ assert_equal 3, (@basic/"p:parent").size
364
+ assert_equal 1, (@basic/"title:empty").size
365
+ assert_equal 1, (@basic/"p:empty").size
366
+ end
367
+
368
+ def test_keep_cdata
369
+ str = %{<script> /*<![CDATA[*/
370
+ /*]]>*/ </script>}
371
+ assert_equal str, Hpricot(str).to_html
372
+ end
373
+
374
+ def test_namespace
375
+ chunk = <<-END
376
+ <a xmlns:t="http://www.nexopia.com/dev/template">
377
+ <t:sam>hi </t:sam>
378
+ </a>
379
+ END
380
+ doc = Hpricot::XML(chunk)
381
+ assert (doc/"//t:sam").size > 0 # at least this should probably work
382
+ # assert (doc/"//sam").size > 0 # this would be nice
383
+ end
384
+
385
+ def test_uxs_ignores_non_entities
386
+ assert_equal 'abc', Hpricot.uxs('abc')
387
+ end
388
+
389
+ def test_uxs_handles_gt_lt_amp_quot
390
+ assert_equal '"&<>', Hpricot.uxs('&quot;&amp;&lt;&gt;')
391
+ end
392
+
393
+ def test_uxs_handles_numeric_values
394
+ assert_equal "\303\251", Hpricot.uxs('&#233;')
395
+ end
396
+
397
+ def test_uxs_handles_entities
398
+ assert_equal "\303\251", Hpricot.uxs('&eacute;')
399
+ end
400
+ end