hpricot 0.7-x86-mswin32

Sign up to get free protection for your applications and to get access to all the features.
Files changed (51) hide show
  1. data/CHANGELOG +68 -0
  2. data/COPYING +18 -0
  3. data/README +284 -0
  4. data/Rakefile +260 -0
  5. data/ext/fast_xs/FastXsService.java +1018 -0
  6. data/ext/fast_xs/extconf.rb +4 -0
  7. data/ext/fast_xs/fast_xs.c +200 -0
  8. data/ext/hpricot_scan/HpricotScanService.java +1305 -0
  9. data/ext/hpricot_scan/extconf.rb +6 -0
  10. data/ext/hpricot_scan/hpricot_common.rl +76 -0
  11. data/ext/hpricot_scan/hpricot_css.c +3502 -0
  12. data/ext/hpricot_scan/hpricot_css.rl +115 -0
  13. data/ext/hpricot_scan/hpricot_scan.c +6704 -0
  14. data/ext/hpricot_scan/hpricot_scan.h +79 -0
  15. data/ext/hpricot_scan/hpricot_scan.java.rl +373 -0
  16. data/ext/hpricot_scan/hpricot_scan.rl +722 -0
  17. data/ext/hpricot_scan/test.rb +4 -0
  18. data/extras/mingw-rbconfig.rb +176 -0
  19. data/lib/fast_xs.so +0 -0
  20. data/lib/hpricot.rb +26 -0
  21. data/lib/hpricot/blankslate.rb +63 -0
  22. data/lib/hpricot/builder.rb +216 -0
  23. data/lib/hpricot/elements.rb +510 -0
  24. data/lib/hpricot/htmlinfo.rb +691 -0
  25. data/lib/hpricot/inspect.rb +103 -0
  26. data/lib/hpricot/modules.rb +38 -0
  27. data/lib/hpricot/parse.rb +38 -0
  28. data/lib/hpricot/tag.rb +198 -0
  29. data/lib/hpricot/tags.rb +164 -0
  30. data/lib/hpricot/traverse.rb +838 -0
  31. data/lib/hpricot/xchar.rb +94 -0
  32. data/lib/hpricot_scan.so +0 -0
  33. data/test/files/basic.xhtml +17 -0
  34. data/test/files/boingboing.html +2266 -0
  35. data/test/files/cy0.html +3653 -0
  36. data/test/files/immob.html +400 -0
  37. data/test/files/pace_application.html +1320 -0
  38. data/test/files/tenderlove.html +16 -0
  39. data/test/files/uswebgen.html +220 -0
  40. data/test/files/utf8.html +1054 -0
  41. data/test/files/week9.html +1723 -0
  42. data/test/files/why.xml +19 -0
  43. data/test/load_files.rb +7 -0
  44. data/test/nokogiri-bench.rb +64 -0
  45. data/test/test_alter.rb +77 -0
  46. data/test/test_builder.rb +37 -0
  47. data/test/test_parser.rb +409 -0
  48. data/test/test_paths.rb +25 -0
  49. data/test/test_preserved.rb +70 -0
  50. data/test/test_xml.rb +28 -0
  51. metadata +111 -0
@@ -0,0 +1,19 @@
1
+ <?xml version='1.0'?><rss xmlns:admin='http://webns.net/mvcb/' version='2.0' xmlns:sy='http://purl.org/rss/1.0/modules/syndication/' xmlns:dc='http://purl.org/dc/elements/1.1/' xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'>
2
+ <channel>
3
+ <title>why the lucky stiff</title>
4
+ <link>http://whytheluckystiff.net</link>
5
+ <description>hex-editing reality to give us infinite grenades!!</description>
6
+ <dc:language>en-us</dc:language>
7
+ <dc:creator/>
8
+ <dc:date>2007-01-16T22:39:04+00:00</dc:date>
9
+ <admin:generatorAgent rdf:resource='http://hobix.com/?v=0.4'/>
10
+ <sy:updatePeriod>hourly</sy:updatePeriod>
11
+ <sy:updateFrequency>1</sy:updateFrequency>
12
+ <sy:updateBase>2000-01-01T12:00+00:00</sy:updateBase>
13
+ <item><title>1.3</title><link>http://whytheluckystiff.net/quatrains/1.3.html</link><guid isPermaLink='false'>quatrains/1.3@http://whytheluckystiff.net</guid><dc:subject>quatrains</dc:subject><dc:subject>quatrains</dc:subject><dc:creator>why the lucky stiff</dc:creator><dc:date>2007-01-14T08:47:05+00:00</dc:date><description>&lt;blockquote&gt;
14
+ &lt;p&gt;That cadillac of yours and that driver of yours!&lt;br /&gt;You and your teacups rattling away in the back seat!&lt;br /&gt;You always took the mike, oh, and all those cowboys you shot!&lt;br /&gt;I held your hand! And I&amp;#8217;ll shoot a cowboy one day!&lt;/p&gt;
15
+ &lt;/blockquote&gt;
16
+ &lt;blockquote&gt;
17
+ &lt;p&gt;You said, &amp;#8220;Let&amp;#8217;s run into the woods like kids!&amp;#8221; &lt;br /&gt;You said, &amp;#8220;Let&amp;#8217;s rub our hands together super-hot!&amp;#8221; &lt;br /&gt;And we scalded the trees and left octagons, I think that was you and&lt;br /&gt;You threw parties on the roof!&lt;/p&gt;
18
+ &lt;/blockquote&gt;</description></item></channel>
19
+ </rss>
@@ -0,0 +1,7 @@
1
+ module TestFiles
2
+ Dir.chdir(File.dirname(__FILE__)) do
3
+ Dir['files/*.{html,xhtml,xml}'].each do |fname|
4
+ const_set fname[%r!/(\w+)\.\w+$!, 1].upcase, IO.read(fname)
5
+ end
6
+ end
7
+ end
@@ -0,0 +1,64 @@
1
+ #!/usr/bin/env ruby
2
+ require 'rubygems'
3
+ require 'open-uri'
4
+ require 'hpricot'
5
+ require 'nokogiri'
6
+ require 'benchmark'
7
+
8
+ content = File.read("test/files/boingboing.html")
9
+
10
+ N = 100
11
+
12
+ unless Gem.loaded_specs['hpricot'].version > Gem::Version.new('0.6.161')
13
+ abort "** Use higher than Hpricot 0.6.161!"
14
+ end
15
+
16
+ puts "Hpricot #{Gem.loaded_specs['hpricot'].version} vs. Nokogiri #{Gem.loaded_specs['nokogiri'].version}"
17
+ hdoc = Hpricot(content)
18
+ ndoc = Nokogiri.Hpricot(content)
19
+
20
+ Benchmark.bm do |x|
21
+ x.report('hpricot:doc') do
22
+ N.times do
23
+ Hpricot(content)
24
+ end
25
+ end
26
+
27
+ x.report('nokogiri:doc') do
28
+ N.times do
29
+ Nokogiri.Hpricot(content)
30
+ end
31
+ end
32
+ end
33
+
34
+ Benchmark.bm do |x|
35
+ x.report('hpricot:xpath') do
36
+ N.times do
37
+ info = hdoc.search("//a[@name='027906']").first.inner_text
38
+ url = hdoc.search("h3[text()='College kids reportedly taking more smart drugs']").first.inner_text
39
+ end
40
+ end
41
+
42
+ x.report('nokogiri:xpath') do
43
+ N.times do
44
+ info = ndoc.search("//a[@name='027906']").first.inner_text
45
+ url = ndoc.search("h3[text()='College kids reportedly taking more smart drugs']").first.inner_text
46
+ end
47
+ end
48
+ end
49
+
50
+ Benchmark.bm do |x|
51
+ x.report('hpricot:css') do
52
+ N.times do
53
+ info = hdoc.search('form input[@checked]').first
54
+ url = hdoc.search('td spacer').first.inner_text
55
+ end
56
+ end
57
+
58
+ x.report('nokogiri:css') do
59
+ N.times do
60
+ info = ndoc.search('form input[@checked]').first
61
+ url = ndoc.search('td spacer').first.inner_text
62
+ end
63
+ end
64
+ end
@@ -0,0 +1,77 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'test/unit'
4
+ require 'hpricot'
5
+ require 'load_files'
6
+
7
+ class TestAlter < Test::Unit::TestCase
8
+ def setup
9
+ @basic = Hpricot.parse(TestFiles::BASIC)
10
+ end
11
+
12
+ def test_before
13
+ test0 = "<link rel='stylesheet' href='test0.css' />"
14
+ @basic.at("link").before(test0)
15
+ assert_equal 'test0.css', @basic.at("link").attributes['href']
16
+ end
17
+
18
+ def test_after
19
+ test_inf = "<link rel='stylesheet' href='test_inf.css' />"
20
+ @basic.search("link")[-1].after(test_inf)
21
+ assert_equal 'test_inf.css', @basic.search("link")[-1].attributes['href']
22
+ end
23
+
24
+ def test_wrap
25
+ ohmy = (@basic/"p.ohmy").wrap("<div id='wrapper'></div>")
26
+ assert_equal 'wrapper', ohmy[0].parent['id']
27
+ assert_equal 'ohmy', Hpricot(@basic.to_html).at("#wrapper").children[0]['class']
28
+ end
29
+
30
+ def test_add_class
31
+ first_p = (@basic/"p:first").add_class("testing123")
32
+ assert first_p[0].get_attribute("class").split(" ").include?("testing123")
33
+ assert (Hpricot(@basic.to_html)/"p:first")[0].attributes["class"].split(" ").include?("testing123")
34
+ assert !(Hpricot(@basic.to_html)/"p:gt(0)")[0].attributes["class"].split(" ").include?("testing123")
35
+ end
36
+
37
+ def test_change_attributes
38
+ all_ps = (@basic/"p").attr("title", "Some Title")
39
+ all_as = (@basic/"a").attr("href", "http://my_new_href.com")
40
+ all_lb = (@basic/"link").attr("href") { |e| e.name }
41
+ assert_changed(@basic, "p", all_ps) {|p| p.attributes["title"] == "Some Title"}
42
+ assert_changed(@basic, "a", all_as) {|a| a.attributes["href"] == "http://my_new_href.com"}
43
+ assert_changed(@basic, "link", all_lb) {|a| a.attributes["href"] == "link" }
44
+ end
45
+
46
+ def test_remove_attr
47
+ all_rl = (@basic/"link").remove_attr("href")
48
+ assert_changed(@basic, "link", all_rl) { |link| link['href'].nil? }
49
+ end
50
+
51
+ def test_remove_class
52
+ all_c1 = (@basic/"p[@class*='last']").remove_class("last")
53
+ assert_changed(@basic, "p[@class*='last']", all_c1) { |p| p['class'] == 'final' }
54
+ end
55
+
56
+ def test_remove_all_classes
57
+ all_c2 = (@basic/"p[@class]").remove_class
58
+ assert_changed(@basic, "p[@class]", all_c2) { |p| p['class'].nil? }
59
+ end
60
+
61
+ def test_xml_casing
62
+ doc = Hpricot.XML("<root><wildCat>text</wildCat></root>")
63
+ (doc/:root/:wildCat).after("<beanPole>gravity</beanPole>")
64
+ assert_equal doc.to_s, "<root><wildCat>text</wildCat><beanPole>gravity</beanPole></root>"
65
+
66
+ frag = Hpricot.XML do
67
+ b { i "A bit of HTML" }
68
+ end
69
+ (frag/:b).after("<beanPole>gravity</beanPole>")
70
+ assert_equal frag.to_s, "<b><i>A bit of HTML</i></b><beanPole>gravity</beanPole>"
71
+ end
72
+
73
+ def assert_changed original, selector, set, &block
74
+ assert set.all?(&block)
75
+ assert Hpricot(original.to_html).search(selector).all?(&block)
76
+ end
77
+ end
@@ -0,0 +1,37 @@
1
+ # -*- coding: utf-8 -*-
2
+ #!/usr/bin/env ruby
3
+
4
+ require 'test/unit'
5
+ require 'hpricot'
6
+
7
+ class TestBuilder < Test::Unit::TestCase
8
+ def test_escaping_text
9
+ doc = Hpricot() { b "<a\"b>" }
10
+ assert_equal "<b>&lt;a&quot;b&gt;</b>", doc.to_html
11
+ assert_equal %{<a"b>}, doc.at("text()").to_s
12
+ end
13
+
14
+ def test_no_escaping_text
15
+ doc = Hpricot() { div.test.me! { text "<a\"b>" } }
16
+ assert_equal %{<div class="test" id="me"><a"b></div>}, doc.to_html
17
+ assert_equal %{<a"b>}, doc.at("text()").to_s
18
+ end
19
+
20
+ def test_latin1_entities
21
+ doc = Hpricot() { b "€•" }
22
+ assert_equal "<b>&#8364;&#8226;</b>", doc.to_html
23
+ assert_equal "€•", doc.at("text()").to_s
24
+ end
25
+
26
+ def test_escaping_attrs
27
+ text = "<span style='font-family:\"MS Mincho\"'>Some text</span>"
28
+ assert_equal "<span style=\"font-family:\\\"MS Mincho\\\"\">Some text</span>",
29
+ Hpricot(text).to_html
30
+ end
31
+
32
+ def test_korean_utf8_entities
33
+ a = '한글'
34
+ doc = Hpricot() { b a }
35
+ assert_equal "<b>&#54620;&#44544;</b>", doc.to_html
36
+ end
37
+ end
@@ -0,0 +1,409 @@
1
+ # -*- coding: utf-8 -*-
2
+ #!/usr/bin/env ruby
3
+
4
+ require 'test/unit'
5
+ require 'hpricot'
6
+ require 'load_files'
7
+
8
+ class TestParser < Test::Unit::TestCase
9
+ def test_set_attr
10
+ @basic = Hpricot.parse(TestFiles::BASIC)
11
+ @basic.search('//p').set('class', 'para')
12
+ assert_equal 4, @basic.search('//p').length
13
+ assert_equal 4, @basic.search('//p').find_all { |x| x['class'] == 'para' }.length
14
+ end
15
+
16
+ # Test creating a new element
17
+ def test_new_element
18
+ elem = Hpricot::Elem.new('form')
19
+ assert_not_nil(elem)
20
+ assert_not_nil(elem.attributes)
21
+ end
22
+
23
+ def test_scan_text
24
+ assert_equal 'FOO', Hpricot.make("FOO").children.first.content
25
+ end
26
+
27
+ def test_filter_by_attr
28
+ @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
29
+
30
+ # this link is escaped in the doc
31
+ link = 'http://www.youtube.com/watch?v=TvSNXyNw26g&search=chris%20ware'
32
+ assert_equal link, @boingboing.at("a[@href='#{link}']")['href']
33
+ end
34
+
35
+ def test_filter_contains
36
+ @basic = Hpricot.parse(TestFiles::BASIC)
37
+ assert_equal '<title>Sample XHTML</title>', @basic.search("title:contains('Sample')").to_s
38
+ end
39
+
40
+ def test_get_element_by_id
41
+ @basic = Hpricot.parse(TestFiles::BASIC)
42
+ assert_equal 'link1', @basic.get_element_by_id('link1')['id']
43
+ assert_equal 'link1', @basic.get_element_by_id('body1').get_element_by_id('link1').get_attribute('id')
44
+ end
45
+
46
+ def test_get_element_by_tag_name
47
+ @basic = Hpricot.parse(TestFiles::BASIC)
48
+ assert_equal 'link1', @basic.get_elements_by_tag_name('a')[0].get_attribute('id')
49
+ assert_equal 'link1', @basic.get_elements_by_tag_name('body')[0].get_element_by_id('link1').get_attribute('id')
50
+ end
51
+
52
+ def test_get_elements_by_tag_name_star
53
+ simple = Hpricot.parse("<div><p id='first'>First</p><p id='second'>Second</p></div>")
54
+ assert_equal 3, simple.get_elements_by_tag_name("*").size
55
+ assert_equal 1, simple.get_elements_by_tag_name("div").size
56
+ assert_equal 2, simple.get_elements_by_tag_name("p").size
57
+ end
58
+
59
+ def test_output_basic
60
+ @basic = Hpricot.parse(TestFiles::BASIC)
61
+ @basic2 = Hpricot.parse(@basic.inner_html)
62
+ scan_basic @basic2
63
+ end
64
+
65
+ def test_scan_basic
66
+ @basic = Hpricot.parse(TestFiles::BASIC)
67
+ scan_basic @basic
68
+ end
69
+
70
+ def scan_basic doc
71
+ assert_kind_of Hpricot::XMLDecl, doc.children.first
72
+ assert_not_equal doc.children.first.to_s, doc.children[1].to_s
73
+ assert_equal 'link1', doc.at('#link1')['id']
74
+ assert_equal 'link1', doc.at("p a")['id']
75
+ assert_equal 'link1', (doc/:p/:a).first['id']
76
+ assert_equal 'link1', doc.search('p').at('a').get_attribute('id')
77
+ assert_equal 'link2', (doc/'p').filter('.ohmy').search('a').first.get_attribute('id')
78
+ assert_equal (doc/'p')[2], (doc/'p').filter(':nth(2)')[0]
79
+ assert_equal (doc/'p')[2], (doc/'p').filter('[3]')[0]
80
+ assert_equal 4, (doc/'p').filter('*').length
81
+ assert_equal 4, (doc/'p').filter('* *').length
82
+ eles = (doc/'p').filter('.ohmy')
83
+ assert_equal 1, eles.length
84
+ assert_equal 'ohmy', eles.first.get_attribute('class')
85
+ assert_equal 3, (doc/'p:not(.ohmy)').length
86
+ assert_equal 3, (doc/'p').not('.ohmy').length
87
+ assert_equal 3, (doc/'p').not(eles.first).length
88
+ assert_equal 2, (doc/'p').filter('[@class]').length
89
+ assert_equal 'last final', (doc/'p[@class~="final"]').first.get_attribute('class')
90
+ assert_equal 1, (doc/'p').filter('[@class~="final"]').length
91
+ assert_equal 2, (doc/'p > a').length
92
+ assert_equal 1, (doc/'p.ohmy > a').length
93
+ assert_equal 2, (doc/'p / a').length
94
+ assert_equal 2, (doc/'link ~ link').length
95
+ assert_equal 3, (doc/'title ~ link').length
96
+ assert_equal 5, (doc/"//p/text()").length
97
+ assert_equal 6, (doc/"//p[a]//text()").length
98
+ assert_equal 2, (doc/"//p/a/text()").length
99
+ end
100
+
101
+ def test_positional
102
+ h = Hpricot( "<div><br/><p>one</p><p>two</p></div>" )
103
+ assert_equal "<p>one</p>", h.search("//div/p:eq(0)").to_s
104
+ assert_equal "<p>one</p>", h.search("//div/p:first").to_s
105
+ assert_equal "<p>one</p>", h.search("//div/p:first()").to_s
106
+ end
107
+
108
+ def test_pace
109
+ doc = Hpricot(TestFiles::PACE_APPLICATION)
110
+ assert_equal 'get', doc.at('form[@name=frmSect11]')['method']
111
+ # assert_equal '2', doc.at('#hdnSpouse')['value']
112
+ end
113
+
114
+ def test_scan_boingboing
115
+ @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
116
+ assert_equal 60, (@boingboing/'p.posted').length
117
+ assert_equal 1, @boingboing.search("//a[@name='027906']").length
118
+ assert_equal 10, @boingboing.search("script comment()").length
119
+ assert_equal 3, @boingboing.search("a[text()*='Boing']").length
120
+ assert_equal 1, @boingboing.search("h3[text()='College kids reportedly taking more smart drugs']").length
121
+ assert_equal 0, @boingboing.search("h3[text()='College']").length
122
+ assert_equal 60, @boingboing.search("h3").length
123
+ assert_equal 59, @boingboing.search("h3[text()!='College kids reportedly taking more smart drugs']").length
124
+ assert_equal 17, @boingboing.search("h3[text()$='s']").length
125
+ assert_equal 116, @boingboing.search("p[text()]").length
126
+ assert_equal 211, @boingboing.search("p").length
127
+ end
128
+
129
+ def test_reparent
130
+ doc = Hpricot(%{<div id="blurb_1"></div>})
131
+ div1 = doc.search('#blurb_1')
132
+ div1.before('<div id="blurb_0"></div>')
133
+
134
+ div0 = doc.search('#blurb_0')
135
+ div0.before('<div id="blurb_a"></div>')
136
+
137
+ assert_equal 'div', doc.at('#blurb_1').name
138
+ end
139
+
140
+ def test_siblings
141
+ @basic = Hpricot.parse(TestFiles::BASIC)
142
+ t = @basic.at(:title)
143
+ e = t.next_sibling
144
+ assert_equal 'test1.css', e['href']
145
+ assert_equal 'title', e.previous_sibling.name
146
+ end
147
+
148
+ def test_css_negation
149
+ @basic = Hpricot.parse(TestFiles::BASIC)
150
+ assert_equal 3, (@basic/'p:not(.final)').length
151
+ end
152
+
153
+ def test_remove_attribute
154
+ @basic = Hpricot.parse(TestFiles::BASIC)
155
+ (@basic/:p).each { |ele| ele.remove_attribute('class') }
156
+ assert_equal 0, (@basic/'p[@class]').length
157
+ end
158
+
159
+ def test_abs_xpath
160
+ @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
161
+ assert_equal 60, @boingboing.search("/html/body//p[@class='posted']").length
162
+ assert_equal 60, @boingboing.search("/*/body//p[@class='posted']").length
163
+ assert_equal 18, @boingboing.search("//script").length
164
+ divs = @boingboing.search("//script/../div")
165
+ assert_equal 2, divs.length
166
+ imgs = @boingboing.search('//div/p/a/img')
167
+ assert_equal 16, imgs.length
168
+ assert_equal 16, @boingboing.search('//div').search('p/a/img').length
169
+ assert imgs.all? { |x| x.name == 'img' }
170
+ end
171
+
172
+ def test_predicates
173
+ @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
174
+ assert_equal 2, @boingboing.search('//link[@rel="alternate"]').length
175
+ p_imgs = @boingboing.search('//div/p[/a/img]')
176
+ assert_equal 16, p_imgs.length
177
+ assert p_imgs.all? { |x| x.name == 'p' }
178
+ p_imgs = @boingboing.search('//div/p[a/img]')
179
+ assert_equal 16, p_imgs.length
180
+ assert p_imgs.all? { |x| x.name == 'p' }
181
+ assert_equal 1, @boingboing.search('//input[@checked]').length
182
+ end
183
+
184
+ def test_tag_case
185
+ @tenderlove = Hpricot.parse(TestFiles::TENDERLOVE)
186
+ assert_equal 2, @tenderlove.search('//a').length
187
+ assert_equal 3, @tenderlove.search('//area').length
188
+ assert_equal 2, @tenderlove.search('//meta').length
189
+ end
190
+
191
+ def test_alt_predicates
192
+ @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
193
+ assert_equal 1, @boingboing.search('//table/tr:last').length
194
+
195
+ @basic = Hpricot.parse(TestFiles::BASIC)
196
+ assert_equal "<p>The third paragraph</p>",
197
+ @basic.search('p:eq(2)').to_html
198
+ assert_equal '<p class="last final"><b>THE FINAL PARAGRAPH</b></p>',
199
+ @basic.search('p:last').to_html
200
+ assert_equal 'last final', @basic.search('//p:last-of-type').first.get_attribute('class')
201
+ end
202
+
203
+ def test_insert_after # ticket #63
204
+ doc = Hpricot('<html><body><div id="a-div"></div></body></html>')
205
+ (doc/'div').each do |element|
206
+ element.after('<p>Paragraph 1</p><p>Paragraph 2</p>')
207
+ end
208
+ assert_equal doc.to_html, '<html><body><div id="a-div"></div><p>Paragraph 1</p><p>Paragraph 2</p></body></html>'
209
+ end
210
+
211
+ def test_insert_before # ticket #61
212
+ doc = Hpricot('<html><body><div id="a-div"></div></body></html>')
213
+ (doc/'div').each do |element|
214
+ element.before('<p>Paragraph 1</p><p>Paragraph 2</p>')
215
+ end
216
+ assert_equal doc.to_html, '<html><body><p>Paragraph 1</p><p>Paragraph 2</p><div id="a-div"></div></body></html>'
217
+ end
218
+
219
+ def test_many_paths
220
+ @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
221
+ assert_equal 62, @boingboing.search('p.posted, link[@rel="alternate"]').length
222
+ assert_equal 18, @boingboing.search('//div/p[a/img]|//link[@rel="alternate"]').length
223
+ end
224
+
225
+ def test_stacked_search
226
+ @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
227
+ assert_kind_of Hpricot::Elements, @boingboing.search('//div/p').search('a img')
228
+ end
229
+
230
+ def test_class_search
231
+ # test case sent by Chih-Chao Lam
232
+ doc = Hpricot("<div class=xyz'>abc</div>")
233
+ assert_equal 1, doc.search(".xyz").length
234
+ doc = Hpricot("<div class=xyz>abc</div><div class=abc>xyz</div>")
235
+ assert_equal 1, doc.search(".xyz").length
236
+ assert_equal 4, doc.search("*").length
237
+ end
238
+
239
+ def test_kleene_star
240
+ # bug noticed by raja bhatia
241
+ doc = Hpricot("<span class='small'>1</span><div class='large'>2</div><div class='small'>3</div><span class='blue large'>4</span>")
242
+ assert_equal 2, doc.search("*[@class*='small']").length
243
+ assert_equal 2, doc.search("*.small").length
244
+ assert_equal 2, doc.search(".small").length
245
+ assert_equal 2, doc.search(".large").length
246
+ end
247
+
248
+ def test_empty_comment
249
+ doc = Hpricot("<p><!----></p>")
250
+ assert doc.children[0].children[0].comment?
251
+ doc = Hpricot("<p><!-- --></p>")
252
+ assert doc.children[0].children[0].comment?
253
+ end
254
+
255
+ def test_body_newlines
256
+ @immob = Hpricot.parse(TestFiles::IMMOB)
257
+ body = @immob.at(:body)
258
+ {'background' => '', 'bgcolor' => '#ffffff', 'text' => '#000000', 'marginheight' => '10',
259
+ 'marginwidth' => '10', 'leftmargin' => '10', 'topmargin' => '10', 'link' => '#000066',
260
+ 'alink' => '#ff6600', 'hlink' => "#ff6600", 'vlink' => "#000000"}.each do |k, v|
261
+ assert_equal v, body[k]
262
+ end
263
+ end
264
+
265
+ def test_nested_twins
266
+ @doc = Hpricot("<div>Hi<div>there</div></div>")
267
+ assert_equal 1, (@doc/"div div").length
268
+ end
269
+
270
+ def test_wildcard
271
+ @basic = Hpricot.parse(TestFiles::BASIC)
272
+ assert_equal 3, (@basic/"*[@id]").length
273
+ assert_equal 3, (@basic/"//*[@id]").length
274
+ end
275
+
276
+ def test_javascripts
277
+ @immob = Hpricot.parse(TestFiles::IMMOB)
278
+ assert_equal 3, (@immob/:script)[0].inner_html.scan(/<LINK/).length
279
+ end
280
+
281
+ def test_nested_scripts
282
+ @week9 = Hpricot.parse(TestFiles::WEEK9)
283
+ assert_equal 14, (@week9/"a").find_all { |x| x.inner_html.include? "GameCenter" }.length
284
+ end
285
+
286
+ def test_uswebgen
287
+ @uswebgen = Hpricot.parse(TestFiles::USWEBGEN)
288
+ # sent by brent beardsley, hpricot 0.3 had problems with all the links.
289
+ assert_equal 67, (@uswebgen/:a).length
290
+ end
291
+
292
+ def test_mangled_tags
293
+ [%{<html><form name='loginForm' method='post' action='/units/a/login/1,13088,779-1,00.html'?URL=></form></html>},
294
+ %{<html><form name='loginForm' ?URL= method='post' action='/units/a/login/1,13088,779-1,00.html'></form></html>},
295
+ %{<html><form name='loginForm'?URL= ?URL= method='post' action='/units/a/login/1,13088,779-1,00.html'?URL=></form></html>},
296
+ %{<html><form name='loginForm' method='post' action='/units/a/login/1,13088,779-1,00.html' ?URL=></form></html>}].
297
+ each do |str|
298
+ doc = Hpricot(str)
299
+ assert_equal 1, (doc/:form).length
300
+ assert_equal '/units/a/login/1,13088,779-1,00.html', doc.at("form")['action']
301
+ end
302
+ end
303
+
304
+ def test_procins
305
+ doc = Hpricot("<?php print('hello') ?>\n<?xml blah='blah'?>")
306
+ assert_equal "php", doc.children[0].target
307
+ assert_equal "blah='blah'", doc.children[2].content
308
+ end
309
+
310
+ def test_no_buffer_error
311
+ Hpricot(%{<p>\n\n<input type="hidden" name="__VIEWSTATE" value="#{(("X" * 2000) + "\n") * 44}" />\n\n</p>})
312
+ end
313
+
314
+ def test_youtube_attr
315
+ str = <<-edoc
316
+ <html><body>
317
+ Lorem ipsum. Jolly roger, ding-dong sing-a-long
318
+ <object width="425" height="350">
319
+ <param name="movie" value="http://www.youtube.com/v/NbDQ4M_cuwA"></param>
320
+ <param name="wmode" value="transparent"></param>
321
+ <embed src="http://www.youtube.com/v/NbDQ4M_cuwA"
322
+ type="application/x-shockwave-flash" wmode="transparent" width="425" height="350">
323
+ </embed>
324
+ </object>
325
+ Check out my posting, I have bright mice in large clown cars.
326
+ <object width="425" height="350">
327
+ <param name="movie" value="http://www.youtube.com/v/foobar"></param>
328
+ <param name="wmode" value="transparent"></param>
329
+ <embed src="http://www.youtube.com/v/foobar"
330
+ type="application/x-shockwave-flash" wmode="transparent" width="425" height="350">
331
+ </embed>
332
+ </object>
333
+ </body></html?
334
+ edoc
335
+ doc = Hpricot(str)
336
+ assert_equal "http://www.youtube.com/v/NbDQ4M_cuwA",
337
+ doc.at("//object/param[@value='http://www.youtube.com/v/NbDQ4M_cuwA']")['value']
338
+ end
339
+
340
+ # ticket #84 by jamezilla
341
+ def test_screwed_xmlns
342
+ doc = Hpricot(<<-edoc)
343
+ <?xml:namespace prefix = cwi />
344
+ <html><body>HAI</body></html>
345
+ edoc
346
+ assert_equal "HAI", doc.at("body").inner_text
347
+ end
348
+
349
+ # Reported by Jonathan Nichols on the Hpricot list (24 May 2007)
350
+ def test_self_closed_form
351
+ doc = Hpricot(<<-edoc)
352
+ <body>
353
+ <form action="/loginRegForm" name="regForm" method="POST" />
354
+ <input type="button">
355
+ </form>
356
+ </body>
357
+ edoc
358
+ assert_equal "button", doc.at("//form/input")['type']
359
+ end
360
+
361
+ def test_filters
362
+ @basic = Hpricot.parse(TestFiles::BASIC)
363
+ assert_equal 0, (@basic/"title:parent").size
364
+ assert_equal 3, (@basic/"p:parent").size
365
+ assert_equal 1, (@basic/"title:empty").size
366
+ assert_equal 1, (@basic/"p:empty").size
367
+ end
368
+
369
+ def test_keep_cdata
370
+ str = %{<script> /*<![CDATA[*/
371
+ /*]]>*/ </script>}
372
+ assert_equal str, Hpricot(str).to_html
373
+ end
374
+
375
+ def test_namespace
376
+ chunk = <<-END
377
+ <a xmlns:t="http://www.nexopia.com/dev/template">
378
+ <t:sam>hi </t:sam>
379
+ </a>
380
+ END
381
+ doc = Hpricot::XML(chunk)
382
+ assert (doc/"//t:sam").size > 0 # at least this should probably work
383
+ # assert (doc/"//sam").size > 0 # this would be nice
384
+ end
385
+
386
+ def test_uxs_ignores_non_entities
387
+ assert_equal 'abc', Hpricot.uxs('abc')
388
+ end
389
+
390
+ def test_uxs_handles_gt_lt_amp_quot
391
+ assert_equal '"&<>', Hpricot.uxs('&quot;&amp;&lt;&gt;')
392
+ end
393
+
394
+ def test_uxs_handles_numeric_values
395
+ if String.method_defined? :encoding
396
+ assert_equal "é", Hpricot.uxs('&#233;')
397
+ else
398
+ assert_equal "\303\251", Hpricot.uxs('&#233;')
399
+ end
400
+ end
401
+
402
+ def test_uxs_handles_entities
403
+ if String.method_defined? :encoding
404
+ assert_equal "é", Hpricot.uxs('&eacute;')
405
+ else
406
+ assert_equal "\303\251", Hpricot.uxs('&eacute;')
407
+ end
408
+ end
409
+ end