hpricot 0.7-x86-mswin32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. data/CHANGELOG +68 -0
  2. data/COPYING +18 -0
  3. data/README +284 -0
  4. data/Rakefile +260 -0
  5. data/ext/fast_xs/FastXsService.java +1018 -0
  6. data/ext/fast_xs/extconf.rb +4 -0
  7. data/ext/fast_xs/fast_xs.c +200 -0
  8. data/ext/hpricot_scan/HpricotScanService.java +1305 -0
  9. data/ext/hpricot_scan/extconf.rb +6 -0
  10. data/ext/hpricot_scan/hpricot_common.rl +76 -0
  11. data/ext/hpricot_scan/hpricot_css.c +3502 -0
  12. data/ext/hpricot_scan/hpricot_css.rl +115 -0
  13. data/ext/hpricot_scan/hpricot_scan.c +6704 -0
  14. data/ext/hpricot_scan/hpricot_scan.h +79 -0
  15. data/ext/hpricot_scan/hpricot_scan.java.rl +373 -0
  16. data/ext/hpricot_scan/hpricot_scan.rl +722 -0
  17. data/ext/hpricot_scan/test.rb +4 -0
  18. data/extras/mingw-rbconfig.rb +176 -0
  19. data/lib/fast_xs.so +0 -0
  20. data/lib/hpricot.rb +26 -0
  21. data/lib/hpricot/blankslate.rb +63 -0
  22. data/lib/hpricot/builder.rb +216 -0
  23. data/lib/hpricot/elements.rb +510 -0
  24. data/lib/hpricot/htmlinfo.rb +691 -0
  25. data/lib/hpricot/inspect.rb +103 -0
  26. data/lib/hpricot/modules.rb +38 -0
  27. data/lib/hpricot/parse.rb +38 -0
  28. data/lib/hpricot/tag.rb +198 -0
  29. data/lib/hpricot/tags.rb +164 -0
  30. data/lib/hpricot/traverse.rb +838 -0
  31. data/lib/hpricot/xchar.rb +94 -0
  32. data/lib/hpricot_scan.so +0 -0
  33. data/test/files/basic.xhtml +17 -0
  34. data/test/files/boingboing.html +2266 -0
  35. data/test/files/cy0.html +3653 -0
  36. data/test/files/immob.html +400 -0
  37. data/test/files/pace_application.html +1320 -0
  38. data/test/files/tenderlove.html +16 -0
  39. data/test/files/uswebgen.html +220 -0
  40. data/test/files/utf8.html +1054 -0
  41. data/test/files/week9.html +1723 -0
  42. data/test/files/why.xml +19 -0
  43. data/test/load_files.rb +7 -0
  44. data/test/nokogiri-bench.rb +64 -0
  45. data/test/test_alter.rb +77 -0
  46. data/test/test_builder.rb +37 -0
  47. data/test/test_parser.rb +409 -0
  48. data/test/test_paths.rb +25 -0
  49. data/test/test_preserved.rb +70 -0
  50. data/test/test_xml.rb +28 -0
  51. metadata +111 -0
@@ -0,0 +1,19 @@
1
+ <?xml version='1.0'?><rss xmlns:admin='http://webns.net/mvcb/' version='2.0' xmlns:sy='http://purl.org/rss/1.0/modules/syndication/' xmlns:dc='http://purl.org/dc/elements/1.1/' xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'>
2
+ <channel>
3
+ <title>why the lucky stiff</title>
4
+ <link>http://whytheluckystiff.net</link>
5
+ <description>hex-editing reality to give us infinite grenades!!</description>
6
+ <dc:language>en-us</dc:language>
7
+ <dc:creator/>
8
+ <dc:date>2007-01-16T22:39:04+00:00</dc:date>
9
+ <admin:generatorAgent rdf:resource='http://hobix.com/?v=0.4'/>
10
+ <sy:updatePeriod>hourly</sy:updatePeriod>
11
+ <sy:updateFrequency>1</sy:updateFrequency>
12
+ <sy:updateBase>2000-01-01T12:00+00:00</sy:updateBase>
13
+ <item><title>1.3</title><link>http://whytheluckystiff.net/quatrains/1.3.html</link><guid isPermaLink='false'>quatrains/1.3@http://whytheluckystiff.net</guid><dc:subject>quatrains</dc:subject><dc:subject>quatrains</dc:subject><dc:creator>why the lucky stiff</dc:creator><dc:date>2007-01-14T08:47:05+00:00</dc:date><description>&lt;blockquote&gt;
14
+ &lt;p&gt;That cadillac of yours and that driver of yours!&lt;br /&gt;You and your teacups rattling away in the back seat!&lt;br /&gt;You always took the mike, oh, and all those cowboys you shot!&lt;br /&gt;I held your hand! And I&amp;#8217;ll shoot a cowboy one day!&lt;/p&gt;
15
+ &lt;/blockquote&gt;
16
+ &lt;blockquote&gt;
17
+ &lt;p&gt;You said, &amp;#8220;Let&amp;#8217;s run into the woods like kids!&amp;#8221; &lt;br /&gt;You said, &amp;#8220;Let&amp;#8217;s rub our hands together super-hot!&amp;#8221; &lt;br /&gt;And we scalded the trees and left octagons, I think that was you and&lt;br /&gt;You threw parties on the roof!&lt;/p&gt;
18
+ &lt;/blockquote&gt;</description></item></channel>
19
+ </rss>
@@ -0,0 +1,7 @@
1
# Exposes the test fixtures as constants: each HTML/XHTML/XML file in the
# +files+ directory next to this file becomes a constant named after the
# upper-cased file name without its extension (files/basic.xhtml becomes
# TestFiles::BASIC) whose value is the file's contents.
module TestFiles
  # Reads every fixture in +dir+ and defines one constant per file.
  #
  # The directory is addressed by path instead of via Dir.chdir, so loading
  # neither depends on nor changes the process working directory.
  #
  # Entries are skipped (rather than raising NoMethodError on nil, as a
  # non-matching name used to) when they are not regular files or when their
  # name is not "<word characters>.<html|xhtml|xml>".
  #
  # dir - directory to scan; defaults to "files" beside this source file.
  #
  # Returns the Array of constant names (Strings) that were defined; it is
  # empty when +dir+ does not exist.
  def self.load_fixtures(dir = File.join(File.dirname(__FILE__), 'files'))
    return [] unless File.directory?(dir)

    defined_names = []
    # Sorted so that the load order does not depend on the file system.
    Dir.entries(dir).sort.each do |entry|
      stem = entry[/\A(\w+)\.(?:html|xhtml|xml)\z/, 1]
      next unless stem

      path = File.join(dir, entry)
      next unless File.file?(path)

      const_set stem.upcase, File.read(path)
      defined_names << stem.upcase
    end
    defined_names
  end

  # Preserve the original behaviour: fixtures are available as soon as this
  # file has been required.
  load_fixtures
end
@@ -0,0 +1,64 @@
1
#!/usr/bin/env ruby
# Benchmarks Hpricot against Nokogiri's Hpricot entry point (Nokogiri.Hpricot)
# on the boingboing.html fixture: parsing the document, two XPath-style
# searches and two CSS-style searches, each repeated N times.
require 'rubygems'
require 'open-uri'
require 'hpricot'
require 'nokogiri'
require 'benchmark'

# Locate the fixture relative to this script (test/files/ sits beside it) so
# the benchmark no longer has to be started from the project root.
content = File.read(File.join(File.dirname(__FILE__), 'files', 'boingboing.html'))

# Number of repetitions per measurement.
N = 100

unless Gem.loaded_specs['hpricot'].version > Gem::Version.new('0.6.161')
  abort "** Use higher than Hpricot 0.6.161!"
end

puts "Hpricot #{Gem.loaded_specs['hpricot'].version} vs. Nokogiri #{Gem.loaded_specs['nokogiri'].version}"
# Parse once up front; the search benchmarks reuse these documents.
hdoc = Hpricot(content)
ndoc = Nokogiri.Hpricot(content)

# Document parsing.
Benchmark.bm do |x|
  x.report('hpricot:doc') do
    N.times do
      Hpricot(content)
    end
  end

  x.report('nokogiri:doc') do
    N.times do
      Nokogiri.Hpricot(content)
    end
  end
end

# XPath-style searches. Results are discarded; only the timing matters.
Benchmark.bm do |x|
  x.report('hpricot:xpath') do
    N.times do
      hdoc.search("//a[@name='027906']").first.inner_text
      hdoc.search("h3[text()='College kids reportedly taking more smart drugs']").first.inner_text
    end
  end

  x.report('nokogiri:xpath') do
    N.times do
      ndoc.search("//a[@name='027906']").first.inner_text
      ndoc.search("h3[text()='College kids reportedly taking more smart drugs']").first.inner_text
    end
  end
end

# CSS-style searches. Results are discarded; only the timing matters.
Benchmark.bm do |x|
  x.report('hpricot:css') do
    N.times do
      hdoc.search('form input[@checked]').first
      hdoc.search('td spacer').first.inner_text
    end
  end

  x.report('nokogiri:css') do
    N.times do
      ndoc.search('form input[@checked]').first
      ndoc.search('td spacer').first.inner_text
    end
  end
end
@@ -0,0 +1,77 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'test/unit'
4
+ require 'hpricot'
5
+ require 'load_files'
6
+
7
# Tests for the Hpricot methods that modify a parsed document: inserting
# markup before/after nodes, wrapping, and adding, changing or removing
# attributes and classes. Most tests run against the BASIC fixture.
class TestAlter < Test::Unit::TestCase
  # Parses a fresh copy of the BASIC fixture for every test.
  def setup
    @basic = Hpricot.parse(TestFiles::BASIC)
  end

  def test_before
    test0 = "<link rel='stylesheet' href='test0.css' />"
    @basic.at("link").before(test0)
    # The inserted element must now be the first <link> found.
    assert_equal 'test0.css', @basic.at("link").attributes['href']
  end

  def test_after
    test_inf = "<link rel='stylesheet' href='test_inf.css' />"
    @basic.search("link")[-1].after(test_inf)
    # The inserted element must now be the last <link> found.
    assert_equal 'test_inf.css', @basic.search("link")[-1].attributes['href']
  end

  def test_wrap
    ohmy = (@basic/"p.ohmy").wrap("<div id='wrapper'></div>")
    assert_equal 'wrapper', ohmy[0].parent['id']
    # Re-parse the serialized document to check the wrapper was written out.
    assert_equal 'ohmy', Hpricot(@basic.to_html).at("#wrapper").children[0]['class']
  end

  def test_add_class
    first_p = (@basic/"p:first").add_class("testing123")
    assert first_p[0].get_attribute("class").split(" ").include?("testing123")
    # Explicit parentheses: "assert (expr)..." with a space is ambiguous to read.
    assert((Hpricot(@basic.to_html)/"p:first")[0].attributes["class"].split(" ").include?("testing123"))
    assert(!(Hpricot(@basic.to_html)/"p:gt(0)")[0].attributes["class"].split(" ").include?("testing123"))
  end

  def test_change_attributes
    all_ps = (@basic/"p").attr("title", "Some Title")
    all_as = (@basic/"a").attr("href", "http://my_new_href.com")
    # Block form: the new value is computed from each element.
    all_lb = (@basic/"link").attr("href") { |e| e.name }
    assert_changed(@basic, "p", all_ps) { |p| p.attributes["title"] == "Some Title" }
    assert_changed(@basic, "a", all_as) { |a| a.attributes["href"] == "http://my_new_href.com" }
    assert_changed(@basic, "link", all_lb) { |a| a.attributes["href"] == "link" }
  end

  def test_remove_attr
    all_rl = (@basic/"link").remove_attr("href")
    assert_changed(@basic, "link", all_rl) { |link| link['href'].nil? }
  end

  def test_remove_class
    all_c1 = (@basic/"p[@class*='last']").remove_class("last")
    assert_changed(@basic, "p[@class*='last']", all_c1) { |p| p['class'] == 'final' }
  end

  def test_remove_all_classes
    all_c2 = (@basic/"p[@class]").remove_class
    assert_changed(@basic, "p[@class]", all_c2) { |p| p['class'].nil? }
  end

  # Mixed-case tag names must survive alteration of an XML document.
  def test_xml_casing
    doc = Hpricot.XML("<root><wildCat>text</wildCat></root>")
    (doc/:root/:wildCat).after("<beanPole>gravity</beanPole>")
    # assert_equal takes (expected, actual); the arguments used to be swapped,
    # which only affected the wording of a failure message.
    assert_equal "<root><wildCat>text</wildCat><beanPole>gravity</beanPole></root>", doc.to_s

    frag = Hpricot.XML do
      b { i "A bit of HTML" }
    end
    (frag/:b).after("<beanPole>gravity</beanPole>")
    assert_equal "<b><i>A bit of HTML</i></b><beanPole>gravity</beanPole>", frag.to_s
  end

  # Asserts that the block holds for every element of +set+ (the value
  # returned by the altering call) and for every element matching +selector+
  # after +original+ has been serialized with to_html and parsed again.
  def assert_changed(original, selector, set, &block)
    assert set.all?(&block)
    assert Hpricot(original.to_html).search(selector).all?(&block)
  end
end
@@ -0,0 +1,37 @@
1
+ # -*- coding: utf-8 -*-
2
+ #!/usr/bin/env ruby
3
+
4
+ require 'test/unit'
5
+ require 'hpricot'
6
+
7
# Tests for the markup produced by Hpricot's block-based builder and for how
# text and attribute values are escaped when serialized with to_html.
class TestBuilder < Test::Unit::TestCase
  # Text given to a tag method is escaped in the HTML output, while the text
  # node itself keeps the raw characters.
  def test_escaping_text
    built = Hpricot() { b "<a\"b>" }
    assert_equal("<b>&lt;a&quot;b&gt;</b>", built.to_html)
    assert_equal(%{<a"b>}, built.at("text()").to_s)
  end

  # Text added through +text+ is written out without escaping.
  def test_no_escaping_text
    built = Hpricot() { div.test.me! { text "<a\"b>" } }
    assert_equal(%{<div class="test" id="me"><a"b></div>}, built.to_html)
    assert_equal(%{<a"b>}, built.at("text()").to_s)
  end

  def test_latin1_entities
    built = Hpricot() { b "ۥ" }
    assert_equal("<b>&#8364;&#8226;</b>", built.to_html)
    assert_equal("ۥ", built.at("text()").to_s)
  end

  # Double quotes inside a single-quoted attribute value come back
  # backslash-escaped inside a double-quoted attribute.
  def test_escaping_attrs
    markup = "<span style='font-family:\"MS Mincho\"'>Some text</span>"
    expected = "<span style=\"font-family:\\\"MS Mincho\\\"\">Some text</span>"
    assert_equal(expected, Hpricot(markup).to_html)
  end

  # Multi-byte UTF-8 characters are emitted as numeric character references.
  def test_korean_utf8_entities
    hangul = '한글'
    built = Hpricot() { b hangul }
    assert_equal("<b>&#54620;&#44544;</b>", built.to_html)
  end
end
@@ -0,0 +1,409 @@
1
+ # -*- coding: utf-8 -*-
2
+ #!/usr/bin/env ruby
3
+
4
+ require 'test/unit'
5
+ require 'hpricot'
6
+ require 'load_files'
7
+
8
# Tests for Hpricot's parser and its CSS/XPath search engine, run against
# inline snippets and the fixtures exposed by TestFiles.
class TestParser < Test::Unit::TestCase
  def test_set_attr
    @basic = Hpricot.parse(TestFiles::BASIC)
    @basic.search('//p').set('class', 'para')
    assert_equal 4, @basic.search('//p').length
    assert_equal 4, @basic.search('//p').find_all { |x| x['class'] == 'para' }.length
  end

  # Test creating a new element
  def test_new_element
    elem = Hpricot::Elem.new('form')
    assert_not_nil(elem)
    assert_not_nil(elem.attributes)
  end

  def test_scan_text
    assert_equal 'FOO', Hpricot.make("FOO").children.first.content
  end

  def test_filter_by_attr
    @boingboing = Hpricot.parse(TestFiles::BOINGBOING)

    # this link is escaped in the doc
    link = 'http://www.youtube.com/watch?v=TvSNXyNw26g&search=chris%20ware'
    assert_equal link, @boingboing.at("a[@href='#{link}']")['href']
  end

  def test_filter_contains
    @basic = Hpricot.parse(TestFiles::BASIC)
    assert_equal '<title>Sample XHTML</title>', @basic.search("title:contains('Sample')").to_s
  end

  def test_get_element_by_id
    @basic = Hpricot.parse(TestFiles::BASIC)
    assert_equal 'link1', @basic.get_element_by_id('link1')['id']
    assert_equal 'link1', @basic.get_element_by_id('body1').get_element_by_id('link1').get_attribute('id')
  end

  def test_get_element_by_tag_name
    @basic = Hpricot.parse(TestFiles::BASIC)
    assert_equal 'link1', @basic.get_elements_by_tag_name('a')[0].get_attribute('id')
    assert_equal 'link1', @basic.get_elements_by_tag_name('body')[0].get_element_by_id('link1').get_attribute('id')
  end

  def test_get_elements_by_tag_name_star
    simple = Hpricot.parse("<div><p id='first'>First</p><p id='second'>Second</p></div>")
    assert_equal 3, simple.get_elements_by_tag_name("*").size
    assert_equal 1, simple.get_elements_by_tag_name("div").size
    assert_equal 2, simple.get_elements_by_tag_name("p").size
  end

  # Runs the scan_basic checks on a document rebuilt from Hpricot's own output.
  def test_output_basic
    @basic = Hpricot.parse(TestFiles::BASIC)
    @basic2 = Hpricot.parse(@basic.inner_html)
    scan_basic @basic2
  end

  def test_scan_basic
    @basic = Hpricot.parse(TestFiles::BASIC)
    scan_basic @basic
  end

  # Shared assertions about the structure of the BASIC fixture; +doc+ is a
  # parsed document. Not a test by itself (no test_ prefix).
  def scan_basic(doc)
    assert_kind_of Hpricot::XMLDecl, doc.children.first
    assert_not_equal doc.children.first.to_s, doc.children[1].to_s
    assert_equal 'link1', doc.at('#link1')['id']
    assert_equal 'link1', doc.at("p a")['id']
    assert_equal 'link1', (doc/:p/:a).first['id']
    assert_equal 'link1', doc.search('p').at('a').get_attribute('id')
    assert_equal 'link2', (doc/'p').filter('.ohmy').search('a').first.get_attribute('id')
    # Explicit call parentheses: "assert_equal (expr)[2], ..." is ambiguous.
    assert_equal((doc/'p')[2], (doc/'p').filter(':nth(2)')[0])
    assert_equal((doc/'p')[2], (doc/'p').filter('[3]')[0])
    assert_equal 4, (doc/'p').filter('*').length
    assert_equal 4, (doc/'p').filter('* *').length
    eles = (doc/'p').filter('.ohmy')
    assert_equal 1, eles.length
    assert_equal 'ohmy', eles.first.get_attribute('class')
    assert_equal 3, (doc/'p:not(.ohmy)').length
    assert_equal 3, (doc/'p').not('.ohmy').length
    assert_equal 3, (doc/'p').not(eles.first).length
    assert_equal 2, (doc/'p').filter('[@class]').length
    assert_equal 'last final', (doc/'p[@class~="final"]').first.get_attribute('class')
    assert_equal 1, (doc/'p').filter('[@class~="final"]').length
    assert_equal 2, (doc/'p > a').length
    assert_equal 1, (doc/'p.ohmy > a').length
    assert_equal 2, (doc/'p / a').length
    assert_equal 2, (doc/'link ~ link').length
    assert_equal 3, (doc/'title ~ link').length
    assert_equal 5, (doc/"//p/text()").length
    assert_equal 6, (doc/"//p[a]//text()").length
    assert_equal 2, (doc/"//p/a/text()").length
  end

  def test_positional
    h = Hpricot( "<div><br/><p>one</p><p>two</p></div>" )
    assert_equal "<p>one</p>", h.search("//div/p:eq(0)").to_s
    assert_equal "<p>one</p>", h.search("//div/p:first").to_s
    assert_equal "<p>one</p>", h.search("//div/p:first()").to_s
  end

  def test_pace
    doc = Hpricot(TestFiles::PACE_APPLICATION)
    assert_equal 'get', doc.at('form[@name=frmSect11]')['method']
    # assert_equal '2', doc.at('#hdnSpouse')['value']
  end

  def test_scan_boingboing
    @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
    assert_equal 60, (@boingboing/'p.posted').length
    assert_equal 1, @boingboing.search("//a[@name='027906']").length
    assert_equal 10, @boingboing.search("script comment()").length
    assert_equal 3, @boingboing.search("a[text()*='Boing']").length
    assert_equal 1, @boingboing.search("h3[text()='College kids reportedly taking more smart drugs']").length
    assert_equal 0, @boingboing.search("h3[text()='College']").length
    assert_equal 60, @boingboing.search("h3").length
    assert_equal 59, @boingboing.search("h3[text()!='College kids reportedly taking more smart drugs']").length
    assert_equal 17, @boingboing.search("h3[text()$='s']").length
    assert_equal 116, @boingboing.search("p[text()]").length
    assert_equal 211, @boingboing.search("p").length
  end

  def test_reparent
    doc = Hpricot(%{<div id="blurb_1"></div>})
    div1 = doc.search('#blurb_1')
    div1.before('<div id="blurb_0"></div>')

    div0 = doc.search('#blurb_0')
    div0.before('<div id="blurb_a"></div>')

    assert_equal 'div', doc.at('#blurb_1').name
  end

  def test_siblings
    @basic = Hpricot.parse(TestFiles::BASIC)
    t = @basic.at(:title)
    e = t.next_sibling
    assert_equal 'test1.css', e['href']
    assert_equal 'title', e.previous_sibling.name
  end

  def test_css_negation
    @basic = Hpricot.parse(TestFiles::BASIC)
    assert_equal 3, (@basic/'p:not(.final)').length
  end

  def test_remove_attribute
    @basic = Hpricot.parse(TestFiles::BASIC)
    (@basic/:p).each { |ele| ele.remove_attribute('class') }
    assert_equal 0, (@basic/'p[@class]').length
  end

  def test_abs_xpath
    @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
    assert_equal 60, @boingboing.search("/html/body//p[@class='posted']").length
    assert_equal 60, @boingboing.search("/*/body//p[@class='posted']").length
    assert_equal 18, @boingboing.search("//script").length
    divs = @boingboing.search("//script/../div")
    assert_equal 2, divs.length
    imgs = @boingboing.search('//div/p/a/img')
    assert_equal 16, imgs.length
    assert_equal 16, @boingboing.search('//div').search('p/a/img').length
    assert imgs.all? { |x| x.name == 'img' }
  end

  def test_predicates
    @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
    assert_equal 2, @boingboing.search('//link[@rel="alternate"]').length
    p_imgs = @boingboing.search('//div/p[/a/img]')
    assert_equal 16, p_imgs.length
    assert p_imgs.all? { |x| x.name == 'p' }
    p_imgs = @boingboing.search('//div/p[a/img]')
    assert_equal 16, p_imgs.length
    assert p_imgs.all? { |x| x.name == 'p' }
    assert_equal 1, @boingboing.search('//input[@checked]').length
  end

  def test_tag_case
    @tenderlove = Hpricot.parse(TestFiles::TENDERLOVE)
    assert_equal 2, @tenderlove.search('//a').length
    assert_equal 3, @tenderlove.search('//area').length
    assert_equal 2, @tenderlove.search('//meta').length
  end

  def test_alt_predicates
    @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
    assert_equal 1, @boingboing.search('//table/tr:last').length

    @basic = Hpricot.parse(TestFiles::BASIC)
    assert_equal "<p>The third paragraph</p>",
      @basic.search('p:eq(2)').to_html
    assert_equal '<p class="last final"><b>THE FINAL PARAGRAPH</b></p>',
      @basic.search('p:last').to_html
    assert_equal 'last final', @basic.search('//p:last-of-type').first.get_attribute('class')
  end

  def test_insert_after # ticket #63
    doc = Hpricot('<html><body><div id="a-div"></div></body></html>')
    (doc/'div').each do |element|
      element.after('<p>Paragraph 1</p><p>Paragraph 2</p>')
    end
    # assert_equal takes (expected, actual); the arguments used to be swapped,
    # which only affected the wording of a failure message.
    assert_equal '<html><body><div id="a-div"></div><p>Paragraph 1</p><p>Paragraph 2</p></body></html>', doc.to_html
  end

  def test_insert_before # ticket #61
    doc = Hpricot('<html><body><div id="a-div"></div></body></html>')
    (doc/'div').each do |element|
      element.before('<p>Paragraph 1</p><p>Paragraph 2</p>')
    end
    # Expected value first, as in test_insert_after.
    assert_equal '<html><body><p>Paragraph 1</p><p>Paragraph 2</p><div id="a-div"></div></body></html>', doc.to_html
  end

  def test_many_paths
    @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
    assert_equal 62, @boingboing.search('p.posted, link[@rel="alternate"]').length
    assert_equal 18, @boingboing.search('//div/p[a/img]|//link[@rel="alternate"]').length
  end

  def test_stacked_search
    @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
    assert_kind_of Hpricot::Elements, @boingboing.search('//div/p').search('a img')
  end

  def test_class_search
    # test case sent by Chih-Chao Lam
    doc = Hpricot("<div class=xyz'>abc</div>")
    assert_equal 1, doc.search(".xyz").length
    doc = Hpricot("<div class=xyz>abc</div><div class=abc>xyz</div>")
    assert_equal 1, doc.search(".xyz").length
    assert_equal 4, doc.search("*").length
  end

  def test_kleene_star
    # bug noticed by raja bhatia
    doc = Hpricot("<span class='small'>1</span><div class='large'>2</div><div class='small'>3</div><span class='blue large'>4</span>")
    assert_equal 2, doc.search("*[@class*='small']").length
    assert_equal 2, doc.search("*.small").length
    assert_equal 2, doc.search(".small").length
    assert_equal 2, doc.search(".large").length
  end

  def test_empty_comment
    doc = Hpricot("<p><!----></p>")
    assert doc.children[0].children[0].comment?
    doc = Hpricot("<p><!-- --></p>")
    assert doc.children[0].children[0].comment?
  end

  def test_body_newlines
    @immob = Hpricot.parse(TestFiles::IMMOB)
    body = @immob.at(:body)
    # Attribute name => value expected on the <body> tag of the fixture.
    expected = {'background' => '', 'bgcolor' => '#ffffff', 'text' => '#000000', 'marginheight' => '10',
      'marginwidth' => '10', 'leftmargin' => '10', 'topmargin' => '10', 'link' => '#000066',
      'alink' => '#ff6600', 'hlink' => "#ff6600", 'vlink' => "#000000"}
    expected.each do |k, v|
      assert_equal v, body[k]
    end
  end

  def test_nested_twins
    @doc = Hpricot("<div>Hi<div>there</div></div>")
    assert_equal 1, (@doc/"div div").length
  end

  def test_wildcard
    @basic = Hpricot.parse(TestFiles::BASIC)
    assert_equal 3, (@basic/"*[@id]").length
    assert_equal 3, (@basic/"//*[@id]").length
  end

  def test_javascripts
    @immob = Hpricot.parse(TestFiles::IMMOB)
    assert_equal 3, (@immob/:script)[0].inner_html.scan(/<LINK/).length
  end

  def test_nested_scripts
    @week9 = Hpricot.parse(TestFiles::WEEK9)
    assert_equal 14, (@week9/"a").find_all { |x| x.inner_html.include? "GameCenter" }.length
  end

  def test_uswebgen
    @uswebgen = Hpricot.parse(TestFiles::USWEBGEN)
    # sent by brent beardsley, hpricot 0.3 had problems with all the links.
    assert_equal 67, (@uswebgen/:a).length
  end

  # Stray "?URL=" fragments inside the <form> tag must not hide the form or
  # corrupt its action attribute.
  def test_mangled_tags
    samples = [
      %{<html><form name='loginForm' method='post' action='/units/a/login/1,13088,779-1,00.html'?URL=></form></html>},
      %{<html><form name='loginForm' ?URL= method='post' action='/units/a/login/1,13088,779-1,00.html'></form></html>},
      %{<html><form name='loginForm'?URL= ?URL= method='post' action='/units/a/login/1,13088,779-1,00.html'?URL=></form></html>},
      %{<html><form name='loginForm' method='post' action='/units/a/login/1,13088,779-1,00.html' ?URL=></form></html>}
    ]
    samples.each do |str|
      doc = Hpricot(str)
      assert_equal 1, (doc/:form).length
      assert_equal '/units/a/login/1,13088,779-1,00.html', doc.at("form")['action']
    end
  end

  def test_procins
    doc = Hpricot("<?php print('hello') ?>\n<?xml blah='blah'?>")
    assert_equal "php", doc.children[0].target
    assert_equal "blah='blah'", doc.children[2].content
  end

  # Only checks that parsing a very long attribute value does not raise.
  def test_no_buffer_error
    Hpricot(%{<p>\n\n<input type="hidden" name="__VIEWSTATE" value="#{(("X" * 2000) + "\n") * 44}" />\n\n</p>})
  end

  def test_youtube_attr
    str = <<-edoc
<html><body>
Lorem ipsum. Jolly roger, ding-dong sing-a-long
<object width="425" height="350">
<param name="movie" value="http://www.youtube.com/v/NbDQ4M_cuwA"></param>
<param name="wmode" value="transparent"></param>
<embed src="http://www.youtube.com/v/NbDQ4M_cuwA"
type="application/x-shockwave-flash" wmode="transparent" width="425" height="350">
</embed>
</object>
Check out my posting, I have bright mice in large clown cars.
<object width="425" height="350">
<param name="movie" value="http://www.youtube.com/v/foobar"></param>
<param name="wmode" value="transparent"></param>
<embed src="http://www.youtube.com/v/foobar"
type="application/x-shockwave-flash" wmode="transparent" width="425" height="350">
</embed>
</object>
</body></html?
    edoc
    doc = Hpricot(str)
    assert_equal "http://www.youtube.com/v/NbDQ4M_cuwA",
      doc.at("//object/param[@value='http://www.youtube.com/v/NbDQ4M_cuwA']")['value']
  end

  # ticket #84 by jamezilla
  def test_screwed_xmlns
    doc = Hpricot(<<-edoc)
<?xml:namespace prefix = cwi />
<html><body>HAI</body></html>
    edoc
    assert_equal "HAI", doc.at("body").inner_text
  end

  # Reported by Jonathan Nichols on the Hpricot list (24 May 2007)
  def test_self_closed_form
    doc = Hpricot(<<-edoc)
<body>
<form action="/loginRegForm" name="regForm" method="POST" />
<input type="button">
</form>
</body>
    edoc
    assert_equal "button", doc.at("//form/input")['type']
  end

  def test_filters
    @basic = Hpricot.parse(TestFiles::BASIC)
    assert_equal 0, (@basic/"title:parent").size
    assert_equal 3, (@basic/"p:parent").size
    assert_equal 1, (@basic/"title:empty").size
    assert_equal 1, (@basic/"p:empty").size
  end

  # A CDATA section inside <script> must be written back unchanged.
  def test_keep_cdata
    str = %{<script> /*<![CDATA[*/
/*]]>*/ </script>}
    assert_equal str, Hpricot(str).to_html
  end

  def test_namespace
    chunk = <<-END
<a xmlns:t="http://www.nexopia.com/dev/template">
<t:sam>hi </t:sam>
</a>
    END
    doc = Hpricot::XML(chunk)
    # Explicit call parentheses: "assert (expr).size > 0" is ambiguous.
    assert((doc/"//t:sam").size > 0) # at least this should probably work
    # assert (doc/"//sam").size > 0 # this would be nice
  end

  def test_uxs_ignores_non_entities
    assert_equal 'abc', Hpricot.uxs('abc')
  end

  def test_uxs_handles_gt_lt_amp_quot
    assert_equal '"&<>', Hpricot.uxs('&quot;&amp;&lt;&gt;')
  end

  def test_uxs_handles_numeric_values
    # Rubies whose String has #encoding compare against the character itself;
    # older ones compare against its UTF-8 bytes written as octal escapes.
    if String.method_defined? :encoding
      assert_equal "é", Hpricot.uxs('&#233;')
    else
      assert_equal "\303\251", Hpricot.uxs('&#233;')
    end
  end

  def test_uxs_handles_entities
    # Same split as test_uxs_handles_numeric_values.
    if String.method_defined? :encoding
      assert_equal "é", Hpricot.uxs('&eacute;')
    else
      assert_equal "\303\251", Hpricot.uxs('&eacute;')
    end
  end
end