hpricot 0.8.3-i386-mswin32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. data/CHANGELOG +104 -0
  2. data/COPYING +18 -0
  3. data/README.md +276 -0
  4. data/Rakefile +234 -0
  5. data/ext/fast_xs/FastXsService.java +1123 -0
  6. data/ext/fast_xs/extconf.rb +4 -0
  7. data/ext/fast_xs/fast_xs.c +210 -0
  8. data/ext/hpricot_scan/HpricotCss.java +850 -0
  9. data/ext/hpricot_scan/HpricotScanService.java +2099 -0
  10. data/ext/hpricot_scan/extconf.rb +9 -0
  11. data/ext/hpricot_scan/hpricot_common.rl +76 -0
  12. data/ext/hpricot_scan/hpricot_css.c +3511 -0
  13. data/ext/hpricot_scan/hpricot_css.java.rl +155 -0
  14. data/ext/hpricot_scan/hpricot_css.rl +120 -0
  15. data/ext/hpricot_scan/hpricot_scan.c +7039 -0
  16. data/ext/hpricot_scan/hpricot_scan.h +79 -0
  17. data/ext/hpricot_scan/hpricot_scan.java.rl +1161 -0
  18. data/ext/hpricot_scan/hpricot_scan.rl +896 -0
  19. data/extras/hpricot.png +0 -0
  20. data/lib/fast_xs.rb +1 -0
  21. data/lib/fast_xs/1.8/fast_xs.so +0 -0
  22. data/lib/fast_xs/1.9/fast_xs.so +0 -0
  23. data/lib/hpricot.rb +26 -0
  24. data/lib/hpricot/blankslate.rb +63 -0
  25. data/lib/hpricot/builder.rb +216 -0
  26. data/lib/hpricot/elements.rb +514 -0
  27. data/lib/hpricot/htmlinfo.rb +691 -0
  28. data/lib/hpricot/inspect.rb +103 -0
  29. data/lib/hpricot/modules.rb +40 -0
  30. data/lib/hpricot/parse.rb +38 -0
  31. data/lib/hpricot/tag.rb +219 -0
  32. data/lib/hpricot/tags.rb +164 -0
  33. data/lib/hpricot/traverse.rb +839 -0
  34. data/lib/hpricot/xchar.rb +94 -0
  35. data/lib/hpricot_scan.rb +1 -0
  36. data/lib/hpricot_scan/1.8/hpricot_scan.so +0 -0
  37. data/lib/hpricot_scan/1.9/hpricot_scan.so +0 -0
  38. data/test/files/basic.xhtml +17 -0
  39. data/test/files/boingboing.html +2266 -0
  40. data/test/files/cy0.html +3653 -0
  41. data/test/files/immob.html +400 -0
  42. data/test/files/pace_application.html +1320 -0
  43. data/test/files/tenderlove.html +16 -0
  44. data/test/files/uswebgen.html +220 -0
  45. data/test/files/utf8.html +1054 -0
  46. data/test/files/week9.html +1723 -0
  47. data/test/files/why.xml +19 -0
  48. data/test/load_files.rb +7 -0
  49. data/test/nokogiri-bench.rb +64 -0
  50. data/test/test_alter.rb +96 -0
  51. data/test/test_builder.rb +37 -0
  52. data/test/test_parser.rb +457 -0
  53. data/test/test_paths.rb +25 -0
  54. data/test/test_preserved.rb +88 -0
  55. data/test/test_xml.rb +28 -0
  56. metadata +128 -0
@@ -0,0 +1,19 @@
1
+ <?xml version='1.0'?><rss xmlns:admin='http://webns.net/mvcb/' version='2.0' xmlns:sy='http://purl.org/rss/1.0/modules/syndication/' xmlns:dc='http://purl.org/dc/elements/1.1/' xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'>
2
+ <channel>
3
+ <title>why the lucky stiff</title>
4
+ <link>http://whytheluckystiff.net</link>
5
+ <description>hex-editing reality to give us infinite grenades!!</description>
6
+ <dc:language>en-us</dc:language>
7
+ <dc:creator/>
8
+ <dc:date>2007-01-16T22:39:04+00:00</dc:date>
9
+ <admin:generatorAgent rdf:resource='http://hobix.com/?v=0.4'/>
10
+ <sy:updatePeriod>hourly</sy:updatePeriod>
11
+ <sy:updateFrequency>1</sy:updateFrequency>
12
+ <sy:updateBase>2000-01-01T12:00+00:00</sy:updateBase>
13
+ <item><title>1.3</title><link>http://whytheluckystiff.net/quatrains/1.3.html</link><guid isPermaLink='false'>quatrains/1.3@http://whytheluckystiff.net</guid><dc:subject>quatrains</dc:subject><dc:subject>quatrains</dc:subject><dc:creator>why the lucky stiff</dc:creator><dc:date>2007-01-14T08:47:05+00:00</dc:date><description>&lt;blockquote&gt;
14
+ &lt;p&gt;That cadillac of yours and that driver of yours!&lt;br /&gt;You and your teacups rattling away in the back seat!&lt;br /&gt;You always took the mike, oh, and all those cowboys you shot!&lt;br /&gt;I held your hand! And I&amp;#8217;ll shoot a cowboy one day!&lt;/p&gt;
15
+ &lt;/blockquote&gt;
16
+ &lt;blockquote&gt;
17
+ &lt;p&gt;You said, &amp;#8220;Let&amp;#8217;s run into the woods like kids!&amp;#8221; &lt;br /&gt;You said, &amp;#8220;Let&amp;#8217;s rub our hands together super-hot!&amp;#8221; &lt;br /&gt;And we scalded the trees and left octagons, I think that was you and&lt;br /&gt;You threw parties on the roof!&lt;/p&gt;
18
+ &lt;/blockquote&gt;</description></item></channel>
19
+ </rss>
@@ -0,0 +1,7 @@
# Exposes the HTML/XHTML/XML fixtures that live in the "files" directory next
# to this file as constants: each constant is named after the upcased file
# name without its extension and holds the file's contents
# (e.g. files/basic.xhtml => TestFiles::BASIC).
module TestFiles
  # Defines one constant on TestFiles per fixture found directly inside +dir+.
  #
  # Only entries whose name is made of word characters followed by a single
  # .html, .xhtml or .xml extension are loaded; anything else is ignored.
  # Does nothing when +dir+ is not a directory.
  #
  # The directory is addressed by path instead of via Dir.chdir, so loading
  # fixtures never changes the process-wide working directory.
  def self.load_from(dir)
    return unless File.directory?(dir)

    Dir.entries(dir).sort.each do |entry|
      const_name = entry[/\A(\w+)\.(?:html|xhtml|xml)\z/, 1]
      next unless const_name

      # File.read (not IO.read) so a path can never be interpreted as a command.
      const_set const_name.upcase, File.read(File.join(dir, entry))
    end
  end

  load_from File.join(File.dirname(__FILE__), 'files')
end
@@ -0,0 +1,64 @@
#!/usr/bin/env ruby
# Benchmarks Hpricot against Nokogiri's Hpricot-style entry point
# (Nokogiri.Hpricot) on the boingboing.html fixture. Three groups are timed,
# each repeated N times: document parsing, XPath searches and CSS searches.
require 'rubygems'
require 'open-uri'
require 'hpricot'
require 'nokogiri'
require 'benchmark'

# Locate the fixture relative to this script rather than the current working
# directory, so the benchmark can be started from any directory.
content = File.read(File.join(File.dirname(__FILE__), 'files', 'boingboing.html'))

# Number of repetitions per measurement.
N = 100

unless Gem.loaded_specs['hpricot'].version > Gem::Version.new('0.6.161')
  abort "** Use higher than Hpricot 0.6.161!"
end

puts "Hpricot #{Gem.loaded_specs['hpricot'].version} vs. Nokogiri #{Gem.loaded_specs['nokogiri'].version}"
hdoc = Hpricot(content)
ndoc = Nokogiri.Hpricot(content)

# Parsing the whole document.
Benchmark.bm do |x|
  x.report('hpricot:doc') do
    N.times do
      Hpricot(content)
    end
  end

  x.report('nokogiri:doc') do
    N.times do
      Nokogiri.Hpricot(content)
    end
  end
end

# XPath-style searches on the pre-parsed documents. The results are discarded;
# only the time taken matters.
Benchmark.bm do |x|
  x.report('hpricot:xpath') do
    N.times do
      hdoc.search("//a[@name='027906']").first.inner_text
      hdoc.search("h3[text()='College kids reportedly taking more smart drugs']").first.inner_text
    end
  end

  x.report('nokogiri:xpath') do
    N.times do
      ndoc.search("//a[@name='027906']").first.inner_text
      ndoc.search("h3[text()='College kids reportedly taking more smart drugs']").first.inner_text
    end
  end
end

# CSS-style searches on the pre-parsed documents.
Benchmark.bm do |x|
  x.report('hpricot:css') do
    N.times do
      hdoc.search('form input[@checked]').first
      hdoc.search('td spacer').first.inner_text
    end
  end

  x.report('nokogiri:css') do
    N.times do
      ndoc.search('form input[@checked]').first
      ndoc.search('td spacer').first.inner_text
    end
  end
end
@@ -0,0 +1,96 @@
#!/usr/bin/env ruby
# -*- coding: utf-8 -*-

require 'test/unit'
require 'hpricot'
require 'load_files'

# Tests for Hpricot's document-altering API (before/after/wrap, class and
# attribute changes, inner_html reassignment). Most tests mutate the document
# parsed from TestFiles::BASIC in #setup and then check both the returned set
# and a re-parse of the serialized document.
class TestAlter < Test::Unit::TestCase
  def setup
    @basic = Hpricot.parse(TestFiles::BASIC)
  end

  def test_before
    test0 = "<link rel='stylesheet' href='test0.css' />"
    @basic.at("link").before(test0)
    assert_equal 'test0.css', @basic.at("link").attributes['href']
  end

  def test_after
    test_inf = "<link rel='stylesheet' href='test_inf.css' />"
    @basic.search("link")[-1].after(test_inf)
    assert_equal 'test_inf.css', @basic.search("link")[-1].attributes['href']
  end

  def test_wrap
    ohmy = (@basic/"p.ohmy").wrap("<div id='wrapper'></div>")
    assert_equal 'wrapper', ohmy[0].parent['id']
    assert_equal 'ohmy', Hpricot(@basic.to_html).at("#wrapper").children[0]['class']
  end

  def test_add_class
    first_p = (@basic/"p:first").add_class("testing123")
    assert first_p[0].get_attribute("class").split(" ").include?("testing123")
    # Explicit parentheses: "assert (...)" is ambiguous to the parser.
    assert((Hpricot(@basic.to_html)/"p:first")[0].attributes["class"].split(" ").include?("testing123"))
    assert(!(Hpricot(@basic.to_html)/"p:gt(0)")[0].attributes["class"].split(" ").include?("testing123"))
  end

  def test_change_attributes
    all_ps = (@basic/"p").attr("title", "Some Title & Etc…")
    all_as = (@basic/"a").attr("href", "http://my_new_href.com")
    all_lb = (@basic/"link").attr("href") { |e| e.name }
    assert_changed(@basic, "p", all_ps) { |p| p.raw_attributes["title"] == "Some Title &amp; Etc&#8230;" }
    assert_changed(@basic, "a", all_as) { |a| a.attributes["href"] == "http://my_new_href.com" }
    assert_changed(@basic, "link", all_lb) { |a| a.attributes["href"] == "link" }
  end

  def test_change_attributes2
    (@basic%"a").attributes["href"] = "http://my_new_href.com"
    (@basic%"p").attributes["title"] = "Some Title & Etc…"
    # assert_equal takes (expected, actual); keeping that order makes failure
    # messages label the two values correctly.
    assert_equal "http://my_new_href.com", (@basic%"a").raw_attributes["href"]
    assert_equal "Some Title &amp; Etc&#8230;", (@basic%"p").raw_attributes["title"]
    assert_equal "Some Title & Etc…", (@basic%"p").attributes["title"]
  end

  def test_remove_attr
    all_rl = (@basic/"link").remove_attr("href")
    assert_changed(@basic, "link", all_rl) { |link| link['href'].nil? }
  end

  def test_remove_class
    all_c1 = (@basic/"p[@class*='last']").remove_class("last")
    assert_changed(@basic, "p[@class*='last']", all_c1) { |p| p['class'] == 'final' }
  end

  def test_remove_all_classes
    all_c2 = (@basic/"p[@class]").remove_class
    assert_changed(@basic, "p[@class]", all_c2) { |p| p['class'].nil? }
  end

  def test_xml_casing
    doc = Hpricot.XML("<root><wildCat>text</wildCat></root>")
    (doc/:root/:wildCat).after("<beanPole>gravity</beanPole>")
    assert_equal "<root><wildCat>text</wildCat><beanPole>gravity</beanPole></root>", doc.to_s

    frag = Hpricot.XML do
      b { i "A bit of HTML" }
    end
    (frag/:b).after("<beanPole>gravity</beanPole>")
    assert_equal "<b><i>A bit of HTML</i></b><beanPole>gravity</beanPole>", frag.to_s
  end

  def test_reparent_empty_nodes
    doc = Hpricot("<div/>")
    doc.root.inner_html = "foo"
    assert_equal "foo", doc.root.inner_html
    doc.root.inner_html = ""
    assert_equal "", doc.root.inner_html
    doc.root.swap { b "test" }
    assert_equal "test", doc.root.inner_html
  end

  # Asserts that +block+ holds for every element of +set+ (the value returned
  # by the altering call) and for every element matching +selector+ after
  # +original+ is serialized with to_html and parsed again.
  def assert_changed(original, selector, set, &block)
    assert set.all?(&block)
    assert Hpricot(original.to_html).search(selector).all?(&block)
  end
end
@@ -0,0 +1,37 @@
#!/usr/bin/env ruby
# -*- coding: utf-8 -*-

require 'test/unit'
require 'hpricot'

# Tests for documents created through Hpricot's block builder, focused on how
# text and attribute values are escaped in to_html output.
class TestBuilder < Test::Unit::TestCase
  def test_escaping_text
    doc = Hpricot() { b "<a\"b>" }
    assert_equal "<b>&lt;a&quot;b&gt;</b>", doc.to_html
    assert_equal %{<a"b>}, doc.at("text()").to_s
  end

  def test_no_escaping_text
    doc = Hpricot() { div.test.me! { text "<a\"b>" } }
    assert_equal %{<div class="test" id="me"><a"b></div>}, doc.to_html
    assert_equal %{<a"b>}, doc.at("text()").to_s
  end

  def test_latin1_entities
    # NOTE(review): the literal below is reproduced exactly as it appears here,
    # but it looks mis-encoded relative to the expected &#8364;&#8226; output —
    # confirm the intended bytes against the upstream file.
    doc = Hpricot() { b "ۥ" }
    assert_equal "<b>&#8364;&#8226;</b>", doc.to_html
    assert_equal "ۥ", doc.at("text()").to_s
  end

  def test_escaping_attrs
    text = "<span style='font-family:\"MS Mincho\"'>Some text</span>"
    assert_equal "<span style=\"font-family:\\\"MS Mincho\\\"\">Some text</span>",
                 Hpricot(text).to_html
  end

  def test_korean_utf8_entities
    a = '한글'
    doc = Hpricot() { b a }
    assert_equal "<b>&#54620;&#44544;</b>", doc.to_html
  end
end
@@ -0,0 +1,457 @@
#!/usr/bin/env ruby
# -*- coding: utf-8 -*-

require 'test/unit'
require 'hpricot'
require 'load_files'

# Parser and selector tests for Hpricot, run against inline markup snippets
# and the fixture documents exposed as TestFiles constants.
class TestParser < Test::Unit::TestCase
  def test_set_attr
    @basic = Hpricot.parse(TestFiles::BASIC)
    @basic.search('//p').set('class', 'para')
    assert_equal 4, @basic.search('//p').length
    assert_equal 4, @basic.search('//p').find_all { |x| x['class'] == 'para' }.length
  end

  # Test creating a new element
  def test_new_element
    elem = Hpricot::Elem.new('form')
    assert_not_nil(elem)
    assert_not_nil(elem.attributes)
  end

  def test_scan_text
    assert_equal 'FOO', Hpricot.make("FOO").children.first.content
  end

  def test_filter_by_attr
    @boingboing = Hpricot.parse(TestFiles::BOINGBOING)

    # this link is escaped in the doc
    link = 'http://www.youtube.com/watch?v=TvSNXyNw26g&search=chris%20ware'
    assert_equal link, @boingboing.at("a[@href='#{link}']")['href']
  end

  def test_filter_contains
    @basic = Hpricot.parse(TestFiles::BASIC)
    assert_equal '<title>Sample XHTML</title>', @basic.search("title:contains('Sample')").to_s
  end

  def test_get_element_by_id
    @basic = Hpricot.parse(TestFiles::BASIC)
    assert_equal 'link1', @basic.get_element_by_id('link1')['id']
    assert_equal 'link1', @basic.get_element_by_id('body1').get_element_by_id('link1').get_attribute('id')
  end

  def test_get_element_by_tag_name
    @basic = Hpricot.parse(TestFiles::BASIC)
    assert_equal 'link1', @basic.get_elements_by_tag_name('a')[0].get_attribute('id')
    assert_equal 'link1', @basic.get_elements_by_tag_name('body')[0].get_element_by_id('link1').get_attribute('id')
  end

  def test_get_elements_by_tag_name_star
    simple = Hpricot.parse("<div><p id='first'>First</p><p id='second'>Second</p></div>")
    assert_equal 3, simple.get_elements_by_tag_name("*").size
    assert_equal 1, simple.get_elements_by_tag_name("div").size
    assert_equal 2, simple.get_elements_by_tag_name("p").size
  end

  def test_output_basic
    @basic = Hpricot.parse(TestFiles::BASIC)
    @basic2 = Hpricot.parse(@basic.inner_html)
    scan_basic @basic2
  end

  def test_scan_basic
    @basic = Hpricot.parse(TestFiles::BASIC)
    scan_basic @basic
  end

  # Shared assertions about the basic.xhtml document. Used on the freshly
  # parsed fixture and on a re-parse of its inner_html, so both must yield
  # the same structure.
  def scan_basic(doc)
    assert_kind_of Hpricot::XMLDecl, doc.children.first
    assert_not_equal doc.children.first.to_s, doc.children[1].to_s
    assert_equal 'link1', doc.at('#link1')['id']
    assert_equal 'link1', doc.at("p a")['id']
    assert_equal 'link1', (doc/:p/:a).first['id']
    assert_equal 'link1', doc.search('p').at('a').get_attribute('id')
    assert_equal 'link2', (doc/'p').filter('.ohmy').search('a').first.get_attribute('id')
    # Explicit parentheses: "assert_equal (...)" is ambiguous to the parser.
    assert_equal((doc/'p')[2], (doc/'p').filter(':nth(2)')[0])
    assert_equal((doc/'p')[2], (doc/'p').filter('[3]')[0])
    assert_equal 4, (doc/'p').filter('*').length
    assert_equal 4, (doc/'p').filter('* *').length
    eles = (doc/'p').filter('.ohmy')
    assert_equal 1, eles.length
    assert_equal 'ohmy', eles.first.get_attribute('class')
    assert_equal 3, (doc/'p:not(.ohmy)').length
    assert_equal 3, (doc/'p').not('.ohmy').length
    assert_equal 3, (doc/'p').not(eles.first).length
    assert_equal 2, (doc/'p').filter('[@class]').length
    assert_equal 'last final', (doc/'p[@class~="final"]').first.get_attribute('class')
    assert_equal 1, (doc/'p').filter('[@class~="final"]').length
    assert_equal 2, (doc/'p > a').length
    assert_equal 1, (doc/'p.ohmy > a').length
    assert_equal 2, (doc/'p / a').length
    assert_equal 2, (doc/'link ~ link').length
    assert_equal 3, (doc/'title ~ link').length
    assert_equal 5, (doc/"//p/text()").length
    assert_equal 6, (doc/"//p[a]//text()").length
    assert_equal 2, (doc/"//p/a/text()").length
  end

  def test_positional
    h = Hpricot( "<div><br/><p>one</p><p>two</p></div>" )
    assert_equal "<p>one</p>", h.search("//div/p:eq(0)").to_s
    assert_equal "<p>one</p>", h.search("//div/p:first").to_s
    assert_equal "<p>one</p>", h.search("//div/p:first()").to_s
  end

  def test_pace
    doc = Hpricot(TestFiles::PACE_APPLICATION)
    assert_equal 'get', doc.at('form[@name=frmSect11]')['method']
    # assert_equal '2', doc.at('#hdnSpouse')['value']
  end

  def test_scan_boingboing
    @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
    assert_equal 60, (@boingboing/'p.posted').length
    assert_equal 1, @boingboing.search("//a[@name='027906']").length
    assert_equal 10, @boingboing.search("script comment()").length
    assert_equal 3, @boingboing.search("a[text()*='Boing']").length
    assert_equal 1, @boingboing.search("h3[text()='College kids reportedly taking more smart drugs']").length
    assert_equal 0, @boingboing.search("h3[text()='College']").length
    assert_equal 60, @boingboing.search("h3").length
    assert_equal 59, @boingboing.search("h3[text()!='College kids reportedly taking more smart drugs']").length
    assert_equal 17, @boingboing.search("h3[text()$='s']").length
    assert_equal 116, @boingboing.search("p[text()]").length
    assert_equal 211, @boingboing.search("p").length
  end

  def test_reparent
    doc = Hpricot(%{<div id="blurb_1"></div>})
    div1 = doc.search('#blurb_1')
    div1.before('<div id="blurb_0"></div>')

    div0 = doc.search('#blurb_0')
    div0.before('<div id="blurb_a"></div>')

    assert_equal 'div', doc.at('#blurb_1').name
  end

  def test_siblings
    @basic = Hpricot.parse(TestFiles::BASIC)
    t = @basic.at(:title)
    e = t.next_sibling
    assert_equal 'test1.css', e['href']
    assert_equal 'title', e.previous_sibling.name
  end

  def test_css_negation
    @basic = Hpricot.parse(TestFiles::BASIC)
    assert_equal 3, (@basic/'p:not(.final)').length
  end

  def test_remove_attribute
    @basic = Hpricot.parse(TestFiles::BASIC)
    (@basic/:p).each { |ele| ele.remove_attribute('class') }
    assert_equal 0, (@basic/'p[@class]').length
  end

  def test_abs_xpath
    @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
    assert_equal 60, @boingboing.search("/html/body//p[@class='posted']").length
    assert_equal 60, @boingboing.search("/*/body//p[@class='posted']").length
    assert_equal 18, @boingboing.search("//script").length
    divs = @boingboing.search("//script/../div")
    assert_equal 2, divs.length
    imgs = @boingboing.search('//div/p/a/img')
    assert_equal 16, imgs.length
    assert_equal 16, @boingboing.search('//div').search('p/a/img').length
    assert imgs.all? { |x| x.name == 'img' }
  end

  def test_predicates
    @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
    assert_equal 2, @boingboing.search('//link[@rel="alternate"]').length
    p_imgs = @boingboing.search('//div/p[/a/img]')
    assert_equal 16, p_imgs.length
    assert p_imgs.all? { |x| x.name == 'p' }
    p_imgs = @boingboing.search('//div/p[a/img]')
    assert_equal 16, p_imgs.length
    assert p_imgs.all? { |x| x.name == 'p' }
    assert_equal 1, @boingboing.search('//input[@checked]').length
  end

  def test_tag_case
    @tenderlove = Hpricot.parse(TestFiles::TENDERLOVE)
    assert_equal 2, @tenderlove.search('//a').length
    assert_equal 3, @tenderlove.search('//area').length
    assert_equal 2, @tenderlove.search('//meta').length
  end

  def test_alt_predicates
    @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
    assert_equal 1, @boingboing.search('//table/tr:last').length

    @basic = Hpricot.parse(TestFiles::BASIC)
    assert_equal "<p>The third paragraph</p>",
                 @basic.search('p:eq(2)').to_html
    assert_equal '<p class="last final"><b>THE FINAL PARAGRAPH</b></p>',
                 @basic.search('p:last').to_html
    assert_equal 'last final', @basic.search('//p:last-of-type').first.get_attribute('class')
  end

  def test_insert_after # ticket #63
    doc = Hpricot('<html><body><div id="a-div"></div></body></html>')
    (doc/'div').each do |element|
      element.after('<p>Paragraph 1</p><p>Paragraph 2</p>')
    end
    # assert_equal takes (expected, actual); keeping that order makes failure
    # messages label the two values correctly.
    assert_equal '<html><body><div id="a-div"></div><p>Paragraph 1</p><p>Paragraph 2</p></body></html>', doc.to_html
  end

  def test_insert_before # ticket #61
    doc = Hpricot('<html><body><div id="a-div"></div></body></html>')
    (doc/'div').each do |element|
      element.before('<p>Paragraph 1</p><p>Paragraph 2</p>')
    end
    assert_equal '<html><body><p>Paragraph 1</p><p>Paragraph 2</p><div id="a-div"></div></body></html>', doc.to_html
  end

  def test_many_paths
    @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
    assert_equal 62, @boingboing.search('p.posted, link[@rel="alternate"]').length
    assert_equal 18, @boingboing.search('//div/p[a/img]|//link[@rel="alternate"]').length
  end

  def test_stacked_search
    @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
    assert_kind_of Hpricot::Elements, @boingboing.search('//div/p').search('a img')
  end

  def test_attr_casing
    doc = Hpricot("<a HREF='a'>A simple <b>test</b> string.</a>")
    assert_equal "a", (doc % :a)[:href]
    assert_nil((doc % :a)[:HREF])
    assert_equal "a", (doc % :a)['href']
    assert_nil((doc % :a)['HREF'])
  end

  def test_class_search
    # test case sent by Chih-Chao Lam
    doc = Hpricot("<div class=xyz'>abc</div>")
    assert_equal 1, doc.search(".xyz").length
    doc = Hpricot("<div class=xyz>abc</div><div class=abc>xyz</div>")
    assert_equal 1, doc.search(".xyz").length
    assert_equal 4, doc.search("*").length
  end

  def test_kleene_star
    # bug noticed by raja bhatia
    doc = Hpricot("<span class='small'>1</span><div class='large'>2</div><div class='small'>3</div><span class='blue large'>4</span>")
    assert_equal 2, doc.search("*[@class*='small']").length
    assert_equal 2, doc.search("*.small").length
    assert_equal 2, doc.search(".small").length
    assert_equal 2, doc.search(".large").length
  end

  def test_empty_comment
    doc = Hpricot("<p><!----></p>")
    assert doc.children[0].children[0].comment?
    doc = Hpricot("<p><!-- --></p>")
    assert doc.children[0].children[0].comment?
  end

  def test_body_newlines
    @immob = Hpricot.parse(TestFiles::IMMOB)
    body = @immob.at(:body)
    {'background' => '', 'bgcolor' => '#ffffff', 'text' => '#000000', 'marginheight' => '10',
     'marginwidth' => '10', 'leftmargin' => '10', 'topmargin' => '10', 'link' => '#000066',
     'alink' => '#ff6600', 'hlink' => "#ff6600", 'vlink' => "#000000"}.each do |k, v|
      assert_equal v, body[k]
    end
  end

  def test_nested_twins
    @doc = Hpricot("<div>Hi<div>there</div></div>")
    assert_equal 1, (@doc/"div div").length
  end

  def test_wildcard
    @basic = Hpricot.parse(TestFiles::BASIC)
    assert_equal 3, (@basic/"*[@id]").length
    assert_equal 3, (@basic/"//*[@id]").length
  end

  def test_javascripts
    @immob = Hpricot.parse(TestFiles::IMMOB)
    assert_equal 3, (@immob/:script)[0].inner_html.scan(/<LINK/).length
  end

  def test_nested_scripts
    @week9 = Hpricot.parse(TestFiles::WEEK9)
    assert_equal 14, (@week9/"a").find_all { |x| x.inner_html.include? "GameCenter" }.length
  end

  def test_uswebgen
    @uswebgen = Hpricot.parse(TestFiles::USWEBGEN)
    # sent by brent beardsley, hpricot 0.3 had problems with all the links.
    assert_equal 67, (@uswebgen/:a).length
  end

  def test_mangled_tags
    [%{<html><form name='loginForm' method='post' action='/units/a/login/1,13088,779-1,00.html'?URL=></form></html>},
     %{<html><form name='loginForm' ?URL= method='post' action='/units/a/login/1,13088,779-1,00.html'></form></html>},
     %{<html><form name='loginForm'?URL= ?URL= method='post' action='/units/a/login/1,13088,779-1,00.html'?URL=></form></html>},
     %{<html><form name='loginForm' method='post' action='/units/a/login/1,13088,779-1,00.html' ?URL=></form></html>}].
      each do |str|
        doc = Hpricot(str)
        assert_equal 1, (doc/:form).length
        assert_equal '/units/a/login/1,13088,779-1,00.html', doc.at("form")['action']
      end
  end

  def test_procins
    doc = Hpricot("<?php print('hello') ?>\n<?xml blah='blah'?>")
    assert_equal "php", doc.children[0].target
    assert_equal "blah='blah'", doc.children[2].content
  end

  # Passes as long as parsing a very large attribute value does not raise.
  def test_no_buffer_error
    Hpricot(%{<p>\n\n<input type="hidden" name="__VIEWSTATE" value="#{(("X" * 2000) + "\n") * 44}" />\n\n</p>})
  end

  def test_youtube_attr
    str = <<-edoc
    <html><body>
    Lorem ipsum. Jolly roger, ding-dong sing-a-long
    <object width="425" height="350">
    <param name="movie" value="http://www.youtube.com/v/NbDQ4M_cuwA"></param>
    <param name="wmode" value="transparent"></param>
    <embed src="http://www.youtube.com/v/NbDQ4M_cuwA"
    type="application/x-shockwave-flash" wmode="transparent" width="425" height="350">
    </embed>
    </object>
    Check out my posting, I have bright mice in large clown cars.
    <object width="425" height="350">
    <param name="movie" value="http://www.youtube.com/v/foobar"></param>
    <param name="wmode" value="transparent"></param>
    <embed src="http://www.youtube.com/v/foobar"
    type="application/x-shockwave-flash" wmode="transparent" width="425" height="350">
    </embed>
    </object>
    </body></html?
    edoc
    doc = Hpricot(str)
    assert_equal "http://www.youtube.com/v/NbDQ4M_cuwA",
                 doc.at("//object/param[@value='http://www.youtube.com/v/NbDQ4M_cuwA']")['value']
  end

  # ticket #84 by jamezilla
  def test_screwed_xmlns
    doc = Hpricot(<<-edoc)
    <?xml:namespace prefix = cwi />
    <html><body>HAI</body></html>
    edoc
    assert_equal "HAI", doc.at("body").inner_text
  end

  # http://github.com/hpricot/hpricot/issues#issue/28
  def test_invalid_inner_text
    assert_equal "A", Hpricot('A&B;').inner_text[0...1]
  end

  # http://github.com/hpricot/hpricot/issues#issue/25
  # Passes as long as inner_text does not raise.
  def test_encoding_compatibility_error
    Hpricot("<p>\xC3\x9Cber</p><p>M&sup3;</p>").inner_text
  end

  # Reported by Jonathan Nichols on the Hpricot list (24 May 2007)
  def test_self_closed_form
    doc = Hpricot(<<-edoc)
    <body>
    <form action="/loginRegForm" name="regForm" method="POST" />
    <input type="button">
    </form>
    </body>
    edoc
    assert_equal "button", doc.at("//form/input")['type']
  end

  def test_filters
    @basic = Hpricot.parse(TestFiles::BASIC)
    assert_equal 0, (@basic/"title:parent").size
    assert_equal 3, (@basic/"p:parent").size
    assert_equal 3, (@basic/"link:empty").size
    assert_equal 1, (@basic/"span:empty").size
  end

  def test_keep_cdata
    str = %{<script> /*<![CDATA[*/
    /*]]>*/ </script>}
    assert_equal str, Hpricot(str).to_html
  end

  def test_namespace
    chunk = <<-END
    <a xmlns:t="http://www.nexopia.com/dev/template">
    <t:sam>hi </t:sam>
    </a>
    END
    doc = Hpricot::XML(chunk)
    assert((doc/"//t:sam").size > 0) # at least this should probably work
    # assert (doc/"//sam").size > 0 # this would be nice
  end

  def test_uxs_ignores_non_entities
    assert_equal 'abc', Hpricot.uxs('abc')
  end

  def test_uxs_handles_gt_lt_amp_quot
    assert_equal '"&<>', Hpricot.uxs('&quot;&amp;&lt;&gt;')
  end

  def test_uxs_handles_numeric_values
    # The expected literal differs depending on whether String responds to
    # #encoding.
    if String.method_defined? :encoding
      assert_equal "é", Hpricot.uxs('&#233;')
    else
      assert_equal "\303\251", Hpricot.uxs('&#233;')
    end
  end

  def test_uxs_handles_entities
    if String.method_defined? :encoding
      assert_equal "é", Hpricot.uxs('&eacute;')
    else
      assert_equal "\303\251", Hpricot.uxs('&eacute;')
    end
  end

  def test_cdata_inner_text
    xml = Hpricot.XML(%{
    <peon>
    <id>96586</id>
    <stdout><![CDATA[This is STDOUT]]></stdout>
    <stderr><!-- IGNORE --><![CDATA[This is]]> STDERR</stderr>
    </peon>})
    assert_equal "This is STDOUT", (xml/:peon/:stdout).inner_text
    assert_equal "This is STDERR", (xml/:peon/:stderr).inner_text
  end

  def test_parsing_html_with_noscript
    doc = Hpricot(<<-edoc)
    <html>
    <head>
    <noscript>
    <meta http-equiv="refresh" content="0; url=http://www.yoursite.com/noscripts.html"/>
    </noscript>
    <meta name="verification" content="7ff5e90iormq5niy6x98j75" />
    </head>
    <body>
    <h1>Testing</h1>
    </body>
    </html>

    edoc
    assert_equal "7ff5e90iormq5niy6x98j75", doc.at("/html/head/meta[@name='verification']")['content']
  end

end