hpricot 0.6-jruby
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +62 -0
- data/COPYING +18 -0
- data/README +284 -0
- data/Rakefile +211 -0
- data/ext/hpricot_scan/HpricotScanService.java +1340 -0
- data/ext/hpricot_scan/extconf.rb +6 -0
- data/ext/hpricot_scan/hpricot_common.rl +76 -0
- data/ext/hpricot_scan/hpricot_scan.c +5976 -0
- data/ext/hpricot_scan/hpricot_scan.h +79 -0
- data/ext/hpricot_scan/hpricot_scan.java.rl +363 -0
- data/ext/hpricot_scan/hpricot_scan.rl +273 -0
- data/extras/mingw-rbconfig.rb +176 -0
- data/lib/hpricot.rb +26 -0
- data/lib/hpricot/blankslate.rb +63 -0
- data/lib/hpricot/builder.rb +200 -0
- data/lib/hpricot/elements.rb +510 -0
- data/lib/hpricot/htmlinfo.rb +672 -0
- data/lib/hpricot/inspect.rb +107 -0
- data/lib/hpricot/modules.rb +37 -0
- data/lib/hpricot/parse.rb +297 -0
- data/lib/hpricot/tag.rb +228 -0
- data/lib/hpricot/tags.rb +164 -0
- data/lib/hpricot/traverse.rb +821 -0
- data/lib/hpricot/xchar.rb +94 -0
- data/lib/i686-linux/hpricot_scan.jar +0 -0
- data/test/files/basic.xhtml +17 -0
- data/test/files/boingboing.html +2266 -0
- data/test/files/cy0.html +3653 -0
- data/test/files/immob.html +400 -0
- data/test/files/pace_application.html +1320 -0
- data/test/files/tenderlove.html +16 -0
- data/test/files/uswebgen.html +220 -0
- data/test/files/utf8.html +1054 -0
- data/test/files/week9.html +1723 -0
- data/test/files/why.xml +19 -0
- data/test/load_files.rb +7 -0
- data/test/test_alter.rb +65 -0
- data/test/test_builder.rb +24 -0
- data/test/test_parser.rb +379 -0
- data/test/test_paths.rb +16 -0
- data/test/test_preserved.rb +66 -0
- data/test/test_xml.rb +28 -0
- metadata +98 -0
data/test/files/why.xml
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
<?xml version='1.0'?><rss xmlns:admin='http://webns.net/mvcb/' version='2.0' xmlns:sy='http://purl.org/rss/1.0/modules/syndication/' xmlns:dc='http://purl.org/dc/elements/1.1/' xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'>
|
2
|
+
<channel>
|
3
|
+
<title>why the lucky stiff</title>
|
4
|
+
<link>http://whytheluckystiff.net</link>
|
5
|
+
<description>hex-editing reality to give us infinite grenades!!</description>
|
6
|
+
<dc:language>en-us</dc:language>
|
7
|
+
<dc:creator/>
|
8
|
+
<dc:date>2007-01-16T22:39:04+00:00</dc:date>
|
9
|
+
<admin:generatorAgent rdf:resource='http://hobix.com/?v=0.4'/>
|
10
|
+
<sy:updatePeriod>hourly</sy:updatePeriod>
|
11
|
+
<sy:updateFrequency>1</sy:updateFrequency>
|
12
|
+
<sy:updateBase>2000-01-01T12:00+00:00</sy:updateBase>
|
13
|
+
<item><title>1.3</title><link>http://whytheluckystiff.net/quatrains/1.3.html</link><guid isPermaLink='false'>quatrains/1.3@http://whytheluckystiff.net</guid><dc:subject>quatrains</dc:subject><dc:subject>quatrains</dc:subject><dc:creator>why the lucky stiff</dc:creator><dc:date>2007-01-14T08:47:05+00:00</dc:date><description><blockquote>
|
14
|
+
<p>That cadillac of yours and that driver of yours!<br />You and your teacups rattling away in the back seat!<br />You always took the mike, oh, and all those cowboys you shot!<br />I held your hand! And I&#8217;ll shoot a cowboy one day!</p>
|
15
|
+
</blockquote>
|
16
|
+
<blockquote>
|
17
|
+
<p>You said, &#8220;Let&#8217;s run into the woods like kids!&#8221; <br />You said, &#8220;Let&#8217;s rub our hands together super-hot!&#8221; <br />And we scalded the trees and left octagons, I think that was you and<br />You threw parties on the roof!</p>
|
18
|
+
</blockquote></description></item></channel>
|
19
|
+
</rss>
|
data/test/load_files.rb
ADDED
data/test/test_alter.rb
ADDED
@@ -0,0 +1,65 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'test/unit'
|
4
|
+
require 'hpricot'
|
5
|
+
require 'load_files'
|
6
|
+
|
7
|
+
class TestAlter < Test::Unit::TestCase
|
8
|
+
def setup
|
9
|
+
@basic = Hpricot.parse(TestFiles::BASIC)
|
10
|
+
end
|
11
|
+
|
12
|
+
def test_before
|
13
|
+
test0 = "<link rel='stylesheet' href='test0.css' />"
|
14
|
+
@basic.at("link").before(test0)
|
15
|
+
assert_equal 'test0.css', @basic.at("link").attributes['href']
|
16
|
+
end
|
17
|
+
|
18
|
+
def test_after
|
19
|
+
test_inf = "<link rel='stylesheet' href='test_inf.css' />"
|
20
|
+
@basic.search("link")[-1].after(test_inf)
|
21
|
+
assert_equal 'test_inf.css', @basic.search("link")[-1].attributes['href']
|
22
|
+
end
|
23
|
+
|
24
|
+
def test_wrap
|
25
|
+
ohmy = (@basic/"p.ohmy").wrap("<div id='wrapper'></div>")
|
26
|
+
assert_equal 'wrapper', ohmy[0].parent['id']
|
27
|
+
assert_equal 'ohmy', Hpricot(@basic.to_html).at("#wrapper").children[0]['class']
|
28
|
+
end
|
29
|
+
|
30
|
+
def test_add_class
|
31
|
+
first_p = (@basic/"p:first").add_class("testing123")
|
32
|
+
assert first_p[0].get_attribute("class").split(" ").include?("testing123")
|
33
|
+
assert (Hpricot(@basic.to_html)/"p:first")[0].attributes["class"].split(" ").include?("testing123")
|
34
|
+
assert !(Hpricot(@basic.to_html)/"p:gt(0)")[0].attributes["class"].split(" ").include?("testing123")
|
35
|
+
end
|
36
|
+
|
37
|
+
def test_change_attributes
|
38
|
+
all_ps = (@basic/"p").attr("title", "Some Title")
|
39
|
+
all_as = (@basic/"a").attr("href", "http://my_new_href.com")
|
40
|
+
all_lb = (@basic/"link").attr("href") { |e| e.name }
|
41
|
+
assert_changed(@basic, "p", all_ps) {|p| p.attributes["title"] == "Some Title"}
|
42
|
+
assert_changed(@basic, "a", all_as) {|a| a.attributes["href"] == "http://my_new_href.com"}
|
43
|
+
assert_changed(@basic, "link", all_lb) {|a| a.attributes["href"] == "link" }
|
44
|
+
end
|
45
|
+
|
46
|
+
def test_remove_attr
|
47
|
+
all_rl = (@basic/"link").remove_attr("href")
|
48
|
+
assert_changed(@basic, "link", all_rl) { |link| link['href'].nil? }
|
49
|
+
end
|
50
|
+
|
51
|
+
def test_remove_class
|
52
|
+
all_c1 = (@basic/"p[@class*='last']").remove_class("last")
|
53
|
+
assert_changed(@basic, "p[@class*='last']", all_c1) { |p| p['class'] == 'final' }
|
54
|
+
end
|
55
|
+
|
56
|
+
def test_remove_all_classes
|
57
|
+
all_c2 = (@basic/"p[@class]").remove_class
|
58
|
+
assert_changed(@basic, "p[@class]", all_c2) { |p| p['class'].nil? }
|
59
|
+
end
|
60
|
+
|
61
|
+
def assert_changed original, selector, set, &block
|
62
|
+
assert set.all?(&block)
|
63
|
+
assert Hpricot(original.to_html).search(selector).all?(&block)
|
64
|
+
end
|
65
|
+
end
|
data/test/test_builder.rb
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'test/unit'
|
4
|
+
require 'hpricot'
|
5
|
+
|
6
|
+
class TestBuilder < Test::Unit::TestCase
|
7
|
+
def test_escaping_text
|
8
|
+
doc = Hpricot() { b "<a\"b>" }
|
9
|
+
assert_equal "<b>&lt;a&quot;b&gt;</b>", doc.to_html
|
10
|
+
assert_equal %{&lt;a&quot;b&gt;}, doc.at("text()").to_s
|
11
|
+
end
|
12
|
+
|
13
|
+
def test_no_escaping_text
|
14
|
+
doc = Hpricot() { div.test.me! { text "<a\"b>" } }
|
15
|
+
assert_equal %{<div class="test" id="me"><a"b></div>}, doc.to_html
|
16
|
+
assert_equal %{<a"b>}, doc.at("text()").to_s
|
17
|
+
end
|
18
|
+
|
19
|
+
def test_latin1_entities
|
20
|
+
doc = Hpricot() { b "\200\225" }
|
21
|
+
assert_equal "<b>&#8364;&#8226;</b>", doc.to_html
|
22
|
+
assert_equal "\342\202\254\342\200\242", doc.at("text()").to_s
|
23
|
+
end
|
24
|
+
end
|
data/test/test_parser.rb
ADDED
@@ -0,0 +1,379 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'test/unit'
|
4
|
+
require 'hpricot'
|
5
|
+
require 'load_files'
|
6
|
+
|
7
|
+
class TestParser < Test::Unit::TestCase
|
8
|
+
def test_set_attr
|
9
|
+
@basic = Hpricot.parse(TestFiles::BASIC)
|
10
|
+
@basic.search('//p').set('class', 'para')
|
11
|
+
assert_equal 4, @basic.search('//p').length
|
12
|
+
assert_equal 4, @basic.search('//p').find_all { |x| x['class'] == 'para' }.length
|
13
|
+
end
|
14
|
+
|
15
|
+
# Test creating a new element
|
16
|
+
def test_new_element
|
17
|
+
elem = Hpricot::Elem.new(Hpricot::STag.new('form'))
|
18
|
+
assert_not_nil(elem)
|
19
|
+
assert_not_nil(elem.attributes)
|
20
|
+
end
|
21
|
+
|
22
|
+
def test_scan_text
|
23
|
+
assert_equal 'FOO', Hpricot.make("FOO").first.content
|
24
|
+
end
|
25
|
+
|
26
|
+
def test_filter_by_attr
|
27
|
+
@boingboing = Hpricot.parse(TestFiles::BOINGBOING)
|
28
|
+
|
29
|
+
# this link is escaped in the doc
|
30
|
+
link = 'http://www.youtube.com/watch?v=TvSNXyNw26g&search=chris%20ware'
|
31
|
+
assert_equal link, @boingboing.at("a[@href='#{link}']")['href']
|
32
|
+
end
|
33
|
+
|
34
|
+
def test_filter_contains
|
35
|
+
@basic = Hpricot.parse(TestFiles::BASIC)
|
36
|
+
assert_equal '<title>Sample XHTML</title>', @basic.search("title:contains('Sample')").to_s
|
37
|
+
end
|
38
|
+
|
39
|
+
def test_get_element_by_id
|
40
|
+
@basic = Hpricot.parse(TestFiles::BASIC)
|
41
|
+
assert_equal 'link1', @basic.get_element_by_id('link1')['id']
|
42
|
+
assert_equal 'link1', @basic.get_element_by_id('body1').get_element_by_id('link1').get_attribute('id')
|
43
|
+
end
|
44
|
+
|
45
|
+
def test_get_element_by_tag_name
|
46
|
+
@basic = Hpricot.parse(TestFiles::BASIC)
|
47
|
+
assert_equal 'link1', @basic.get_elements_by_tag_name('a')[0].get_attribute('id')
|
48
|
+
assert_equal 'link1', @basic.get_elements_by_tag_name('body')[0].get_element_by_id('link1').get_attribute('id')
|
49
|
+
end
|
50
|
+
|
51
|
+
def test_output_basic
|
52
|
+
@basic = Hpricot.parse(TestFiles::BASIC)
|
53
|
+
@basic2 = Hpricot.parse(@basic.inner_html)
|
54
|
+
scan_basic @basic2
|
55
|
+
end
|
56
|
+
|
57
|
+
def test_scan_basic
|
58
|
+
@basic = Hpricot.parse(TestFiles::BASIC)
|
59
|
+
scan_basic @basic
|
60
|
+
end
|
61
|
+
|
62
|
+
def scan_basic doc
|
63
|
+
assert_kind_of Hpricot::XMLDecl, doc.children.first
|
64
|
+
assert_not_equal doc.children.first.to_s, doc.children[1].to_s
|
65
|
+
assert_equal 'link1', doc.at('#link1')['id']
|
66
|
+
assert_equal 'link1', doc.at("p a")['id']
|
67
|
+
assert_equal 'link1', (doc/:p/:a).first['id']
|
68
|
+
assert_equal 'link1', doc.search('p').at('a').get_attribute('id')
|
69
|
+
assert_equal 'link2', (doc/'p').filter('.ohmy').search('a').first.get_attribute('id')
|
70
|
+
assert_equal (doc/'p')[2], (doc/'p').filter(':nth(2)')[0]
|
71
|
+
assert_equal (doc/'p')[2], (doc/'p').filter('[3]')[0]
|
72
|
+
assert_equal 4, (doc/'p').filter('*').length
|
73
|
+
assert_equal 4, (doc/'p').filter('* *').length
|
74
|
+
eles = (doc/'p').filter('.ohmy')
|
75
|
+
assert_equal 1, eles.length
|
76
|
+
assert_equal 'ohmy', eles.first.get_attribute('class')
|
77
|
+
assert_equal 3, (doc/'p:not(.ohmy)').length
|
78
|
+
assert_equal 3, (doc/'p').not('.ohmy').length
|
79
|
+
assert_equal 3, (doc/'p').not(eles.first).length
|
80
|
+
assert_equal 2, (doc/'p').filter('[@class]').length
|
81
|
+
assert_equal 'last final', (doc/'p[@class~="final"]').first.get_attribute('class')
|
82
|
+
assert_equal 1, (doc/'p').filter('[@class~="final"]').length
|
83
|
+
assert_equal 2, (doc/'p > a').length
|
84
|
+
assert_equal 1, (doc/'p.ohmy > a').length
|
85
|
+
assert_equal 2, (doc/'p / a').length
|
86
|
+
assert_equal 2, (doc/'link ~ link').length
|
87
|
+
assert_equal 3, (doc/'title ~ link').length
|
88
|
+
assert_equal 5, (doc/"//p/text()").length
|
89
|
+
assert_equal 6, (doc/"//p[a]//text()").length
|
90
|
+
assert_equal 2, (doc/"//p/a/text()").length
|
91
|
+
end
|
92
|
+
|
93
|
+
def test_positional
|
94
|
+
h = Hpricot( "<div><br/><p>one</p><p>two</p></div>" )
|
95
|
+
assert_equal "<p>one</p>", h.search("//div/p:eq(0)").to_s
|
96
|
+
assert_equal "<p>one</p>", h.search("//div/p:first").to_s
|
97
|
+
assert_equal "<p>one</p>", h.search("//div/p:first()").to_s
|
98
|
+
end
|
99
|
+
|
100
|
+
def test_pace
|
101
|
+
doc = Hpricot(TestFiles::PACE_APPLICATION)
|
102
|
+
assert_equal 'get', doc.at('form[@name=frmSect11]')['method']
|
103
|
+
# assert_equal '2', doc.at('#hdnSpouse')['value']
|
104
|
+
end
|
105
|
+
|
106
|
+
def test_scan_boingboing
|
107
|
+
@boingboing = Hpricot.parse(TestFiles::BOINGBOING)
|
108
|
+
assert_equal 60, (@boingboing/'p.posted').length
|
109
|
+
assert_equal 1, @boingboing.search("//a[@name='027906']").length
|
110
|
+
assert_equal 10, @boingboing.search("script comment()").length
|
111
|
+
assert_equal 3, @boingboing.search("a[text()*='Boing']").length
|
112
|
+
assert_equal 1, @boingboing.search("h3[text()='College kids reportedly taking more smart drugs']").length
|
113
|
+
assert_equal 0, @boingboing.search("h3[text()='College']").length
|
114
|
+
assert_equal 60, @boingboing.search("h3").length
|
115
|
+
assert_equal 59, @boingboing.search("h3[text()!='College kids reportedly taking more smart drugs']").length
|
116
|
+
assert_equal 17, @boingboing.search("h3[text()$='s']").length
|
117
|
+
assert_equal 129, @boingboing.search("p[text()]").length
|
118
|
+
assert_equal 211, @boingboing.search("p").length
|
119
|
+
end
|
120
|
+
|
121
|
+
def test_reparent
|
122
|
+
doc = Hpricot(%{<div id="blurb_1"></div>})
|
123
|
+
div1 = doc.search('#blurb_1')
|
124
|
+
div1.before('<div id="blurb_0"></div>')
|
125
|
+
|
126
|
+
div0 = doc.search('#blurb_0')
|
127
|
+
div0.before('<div id="blurb_a"></div>')
|
128
|
+
|
129
|
+
assert_equal 'div', doc.at('#blurb_1').name
|
130
|
+
end
|
131
|
+
|
132
|
+
def test_siblings
|
133
|
+
@basic = Hpricot.parse(TestFiles::BASIC)
|
134
|
+
t = @basic.at(:title)
|
135
|
+
e = t.next_sibling
|
136
|
+
assert_equal 'test1.css', e['href']
|
137
|
+
assert_equal 'title', e.previous_sibling.name
|
138
|
+
end
|
139
|
+
|
140
|
+
def test_css_negation
|
141
|
+
@basic = Hpricot.parse(TestFiles::BASIC)
|
142
|
+
assert_equal 3, (@basic/'p:not(.final)').length
|
143
|
+
end
|
144
|
+
|
145
|
+
def test_remove_attribute
|
146
|
+
@basic = Hpricot.parse(TestFiles::BASIC)
|
147
|
+
(@basic/:p).each { |ele| ele.remove_attribute('class') }
|
148
|
+
assert_equal 0, (@basic/'p[@class]').length
|
149
|
+
end
|
150
|
+
|
151
|
+
def test_abs_xpath
|
152
|
+
@boingboing = Hpricot.parse(TestFiles::BOINGBOING)
|
153
|
+
assert_equal 60, @boingboing.search("/html/body//p[@class='posted']").length
|
154
|
+
assert_equal 60, @boingboing.search("/*/body//p[@class='posted']").length
|
155
|
+
assert_equal 18, @boingboing.search("//script").length
|
156
|
+
divs = @boingboing.search("//script/../div")
|
157
|
+
assert_equal 1, divs.length
|
158
|
+
imgs = @boingboing.search('//div/p/a/img')
|
159
|
+
assert_equal 15, imgs.length
|
160
|
+
assert_equal 17, @boingboing.search('//div').search('p/a/img').length
|
161
|
+
assert imgs.all? { |x| x.name == 'img' }
|
162
|
+
end
|
163
|
+
|
164
|
+
def test_predicates
|
165
|
+
@boingboing = Hpricot.parse(TestFiles::BOINGBOING)
|
166
|
+
assert_equal 2, @boingboing.search('//link[@rel="alternate"]').length
|
167
|
+
p_imgs = @boingboing.search('//div/p[/a/img]')
|
168
|
+
assert_equal 15, p_imgs.length
|
169
|
+
assert p_imgs.all? { |x| x.name == 'p' }
|
170
|
+
p_imgs = @boingboing.search('//div/p[a/img]')
|
171
|
+
assert_equal 18, p_imgs.length
|
172
|
+
assert p_imgs.all? { |x| x.name == 'p' }
|
173
|
+
assert_equal 1, @boingboing.search('//input[@checked]').length
|
174
|
+
end
|
175
|
+
|
176
|
+
def test_tag_case
|
177
|
+
@tenderlove = Hpricot.parse(TestFiles::TENDERLOVE)
|
178
|
+
assert_equal 2, @tenderlove.search('//a').length
|
179
|
+
assert_equal 3, @tenderlove.search('//area').length
|
180
|
+
assert_equal 2, @tenderlove.search('//meta').length
|
181
|
+
end
|
182
|
+
|
183
|
+
def test_alt_predicates
|
184
|
+
@boingboing = Hpricot.parse(TestFiles::BOINGBOING)
|
185
|
+
assert_equal 1, @boingboing.search('//table/tr:last').length
|
186
|
+
|
187
|
+
@basic = Hpricot.parse(TestFiles::BASIC)
|
188
|
+
assert_equal "<p>The third paragraph</p>",
|
189
|
+
@basic.search('p:eq(2)').to_html
|
190
|
+
assert_equal '<p class="last final"><b>THE FINAL PARAGRAPH</b></p>',
|
191
|
+
@basic.search('p:last').to_html
|
192
|
+
assert_equal 'last final', @basic.search('//p:last-of-type').first.get_attribute('class')
|
193
|
+
end
|
194
|
+
|
195
|
+
def test_insert_after # ticket #63
|
196
|
+
doc = Hpricot('<html><body><div id="a-div"></div></body></html>')
|
197
|
+
(doc/'div').each do |element|
|
198
|
+
element.after('<p>Paragraph 1</p><p>Paragraph 2</p>')
|
199
|
+
end
|
200
|
+
assert_equal doc.to_html, '<html><body><div id="a-div"></div><p>Paragraph 1</p><p>Paragraph 2</p></body></html>'
|
201
|
+
end
|
202
|
+
|
203
|
+
def test_insert_before # ticket #61
|
204
|
+
doc = Hpricot('<html><body><div id="a-div"></div></body></html>')
|
205
|
+
(doc/'div').each do |element|
|
206
|
+
element.before('<p>Paragraph 1</p><p>Paragraph 2</p>')
|
207
|
+
end
|
208
|
+
assert_equal doc.to_html, '<html><body><p>Paragraph 1</p><p>Paragraph 2</p><div id="a-div"></div></body></html>'
|
209
|
+
end
|
210
|
+
|
211
|
+
def test_many_paths
|
212
|
+
@boingboing = Hpricot.parse(TestFiles::BOINGBOING)
|
213
|
+
assert_equal 62, @boingboing.search('p.posted, link[@rel="alternate"]').length
|
214
|
+
assert_equal 20, @boingboing.search('//div/p[a/img]|//link[@rel="alternate"]').length
|
215
|
+
end
|
216
|
+
|
217
|
+
def test_stacked_search
|
218
|
+
@boingboing = Hpricot.parse(TestFiles::BOINGBOING)
|
219
|
+
assert_kind_of Hpricot::Elements, @boingboing.search('//div/p').search('a img')
|
220
|
+
end
|
221
|
+
|
222
|
+
def test_class_search
|
223
|
+
# test case sent by Chih-Chao Lam
|
224
|
+
doc = Hpricot("<div class=xyz'>abc</div>")
|
225
|
+
assert_equal 1, doc.search(".xyz").length
|
226
|
+
doc = Hpricot("<div class=xyz>abc</div><div class=abc>xyz</div>")
|
227
|
+
assert_equal 1, doc.search(".xyz").length
|
228
|
+
assert_equal 4, doc.search("*").length
|
229
|
+
end
|
230
|
+
|
231
|
+
def test_kleene_star
|
232
|
+
# bug noticed by raja bhatia
|
233
|
+
doc = Hpricot("<span class='small'>1</span><div class='large'>2</div><div class='small'>3</div><span class='blue large'>4</span>")
|
234
|
+
assert_equal 2, doc.search("*[@class*='small']").length
|
235
|
+
assert_equal 2, doc.search("*.small").length
|
236
|
+
assert_equal 2, doc.search(".small").length
|
237
|
+
assert_equal 2, doc.search(".large").length
|
238
|
+
end
|
239
|
+
|
240
|
+
def test_empty_comment
|
241
|
+
doc = Hpricot("<p><!----></p>")
|
242
|
+
assert doc.children[0].children[0].comment?
|
243
|
+
doc = Hpricot("<p><!-- --></p>")
|
244
|
+
assert doc.children[0].children[0].comment?
|
245
|
+
end
|
246
|
+
|
247
|
+
def test_body_newlines
|
248
|
+
@immob = Hpricot.parse(TestFiles::IMMOB)
|
249
|
+
body = @immob.at(:body)
|
250
|
+
{'background' => '', 'bgcolor' => '#ffffff', 'text' => '#000000', 'marginheight' => '10',
|
251
|
+
'marginwidth' => '10', 'leftmargin' => '10', 'topmargin' => '10', 'link' => '#000066',
|
252
|
+
'alink' => '#ff6600', 'hlink' => "#ff6600", 'vlink' => "#000000"}.each do |k, v|
|
253
|
+
assert_equal v, body[k]
|
254
|
+
end
|
255
|
+
end
|
256
|
+
|
257
|
+
def test_nested_twins
|
258
|
+
@doc = Hpricot("<div>Hi<div>there</div></div>")
|
259
|
+
assert_equal 1, (@doc/"div div").length
|
260
|
+
end
|
261
|
+
|
262
|
+
def test_wildcard
|
263
|
+
@basic = Hpricot.parse(TestFiles::BASIC)
|
264
|
+
assert_equal 3, (@basic/"*[@id]").length
|
265
|
+
assert_equal 3, (@basic/"//*[@id]").length
|
266
|
+
end
|
267
|
+
|
268
|
+
def test_javascripts
|
269
|
+
@immob = Hpricot.parse(TestFiles::IMMOB)
|
270
|
+
assert_equal 3, (@immob/:script)[0].inner_html.scan(/<LINK/).length
|
271
|
+
end
|
272
|
+
|
273
|
+
def test_nested_scripts
|
274
|
+
@week9 = Hpricot.parse(TestFiles::WEEK9)
|
275
|
+
assert_equal 14, (@week9/"a").find_all { |x| x.inner_html.include? "GameCenter" }.length
|
276
|
+
end
|
277
|
+
|
278
|
+
def test_uswebgen
|
279
|
+
@uswebgen = Hpricot.parse(TestFiles::USWEBGEN)
|
280
|
+
# sent by brent beardsley, hpricot 0.3 had problems with all the links.
|
281
|
+
assert_equal 67, (@uswebgen/:a).length
|
282
|
+
end
|
283
|
+
|
284
|
+
def test_mangled_tags
|
285
|
+
[%{<html><form name='loginForm' method='post' action='/units/a/login/1,13088,779-1,00.html'?URL=></form></html>},
|
286
|
+
%{<html><form name='loginForm' ?URL= method='post' action='/units/a/login/1,13088,779-1,00.html'></form></html>},
|
287
|
+
%{<html><form name='loginForm'?URL= ?URL= method='post' action='/units/a/login/1,13088,779-1,00.html'?URL=></form></html>},
|
288
|
+
%{<html><form name='loginForm' method='post' action='/units/a/login/1,13088,779-1,00.html' ?URL=></form></html>}].
|
289
|
+
each do |str|
|
290
|
+
doc = Hpricot(str)
|
291
|
+
assert_equal 1, (doc/:form).length
|
292
|
+
assert_equal '/units/a/login/1,13088,779-1,00.html', doc.at("form")['action']
|
293
|
+
end
|
294
|
+
end
|
295
|
+
|
296
|
+
def test_procins
|
297
|
+
doc = Hpricot("<?php print('hello') ?>\n<?xml blah='blah'?>")
|
298
|
+
assert_equal "php", doc.children[0].target
|
299
|
+
assert_equal "blah='blah'", doc.children[2].content
|
300
|
+
end
|
301
|
+
|
302
|
+
def test_buffer_error
|
303
|
+
assert_raise Hpricot::ParseError, "ran out of buffer space on element <input>, starting on line 3." do
|
304
|
+
Hpricot(%{<p>\n\n<input type="hidden" name="__VIEWSTATE" value="#{(("X" * 2000) + "\n") * 22}" />\n\n</p>})
|
305
|
+
end
|
306
|
+
end
|
307
|
+
|
308
|
+
def test_youtube_attr
|
309
|
+
str = <<-edoc
|
310
|
+
<html><body>
|
311
|
+
Lorem ipsum. Jolly roger, ding-dong sing-a-long
|
312
|
+
<object width="425" height="350">
|
313
|
+
<param name="movie" value="http://www.youtube.com/v/NbDQ4M_cuwA"></param>
|
314
|
+
<param name="wmode" value="transparent"></param>
|
315
|
+
<embed src="http://www.youtube.com/v/NbDQ4M_cuwA"
|
316
|
+
type="application/x-shockwave-flash" wmode="transparent" width="425" height="350">
|
317
|
+
</embed>
|
318
|
+
</object>
|
319
|
+
Check out my posting, I have bright mice in large clown cars.
|
320
|
+
<object width="425" height="350">
|
321
|
+
<param name="movie" value="http://www.youtube.com/v/foobar"></param>
|
322
|
+
<param name="wmode" value="transparent"></param>
|
323
|
+
<embed src="http://www.youtube.com/v/foobar"
|
324
|
+
type="application/x-shockwave-flash" wmode="transparent" width="425" height="350">
|
325
|
+
</embed>
|
326
|
+
</object>
|
327
|
+
</body></html?
|
328
|
+
edoc
|
329
|
+
doc = Hpricot(str)
|
330
|
+
assert_equal "http://www.youtube.com/v/NbDQ4M_cuwA",
|
331
|
+
doc.at("//object/param[@value='http://www.youtube.com/v/NbDQ4M_cuwA']")['value']
|
332
|
+
end
|
333
|
+
|
334
|
+
# ticket #84 by jamezilla
|
335
|
+
def test_screwed_xmlns
|
336
|
+
doc = Hpricot(<<-edoc)
|
337
|
+
<?xml:namespace prefix = cwi />
|
338
|
+
<html><body>HAI</body></html>
|
339
|
+
edoc
|
340
|
+
assert_equal "HAI", doc.at("body").inner_text
|
341
|
+
end
|
342
|
+
|
343
|
+
# Reported by Jonathan Nichols on the Hpricot list (24 May 2007)
|
344
|
+
def test_self_closed_form
|
345
|
+
doc = Hpricot(<<-edoc)
|
346
|
+
<body>
|
347
|
+
<form action="/loginRegForm" name="regForm" method="POST" />
|
348
|
+
<input type="button">
|
349
|
+
</form>
|
350
|
+
</body>
|
351
|
+
edoc
|
352
|
+
assert_equal "button", doc.at("//form/input")['type']
|
353
|
+
end
|
354
|
+
|
355
|
+
def test_filters
|
356
|
+
@basic = Hpricot.parse(TestFiles::BASIC)
|
357
|
+
assert_equal 0, (@basic/"title:parent").size
|
358
|
+
assert_equal 3, (@basic/"p:parent").size
|
359
|
+
assert_equal 1, (@basic/"title:empty").size
|
360
|
+
assert_equal 1, (@basic/"p:empty").size
|
361
|
+
end
|
362
|
+
|
363
|
+
def test_keep_cdata
|
364
|
+
str = %{<script> /*<![CDATA[*/
|
365
|
+
/*]]>*/ </script>}
|
366
|
+
assert_equal str, Hpricot(str).to_html
|
367
|
+
end
|
368
|
+
|
369
|
+
def test_namespace
|
370
|
+
chunk = <<-END
|
371
|
+
<a xmlns:t="http://www.nexopia.com/dev/template">
|
372
|
+
<t:sam>hi </t:sam>
|
373
|
+
</a>
|
374
|
+
END
|
375
|
+
doc = Hpricot::XML(chunk)
|
376
|
+
assert (doc/"//t:sam").size > 0 # at least this should probably work
|
377
|
+
# assert (doc/"//sam").size > 0 # this would be nice
|
378
|
+
end
|
379
|
+
end
|