tenderlove-nokogiri 0.0.0-x86-mswin32-60
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +6 -0
- data/Manifest.txt +120 -0
- data/README.ja.txt +86 -0
- data/README.txt +87 -0
- data/Rakefile +264 -0
- data/ext/nokogiri/extconf.rb +59 -0
- data/ext/nokogiri/html_document.c +83 -0
- data/ext/nokogiri/html_document.h +10 -0
- data/ext/nokogiri/html_sax_parser.c +32 -0
- data/ext/nokogiri/html_sax_parser.h +11 -0
- data/ext/nokogiri/native.c +40 -0
- data/ext/nokogiri/native.h +51 -0
- data/ext/nokogiri/xml_cdata.c +52 -0
- data/ext/nokogiri/xml_cdata.h +9 -0
- data/ext/nokogiri/xml_document.c +159 -0
- data/ext/nokogiri/xml_document.h +10 -0
- data/ext/nokogiri/xml_dtd.c +117 -0
- data/ext/nokogiri/xml_dtd.h +8 -0
- data/ext/nokogiri/xml_node.c +709 -0
- data/ext/nokogiri/xml_node.h +15 -0
- data/ext/nokogiri/xml_node_set.c +124 -0
- data/ext/nokogiri/xml_node_set.h +9 -0
- data/ext/nokogiri/xml_reader.c +429 -0
- data/ext/nokogiri/xml_reader.h +10 -0
- data/ext/nokogiri/xml_sax_parser.c +174 -0
- data/ext/nokogiri/xml_sax_parser.h +10 -0
- data/ext/nokogiri/xml_syntax_error.c +194 -0
- data/ext/nokogiri/xml_syntax_error.h +11 -0
- data/ext/nokogiri/xml_text.c +29 -0
- data/ext/nokogiri/xml_text.h +9 -0
- data/ext/nokogiri/xml_xpath.c +46 -0
- data/ext/nokogiri/xml_xpath.h +11 -0
- data/ext/nokogiri/xml_xpath_context.c +81 -0
- data/ext/nokogiri/xml_xpath_context.h +9 -0
- data/ext/nokogiri/xslt_stylesheet.c +108 -0
- data/ext/nokogiri/xslt_stylesheet.h +9 -0
- data/lib/nokogiri/css/node.rb +95 -0
- data/lib/nokogiri/css/parser.rb +24 -0
- data/lib/nokogiri/css/parser.y +198 -0
- data/lib/nokogiri/css/tokenizer.rb +9 -0
- data/lib/nokogiri/css/tokenizer.rex +63 -0
- data/lib/nokogiri/css/xpath_visitor.rb +165 -0
- data/lib/nokogiri/css.rb +6 -0
- data/lib/nokogiri/decorators/hpricot/node.rb +58 -0
- data/lib/nokogiri/decorators/hpricot/node_set.rb +14 -0
- data/lib/nokogiri/decorators/hpricot/xpath_visitor.rb +17 -0
- data/lib/nokogiri/decorators/hpricot.rb +3 -0
- data/lib/nokogiri/decorators.rb +1 -0
- data/lib/nokogiri/hpricot.rb +47 -0
- data/lib/nokogiri/html/builder.rb +9 -0
- data/lib/nokogiri/html/document.rb +9 -0
- data/lib/nokogiri/html/sax/parser.rb +21 -0
- data/lib/nokogiri/html.rb +95 -0
- data/lib/nokogiri/version.rb +3 -0
- data/lib/nokogiri/xml/after_handler.rb +18 -0
- data/lib/nokogiri/xml/before_handler.rb +32 -0
- data/lib/nokogiri/xml/builder.rb +79 -0
- data/lib/nokogiri/xml/cdata.rb +9 -0
- data/lib/nokogiri/xml/document.rb +30 -0
- data/lib/nokogiri/xml/dtd.rb +6 -0
- data/lib/nokogiri/xml/node.rb +195 -0
- data/lib/nokogiri/xml/node_set.rb +183 -0
- data/lib/nokogiri/xml/notation.rb +6 -0
- data/lib/nokogiri/xml/reader.rb +14 -0
- data/lib/nokogiri/xml/sax/document.rb +59 -0
- data/lib/nokogiri/xml/sax/parser.rb +33 -0
- data/lib/nokogiri/xml/sax.rb +9 -0
- data/lib/nokogiri/xml/syntax_error.rb +21 -0
- data/lib/nokogiri/xml/text.rb +6 -0
- data/lib/nokogiri/xml/xpath.rb +6 -0
- data/lib/nokogiri/xml/xpath_context.rb +14 -0
- data/lib/nokogiri/xml.rb +67 -0
- data/lib/nokogiri/xslt/stylesheet.rb +6 -0
- data/lib/nokogiri/xslt.rb +11 -0
- data/lib/nokogiri.rb +51 -0
- data/nokogiri.gemspec +34 -0
- data/test/css/test_nthiness.rb +159 -0
- data/test/css/test_parser.rb +224 -0
- data/test/css/test_tokenizer.rb +162 -0
- data/test/css/test_xpath_visitor.rb +54 -0
- data/test/files/staff.xml +59 -0
- data/test/files/staff.xslt +32 -0
- data/test/files/tlm.html +850 -0
- data/test/helper.rb +70 -0
- data/test/hpricot/files/basic.xhtml +17 -0
- data/test/hpricot/files/boingboing.html +2266 -0
- data/test/hpricot/files/cy0.html +3653 -0
- data/test/hpricot/files/immob.html +400 -0
- data/test/hpricot/files/pace_application.html +1320 -0
- data/test/hpricot/files/tenderlove.html +16 -0
- data/test/hpricot/files/uswebgen.html +220 -0
- data/test/hpricot/files/utf8.html +1054 -0
- data/test/hpricot/files/week9.html +1723 -0
- data/test/hpricot/files/why.xml +19 -0
- data/test/hpricot/load_files.rb +7 -0
- data/test/hpricot/test_alter.rb +67 -0
- data/test/hpricot/test_builder.rb +27 -0
- data/test/hpricot/test_parser.rb +423 -0
- data/test/hpricot/test_paths.rb +15 -0
- data/test/hpricot/test_preserved.rb +78 -0
- data/test/hpricot/test_xml.rb +30 -0
- data/test/html/sax/test_parser.rb +27 -0
- data/test/html/test_builder.rb +78 -0
- data/test/html/test_document.rb +86 -0
- data/test/test_convert_xpath.rb +180 -0
- data/test/test_nokogiri.rb +36 -0
- data/test/test_reader.rb +222 -0
- data/test/test_xslt_transforms.rb +29 -0
- data/test/xml/sax/test_parser.rb +93 -0
- data/test/xml/test_builder.rb +16 -0
- data/test/xml/test_cdata.rb +18 -0
- data/test/xml/test_document.rb +171 -0
- data/test/xml/test_dtd.rb +43 -0
- data/test/xml/test_node.rb +223 -0
- data/test/xml/test_node_set.rb +116 -0
- data/test/xml/test_text.rb +13 -0
- metadata +214 -0
@@ -0,0 +1,19 @@
|
|
1
|
+
<?xml version='1.0'?><rss xmlns:admin='http://webns.net/mvcb/' version='2.0' xmlns:sy='http://purl.org/rss/1.0/modules/syndication/' xmlns:dc='http://purl.org/dc/elements/1.1/' xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'>
|
2
|
+
<channel>
|
3
|
+
<title>why the lucky stiff</title>
|
4
|
+
<link>http://whytheluckystiff.net</link>
|
5
|
+
<description>hex-editing reality to give us infinite grenades!!</description>
|
6
|
+
<dc:language>en-us</dc:language>
|
7
|
+
<dc:creator/>
|
8
|
+
<dc:date>2007-01-16T22:39:04+00:00</dc:date>
|
9
|
+
<admin:generatorAgent rdf:resource='http://hobix.com/?v=0.4'/>
|
10
|
+
<sy:updatePeriod>hourly</sy:updatePeriod>
|
11
|
+
<sy:updateFrequency>1</sy:updateFrequency>
|
12
|
+
<sy:updateBase>2000-01-01T12:00+00:00</sy:updateBase>
|
13
|
+
<item><title>1.3</title><link>http://whytheluckystiff.net/quatrains/1.3.html</link><guid isPermaLink='false'>quatrains/1.3@http://whytheluckystiff.net</guid><dc:subject>quatrains</dc:subject><dc:subject>quatrains</dc:subject><dc:creator>why the lucky stiff</dc:creator><dc:date>2007-01-14T08:47:05+00:00</dc:date><description><blockquote>
|
14
|
+
<p>That cadillac of yours and that driver of yours!<br />You and your teacups rattling away in the back seat!<br />You always took the mike, oh, and all those cowboys you shot!<br />I held your hand! And I&#8217;ll shoot a cowboy one day!</p>
|
15
|
+
</blockquote>
|
16
|
+
<blockquote>
|
17
|
+
<p>You said, &#8220;Let&#8217;s run into the woods like kids!&#8221; <br />You said, &#8220;Let&#8217;s rub our hands together super-hot!&#8221; <br />And we scalded the trees and left octagons, I think that was you and<br />You threw parties on the roof!</p>
|
18
|
+
</blockquote></description></item></channel>
|
19
|
+
</rss>
|
@@ -0,0 +1,67 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '..', "helper"))
|
2
|
+
require File.join(File.dirname(__FILE__),"load_files")
|
3
|
+
|
4
|
+
class TestAlter < Nokogiri::TestCase
|
5
|
+
include Nokogiri
|
6
|
+
|
7
|
+
def setup
|
8
|
+
@basic = Hpricot.parse(TestFiles::BASIC)
|
9
|
+
end
|
10
|
+
|
11
|
+
def test_before
|
12
|
+
test0 = "<link rel='stylesheet' href='test0.css' />"
|
13
|
+
@basic.at("link").before(test0)
|
14
|
+
assert_equal 'test0.css', @basic.at("link").attributes['href']
|
15
|
+
end
|
16
|
+
|
17
|
+
def test_after
|
18
|
+
test_inf = "<link rel='stylesheet' href='test_inf.css' />"
|
19
|
+
@basic.search("link")[-1].after(test_inf)
|
20
|
+
assert_equal 'test_inf.css', @basic.search("link")[-1].attributes['href']
|
21
|
+
end
|
22
|
+
|
23
|
+
def test_wrap
|
24
|
+
ohmy = (@basic/"p.ohmy").wrap("<div id='wrapper'></div>")
|
25
|
+
assert_equal 'wrapper', ohmy[0].parent['id']
|
26
|
+
assert_equal 'ohmy', Nokogiri.Hpricot(@basic.to_html).at("#wrapper").children[0]['class']
|
27
|
+
end
|
28
|
+
|
29
|
+
def test_add_class
|
30
|
+
first_p = (@basic/"p:first").add_class("testing123")
|
31
|
+
assert first_p[0].get_attribute("class").split(" ").include?("testing123")
|
32
|
+
assert((Nokogiri.Hpricot(@basic.to_html)/"p:first")[0].attributes["class"].split(" ").include?("testing123"))
|
33
|
+
####
|
34
|
+
# Modified. We do not support Hpricot's off-by-one (OB1) bug.
|
35
|
+
assert !(Nokogiri.Hpricot(@basic.to_html)/"p:gt(1)")[0].attributes["class"].split(" ").include?("testing123")
|
36
|
+
end
|
37
|
+
|
38
|
+
def test_change_attributes
|
39
|
+
all_ps = (@basic/"p").attr("title", "Some Title")
|
40
|
+
all_as = (@basic/"a").attr("href", "http://my_new_href.com")
|
41
|
+
all_lb = (@basic/"link").attr("href") { |e| e.name }
|
42
|
+
GC.start # try to shake out GC bugs with xpath and node sets.
|
43
|
+
assert_changed(@basic, "p", all_ps) {|p| p.attributes["title"] == "Some Title"}
|
44
|
+
assert_changed(@basic, "a", all_as) {|a| a.attributes["href"] == "http://my_new_href.com"}
|
45
|
+
assert_changed(@basic, "link", all_lb) {|a| a.attributes["href"] == "link" }
|
46
|
+
end
|
47
|
+
|
48
|
+
def test_remove_attr
|
49
|
+
all_rl = (@basic/"link").remove_attr("href")
|
50
|
+
assert_changed(@basic, "link", all_rl) { |link| link['href'].nil? }
|
51
|
+
end
|
52
|
+
|
53
|
+
def test_remove_class
|
54
|
+
all_c1 = (@basic/"p[@class*='last']").remove_class("last")
|
55
|
+
assert_changed(@basic, "p[@class*='last']", all_c1) { |p| p['class'] == 'final' }
|
56
|
+
end
|
57
|
+
|
58
|
+
def test_remove_all_classes
|
59
|
+
all_c2 = (@basic/"p[@class]").remove_class
|
60
|
+
assert_changed(@basic, "p[@class]", all_c2) { |p| p['class'].nil? }
|
61
|
+
end
|
62
|
+
|
63
|
+
def assert_changed original, selector, set, &block
|
64
|
+
assert set.all?(&block)
|
65
|
+
assert Nokogiri.Hpricot(original.to_html).search(selector).all?(&block)
|
66
|
+
end
|
67
|
+
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '..', "helper"))
|
2
|
+
|
3
|
+
class TestBuilder < Nokogiri::TestCase
|
4
|
+
####
|
5
|
+
# Modified
|
6
|
+
def test_escaping_text
|
7
|
+
doc = Nokogiri.Hpricot() { b "<a\"b>" }
|
8
|
+
assert_equal "<b><a\"b></b>", doc.to_html.chomp
|
9
|
+
assert_equal %{<a\"b>}, doc.at("text()").to_s
|
10
|
+
end
|
11
|
+
|
12
|
+
####
|
13
|
+
# Modified
|
14
|
+
def test_no_escaping_text
|
15
|
+
doc = Nokogiri.Hpricot() { div.test.me! { text "<a\"b>" } }
|
16
|
+
assert_equal %{<div class="test" id="me"><a"b></div>},
|
17
|
+
doc.to_html.chomp
|
18
|
+
assert_equal %{<a\"b>}, doc.at("text()").to_s
|
19
|
+
end
|
20
|
+
|
21
|
+
####
|
22
|
+
# Modified
|
23
|
+
def test_latin1_entities
|
24
|
+
doc = Nokogiri.Hpricot() { b "\200\225" }
|
25
|
+
assert_equal "<b></b>", doc.to_html.chomp
|
26
|
+
end
|
27
|
+
end
|
@@ -0,0 +1,423 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '..', "helper"))
|
2
|
+
require File.join(File.dirname(__FILE__),"load_files")
|
3
|
+
|
4
|
+
class TestParser < Nokogiri::TestCase
|
5
|
+
include Nokogiri
|
6
|
+
|
7
|
+
def test_set_attr
|
8
|
+
@basic = Hpricot.parse(TestFiles::BASIC)
|
9
|
+
@basic.search('//p').set('class', 'para')
|
10
|
+
assert_equal 4, @basic.search('//p').length
|
11
|
+
assert_equal 4, @basic.search('//p').find_all { |x| x['class'] == 'para' }.length
|
12
|
+
end
|
13
|
+
|
14
|
+
# Test creating a new element
|
15
|
+
def test_new_element
|
16
|
+
elem = Hpricot::Elem.new(Hpricot::STag.new('form'))
|
17
|
+
assert_not_nil(elem)
|
18
|
+
assert_not_nil(elem.attributes)
|
19
|
+
end
|
20
|
+
|
21
|
+
def test_scan_text
|
22
|
+
assert_equal 'FOO', Hpricot.make("FOO").first.content
|
23
|
+
end
|
24
|
+
|
25
|
+
def test_filter_by_attr
|
26
|
+
@boingboing = Hpricot.parse(TestFiles::BOINGBOING)
|
27
|
+
|
28
|
+
# this link is escaped in the doc
|
29
|
+
link = 'http://www.youtube.com/watch?v=TvSNXyNw26g&search=chris%20ware'
|
30
|
+
assert_equal link, @boingboing.at("a[@href='#{link}']")['href']
|
31
|
+
end
|
32
|
+
|
33
|
+
def test_filter_contains
|
34
|
+
@basic = Hpricot.parse(TestFiles::BASIC)
|
35
|
+
assert_equal '<title>Sample XHTML</title>', @basic.search("title:contains('Sample')").to_s
|
36
|
+
end
|
37
|
+
|
38
|
+
def test_get_element_by_id
|
39
|
+
@basic = Hpricot.parse(TestFiles::BASIC)
|
40
|
+
assert_equal 'link1', @basic.get_element_by_id('link1')['id']
|
41
|
+
assert_equal 'link1', @basic.get_element_by_id('body1').get_element_by_id('link1').get_attribute('id')
|
42
|
+
end
|
43
|
+
|
44
|
+
def test_get_element_by_tag_name
|
45
|
+
@basic = Hpricot.parse(TestFiles::BASIC)
|
46
|
+
assert_equal 'link1', @basic.get_elements_by_tag_name('a')[0].get_attribute('id')
|
47
|
+
assert_equal 'link1', @basic.get_elements_by_tag_name('body')[0].get_element_by_id('link1').get_attribute('id')
|
48
|
+
end
|
49
|
+
|
50
|
+
def test_output_basic
|
51
|
+
@basic = Hpricot.parse(TestFiles::BASIC)
|
52
|
+
@basic2 = Hpricot.parse(@basic.inner_html)
|
53
|
+
scan_basic @basic2
|
54
|
+
end
|
55
|
+
|
56
|
+
def test_scan_basic
|
57
|
+
@basic = Hpricot.parse(TestFiles::BASIC)
|
58
|
+
scan_basic @basic
|
59
|
+
end
|
60
|
+
|
61
|
+
def scan_basic doc
|
62
|
+
####
|
63
|
+
# Modified: asserting kind is not duck typey
|
64
|
+
#assert_kind_of Hpricot::XMLDecl, doc.children.first
|
65
|
+
assert_not_equal doc.children.first.to_s, doc.children[1].to_s
|
66
|
+
assert_equal 'link1', doc.at('#link1')['id']
|
67
|
+
assert_equal 'link1', doc.at("p a")['id']
|
68
|
+
assert_equal 'link1', (doc/:p/:a).first['id']
|
69
|
+
assert_equal 'link1', doc.search('p').at('a').get_attribute('id')
|
70
|
+
|
71
|
+
### Modified: We're not supporting the filter() function
|
72
|
+
#assert_equal 'link2', (doc/'p').filter('.ohmy').search('a').first.get_attribute('id')
|
73
|
+
#assert_equal (doc/'p')[2], (doc/'p').filter(':nth(2)')[0]
|
74
|
+
#assert_equal (doc/'p')[2], (doc/'p').filter(':nth(2)')[0]
|
75
|
+
#assert_equal (doc/'p')[2], (doc/'p').filter('[3]')[0]
|
76
|
+
#assert_equal 4, (doc/'p').filter('*').length
|
77
|
+
#assert_equal 4, (doc/'p').filter('* *').length
|
78
|
+
#eles = (doc/'p').filter('.ohmy')
|
79
|
+
#assert_equal 1, eles.length
|
80
|
+
#assert_equal 'ohmy', eles.first.get_attribute('class')
|
81
|
+
assert_equal 3, (doc/'p:not(.ohmy)').length
|
82
|
+
|
83
|
+
### Modified: We're not supporting the not() function
|
84
|
+
#assert_equal 3, (doc/'p').not('.ohmy').length
|
85
|
+
#assert_equal 3, (doc/'p').not(eles.first).length
|
86
|
+
#assert_equal 2, (doc/'p').filter('[@class]').length
|
87
|
+
assert_equal 'last final', (doc/'p[@class~="final"]').first.get_attribute('class')
|
88
|
+
#assert_equal 1, (doc/'p').filter('[@class~="final"]').length
|
89
|
+
assert_equal 2, (doc/'p > a').length
|
90
|
+
assert_equal 1, (doc/'p.ohmy > a').length
|
91
|
+
assert_equal 2, (doc/'p / a').length
|
92
|
+
assert_equal 2, (doc/'link ~ link').length
|
93
|
+
assert_equal 3, (doc/'title ~ link').length
|
94
|
+
assert_equal 5, (doc/"//p/text()").length
|
95
|
+
assert_equal 6, (doc/"//p[a]//text()").length
|
96
|
+
assert_equal 2, (doc/"//p/a/text()").length
|
97
|
+
end
|
98
|
+
|
99
|
+
def test_positional
|
100
|
+
h = Nokogiri.Hpricot( "<div><br/><p>one</p><p>two</p></div>" )
|
101
|
+
assert_equal "<p>one</p>", h.search("div/p:eq(1)").to_s # MODIFIED: eq(0) -> eq(1), and removed initial '//'
|
102
|
+
assert_equal "<p>one</p>", h.search("div/p:first").to_s # MODIFIED: removed initial '//'
|
103
|
+
assert_equal "<p>one</p>", h.search("div/p:first()").to_s # MODIFIED: removed initial '//'
|
104
|
+
end
|
105
|
+
|
106
|
+
def test_pace
|
107
|
+
doc = Nokogiri.Hpricot(TestFiles::PACE_APPLICATION)
|
108
|
+
assert_equal 'get', doc.at('form[@name=frmSect11]')['method']
|
109
|
+
# assert_equal '2', doc.at('#hdnSpouse')['value']
|
110
|
+
end
|
111
|
+
|
112
|
+
def test_scan_boingboing
|
113
|
+
@boingboing = Hpricot.parse(TestFiles::BOINGBOING)
|
114
|
+
assert_equal 60, (@boingboing/'p.posted').length
|
115
|
+
assert_equal 1, @boingboing.search("//a[@name='027906']").length
|
116
|
+
### MODIFIED: libxml wraps the contents of <script> in a CDATA tag, so we won't be able to parse comments.
|
117
|
+
# assert_equal 10, @boingboing.search("script comment()").length
|
118
|
+
assert_equal 3, @boingboing.search("a[text()*='Boing']").length
|
119
|
+
assert_equal 1, @boingboing.search("h3[text()='College kids reportedly taking more smart drugs']").length
|
120
|
+
assert_equal 0, @boingboing.search("h3[text()='College']").length
|
121
|
+
assert_equal 60, @boingboing.search("h3").length
|
122
|
+
assert_equal 59, @boingboing.search("h3[text()!='College kids reportedly taking more smart drugs']").length
|
123
|
+
assert_equal 17, @boingboing.search("h3[text()$='s']").length
|
124
|
+
### Modified. Hpricot is wrong
|
125
|
+
#assert_equal 129, @boingboing.search("p[text()]").length
|
126
|
+
if Nokogiri::LIBXML_VERSION == '2.6.16'
|
127
|
+
assert_equal 111, @boingboing.search("p[text()]").length
|
128
|
+
else
|
129
|
+
assert_equal 110, @boingboing.search("p[text()]").length
|
130
|
+
end
|
131
|
+
assert_equal 211, @boingboing.search("p").length
|
132
|
+
end
|
133
|
+
|
134
|
+
def test_reparent
|
135
|
+
doc = Nokogiri.Hpricot(%{<div id="blurb_1"></div>})
|
136
|
+
div1 = doc.search('#blurb_1')
|
137
|
+
div1.before('<div id="blurb_0"></div>')
|
138
|
+
|
139
|
+
div0 = doc.search('#blurb_0')
|
140
|
+
div0.before('<div id="blurb_a"></div>')
|
141
|
+
|
142
|
+
assert_equal 'div', doc.at('#blurb_1').name
|
143
|
+
end
|
144
|
+
|
145
|
+
def test_siblings
|
146
|
+
@basic = Hpricot.parse(TestFiles::BASIC)
|
147
|
+
t = @basic.at(:title)
|
148
|
+
e = t.next_sibling
|
149
|
+
assert_equal 'test1.css', e['href']
|
150
|
+
assert_equal 'title', e.previous_sibling.name
|
151
|
+
end
|
152
|
+
|
153
|
+
def test_css_negation
|
154
|
+
@basic = Hpricot.parse(TestFiles::BASIC)
|
155
|
+
assert_equal 3, (@basic/'p:not(.final)').length
|
156
|
+
end
|
157
|
+
|
158
|
+
def test_remove_attribute
|
159
|
+
@basic = Hpricot.parse(TestFiles::BASIC)
|
160
|
+
(@basic/:p).each { |ele| ele.remove_attribute('class') }
|
161
|
+
assert_equal 0, (@basic/'p[@class]').length
|
162
|
+
end
|
163
|
+
|
164
|
+
##
|
165
|
+
# Modified: hpricot is giving incorrect counts. Libxml gets it right.
|
166
|
+
def test_abs_xpath
|
167
|
+
@boingboing = Hpricot.parse(TestFiles::BOINGBOING)
|
168
|
+
assert_equal 60, @boingboing.search("/html/body//p[@class='posted']").length
|
169
|
+
assert_equal 60, @boingboing.search("/*/body//p[@class='posted']").length
|
170
|
+
assert_equal 18, @boingboing.search("//script").length
|
171
|
+
divs = @boingboing.search("//script/../div")
|
172
|
+
assert_equal 2, divs.length # hpricot says this is 1, but that's wrong.
|
173
|
+
imgs = @boingboing.search('//div/p/a/img')
|
174
|
+
assert_equal 12, imgs.length # hpricot says this is 15, but that's wrong.
|
175
|
+
assert_equal 16, @boingboing.search('//div').search('p/a/img').length
|
176
|
+
assert imgs.all? { |x| x.name == 'img' }
|
177
|
+
end
|
178
|
+
|
179
|
+
def test_predicates
|
180
|
+
@boingboing = Hpricot.parse(TestFiles::BOINGBOING)
|
181
|
+
assert_equal 2, @boingboing.search('//link[@rel="alternate"]').length
|
182
|
+
p_imgs = @boingboing.search('//div/p[/a/img]')
|
183
|
+
#assert_equal 15, p_imgs.length
|
184
|
+
assert p_imgs.all? { |x| x.name == 'p' }
|
185
|
+
p_imgs = @boingboing.search('//div/p[a/img]')
|
186
|
+
assert_equal 12, p_imgs.length
|
187
|
+
assert p_imgs.all? { |x| x.name == 'p' }
|
188
|
+
assert_equal 1, @boingboing.search('//input[@checked]').length
|
189
|
+
end
|
190
|
+
|
191
|
+
def test_tag_case
|
192
|
+
@tenderlove = Hpricot.parse(TestFiles::TENDERLOVE)
|
193
|
+
assert_equal 2, @tenderlove.search('//a').length
|
194
|
+
assert_equal 3, @tenderlove.search('//area').length
|
195
|
+
assert_equal 2, @tenderlove.search('//meta').length
|
196
|
+
end
|
197
|
+
|
198
|
+
def test_alt_predicates
|
199
|
+
@boingboing = Hpricot.parse(TestFiles::BOINGBOING)
|
200
|
+
assert_equal 2, @boingboing.search('table/tr:last').length # MODIFIED to not have '//' prefix
|
201
|
+
|
202
|
+
@basic = Hpricot.parse(TestFiles::BASIC)
|
203
|
+
##
|
204
|
+
# MODIFIED:
|
205
|
+
# hpricot has an off-by-one bug with eq-and-friends.
|
206
|
+
assert_equal "<p>The third paragraph</p>",
|
207
|
+
@basic.search('p:eq(3)').to_html # under Hpricot this was eq(2)
|
208
|
+
##
|
209
|
+
# MODIFIED: to be blank-agnostic, because Nokogiri's to_html is slightly different compared to Hpricot.
|
210
|
+
assert_equal '<p class="last final"> <b>THE FINAL PARAGRAPH</b> </p>',
|
211
|
+
@basic.search('p:last').to_html.gsub(/\s+/,' ')
|
212
|
+
assert_equal 'last final', @basic.search('p:last-of-type').first.get_attribute('class') # MODIFIED to not have '//' prefix
|
213
|
+
end
|
214
|
+
|
215
|
+
def test_insert_after # ticket #63
|
216
|
+
doc = Nokogiri.Hpricot('<html><body><div id="a-div"></div></body></html>')
|
217
|
+
(doc/'div').each do |element|
|
218
|
+
element.after('<p>Paragraph 1</p><p>Paragraph 2</p>')
|
219
|
+
end
|
220
|
+
assert_match '<div id="a-div"></div><p>Paragraph 1</p><p>Paragraph 2</p>',
|
221
|
+
doc.to_html.gsub(/\n/, '')
|
222
|
+
end
|
223
|
+
|
224
|
+
def test_insert_before # ticket #61
|
225
|
+
doc = Nokogiri.Hpricot('<html><body><div id="a-div"></div></body></html>')
|
226
|
+
(doc/'div').each do |element|
|
227
|
+
element.before('<p>Paragraph 1</p><p>Paragraph 2</p>')
|
228
|
+
end
|
229
|
+
assert_match '<p>Paragraph 1</p><p>Paragraph 2</p><div id="a-div"></div>',
|
230
|
+
doc.to_html.gsub(/\n/, '')
|
231
|
+
end
|
232
|
+
|
233
|
+
def test_many_paths
|
234
|
+
@boingboing = Hpricot.parse(TestFiles::BOINGBOING)
|
235
|
+
assert_equal 62, @boingboing.search('p.posted, link[@rel="alternate"]').length
|
236
|
+
###
|
237
|
+
# Modified. I don't want to support this syntax. Just use a comma.
|
238
|
+
#assert_equal 20, @boingboing.search('//div/p[a/img]|//link[@rel="alternate"]').length
|
239
|
+
end
|
240
|
+
|
241
|
+
####
|
242
|
+
# Modified. Epic Fail. We're on the duck type train folks.
|
243
|
+
#def test_stacked_search
|
244
|
+
# @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
|
245
|
+
# assert_kind_of Hpricot::Elements, @boingboing.search('//div/p').search('a img')
|
246
|
+
#end
|
247
|
+
|
248
|
+
def test_class_search
|
249
|
+
# test case sent by Chih-Chao Lam
|
250
|
+
# Modified. libxml corrects this differently than hpricot
|
251
|
+
doc = Nokogiri.Hpricot("<div class=xyz '>abc</div>")
|
252
|
+
assert_equal 1, doc.search(".xyz").length
|
253
|
+
|
254
|
+
doc = Nokogiri.Hpricot("<div class=xyz>abc</div><div class=abc>xyz</div>")
|
255
|
+
assert_equal 1, doc.search(".xyz").length
|
256
|
+
assert_equal 4, doc.search("*").length
|
257
|
+
end
|
258
|
+
|
259
|
+
def test_kleene_star
|
260
|
+
# bug noticed by raja bhatia
|
261
|
+
doc = Nokogiri.Hpricot("<span class='small'>1</span><div class='large'>2</div><div class='small'>3</div><span class='blue large'>4</span>")
|
262
|
+
assert_equal 2, doc.search("*[@class*='small']").length
|
263
|
+
assert_equal 2, doc.search("*.small").length
|
264
|
+
assert_equal 2, doc.search(".small").length
|
265
|
+
assert_equal 2, doc.search(".large").length
|
266
|
+
end
|
267
|
+
|
268
|
+
def test_empty_comment
|
269
|
+
doc = Nokogiri.Hpricot("<p><!----></p>")
|
270
|
+
doc = doc.search('//body').first
|
271
|
+
assert doc.children[0].children[0].comment?
|
272
|
+
|
273
|
+
doc = Nokogiri.Hpricot("<p><!-- --></p>")
|
274
|
+
doc = doc.search('//body').first
|
275
|
+
assert doc.children[0].children[0].comment?
|
276
|
+
end
|
277
|
+
|
278
|
+
def test_body_newlines
|
279
|
+
@immob = Hpricot.parse(TestFiles::IMMOB)
|
280
|
+
body = @immob.at(:body)
|
281
|
+
{'background' => '', 'bgcolor' => '#ffffff', 'text' => '#000000', 'marginheight' => '10',
|
282
|
+
'marginwidth' => '10', 'leftmargin' => '10', 'topmargin' => '10', 'link' => '#000066',
|
283
|
+
'alink' => '#ff6600', 'hlink' => "#ff6600", 'vlink' => "#000000"}.each do |k, v|
|
284
|
+
assert_equal v, body[k]
|
285
|
+
end
|
286
|
+
end
|
287
|
+
|
288
|
+
def test_nested_twins
|
289
|
+
@doc = Nokogiri.Hpricot("<div>Hi<div>there</div></div>")
|
290
|
+
assert_equal 1, (@doc/"div div").length
|
291
|
+
end
|
292
|
+
|
293
|
+
def test_wildcard
|
294
|
+
@basic = Hpricot.parse(TestFiles::BASIC)
|
295
|
+
assert_equal 3, (@basic/"*[@id]").length
|
296
|
+
assert_equal 3, (@basic/"//*[@id]").length
|
297
|
+
end
|
298
|
+
|
299
|
+
def test_javascripts
|
300
|
+
@immob = Hpricot.parse(TestFiles::IMMOB)
|
301
|
+
assert_equal 3, (@immob/:script)[0].inner_html.scan(/<LINK/).length
|
302
|
+
end
|
303
|
+
|
304
|
+
####
|
305
|
+
# Modified. This test passes with later versions of libxml
|
306
|
+
def test_nested_scripts
|
307
|
+
@week9 = Hpricot.parse(TestFiles::WEEK9)
|
308
|
+
unless Nokogiri::LIBXML_VERSION == '2.6.16'
|
309
|
+
assert_equal 14, (@week9/"a").find_all { |x| x.inner_html.include? "GameCenter" }.length
|
310
|
+
end
|
311
|
+
end
|
312
|
+
|
313
|
+
def test_uswebgen
|
314
|
+
@uswebgen = Hpricot.parse(TestFiles::USWEBGEN)
|
315
|
+
# sent by brent beardsley, nokogiri 0.3 had problems with all the links.
|
316
|
+
assert_equal 67, (@uswebgen/:a).length
|
317
|
+
end
|
318
|
+
|
319
|
+
def test_mangled_tags
|
320
|
+
[%{<html><form name='loginForm' method='post' action='/units/a/login/1,13088,779-1,00.html'?URL=></form></html>},
|
321
|
+
%{<html><form name='loginForm' ?URL= method='post' action='/units/a/login/1,13088,779-1,00.html'></form></html>},
|
322
|
+
%{<html><form name='loginForm'?URL= ?URL= method='post' action='/units/a/login/1,13088,779-1,00.html'?URL=></form></html>},
|
323
|
+
%{<html><form name='loginForm' method='post' action='/units/a/login/1,13088,779-1,00.html' ?URL=></form></html>}].
|
324
|
+
each do |str|
|
325
|
+
doc = Nokogiri.Hpricot(str)
|
326
|
+
assert_equal 1, (doc/:form).length
|
327
|
+
assert_equal '/units/a/login/1,13088,779-1,00.html', doc.at("form")['action']
|
328
|
+
end
|
329
|
+
end
|
330
|
+
|
331
|
+
####
|
332
|
+
# Modified. Added the trailing question mark to the expected content. Don't care.
|
333
|
+
def test_procins
|
334
|
+
doc = Nokogiri.Hpricot("<?php print('hello') ?>\n<?xml blah='blah'?>")
|
335
|
+
assert_equal "php", doc.children[1].target
|
336
|
+
assert_equal "blah='blah'?", doc.children[2].content
|
337
|
+
end
|
338
|
+
|
339
|
+
####
|
340
|
+
# Altered... libxml does not get a buffer error
|
341
|
+
def test_buffer_error
|
342
|
+
assert_nothing_raised {
|
343
|
+
Nokogiri.Hpricot(%{<p>\n\n<input type="hidden" name="__VIEWSTATE" value="#{(("X" * 2000) + "\n") * 22}" />\n\n</p>})
|
344
|
+
}
|
345
|
+
end
|
346
|
+
|
347
|
+
def test_youtube_attr
|
348
|
+
str = <<-edoc
|
349
|
+
<html><body>
|
350
|
+
Lorem ipsum. Jolly roger, ding-dong sing-a-long
|
351
|
+
<object width="425" height="350">
|
352
|
+
<param name="movie" value="http://www.youtube.com/v/NbDQ4M_cuwA"></param>
|
353
|
+
<param name="wmode" value="transparent"></param>
|
354
|
+
<embed src="http://www.youtube.com/v/NbDQ4M_cuwA"
|
355
|
+
type="application/x-shockwave-flash" wmode="transparent" width="425" height="350">
|
356
|
+
</embed>
|
357
|
+
</object>
|
358
|
+
Check out my posting, I have bright mice in large clown cars.
|
359
|
+
<object width="425" height="350">
|
360
|
+
<param name="movie" value="http://www.youtube.com/v/foobar"></param>
|
361
|
+
<param name="wmode" value="transparent"></param>
|
362
|
+
<embed src="http://www.youtube.com/v/foobar"
|
363
|
+
type="application/x-shockwave-flash" wmode="transparent" width="425" height="350">
|
364
|
+
</embed>
|
365
|
+
</object>
|
366
|
+
</body></html?
|
367
|
+
edoc
|
368
|
+
doc = Nokogiri.Hpricot(str)
|
369
|
+
assert_equal "http://www.youtube.com/v/NbDQ4M_cuwA",
|
370
|
+
doc.at("//object/param[@value='http://www.youtube.com/v/NbDQ4M_cuwA']")['value']
|
371
|
+
end
|
372
|
+
|
373
|
+
# ticket #84 by jamezilla
|
374
|
+
def test_screwed_xmlns
|
375
|
+
doc = Nokogiri.Hpricot(<<-edoc)
|
376
|
+
<?xml:namespace prefix = cwi />
|
377
|
+
<html><body>HAI</body></html>
|
378
|
+
edoc
|
379
|
+
assert_equal "HAI", doc.at("body").inner_text
|
380
|
+
end
|
381
|
+
|
382
|
+
# Reported by Jonathan Nichols on the Hpricot list (24 May 2007)
|
383
|
+
# MODIFIED: This is an issue with libxml which we cannot deal with....
|
384
|
+
#def test_self_closed_form
|
385
|
+
# doc = Nokogiri.Hpricot(<<-edoc)
|
386
|
+
# <body>
|
387
|
+
# <form action="/loginRegForm" name="regForm" method="POST" />
|
388
|
+
# <input type="button">
|
389
|
+
# </form>
|
390
|
+
# </body>
|
391
|
+
# edoc
|
392
|
+
# assert_equal "button", doc.at("//form/input")['type']
|
393
|
+
#end
|
394
|
+
|
395
|
+
def test_filters
|
396
|
+
@basic = Hpricot.parse(TestFiles::BASIC)
|
397
|
+
##
|
398
|
+
# MODIFIED:
|
399
|
+
# Hpricot considers nodes with text-only (but no child tags) to be empty.
|
400
|
+
# Nokogiri considers that any content makes a parent.
|
401
|
+
assert_equal 1, (@basic/"title:parent").size # so this was 0 under Hpricot
|
402
|
+
assert_equal 4, (@basic/"p:parent").size
|
403
|
+
assert_equal 0, (@basic/"title:empty").size
|
404
|
+
assert_equal 3, (@basic/"link:empty").size
|
405
|
+
end
|
406
|
+
|
407
|
+
def test_keep_cdata
|
408
|
+
str = %{<script> /*<![CDATA[*/
|
409
|
+
/*]]>*/ </script>}
|
410
|
+
assert_match str, Nokogiri.Hpricot(str).to_html
|
411
|
+
end
|
412
|
+
|
413
|
+
def test_namespace
|
414
|
+
chunk = <<-END
|
415
|
+
<a xmlns:t="http://www.nexopia.com/dev/template">
|
416
|
+
<t:sam>hi </t:sam>
|
417
|
+
</a>
|
418
|
+
END
|
419
|
+
doc = Hpricot::XML(chunk)
|
420
|
+
assert((doc/"//t:sam").size > 0) # at least this should probably work
|
421
|
+
# assert (doc/"//sam").size > 0 # this would be nice
|
422
|
+
end
|
423
|
+
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '..', "helper"))
|
2
|
+
require File.join(File.dirname(__FILE__),"load_files")
|
3
|
+
|
4
|
+
class TestParser < Nokogiri::TestCase
|
5
|
+
include Nokogiri
|
6
|
+
|
7
|
+
def test_roundtrip
|
8
|
+
@basic = Hpricot.parse(TestFiles::BASIC)
|
9
|
+
%w[link link[2] body #link1 a p.ohmy].each do |css_sel|
|
10
|
+
ele = @basic.at(css_sel)
|
11
|
+
assert_equal ele, @basic.at(ele.css_path), ele.css_path
|
12
|
+
assert_equal ele, @basic.at(ele.xpath), ele.xpath
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
@@ -0,0 +1,78 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '..', "helper"))
|
2
|
+
require File.join(File.dirname(__FILE__),"load_files")
|
3
|
+
|
4
|
+
class TestPreserved < Nokogiri::TestCase
|
5
|
+
def assert_roundtrip str
|
6
|
+
doc = Nokogiri.Hpricot(str)
|
7
|
+
yield doc if block_given?
|
8
|
+
str2 = doc.to_original_html
|
9
|
+
[*str].zip([*str2]).each do |s1, s2|
|
10
|
+
assert_equal s1, s2
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
def assert_html str1, str2
|
15
|
+
doc = Nokogiri.Hpricot(str2)
|
16
|
+
yield doc if block_given?
|
17
|
+
assert_equal str1, doc.to_original_html
|
18
|
+
end
|
19
|
+
|
20
|
+
####
|
21
|
+
# Not supporting to_original_html
|
22
|
+
#def test_simple
|
23
|
+
# str = "<p>Hpricot is a <b>you know <i>uh</b> fine thing.</p>"
|
24
|
+
# assert_html str, str
|
25
|
+
# assert_html "<p class=\"new\">Hpricot is a <b>you know <i>uh</b> fine thing.</p>", str do |doc|
|
26
|
+
# (doc/:p).set('class', 'new')
|
27
|
+
# end
|
28
|
+
#end
|
29
|
+
|
30
|
+
####
|
31
|
+
# Not supporting to_original_html
|
32
|
+
#def test_parent
|
33
|
+
# str = "<html><base href='/'><head><title>Test</title></head><body><div id='wrap'><p>Paragraph one.</p><p>Paragraph two.</p></div></body></html>"
|
34
|
+
# assert_html str, str
|
35
|
+
# assert_html "<html><base href='/'><body><div id=\"all\"><div><p>Paragraph one.</p></div><div><p>Paragraph two.</p></div></div></body></html>", str do |doc|
|
36
|
+
# (doc/:head).remove
|
37
|
+
# (doc/:div).set('id', 'all')
|
38
|
+
# (doc/:p).wrap('<div></div>')
|
39
|
+
# end
|
40
|
+
#end
|
41
|
+
|
42
|
+
# Not really a valid test. If libxml can figure out the encoding of the file,
|
43
|
+
# it will use that encoding, otherwise it uses the &#xwhatever so that no data
|
44
|
+
# is lost.
|
45
|
+
#
|
46
|
+
# libxml on OSX can't figure out the encoding, so this test passes. Linux
|
47
|
+
# can figure out the encoding, so it fails.
|
48
|
+
#def test_escaping_of_contents
|
49
|
+
# doc = Nokogiri.Hpricot(TestFiles::BOINGBOING)
|
50
|
+
# assert_equal "Fukuda’s Automatic Door opens around your body as you pass through it. The idea is to save energy and keep the room clean.", doc.at("img[@alt='200606131240']").next.to_s.strip
|
51
|
+
#end
|
52
|
+
|
53
|
+
####
|
54
|
+
# Modified. No.
|
55
|
+
#def test_files
|
56
|
+
# assert_roundtrip TestFiles::BASIC
|
57
|
+
# assert_roundtrip TestFiles::BOINGBOING
|
58
|
+
# assert_roundtrip TestFiles::CY0
|
59
|
+
#end
|
60
|
+
|
61
|
+
####
|
62
|
+
# Modified.. When calling "to_html" on the document, proper html/doc tags
|
63
|
+
# are produced too.
|
64
|
+
def test_escaping_of_attrs
|
65
|
+
# ampersands in URLs
|
66
|
+
str = %{<a href="http://google.com/search?q=nokogiri&l=en">Google</a>}
|
67
|
+
link = (doc = Nokogiri.Hpricot(str)).at(:a)
|
68
|
+
assert_equal "http://google.com/search?q=nokogiri&l=en", link['href']
|
69
|
+
assert_equal "http://google.com/search?q=nokogiri&l=en", link.attributes['href']
|
70
|
+
assert_equal "http://google.com/search?q=nokogiri&l=en", link.get_attribute('href')
|
71
|
+
assert_equal "http://google.com/search?q=nokogiri&l=en", link.raw_attributes['href']
|
72
|
+
assert_equal str, link.to_html
|
73
|
+
|
74
|
+
# alter the url
|
75
|
+
link['href'] = "javascript:alert(\"AGGA-KA-BOO!\")"
|
76
|
+
assert_equal %{<a href="javascript:alert("AGGA-KA-BOO!")">Google</a>}, link.to_html.gsub(/%22/, '"')
|
77
|
+
end
|
78
|
+
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '..', "helper"))
|
2
|
+
require File.join(File.dirname(__FILE__),"load_files")
|
3
|
+
|
4
|
+
class TestParser < Nokogiri::TestCase
|
5
|
+
include Nokogiri
|
6
|
+
# normally, the link tags are empty HTML tags.
|
7
|
+
# contributed by laudney.
|
8
|
+
def test_normally_empty
|
9
|
+
doc = Hpricot::XML("<rss><channel><title>this is title</title><link>http://fake.com</link></channel></rss>")
|
10
|
+
assert_equal "this is title", (doc/:rss/:channel/:title).text
|
11
|
+
assert_equal "http://fake.com", (doc/:rss/:channel/:link).text
|
12
|
+
end
|
13
|
+
|
14
|
+
# make sure XML doesn't get downcased
|
15
|
+
def test_casing
|
16
|
+
doc = Hpricot::XML(TestFiles::WHY)
|
17
|
+
|
18
|
+
### Modified.
|
19
|
+
# I don't want to differentiate pseudo classes from namespaces. If
|
20
|
+
# you're parsing xml, use XPath. That's what it's for. :-P
|
21
|
+
assert_equal "hourly", (doc.at "//sy:updatePeriod").content
|
22
|
+
assert_equal 1, (doc/"guid[@isPermaLink]").length
|
23
|
+
end
|
24
|
+
|
25
|
+
# be sure tags named "text" are ok
|
26
|
+
def test_text_tags
|
27
|
+
doc = Hpricot::XML("<feed><title>City Poisoned</title><text>Rita Lee has poisoned Brazil.</text></feed>")
|
28
|
+
assert_equal "City Poisoned", (doc/"title").text
|
29
|
+
end
|
30
|
+
end
|