nokogiri 1.3.2-x86-mswin32 → 1.3.3-x86-mswin32
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- data/CHANGELOG.ja.rdoc +25 -4
- data/CHANGELOG.rdoc +20 -0
- data/Manifest.txt +2 -0
- data/Rakefile +67 -24
- data/ext/nokogiri/extconf.rb +16 -9
- data/ext/nokogiri/html_document.c +0 -2
- data/ext/nokogiri/nokogiri.c +2 -0
- data/ext/nokogiri/nokogiri.h +3 -4
- data/ext/nokogiri/xml_document.c +30 -23
- data/ext/nokogiri/xml_document.h +3 -2
- data/ext/nokogiri/xml_dtd.c +4 -0
- data/ext/nokogiri/xml_dtd.h +2 -0
- data/ext/nokogiri/xml_node.c +28 -9
- data/ext/nokogiri/xml_reader.c +0 -7
- data/ext/nokogiri/xml_relax_ng.c +7 -1
- data/ext/nokogiri/xml_sax_parser.c +2 -0
- data/lib/action-nokogiri.rb +2 -0
- data/lib/nokogiri.rb +9 -3
- data/lib/nokogiri/1.8/nokogiri.so +0 -0
- data/lib/nokogiri/1.9/nokogiri.so +0 -0
- data/lib/nokogiri/css/generated_tokenizer.rb +80 -82
- data/lib/nokogiri/css/tokenizer.rb +1 -5
- data/lib/nokogiri/decorators/hpricot/node_set.rb +1 -1
- data/lib/nokogiri/ffi/structs/common_node.rb +1 -1
- data/lib/nokogiri/ffi/structs/xml_document.rb +1 -1
- data/lib/nokogiri/ffi/xml/document.rb +15 -4
- data/lib/nokogiri/ffi/xml/node.rb +85 -63
- data/lib/nokogiri/ffi/xml/reader.rb +4 -15
- data/lib/nokogiri/ffi/xml/relax_ng.rb +3 -1
- data/lib/nokogiri/hpricot.rb +30 -0
- data/lib/nokogiri/html/document.rb +3 -1
- data/lib/nokogiri/html/document_fragment.rb +1 -1
- data/lib/nokogiri/html/sax/parser.rb +2 -1
- data/lib/nokogiri/version.rb +1 -1
- data/lib/nokogiri/xml/builder.rb +44 -1
- data/lib/nokogiri/xml/document.rb +8 -1
- data/lib/nokogiri/xml/document_fragment.rb +1 -1
- data/lib/nokogiri/xml/fragment_handler.rb +4 -7
- data/lib/nokogiri/xml/node.rb +9 -6
- data/lib/nokogiri/xml/node_set.rb +7 -0
- data/lib/nokogiri/xml/parse_options.rb +1 -1
- data/test/css/test_nthiness.rb +2 -3
- data/test/ffi/test_document.rb +6 -6
- data/test/files/2ch.html +108 -0
- data/test/files/shift_jis.xml +5 -0
- data/test/helper.rb +3 -0
- data/test/hpricot/test_alter.rb +9 -9
- data/test/hpricot/test_builder.rb +2 -2
- data/test/hpricot/test_parser.rb +70 -146
- data/test/hpricot/test_paths.rb +2 -2
- data/test/hpricot/test_preserved.rb +2 -2
- data/test/hpricot/test_xml.rb +3 -3
- data/test/html/sax/test_parser.rb +12 -0
- data/test/html/test_builder.rb +6 -4
- data/test/html/test_document.rb +7 -0
- data/test/html/test_document_encoding.rb +17 -0
- data/test/html/test_document_fragment.rb +12 -0
- data/test/html/test_node.rb +5 -2
- data/test/test_convert_xpath.rb +1 -50
- data/test/test_css_cache.rb +1 -12
- data/test/test_nokogiri.rb +7 -0
- data/test/test_reader.rb +14 -0
- data/test/xml/test_document.rb +44 -0
- data/test/xml/test_document_fragment.rb +12 -0
- data/test/xml/test_node.rb +10 -2
- data/test/xml/test_node_encoding.rb +23 -0
- data/test/xml/test_node_set.rb +10 -0
- metadata +48 -46
data/test/helper.rb
CHANGED
@@ -2,6 +2,7 @@ Process.setrlimit(Process::RLIMIT_CORE, Process::RLIM_INFINITY) unless RUBY_PLAT
|
|
2
2
|
$VERBOSE = true
|
3
3
|
require 'rubygems'
|
4
4
|
require 'test/unit'
|
5
|
+
require 'tempfile'
|
5
6
|
|
6
7
|
%w(../lib ../ext).each do |path|
|
7
8
|
$LOAD_PATH.unshift(File.expand_path(File.join(File.dirname(__FILE__), path)))
|
@@ -19,6 +20,8 @@ module Nokogiri
|
|
19
20
|
EXSLT_FILE = File.join(ASSETS_DIR, 'exslt.xslt')
|
20
21
|
EXML_FILE = File.join(ASSETS_DIR, 'exslt.xml')
|
21
22
|
HTML_FILE = File.join(ASSETS_DIR, 'tlm.html')
|
23
|
+
NICH_FILE = File.join(ASSETS_DIR, '2ch.html')
|
24
|
+
SHIFT_JIS_XML = File.join(ASSETS_DIR, 'shift_jis.xml')
|
22
25
|
PO_XML_FILE = File.join(ASSETS_DIR, 'po.xml')
|
23
26
|
PO_SCHEMA_FILE = File.join(ASSETS_DIR, 'po.xsd')
|
24
27
|
ADDRESS_SCHEMA_FILE = File.join(ASSETS_DIR, 'address_book.rlx')
|
data/test/hpricot/test_alter.rb
CHANGED
@@ -6,9 +6,9 @@ class TestAlter < Nokogiri::TestCase
|
|
6
6
|
|
7
7
|
def setup
|
8
8
|
super
|
9
|
-
@basic =
|
9
|
+
@basic = Nokogiri::HTML.parse(TestFiles::BASIC)
|
10
10
|
end
|
11
|
-
|
11
|
+
|
12
12
|
def test_before
|
13
13
|
test0 = "<link rel='stylesheet' href='test0.css' />"
|
14
14
|
@basic.at("link").before(test0)
|
@@ -24,18 +24,18 @@ class TestAlter < Nokogiri::TestCase
|
|
24
24
|
def test_wrap
|
25
25
|
ohmy = (@basic/"p.ohmy").wrap("<div id='wrapper'></div>")
|
26
26
|
assert_equal 'wrapper', ohmy[0].parent['id']
|
27
|
-
assert_equal 'ohmy', Nokogiri
|
27
|
+
assert_equal 'ohmy', Nokogiri(@basic.to_html).at("#wrapper").children[0]['class']
|
28
28
|
end
|
29
|
-
|
29
|
+
|
30
30
|
def test_add_class
|
31
31
|
first_p = (@basic/"p:first").add_class("testing123")
|
32
32
|
assert first_p[0].get_attribute("class").split(" ").include?("testing123")
|
33
|
-
assert((Nokogiri
|
33
|
+
assert((Nokogiri(@basic.to_html)/"p:first")[0]["class"].split(" ").include?("testing123"))
|
34
34
|
####
|
35
35
|
# Modified. We do not support OB1 bug.
|
36
|
-
assert !(Nokogiri
|
36
|
+
assert !(Nokogiri(@basic.to_html)/"p:gt(1)")[0]["class"].split(" ").include?("testing123")
|
37
37
|
end
|
38
|
-
|
38
|
+
|
39
39
|
def test_change_attributes
|
40
40
|
all_ps = (@basic/"p").attr("title", "Some Title")
|
41
41
|
all_as = (@basic/"a").attr("href", "http://my_new_href.com")
|
@@ -45,7 +45,7 @@ class TestAlter < Nokogiri::TestCase
|
|
45
45
|
assert_changed(@basic, "a", all_as) {|a| a.attributes["href"].to_s == "http://my_new_href.com"}
|
46
46
|
assert_changed(@basic, "link", all_lb) {|a| a.attributes["href"].to_s == "link" }
|
47
47
|
end
|
48
|
-
|
48
|
+
|
49
49
|
def test_remove_attr
|
50
50
|
all_rl = (@basic/"link").remove_attr("href")
|
51
51
|
assert_changed(@basic, "link", all_rl) { |link| link['href'].nil? }
|
@@ -63,6 +63,6 @@ class TestAlter < Nokogiri::TestCase
|
|
63
63
|
|
64
64
|
def assert_changed original, selector, set, &block
|
65
65
|
assert set.all?(&block)
|
66
|
-
assert Nokogiri
|
66
|
+
assert Nokogiri(original.to_html).search(selector).all?(&block)
|
67
67
|
end
|
68
68
|
end
|
@@ -4,7 +4,7 @@ class TestBuilder < Nokogiri::TestCase
|
|
4
4
|
####
|
5
5
|
# Modified
|
6
6
|
def test_escaping_text
|
7
|
-
doc = Nokogiri
|
7
|
+
doc = Nokogiri() { b "<a\"b>" }
|
8
8
|
assert_match "<b><a\"b></b>", doc.to_html.chomp
|
9
9
|
assert_equal %{<a\"b>}, doc.at("text()").to_s
|
10
10
|
end
|
@@ -12,7 +12,7 @@ class TestBuilder < Nokogiri::TestCase
|
|
12
12
|
####
|
13
13
|
# Modified
|
14
14
|
def test_no_escaping_text
|
15
|
-
doc = Nokogiri
|
15
|
+
doc = Nokogiri() { div.test.me! { text "<a\"b>" } }
|
16
16
|
assert_match %{<div class="test" id="me"><a"b></div>},
|
17
17
|
doc.to_html.chomp
|
18
18
|
assert_equal %{<a\"b>}, doc.at("text()").to_s
|
data/test/hpricot/test_parser.rb
CHANGED
@@ -5,87 +5,60 @@ class TestParser < Nokogiri::TestCase
|
|
5
5
|
include Nokogiri
|
6
6
|
|
7
7
|
def test_set_attr
|
8
|
-
@basic =
|
8
|
+
@basic = Nokogiri.parse(TestFiles::BASIC)
|
9
9
|
@basic.search('//p').set('class', 'para')
|
10
10
|
assert_equal 4, @basic.search('//p').length
|
11
11
|
assert_equal 4, @basic.search('//p').find_all { |x| x['class'] == 'para' }.length
|
12
12
|
end
|
13
13
|
|
14
|
-
# Modified. Not supported
|
15
|
-
## Test creating a new element
|
16
|
-
#def test_new_element
|
17
|
-
# elem = Hpricot::Elem.new(Hpricot::STag.new('form'))
|
18
|
-
# assert_not_nil(elem)
|
19
|
-
# assert_not_nil(elem.attributes)
|
20
|
-
#end
|
21
|
-
|
22
|
-
def test_scan_text
|
23
|
-
assert_equal 'FOO', Hpricot.make("FOO").first.content
|
24
|
-
end
|
25
|
-
|
26
14
|
def test_filter_by_attr
|
27
|
-
@boingboing =
|
15
|
+
@boingboing = Nokogiri.parse(TestFiles::BOINGBOING)
|
28
16
|
|
29
17
|
# this link is escaped in the doc
|
30
18
|
link = 'http://www.youtube.com/watch?v=TvSNXyNw26g&search=chris%20ware'
|
31
19
|
assert_equal link, @boingboing.at("a[@href='#{link}']")['href']
|
32
20
|
end
|
33
|
-
|
21
|
+
|
34
22
|
def test_filter_contains
|
35
|
-
@basic =
|
23
|
+
@basic = Nokogiri.parse(TestFiles::BASIC)
|
36
24
|
assert_equal '<title>Sample XHTML</title>', @basic.search("title:contains('Sample')").to_s.chomp
|
37
25
|
end
|
38
26
|
|
39
27
|
def test_get_element_by_id
|
40
|
-
@basic =
|
41
|
-
assert_equal 'link1', @basic.
|
42
|
-
assert_equal 'link1', @basic.
|
28
|
+
@basic = Nokogiri.parse(TestFiles::BASIC)
|
29
|
+
assert_equal 'link1', @basic.at('#link1')['id']
|
30
|
+
assert_equal 'link1', @basic.at('#body1').at('#link1')['id']
|
43
31
|
end
|
44
32
|
|
45
33
|
def test_get_element_by_tag_name
|
46
|
-
@basic =
|
47
|
-
assert_equal 'link1', @basic.
|
48
|
-
assert_equal 'link1', @basic.
|
34
|
+
@basic = Nokogiri.parse(TestFiles::BASIC)
|
35
|
+
assert_equal 'link1', @basic.at('a')['id']
|
36
|
+
assert_equal 'link1', @basic.at('body').at('#link1')['id']
|
49
37
|
end
|
50
38
|
|
51
39
|
def test_output_basic
|
52
|
-
@basic =
|
53
|
-
@basic2 =
|
40
|
+
@basic = Nokogiri.parse(TestFiles::BASIC)
|
41
|
+
@basic2 = Nokogiri.parse(@basic.inner_html)
|
54
42
|
scan_basic @basic2
|
55
43
|
end
|
56
44
|
|
57
45
|
def test_scan_basic
|
58
|
-
@basic =
|
46
|
+
@basic = Nokogiri.parse(TestFiles::BASIC)
|
59
47
|
scan_basic @basic
|
60
48
|
end
|
61
49
|
|
62
50
|
def scan_basic doc
|
63
|
-
|
64
|
-
# Modified: asserting kind is not duck typey
|
65
|
-
#assert_kind_of Hpricot::XMLDecl, doc.children.first
|
66
|
-
assert_not_equal doc.children.first.to_s, doc.children[1].to_s
|
51
|
+
assert_not_equal doc.children.first.to_s, doc.children[1].to_s
|
67
52
|
assert_equal 'link1', doc.at('#link1')['id']
|
68
53
|
assert_equal 'link1', doc.at("p a")['id']
|
69
54
|
assert_equal 'link1', (doc/:p/:a).first['id']
|
70
|
-
assert_equal 'link1', doc.search('p').at('a')
|
71
|
-
|
72
|
-
assert_equal 'link2', (doc/'p').
|
73
|
-
assert_equal((doc/'p')[2], (doc/'p').
|
74
|
-
assert_equal((doc/'p')[3], (doc/'p').filter('b')[0])
|
75
|
-
assert_equal((doc/'p')[1], (doc/'p').filter('a[@id="link2"]')[0])
|
76
|
-
assert_equal((doc/'p')[3], (doc/'p').filter('.last')[0])
|
77
|
-
assert_equal 4, (doc/'p').filter('*').length
|
78
|
-
assert_equal 3, (doc/'p').filter('* *').length
|
79
|
-
eles = (doc/'p').filter('.ohmy')
|
80
|
-
assert_equal 1, eles.length
|
81
|
-
assert_equal 'ohmy', eles.first.get_attribute('class')
|
55
|
+
assert_equal 'link1', doc.search('p').at('a')['id']
|
56
|
+
|
57
|
+
assert_equal 'link2', (doc/'p').css('.ohmy').search('a').first['id']
|
58
|
+
assert_equal((doc/'p')[2], (doc/'p').css('[text()="The third paragraph"]')[0])
|
82
59
|
assert_equal 3, (doc/'p:not(.ohmy)').length
|
83
60
|
|
84
|
-
assert_equal 3, (doc/'p').not('.ohmy').length
|
85
|
-
assert_equal 3, (doc/'p').not(eles.first).length
|
86
|
-
assert_equal 2, (doc/'p').filter('[@class]').length
|
87
61
|
assert_equal 'last final', (doc/'p[@class~="final"]').first.get_attribute('class')
|
88
|
-
assert_equal 1, (doc/'p').filter('[@class~="final"]').length
|
89
62
|
assert_equal 2, (doc/'p > a').length
|
90
63
|
assert_equal 1, (doc/'p.ohmy > a').length
|
91
64
|
assert_equal 2, (doc/'p / a').length
|
@@ -97,44 +70,33 @@ class TestParser < Nokogiri::TestCase
|
|
97
70
|
end
|
98
71
|
|
99
72
|
def test_positional
|
100
|
-
h = Nokogiri
|
73
|
+
h = Nokogiri( "<div><br/><p>one</p><p>two</p></div>" )
|
101
74
|
assert_equal "<p>one</p>", h.search("div/p:eq(1)").to_s.chomp # MODIFIED: eq(0) -> eq(1), and removed initial '//'
|
102
75
|
assert_equal "<p>one</p>", h.search("div/p:first").to_s.chomp # MODIFIED: removed initial '//'
|
103
76
|
assert_equal "<p>one</p>", h.search("div/p:first()").to_s.chomp # MODIFIED: removed initial '//'
|
104
77
|
end
|
105
78
|
|
106
79
|
def test_pace
|
107
|
-
doc = Nokogiri
|
80
|
+
doc = Nokogiri(TestFiles::PACE_APPLICATION)
|
108
81
|
assert_equal 'get', doc.at('form[@name=frmSect11]')['method']
|
109
|
-
# assert_equal '2', doc.at('#hdnSpouse')['value']
|
110
82
|
end
|
111
83
|
|
112
84
|
def test_scan_boingboing
|
113
|
-
@boingboing =
|
85
|
+
@boingboing = Nokogiri.HTML(TestFiles::BOINGBOING)
|
114
86
|
assert_equal 60, (@boingboing/'p.posted').length
|
115
87
|
assert_equal 1, @boingboing.search("//a[@name='027906']").length
|
116
|
-
### MODIFIED: libxml wraps the contents of <script> in a CDATA tag, so we won't be able to parse comments.
|
117
|
-
# assert_equal 10, @boingboing.search("script comment()").length
|
118
88
|
assert_equal 3, @boingboing.search("a[text()*='Boing']").length
|
119
|
-
assert_equal 1, @boingboing.search(
|
89
|
+
assert_equal 1, @boingboing.search(
|
90
|
+
"//h3[normalize-space(text())='College kids reportedly taking more smart drugs']"
|
91
|
+
).length
|
120
92
|
assert_equal 0, @boingboing.search("h3[text()='College']").length
|
121
93
|
assert_equal 60, @boingboing.search("h3").length
|
122
|
-
assert_equal 59, @boingboing.search("h3[text()!='College kids reportedly taking more smart drugs']").length
|
123
|
-
assert_equal 17, @boingboing.search("h3[text()$='s']").length
|
124
|
-
### Modified. Hpricot is wrong
|
125
|
-
#assert_equal 129, @boingboing.search("p[text()]").length
|
126
|
-
## This test seems to change between libxml versions, so I'm commenting
|
127
|
-
## it out.
|
128
|
-
#if Nokogiri::LIBXML_VERSION == '2.6.16'
|
129
|
-
# assert_equal 111, @boingboing.search("p[text()]").length
|
130
|
-
#else
|
131
|
-
# assert_equal 110, @boingboing.search("p[text()]").length
|
132
|
-
#end
|
94
|
+
assert_equal 59, @boingboing.search("//h3[normalize-space(text())!='College kids reportedly taking more smart drugs']").length
|
133
95
|
assert_equal 211, @boingboing.search("p").length
|
134
96
|
end
|
135
97
|
|
136
98
|
def test_reparent
|
137
|
-
doc = Nokogiri
|
99
|
+
doc = Nokogiri(%{<div id="blurb_1"></div>})
|
138
100
|
div1 = doc.search('#blurb_1')
|
139
101
|
div1.before('<div id="blurb_0"></div>')
|
140
102
|
|
@@ -145,7 +107,7 @@ class TestParser < Nokogiri::TestCase
|
|
145
107
|
end
|
146
108
|
|
147
109
|
def test_siblings
|
148
|
-
@basic =
|
110
|
+
@basic = Nokogiri.parse(TestFiles::BASIC)
|
149
111
|
t = @basic.at(:title)
|
150
112
|
e = t.next_sibling
|
151
113
|
assert_equal 'test1.css', e['href']
|
@@ -153,33 +115,31 @@ class TestParser < Nokogiri::TestCase
|
|
153
115
|
end
|
154
116
|
|
155
117
|
def test_css_negation
|
156
|
-
@basic =
|
118
|
+
@basic = Nokogiri.parse(TestFiles::BASIC)
|
157
119
|
assert_equal 3, (@basic/'p:not(.final)').length
|
158
120
|
end
|
159
121
|
|
160
122
|
def test_remove_attribute
|
161
|
-
@basic =
|
123
|
+
@basic = Nokogiri.parse(TestFiles::BASIC)
|
162
124
|
(@basic/:p).each { |ele| ele.remove_attribute('class') }
|
163
125
|
assert_equal 0, (@basic/'p[@class]').length
|
164
126
|
end
|
165
127
|
|
166
|
-
##
|
167
|
-
# Modified: hpricot is giving incorrect counts. Libxml gets it right.
|
168
128
|
def test_abs_xpath
|
169
|
-
@boingboing =
|
129
|
+
@boingboing = Nokogiri.parse(TestFiles::BOINGBOING)
|
170
130
|
assert_equal 60, @boingboing.search("/html/body//p[@class='posted']").length
|
171
131
|
assert_equal 60, @boingboing.search("/*/body//p[@class='posted']").length
|
172
132
|
assert_equal 18, @boingboing.search("//script").length
|
173
133
|
divs = @boingboing.search("//script/../div")
|
174
|
-
assert_equal 2, divs.length
|
134
|
+
assert_equal 2, divs.length
|
175
135
|
imgs = @boingboing.search('//div/p/a/img')
|
176
|
-
assert_equal 12, imgs.length
|
136
|
+
assert_equal 12, imgs.length
|
177
137
|
assert_equal 16, @boingboing.search('//div').search('p/a/img').length
|
178
138
|
assert imgs.all? { |x| x.name == 'img' }
|
179
139
|
end
|
180
140
|
|
181
141
|
def test_predicates
|
182
|
-
@boingboing =
|
142
|
+
@boingboing = Nokogiri.parse(TestFiles::BOINGBOING)
|
183
143
|
assert_equal 2, @boingboing.search('//link[@rel="alternate"]').length
|
184
144
|
p_imgs = @boingboing.search('//div/p[/a/img]')
|
185
145
|
#assert_equal 15, p_imgs.length
|
@@ -190,32 +150,26 @@ class TestParser < Nokogiri::TestCase
|
|
190
150
|
assert_equal 1, @boingboing.search('//input[@checked]').length
|
191
151
|
end
|
192
152
|
|
193
|
-
def test_tag_case
|
194
|
-
@tenderlove =
|
195
|
-
assert_equal 2, @tenderlove.search('//a').length
|
196
|
-
assert_equal 3, @tenderlove.search('//area').length
|
197
|
-
assert_equal 2, @tenderlove.search('//meta').length
|
198
|
-
end
|
153
|
+
def test_tag_case
|
154
|
+
@tenderlove = Nokogiri.parse(TestFiles::TENDERLOVE)
|
155
|
+
assert_equal 2, @tenderlove.search('//a').length
|
156
|
+
assert_equal 3, @tenderlove.search('//area').length
|
157
|
+
assert_equal 2, @tenderlove.search('//meta').length
|
158
|
+
end
|
199
159
|
|
200
160
|
def test_alt_predicates
|
201
|
-
@boingboing =
|
202
|
-
assert_equal 2, @boingboing.search('table/tr:last').length
|
161
|
+
@boingboing = Nokogiri.parse(TestFiles::BOINGBOING)
|
162
|
+
assert_equal 2, @boingboing.search('table/tr:last').length
|
203
163
|
|
204
|
-
@basic =
|
205
|
-
##
|
206
|
-
# MODIFIED:
|
207
|
-
# hpricot has an off-by-one bug eith eq-and-friends.
|
164
|
+
@basic = Nokogiri.parse(TestFiles::BASIC)
|
208
165
|
assert_equal "<p>The third paragraph</p>",
|
209
|
-
@basic.search('p:eq(3)').to_html.chomp
|
210
|
-
##
|
211
|
-
# MODIFIED: to be blank-agnostic, because Nokogiri's to_html is slightly different compared to Hpricot.
|
212
|
-
assert_equal '<p class="last final"><b>THE FINAL PARAGRAPH</b></p>',
|
166
|
+
@basic.search('p:eq(3)').to_html.chomp
|
213
167
|
@basic.search('p:last').to_html.gsub(/\s+/,' ').gsub(/>\s*</, '><')
|
214
|
-
assert_equal 'last final', @basic.search('p:last-of-type').first.get_attribute('class')
|
168
|
+
assert_equal 'last final', @basic.search('p:last-of-type').first.get_attribute('class')
|
215
169
|
end
|
216
170
|
|
217
171
|
def test_insert_after # ticket #63
|
218
|
-
doc = Nokogiri
|
172
|
+
doc = Nokogiri('<html><body><div id="a-div"></div></body></html>')
|
219
173
|
(doc/'div').each do |element|
|
220
174
|
element.after('<p>Paragraph 1</p><p>Paragraph 2</p>')
|
221
175
|
end
|
@@ -224,7 +178,7 @@ class TestParser < Nokogiri::TestCase
|
|
224
178
|
end
|
225
179
|
|
226
180
|
def test_insert_before # ticket #61
|
227
|
-
doc = Nokogiri.
|
181
|
+
doc = Nokogiri.HTML('<html><body><div id="a-div"></div></body></html>')
|
228
182
|
(doc/'div').each do |element|
|
229
183
|
element.before('<p>Paragraph 1</p><p>Paragraph 2</p>')
|
230
184
|
end
|
@@ -233,34 +187,22 @@ class TestParser < Nokogiri::TestCase
|
|
233
187
|
end
|
234
188
|
|
235
189
|
def test_many_paths
|
236
|
-
@boingboing =
|
190
|
+
@boingboing = Nokogiri.parse(TestFiles::BOINGBOING)
|
237
191
|
assert_equal 62, @boingboing.search('p.posted, link[@rel="alternate"]').length
|
238
|
-
###
|
239
|
-
# Modified. I don't want to support this syntax. Just use a comma.
|
240
|
-
#assert_equal 20, @boingboing.search('//div/p[a/img]|//link[@rel="alternate"]').length
|
241
192
|
end
|
242
193
|
|
243
|
-
####
|
244
|
-
# Modified. Epic Fail. We're on the duck type train folks.
|
245
|
-
#def test_stacked_search
|
246
|
-
# @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
|
247
|
-
# assert_kind_of Hpricot::Elements, @boingboing.search('//div/p').search('a img')
|
248
|
-
#end
|
249
|
-
|
250
194
|
def test_class_search
|
251
|
-
|
252
|
-
# Modified. libxml corrects this differently than hpricot
|
253
|
-
doc = Nokogiri::Hpricot.HTML("<div class=xyz '>abc</div>")
|
195
|
+
doc = Nokogiri.HTML("<div class=xyz '>abc</div>")
|
254
196
|
assert_equal 1, doc.search(".xyz").length
|
255
197
|
|
256
|
-
doc = Nokogiri
|
198
|
+
doc = Nokogiri.HTML("<div class=xyz>abc</div><div class=abc>xyz</div>")
|
257
199
|
assert_equal 1, doc.search(".xyz").length
|
258
200
|
assert_equal 4, doc.search("*").length
|
259
201
|
end
|
260
202
|
|
261
203
|
def test_kleene_star
|
262
204
|
# bug noticed by raja bhatia
|
263
|
-
doc = Nokogiri
|
205
|
+
doc = Nokogiri.HTML("<span class='small'>1</span><div class='large'>2</div><div class='small'>3</div><span class='blue large'>4</span>")
|
264
206
|
assert_equal 2, doc.search("*[@class*='small']").length
|
265
207
|
assert_equal 2, doc.search("*.small").length
|
266
208
|
assert_equal 2, doc.search(".small").length
|
@@ -268,17 +210,17 @@ class TestParser < Nokogiri::TestCase
|
|
268
210
|
end
|
269
211
|
|
270
212
|
def test_empty_comment
|
271
|
-
doc = Nokogiri
|
213
|
+
doc = Nokogiri.HTML("<p><!----></p>")
|
272
214
|
doc = doc.search('//body').first
|
273
215
|
assert doc.children[0].children[0].comment?
|
274
216
|
|
275
|
-
doc = Nokogiri
|
217
|
+
doc = Nokogiri.HTML("<p><!-- --></p>")
|
276
218
|
doc = doc.search('//body').first
|
277
219
|
assert doc.children[0].children[0].comment?
|
278
220
|
end
|
279
221
|
|
280
222
|
def test_body_newlines
|
281
|
-
@immob =
|
223
|
+
@immob = Nokogiri.parse(TestFiles::IMMOB)
|
282
224
|
body = @immob.at(:body)
|
283
225
|
{'background' => '', 'bgcolor' => '#ffffff', 'text' => '#000000', 'marginheight' => '10',
|
284
226
|
'marginwidth' => '10', 'leftmargin' => '10', 'topmargin' => '10', 'link' => '#000066',
|
@@ -288,32 +230,32 @@ class TestParser < Nokogiri::TestCase
|
|
288
230
|
end
|
289
231
|
|
290
232
|
def test_nested_twins
|
291
|
-
@doc = Nokogiri
|
233
|
+
@doc = Nokogiri("<div>Hi<div>there</div></div>")
|
292
234
|
assert_equal 1, (@doc/"div div").length
|
293
235
|
end
|
294
236
|
|
295
237
|
def test_wildcard
|
296
|
-
@basic =
|
238
|
+
@basic = Nokogiri::HTML.parse(TestFiles::BASIC)
|
297
239
|
assert_equal 3, (@basic/"*[@id]").length
|
298
240
|
assert_equal 3, (@basic/"//*[@id]").length
|
299
241
|
end
|
300
242
|
|
301
243
|
def test_javascripts
|
302
|
-
@immob =
|
244
|
+
@immob = Nokogiri::HTML.parse(TestFiles::IMMOB)
|
303
245
|
assert_equal 3, (@immob/:script)[0].inner_html.scan(/<LINK/).length
|
304
246
|
end
|
305
247
|
|
306
248
|
####
|
307
249
|
# Modified. This test passes with later versions of libxml
|
308
250
|
def test_nested_scripts
|
309
|
-
@week9 =
|
251
|
+
@week9 = Nokogiri.parse(TestFiles::WEEK9)
|
310
252
|
unless Nokogiri::LIBXML_VERSION == '2.6.16'
|
311
253
|
assert_equal 14, (@week9/"a").find_all { |x| x.inner_html.include? "GameCenter" }.length
|
312
254
|
end
|
313
255
|
end
|
314
256
|
|
315
257
|
def test_uswebgen
|
316
|
-
@uswebgen =
|
258
|
+
@uswebgen = HTML.parse(TestFiles::USWEBGEN)
|
317
259
|
# sent by brent beardsley, nokogiri 0.3 had problems with all the links.
|
318
260
|
assert_equal 67, (@uswebgen/:a).length
|
319
261
|
end
|
@@ -324,7 +266,7 @@ class TestParser < Nokogiri::TestCase
|
|
324
266
|
%{<html><form name='loginForm'?URL= ?URL= method='post' action='/units/a/login/1,13088,779-1,00.html'?URL=></form></html>},
|
325
267
|
%{<html><form name='loginForm' method='post' action='/units/a/login/1,13088,779-1,00.html' ?URL=></form></html>}].
|
326
268
|
each do |str|
|
327
|
-
doc = Nokogiri
|
269
|
+
doc = Nokogiri(str)
|
328
270
|
assert_equal 1, (doc/:form).length
|
329
271
|
assert_equal '/units/a/login/1,13088,779-1,00.html', doc.at("form")['action']
|
330
272
|
end
|
@@ -333,8 +275,8 @@ class TestParser < Nokogiri::TestCase
|
|
333
275
|
####
|
334
276
|
# Modified. Added question. Don't care.
|
335
277
|
def test_procins
|
336
|
-
doc = Nokogiri
|
337
|
-
assert_equal "php", doc.children[1].
|
278
|
+
doc = Nokogiri.HTML("<?php print('hello') ?>\n<?xml blah='blah'?>")
|
279
|
+
assert_equal "php", doc.children[1].name
|
338
280
|
assert_equal "blah='blah'?", doc.children[2].content #"# quote added so emacs ruby-mode parser doesn't barf
|
339
281
|
end
|
340
282
|
|
@@ -342,7 +284,7 @@ class TestParser < Nokogiri::TestCase
|
|
342
284
|
# Altered... libxml does not get a buffer error
|
343
285
|
def test_buffer_error
|
344
286
|
assert_nothing_raised {
|
345
|
-
Nokogiri
|
287
|
+
Nokogiri(%{<p>\n\n<input type="hidden" name="__VIEWSTATE" value="#{(("X" * 2000) + "\n") * 22}" />\n\n</p>})
|
346
288
|
}
|
347
289
|
end
|
348
290
|
|
@@ -367,40 +309,23 @@ class TestParser < Nokogiri::TestCase
|
|
367
309
|
</object>
|
368
310
|
</body></html?
|
369
311
|
edoc
|
370
|
-
doc = Nokogiri
|
312
|
+
doc = Nokogiri(str)
|
371
313
|
assert_equal "http://www.youtube.com/v/NbDQ4M_cuwA",
|
372
314
|
doc.at("//object/param[@value='http://www.youtube.com/v/NbDQ4M_cuwA']")['value']
|
373
315
|
end
|
374
|
-
|
316
|
+
|
375
317
|
# ticket #84 by jamezilla
|
376
318
|
def test_screwed_xmlns
|
377
|
-
doc = Nokogiri
|
319
|
+
doc = Nokogiri(<<-edoc)
|
378
320
|
<?xml:namespace prefix = cwi />
|
379
321
|
<html><body>HAI</body></html>
|
380
322
|
edoc
|
381
323
|
assert_equal "HAI", doc.at("body").inner_text
|
382
324
|
end
|
383
325
|
|
384
|
-
# Reported by Jonathan Nichols on the Hpricot list (24 May 2007)
|
385
|
-
# MODIFIED: This is an issue with libxml which we cannot deal with....
|
386
|
-
#def test_self_closed_form
|
387
|
-
# doc = Nokogiri.Hpricot(<<-edoc)
|
388
|
-
# <body>
|
389
|
-
# <form action="/loginRegForm" name="regForm" method="POST" />
|
390
|
-
# <input type="button">
|
391
|
-
# </form>
|
392
|
-
# </body>
|
393
|
-
# edoc
|
394
|
-
# assert_equal "button", doc.at("//form/input")['type']
|
395
|
-
#end
|
396
|
-
|
397
326
|
def test_filters
|
398
|
-
@basic =
|
399
|
-
|
400
|
-
# MODIFIED:
|
401
|
-
# Hpricot considers nodes with text-only (but no child tags) to be empty.
|
402
|
-
# Nokogiri considers that any content makes a parent.
|
403
|
-
assert_equal 1, (@basic/"title:parent").size # so this was 0 under Hpricot
|
327
|
+
@basic = Nokogiri.parse(TestFiles::BASIC)
|
328
|
+
assert_equal 1, (@basic/"title:parent").size
|
404
329
|
assert_equal 4, (@basic/"p:parent").size
|
405
330
|
assert_equal 0, (@basic/"title:empty").size
|
406
331
|
assert_equal 3, (@basic/"link:empty").size
|
@@ -410,7 +335,7 @@ class TestParser < Nokogiri::TestCase
|
|
410
335
|
str = %{<script> /*<![CDATA[*/
|
411
336
|
/*]]>*/ </script>}
|
412
337
|
# MODIFIED: if you want the cdata, to_xml it
|
413
|
-
assert_match str, Nokogiri
|
338
|
+
assert_match str, Nokogiri(str).to_xml
|
414
339
|
end
|
415
340
|
|
416
341
|
def test_namespace
|
@@ -419,8 +344,7 @@ class TestParser < Nokogiri::TestCase
|
|
419
344
|
<t:sam>hi </t:sam>
|
420
345
|
</a>
|
421
346
|
END
|
422
|
-
doc =
|
423
|
-
assert((doc/"//t:sam").size > 0)
|
424
|
-
# assert (doc/"//sam").size > 0 # this would be nice
|
347
|
+
doc = Nokogiri::XML(chunk)
|
348
|
+
assert((doc/"//t:sam").size > 0)
|
425
349
|
end
|
426
350
|
end
|