nokogiri 1.8.5 → 1.9.1
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/README.md +0 -1
- data/ext/nokogiri/extconf.rb +10 -6
- data/ext/nokogiri/xml_attr.c +26 -21
- data/ext/nokogiri/xml_document.c +4 -1
- data/ext/nokogiri/xml_namespace.c +3 -3
- data/ext/nokogiri/xml_namespace.h +1 -2
- data/ext/nokogiri/xml_node.c +55 -15
- data/lib/nokogiri/css/parser.rb +61 -61
- data/lib/nokogiri/version.rb +1 -1
- data/lib/nokogiri/xml/document_fragment.rb +11 -0
- data/lib/nokogiri/xml/node.rb +12 -0
- data/lib/nokogiri/xml/node_set.rb +4 -8
- metadata +17 -153
- data/.autotest +0 -22
- data/.cross_rubies +0 -8
- data/.editorconfig +0 -17
- data/.gemtest +0 -0
- data/.travis.yml +0 -63
- data/CHANGELOG.md +0 -1368
- data/CONTRIBUTING.md +0 -42
- data/C_CODING_STYLE.rdoc +0 -33
- data/Gemfile +0 -23
- data/Gemfile-libxml-ruby +0 -3
- data/Manifest.txt +0 -370
- data/ROADMAP.md +0 -111
- data/Rakefile +0 -348
- data/SECURITY.md +0 -19
- data/STANDARD_RESPONSES.md +0 -47
- data/Y_U_NO_GEMSPEC.md +0 -155
- data/appveyor.yml +0 -29
- data/build_all +0 -44
- data/patches/sort-patches-by-date +0 -25
- data/suppressions/README.txt +0 -1
- data/suppressions/nokogiri_ruby-2.supp +0 -10
- data/tasks/test.rb +0 -100
- data/test/css/test_nthiness.rb +0 -226
- data/test/css/test_parser.rb +0 -386
- data/test/css/test_tokenizer.rb +0 -215
- data/test/css/test_xpath_visitor.rb +0 -96
- data/test/decorators/test_slop.rb +0 -23
- data/test/files/2ch.html +0 -108
- data/test/files/GH_1042.html +0 -18
- data/test/files/address_book.rlx +0 -12
- data/test/files/address_book.xml +0 -10
- data/test/files/atom.xml +0 -344
- data/test/files/bar/bar.xsd +0 -4
- data/test/files/bogus.xml +0 -0
- data/test/files/dont_hurt_em_why.xml +0 -422
- data/test/files/encoding.html +0 -82
- data/test/files/encoding.xhtml +0 -84
- data/test/files/exslt.xml +0 -8
- data/test/files/exslt.xslt +0 -35
- data/test/files/foo/foo.xsd +0 -4
- data/test/files/metacharset.html +0 -10
- data/test/files/namespace_pressure_test.xml +0 -1684
- data/test/files/noencoding.html +0 -47
- data/test/files/po.xml +0 -32
- data/test/files/po.xsd +0 -66
- data/test/files/saml/saml20assertion_schema.xsd +0 -283
- data/test/files/saml/saml20protocol_schema.xsd +0 -302
- data/test/files/saml/xenc_schema.xsd +0 -146
- data/test/files/saml/xmldsig_schema.xsd +0 -318
- data/test/files/shift_jis.html +0 -10
- data/test/files/shift_jis.xml +0 -5
- data/test/files/shift_jis_no_charset.html +0 -9
- data/test/files/slow-xpath.xml +0 -25509
- data/test/files/snuggles.xml +0 -3
- data/test/files/staff.dtd +0 -10
- data/test/files/staff.xml +0 -59
- data/test/files/staff.xslt +0 -32
- data/test/files/test_document_url/bar.xml +0 -2
- data/test/files/test_document_url/document.dtd +0 -4
- data/test/files/test_document_url/document.xml +0 -6
- data/test/files/tlm.html +0 -851
- data/test/files/to_be_xincluded.xml +0 -2
- data/test/files/valid_bar.xml +0 -2
- data/test/files/xinclude.xml +0 -4
- data/test/helper.rb +0 -271
- data/test/html/sax/test_parser.rb +0 -168
- data/test/html/sax/test_parser_context.rb +0 -46
- data/test/html/sax/test_parser_text.rb +0 -163
- data/test/html/sax/test_push_parser.rb +0 -87
- data/test/html/test_attributes.rb +0 -85
- data/test/html/test_builder.rb +0 -164
- data/test/html/test_document.rb +0 -712
- data/test/html/test_document_encoding.rb +0 -143
- data/test/html/test_document_fragment.rb +0 -310
- data/test/html/test_element_description.rb +0 -105
- data/test/html/test_named_characters.rb +0 -14
- data/test/html/test_node.rb +0 -212
- data/test/html/test_node_encoding.rb +0 -91
- data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +0 -14
- data/test/namespaces/test_namespaces_aliased_default.rb +0 -24
- data/test/namespaces/test_namespaces_in_builder_doc.rb +0 -75
- data/test/namespaces/test_namespaces_in_cloned_doc.rb +0 -31
- data/test/namespaces/test_namespaces_in_created_doc.rb +0 -75
- data/test/namespaces/test_namespaces_in_parsed_doc.rb +0 -80
- data/test/namespaces/test_namespaces_preservation.rb +0 -31
- data/test/test_convert_xpath.rb +0 -135
- data/test/test_css_cache.rb +0 -47
- data/test/test_encoding_handler.rb +0 -48
- data/test/test_memory_leak.rb +0 -156
- data/test/test_nokogiri.rb +0 -138
- data/test/test_soap4r_sax.rb +0 -52
- data/test/test_xslt_transforms.rb +0 -314
- data/test/xml/node/test_save_options.rb +0 -28
- data/test/xml/node/test_subclass.rb +0 -44
- data/test/xml/sax/test_parser.rb +0 -402
- data/test/xml/sax/test_parser_context.rb +0 -115
- data/test/xml/sax/test_parser_text.rb +0 -202
- data/test/xml/sax/test_push_parser.rb +0 -265
- data/test/xml/test_attr.rb +0 -74
- data/test/xml/test_attribute_decl.rb +0 -86
- data/test/xml/test_builder.rb +0 -341
- data/test/xml/test_c14n.rb +0 -180
- data/test/xml/test_cdata.rb +0 -54
- data/test/xml/test_comment.rb +0 -40
- data/test/xml/test_document.rb +0 -982
- data/test/xml/test_document_encoding.rb +0 -31
- data/test/xml/test_document_fragment.rb +0 -298
- data/test/xml/test_dtd.rb +0 -187
- data/test/xml/test_dtd_encoding.rb +0 -31
- data/test/xml/test_element_content.rb +0 -56
- data/test/xml/test_element_decl.rb +0 -73
- data/test/xml/test_entity_decl.rb +0 -122
- data/test/xml/test_entity_reference.rb +0 -262
- data/test/xml/test_namespace.rb +0 -96
- data/test/xml/test_node.rb +0 -1325
- data/test/xml/test_node_attributes.rb +0 -115
- data/test/xml/test_node_encoding.rb +0 -75
- data/test/xml/test_node_inheritance.rb +0 -32
- data/test/xml/test_node_reparenting.rb +0 -592
- data/test/xml/test_node_set.rb +0 -809
- data/test/xml/test_parse_options.rb +0 -64
- data/test/xml/test_processing_instruction.rb +0 -30
- data/test/xml/test_reader.rb +0 -620
- data/test/xml/test_reader_encoding.rb +0 -134
- data/test/xml/test_relax_ng.rb +0 -60
- data/test/xml/test_schema.rb +0 -142
- data/test/xml/test_syntax_error.rb +0 -36
- data/test/xml/test_text.rb +0 -60
- data/test/xml/test_unparented_node.rb +0 -483
- data/test/xml/test_xinclude.rb +0 -83
- data/test/xml/test_xpath.rb +0 -470
- data/test/xslt/test_custom_functions.rb +0 -133
- data/test/xslt/test_exception_handling.rb +0 -37
data/test/html/test_document.rb
DELETED
@@ -1,712 +0,0 @@
|
|
1
|
-
require "helper"
|
2
|
-
|
3
|
-
module Nokogiri
|
4
|
-
module HTML
|
5
|
-
class TestDocument < Nokogiri::TestCase
|
6
|
-
def setup
|
7
|
-
super
|
8
|
-
@html = Nokogiri::HTML.parse(File.read(HTML_FILE))
|
9
|
-
end
|
10
|
-
|
11
|
-
def test_nil_css
|
12
|
-
# Behavior is undefined but shouldn't break
|
13
|
-
assert @html.css(nil)
|
14
|
-
assert @html.xpath(nil)
|
15
|
-
end
|
16
|
-
|
17
|
-
def test_does_not_fail_with_illformatted_html
|
18
|
-
doc = Nokogiri::HTML('"</html>";'.dup.force_encoding(Encoding::BINARY))
|
19
|
-
assert_not_nil doc
|
20
|
-
end
|
21
|
-
|
22
|
-
def test_exceptions_remove_newlines
|
23
|
-
errors = @html.errors
|
24
|
-
assert errors.length > 0, 'has errors'
|
25
|
-
errors.each do |error|
|
26
|
-
assert_equal(error.to_s.chomp, error.to_s)
|
27
|
-
end
|
28
|
-
end
|
29
|
-
|
30
|
-
def test_fragment
|
31
|
-
fragment = @html.fragment
|
32
|
-
assert_equal 0, fragment.children.length
|
33
|
-
end
|
34
|
-
|
35
|
-
def test_document_takes_config_block
|
36
|
-
options = nil
|
37
|
-
Nokogiri::HTML(File.read(HTML_FILE), HTML_FILE) do |cfg|
|
38
|
-
options = cfg
|
39
|
-
options.nonet.nowarning.dtdattr
|
40
|
-
end
|
41
|
-
assert options.nonet?
|
42
|
-
assert options.nowarning?
|
43
|
-
assert options.dtdattr?
|
44
|
-
end
|
45
|
-
|
46
|
-
def test_parse_takes_config_block
|
47
|
-
options = nil
|
48
|
-
Nokogiri::HTML.parse(File.read(HTML_FILE), HTML_FILE) do |cfg|
|
49
|
-
options = cfg
|
50
|
-
options.nonet.nowarning.dtdattr
|
51
|
-
end
|
52
|
-
assert options.nonet?
|
53
|
-
assert options.nowarning?
|
54
|
-
assert options.dtdattr?
|
55
|
-
end
|
56
|
-
|
57
|
-
def test_subclass
|
58
|
-
klass = Class.new(Nokogiri::HTML::Document)
|
59
|
-
doc = klass.new
|
60
|
-
assert_instance_of klass, doc
|
61
|
-
end
|
62
|
-
|
63
|
-
def test_subclass_initialize
|
64
|
-
klass = Class.new(Nokogiri::HTML::Document) do
|
65
|
-
attr_accessor :initialized_with
|
66
|
-
|
67
|
-
def initialize(*args)
|
68
|
-
@initialized_with = args
|
69
|
-
end
|
70
|
-
end
|
71
|
-
doc = klass.new("uri", "external_id", 1)
|
72
|
-
assert_equal ["uri", "external_id", 1], doc.initialized_with
|
73
|
-
end
|
74
|
-
|
75
|
-
def test_subclass_dup
|
76
|
-
klass = Class.new(Nokogiri::HTML::Document)
|
77
|
-
doc = klass.new.dup
|
78
|
-
assert_instance_of klass, doc
|
79
|
-
end
|
80
|
-
|
81
|
-
def test_subclass_parse
|
82
|
-
klass = Class.new(Nokogiri::HTML::Document)
|
83
|
-
doc = klass.parse(File.read(HTML_FILE))
|
84
|
-
assert_equal @html.to_s, doc.to_s
|
85
|
-
assert_instance_of klass, doc
|
86
|
-
end
|
87
|
-
|
88
|
-
def test_document_parse_method
|
89
|
-
html = Nokogiri::HTML::Document.parse(File.read(HTML_FILE))
|
90
|
-
assert_equal @html.to_s, html.to_s
|
91
|
-
end
|
92
|
-
|
93
|
-
def test_document_parse_method_with_url
|
94
|
-
require 'open-uri'
|
95
|
-
begin
|
96
|
-
html = open('https://www.yahoo.com').read
|
97
|
-
rescue Exception => e
|
98
|
-
skip("This test needs the internet. Skips if no internet available. (#{e})")
|
99
|
-
end
|
100
|
-
doc = Nokogiri::HTML html ,"http:/foobar.foobar/", 'UTF-8'
|
101
|
-
refute_empty doc.to_s, "Document should not be empty"
|
102
|
-
end
|
103
|
-
|
104
|
-
###
|
105
|
-
# Nokogiri::HTML returns an empty Document when given a blank string GH#11
|
106
|
-
def test_empty_string_returns_empty_doc
|
107
|
-
doc = Nokogiri::HTML('')
|
108
|
-
assert_instance_of Nokogiri::HTML::Document, doc
|
109
|
-
assert_nil doc.root
|
110
|
-
end
|
111
|
-
|
112
|
-
unless Nokogiri.uses_libxml? && %w[2 6] === LIBXML_VERSION.split('.')[0..1]
|
113
|
-
# FIXME: this is a hack around broken libxml versions
|
114
|
-
def test_to_xhtml_with_indent
|
115
|
-
doc = Nokogiri::HTML('<html><body><a>foo</a></body></html>')
|
116
|
-
doc = Nokogiri::HTML(doc.to_xhtml(:indent => 2))
|
117
|
-
assert_indent 2, doc
|
118
|
-
end
|
119
|
-
|
120
|
-
def test_write_to_xhtml_with_indent
|
121
|
-
io = StringIO.new
|
122
|
-
doc = Nokogiri::HTML('<html><body><a>foo</a></body></html>')
|
123
|
-
doc.write_xhtml_to io, :indent => 5
|
124
|
-
io.rewind
|
125
|
-
doc = Nokogiri::HTML(io.read)
|
126
|
-
assert_indent 5, doc
|
127
|
-
end
|
128
|
-
end
|
129
|
-
|
130
|
-
def test_swap_should_not_exist
|
131
|
-
assert_raises(NoMethodError) {
|
132
|
-
@html.swap
|
133
|
-
}
|
134
|
-
end
|
135
|
-
|
136
|
-
def test_namespace_should_not_exist
|
137
|
-
assert_raises(NoMethodError) {
|
138
|
-
@html.namespace
|
139
|
-
}
|
140
|
-
end
|
141
|
-
|
142
|
-
def test_meta_encoding
|
143
|
-
assert_equal 'UTF-8', @html.meta_encoding
|
144
|
-
end
|
145
|
-
|
146
|
-
def test_meta_encoding_is_strict_about_http_equiv
|
147
|
-
doc = Nokogiri::HTML(<<-eohtml)
|
148
|
-
<html>
|
149
|
-
<head>
|
150
|
-
<meta http-equiv="X-Content-Type" content="text/html; charset=Shift_JIS">
|
151
|
-
</head>
|
152
|
-
<body>
|
153
|
-
foo
|
154
|
-
</body>
|
155
|
-
</html>
|
156
|
-
eohtml
|
157
|
-
assert_nil doc.meta_encoding
|
158
|
-
end
|
159
|
-
|
160
|
-
def test_meta_encoding_handles_malformed_content_charset
|
161
|
-
doc = Nokogiri::HTML(<<EOHTML)
|
162
|
-
<html>
|
163
|
-
<head>
|
164
|
-
<meta http-equiv="Content-type" content="text/html; utf-8" />
|
165
|
-
</head>
|
166
|
-
<body>
|
167
|
-
foo
|
168
|
-
</body>
|
169
|
-
</html>
|
170
|
-
EOHTML
|
171
|
-
assert_nil doc.meta_encoding
|
172
|
-
end
|
173
|
-
|
174
|
-
def test_meta_encoding_checks_charset
|
175
|
-
doc = Nokogiri::HTML(<<-eohtml)
|
176
|
-
<html>
|
177
|
-
<head>
|
178
|
-
<meta charset="UTF-8">
|
179
|
-
</head>
|
180
|
-
<body>
|
181
|
-
foo
|
182
|
-
</body>
|
183
|
-
</html>
|
184
|
-
eohtml
|
185
|
-
assert_equal 'UTF-8', doc.meta_encoding
|
186
|
-
end
|
187
|
-
|
188
|
-
def test_meta_encoding=
|
189
|
-
@html.meta_encoding = 'EUC-JP'
|
190
|
-
assert_equal 'EUC-JP', @html.meta_encoding
|
191
|
-
end
|
192
|
-
|
193
|
-
def test_title
|
194
|
-
assert_equal 'Tender Lovemaking ', @html.title
|
195
|
-
doc = Nokogiri::HTML('<html><body>foo</body></html>')
|
196
|
-
assert_nil doc.title
|
197
|
-
end
|
198
|
-
|
199
|
-
def test_title=()
|
200
|
-
doc = Nokogiri::HTML(<<eohtml)
|
201
|
-
<html>
|
202
|
-
<head>
|
203
|
-
<title>old</title>
|
204
|
-
</head>
|
205
|
-
<body>
|
206
|
-
foo
|
207
|
-
</body>
|
208
|
-
</html>
|
209
|
-
eohtml
|
210
|
-
doc.title = 'new'
|
211
|
-
assert_equal 1, doc.css('title').size
|
212
|
-
assert_equal 'new', doc.title
|
213
|
-
|
214
|
-
doc = Nokogiri::HTML(<<eohtml)
|
215
|
-
<html>
|
216
|
-
<head>
|
217
|
-
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
|
218
|
-
</head>
|
219
|
-
<body>
|
220
|
-
foo
|
221
|
-
</body>
|
222
|
-
</html>
|
223
|
-
eohtml
|
224
|
-
doc.title = 'new'
|
225
|
-
assert_equal 'new', doc.title
|
226
|
-
title = doc.at('/html/head/title')
|
227
|
-
assert_not_nil title
|
228
|
-
assert_equal 'new', title.text
|
229
|
-
assert_equal(-1, doc.at('meta[@http-equiv]') <=> title)
|
230
|
-
|
231
|
-
doc = Nokogiri::HTML(<<eohtml)
|
232
|
-
<html>
|
233
|
-
<body>
|
234
|
-
foo
|
235
|
-
</body>
|
236
|
-
</html>
|
237
|
-
eohtml
|
238
|
-
doc.title = 'new'
|
239
|
-
assert_equal 'new', doc.title
|
240
|
-
# <head> may or may not be added
|
241
|
-
title = doc.at('/html//title')
|
242
|
-
assert_not_nil title
|
243
|
-
assert_equal 'new', title.text
|
244
|
-
assert_equal(-1, title <=> doc.at('body'))
|
245
|
-
|
246
|
-
doc = Nokogiri::HTML(<<eohtml)
|
247
|
-
<html>
|
248
|
-
<meta charset="UTF-8">
|
249
|
-
<body>
|
250
|
-
foo
|
251
|
-
</body>
|
252
|
-
</html>
|
253
|
-
eohtml
|
254
|
-
doc.title = 'new'
|
255
|
-
assert_equal 'new', doc.title
|
256
|
-
assert_equal(-1, doc.at('meta[@charset]') <=> doc.at('title'))
|
257
|
-
assert_equal(-1, doc.at('title') <=> doc.at('body'))
|
258
|
-
|
259
|
-
doc = Nokogiri::HTML('<!DOCTYPE html><p>hello')
|
260
|
-
doc.title = 'new'
|
261
|
-
assert_equal 'new', doc.title
|
262
|
-
assert_instance_of Nokogiri::XML::DTD, doc.children.first
|
263
|
-
assert_equal(-1, doc.at('title') <=> doc.at('p'))
|
264
|
-
|
265
|
-
doc = Nokogiri::HTML('')
|
266
|
-
doc.title = 'new'
|
267
|
-
assert_equal 'new', doc.title
|
268
|
-
assert_equal 'new', doc.at('/html/head/title/text()').to_s
|
269
|
-
end
|
270
|
-
|
271
|
-
def test_meta_encoding_without_head
|
272
|
-
encoding = 'EUC-JP'
|
273
|
-
html = Nokogiri::HTML('<html><body>foo</body></html>', nil, encoding)
|
274
|
-
|
275
|
-
assert_nil html.meta_encoding
|
276
|
-
|
277
|
-
html.meta_encoding = encoding
|
278
|
-
assert_equal encoding, html.meta_encoding
|
279
|
-
|
280
|
-
meta = html.at('/html/head/meta[@http-equiv and boolean(@content)]')
|
281
|
-
assert meta, 'meta is in head'
|
282
|
-
|
283
|
-
assert meta.at('./parent::head/following-sibling::body'), 'meta is before body'
|
284
|
-
end
|
285
|
-
|
286
|
-
def test_html5_meta_encoding_without_head
|
287
|
-
encoding = 'EUC-JP'
|
288
|
-
html = Nokogiri::HTML('<!DOCTYPE html><html><body>foo</body></html>', nil, encoding)
|
289
|
-
|
290
|
-
assert_nil html.meta_encoding
|
291
|
-
|
292
|
-
html.meta_encoding = encoding
|
293
|
-
assert_equal encoding, html.meta_encoding
|
294
|
-
|
295
|
-
meta = html.at('/html/head/meta[@charset]')
|
296
|
-
assert meta, 'meta is in head'
|
297
|
-
|
298
|
-
assert meta.at('./parent::head/following-sibling::body'), 'meta is before body'
|
299
|
-
end
|
300
|
-
|
301
|
-
def test_meta_encoding_with_empty_content_type
|
302
|
-
html = Nokogiri::HTML(<<-eohtml)
|
303
|
-
<html>
|
304
|
-
<head>
|
305
|
-
<meta http-equiv="Content-Type" content="">
|
306
|
-
</head>
|
307
|
-
<body>
|
308
|
-
foo
|
309
|
-
</body>
|
310
|
-
</html>
|
311
|
-
eohtml
|
312
|
-
assert_nil html.meta_encoding
|
313
|
-
|
314
|
-
html = Nokogiri::HTML(<<-eohtml)
|
315
|
-
<html>
|
316
|
-
<head>
|
317
|
-
<meta http-equiv="Content-Type">
|
318
|
-
</head>
|
319
|
-
<body>
|
320
|
-
foo
|
321
|
-
</body>
|
322
|
-
</html>
|
323
|
-
eohtml
|
324
|
-
assert_nil html.meta_encoding
|
325
|
-
end
|
326
|
-
|
327
|
-
def test_root_node_parent_is_document
|
328
|
-
parent = @html.root.parent
|
329
|
-
assert_equal @html, parent
|
330
|
-
assert_instance_of Nokogiri::HTML::Document, parent
|
331
|
-
end
|
332
|
-
|
333
|
-
def test_parse_handles_nil_gracefully
|
334
|
-
@doc = Nokogiri::HTML::Document.parse(nil)
|
335
|
-
assert_instance_of Nokogiri::HTML::Document, @doc
|
336
|
-
end
|
337
|
-
|
338
|
-
def test_parse_empty_document
|
339
|
-
doc = Nokogiri::HTML("\n")
|
340
|
-
assert_equal 0, doc.css('a').length
|
341
|
-
assert_equal 0, doc.xpath('//a').length
|
342
|
-
assert_equal 0, doc.search('//a').length
|
343
|
-
end
|
344
|
-
|
345
|
-
def test_HTML_function
|
346
|
-
html = Nokogiri::HTML(File.read(HTML_FILE))
|
347
|
-
assert html.html?
|
348
|
-
end
|
349
|
-
|
350
|
-
def test_parse_io
|
351
|
-
assert File.open(HTML_FILE, 'rb') { |f|
|
352
|
-
Document.read_io(f, nil, 'UTF-8',
|
353
|
-
XML::ParseOptions::NOERROR | XML::ParseOptions::NOWARNING
|
354
|
-
)
|
355
|
-
}
|
356
|
-
end
|
357
|
-
|
358
|
-
def test_parse_temp_file
|
359
|
-
temp_html_file = Tempfile.new("TEMP_HTML_FILE")
|
360
|
-
File.open(HTML_FILE, 'rb') { |f| temp_html_file.write f.read }
|
361
|
-
temp_html_file.close
|
362
|
-
temp_html_file.open
|
363
|
-
assert_equal Nokogiri::HTML.parse(File.read(HTML_FILE)).xpath('//div/a').length,
|
364
|
-
Nokogiri::HTML.parse(temp_html_file).xpath('//div/a').length
|
365
|
-
end
|
366
|
-
|
367
|
-
def test_to_xhtml
|
368
|
-
assert_match 'XHTML', @html.to_xhtml
|
369
|
-
assert_match 'XHTML', @html.to_xhtml(:encoding => 'UTF-8')
|
370
|
-
assert_match 'UTF-8', @html.to_xhtml(:encoding => 'UTF-8')
|
371
|
-
end
|
372
|
-
|
373
|
-
def test_no_xml_header
|
374
|
-
html = Nokogiri::HTML(<<-eohtml)
|
375
|
-
<html>
|
376
|
-
</html>
|
377
|
-
eohtml
|
378
|
-
assert html.to_html.length > 0, 'html length is too short'
|
379
|
-
assert_no_match(/^<\?xml/, html.to_html)
|
380
|
-
end
|
381
|
-
|
382
|
-
def test_document_has_error
|
383
|
-
html = Nokogiri::HTML(<<-eohtml)
|
384
|
-
<html>
|
385
|
-
<body>
|
386
|
-
<div awesome="asdf>
|
387
|
-
<p>inside div tag</p>
|
388
|
-
</div>
|
389
|
-
<p>outside div tag</p>
|
390
|
-
</body>
|
391
|
-
</html>
|
392
|
-
eohtml
|
393
|
-
assert html.errors.length > 0
|
394
|
-
end
|
395
|
-
|
396
|
-
def test_relative_css
|
397
|
-
html = Nokogiri::HTML(<<-eohtml)
|
398
|
-
<html>
|
399
|
-
<body>
|
400
|
-
<div>
|
401
|
-
<p>inside div tag</p>
|
402
|
-
</div>
|
403
|
-
<p>outside div tag</p>
|
404
|
-
</body>
|
405
|
-
</html>
|
406
|
-
eohtml
|
407
|
-
set = html.search('div').search('p')
|
408
|
-
assert_equal(1, set.length)
|
409
|
-
assert_equal('inside div tag', set.first.inner_text)
|
410
|
-
end
|
411
|
-
|
412
|
-
def test_multi_css
|
413
|
-
html = Nokogiri::HTML(<<-eohtml)
|
414
|
-
<html>
|
415
|
-
<body>
|
416
|
-
<div>
|
417
|
-
<p>p tag</p>
|
418
|
-
<a>a tag</a>
|
419
|
-
</div>
|
420
|
-
</body>
|
421
|
-
</html>
|
422
|
-
eohtml
|
423
|
-
set = html.css('p, a')
|
424
|
-
assert_equal(2, set.length)
|
425
|
-
assert_equal ['a tag', 'p tag'].sort, set.map(&:content).sort
|
426
|
-
end
|
427
|
-
|
428
|
-
def test_inner_text
|
429
|
-
html = Nokogiri::HTML(<<-eohtml)
|
430
|
-
<html>
|
431
|
-
<body>
|
432
|
-
<div>
|
433
|
-
<p>
|
434
|
-
Hello world!
|
435
|
-
</p>
|
436
|
-
</div>
|
437
|
-
</body>
|
438
|
-
</html>
|
439
|
-
eohtml
|
440
|
-
node = html.xpath('//div').first
|
441
|
-
assert_equal('Hello world!', node.inner_text.strip)
|
442
|
-
end
|
443
|
-
|
444
|
-
def test_doc_type
|
445
|
-
html = Nokogiri::HTML(<<-eohtml)
|
446
|
-
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
|
447
|
-
<html xmlns="http://www.w3.org/1999/xhtml">
|
448
|
-
<body>
|
449
|
-
<p>Rainbow Dash</p>
|
450
|
-
</body>
|
451
|
-
</html>
|
452
|
-
eohtml
|
453
|
-
assert_equal "html", html.internal_subset.name
|
454
|
-
assert_equal "-//W3C//DTD XHTML 1.1//EN", html.internal_subset.external_id
|
455
|
-
assert_equal "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd", html.internal_subset.system_id
|
456
|
-
assert_equal "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\" \"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">", html.to_s[0,97]
|
457
|
-
end
|
458
|
-
|
459
|
-
def test_content_size
|
460
|
-
html = Nokogiri::HTML("<div>\n</div>")
|
461
|
-
assert_equal 1, html.content.size
|
462
|
-
assert_equal 1, html.content.split("").size
|
463
|
-
assert_equal "\n", html.content
|
464
|
-
end
|
465
|
-
|
466
|
-
def test_find_by_xpath
|
467
|
-
found = @html.xpath('//div/a')
|
468
|
-
assert_equal 3, found.length
|
469
|
-
end
|
470
|
-
|
471
|
-
def test_find_by_css
|
472
|
-
found = @html.css('div > a')
|
473
|
-
assert_equal 3, found.length
|
474
|
-
end
|
475
|
-
|
476
|
-
def test_find_by_css_with_square_brackets
|
477
|
-
found = @html.css("div[@id='header'] > h1")
|
478
|
-
found = @html.css("div[@id='header'] h1") # this blows up on commit 6fa0f6d329d9dbf1cc21c0ac72f7e627bb4c05fc
|
479
|
-
assert_equal 1, found.length
|
480
|
-
end
|
481
|
-
|
482
|
-
def test_find_by_css_with_escaped_characters
|
483
|
-
found_without_escape = @html.css("div[@id='abc.123']")
|
484
|
-
found_by_id = @html.css('#abc\.123')
|
485
|
-
found_by_class = @html.css('.special\.character')
|
486
|
-
assert_equal 1, found_without_escape.length
|
487
|
-
assert_equal found_by_id, found_without_escape
|
488
|
-
assert_equal found_by_class, found_without_escape
|
489
|
-
end
|
490
|
-
|
491
|
-
def test_find_with_function
|
492
|
-
assert @html.css("div:awesome() h1", Class.new {
|
493
|
-
def awesome divs
|
494
|
-
[divs.first]
|
495
|
-
end
|
496
|
-
}.new)
|
497
|
-
end
|
498
|
-
|
499
|
-
def test_dup_shallow
|
500
|
-
found = @html.search('//div/a').first
|
501
|
-
dup = found.dup(0)
|
502
|
-
assert dup
|
503
|
-
assert_equal '', dup.content
|
504
|
-
end
|
505
|
-
|
506
|
-
def test_search_can_handle_xpath_and_css
|
507
|
-
found = @html.search('//div/a', 'div > p')
|
508
|
-
length = @html.xpath('//div/a').length +
|
509
|
-
@html.css('div > p').length
|
510
|
-
assert_equal length, found.length
|
511
|
-
end
|
512
|
-
|
513
|
-
def test_dup_document
|
514
|
-
assert dup = @html.dup
|
515
|
-
assert_not_equal dup, @html
|
516
|
-
assert @html.html?
|
517
|
-
assert_instance_of Nokogiri::HTML::Document, dup
|
518
|
-
assert dup.html?, 'duplicate should be html'
|
519
|
-
assert_equal @html.to_s, dup.to_s
|
520
|
-
end
|
521
|
-
|
522
|
-
def test_dup_document_shallow
|
523
|
-
assert dup = @html.dup(0)
|
524
|
-
assert_not_equal dup, @html
|
525
|
-
end
|
526
|
-
|
527
|
-
def test_dup
|
528
|
-
found = @html.search('//div/a').first
|
529
|
-
dup = found.dup
|
530
|
-
assert dup
|
531
|
-
assert_equal found.content, dup.content
|
532
|
-
assert_equal found.document, dup.document
|
533
|
-
end
|
534
|
-
|
535
|
-
def test_inner_html
|
536
|
-
html = Nokogiri::HTML <<-EOHTML
|
537
|
-
<html>
|
538
|
-
<body>
|
539
|
-
<div>
|
540
|
-
<p>
|
541
|
-
Hello world!
|
542
|
-
</p>
|
543
|
-
</div>
|
544
|
-
</body>
|
545
|
-
</html>
|
546
|
-
EOHTML
|
547
|
-
node = html.xpath("//div").first
|
548
|
-
assert_equal("<p>Helloworld!</p>", node.inner_html.gsub(%r{\s}, ""))
|
549
|
-
end
|
550
|
-
|
551
|
-
def test_round_trip
|
552
|
-
doc = Nokogiri::HTML(@html.inner_html)
|
553
|
-
assert_equal @html.root.to_html, doc.root.to_html
|
554
|
-
end
|
555
|
-
|
556
|
-
def test_fragment_contains_text_node
|
557
|
-
fragment = Nokogiri::HTML.fragment('fooo')
|
558
|
-
assert_equal 1, fragment.children.length
|
559
|
-
assert_equal 'fooo', fragment.inner_text
|
560
|
-
end
|
561
|
-
|
562
|
-
def test_fragment_includes_two_tags
|
563
|
-
assert_equal 2, Nokogiri::HTML.fragment("<br/><hr/>").children.length
|
564
|
-
end
|
565
|
-
|
566
|
-
def test_relative_css_finder
|
567
|
-
doc = Nokogiri::HTML(<<-eohtml)
|
568
|
-
<html>
|
569
|
-
<body>
|
570
|
-
<div class="red">
|
571
|
-
<p>
|
572
|
-
inside red
|
573
|
-
</p>
|
574
|
-
</div>
|
575
|
-
<div class="green">
|
576
|
-
<p>
|
577
|
-
inside green
|
578
|
-
</p>
|
579
|
-
</div>
|
580
|
-
</body>
|
581
|
-
</html>
|
582
|
-
eohtml
|
583
|
-
red_divs = doc.css('div.red')
|
584
|
-
assert_equal 1, red_divs.length
|
585
|
-
p_tags = red_divs.first.css('p')
|
586
|
-
assert_equal 1, p_tags.length
|
587
|
-
assert_equal 'inside red', p_tags.first.text.strip
|
588
|
-
end
|
589
|
-
|
590
|
-
def test_find_classes
|
591
|
-
doc = Nokogiri::HTML(<<-eohtml)
|
592
|
-
<html>
|
593
|
-
<body>
|
594
|
-
<p class="red">RED</p>
|
595
|
-
<p class="awesome red">RED</p>
|
596
|
-
<p class="notred">GREEN</p>
|
597
|
-
<p class="green notred">GREEN</p>
|
598
|
-
</body>
|
599
|
-
</html>
|
600
|
-
eohtml
|
601
|
-
list = doc.css('.red')
|
602
|
-
assert_equal 2, list.length
|
603
|
-
assert_equal %w{ RED RED }, list.map(&:text)
|
604
|
-
end
|
605
|
-
|
606
|
-
def test_parse_can_take_io
|
607
|
-
html = nil
|
608
|
-
File.open(HTML_FILE, 'rb') { |f|
|
609
|
-
html = Nokogiri::HTML(f)
|
610
|
-
}
|
611
|
-
assert html.html?
|
612
|
-
end
|
613
|
-
|
614
|
-
def test_html?
|
615
|
-
assert !@html.xml?
|
616
|
-
assert @html.html?
|
617
|
-
end
|
618
|
-
|
619
|
-
def test_serialize
|
620
|
-
assert @html.serialize
|
621
|
-
assert @html.to_html
|
622
|
-
end
|
623
|
-
|
624
|
-
def test_empty_document
|
625
|
-
# empty document should return "" #699
|
626
|
-
assert_equal "", Nokogiri::HTML.parse(nil).text
|
627
|
-
assert_equal "", Nokogiri::HTML.parse("").text
|
628
|
-
end
|
629
|
-
|
630
|
-
def test_capturing_nonparse_errors_during_document_clone
|
631
|
-
# see https://github.com/sparklemotion/nokogiri/issues/1196 for background
|
632
|
-
original = Nokogiri::HTML.parse("<div id='unique'></div><div id='unique'></div>")
|
633
|
-
original_errors = original.errors.dup
|
634
|
-
|
635
|
-
copy = original.dup
|
636
|
-
assert_equal original_errors, copy.errors
|
637
|
-
end
|
638
|
-
|
639
|
-
def test_capturing_nonparse_errors_during_node_copy_between_docs
|
640
|
-
# Errors should be emitted while parsing only, and should not change when moving nodes.
|
641
|
-
doc1 = Nokogiri::HTML("<html><body><diva id='unique'>one</diva></body></html>")
|
642
|
-
doc2 = Nokogiri::HTML("<html><body><dive id='unique'>two</dive></body></html>")
|
643
|
-
node1 = doc1.at_css("#unique")
|
644
|
-
node2 = doc2.at_css("#unique")
|
645
|
-
original_errors1 = doc1.errors.dup
|
646
|
-
original_errors2 = doc2.errors.dup
|
647
|
-
assert original_errors1.any?{|e| e.to_s =~ /Tag diva invalid/ }, "it should complain about the tag name"
|
648
|
-
assert original_errors2.any?{|e| e.to_s =~ /Tag dive invalid/ }, "it should complain about the tag name"
|
649
|
-
|
650
|
-
node1.add_child node2
|
651
|
-
|
652
|
-
assert_equal original_errors1, doc1.errors
|
653
|
-
assert_equal original_errors2, doc2.errors
|
654
|
-
end
|
655
|
-
|
656
|
-
def test_silencing_nonparse_errors_during_attribute_insertion_1262
|
657
|
-
# see https://github.com/sparklemotion/nokogiri/issues/1262
|
658
|
-
#
|
659
|
-
# libxml2 emits a warning when this happens; the JRuby
|
660
|
-
# implementation does not. so rather than capture the error in
|
661
|
-
# doc.errors in a platform-dependent way, I'm opting to have
|
662
|
-
# the error silenced.
|
663
|
-
#
|
664
|
-
# So this test doesn't look meaningful, but we want to avoid
|
665
|
-
# having `ID unique-issue-1262 already defined` emitted to
|
666
|
-
# stderr when running the test suite.
|
667
|
-
#
|
668
|
-
doc = Nokogiri::HTML::Document.new
|
669
|
-
Nokogiri::XML::Element.new("div", doc).set_attribute('id', 'unique-issue-1262')
|
670
|
-
Nokogiri::XML::Element.new("div", doc).set_attribute('id', 'unique-issue-1262')
|
671
|
-
assert_equal 0, doc.errors.length
|
672
|
-
end
|
673
|
-
|
674
|
-
it "skips encoding for script tags" do
|
675
|
-
html = Nokogiri::HTML <<-EOHTML
|
676
|
-
<html>
|
677
|
-
<head>
|
678
|
-
<script>var isGreater = 4 > 5;</script>
|
679
|
-
</head>
|
680
|
-
<body></body>
|
681
|
-
</html>
|
682
|
-
EOHTML
|
683
|
-
node = html.xpath("//script").first
|
684
|
-
assert_equal("var isGreater = 4 > 5;", node.inner_html)
|
685
|
-
end
|
686
|
-
|
687
|
-
it "skips encoding for style tags" do
|
688
|
-
html = Nokogiri::HTML <<-EOHTML
|
689
|
-
<html>
|
690
|
-
<head>
|
691
|
-
<style>tr > div { display:block; }</style>
|
692
|
-
</head>
|
693
|
-
<body></body>
|
694
|
-
</html>
|
695
|
-
EOHTML
|
696
|
-
node = html.xpath("//style").first
|
697
|
-
assert_equal("tr > div { display:block; }", node.inner_html)
|
698
|
-
end
|
699
|
-
|
700
|
-
it "does not fail when converting to_html using explicit encoding" do
|
701
|
-
html_fragment=<<-eos
|
702
|
-
<img width="16" height="16" src="images/icon.gif" border="0" alt="Inactive hide details for "User" ---19/05/2015 12:55:29---Provvediamo subito nell’integrare">
|
703
|
-
eos
|
704
|
-
doc = Nokogiri::HTML(html_fragment, nil, 'ISO-8859-1')
|
705
|
-
html = doc.to_html
|
706
|
-
assert html.index("src=\"images/icon.gif\"")
|
707
|
-
assert_equal 'ISO-8859-1', html.encoding.name
|
708
|
-
end
|
709
|
-
|
710
|
-
end
|
711
|
-
end
|
712
|
-
end
|