nokogiri 1.6.7.2-java → 1.6.8-java
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/.cross_rubies +2 -0
- data/.travis.yml +19 -9
- data/CHANGELOG.rdoc +73 -5
- data/CONTRIBUTING.md +42 -0
- data/Gemfile +10 -9
- data/LICENSE.txt +1 -1
- data/Manifest.txt +7 -2
- data/README.md +23 -27
- data/ROADMAP.md +11 -1
- data/Rakefile +36 -17
- data/bin/nokogiri +2 -2
- data/dependencies.yml +29 -4
- data/ext/java/nokogiri/HtmlElementDescription.java +5 -2
- data/ext/java/nokogiri/NokogiriService.java +19 -0
- data/ext/java/nokogiri/XmlAttr.java +3 -1
- data/ext/java/nokogiri/XmlDocumentFragment.java +0 -14
- data/ext/java/nokogiri/XmlNode.java +106 -63
- data/ext/java/nokogiri/XmlXpathContext.java +12 -12
- data/ext/java/nokogiri/XsltStylesheet.java +11 -4
- data/ext/java/nokogiri/internals/HtmlDomParserContext.java +8 -1
- data/ext/java/nokogiri/internals/NokogiriErrorHandler.java +1 -2
- data/ext/java/nokogiri/internals/NokogiriHelpers.java +7 -7
- data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java +1 -1
- data/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java +0 -1
- data/ext/java/nokogiri/internals/NokogiriXsltErrorListener.java +3 -3
- data/ext/java/nokogiri/internals/ParserContext.java +4 -0
- data/ext/java/nokogiri/internals/SaveContextVisitor.java +18 -13
- data/ext/nokogiri/extconf.rb +163 -79
- data/ext/nokogiri/html_document.c +6 -6
- data/ext/nokogiri/html_element_description.c +1 -1
- data/ext/nokogiri/html_entity_lookup.c +1 -1
- data/ext/nokogiri/html_sax_parser_context.c +4 -4
- data/ext/nokogiri/html_sax_push_parser.c +2 -2
- data/ext/nokogiri/nokogiri.c +0 -7
- data/ext/nokogiri/nokogiri.h +1 -34
- data/ext/nokogiri/xml_attr.c +2 -2
- data/ext/nokogiri/xml_comment.c +1 -1
- data/ext/nokogiri/xml_document.c +20 -22
- data/ext/nokogiri/xml_encoding_handler.c +3 -3
- data/ext/nokogiri/xml_entity_reference.c +1 -1
- data/ext/nokogiri/xml_namespace.c +56 -17
- data/ext/nokogiri/xml_node.c +73 -67
- data/ext/nokogiri/xml_node_set.c +164 -146
- data/ext/nokogiri/xml_node_set.h +3 -4
- data/ext/nokogiri/xml_processing_instruction.c +2 -2
- data/ext/nokogiri/xml_reader.c +5 -18
- data/ext/nokogiri/xml_sax_parser.c +9 -12
- data/ext/nokogiri/xml_sax_parser_context.c +1 -1
- data/ext/nokogiri/xml_sax_push_parser.c +1 -1
- data/ext/nokogiri/xml_schema.c +1 -1
- data/ext/nokogiri/xml_syntax_error.c +0 -4
- data/ext/nokogiri/xml_syntax_error.h +0 -1
- data/ext/nokogiri/xml_text.c +1 -1
- data/ext/nokogiri/xml_xpath_context.c +15 -24
- data/ext/nokogiri/xslt_stylesheet.c +6 -6
- data/lib/nekohtml.jar +0 -0
- data/lib/nokogiri.rb +14 -7
- data/lib/nokogiri/css/parser.rb +8 -2
- data/lib/nokogiri/css/parser.y +7 -2
- data/lib/nokogiri/html/document.rb +4 -2
- data/lib/nokogiri/nokogiri.jar +0 -0
- data/lib/nokogiri/version.rb +1 -1
- data/lib/nokogiri/xml/document.rb +7 -1
- data/lib/nokogiri/xml/dtd.rb +4 -4
- data/lib/nokogiri/xml/node.rb +6 -10
- data/lib/nokogiri/xml/node_set.rb +3 -3
- data/lib/nokogiri/xml/parse_options.rb +22 -0
- data/lib/serializer.jar +0 -0
- data/lib/xalan.jar +0 -0
- data/lib/xercesImpl.jar +0 -0
- data/lib/xml-apis.jar +0 -0
- data/tasks/test.rb +5 -0
- data/test/css/test_parser.rb +7 -1
- data/test/files/GH_1042.html +18 -0
- data/test/files/namespace_pressure_test.xml +1684 -0
- data/test/files/tlm.html +2 -1
- data/test/helper.rb +4 -0
- data/test/html/sax/test_parser.rb +2 -2
- data/test/html/test_document.rb +47 -11
- data/test/html/test_document_encoding.rb +55 -58
- data/test/html/test_document_fragment.rb +27 -23
- data/test/html/test_node.rb +16 -0
- data/test/html/test_node_encoding.rb +71 -13
- data/test/namespaces/test_namespaces_in_parsed_doc.rb +14 -0
- data/test/test_css_cache.rb +1 -1
- data/test/test_encoding_handler.rb +2 -0
- data/test/test_xslt_transforms.rb +38 -3
- data/test/xml/sax/test_parser.rb +54 -53
- data/test/xml/test_document.rb +7 -2
- data/test/xml/test_document_encoding.rb +19 -16
- data/test/xml/test_document_fragment.rb +12 -0
- data/test/xml/test_dtd_encoding.rb +0 -2
- data/test/xml/test_namespace.rb +2 -2
- data/test/xml/test_node.rb +15 -4
- data/test/xml/test_node_attributes.rb +6 -0
- data/test/xml/test_node_encoding.rb +49 -87
- data/test/xml/test_node_reparenting.rb +193 -18
- data/test/xml/test_node_set.rb +1 -1
- data/test/xml/test_reader.rb +589 -0
- data/test/xml/test_reader_encoding.rb +100 -102
- data/test/xml/test_unparented_node.rb +14 -1
- data/test/xslt/test_exception_handling.rb +1 -1
- data/test_all +47 -33
- metadata +38 -36
- data/CHANGELOG.ja.rdoc +0 -1057
- data/test/test_reader.rb +0 -558
@@ -61,6 +61,20 @@ module Nokogiri
|
|
61
61
|
ns_attrs = n.to_xml.scan(/\bxmlns(?::.+?)?=/)
|
62
62
|
assert_equal 3, ns_attrs.length
|
63
63
|
end
|
64
|
+
|
65
|
+
def test_namespaces_under_memory_pressure_issue1155
|
66
|
+
skip("JRuby doesn't do GC.") if Nokogiri.jruby?
|
67
|
+
|
68
|
+
# this test is here to emit warnings when run under valgrind
|
69
|
+
# see https://github.com/sparklemotion/nokogiri/issues/1155 for background
|
70
|
+
filename = File.join ASSETS_DIR, 'namespace_pressure_test.xml'
|
71
|
+
doc = Nokogiri::XML File.open(filename)
|
72
|
+
|
73
|
+
# bizarrely, can't repro without the call to #to_a
|
74
|
+
doc.xpath('//namespace::*').to_a.each do |ns|
|
75
|
+
ns.inspect
|
76
|
+
end
|
77
|
+
end
|
64
78
|
end
|
65
79
|
end
|
66
80
|
end
|
data/test/test_css_cache.rb
CHANGED
@@ -6,7 +6,7 @@ class TestCssCache < Nokogiri::TestCase
|
|
6
6
|
super
|
7
7
|
@css = "a1 > b2 > c3"
|
8
8
|
@parse_result = Nokogiri::CSS.parse(@css)
|
9
|
-
@to_xpath_result = @parse_result.map
|
9
|
+
@to_xpath_result = @parse_result.map(&:to_xpath)
|
10
10
|
Nokogiri::CSS::Parser.class_eval do
|
11
11
|
class << @cache
|
12
12
|
alias :old_bracket :[]
|
@@ -32,7 +32,40 @@ class TestXsltTransforms < Nokogiri::TestCase
|
|
32
32
|
assert_match %r{<h1>Grandma</h1>}, result
|
33
33
|
|
34
34
|
assert result = style.apply_to(@doc)
|
35
|
-
assert_match %r{<h1></h1>}, result
|
35
|
+
assert_match %r{<h1></h1>|<h1/>}, result
|
36
|
+
end
|
37
|
+
|
38
|
+
def test_xml_declaration
|
39
|
+
input_xml = <<-EOS
|
40
|
+
<?xml version="1.0" encoding="utf-8"?>
|
41
|
+
<report>
|
42
|
+
<title>My Report</title>
|
43
|
+
</report>
|
44
|
+
EOS
|
45
|
+
|
46
|
+
input_xsl = <<-EOS
|
47
|
+
<?xml version="1.0" encoding="utf-8"?>
|
48
|
+
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
|
49
|
+
<xsl:output method="xml" version="1.0" encoding="utf-8" indent="yes"/>
|
50
|
+
<xsl:template match="/">
|
51
|
+
<html>
|
52
|
+
<head>
|
53
|
+
<title><xsl:value-of select="report/title"/></title>
|
54
|
+
</head>
|
55
|
+
<body>
|
56
|
+
<h1><xsl:value-of select="report/title"/></h1>
|
57
|
+
</body>
|
58
|
+
</html>
|
59
|
+
</xsl:template>
|
60
|
+
</xsl:stylesheet>
|
61
|
+
EOS
|
62
|
+
|
63
|
+
require 'nokogiri'
|
64
|
+
|
65
|
+
xml = ::Nokogiri::XML(input_xml)
|
66
|
+
xsl = ::Nokogiri::XSLT(input_xsl)
|
67
|
+
|
68
|
+
assert_includes xsl.apply_to(xml), '<?xml version="1.0" encoding="utf-8"?>'
|
36
69
|
end
|
37
70
|
|
38
71
|
def test_transform_with_output_style
|
@@ -95,7 +128,9 @@ encoding="iso-8859-1" indent="yes"/>
|
|
95
128
|
</xsl:stylesheet>
|
96
129
|
eoxslt
|
97
130
|
end
|
98
|
-
|
131
|
+
result = xslt.apply_to(@doc, ['title', 'foo'])
|
132
|
+
assert_no_match(/<td>/, result)
|
133
|
+
assert_match(/This is an adjacent/, result)
|
99
134
|
end
|
100
135
|
|
101
136
|
def test_transform_arg_error
|
@@ -268,7 +303,7 @@ encoding="iso-8859-1" indent="yes"/>
|
|
268
303
|
<xsl:output encoding="UTF-8" indent="yes" method="xml" />
|
269
304
|
|
270
305
|
<xsl:template match="/">
|
271
|
-
<xsl:value-of select="/a" />
|
306
|
+
<a><xsl:value-of select="/a" /></a>
|
272
307
|
</xsl:template>
|
273
308
|
</xsl:stylesheet>
|
274
309
|
EOXSL
|
data/test/xml/sax/test_parser.rb
CHANGED
@@ -44,13 +44,21 @@ module Nokogiri
|
|
44
44
|
end
|
45
45
|
|
46
46
|
def test_xml_decl
|
47
|
-
|
48
|
-
''
|
49
|
-
'<?xml version="1.0" ?>'
|
50
|
-
|
51
|
-
'<?xml version="1.0"
|
52
|
-
|
53
|
-
|
47
|
+
[
|
48
|
+
['', nil],
|
49
|
+
['<?xml version="1.0" ?>',
|
50
|
+
['1.0']],
|
51
|
+
['<?xml version="1.0" encoding="UTF-8" ?>',
|
52
|
+
['1.0', 'UTF-8']],
|
53
|
+
['<?xml version="1.0" standalone="yes"?>',
|
54
|
+
['1.0', 'yes']],
|
55
|
+
['<?xml version="1.0" standalone="no"?>',
|
56
|
+
['1.0', 'no']],
|
57
|
+
['<?xml version="1.0" encoding="UTF-8" standalone="no"?>',
|
58
|
+
['1.0', "UTF-8", 'no']],
|
59
|
+
['<?xml version="1.0" encoding="ISO-8859-1" standalone="yes"?>',
|
60
|
+
['1.0', "ISO-8859-1", 'yes']]
|
61
|
+
].each do |decl, value|
|
54
62
|
parser = XML::SAX::Parser.new(Doc.new)
|
55
63
|
|
56
64
|
xml = "#{decl}\n<root />"
|
@@ -136,12 +144,8 @@ module Nokogiri
|
|
136
144
|
</root>
|
137
145
|
eoxml
|
138
146
|
assert_equal 5, @parser.document.start_elements.length
|
139
|
-
assert @parser.document.start_elements.map
|
140
|
-
|
141
|
-
}.include?('foo:bar')
|
142
|
-
assert @parser.document.end_elements.map { |se|
|
143
|
-
se.first
|
144
|
-
}.include?('foo:bar')
|
147
|
+
assert @parser.document.start_elements.map(&:first).include?('foo:bar')
|
148
|
+
assert @parser.document.end_elements.map(&:first).include?('foo:bar')
|
145
149
|
end
|
146
150
|
|
147
151
|
def test_start_is_called_without_namespace
|
@@ -151,7 +155,7 @@ module Nokogiri
|
|
151
155
|
</root>
|
152
156
|
eoxml
|
153
157
|
assert_equal ['root', 'foo:f', 'bar'],
|
154
|
-
@parser.document.start_elements.map
|
158
|
+
@parser.document.start_elements.map(&:first)
|
155
159
|
end
|
156
160
|
|
157
161
|
def test_parser_sets_encoding
|
@@ -167,10 +171,8 @@ module Nokogiri
|
|
167
171
|
assert @parser.document.errors
|
168
172
|
assert @parser.document.errors.length > 0
|
169
173
|
|
170
|
-
|
171
|
-
|
172
|
-
assert_equal 'UTF-8', error.message.encoding.name
|
173
|
-
end
|
174
|
+
doc.errors.each do |error|
|
175
|
+
assert_equal 'UTF-8', error.message.encoding.name
|
174
176
|
end
|
175
177
|
|
176
178
|
# when using JRuby Nokogiri, more errors will be generated as the DOM
|
@@ -207,42 +209,41 @@ module Nokogiri
|
|
207
209
|
@parser.parse_io(f, encoding)
|
208
210
|
}
|
209
211
|
assert(@parser.document.cdata_blocks.length > 0)
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
end
|
244
|
-
assert called
|
212
|
+
|
213
|
+
called = false
|
214
|
+
@parser.document.start_elements.flatten.each do |thing|
|
215
|
+
assert_equal 'UTF-8', thing.encoding.name
|
216
|
+
called = true
|
217
|
+
end
|
218
|
+
assert called
|
219
|
+
|
220
|
+
called = false
|
221
|
+
@parser.document.end_elements.flatten.each do |thing|
|
222
|
+
assert_equal 'UTF-8', thing.encoding.name
|
223
|
+
called = true
|
224
|
+
end
|
225
|
+
assert called
|
226
|
+
|
227
|
+
called = false
|
228
|
+
@parser.document.data.each do |thing|
|
229
|
+
assert_equal 'UTF-8', thing.encoding.name
|
230
|
+
called = true
|
231
|
+
end
|
232
|
+
assert called
|
233
|
+
|
234
|
+
called = false
|
235
|
+
@parser.document.comments.flatten.each do |thing|
|
236
|
+
assert_equal 'UTF-8', thing.encoding.name
|
237
|
+
called = true
|
238
|
+
end
|
239
|
+
assert called
|
240
|
+
|
241
|
+
called = false
|
242
|
+
@parser.document.cdata_blocks.flatten.each do |thing|
|
243
|
+
assert_equal 'UTF-8', thing.encoding.name
|
244
|
+
called = true
|
245
245
|
end
|
246
|
+
assert called
|
246
247
|
end
|
247
248
|
|
248
249
|
def test_parse_file
|
data/test/xml/test_document.rb
CHANGED
@@ -28,8 +28,13 @@ module Nokogiri
|
|
28
28
|
|
29
29
|
# issue #1005
|
30
30
|
def test_strict_parsing_empty_doc_should_raise_exception
|
31
|
-
|
32
|
-
|
31
|
+
["", " "].each do |empty_string|
|
32
|
+
assert_raises(SyntaxError, "empty string '#{empty_string}' should raise a SyntaxError") do
|
33
|
+
Nokogiri::XML(empty_string) { |c| c.strict }
|
34
|
+
end
|
35
|
+
assert_raises(SyntaxError, "StringIO of '#{empty_string}' should raise a SyntaxError") do
|
36
|
+
Nokogiri::XML(StringIO.new(empty_string)) { |c| c.strict }
|
37
|
+
end
|
33
38
|
end
|
34
39
|
end
|
35
40
|
|
@@ -2,27 +2,30 @@ require "helper"
|
|
2
2
|
|
3
3
|
module Nokogiri
|
4
4
|
module XML
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
end
|
5
|
+
class TestDocumentEncoding < Nokogiri::TestCase
|
6
|
+
def setup
|
7
|
+
super
|
8
|
+
@xml = Nokogiri::XML(File.read(SHIFT_JIS_XML), SHIFT_JIS_XML)
|
9
|
+
end
|
11
10
|
|
12
|
-
|
13
|
-
|
14
|
-
|
11
|
+
def test_url
|
12
|
+
assert_equal 'UTF-8', @xml.url.encoding.name
|
13
|
+
end
|
15
14
|
|
16
|
-
|
17
|
-
|
18
|
-
|
15
|
+
def test_encoding
|
16
|
+
assert_equal 'UTF-8', @xml.encoding.encoding.name
|
17
|
+
end
|
19
18
|
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
end
|
19
|
+
def test_dotted_version
|
20
|
+
if Nokogiri.uses_libxml?
|
21
|
+
assert_equal 'UTF-8', Nokogiri::LIBXML_VERSION.encoding.name
|
24
22
|
end
|
25
23
|
end
|
24
|
+
|
25
|
+
def test_empty_doc_encoding
|
26
|
+
encoding = 'US-ASCII'
|
27
|
+
assert_equal encoding, Nokogiri::XML(nil, nil, encoding).encoding
|
28
|
+
end
|
26
29
|
end
|
27
30
|
end
|
28
31
|
end
|
@@ -224,6 +224,18 @@ module Nokogiri
|
|
224
224
|
Nokogiri::XML::Comment.new(frag,'moo')
|
225
225
|
end
|
226
226
|
|
227
|
+
def test_issue_1077_parsing_of_frozen_strings
|
228
|
+
input = <<-EOS
|
229
|
+
<?xml version="1.0" encoding="utf-8"?>
|
230
|
+
<library>
|
231
|
+
<book title="I like turtles"/>
|
232
|
+
</library>
|
233
|
+
EOS
|
234
|
+
input.freeze
|
235
|
+
|
236
|
+
Nokogiri::XML::DocumentFragment.parse(input) # assert_nothing_raised
|
237
|
+
end
|
238
|
+
|
227
239
|
if Nokogiri.uses_libxml?
|
228
240
|
def test_for_libxml_in_context_fragment_parsing_bug_workaround
|
229
241
|
10.times do
|
data/test/xml/test_namespace.rb
CHANGED
@@ -40,7 +40,7 @@ module Nokogiri
|
|
40
40
|
|
41
41
|
def test_namespace_node_prefix
|
42
42
|
namespaces = @xml.root.namespace_definitions
|
43
|
-
assert_equal [nil, 'foo'], namespaces.map
|
43
|
+
assert_equal [nil, 'foo'], namespaces.map(&:prefix)
|
44
44
|
end
|
45
45
|
|
46
46
|
def test_namespace_node_href
|
@@ -48,7 +48,7 @@ module Nokogiri
|
|
48
48
|
assert_equal [
|
49
49
|
'http://tenderlovemaking.com/',
|
50
50
|
'bar'
|
51
|
-
], namespaces.map
|
51
|
+
], namespaces.map(&:href)
|
52
52
|
end
|
53
53
|
|
54
54
|
def test_equality
|
data/test/xml/test_node.rb
CHANGED
@@ -19,7 +19,7 @@ module Nokogiri
|
|
19
19
|
def test_element_children
|
20
20
|
nodes = @xml.root.element_children
|
21
21
|
assert_equal @xml.root.first_element_child, nodes.first
|
22
|
-
assert nodes.all?
|
22
|
+
assert nodes.all?(&:element?), 'all nodes are elements'
|
23
23
|
end
|
24
24
|
|
25
25
|
def test_last_element_child
|
@@ -622,7 +622,7 @@ module Nokogiri
|
|
622
622
|
address = @xml.xpath('//address').first
|
623
623
|
assert_equal 3, address.ancestors.length
|
624
624
|
assert_equal ['employee', 'staff', 'document'],
|
625
|
-
address.ancestors.map
|
625
|
+
address.ancestors.map(&:name)
|
626
626
|
end
|
627
627
|
|
628
628
|
def test_read_only?
|
@@ -714,7 +714,7 @@ b"></div>
|
|
714
714
|
eoxml
|
715
715
|
set = xml.css('a[@class~="bar"]')
|
716
716
|
assert_equal 4, set.length
|
717
|
-
assert_equal ['Bar'], set.map
|
717
|
+
assert_equal ['Bar'], set.map(&:content).uniq
|
718
718
|
end
|
719
719
|
|
720
720
|
def test_unlink
|
@@ -892,7 +892,7 @@ b"></div>
|
|
892
892
|
|
893
893
|
def test_whitespace_nodes
|
894
894
|
doc = Nokogiri::XML.parse("<root><b>Foo</b>\n<i>Bar</i> <p>Bazz</p></root>")
|
895
|
-
children = doc.at('//root').children.collect
|
895
|
+
children = doc.at('//root').children.collect(&:to_s)
|
896
896
|
assert_equal "\n", children[1]
|
897
897
|
assert_equal " ", children[3]
|
898
898
|
end
|
@@ -1228,6 +1228,17 @@ eoxml
|
|
1228
1228
|
subject.lang = "fr"
|
1229
1229
|
assert_equal "fr", subject.lang
|
1230
1230
|
end
|
1231
|
+
|
1232
|
+
def test_text_node_robustness_gh1426
|
1233
|
+
# notably, the original bug report was about libxml-ruby interactions
|
1234
|
+
# this test should blow up under valgrind if we regress on libxml-ruby workarounds
|
1235
|
+
message = "<h2>BOOM!</h2>"
|
1236
|
+
10_000.times do
|
1237
|
+
node = Nokogiri::HTML::DocumentFragment.parse(message)
|
1238
|
+
node.add_previous_sibling(Nokogiri::XML::Text.new('before', node.document))
|
1239
|
+
node.add_next_sibling(Nokogiri::XML::Text.new('after', node.document))
|
1240
|
+
end
|
1241
|
+
end
|
1231
1242
|
end
|
1232
1243
|
end
|
1233
1244
|
end
|
@@ -26,6 +26,12 @@ module Nokogiri
|
|
26
26
|
assert_equal nil, node['lang']
|
27
27
|
end
|
28
28
|
|
29
|
+
def test_unknown_namespace_prefix_should_not_be_removed
|
30
|
+
doc = Nokogiri::XML ''
|
31
|
+
elem = doc.create_element 'foo', 'bar:attr' => 'something'
|
32
|
+
assert_equal elem.attribute_nodes.first.name, 'bar:attr'
|
33
|
+
end
|
34
|
+
|
29
35
|
def test_set_prefixed_attributes
|
30
36
|
doc = Nokogiri::XML %Q{<root xmlns:foo="x"/>}
|
31
37
|
|
@@ -1,105 +1,67 @@
|
|
1
|
+
# encoding: UTF-8
|
1
2
|
require "helper"
|
2
3
|
|
3
4
|
module Nokogiri
|
4
5
|
module XML
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
@
|
10
|
-
end
|
11
|
-
|
12
|
-
def test_get_attribute
|
13
|
-
node = @html.css('a').first
|
14
|
-
assert_equal @html.encoding, node['href'].encoding.name
|
15
|
-
end
|
16
|
-
|
17
|
-
def test_text_encoding_is_utf_8
|
18
|
-
@html = Nokogiri::HTML(File.open(NICH_FILE))
|
19
|
-
assert_equal 'UTF-8', @html.text.encoding.name
|
20
|
-
end
|
6
|
+
class TestNodeEncoding < Nokogiri::TestCase
|
7
|
+
def test_serialize_encoding_xml
|
8
|
+
@xml = Nokogiri::XML(File.open(SHIFT_JIS_XML))
|
9
|
+
assert_equal @xml.encoding.downcase,
|
10
|
+
@xml.serialize.encoding.name.downcase
|
21
11
|
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
@html.serialize.encoding.name.downcase
|
12
|
+
@doc = Nokogiri::XML(@xml.serialize)
|
13
|
+
assert_equal @xml.serialize, @doc.serialize
|
14
|
+
end
|
26
15
|
|
27
|
-
|
28
|
-
|
29
|
-
|
16
|
+
def test_encoding_GH_1113
|
17
|
+
utf8 = '<frag>shahid ὡ 𐄣 𢂁</frag>'
|
18
|
+
hex = '<frag>shahid ὡ 𐄣 𢂁</frag>'
|
19
|
+
decimal = '<frag>shahid ὡ 𐄣 𢂁</frag>'
|
20
|
+
expected = Nokogiri.jruby? ? hex : decimal
|
30
21
|
|
31
|
-
|
32
|
-
|
33
|
-
assert_equal @xml.encoding.downcase,
|
34
|
-
@xml.serialize.encoding.name.downcase
|
22
|
+
frag = Nokogiri::XML(utf8, nil, 'UTF-8', Nokogiri::XML::ParseOptions::STRICT)
|
23
|
+
assert_equal utf8, frag.to_xml.sub(/^<.xml[^>]*>\n/m, '').strip
|
35
24
|
|
36
|
-
|
37
|
-
|
38
|
-
end
|
25
|
+
frag = Nokogiri::XML(expected, nil, 'UTF-8', Nokogiri::XML::ParseOptions::STRICT)
|
26
|
+
assert_equal utf8, frag.to_xml.sub(/^<.xml[^>]*>\n/m, '').strip
|
39
27
|
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
end
|
28
|
+
frag = Nokogiri::XML(expected, nil, 'US-ASCII', Nokogiri::XML::ParseOptions::STRICT)
|
29
|
+
assert_equal expected, frag.to_xml.sub(/^<.xml[^>]*>\n/m, '').strip
|
30
|
+
end
|
44
31
|
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
32
|
+
VEHICLE_XML = <<-eoxml
|
33
|
+
<root>
|
34
|
+
<car xmlns:part="http://general-motors.com/">
|
35
|
+
<part:tire>Michelin Model XGV</part:tire>
|
36
|
+
</car>
|
37
|
+
<bicycle xmlns:part="http://schwinn.com/">
|
38
|
+
<part:tire>I'm a bicycle tire!</part:tire>
|
39
|
+
</bicycle>
|
40
|
+
</root>
|
41
|
+
eoxml
|
49
42
|
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
43
|
+
def test_namespace
|
44
|
+
doc = Nokogiri::XML(VEHICLE_XML.encode('Shift_JIS'), nil, 'Shift_JIS')
|
45
|
+
assert_equal 'Shift_JIS', doc.encoding
|
46
|
+
n = doc.xpath('//part:tire', { 'part' => 'http://schwinn.com/' }).first
|
47
|
+
assert n
|
48
|
+
assert_equal 'UTF-8', n.namespace.href.encoding.name
|
49
|
+
assert_equal 'UTF-8', n.namespace.prefix.encoding.name
|
50
|
+
end
|
54
51
|
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
52
|
+
def test_namespace_as_hash
|
53
|
+
doc = Nokogiri::XML(VEHICLE_XML.encode('Shift_JIS'), nil, 'Shift_JIS')
|
54
|
+
assert_equal 'Shift_JIS', doc.encoding
|
55
|
+
assert n = doc.xpath('//car').first
|
59
56
|
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
<car xmlns:part="http://general-motors.com/">
|
64
|
-
<part:tire>Michelin Model XGV</part:tire>
|
65
|
-
</car>
|
66
|
-
<bicycle xmlns:part="http://schwinn.com/">
|
67
|
-
<part:tire>I'm a bicycle tire!</part:tire>
|
68
|
-
</bicycle>
|
69
|
-
</root>
|
70
|
-
eoxml
|
71
|
-
doc = Nokogiri::XML(xml, nil, 'UTF-8')
|
72
|
-
assert_equal 'UTF-8', doc.encoding
|
73
|
-
n = doc.xpath('//part:tire', { 'part' => 'http://schwinn.com/' }).first
|
74
|
-
assert n
|
75
|
-
assert_equal doc.encoding, n.namespace.href.encoding.name
|
76
|
-
assert_equal doc.encoding, n.namespace.prefix.encoding.name
|
57
|
+
n.namespace_definitions.each do |nd|
|
58
|
+
assert_equal 'UTF-8', nd.href.encoding.name
|
59
|
+
assert_equal 'UTF-8', nd.prefix.encoding.name
|
77
60
|
end
|
78
61
|
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
<car xmlns:part="http://general-motors.com/">
|
83
|
-
<part:tire>Michelin Model XGV</part:tire>
|
84
|
-
</car>
|
85
|
-
<bicycle xmlns:part="http://schwinn.com/">
|
86
|
-
<part:tire>I'm a bicycle tire!</part:tire>
|
87
|
-
</bicycle>
|
88
|
-
</root>
|
89
|
-
eoxml
|
90
|
-
doc = Nokogiri::XML(xml, nil, 'UTF-8')
|
91
|
-
assert_equal 'UTF-8', doc.encoding
|
92
|
-
assert n = doc.xpath('//car').first
|
93
|
-
|
94
|
-
n.namespace_definitions.each do |nd|
|
95
|
-
assert_equal doc.encoding, nd.href.encoding.name
|
96
|
-
assert_equal doc.encoding, nd.prefix.encoding.name
|
97
|
-
end
|
98
|
-
|
99
|
-
n.namespaces.each do |k,v|
|
100
|
-
assert_equal doc.encoding, k.encoding.name
|
101
|
-
assert_equal doc.encoding, v.encoding.name
|
102
|
-
end
|
62
|
+
n.namespaces.each do |k,v|
|
63
|
+
assert_equal 'UTF-8', k.encoding.name
|
64
|
+
assert_equal 'UTF-8', v.encoding.name
|
103
65
|
end
|
104
66
|
end
|
105
67
|
end
|