nokogiri 1.6.7.2-java → 1.6.8-java

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (107) hide show
  1. checksums.yaml +4 -4
  2. data/.cross_rubies +2 -0
  3. data/.travis.yml +19 -9
  4. data/CHANGELOG.rdoc +73 -5
  5. data/CONTRIBUTING.md +42 -0
  6. data/Gemfile +10 -9
  7. data/LICENSE.txt +1 -1
  8. data/Manifest.txt +7 -2
  9. data/README.md +23 -27
  10. data/ROADMAP.md +11 -1
  11. data/Rakefile +36 -17
  12. data/bin/nokogiri +2 -2
  13. data/dependencies.yml +29 -4
  14. data/ext/java/nokogiri/HtmlElementDescription.java +5 -2
  15. data/ext/java/nokogiri/NokogiriService.java +19 -0
  16. data/ext/java/nokogiri/XmlAttr.java +3 -1
  17. data/ext/java/nokogiri/XmlDocumentFragment.java +0 -14
  18. data/ext/java/nokogiri/XmlNode.java +106 -63
  19. data/ext/java/nokogiri/XmlXpathContext.java +12 -12
  20. data/ext/java/nokogiri/XsltStylesheet.java +11 -4
  21. data/ext/java/nokogiri/internals/HtmlDomParserContext.java +8 -1
  22. data/ext/java/nokogiri/internals/NokogiriErrorHandler.java +1 -2
  23. data/ext/java/nokogiri/internals/NokogiriHelpers.java +7 -7
  24. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java +1 -1
  25. data/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java +0 -1
  26. data/ext/java/nokogiri/internals/NokogiriXsltErrorListener.java +3 -3
  27. data/ext/java/nokogiri/internals/ParserContext.java +4 -0
  28. data/ext/java/nokogiri/internals/SaveContextVisitor.java +18 -13
  29. data/ext/nokogiri/extconf.rb +163 -79
  30. data/ext/nokogiri/html_document.c +6 -6
  31. data/ext/nokogiri/html_element_description.c +1 -1
  32. data/ext/nokogiri/html_entity_lookup.c +1 -1
  33. data/ext/nokogiri/html_sax_parser_context.c +4 -4
  34. data/ext/nokogiri/html_sax_push_parser.c +2 -2
  35. data/ext/nokogiri/nokogiri.c +0 -7
  36. data/ext/nokogiri/nokogiri.h +1 -34
  37. data/ext/nokogiri/xml_attr.c +2 -2
  38. data/ext/nokogiri/xml_comment.c +1 -1
  39. data/ext/nokogiri/xml_document.c +20 -22
  40. data/ext/nokogiri/xml_encoding_handler.c +3 -3
  41. data/ext/nokogiri/xml_entity_reference.c +1 -1
  42. data/ext/nokogiri/xml_namespace.c +56 -17
  43. data/ext/nokogiri/xml_node.c +73 -67
  44. data/ext/nokogiri/xml_node_set.c +164 -146
  45. data/ext/nokogiri/xml_node_set.h +3 -4
  46. data/ext/nokogiri/xml_processing_instruction.c +2 -2
  47. data/ext/nokogiri/xml_reader.c +5 -18
  48. data/ext/nokogiri/xml_sax_parser.c +9 -12
  49. data/ext/nokogiri/xml_sax_parser_context.c +1 -1
  50. data/ext/nokogiri/xml_sax_push_parser.c +1 -1
  51. data/ext/nokogiri/xml_schema.c +1 -1
  52. data/ext/nokogiri/xml_syntax_error.c +0 -4
  53. data/ext/nokogiri/xml_syntax_error.h +0 -1
  54. data/ext/nokogiri/xml_text.c +1 -1
  55. data/ext/nokogiri/xml_xpath_context.c +15 -24
  56. data/ext/nokogiri/xslt_stylesheet.c +6 -6
  57. data/lib/nekohtml.jar +0 -0
  58. data/lib/nokogiri.rb +14 -7
  59. data/lib/nokogiri/css/parser.rb +8 -2
  60. data/lib/nokogiri/css/parser.y +7 -2
  61. data/lib/nokogiri/html/document.rb +4 -2
  62. data/lib/nokogiri/nokogiri.jar +0 -0
  63. data/lib/nokogiri/version.rb +1 -1
  64. data/lib/nokogiri/xml/document.rb +7 -1
  65. data/lib/nokogiri/xml/dtd.rb +4 -4
  66. data/lib/nokogiri/xml/node.rb +6 -10
  67. data/lib/nokogiri/xml/node_set.rb +3 -3
  68. data/lib/nokogiri/xml/parse_options.rb +22 -0
  69. data/lib/serializer.jar +0 -0
  70. data/lib/xalan.jar +0 -0
  71. data/lib/xercesImpl.jar +0 -0
  72. data/lib/xml-apis.jar +0 -0
  73. data/tasks/test.rb +5 -0
  74. data/test/css/test_parser.rb +7 -1
  75. data/test/files/GH_1042.html +18 -0
  76. data/test/files/namespace_pressure_test.xml +1684 -0
  77. data/test/files/tlm.html +2 -1
  78. data/test/helper.rb +4 -0
  79. data/test/html/sax/test_parser.rb +2 -2
  80. data/test/html/test_document.rb +47 -11
  81. data/test/html/test_document_encoding.rb +55 -58
  82. data/test/html/test_document_fragment.rb +27 -23
  83. data/test/html/test_node.rb +16 -0
  84. data/test/html/test_node_encoding.rb +71 -13
  85. data/test/namespaces/test_namespaces_in_parsed_doc.rb +14 -0
  86. data/test/test_css_cache.rb +1 -1
  87. data/test/test_encoding_handler.rb +2 -0
  88. data/test/test_xslt_transforms.rb +38 -3
  89. data/test/xml/sax/test_parser.rb +54 -53
  90. data/test/xml/test_document.rb +7 -2
  91. data/test/xml/test_document_encoding.rb +19 -16
  92. data/test/xml/test_document_fragment.rb +12 -0
  93. data/test/xml/test_dtd_encoding.rb +0 -2
  94. data/test/xml/test_namespace.rb +2 -2
  95. data/test/xml/test_node.rb +15 -4
  96. data/test/xml/test_node_attributes.rb +6 -0
  97. data/test/xml/test_node_encoding.rb +49 -87
  98. data/test/xml/test_node_reparenting.rb +193 -18
  99. data/test/xml/test_node_set.rb +1 -1
  100. data/test/xml/test_reader.rb +589 -0
  101. data/test/xml/test_reader_encoding.rb +100 -102
  102. data/test/xml/test_unparented_node.rb +14 -1
  103. data/test/xslt/test_exception_handling.rb +1 -1
  104. data/test_all +47 -33
  105. metadata +38 -36
  106. data/CHANGELOG.ja.rdoc +0 -1057
  107. data/test/test_reader.rb +0 -558
@@ -61,6 +61,20 @@ module Nokogiri
61
61
  ns_attrs = n.to_xml.scan(/\bxmlns(?::.+?)?=/)
62
62
  assert_equal 3, ns_attrs.length
63
63
  end
64
+
65
+ def test_namespaces_under_memory_pressure_issue1155
66
+ skip("JRuby doesn't do GC.") if Nokogiri.jruby?
67
+
68
+ # this test is here to emit warnings when run under valgrind
69
+ # see https://github.com/sparklemotion/nokogiri/issues/1155 for background
70
+ filename = File.join ASSETS_DIR, 'namespace_pressure_test.xml'
71
+ doc = Nokogiri::XML File.open(filename)
72
+
73
+ # bizarrely, can't repro without the call to #to_a
74
+ doc.xpath('//namespace::*').to_a.each do |ns|
75
+ ns.inspect
76
+ end
77
+ end
64
78
  end
65
79
  end
66
80
  end
@@ -6,7 +6,7 @@ class TestCssCache < Nokogiri::TestCase
6
6
  super
7
7
  @css = "a1 > b2 > c3"
8
8
  @parse_result = Nokogiri::CSS.parse(@css)
9
- @to_xpath_result = @parse_result.map {|ast| ast.to_xpath}
9
+ @to_xpath_result = @parse_result.map(&:to_xpath)
10
10
  Nokogiri::CSS::Parser.class_eval do
11
11
  class << @cache
12
12
  alias :old_bracket :[]
@@ -5,6 +5,8 @@ require "helper"
5
5
  class TestEncodingHandler < Nokogiri::TestCase
6
6
  def teardown
7
7
  Nokogiri::EncodingHandler.clear_aliases!
8
+ #Replace default aliases removed by clear_aliases!
9
+ Nokogiri.install_default_aliases
8
10
  end
9
11
 
10
12
  def test_get
@@ -32,7 +32,40 @@ class TestXsltTransforms < Nokogiri::TestCase
32
32
  assert_match %r{<h1>Grandma</h1>}, result
33
33
 
34
34
  assert result = style.apply_to(@doc)
35
- assert_match %r{<h1></h1>}, result
35
+ assert_match %r{<h1></h1>|<h1/>}, result
36
+ end
37
+
38
+ def test_xml_declaration
39
+ input_xml = <<-EOS
40
+ <?xml version="1.0" encoding="utf-8"?>
41
+ <report>
42
+ <title>My Report</title>
43
+ </report>
44
+ EOS
45
+
46
+ input_xsl = <<-EOS
47
+ <?xml version="1.0" encoding="utf-8"?>
48
+ <xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
49
+ <xsl:output method="xml" version="1.0" encoding="utf-8" indent="yes"/>
50
+ <xsl:template match="/">
51
+ <html>
52
+ <head>
53
+ <title><xsl:value-of select="report/title"/></title>
54
+ </head>
55
+ <body>
56
+ <h1><xsl:value-of select="report/title"/></h1>
57
+ </body>
58
+ </html>
59
+ </xsl:template>
60
+ </xsl:stylesheet>
61
+ EOS
62
+
63
+ require 'nokogiri'
64
+
65
+ xml = ::Nokogiri::XML(input_xml)
66
+ xsl = ::Nokogiri::XSLT(input_xsl)
67
+
68
+ assert_includes xsl.apply_to(xml), '<?xml version="1.0" encoding="utf-8"?>'
36
69
  end
37
70
 
38
71
  def test_transform_with_output_style
@@ -95,7 +128,9 @@ encoding="iso-8859-1" indent="yes"/>
95
128
  </xsl:stylesheet>
96
129
  eoxslt
97
130
  end
98
- assert_no_match(/<td>/, xslt.apply_to(@doc, ['title', 'foo']))
131
+ result = xslt.apply_to(@doc, ['title', 'foo'])
132
+ assert_no_match(/<td>/, result)
133
+ assert_match(/This is an adjacent/, result)
99
134
  end
100
135
 
101
136
  def test_transform_arg_error
@@ -268,7 +303,7 @@ encoding="iso-8859-1" indent="yes"/>
268
303
  <xsl:output encoding="UTF-8" indent="yes" method="xml" />
269
304
 
270
305
  <xsl:template match="/">
271
- <xsl:value-of select="/a" />
306
+ <a><xsl:value-of select="/a" /></a>
272
307
  </xsl:template>
273
308
  </xsl:stylesheet>
274
309
  EOXSL
@@ -44,13 +44,21 @@ module Nokogiri
44
44
  end
45
45
 
46
46
  def test_xml_decl
47
- {
48
- '' => nil,
49
- '<?xml version="1.0" ?>' => ['1.0'],
50
- '<?xml version="1.0" encoding="UTF-8" ?>' => ['1.0', 'UTF-8'],
51
- '<?xml version="1.0" standalone="yes"?>' => ['1.0', 'yes'],
52
- '<?xml version="1.0" standalone="no"?>' => ['1.0', 'no'],
53
- }.each do |decl,value|
47
+ [
48
+ ['', nil],
49
+ ['<?xml version="1.0" ?>',
50
+ ['1.0']],
51
+ ['<?xml version="1.0" encoding="UTF-8" ?>',
52
+ ['1.0', 'UTF-8']],
53
+ ['<?xml version="1.0" standalone="yes"?>',
54
+ ['1.0', 'yes']],
55
+ ['<?xml version="1.0" standalone="no"?>',
56
+ ['1.0', 'no']],
57
+ ['<?xml version="1.0" encoding="UTF-8" standalone="no"?>',
58
+ ['1.0', "UTF-8", 'no']],
59
+ ['<?xml version="1.0" encoding="ISO-8859-1" standalone="yes"?>',
60
+ ['1.0', "ISO-8859-1", 'yes']]
61
+ ].each do |decl, value|
54
62
  parser = XML::SAX::Parser.new(Doc.new)
55
63
 
56
64
  xml = "#{decl}\n<root />"
@@ -136,12 +144,8 @@ module Nokogiri
136
144
  </root>
137
145
  eoxml
138
146
  assert_equal 5, @parser.document.start_elements.length
139
- assert @parser.document.start_elements.map { |se|
140
- se.first
141
- }.include?('foo:bar')
142
- assert @parser.document.end_elements.map { |se|
143
- se.first
144
- }.include?('foo:bar')
147
+ assert @parser.document.start_elements.map(&:first).include?('foo:bar')
148
+ assert @parser.document.end_elements.map(&:first).include?('foo:bar')
145
149
  end
146
150
 
147
151
  def test_start_is_called_without_namespace
@@ -151,7 +155,7 @@ module Nokogiri
151
155
  </root>
152
156
  eoxml
153
157
  assert_equal ['root', 'foo:f', 'bar'],
154
- @parser.document.start_elements.map { |x| x.first }
158
+ @parser.document.start_elements.map(&:first)
155
159
  end
156
160
 
157
161
  def test_parser_sets_encoding
@@ -167,10 +171,8 @@ module Nokogiri
167
171
  assert @parser.document.errors
168
172
  assert @parser.document.errors.length > 0
169
173
 
170
- if RUBY_VERSION =~ /^1\.9/
171
- doc.errors.each do |error|
172
- assert_equal 'UTF-8', error.message.encoding.name
173
- end
174
+ doc.errors.each do |error|
175
+ assert_equal 'UTF-8', error.message.encoding.name
174
176
  end
175
177
 
176
178
  # when using JRuby Nokogiri, more errors will be generated as the DOM
@@ -207,42 +209,41 @@ module Nokogiri
207
209
  @parser.parse_io(f, encoding)
208
210
  }
209
211
  assert(@parser.document.cdata_blocks.length > 0)
210
- if RUBY_VERSION =~ /^1\.9/
211
- called = false
212
- @parser.document.start_elements.flatten.each do |thing|
213
- assert_equal 'UTF-8', thing.encoding.name
214
- called = true
215
- end
216
- assert called
217
-
218
- called = false
219
- @parser.document.end_elements.flatten.each do |thing|
220
- assert_equal 'UTF-8', thing.encoding.name
221
- called = true
222
- end
223
- assert called
224
-
225
- called = false
226
- @parser.document.data.each do |thing|
227
- assert_equal 'UTF-8', thing.encoding.name
228
- called = true
229
- end
230
- assert called
231
-
232
- called = false
233
- @parser.document.comments.flatten.each do |thing|
234
- assert_equal 'UTF-8', thing.encoding.name
235
- called = true
236
- end
237
- assert called
238
-
239
- called = false
240
- @parser.document.cdata_blocks.flatten.each do |thing|
241
- assert_equal 'UTF-8', thing.encoding.name
242
- called = true
243
- end
244
- assert called
212
+
213
+ called = false
214
+ @parser.document.start_elements.flatten.each do |thing|
215
+ assert_equal 'UTF-8', thing.encoding.name
216
+ called = true
217
+ end
218
+ assert called
219
+
220
+ called = false
221
+ @parser.document.end_elements.flatten.each do |thing|
222
+ assert_equal 'UTF-8', thing.encoding.name
223
+ called = true
224
+ end
225
+ assert called
226
+
227
+ called = false
228
+ @parser.document.data.each do |thing|
229
+ assert_equal 'UTF-8', thing.encoding.name
230
+ called = true
231
+ end
232
+ assert called
233
+
234
+ called = false
235
+ @parser.document.comments.flatten.each do |thing|
236
+ assert_equal 'UTF-8', thing.encoding.name
237
+ called = true
238
+ end
239
+ assert called
240
+
241
+ called = false
242
+ @parser.document.cdata_blocks.flatten.each do |thing|
243
+ assert_equal 'UTF-8', thing.encoding.name
244
+ called = true
245
245
  end
246
+ assert called
246
247
  end
247
248
 
248
249
  def test_parse_file
@@ -28,8 +28,13 @@ module Nokogiri
28
28
 
29
29
  # issue #1005
30
30
  def test_strict_parsing_empty_doc_should_raise_exception
31
- assert_raises(SyntaxError) do
32
- Nokogiri::XML(StringIO.new('')) { |c| c.strict }
31
+ ["", " "].each do |empty_string|
32
+ assert_raises(SyntaxError, "empty string '#{empty_string}' should raise a SyntaxError") do
33
+ Nokogiri::XML(empty_string) { |c| c.strict }
34
+ end
35
+ assert_raises(SyntaxError, "StringIO of '#{empty_string}' should raise a SyntaxError") do
36
+ Nokogiri::XML(StringIO.new(empty_string)) { |c| c.strict }
37
+ end
33
38
  end
34
39
  end
35
40
 
@@ -2,27 +2,30 @@ require "helper"
2
2
 
3
3
  module Nokogiri
4
4
  module XML
5
- if RUBY_VERSION =~ /^1\.9/
6
- class TestDocumentEncoding < Nokogiri::TestCase
7
- def setup
8
- super
9
- @xml = Nokogiri::XML(File.read(XML_FILE), XML_FILE, 'UTF-8')
10
- end
5
+ class TestDocumentEncoding < Nokogiri::TestCase
6
+ def setup
7
+ super
8
+ @xml = Nokogiri::XML(File.read(SHIFT_JIS_XML), SHIFT_JIS_XML)
9
+ end
11
10
 
12
- def test_url
13
- assert_equal @xml.encoding, @xml.url.encoding.name
14
- end
11
+ def test_url
12
+ assert_equal 'UTF-8', @xml.url.encoding.name
13
+ end
15
14
 
16
- def test_encoding
17
- assert_equal @xml.encoding, @xml.encoding.encoding.name
18
- end
15
+ def test_encoding
16
+ assert_equal 'UTF-8', @xml.encoding.encoding.name
17
+ end
19
18
 
20
- def test_dotted_version
21
- if Nokogiri.uses_libxml?
22
- assert_equal 'UTF-8', Nokogiri::LIBXML_VERSION.encoding.name
23
- end
19
+ def test_dotted_version
20
+ if Nokogiri.uses_libxml?
21
+ assert_equal 'UTF-8', Nokogiri::LIBXML_VERSION.encoding.name
24
22
  end
25
23
  end
24
+
25
+ def test_empty_doc_encoding
26
+ encoding = 'US-ASCII'
27
+ assert_equal encoding, Nokogiri::XML(nil, nil, encoding).encoding
28
+ end
26
29
  end
27
30
  end
28
31
  end
@@ -224,6 +224,18 @@ module Nokogiri
224
224
  Nokogiri::XML::Comment.new(frag,'moo')
225
225
  end
226
226
 
227
+ def test_issue_1077_parsing_of_frozen_strings
228
+ input = <<-EOS
229
+ <?xml version="1.0" encoding="utf-8"?>
230
+ <library>
231
+ <book title="I like turtles"/>
232
+ </library>
233
+ EOS
234
+ input.freeze
235
+
236
+ Nokogiri::XML::DocumentFragment.parse(input) # assert_nothing_raised
237
+ end
238
+
227
239
  if Nokogiri.uses_libxml?
228
240
  def test_for_libxml_in_context_fragment_parsing_bug_workaround
229
241
  10.times do
@@ -4,7 +4,6 @@ require "helper"
4
4
 
5
5
  module Nokogiri
6
6
  module XML
7
- if RUBY_VERSION =~ /^1\.9/
8
7
  class TestDTDEncoding < Nokogiri::TestCase
9
8
  def setup
10
9
  super
@@ -28,6 +27,5 @@ module Nokogiri
28
27
  end
29
28
  end
30
29
  end
31
- end
32
30
  end
33
31
  end
@@ -40,7 +40,7 @@ module Nokogiri
40
40
 
41
41
  def test_namespace_node_prefix
42
42
  namespaces = @xml.root.namespace_definitions
43
- assert_equal [nil, 'foo'], namespaces.map { |x| x.prefix }
43
+ assert_equal [nil, 'foo'], namespaces.map(&:prefix)
44
44
  end
45
45
 
46
46
  def test_namespace_node_href
@@ -48,7 +48,7 @@ module Nokogiri
48
48
  assert_equal [
49
49
  'http://tenderlovemaking.com/',
50
50
  'bar'
51
- ], namespaces.map { |x| x.href }
51
+ ], namespaces.map(&:href)
52
52
  end
53
53
 
54
54
  def test_equality
@@ -19,7 +19,7 @@ module Nokogiri
19
19
  def test_element_children
20
20
  nodes = @xml.root.element_children
21
21
  assert_equal @xml.root.first_element_child, nodes.first
22
- assert nodes.all? { |node| node.element? }, 'all nodes are elements'
22
+ assert nodes.all?(&:element?), 'all nodes are elements'
23
23
  end
24
24
 
25
25
  def test_last_element_child
@@ -622,7 +622,7 @@ module Nokogiri
622
622
  address = @xml.xpath('//address').first
623
623
  assert_equal 3, address.ancestors.length
624
624
  assert_equal ['employee', 'staff', 'document'],
625
- address.ancestors.map { |x| x.name }
625
+ address.ancestors.map(&:name)
626
626
  end
627
627
 
628
628
  def test_read_only?
@@ -714,7 +714,7 @@ b"></div>
714
714
  eoxml
715
715
  set = xml.css('a[@class~="bar"]')
716
716
  assert_equal 4, set.length
717
- assert_equal ['Bar'], set.map { |node| node.content }.uniq
717
+ assert_equal ['Bar'], set.map(&:content).uniq
718
718
  end
719
719
 
720
720
  def test_unlink
@@ -892,7 +892,7 @@ b"></div>
892
892
 
893
893
  def test_whitespace_nodes
894
894
  doc = Nokogiri::XML.parse("<root><b>Foo</b>\n<i>Bar</i> <p>Bazz</p></root>")
895
- children = doc.at('//root').children.collect{|j| j.to_s}
895
+ children = doc.at('//root').children.collect(&:to_s)
896
896
  assert_equal "\n", children[1]
897
897
  assert_equal " ", children[3]
898
898
  end
@@ -1228,6 +1228,17 @@ eoxml
1228
1228
  subject.lang = "fr"
1229
1229
  assert_equal "fr", subject.lang
1230
1230
  end
1231
+
1232
+ def test_text_node_robustness_gh1426
1233
+ # notably, the original bug report was about libxml-ruby interactions
1234
+ # this test should blow up under valgrind if we regress on libxml-ruby workarounds
1235
+ message = "<h2>BOOM!</h2>"
1236
+ 10_000.times do
1237
+ node = Nokogiri::HTML::DocumentFragment.parse(message)
1238
+ node.add_previous_sibling(Nokogiri::XML::Text.new('before', node.document))
1239
+ node.add_next_sibling(Nokogiri::XML::Text.new('after', node.document))
1240
+ end
1241
+ end
1231
1242
  end
1232
1243
  end
1233
1244
  end
@@ -26,6 +26,12 @@ module Nokogiri
26
26
  assert_equal nil, node['lang']
27
27
  end
28
28
 
29
+ def test_unknown_namespace_prefix_should_not_be_removed
30
+ doc = Nokogiri::XML ''
31
+ elem = doc.create_element 'foo', 'bar:attr' => 'something'
32
+ assert_equal elem.attribute_nodes.first.name, 'bar:attr'
33
+ end
34
+
29
35
  def test_set_prefixed_attributes
30
36
  doc = Nokogiri::XML %Q{<root xmlns:foo="x"/>}
31
37
 
@@ -1,105 +1,67 @@
1
+ # encoding: UTF-8
1
2
  require "helper"
2
3
 
3
4
  module Nokogiri
4
5
  module XML
5
- if RUBY_VERSION =~ /^1\.9/
6
- class TestNodeEncoding < Nokogiri::TestCase
7
- def setup
8
- super
9
- @html = Nokogiri::HTML(File.read(HTML_FILE), HTML_FILE)
10
- end
11
-
12
- def test_get_attribute
13
- node = @html.css('a').first
14
- assert_equal @html.encoding, node['href'].encoding.name
15
- end
16
-
17
- def test_text_encoding_is_utf_8
18
- @html = Nokogiri::HTML(File.open(NICH_FILE))
19
- assert_equal 'UTF-8', @html.text.encoding.name
20
- end
6
+ class TestNodeEncoding < Nokogiri::TestCase
7
+ def test_serialize_encoding_xml
8
+ @xml = Nokogiri::XML(File.open(SHIFT_JIS_XML))
9
+ assert_equal @xml.encoding.downcase,
10
+ @xml.serialize.encoding.name.downcase
21
11
 
22
- def test_serialize_encoding_html
23
- @html = Nokogiri::HTML(File.open(NICH_FILE))
24
- assert_equal @html.encoding.downcase,
25
- @html.serialize.encoding.name.downcase
12
+ @doc = Nokogiri::XML(@xml.serialize)
13
+ assert_equal @xml.serialize, @doc.serialize
14
+ end
26
15
 
27
- @doc = Nokogiri::HTML(@html.serialize)
28
- assert_equal @html.serialize, @doc.serialize
29
- end
16
+ def test_encoding_GH_1113
17
+ utf8 = '<frag>shahid ὡ 𐄣 𢂁</frag>'
18
+ hex = '<frag>shahid &#x1f61; &#x10123; &#x22081;</frag>'
19
+ decimal = '<frag>shahid &#8033; &#65827; &#139393;</frag>'
20
+ expected = Nokogiri.jruby? ? hex : decimal
30
21
 
31
- def test_serialize_encoding_xml
32
- @xml = Nokogiri::XML(File.open(SHIFT_JIS_XML))
33
- assert_equal @xml.encoding.downcase,
34
- @xml.serialize.encoding.name.downcase
22
+ frag = Nokogiri::XML(utf8, nil, 'UTF-8', Nokogiri::XML::ParseOptions::STRICT)
23
+ assert_equal utf8, frag.to_xml.sub(/^<.xml[^>]*>\n/m, '').strip
35
24
 
36
- @doc = Nokogiri::XML(@xml.serialize)
37
- assert_equal @xml.serialize, @doc.serialize
38
- end
25
+ frag = Nokogiri::XML(expected, nil, 'UTF-8', Nokogiri::XML::ParseOptions::STRICT)
26
+ assert_equal utf8, frag.to_xml.sub(/^<.xml[^>]*>\n/m, '').strip
39
27
 
40
- def test_encode_special_chars
41
- foo = @html.css('a').first.encode_special_chars('foo')
42
- assert_equal @html.encoding, foo.encoding.name
43
- end
28
+ frag = Nokogiri::XML(expected, nil, 'US-ASCII', Nokogiri::XML::ParseOptions::STRICT)
29
+ assert_equal expected, frag.to_xml.sub(/^<.xml[^>]*>\n/m, '').strip
30
+ end
44
31
 
45
- def test_content
46
- node = @html.css('a').first
47
- assert_equal @html.encoding, node.content.encoding.name
48
- end
32
+ VEHICLE_XML = <<-eoxml
33
+ <root>
34
+ <car xmlns:part="http://general-motors.com/">
35
+ <part:tire>Michelin Model XGV</part:tire>
36
+ </car>
37
+ <bicycle xmlns:part="http://schwinn.com/">
38
+ <part:tire>I'm a bicycle tire!</part:tire>
39
+ </bicycle>
40
+ </root>
41
+ eoxml
49
42
 
50
- def test_name
51
- node = @html.css('a').first
52
- assert_equal @html.encoding, node.name.encoding.name
53
- end
43
+ def test_namespace
44
+ doc = Nokogiri::XML(VEHICLE_XML.encode('Shift_JIS'), nil, 'Shift_JIS')
45
+ assert_equal 'Shift_JIS', doc.encoding
46
+ n = doc.xpath('//part:tire', { 'part' => 'http://schwinn.com/' }).first
47
+ assert n
48
+ assert_equal 'UTF-8', n.namespace.href.encoding.name
49
+ assert_equal 'UTF-8', n.namespace.prefix.encoding.name
50
+ end
54
51
 
55
- def test_path
56
- node = @html.css('a').first
57
- assert_equal @html.encoding, node.path.encoding.name
58
- end
52
+ def test_namespace_as_hash
53
+ doc = Nokogiri::XML(VEHICLE_XML.encode('Shift_JIS'), nil, 'Shift_JIS')
54
+ assert_equal 'Shift_JIS', doc.encoding
55
+ assert n = doc.xpath('//car').first
59
56
 
60
- def test_namespace
61
- xml = <<-eoxml
62
- <root>
63
- <car xmlns:part="http://general-motors.com/">
64
- <part:tire>Michelin Model XGV</part:tire>
65
- </car>
66
- <bicycle xmlns:part="http://schwinn.com/">
67
- <part:tire>I'm a bicycle tire!</part:tire>
68
- </bicycle>
69
- </root>
70
- eoxml
71
- doc = Nokogiri::XML(xml, nil, 'UTF-8')
72
- assert_equal 'UTF-8', doc.encoding
73
- n = doc.xpath('//part:tire', { 'part' => 'http://schwinn.com/' }).first
74
- assert n
75
- assert_equal doc.encoding, n.namespace.href.encoding.name
76
- assert_equal doc.encoding, n.namespace.prefix.encoding.name
57
+ n.namespace_definitions.each do |nd|
58
+ assert_equal 'UTF-8', nd.href.encoding.name
59
+ assert_equal 'UTF-8', nd.prefix.encoding.name
77
60
  end
78
61
 
79
- def test_namespace_as_hash
80
- xml = <<-eoxml
81
- <root>
82
- <car xmlns:part="http://general-motors.com/">
83
- <part:tire>Michelin Model XGV</part:tire>
84
- </car>
85
- <bicycle xmlns:part="http://schwinn.com/">
86
- <part:tire>I'm a bicycle tire!</part:tire>
87
- </bicycle>
88
- </root>
89
- eoxml
90
- doc = Nokogiri::XML(xml, nil, 'UTF-8')
91
- assert_equal 'UTF-8', doc.encoding
92
- assert n = doc.xpath('//car').first
93
-
94
- n.namespace_definitions.each do |nd|
95
- assert_equal doc.encoding, nd.href.encoding.name
96
- assert_equal doc.encoding, nd.prefix.encoding.name
97
- end
98
-
99
- n.namespaces.each do |k,v|
100
- assert_equal doc.encoding, k.encoding.name
101
- assert_equal doc.encoding, v.encoding.name
102
- end
62
+ n.namespaces.each do |k,v|
63
+ assert_equal 'UTF-8', k.encoding.name
64
+ assert_equal 'UTF-8', v.encoding.name
103
65
  end
104
66
  end
105
67
  end