nokogiri 1.6.7.2-java → 1.6.8-java

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (107) hide show
  1. checksums.yaml +4 -4
  2. data/.cross_rubies +2 -0
  3. data/.travis.yml +19 -9
  4. data/CHANGELOG.rdoc +73 -5
  5. data/CONTRIBUTING.md +42 -0
  6. data/Gemfile +10 -9
  7. data/LICENSE.txt +1 -1
  8. data/Manifest.txt +7 -2
  9. data/README.md +23 -27
  10. data/ROADMAP.md +11 -1
  11. data/Rakefile +36 -17
  12. data/bin/nokogiri +2 -2
  13. data/dependencies.yml +29 -4
  14. data/ext/java/nokogiri/HtmlElementDescription.java +5 -2
  15. data/ext/java/nokogiri/NokogiriService.java +19 -0
  16. data/ext/java/nokogiri/XmlAttr.java +3 -1
  17. data/ext/java/nokogiri/XmlDocumentFragment.java +0 -14
  18. data/ext/java/nokogiri/XmlNode.java +106 -63
  19. data/ext/java/nokogiri/XmlXpathContext.java +12 -12
  20. data/ext/java/nokogiri/XsltStylesheet.java +11 -4
  21. data/ext/java/nokogiri/internals/HtmlDomParserContext.java +8 -1
  22. data/ext/java/nokogiri/internals/NokogiriErrorHandler.java +1 -2
  23. data/ext/java/nokogiri/internals/NokogiriHelpers.java +7 -7
  24. data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java +1 -1
  25. data/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java +0 -1
  26. data/ext/java/nokogiri/internals/NokogiriXsltErrorListener.java +3 -3
  27. data/ext/java/nokogiri/internals/ParserContext.java +4 -0
  28. data/ext/java/nokogiri/internals/SaveContextVisitor.java +18 -13
  29. data/ext/nokogiri/extconf.rb +163 -79
  30. data/ext/nokogiri/html_document.c +6 -6
  31. data/ext/nokogiri/html_element_description.c +1 -1
  32. data/ext/nokogiri/html_entity_lookup.c +1 -1
  33. data/ext/nokogiri/html_sax_parser_context.c +4 -4
  34. data/ext/nokogiri/html_sax_push_parser.c +2 -2
  35. data/ext/nokogiri/nokogiri.c +0 -7
  36. data/ext/nokogiri/nokogiri.h +1 -34
  37. data/ext/nokogiri/xml_attr.c +2 -2
  38. data/ext/nokogiri/xml_comment.c +1 -1
  39. data/ext/nokogiri/xml_document.c +20 -22
  40. data/ext/nokogiri/xml_encoding_handler.c +3 -3
  41. data/ext/nokogiri/xml_entity_reference.c +1 -1
  42. data/ext/nokogiri/xml_namespace.c +56 -17
  43. data/ext/nokogiri/xml_node.c +73 -67
  44. data/ext/nokogiri/xml_node_set.c +164 -146
  45. data/ext/nokogiri/xml_node_set.h +3 -4
  46. data/ext/nokogiri/xml_processing_instruction.c +2 -2
  47. data/ext/nokogiri/xml_reader.c +5 -18
  48. data/ext/nokogiri/xml_sax_parser.c +9 -12
  49. data/ext/nokogiri/xml_sax_parser_context.c +1 -1
  50. data/ext/nokogiri/xml_sax_push_parser.c +1 -1
  51. data/ext/nokogiri/xml_schema.c +1 -1
  52. data/ext/nokogiri/xml_syntax_error.c +0 -4
  53. data/ext/nokogiri/xml_syntax_error.h +0 -1
  54. data/ext/nokogiri/xml_text.c +1 -1
  55. data/ext/nokogiri/xml_xpath_context.c +15 -24
  56. data/ext/nokogiri/xslt_stylesheet.c +6 -6
  57. data/lib/nekohtml.jar +0 -0
  58. data/lib/nokogiri.rb +14 -7
  59. data/lib/nokogiri/css/parser.rb +8 -2
  60. data/lib/nokogiri/css/parser.y +7 -2
  61. data/lib/nokogiri/html/document.rb +4 -2
  62. data/lib/nokogiri/nokogiri.jar +0 -0
  63. data/lib/nokogiri/version.rb +1 -1
  64. data/lib/nokogiri/xml/document.rb +7 -1
  65. data/lib/nokogiri/xml/dtd.rb +4 -4
  66. data/lib/nokogiri/xml/node.rb +6 -10
  67. data/lib/nokogiri/xml/node_set.rb +3 -3
  68. data/lib/nokogiri/xml/parse_options.rb +22 -0
  69. data/lib/serializer.jar +0 -0
  70. data/lib/xalan.jar +0 -0
  71. data/lib/xercesImpl.jar +0 -0
  72. data/lib/xml-apis.jar +0 -0
  73. data/tasks/test.rb +5 -0
  74. data/test/css/test_parser.rb +7 -1
  75. data/test/files/GH_1042.html +18 -0
  76. data/test/files/namespace_pressure_test.xml +1684 -0
  77. data/test/files/tlm.html +2 -1
  78. data/test/helper.rb +4 -0
  79. data/test/html/sax/test_parser.rb +2 -2
  80. data/test/html/test_document.rb +47 -11
  81. data/test/html/test_document_encoding.rb +55 -58
  82. data/test/html/test_document_fragment.rb +27 -23
  83. data/test/html/test_node.rb +16 -0
  84. data/test/html/test_node_encoding.rb +71 -13
  85. data/test/namespaces/test_namespaces_in_parsed_doc.rb +14 -0
  86. data/test/test_css_cache.rb +1 -1
  87. data/test/test_encoding_handler.rb +2 -0
  88. data/test/test_xslt_transforms.rb +38 -3
  89. data/test/xml/sax/test_parser.rb +54 -53
  90. data/test/xml/test_document.rb +7 -2
  91. data/test/xml/test_document_encoding.rb +19 -16
  92. data/test/xml/test_document_fragment.rb +12 -0
  93. data/test/xml/test_dtd_encoding.rb +0 -2
  94. data/test/xml/test_namespace.rb +2 -2
  95. data/test/xml/test_node.rb +15 -4
  96. data/test/xml/test_node_attributes.rb +6 -0
  97. data/test/xml/test_node_encoding.rb +49 -87
  98. data/test/xml/test_node_reparenting.rb +193 -18
  99. data/test/xml/test_node_set.rb +1 -1
  100. data/test/xml/test_reader.rb +589 -0
  101. data/test/xml/test_reader_encoding.rb +100 -102
  102. data/test/xml/test_unparented_node.rb +14 -1
  103. data/test/xslt/test_exception_handling.rb +1 -1
  104. data/test_all +47 -33
  105. metadata +38 -36
  106. data/CHANGELOG.ja.rdoc +0 -1057
  107. data/test/test_reader.rb +0 -558
@@ -46,7 +46,7 @@
46
46
  .codesnip-container {border:1px solid #ccc; background:#eee; padding: 5px;margin:10px;}
47
47
  </style>
48
48
  <link rel="EditURI" type="application/rsd+xml" title="RSD" href="http://tenderlovemaking.com/xmlrpc.php?rsd" />
49
- <link rel="wlwmanifest" type="application/wlwmanifest+xml" href="http://tenderlovemaking.com/wp-includes/wlwmanifest.xml" />
49
+ <link rel="wlwmanifest" type="application/wlwmanifest+xml" href="http://tenderlovemaking.com/wp-includes/wlwmanifest.xml" />
50
50
  <meta name="generator" content="WordPress 2.6" />
51
51
 
52
52
  <link rel="stylesheet" type="text/css" href="http://tenderlovemaking.com/wp-content/plugins/spell_checker/spell_checker.css" />
@@ -826,6 +826,7 @@ page.<span class="me1">body</span> =~ /&lt;textarea<span class="br0">&#91;</span
826
826
  </ul>
827
827
  </div>
828
828
 
829
+ <div id="abc.123" class='special.character'>Special character div</div>
829
830
  <div id="footer">
830
831
  A design by <a href="http://blog.geminigeek.com/wordpress-theme">GeminiGeek</a> &bull; Powered by <a href="http://wordpress.org">Wordpress</a><!--&bull; <a href="#">CSS</a> &bull; <a href="#">xHTML 1.0</a>-->
831
832
  </div>
@@ -7,6 +7,10 @@ require 'tempfile'
7
7
  require 'pp'
8
8
 
9
9
  require 'nokogiri'
10
+ if ENV['TEST_NOKOGIRI_WITH_LIBXML_RUBY']
11
+ require 'libxml'
12
+ warn "#{__FILE__}:#{__LINE__}: loaded libxml-ruby '#{LibXML::XML::VERSION}'"
13
+ end
10
14
 
11
15
  warn "#{__FILE__}:#{__LINE__}: version info: #{Nokogiri::VERSION_INFO.inspect}"
12
16
 
@@ -29,9 +29,9 @@ module Nokogiri
29
29
  # Take a look at the comment in test_parse_document to know
30
30
  # a possible reason to this difference.
31
31
  if Nokogiri.uses_libxml?
32
- assert_equal 1110, @parser.document.end_elements.length
32
+ assert_equal 1111, @parser.document.end_elements.length
33
33
  else
34
- assert_equal 1119, @parser.document.end_elements.length
34
+ assert_equal 1120, @parser.document.end_elements.length
35
35
  end
36
36
  end
37
37
 
@@ -97,7 +97,7 @@ module Nokogiri
97
97
  rescue Exception => e
98
98
  skip("This test needs the internet. Skips if no internet available. (#{e})")
99
99
  end
100
- doc = Nokogiri::HTML html ,"http:/foobar.foobar/"
100
+ doc = Nokogiri::HTML html ,"http:/foobar.foobar/", 'UTF-8'
101
101
  refute_empty doc.to_s, "Document should not be empty"
102
102
  end
103
103
 
@@ -422,7 +422,7 @@ eohtml
422
422
  eohtml
423
423
  set = html.css('p, a')
424
424
  assert_equal(2, set.length)
425
- assert_equal ['a tag', 'p tag'].sort, set.map { |x| x.content }.sort
425
+ assert_equal ['a tag', 'p tag'].sort, set.map(&:content).sort
426
426
  end
427
427
 
428
428
  def test_inner_text
@@ -479,6 +479,15 @@ eohtml
479
479
  assert_equal 1, found.length
480
480
  end
481
481
 
482
+ def test_find_by_css_with_escaped_characters
483
+ found_without_escape = @html.css("div[@id='abc.123']")
484
+ found_by_id = @html.css('#abc\.123')
485
+ found_by_class = @html.css('.special\.character')
486
+ assert_equal 1, found_without_escape.length
487
+ assert_equal found_by_id, found_without_escape
488
+ assert_equal found_by_class, found_without_escape
489
+ end
490
+
482
491
  def test_find_with_function
483
492
  assert @html.css("div:awesome() h1", Class.new {
484
493
  def awesome divs
@@ -591,7 +600,7 @@ eohtml
591
600
  eohtml
592
601
  list = doc.css('.red')
593
602
  assert_equal 2, list.length
594
- assert_equal %w{ RED RED }, list.map { |x| x.text }
603
+ assert_equal %w{ RED RED }, list.map(&:text)
595
604
  end
596
605
 
597
606
  def test_parse_can_take_io
@@ -628,19 +637,20 @@ eohtml
628
637
  end
629
638
 
630
639
  def test_capturing_nonparse_errors_during_node_copy_between_docs
631
- skip("JRuby HTML parse errors are different than libxml2's") if Nokogiri.jruby?
632
-
633
- doc1 = Nokogiri::HTML("<div id='unique'>one</div>")
634
- doc2 = Nokogiri::HTML("<div id='unique'>two</div>")
640
+ # Errors should be emitted while parsing only, and should not change when moving nodes.
641
+ doc1 = Nokogiri::HTML("<html><body><diva id='unique'>one</diva></body></html>")
642
+ doc2 = Nokogiri::HTML("<html><body><dive id='unique'>two</dive></body></html>")
635
643
  node1 = doc1.at_css("#unique")
636
644
  node2 = doc2.at_css("#unique")
637
-
638
- original_errors = doc1.errors.dup
645
+ original_errors1 = doc1.errors.dup
646
+ original_errors2 = doc2.errors.dup
647
+ assert original_errors1.any?{|e| e.to_s =~ /Tag diva invalid/ }, "it should complain about the tag name"
648
+ assert original_errors2.any?{|e| e.to_s =~ /Tag dive invalid/ }, "it should complain about the tag name"
639
649
 
640
650
  node1.add_child node2
641
651
 
642
- assert_equal original_errors.length+1, doc1.errors.length
643
- assert_match(/ID unique already defined/, doc1.errors.last.to_s)
652
+ assert_equal original_errors1, doc1.errors
653
+ assert_equal original_errors2, doc2.errors
644
654
  end
645
655
 
646
656
  def test_silencing_nonparse_errors_during_attribute_insertion_1262
@@ -660,6 +670,32 @@ eohtml
660
670
  Nokogiri::XML::Element.new("div", doc).set_attribute('id', 'unique-issue-1262')
661
671
  assert_equal 0, doc.errors.length
662
672
  end
673
+
674
+ it "skips encoding for script tags" do
675
+ html = Nokogiri::HTML <<-EOHTML
676
+ <html>
677
+ <head>
678
+ <script>var isGreater = 4 > 5;</script>
679
+ </head>
680
+ <body></body>
681
+ </html>
682
+ EOHTML
683
+ node = html.xpath("//script").first
684
+ assert_equal("var isGreater = 4 > 5;", node.inner_html)
685
+ end
686
+
687
+ it "skips encoding for style tags" do
688
+ html = Nokogiri::HTML <<-EOHTML
689
+ <html>
690
+ <head>
691
+ <style>tr > div { display:block; }</style>
692
+ </head>
693
+ <body></body>
694
+ </html>
695
+ EOHTML
696
+ node = html.xpath("//style").first
697
+ assert_equal("tr > div { display:block; }", node.inner_html)
698
+ end
663
699
  end
664
700
  end
665
701
  end
@@ -3,34 +3,33 @@ require "helper"
3
3
 
4
4
  module Nokogiri
5
5
  module HTML
6
- if RUBY_VERSION =~ /^1\.9/
7
- class TestDocumentEncoding < Nokogiri::TestCase
8
- def test_encoding
9
- doc = Nokogiri::HTML File.open(SHIFT_JIS_HTML, 'rb')
6
+ class TestDocumentEncoding < Nokogiri::TestCase
7
+ def test_encoding
8
+ doc = Nokogiri::HTML File.open(SHIFT_JIS_HTML, 'rb')
10
9
 
11
- hello = "こんにちは"
10
+ hello = "こんにちは"
12
11
 
13
- assert_match doc.encoding, doc.to_html
14
- assert_match hello.encode('Shift_JIS'), doc.to_html
15
- assert_equal 'Shift_JIS', doc.to_html.encoding.name
12
+ assert_match doc.encoding, doc.to_html
13
+ assert_match hello.encode('Shift_JIS'), doc.to_html
14
+ assert_equal 'Shift_JIS', doc.to_html.encoding.name
16
15
 
17
- assert_match hello, doc.to_html(:encoding => 'UTF-8')
18
- assert_match 'UTF-8', doc.to_html(:encoding => 'UTF-8')
19
- assert_match 'UTF-8', doc.to_html(:encoding => 'UTF-8').encoding.name
20
- end
16
+ assert_match hello, doc.to_html(:encoding => 'UTF-8')
17
+ assert_match 'UTF-8', doc.to_html(:encoding => 'UTF-8')
18
+ assert_match 'UTF-8', doc.to_html(:encoding => 'UTF-8').encoding.name
19
+ end
21
20
 
22
- def test_encoding_without_charset
23
- doc = Nokogiri::HTML File.open(SHIFT_JIS_NO_CHARSET, 'r:cp932:cp932').read
21
+ def test_encoding_without_charset
22
+ doc = Nokogiri::HTML File.open(SHIFT_JIS_NO_CHARSET, 'r:cp932:cp932').read
24
23
 
25
- hello = "こんにちは"
24
+ hello = "こんにちは"
26
25
 
27
- assert_match hello, doc.content
28
- assert_match hello, doc.to_html(:encoding => 'UTF-8')
29
- assert_match 'UTF-8', doc.to_html(:encoding => 'UTF-8').encoding.name
30
- end
26
+ assert_match hello, doc.content
27
+ assert_match hello, doc.to_html(:encoding => 'UTF-8')
28
+ assert_match 'UTF-8', doc.to_html(:encoding => 'UTF-8').encoding.name
29
+ end
31
30
 
32
- def test_default_to_encoding_from_string
33
- bad_charset = <<-eohtml
31
+ def test_default_to_encoding_from_string
32
+ bad_charset = <<-eohtml
34
33
  <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
35
34
  <html>
36
35
  <head>
@@ -40,33 +39,33 @@ module Nokogiri
40
39
  <a href="http://tenderlovemaking.com/">blah!</a>
41
40
  </body>
42
41
  </html>
43
- eohtml
44
- doc = Nokogiri::HTML(bad_charset)
45
- assert_equal bad_charset.encoding.name, doc.encoding
42
+ eohtml
43
+ doc = Nokogiri::HTML(bad_charset)
44
+ assert_equal bad_charset.encoding.name, doc.encoding
46
45
 
47
- doc = Nokogiri.parse(bad_charset)
48
- assert_equal bad_charset.encoding.name, doc.encoding
49
- end
46
+ doc = Nokogiri.parse(bad_charset)
47
+ assert_equal bad_charset.encoding.name, doc.encoding
48
+ end
50
49
 
51
- def test_encoding_non_utf8
52
- orig = '日本語が上手です'
53
- bin = Encoding::ASCII_8BIT
54
- [Encoding::Shift_JIS, Encoding::EUC_JP].each do |enc|
55
- html = <<-eohtml.encode(enc)
50
+ def test_encoding_non_utf8
51
+ orig = '日本語が上手です'
52
+ bin = Encoding::ASCII_8BIT
53
+ [Encoding::Shift_JIS, Encoding::EUC_JP].each do |enc|
54
+ html = <<-eohtml.encode(enc)
56
55
  <html>
57
56
  <meta http-equiv="Content-Type" content="text/html; charset=#{enc.name}">
58
57
  <title xml:lang="ja">#{orig}</title></html>
59
- eohtml
60
- text = Nokogiri::HTML.parse(html).at('title').inner_text
61
- assert_equal(
62
- orig.encode(enc).force_encoding(bin),
63
- text.encode(enc).force_encoding(bin)
64
- )
65
- end
58
+ eohtml
59
+ text = Nokogiri::HTML.parse(html).at('title').inner_text
60
+ assert_equal(
61
+ orig.encode(enc).force_encoding(bin),
62
+ text.encode(enc).force_encoding(bin)
63
+ )
66
64
  end
65
+ end
67
66
 
68
- def test_encoding_with_a_bad_name
69
- bad_charset = <<-eohtml
67
+ def test_encoding_with_a_bad_name
68
+ bad_charset = <<-eohtml
70
69
  <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
71
70
  <html>
72
71
  <head>
@@ -76,23 +75,21 @@ module Nokogiri
76
75
  <a href="http://tenderlovemaking.com/">blah!</a>
77
76
  </body>
78
77
  </html>
79
- eohtml
80
- doc = Nokogiri::HTML(bad_charset, nil, 'askldjfhalsdfjhlkasdfjh')
81
- assert_equal ['http://tenderlovemaking.com/'],
82
- doc.css('a').map { |a| a['href'] }
83
- end
78
+ eohtml
79
+ doc = Nokogiri::HTML(bad_charset, nil, 'askldjfhalsdfjhlkasdfjh')
80
+ assert_equal ['http://tenderlovemaking.com/'],
81
+ doc.css('a').map { |a| a['href'] }
82
+ end
83
+
84
+ def test_empty_doc_encoding
85
+ encoding = 'US-ASCII'
86
+ assert_equal encoding, Nokogiri::HTML.parse(nil, nil, encoding).encoding
84
87
  end
85
88
  end
86
89
 
87
90
  class TestDocumentEncodingDetection < Nokogiri::TestCase
88
- if IO.respond_to?(:binread)
89
- def binread(file)
90
- IO.binread(file)
91
- end
92
- else
93
- def binread(file)
94
- IO.read(file)
95
- end
91
+ def binread(file)
92
+ IO.binread(file)
96
93
  end
97
94
 
98
95
  def binopen(file)
@@ -115,16 +112,16 @@ module Nokogiri
115
112
  def test_document_xhtml_enc
116
113
  [ENCODING_XHTML_FILE, ENCODING_HTML_FILE].each { |file|
117
114
  doc_from_string_enc = Nokogiri::HTML(binread(file), nil, 'Shift_JIS')
118
- ary_from_string_enc = doc_from_string_enc.xpath('//p/text()').map { |text| text.text }
115
+ ary_from_string_enc = doc_from_string_enc.xpath('//p/text()').map(&:text)
119
116
 
120
117
  doc_from_string = Nokogiri::HTML(binread(file))
121
- ary_from_string = doc_from_string.xpath('//p/text()').map { |text| text.text }
118
+ ary_from_string = doc_from_string.xpath('//p/text()').map(&:text)
122
119
 
123
120
  doc_from_file_enc = Nokogiri::HTML(binopen(file), nil, 'Shift_JIS')
124
- ary_from_file_enc = doc_from_file_enc.xpath('//p/text()').map { |text| text.text }
121
+ ary_from_file_enc = doc_from_file_enc.xpath('//p/text()').map(&:text)
125
122
 
126
123
  doc_from_file = Nokogiri::HTML(binopen(file))
127
- ary_from_file = doc_from_file.xpath('//p/text()').map { |text| text.text }
124
+ ary_from_file = doc_from_file.xpath('//p/text()').map(&:text)
128
125
 
129
126
  title = 'たこ焼き仮面'
130
127
 
@@ -9,19 +9,22 @@ module Nokogiri
9
9
  @html = Nokogiri::HTML.parse(File.read(HTML_FILE), HTML_FILE)
10
10
  end
11
11
 
12
- if RUBY_VERSION >= '1.9'
13
- def test_inspect_encoding
14
- fragment = "<div>こんにちは!</div>".encode('EUC-JP')
15
- f = Nokogiri::HTML::DocumentFragment.parse fragment
16
- assert_equal "こんにちは!", f.content
17
- end
12
+ def test_inspect_encoding
13
+ fragment = "<div>こんにちは!</div>".encode('EUC-JP')
14
+ f = Nokogiri::HTML::DocumentFragment.parse fragment
15
+ assert_equal "こんにちは!", f.content
16
+ end
18
17
 
19
- def test_html_parse_encoding
20
- fragment = "<div>こんにちは!</div>".encode 'EUC-JP'
21
- f = Nokogiri::HTML.fragment fragment
22
- assert_equal 'EUC-JP', f.document.encoding
23
- assert_equal "こんにちは!", f.content
24
- end
18
+ def test_html_parse_encoding
19
+ fragment = "<div>こんにちは!</div>".encode 'EUC-JP'
20
+ f = Nokogiri::HTML.fragment fragment
21
+ assert_equal 'EUC-JP', f.document.encoding
22
+ assert_equal "こんにちは!", f.content
23
+ end
24
+
25
+ def test_unlink_empty_document
26
+ frag = Nokogiri::HTML::DocumentFragment.parse('').unlink # must_not_raise
27
+ assert_nil frag.parent
25
28
  end
26
29
 
27
30
  def test_colons_are_not_removed
@@ -232,7 +235,7 @@ module Nokogiri
232
235
 
233
236
  def test_element_children_counts
234
237
  doc = Nokogiri::HTML::DocumentFragment.parse(" <div> </div>\n ")
235
- assert doc.element_children.count == 1
238
+ assert_equal 1, doc.element_children.count
236
239
  end
237
240
 
238
241
  def test_malformed_fragment_is_corrected
@@ -270,7 +273,7 @@ module Nokogiri
270
273
 
271
274
  def test_capturing_nonparse_errors_during_fragment_clone
272
275
  # see https://github.com/sparklemotion/nokogiri/issues/1196 for background
273
- original = Nokogiri::HTML.fragment("<div id='unique'></div>")
276
+ original = Nokogiri::HTML.fragment("<div id='unique'></div><div id='unique'></div>")
274
277
  original_errors = original.errors.dup
275
278
 
276
279
  copy = original.dup
@@ -278,19 +281,20 @@ module Nokogiri
278
281
  end
279
282
 
280
283
  def test_capturing_nonparse_errors_during_node_copy_between_fragments
281
- skip("JRuby HTML parse errors are different than libxml2's") if Nokogiri.jruby?
282
-
283
- frag1 = Nokogiri::HTML.fragment("<div id='unique'>one</div>")
284
- frag2 = Nokogiri::HTML.fragment("<div id='unique'>two</div>")
284
+ # Errors should be emitted while parsing only, and should not change when moving nodes.
285
+ frag1 = Nokogiri::HTML.fragment("<diva id='unique'>one</diva>")
286
+ frag2 = Nokogiri::HTML.fragment("<dive id='unique'>two</dive>")
285
287
  node1 = frag1.at_css("#unique")
286
288
  node2 = frag2.at_css("#unique")
289
+ original_errors1 = frag1.errors.dup
290
+ original_errors2 = frag2.errors.dup
291
+ assert original_errors1.any?{|e| e.to_s =~ /Tag diva invalid/ }, "it should complain about the tag name"
292
+ assert original_errors2.any?{|e| e.to_s =~ /Tag dive invalid/ }, "it should complain about the tag name"
287
293
 
288
- original_errors = frag1.errors.dup
289
-
290
- node1.add_child node2 # we should also not see an error on stderr
294
+ node1.add_child node2
291
295
 
292
- assert_equal original_errors.length+1, frag1.errors.length
293
- assert_match(/ID unique already defined/, frag1.errors.last.to_s)
296
+ assert_equal original_errors1, frag1.errors
297
+ assert_equal original_errors2, frag2.errors
294
298
  end
295
299
  end
296
300
  end
@@ -192,5 +192,21 @@ module Nokogiri
192
192
  end
193
193
  end
194
194
  end
195
+
196
+ def test_GH_1042
197
+ file = File.join(ASSETS_DIR, 'GH_1042.html');
198
+ html = Nokogiri::HTML(File.read(file))
199
+ table = html.xpath("//table")[1]
200
+ trs = table.xpath("tr").drop(1)
201
+
202
+ # the jruby inplementation of drop uses dup() on the IRubyObject (which
203
+ # is NOT the same dup() method on the ruby Object) which produces a
204
+ # shallow clone. a shallow of valid XMLNode triggers several
205
+ # NullPointerException on inspect() since loads of invariants
206
+ # are not set. the fix for GH1042 ensures a proper working clone.
207
+ assert_nothing_raised do
208
+ trs.inspect
209
+ end
210
+ end
195
211
  end
196
212
  end
@@ -3,23 +3,81 @@ require "helper"
3
3
 
4
4
  module Nokogiri
5
5
  module HTML
6
- if RUBY_VERSION =~ /^1\.9/
7
- class TestNodeEncoding < Nokogiri::TestCase
8
- def test_inner_html
9
- doc = Nokogiri::HTML File.open(SHIFT_JIS_HTML, 'rb')
6
+ class TestNodeEncoding < Nokogiri::TestCase
7
+ def setup
8
+ super
9
+ @html = Nokogiri::HTML(File.open(NICH_FILE, "rb"))
10
+ end
11
+
12
+ def test_get_attribute
13
+ node = @html.css('a').first
14
+ assert_equal 'UTF-8', node['href'].encoding.name
15
+ end
16
+
17
+ def test_text_encoding_is_utf_8
18
+ assert_equal 'UTF-8', @html.text.encoding.name
19
+ end
20
+
21
+ def test_serialize_encoding_html
22
+ assert_equal @html.encoding.downcase,
23
+ @html.serialize.encoding.name.downcase
24
+
25
+ @doc = Nokogiri::HTML(@html.serialize)
26
+ assert_equal @html.serialize, @doc.serialize
27
+ end
28
+
29
+ def test_encode_special_chars
30
+ foo = @html.css('a').first.encode_special_chars('foo')
31
+ assert_equal 'UTF-8', foo.encoding.name
32
+ end
33
+
34
+ def test_content
35
+ node = @html.css('a').first
36
+ assert_equal 'UTF-8', node.content.encoding.name
37
+ end
38
+
39
+ def test_name
40
+ node = @html.css('a').first
41
+ assert_equal 'UTF-8', node.name.encoding.name
42
+ end
43
+
44
+ def test_path
45
+ node = @html.css('a').first
46
+ assert_equal 'UTF-8', node.path.encoding.name
47
+ end
48
+
49
+ def test_inner_html
50
+ doc = Nokogiri::HTML File.open(SHIFT_JIS_HTML, 'rb')
10
51
 
11
- hello = "こんにちは"
52
+ hello = "こんにちは"
12
53
 
13
- contents = doc.at('h2').inner_html
14
- assert_equal doc.encoding, contents.encoding.name
15
- assert_match hello.encode('Shift_JIS'), contents
54
+ contents = doc.at('h2').inner_html
55
+ assert_equal doc.encoding, contents.encoding.name
56
+ assert_match hello.encode('Shift_JIS'), contents
16
57
 
17
- contents = doc.at('h2').inner_html(:encoding => 'UTF-8')
18
- assert_match hello, contents
58
+ contents = doc.at('h2').inner_html(:encoding => 'UTF-8')
59
+ assert_match hello, contents
60
+
61
+ doc.encoding = 'UTF-8'
62
+ contents = doc.at('h2').inner_html
63
+ assert_match hello, contents
64
+ end
65
+
66
+ def test_encoding_GH_1113
67
+ doc = Nokogiri::HTML::Document.new
68
+ hex = '<p>&#x1f340;</p>'
69
+ decimal = '<p>&#127808;</p>'
70
+ encoded = '<p>🍀</p>'
71
+
72
+ doc.encoding = 'UTF-8'
73
+ [hex, decimal, encoded].each do |document|
74
+ assert_equal encoded, doc.fragment(document).to_s
75
+ end
19
76
 
20
- doc.encoding = 'UTF-8'
21
- contents = doc.at('h2').inner_html
22
- assert_match hello, contents
77
+ doc.encoding = 'US-ASCII'
78
+ expected = Nokogiri.jruby? ? hex : decimal
79
+ [hex, decimal].each do |document|
80
+ assert_equal expected, doc.fragment(document).to_s
23
81
  end
24
82
  end
25
83
  end