RubyGems - nokogiri - Versions diffs - 1.6.7.2-java → 1.6.8-java - Mend

nokogiri 1.6.7.2-java → 1.6.8-java

Potentially problematic release.

This version of nokogiri might be problematic. Click here for more details.

Files changed (107) hide show

checksums.yaml +4 -4
data/.cross_rubies +2 -0
data/.travis.yml +19 -9
data/CHANGELOG.rdoc +73 -5
data/CONTRIBUTING.md +42 -0
data/Gemfile +10 -9
data/LICENSE.txt +1 -1
data/Manifest.txt +7 -2
data/README.md +23 -27
data/ROADMAP.md +11 -1
data/Rakefile +36 -17
data/bin/nokogiri +2 -2
data/dependencies.yml +29 -4
data/ext/java/nokogiri/HtmlElementDescription.java +5 -2
data/ext/java/nokogiri/NokogiriService.java +19 -0
data/ext/java/nokogiri/XmlAttr.java +3 -1
data/ext/java/nokogiri/XmlDocumentFragment.java +0 -14
data/ext/java/nokogiri/XmlNode.java +106 -63
data/ext/java/nokogiri/XmlXpathContext.java +12 -12
data/ext/java/nokogiri/XsltStylesheet.java +11 -4
data/ext/java/nokogiri/internals/HtmlDomParserContext.java +8 -1
data/ext/java/nokogiri/internals/NokogiriErrorHandler.java +1 -2
data/ext/java/nokogiri/internals/NokogiriHelpers.java +7 -7
data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java +1 -1
data/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java +0 -1
data/ext/java/nokogiri/internals/NokogiriXsltErrorListener.java +3 -3
data/ext/java/nokogiri/internals/ParserContext.java +4 -0
data/ext/java/nokogiri/internals/SaveContextVisitor.java +18 -13
data/ext/nokogiri/extconf.rb +163 -79
data/ext/nokogiri/html_document.c +6 -6
data/ext/nokogiri/html_element_description.c +1 -1
data/ext/nokogiri/html_entity_lookup.c +1 -1
data/ext/nokogiri/html_sax_parser_context.c +4 -4
data/ext/nokogiri/html_sax_push_parser.c +2 -2
data/ext/nokogiri/nokogiri.c +0 -7
data/ext/nokogiri/nokogiri.h +1 -34
data/ext/nokogiri/xml_attr.c +2 -2
data/ext/nokogiri/xml_comment.c +1 -1
data/ext/nokogiri/xml_document.c +20 -22
data/ext/nokogiri/xml_encoding_handler.c +3 -3
data/ext/nokogiri/xml_entity_reference.c +1 -1
data/ext/nokogiri/xml_namespace.c +56 -17
data/ext/nokogiri/xml_node.c +73 -67
data/ext/nokogiri/xml_node_set.c +164 -146
data/ext/nokogiri/xml_node_set.h +3 -4
data/ext/nokogiri/xml_processing_instruction.c +2 -2
data/ext/nokogiri/xml_reader.c +5 -18
data/ext/nokogiri/xml_sax_parser.c +9 -12
data/ext/nokogiri/xml_sax_parser_context.c +1 -1
data/ext/nokogiri/xml_sax_push_parser.c +1 -1
data/ext/nokogiri/xml_schema.c +1 -1
data/ext/nokogiri/xml_syntax_error.c +0 -4
data/ext/nokogiri/xml_syntax_error.h +0 -1
data/ext/nokogiri/xml_text.c +1 -1
data/ext/nokogiri/xml_xpath_context.c +15 -24
data/ext/nokogiri/xslt_stylesheet.c +6 -6
data/lib/nekohtml.jar +0 -0
data/lib/nokogiri.rb +14 -7
data/lib/nokogiri/css/parser.rb +8 -2
data/lib/nokogiri/css/parser.y +7 -2
data/lib/nokogiri/html/document.rb +4 -2
data/lib/nokogiri/nokogiri.jar +0 -0
data/lib/nokogiri/version.rb +1 -1
data/lib/nokogiri/xml/document.rb +7 -1
data/lib/nokogiri/xml/dtd.rb +4 -4
data/lib/nokogiri/xml/node.rb +6 -10
data/lib/nokogiri/xml/node_set.rb +3 -3
data/lib/nokogiri/xml/parse_options.rb +22 -0
data/lib/serializer.jar +0 -0
data/lib/xalan.jar +0 -0
data/lib/xercesImpl.jar +0 -0
data/lib/xml-apis.jar +0 -0
data/tasks/test.rb +5 -0
data/test/css/test_parser.rb +7 -1
data/test/files/GH_1042.html +18 -0
data/test/files/namespace_pressure_test.xml +1684 -0
data/test/files/tlm.html +2 -1
data/test/helper.rb +4 -0
data/test/html/sax/test_parser.rb +2 -2
data/test/html/test_document.rb +47 -11
data/test/html/test_document_encoding.rb +55 -58
data/test/html/test_document_fragment.rb +27 -23
data/test/html/test_node.rb +16 -0
data/test/html/test_node_encoding.rb +71 -13
data/test/namespaces/test_namespaces_in_parsed_doc.rb +14 -0
data/test/test_css_cache.rb +1 -1
data/test/test_encoding_handler.rb +2 -0
data/test/test_xslt_transforms.rb +38 -3
data/test/xml/sax/test_parser.rb +54 -53
data/test/xml/test_document.rb +7 -2
data/test/xml/test_document_encoding.rb +19 -16
data/test/xml/test_document_fragment.rb +12 -0
data/test/xml/test_dtd_encoding.rb +0 -2
data/test/xml/test_namespace.rb +2 -2
data/test/xml/test_node.rb +15 -4
data/test/xml/test_node_attributes.rb +6 -0
data/test/xml/test_node_encoding.rb +49 -87
data/test/xml/test_node_reparenting.rb +193 -18
data/test/xml/test_node_set.rb +1 -1
data/test/xml/test_reader.rb +589 -0
data/test/xml/test_reader_encoding.rb +100 -102
data/test/xml/test_unparented_node.rb +14 -1
data/test/xslt/test_exception_handling.rb +1 -1
data/test_all +47 -33
metadata +38 -36
data/CHANGELOG.ja.rdoc +0 -1057
data/test/test_reader.rb +0 -558

@@ -46,7 +46,7 @@
 .codesnip-container  {border:1px solid #ccc; background:#eee; padding: 5px;margin:10px;}
 </style>
 <link rel="EditURI" type="application/rsd+xml" title="RSD" href="http://tenderlovemaking.com/xmlrpc.php?rsd" />
-<link rel="wlwmanifest" type="application/wlwmanifest+xml" href="http://tenderlovemaking.com/wp-includes/wlwmanifest.xml" />
+<link rel="wlwmanifest" type="application/wlwmanifest+xml" href="http://tenderlovemaking.com/wp-includes/wlwmanifest.xml" />
 <meta name="generator" content="WordPress 2.6" />
 	<link rel="stylesheet" type="text/css" href="http://tenderlovemaking.com/wp-content/plugins/spell_checker/spell_checker.css" />
@@ -826,6 +826,7 @@ page.<span class="me1">body</span> =~ /&lt;textarea<span class="br0">&#91;</span
 </ul>
 </div>
+<div id="abc.123" class='special.character'>Special character div</div>
 <div id="footer">
 A design by <a href="http://blog.geminigeek.com/wordpress-theme">GeminiGeek</a> &bull; Powered by <a href="http://wordpress.org">Wordpress</a><!--&bull; <a href="#">CSS</a> &bull; <a href="#">xHTML 1.0</a>-->
 </div>

data/test/helper.rb CHANGED

@@ -7,6 +7,10 @@ require 'tempfile'
 require 'pp'
 require 'nokogiri'
+if ENV['TEST_NOKOGIRI_WITH_LIBXML_RUBY']
+  require 'libxml'
+  warn "#{__FILE__}:#{__LINE__}: loaded libxml-ruby '#{LibXML::XML::VERSION}'"
+end
 warn "#{__FILE__}:#{__LINE__}: version info: #{Nokogiri::VERSION_INFO.inspect}"

data/test/html/sax/test_parser.rb CHANGED

@@ -29,9 +29,9 @@ module Nokogiri
           # Take a look at the comment in test_parse_document to know
           # a possible reason to this difference.
           if Nokogiri.uses_libxml?
-            assert_equal 1110, @parser.document.end_elements.length
+            assert_equal 1111, @parser.document.end_elements.length
           else
-            assert_equal 1119, @parser.document.end_elements.length
+            assert_equal 1120, @parser.document.end_elements.length
           end
         end

data/test/html/test_document.rb CHANGED

@@ -97,7 +97,7 @@ module Nokogiri
         rescue Exception => e
           skip("This test needs the internet. Skips if no internet available. (#{e})")
         end
-        doc = Nokogiri::HTML html ,"http:/foobar.foobar/"
+        doc = Nokogiri::HTML html ,"http:/foobar.foobar/", 'UTF-8'
         refute_empty doc.to_s, "Document should not be empty"
       end
@@ -422,7 +422,7 @@ eohtml
         eohtml
         set = html.css('p, a')
         assert_equal(2, set.length)
-        assert_equal ['a tag', 'p tag'].sort, set.map { |x| x.content }.sort
+        assert_equal ['a tag', 'p tag'].sort, set.map(&:content).sort
       end
       def test_inner_text
@@ -479,6 +479,15 @@ eohtml
         assert_equal 1, found.length
       end
+      def test_find_by_css_with_escaped_characters
+        found_without_escape = @html.css("div[@id='abc.123']")
+        found_by_id = @html.css('#abc\.123')
+        found_by_class = @html.css('.special\.character')
+        assert_equal 1, found_without_escape.length
+        assert_equal found_by_id, found_without_escape
+        assert_equal found_by_class, found_without_escape
+      end
       def test_find_with_function
         assert @html.css("div:awesome() h1", Class.new {
           def awesome divs
@@ -591,7 +600,7 @@ eohtml
         eohtml
         list = doc.css('.red')
         assert_equal 2, list.length
-        assert_equal %w{ RED RED }, list.map { |x| x.text }
+        assert_equal %w{ RED RED }, list.map(&:text)
       end
       def test_parse_can_take_io
@@ -628,19 +637,20 @@ eohtml
       end
       def test_capturing_nonparse_errors_during_node_copy_between_docs
-        skip("JRuby HTML parse errors are different than libxml2's") if Nokogiri.jruby?
-        doc1 = Nokogiri::HTML("<div id='unique'>one</div>")
-        doc2 = Nokogiri::HTML("<div id='unique'>two</div>")
+        # Errors should be emitted while parsing only, and should not change when moving nodes.
+        doc1 = Nokogiri::HTML("<html><body><diva id='unique'>one</diva></body></html>")
+        doc2 = Nokogiri::HTML("<html><body><dive id='unique'>two</dive></body></html>")
         node1 = doc1.at_css("#unique")
         node2 = doc2.at_css("#unique")
-        original_errors = doc1.errors.dup
+        original_errors1 = doc1.errors.dup
+        original_errors2 = doc2.errors.dup
+        assert original_errors1.any?{|e| e.to_s =~ /Tag diva invalid/ }, "it should complain about the tag name"
+        assert original_errors2.any?{|e| e.to_s =~ /Tag dive invalid/ }, "it should complain about the tag name"
         node1.add_child node2
-        assert_equal original_errors.length+1, doc1.errors.length
-        assert_match(/ID unique already defined/, doc1.errors.last.to_s)
+        assert_equal original_errors1, doc1.errors
+        assert_equal original_errors2, doc2.errors
       end
       def test_silencing_nonparse_errors_during_attribute_insertion_1262
@@ -660,6 +670,32 @@ eohtml
         Nokogiri::XML::Element.new("div", doc).set_attribute('id', 'unique-issue-1262')
         assert_equal 0, doc.errors.length
       end
+      it "skips encoding for script tags" do
+        html = Nokogiri::HTML <<-EOHTML
+        <html>
+          <head>
+            <script>var isGreater = 4 > 5;</script>
+          </head>
+          <body></body>
+        </html>
+        EOHTML
+        node = html.xpath("//script").first
+        assert_equal("var isGreater = 4 > 5;", node.inner_html)
+      end
+      it "skips encoding for style tags" do
+        html = Nokogiri::HTML <<-EOHTML
+        <html>
+          <head>
+            <style>tr > div { display:block; }</style>
+          </head>
+          <body></body>
+        </html>
+        EOHTML
+        node = html.xpath("//style").first
+        assert_equal("tr > div { display:block; }", node.inner_html)
+      end
     end
   end
 end

data/test/html/test_document_encoding.rb CHANGED

@@ -3,34 +3,33 @@ require "helper"
 module Nokogiri
   module HTML
-    if RUBY_VERSION =~ /^1\.9/
-      class TestDocumentEncoding < Nokogiri::TestCase
-        def test_encoding
-          doc = Nokogiri::HTML File.open(SHIFT_JIS_HTML, 'rb')
+    class TestDocumentEncoding < Nokogiri::TestCase
+      def test_encoding
+        doc = Nokogiri::HTML File.open(SHIFT_JIS_HTML, 'rb')
-          hello = "こんにちは"
+        hello = "こんにちは"
-          assert_match doc.encoding, doc.to_html
-          assert_match hello.encode('Shift_JIS'), doc.to_html
-          assert_equal 'Shift_JIS', doc.to_html.encoding.name
+        assert_match doc.encoding, doc.to_html
+        assert_match hello.encode('Shift_JIS'), doc.to_html
+        assert_equal 'Shift_JIS', doc.to_html.encoding.name
-          assert_match hello, doc.to_html(:encoding => 'UTF-8')
-          assert_match 'UTF-8', doc.to_html(:encoding => 'UTF-8')
-          assert_match 'UTF-8', doc.to_html(:encoding => 'UTF-8').encoding.name
-        end
+        assert_match hello, doc.to_html(:encoding => 'UTF-8')
+        assert_match 'UTF-8', doc.to_html(:encoding => 'UTF-8')
+        assert_match 'UTF-8', doc.to_html(:encoding => 'UTF-8').encoding.name
+      end
-        def test_encoding_without_charset
-          doc = Nokogiri::HTML File.open(SHIFT_JIS_NO_CHARSET, 'r:cp932:cp932').read
+      def test_encoding_without_charset
+        doc = Nokogiri::HTML File.open(SHIFT_JIS_NO_CHARSET, 'r:cp932:cp932').read
-          hello = "こんにちは"
+        hello = "こんにちは"
-          assert_match hello, doc.content
-          assert_match hello, doc.to_html(:encoding => 'UTF-8')
-          assert_match 'UTF-8', doc.to_html(:encoding => 'UTF-8').encoding.name
-        end
+        assert_match hello, doc.content
+        assert_match hello, doc.to_html(:encoding => 'UTF-8')
+        assert_match 'UTF-8', doc.to_html(:encoding => 'UTF-8').encoding.name
+      end
-        def test_default_to_encoding_from_string
-          bad_charset = <<-eohtml
+      def test_default_to_encoding_from_string
+        bad_charset = <<-eohtml
 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"   "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
 <html>
 <head>
@@ -40,33 +39,33 @@ module Nokogiri
   <a href="http://tenderlovemaking.com/">blah!</a>
 </body>
 </html>
-          eohtml
-          doc = Nokogiri::HTML(bad_charset)
-          assert_equal bad_charset.encoding.name, doc.encoding
+        eohtml
+        doc = Nokogiri::HTML(bad_charset)
+        assert_equal bad_charset.encoding.name, doc.encoding
-          doc = Nokogiri.parse(bad_charset)
-          assert_equal bad_charset.encoding.name, doc.encoding
-        end
+        doc = Nokogiri.parse(bad_charset)
+        assert_equal bad_charset.encoding.name, doc.encoding
+      end
-        def test_encoding_non_utf8
-          orig = '日本語が上手です'
-          bin = Encoding::ASCII_8BIT
-          [Encoding::Shift_JIS, Encoding::EUC_JP].each do |enc|
-            html = <<-eohtml.encode(enc)
+      def test_encoding_non_utf8
+        orig = '日本語が上手です'
+        bin = Encoding::ASCII_8BIT
+        [Encoding::Shift_JIS, Encoding::EUC_JP].each do |enc|
+          html = <<-eohtml.encode(enc)
 <html>
 <meta http-equiv="Content-Type" content="text/html; charset=#{enc.name}">
 <title xml:lang="ja">#{orig}</title></html>
-            eohtml
-            text = Nokogiri::HTML.parse(html).at('title').inner_text
-            assert_equal(
-              orig.encode(enc).force_encoding(bin),
-              text.encode(enc).force_encoding(bin)
-            )
-          end
+          eohtml
+          text = Nokogiri::HTML.parse(html).at('title').inner_text
+          assert_equal(
+            orig.encode(enc).force_encoding(bin),
+            text.encode(enc).force_encoding(bin)
+          )
         end
+      end
-        def test_encoding_with_a_bad_name
-          bad_charset = <<-eohtml
+      def test_encoding_with_a_bad_name
+        bad_charset = <<-eohtml
 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"   "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
 <html>
 <head>
@@ -76,23 +75,21 @@ module Nokogiri
   <a href="http://tenderlovemaking.com/">blah!</a>
 </body>
 </html>
-          eohtml
-          doc = Nokogiri::HTML(bad_charset, nil, 'askldjfhalsdfjhlkasdfjh')
-          assert_equal ['http://tenderlovemaking.com/'],
-            doc.css('a').map { |a| a['href'] }
-        end
+        eohtml
+        doc = Nokogiri::HTML(bad_charset, nil, 'askldjfhalsdfjhlkasdfjh')
+        assert_equal ['http://tenderlovemaking.com/'],
+          doc.css('a').map { |a| a['href'] }
+      end
+      def test_empty_doc_encoding
+        encoding = 'US-ASCII'
+        assert_equal encoding, Nokogiri::HTML.parse(nil, nil, encoding).encoding
       end
     end
     class TestDocumentEncodingDetection < Nokogiri::TestCase
-      if IO.respond_to?(:binread)
-        def binread(file)
-          IO.binread(file)
-        end
-      else
-        def binread(file)
-          IO.read(file)
-        end
+      def binread(file)
+        IO.binread(file)
       end
       def binopen(file)
@@ -115,16 +112,16 @@ module Nokogiri
       def test_document_xhtml_enc
         [ENCODING_XHTML_FILE, ENCODING_HTML_FILE].each { |file|
           doc_from_string_enc = Nokogiri::HTML(binread(file), nil, 'Shift_JIS')
-          ary_from_string_enc = doc_from_string_enc.xpath('//p/text()').map { |text| text.text }
+          ary_from_string_enc = doc_from_string_enc.xpath('//p/text()').map(&:text)
           doc_from_string = Nokogiri::HTML(binread(file))
-          ary_from_string = doc_from_string.xpath('//p/text()').map { |text| text.text }
+          ary_from_string = doc_from_string.xpath('//p/text()').map(&:text)
           doc_from_file_enc = Nokogiri::HTML(binopen(file), nil, 'Shift_JIS')
-          ary_from_file_enc = doc_from_file_enc.xpath('//p/text()').map { |text| text.text }
+          ary_from_file_enc = doc_from_file_enc.xpath('//p/text()').map(&:text)
           doc_from_file = Nokogiri::HTML(binopen(file))
-          ary_from_file = doc_from_file.xpath('//p/text()').map { |text| text.text }
+          ary_from_file = doc_from_file.xpath('//p/text()').map(&:text)
           title = 'たこ焼き仮面'

data/test/html/test_document_fragment.rb CHANGED

@@ -9,19 +9,22 @@ module Nokogiri
         @html = Nokogiri::HTML.parse(File.read(HTML_FILE), HTML_FILE)
       end
-      if RUBY_VERSION >= '1.9'
-        def test_inspect_encoding
-          fragment = "<div>こんにちは！</div>".encode('EUC-JP')
-          f = Nokogiri::HTML::DocumentFragment.parse fragment
-          assert_equal "こんにちは！", f.content
-        end
+      def test_inspect_encoding
+        fragment = "<div>こんにちは！</div>".encode('EUC-JP')
+        f = Nokogiri::HTML::DocumentFragment.parse fragment
+        assert_equal "こんにちは！", f.content
+      end
-        def test_html_parse_encoding
-          fragment = "<div>こんにちは！</div>".encode 'EUC-JP'
-          f = Nokogiri::HTML.fragment fragment
-          assert_equal 'EUC-JP', f.document.encoding
-          assert_equal "こんにちは！", f.content
-        end
+      def test_html_parse_encoding
+        fragment = "<div>こんにちは！</div>".encode 'EUC-JP'
+        f = Nokogiri::HTML.fragment fragment
+        assert_equal 'EUC-JP', f.document.encoding
+        assert_equal "こんにちは！", f.content
+      end
+      def test_unlink_empty_document
+        frag = Nokogiri::HTML::DocumentFragment.parse('').unlink # must_not_raise
+        assert_nil frag.parent
       end
       def test_colons_are_not_removed
@@ -232,7 +235,7 @@ module Nokogiri
       def test_element_children_counts
         doc = Nokogiri::HTML::DocumentFragment.parse("   <div>  </div>\n   ")
-        assert doc.element_children.count == 1
+        assert_equal 1, doc.element_children.count
       end
       def test_malformed_fragment_is_corrected
@@ -270,7 +273,7 @@ module Nokogiri
       def test_capturing_nonparse_errors_during_fragment_clone
         # see https://github.com/sparklemotion/nokogiri/issues/1196 for background
-        original = Nokogiri::HTML.fragment("<div id='unique'></div>")
+        original = Nokogiri::HTML.fragment("<div id='unique'></div><div id='unique'></div>")
         original_errors = original.errors.dup
         copy = original.dup
@@ -278,19 +281,20 @@ module Nokogiri
       end
       def test_capturing_nonparse_errors_during_node_copy_between_fragments
-        skip("JRuby HTML parse errors are different than libxml2's") if Nokogiri.jruby?
-        frag1 = Nokogiri::HTML.fragment("<div id='unique'>one</div>")
-        frag2 = Nokogiri::HTML.fragment("<div id='unique'>two</div>")
+        # Errors should be emitted while parsing only, and should not change when moving nodes.
+        frag1 = Nokogiri::HTML.fragment("<diva id='unique'>one</diva>")
+        frag2 = Nokogiri::HTML.fragment("<dive id='unique'>two</dive>")
         node1 = frag1.at_css("#unique")
         node2 = frag2.at_css("#unique")
+        original_errors1 = frag1.errors.dup
+        original_errors2 = frag2.errors.dup
+        assert original_errors1.any?{|e| e.to_s =~ /Tag diva invalid/ }, "it should complain about the tag name"
+        assert original_errors2.any?{|e| e.to_s =~ /Tag dive invalid/ }, "it should complain about the tag name"
-        original_errors = frag1.errors.dup
-        node1.add_child node2 # we should also not see an error on stderr
+        node1.add_child node2
-        assert_equal original_errors.length+1, frag1.errors.length
-        assert_match(/ID unique already defined/, frag1.errors.last.to_s)
+        assert_equal original_errors1, frag1.errors
+        assert_equal original_errors2, frag2.errors
       end
     end
   end

data/test/html/test_node.rb CHANGED

@@ -192,5 +192,21 @@ module Nokogiri
         end
       end
     end
+    def test_GH_1042
+      file = File.join(ASSETS_DIR, 'GH_1042.html');
+      html = Nokogiri::HTML(File.read(file))
+      table = html.xpath("//table")[1]
+      trs = table.xpath("tr").drop(1)
+      # the jruby implementation of drop uses dup() on the IRubyObject (which
+      # is NOT the same dup() method on the ruby Object) which produces a
+      # shallow clone. a shallow clone of a valid XMLNode triggers several
+      # NullPointerExceptions on inspect() since loads of invariants
+      # are not set. the fix for GH1042 ensures a proper working clone.
+      assert_nothing_raised do
+        trs.inspect
+      end
+    end
   end
 end

data/test/html/test_node_encoding.rb CHANGED

@@ -3,23 +3,81 @@ require "helper"
 module Nokogiri
   module HTML
-    if RUBY_VERSION =~ /^1\.9/
-      class TestNodeEncoding < Nokogiri::TestCase
-        def test_inner_html
-          doc = Nokogiri::HTML File.open(SHIFT_JIS_HTML, 'rb')
+    class TestNodeEncoding < Nokogiri::TestCase
+      def setup
+        super
+        @html = Nokogiri::HTML(File.open(NICH_FILE, "rb"))
+      end
+      def test_get_attribute
+        node = @html.css('a').first
+        assert_equal 'UTF-8', node['href'].encoding.name
+      end
+      def test_text_encoding_is_utf_8
+        assert_equal 'UTF-8', @html.text.encoding.name
+      end
+      def test_serialize_encoding_html
+        assert_equal @html.encoding.downcase,
+          @html.serialize.encoding.name.downcase
+        @doc = Nokogiri::HTML(@html.serialize)
+        assert_equal @html.serialize, @doc.serialize
+      end
+      def test_encode_special_chars
+        foo = @html.css('a').first.encode_special_chars('foo')
+        assert_equal 'UTF-8', foo.encoding.name
+      end
+      def test_content
+        node = @html.css('a').first
+        assert_equal 'UTF-8', node.content.encoding.name
+      end
+      def test_name
+        node = @html.css('a').first
+        assert_equal 'UTF-8', node.name.encoding.name
+      end
+      def test_path
+        node = @html.css('a').first
+        assert_equal 'UTF-8', node.path.encoding.name
+      end
+      def test_inner_html
+        doc = Nokogiri::HTML File.open(SHIFT_JIS_HTML, 'rb')
-          hello = "こんにちは"
+        hello = "こんにちは"
-          contents = doc.at('h2').inner_html
-          assert_equal doc.encoding, contents.encoding.name
-          assert_match hello.encode('Shift_JIS'), contents
+        contents = doc.at('h2').inner_html
+        assert_equal doc.encoding, contents.encoding.name
+        assert_match hello.encode('Shift_JIS'), contents
-          contents = doc.at('h2').inner_html(:encoding => 'UTF-8')
-          assert_match hello, contents
+        contents = doc.at('h2').inner_html(:encoding => 'UTF-8')
+        assert_match hello, contents
+        doc.encoding = 'UTF-8'
+        contents = doc.at('h2').inner_html
+        assert_match hello, contents
+      end
+      def test_encoding_GH_1113
+        doc = Nokogiri::HTML::Document.new
+        hex = '<p>&#x1f340;</p>'
+        decimal = '<p>&#127808;</p>'
+        encoded = '<p>🍀</p>'
+        doc.encoding = 'UTF-8'
+        [hex, decimal, encoded].each do |document|
+          assert_equal encoded, doc.fragment(document).to_s
+        end
-          doc.encoding = 'UTF-8'
-          contents = doc.at('h2').inner_html
-          assert_match hello, contents
+        doc.encoding = 'US-ASCII'
+        expected = Nokogiri.jruby? ? hex : decimal
+        [hex, decimal].each do |document|
+          assert_equal expected, doc.fragment(document).to_s
         end
       end
     end