RubyGems - nokogiri - Versions diffs - 1.6.7.2-x64-mingw32 → 1.6.8.rc1-x64-mingw32 - Mend

nokogiri 1.6.7.2-x64-mingw32 → 1.6.8.rc1-x64-mingw32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of nokogiri might be problematic. Click here for more details.

Files changed (54) hide show

checksums.yaml +4 -4
data/.travis.yml +12 -9
data/CHANGELOG.ja.rdoc +18 -0
data/CHANGELOG.rdoc +12 -7
data/CONTRIBUTING.md +42 -0
data/Gemfile +1 -1
data/Manifest.txt +6 -0
data/README.md +1 -1
data/Rakefile +1 -1
data/bin/nokogiri +2 -2
data/dependencies.yml +1 -1
data/ext/nokogiri/extconf.rb +3 -3
data/ext/nokogiri/nokogiri.c +0 -7
data/ext/nokogiri/nokogiri.h +1 -34
data/ext/nokogiri/xml_document.c +2 -4
data/ext/nokogiri/xml_namespace.c +56 -17
data/ext/nokogiri/xml_node.c +12 -36
data/ext/nokogiri/xml_node_set.c +169 -143
data/ext/nokogiri/xml_node_set.h +3 -4
data/ext/nokogiri/xml_sax_parser.c +2 -5
data/ext/nokogiri/xml_syntax_error.c +0 -4
data/ext/nokogiri/xml_syntax_error.h +0 -1
data/ext/nokogiri/xml_xpath_context.c +9 -18
data/lib/nokogiri.rb +3 -0
data/lib/nokogiri/2.0/nokogiri.so +0 -0
data/lib/nokogiri/2.1/nokogiri.so +0 -0
data/lib/nokogiri/2.2/nokogiri.so +0 -0
data/lib/nokogiri/css/parser.rb +8 -2
data/lib/nokogiri/css/parser.y +7 -2
data/lib/nokogiri/version.rb +1 -1
data/lib/nokogiri/xml/document.rb +7 -1
data/lib/nokogiri/xml/dtd.rb +4 -4
data/lib/nokogiri/xml/node.rb +2 -2
data/test/css/test_parser.rb +7 -1
data/test/files/GH_1042.html +18 -0
data/test/files/namespace_pressure_test.xml +1684 -0
data/test/files/tlm.html +2 -1
data/test/html/sax/test_parser.rb +2 -2
data/test/html/test_document.rb +18 -8
data/test/html/test_document_encoding.rb +46 -54
data/test/html/test_document_fragment.rb +21 -22
data/test/html/test_node.rb +16 -0
data/test/html/test_node_encoding.rb +12 -14
data/test/namespaces/test_namespaces_in_parsed_doc.rb +14 -0
data/test/test_reader.rb +19 -0
data/test/test_xslt_transforms.rb +5 -3
data/test/xml/sax/test_parser.rb +36 -39
data/test/xml/test_document.rb +7 -2
data/test/xml/test_document_encoding.rb +14 -16
data/test/xml/test_dtd_encoding.rb +0 -2
data/test/xml/test_node_encoding.rb +78 -80
data/test/xml/test_reader_encoding.rb +100 -102
data/test/xslt/test_exception_handling.rb +1 -1
metadata +11 -7

data/test/files/tlm.html CHANGED Viewed

@@ -46,7 +46,7 @@
 .codesnip-container  {border:1px solid #ccc; background:#eee; padding: 5px;margin:10px;}
 </style>
 <link rel="EditURI" type="application/rsd+xml" title="RSD" href="http://tenderlovemaking.com/xmlrpc.php?rsd" />
-<link rel="wlwmanifest" type="application/wlwmanifest+xml" href="http://tenderlovemaking.com/wp-includes/wlwmanifest.xml" />
+<link rel="wlwmanifest" type="application/wlwmanifest+xml" href="http://tenderlovemaking.com/wp-includes/wlwmanifest.xml" />
 <meta name="generator" content="WordPress 2.6" />
 	<link rel="stylesheet" type="text/css" href="http://tenderlovemaking.com/wp-content/plugins/spell_checker/spell_checker.css" />
@@ -826,6 +826,7 @@ page.<span class="me1">body</span> =~ /&lt;textarea<span class="br0">&#91;</span
 </ul>
 </div>
+<div id="abc.123" class='special.character'>Special character div</div>
 <div id="footer">
 A design by <a href="http://blog.geminigeek.com/wordpress-theme">GeminiGeek</a> &bull; Powered by <a href="http://wordpress.org">Wordpress</a><!--&bull; <a href="#">CSS</a> &bull; <a href="#">xHTML 1.0</a>-->
 </div>

data/test/html/sax/test_parser.rb CHANGED Viewed

@@ -29,9 +29,9 @@ module Nokogiri
           # Take a look at the comment in test_parse_document to know
           # a possible reason to this difference.
           if Nokogiri.uses_libxml?
-            assert_equal 1110, @parser.document.end_elements.length
+            assert_equal 1111, @parser.document.end_elements.length
           else
-            assert_equal 1119, @parser.document.end_elements.length
+            assert_equal 1120, @parser.document.end_elements.length
           end
         end

data/test/html/test_document.rb CHANGED Viewed

@@ -479,6 +479,15 @@ eohtml
         assert_equal 1, found.length
       end
+      def test_find_by_css_with_escaped_characters
+        found_without_escape = @html.css("div[@id='abc.123']")
+        found_by_id = @html.css('#abc\.123')
+        found_by_class = @html.css('.special\.character')
+        assert_equal 1, found_without_escape.length
+        assert_equal found_by_id, found_without_escape
+        assert_equal found_by_class, found_without_escape
+      end
       def test_find_with_function
         assert @html.css("div:awesome() h1", Class.new {
           def awesome divs
@@ -628,19 +637,20 @@ eohtml
       end
       def test_capturing_nonparse_errors_during_node_copy_between_docs
-        skip("JRuby HTML parse errors are different than libxml2's") if Nokogiri.jruby?
-        doc1 = Nokogiri::HTML("<div id='unique'>one</div>")
-        doc2 = Nokogiri::HTML("<div id='unique'>two</div>")
+        # Errors should be emitted while parsing only, and should not change when moving nodes.
+        doc1 = Nokogiri::HTML("<html><body><diva id='unique'>one</diva></body></html>")
+        doc2 = Nokogiri::HTML("<html><body><dive id='unique'>two</dive></body></html>")
         node1 = doc1.at_css("#unique")
         node2 = doc2.at_css("#unique")
-        original_errors = doc1.errors.dup
+        original_errors1 = doc1.errors.dup
+        original_errors2 = doc2.errors.dup
+        assert original_errors1.any?{|e| e.to_s =~ /Tag diva invalid/ }, "it should complain about the tag name"
+        assert original_errors2.any?{|e| e.to_s =~ /Tag dive invalid/ }, "it should complain about the tag name"
         node1.add_child node2
-        assert_equal original_errors.length+1, doc1.errors.length
-        assert_match(/ID unique already defined/, doc1.errors.last.to_s)
+        assert_equal original_errors1, doc1.errors
+        assert_equal original_errors2, doc2.errors
       end
       def test_silencing_nonparse_errors_during_attribute_insertion_1262

data/test/html/test_document_encoding.rb CHANGED Viewed

@@ -3,34 +3,33 @@ require "helper"
 module Nokogiri
   module HTML
-    if RUBY_VERSION =~ /^1\.9/
-      class TestDocumentEncoding < Nokogiri::TestCase
-        def test_encoding
-          doc = Nokogiri::HTML File.open(SHIFT_JIS_HTML, 'rb')
+    class TestDocumentEncoding < Nokogiri::TestCase
+      def test_encoding
+        doc = Nokogiri::HTML File.open(SHIFT_JIS_HTML, 'rb')
-          hello = "こんにちは"
+        hello = "こんにちは"
-          assert_match doc.encoding, doc.to_html
-          assert_match hello.encode('Shift_JIS'), doc.to_html
-          assert_equal 'Shift_JIS', doc.to_html.encoding.name
+        assert_match doc.encoding, doc.to_html
+        assert_match hello.encode('Shift_JIS'), doc.to_html
+        assert_equal 'Shift_JIS', doc.to_html.encoding.name
-          assert_match hello, doc.to_html(:encoding => 'UTF-8')
-          assert_match 'UTF-8', doc.to_html(:encoding => 'UTF-8')
-          assert_match 'UTF-8', doc.to_html(:encoding => 'UTF-8').encoding.name
-        end
+        assert_match hello, doc.to_html(:encoding => 'UTF-8')
+        assert_match 'UTF-8', doc.to_html(:encoding => 'UTF-8')
+        assert_match 'UTF-8', doc.to_html(:encoding => 'UTF-8').encoding.name
+      end
-        def test_encoding_without_charset
-          doc = Nokogiri::HTML File.open(SHIFT_JIS_NO_CHARSET, 'r:cp932:cp932').read
+      def test_encoding_without_charset
+        doc = Nokogiri::HTML File.open(SHIFT_JIS_NO_CHARSET, 'r:cp932:cp932').read
-          hello = "こんにちは"
+        hello = "こんにちは"
-          assert_match hello, doc.content
-          assert_match hello, doc.to_html(:encoding => 'UTF-8')
-          assert_match 'UTF-8', doc.to_html(:encoding => 'UTF-8').encoding.name
-        end
+        assert_match hello, doc.content
+        assert_match hello, doc.to_html(:encoding => 'UTF-8')
+        assert_match 'UTF-8', doc.to_html(:encoding => 'UTF-8').encoding.name
+      end
-        def test_default_to_encoding_from_string
-          bad_charset = <<-eohtml
+      def test_default_to_encoding_from_string
+        bad_charset = <<-eohtml
 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"   "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
 <html>
 <head>
@@ -40,33 +39,33 @@ module Nokogiri
   <a href="http://tenderlovemaking.com/">blah!</a>
 </body>
 </html>
-          eohtml
-          doc = Nokogiri::HTML(bad_charset)
-          assert_equal bad_charset.encoding.name, doc.encoding
+        eohtml
+        doc = Nokogiri::HTML(bad_charset)
+        assert_equal bad_charset.encoding.name, doc.encoding
-          doc = Nokogiri.parse(bad_charset)
-          assert_equal bad_charset.encoding.name, doc.encoding
-        end
+        doc = Nokogiri.parse(bad_charset)
+        assert_equal bad_charset.encoding.name, doc.encoding
+      end
-        def test_encoding_non_utf8
-          orig = '日本語が上手です'
-          bin = Encoding::ASCII_8BIT
-          [Encoding::Shift_JIS, Encoding::EUC_JP].each do |enc|
-            html = <<-eohtml.encode(enc)
+      def test_encoding_non_utf8
+        orig = '日本語が上手です'
+        bin = Encoding::ASCII_8BIT
+        [Encoding::Shift_JIS, Encoding::EUC_JP].each do |enc|
+          html = <<-eohtml.encode(enc)
 <html>
 <meta http-equiv="Content-Type" content="text/html; charset=#{enc.name}">
 <title xml:lang="ja">#{orig}</title></html>
-            eohtml
-            text = Nokogiri::HTML.parse(html).at('title').inner_text
-            assert_equal(
-              orig.encode(enc).force_encoding(bin),
-              text.encode(enc).force_encoding(bin)
-            )
-          end
+          eohtml
+          text = Nokogiri::HTML.parse(html).at('title').inner_text
+          assert_equal(
+            orig.encode(enc).force_encoding(bin),
+            text.encode(enc).force_encoding(bin)
+          )
         end
+      end
-        def test_encoding_with_a_bad_name
-          bad_charset = <<-eohtml
+      def test_encoding_with_a_bad_name
+        bad_charset = <<-eohtml
 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"   "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
 <html>
 <head>
@@ -76,23 +75,16 @@ module Nokogiri
   <a href="http://tenderlovemaking.com/">blah!</a>
 </body>
 </html>
-          eohtml
-          doc = Nokogiri::HTML(bad_charset, nil, 'askldjfhalsdfjhlkasdfjh')
-          assert_equal ['http://tenderlovemaking.com/'],
-            doc.css('a').map { |a| a['href'] }
-        end
+        eohtml
+        doc = Nokogiri::HTML(bad_charset, nil, 'askldjfhalsdfjhlkasdfjh')
+        assert_equal ['http://tenderlovemaking.com/'],
+          doc.css('a').map { |a| a['href'] }
       end
     end
     class TestDocumentEncodingDetection < Nokogiri::TestCase
-      if IO.respond_to?(:binread)
-        def binread(file)
-          IO.binread(file)
-        end
-      else
-        def binread(file)
-          IO.read(file)
-        end
+      def binread(file)
+        IO.binread(file)
       end
       def binopen(file)

data/test/html/test_document_fragment.rb CHANGED Viewed

@@ -9,19 +9,17 @@ module Nokogiri
         @html = Nokogiri::HTML.parse(File.read(HTML_FILE), HTML_FILE)
       end
-      if RUBY_VERSION >= '1.9'
-        def test_inspect_encoding
-          fragment = "<div>こんにちは！</div>".encode('EUC-JP')
-          f = Nokogiri::HTML::DocumentFragment.parse fragment
-          assert_equal "こんにちは！", f.content
-        end
+      def test_inspect_encoding
+        fragment = "<div>こんにちは！</div>".encode('EUC-JP')
+        f = Nokogiri::HTML::DocumentFragment.parse fragment
+        assert_equal "こんにちは！", f.content
+      end
-        def test_html_parse_encoding
-          fragment = "<div>こんにちは！</div>".encode 'EUC-JP'
-          f = Nokogiri::HTML.fragment fragment
-          assert_equal 'EUC-JP', f.document.encoding
-          assert_equal "こんにちは！", f.content
-        end
+      def test_html_parse_encoding
+        fragment = "<div>こんにちは！</div>".encode 'EUC-JP'
+        f = Nokogiri::HTML.fragment fragment
+        assert_equal 'EUC-JP', f.document.encoding
+        assert_equal "こんにちは！", f.content
       end
       def test_colons_are_not_removed
@@ -270,7 +268,7 @@ module Nokogiri
       def test_capturing_nonparse_errors_during_fragment_clone
         # see https://github.com/sparklemotion/nokogiri/issues/1196 for background
-        original = Nokogiri::HTML.fragment("<div id='unique'></div>")
+        original = Nokogiri::HTML.fragment("<div id='unique'></div><div id='unique'></div>")
         original_errors = original.errors.dup
         copy = original.dup
@@ -278,19 +276,20 @@ module Nokogiri
       end
       def test_capturing_nonparse_errors_during_node_copy_between_fragments
-        skip("JRuby HTML parse errors are different than libxml2's") if Nokogiri.jruby?
-        frag1 = Nokogiri::HTML.fragment("<div id='unique'>one</div>")
-        frag2 = Nokogiri::HTML.fragment("<div id='unique'>two</div>")
+        # Errors should be emitted while parsing only, and should not change when moving nodes.
+        frag1 = Nokogiri::HTML.fragment("<diva id='unique'>one</diva>")
+        frag2 = Nokogiri::HTML.fragment("<dive id='unique'>two</dive>")
         node1 = frag1.at_css("#unique")
         node2 = frag2.at_css("#unique")
+        original_errors1 = frag1.errors.dup
+        original_errors2 = frag2.errors.dup
+        assert original_errors1.any?{|e| e.to_s =~ /Tag diva invalid/ }, "it should complain about the tag name"
+        assert original_errors2.any?{|e| e.to_s =~ /Tag dive invalid/ }, "it should complain about the tag name"
-        original_errors = frag1.errors.dup
-        node1.add_child node2 # we should also not see an error on stderr
+        node1.add_child node2
-        assert_equal original_errors.length+1, frag1.errors.length
-        assert_match(/ID unique already defined/, frag1.errors.last.to_s)
+        assert_equal original_errors1, frag1.errors
+        assert_equal original_errors2, frag2.errors
       end
     end
   end

data/test/html/test_node.rb CHANGED Viewed

@@ -192,5 +192,21 @@ module Nokogiri
         end
       end
     end
+    def test_GH_1042
+      file = File.join(ASSETS_DIR, 'GH_1042.html');
+      html = Nokogiri::HTML(File.read(file))
+      table = html.xpath("//table")[1]
+      trs = table.xpath("tr").drop(1)
+      # the jruby implementation of drop uses dup() on the IRubyObject (which
+      # is NOT the same dup() method on the ruby Object) which produces a
+      # shallow clone. a shallow clone of a valid XMLNode triggers several
+      # NullPointerExceptions on inspect() since loads of invariants
+      # are not set. the fix for GH1042 ensures a proper working clone.
+      assert_nothing_raised do
+        trs.inspect
+      end
+    end
   end
 end

data/test/html/test_node_encoding.rb CHANGED Viewed

@@ -3,24 +3,22 @@ require "helper"
 module Nokogiri
   module HTML
-    if RUBY_VERSION =~ /^1\.9/
-      class TestNodeEncoding < Nokogiri::TestCase
-        def test_inner_html
-          doc = Nokogiri::HTML File.open(SHIFT_JIS_HTML, 'rb')
+    class TestNodeEncoding < Nokogiri::TestCase
+      def test_inner_html
+        doc = Nokogiri::HTML File.open(SHIFT_JIS_HTML, 'rb')
-          hello = "こんにちは"
+        hello = "こんにちは"
-          contents = doc.at('h2').inner_html
-          assert_equal doc.encoding, contents.encoding.name
-          assert_match hello.encode('Shift_JIS'), contents
+        contents = doc.at('h2').inner_html
+        assert_equal doc.encoding, contents.encoding.name
+        assert_match hello.encode('Shift_JIS'), contents
-          contents = doc.at('h2').inner_html(:encoding => 'UTF-8')
-          assert_match hello, contents
+        contents = doc.at('h2').inner_html(:encoding => 'UTF-8')
+        assert_match hello, contents
-          doc.encoding = 'UTF-8'
-          contents = doc.at('h2').inner_html
-          assert_match hello, contents
-        end
+        doc.encoding = 'UTF-8'
+        contents = doc.at('h2').inner_html
+        assert_match hello, contents
       end
     end
   end

data/test/namespaces/test_namespaces_in_parsed_doc.rb CHANGED Viewed

@@ -61,6 +61,20 @@ module Nokogiri
         ns_attrs = n.to_xml.scan(/\bxmlns(?::.+?)?=/)
         assert_equal 3, ns_attrs.length
       end
+      def test_namespaces_under_memory_pressure_issue1155
+        skip("JRuby doesn't do GC.") if Nokogiri.jruby?
+        # this test is here to emit warnings when run under valgrind
+        # see https://github.com/sparklemotion/nokogiri/issues/1155 for background
+        filename = File.join ASSETS_DIR, 'namespace_pressure_test.xml'
+        doc = Nokogiri::XML File.open(filename)
+        # bizarrely, can't repro without the call to #to_a
+        doc.xpath('//namespace::*').to_a.each do |ns|
+          ns.inspect
+        end
+      end
     end
   end
 end

data/test/test_reader.rb CHANGED Viewed

@@ -386,6 +386,25 @@ class TestReader < Nokogiri::TestCase
                   reader.map { |n| n.namespace_uri })
   end
+  def test_namespaced_attributes
+    reader = Nokogiri::XML::Reader.from_memory(<<-eoxml)
+    <x xmlns:edi='http://ecommerce.example.org/schema' xmlns:commons="http://rets.org/xsd/RETSCommons">
+      <edi:foo commons:street-number="43">hello</edi:foo>
+      <y edi:name="francis" bacon="87"/>
+    </x>
+    eoxml
+    attr_ns = []
+    while reader.read
+      if reader.node_type == Nokogiri::XML::Node::ELEMENT_NODE
+        reader.attribute_nodes.each {|attr| attr_ns << (attr.namespace.nil? ? nil : attr.namespace.prefix) }
+      end
+    end
+    assert_equal(['commons',
+                  'edi',
+                  nil],
+                 attr_ns)
+  end
   def test_local_name
     reader = Nokogiri::XML::Reader.from_memory(<<-eoxml)
     <x xmlns:edi='http://ecommerce.example.org/schema'>

data/test/test_xslt_transforms.rb CHANGED Viewed

@@ -32,7 +32,7 @@ class TestXsltTransforms < Nokogiri::TestCase
     assert_match %r{<h1>Grandma</h1>}, result
     assert result = style.apply_to(@doc)
-    assert_match %r{<h1></h1>}, result
+    assert_match %r{<h1></h1>|<h1/>}, result
   end
   def test_transform_with_output_style
@@ -95,7 +95,9 @@ encoding="iso-8859-1" indent="yes"/>
 </xsl:stylesheet>
       eoxslt
     end
-    assert_no_match(/<td>/, xslt.apply_to(@doc, ['title', 'foo']))
+    result = xslt.apply_to(@doc, ['title', 'foo'])
+    assert_no_match(/<td>/, result)
+    assert_match(/This is an adjacent/, result)
   end
   def test_transform_arg_error
@@ -268,7 +270,7 @@ encoding="iso-8859-1" indent="yes"/>
         <xsl:output encoding="UTF-8" indent="yes" method="xml" />
         <xsl:template match="/">
-          <xsl:value-of select="/a" />
+          <a><xsl:value-of select="/a" /></a>
         </xsl:template>
       </xsl:stylesheet>
     EOXSL

data/test/xml/sax/test_parser.rb CHANGED Viewed

@@ -167,10 +167,8 @@ module Nokogiri
           assert @parser.document.errors
           assert @parser.document.errors.length > 0
-          if RUBY_VERSION =~ /^1\.9/
-            doc.errors.each do |error|
-              assert_equal 'UTF-8', error.message.encoding.name
-            end
+          doc.errors.each do |error|
+            assert_equal 'UTF-8', error.message.encoding.name
           end
           # when using JRuby Nokogiri, more errors will be generated as the DOM
@@ -207,42 +205,41 @@ module Nokogiri
             @parser.parse_io(f, encoding)
           }
           assert(@parser.document.cdata_blocks.length > 0)
-          if RUBY_VERSION =~ /^1\.9/
-            called = false
-            @parser.document.start_elements.flatten.each do |thing|
-              assert_equal 'UTF-8', thing.encoding.name
-              called = true
-            end
-            assert called
-            called = false
-            @parser.document.end_elements.flatten.each do |thing|
-              assert_equal 'UTF-8', thing.encoding.name
-              called = true
-            end
-            assert called
-            called = false
-            @parser.document.data.each do |thing|
-              assert_equal 'UTF-8', thing.encoding.name
-              called = true
-            end
-            assert called
-            called = false
-            @parser.document.comments.flatten.each do |thing|
-              assert_equal 'UTF-8', thing.encoding.name
-              called = true
-            end
-            assert called
-            called = false
-            @parser.document.cdata_blocks.flatten.each do |thing|
-              assert_equal 'UTF-8', thing.encoding.name
-              called = true
-            end
-            assert called
+          called = false
+          @parser.document.start_elements.flatten.each do |thing|
+            assert_equal 'UTF-8', thing.encoding.name
+            called = true
+          end
+          assert called
+          called = false
+          @parser.document.end_elements.flatten.each do |thing|
+            assert_equal 'UTF-8', thing.encoding.name
+            called = true
+          end
+          assert called
+          called = false
+          @parser.document.data.each do |thing|
+            assert_equal 'UTF-8', thing.encoding.name
+            called = true
+          end
+          assert called
+          called = false
+          @parser.document.comments.flatten.each do |thing|
+            assert_equal 'UTF-8', thing.encoding.name
+            called = true
+          end
+          assert called
+          called = false
+          @parser.document.cdata_blocks.flatten.each do |thing|
+            assert_equal 'UTF-8', thing.encoding.name
+            called = true
           end
+          assert called
         end
         def test_parse_file