nokogiri 1.6.7.2-x64-mingw32 → 1.6.8.rc1-x64-mingw32
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
 - data/.travis.yml +12 -9
 - data/CHANGELOG.ja.rdoc +18 -0
 - data/CHANGELOG.rdoc +12 -7
 - data/CONTRIBUTING.md +42 -0
 - data/Gemfile +1 -1
 - data/Manifest.txt +6 -0
 - data/README.md +1 -1
 - data/Rakefile +1 -1
 - data/bin/nokogiri +2 -2
 - data/dependencies.yml +1 -1
 - data/ext/nokogiri/extconf.rb +3 -3
 - data/ext/nokogiri/nokogiri.c +0 -7
 - data/ext/nokogiri/nokogiri.h +1 -34
 - data/ext/nokogiri/xml_document.c +2 -4
 - data/ext/nokogiri/xml_namespace.c +56 -17
 - data/ext/nokogiri/xml_node.c +12 -36
 - data/ext/nokogiri/xml_node_set.c +169 -143
 - data/ext/nokogiri/xml_node_set.h +3 -4
 - data/ext/nokogiri/xml_sax_parser.c +2 -5
 - data/ext/nokogiri/xml_syntax_error.c +0 -4
 - data/ext/nokogiri/xml_syntax_error.h +0 -1
 - data/ext/nokogiri/xml_xpath_context.c +9 -18
 - data/lib/nokogiri.rb +3 -0
 - data/lib/nokogiri/2.0/nokogiri.so +0 -0
 - data/lib/nokogiri/2.1/nokogiri.so +0 -0
 - data/lib/nokogiri/2.2/nokogiri.so +0 -0
 - data/lib/nokogiri/css/parser.rb +8 -2
 - data/lib/nokogiri/css/parser.y +7 -2
 - data/lib/nokogiri/version.rb +1 -1
 - data/lib/nokogiri/xml/document.rb +7 -1
 - data/lib/nokogiri/xml/dtd.rb +4 -4
 - data/lib/nokogiri/xml/node.rb +2 -2
 - data/test/css/test_parser.rb +7 -1
 - data/test/files/GH_1042.html +18 -0
 - data/test/files/namespace_pressure_test.xml +1684 -0
 - data/test/files/tlm.html +2 -1
 - data/test/html/sax/test_parser.rb +2 -2
 - data/test/html/test_document.rb +18 -8
 - data/test/html/test_document_encoding.rb +46 -54
 - data/test/html/test_document_fragment.rb +21 -22
 - data/test/html/test_node.rb +16 -0
 - data/test/html/test_node_encoding.rb +12 -14
 - data/test/namespaces/test_namespaces_in_parsed_doc.rb +14 -0
 - data/test/test_reader.rb +19 -0
 - data/test/test_xslt_transforms.rb +5 -3
 - data/test/xml/sax/test_parser.rb +36 -39
 - data/test/xml/test_document.rb +7 -2
 - data/test/xml/test_document_encoding.rb +14 -16
 - data/test/xml/test_dtd_encoding.rb +0 -2
 - data/test/xml/test_node_encoding.rb +78 -80
 - data/test/xml/test_reader_encoding.rb +100 -102
 - data/test/xslt/test_exception_handling.rb +1 -1
 - metadata +11 -7
 
    
        data/test/files/tlm.html
    CHANGED
    
    | 
         @@ -46,7 +46,7 @@ 
     | 
|
| 
       46 
46 
     | 
    
         
             
            .codesnip-container  {border:1px solid #ccc; background:#eee; padding: 5px;margin:10px;}
         
     | 
| 
       47 
47 
     | 
    
         
             
            </style>
         
     | 
| 
       48 
48 
     | 
    
         
             
            <link rel="EditURI" type="application/rsd+xml" title="RSD" href="http://tenderlovemaking.com/xmlrpc.php?rsd" />
         
     | 
| 
       49 
     | 
    
         
            -
            <link rel="wlwmanifest" type="application/wlwmanifest+xml" href="http://tenderlovemaking.com/wp-includes/wlwmanifest.xml" /> 
     | 
| 
      
 49 
     | 
    
         
            +
            <link rel="wlwmanifest" type="application/wlwmanifest+xml" href="http://tenderlovemaking.com/wp-includes/wlwmanifest.xml" />
         
     | 
| 
       50 
50 
     | 
    
         
             
            <meta name="generator" content="WordPress 2.6" />
         
     | 
| 
       51 
51 
     | 
    
         | 
| 
       52 
52 
     | 
    
         
             
            	<link rel="stylesheet" type="text/css" href="http://tenderlovemaking.com/wp-content/plugins/spell_checker/spell_checker.css" />
         
     | 
| 
         @@ -826,6 +826,7 @@ page.<span class="me1">body</span> =~ /<textarea<span class="br0">[</span 
     | 
|
| 
       826 
826 
     | 
    
         
             
            </ul>
         
     | 
| 
       827 
827 
     | 
    
         
             
            </div>
         
     | 
| 
       828 
828 
     | 
    
         | 
| 
      
 829 
     | 
    
         
            +
            <div id="abc.123" class='special.character'>Special character div</div>
         
     | 
| 
       829 
830 
     | 
    
         
             
            <div id="footer">
         
     | 
| 
       830 
831 
     | 
    
         
             
            A design by <a href="http://blog.geminigeek.com/wordpress-theme">GeminiGeek</a> • Powered by <a href="http://wordpress.org">Wordpress</a><!--• <a href="#">CSS</a> • <a href="#">xHTML 1.0</a>-->
         
     | 
| 
       831 
832 
     | 
    
         
             
            </div>
         
     | 
| 
         @@ -29,9 +29,9 @@ module Nokogiri 
     | 
|
| 
       29 
29 
     | 
    
         
             
                      # Take a look at the comment in test_parse_document to know
         
     | 
| 
       30 
30 
     | 
    
         
             
                      # a possible reason to this difference.
         
     | 
| 
       31 
31 
     | 
    
         
             
                      if Nokogiri.uses_libxml?
         
     | 
| 
       32 
     | 
    
         
            -
                        assert_equal  
     | 
| 
      
 32 
     | 
    
         
            +
                        assert_equal 1111, @parser.document.end_elements.length
         
     | 
| 
       33 
33 
     | 
    
         
             
                      else
         
     | 
| 
       34 
     | 
    
         
            -
                        assert_equal  
     | 
| 
      
 34 
     | 
    
         
            +
                        assert_equal 1120, @parser.document.end_elements.length
         
     | 
| 
       35 
35 
     | 
    
         
             
                      end
         
     | 
| 
       36 
36 
     | 
    
         
             
                    end
         
     | 
| 
       37 
37 
     | 
    
         | 
    
        data/test/html/test_document.rb
    CHANGED
    
    | 
         @@ -479,6 +479,15 @@ eohtml 
     | 
|
| 
       479 
479 
     | 
    
         
             
                    assert_equal 1, found.length
         
     | 
| 
       480 
480 
     | 
    
         
             
                  end
         
     | 
| 
       481 
481 
     | 
    
         | 
| 
      
 482 
     | 
    
         
            +
                  def test_find_by_css_with_escaped_characters
         
     | 
| 
      
 483 
     | 
    
         
            +
                    found_without_escape = @html.css("div[@id='abc.123']")
         
     | 
| 
      
 484 
     | 
    
         
            +
                    found_by_id = @html.css('#abc\.123')
         
     | 
| 
      
 485 
     | 
    
         
            +
                    found_by_class = @html.css('.special\.character')
         
     | 
| 
      
 486 
     | 
    
         
            +
                    assert_equal 1, found_without_escape.length
         
     | 
| 
      
 487 
     | 
    
         
            +
                    assert_equal found_by_id, found_without_escape
         
     | 
| 
      
 488 
     | 
    
         
            +
                    assert_equal found_by_class, found_without_escape
         
     | 
| 
      
 489 
     | 
    
         
            +
                  end
         
     | 
| 
      
 490 
     | 
    
         
            +
             
     | 
| 
       482 
491 
     | 
    
         
             
                  def test_find_with_function
         
     | 
| 
       483 
492 
     | 
    
         
             
                    assert @html.css("div:awesome() h1", Class.new {
         
     | 
| 
       484 
493 
     | 
    
         
             
                      def awesome divs
         
     | 
| 
         @@ -628,19 +637,20 @@ eohtml 
     | 
|
| 
       628 
637 
     | 
    
         
             
                  end
         
     | 
| 
       629 
638 
     | 
    
         | 
| 
       630 
639 
     | 
    
         
             
                  def test_capturing_nonparse_errors_during_node_copy_between_docs
         
     | 
| 
       631 
     | 
    
         
            -
                     
     | 
| 
       632 
     | 
    
         
            -
             
     | 
| 
       633 
     | 
    
         
            -
                     
     | 
| 
       634 
     | 
    
         
            -
                    doc2 = Nokogiri::HTML("<div id='unique'>two</div>")
         
     | 
| 
      
 640 
     | 
    
         
            +
                    # Errors should be emitted while parsing only, and should not change when moving nodes.
         
     | 
| 
      
 641 
     | 
    
         
            +
                    doc1 = Nokogiri::HTML("<html><body><diva id='unique'>one</diva></body></html>")
         
     | 
| 
      
 642 
     | 
    
         
            +
                    doc2 = Nokogiri::HTML("<html><body><dive id='unique'>two</dive></body></html>")
         
     | 
| 
       635 
643 
     | 
    
         
             
                    node1 = doc1.at_css("#unique")
         
     | 
| 
       636 
644 
     | 
    
         
             
                    node2 = doc2.at_css("#unique")
         
     | 
| 
       637 
     | 
    
         
            -
             
     | 
| 
       638 
     | 
    
         
            -
                     
     | 
| 
      
 645 
     | 
    
         
            +
                    original_errors1 = doc1.errors.dup
         
     | 
| 
      
 646 
     | 
    
         
            +
                    original_errors2 = doc2.errors.dup
         
     | 
| 
      
 647 
     | 
    
         
            +
                    assert original_errors1.any?{|e| e.to_s =~ /Tag diva invalid/ }, "it should complain about the tag name"
         
     | 
| 
      
 648 
     | 
    
         
            +
                    assert original_errors2.any?{|e| e.to_s =~ /Tag dive invalid/ }, "it should complain about the tag name"
         
     | 
| 
       639 
649 
     | 
    
         | 
| 
       640 
650 
     | 
    
         
             
                    node1.add_child node2
         
     | 
| 
       641 
651 
     | 
    
         | 
| 
       642 
     | 
    
         
            -
                    assert_equal  
     | 
| 
       643 
     | 
    
         
            -
                     
     | 
| 
      
 652 
     | 
    
         
            +
                    assert_equal original_errors1, doc1.errors
         
     | 
| 
      
 653 
     | 
    
         
            +
                    assert_equal original_errors2, doc2.errors
         
     | 
| 
       644 
654 
     | 
    
         
             
                  end
         
     | 
| 
       645 
655 
     | 
    
         | 
| 
       646 
656 
     | 
    
         
             
                  def test_silencing_nonparse_errors_during_attribute_insertion_1262
         
     | 
| 
         @@ -3,34 +3,33 @@ require "helper" 
     | 
|
| 
       3 
3 
     | 
    
         | 
| 
       4 
4 
     | 
    
         
             
            module Nokogiri
         
     | 
| 
       5 
5 
     | 
    
         
             
              module HTML
         
     | 
| 
       6 
     | 
    
         
            -
                 
     | 
| 
       7 
     | 
    
         
            -
                   
     | 
| 
       8 
     | 
    
         
            -
                     
     | 
| 
       9 
     | 
    
         
            -
                      doc = Nokogiri::HTML File.open(SHIFT_JIS_HTML, 'rb')
         
     | 
| 
      
 6 
     | 
    
         
            +
                class TestDocumentEncoding < Nokogiri::TestCase
         
     | 
| 
      
 7 
     | 
    
         
            +
                  def test_encoding
         
     | 
| 
      
 8 
     | 
    
         
            +
                    doc = Nokogiri::HTML File.open(SHIFT_JIS_HTML, 'rb')
         
     | 
| 
       10 
9 
     | 
    
         | 
| 
       11 
     | 
    
         
            -
             
     | 
| 
      
 10 
     | 
    
         
            +
                    hello = "こんにちは"
         
     | 
| 
       12 
11 
     | 
    
         | 
| 
       13 
     | 
    
         
            -
             
     | 
| 
       14 
     | 
    
         
            -
             
     | 
| 
       15 
     | 
    
         
            -
             
     | 
| 
      
 12 
     | 
    
         
            +
                    assert_match doc.encoding, doc.to_html
         
     | 
| 
      
 13 
     | 
    
         
            +
                    assert_match hello.encode('Shift_JIS'), doc.to_html
         
     | 
| 
      
 14 
     | 
    
         
            +
                    assert_equal 'Shift_JIS', doc.to_html.encoding.name
         
     | 
| 
       16 
15 
     | 
    
         | 
| 
       17 
     | 
    
         
            -
             
     | 
| 
       18 
     | 
    
         
            -
             
     | 
| 
       19 
     | 
    
         
            -
             
     | 
| 
       20 
     | 
    
         
            -
             
     | 
| 
      
 16 
     | 
    
         
            +
                    assert_match hello, doc.to_html(:encoding => 'UTF-8')
         
     | 
| 
      
 17 
     | 
    
         
            +
                    assert_match 'UTF-8', doc.to_html(:encoding => 'UTF-8')
         
     | 
| 
      
 18 
     | 
    
         
            +
                    assert_match 'UTF-8', doc.to_html(:encoding => 'UTF-8').encoding.name
         
     | 
| 
      
 19 
     | 
    
         
            +
                  end
         
     | 
| 
       21 
20 
     | 
    
         | 
| 
       22 
     | 
    
         
            -
             
     | 
| 
       23 
     | 
    
         
            -
             
     | 
| 
      
 21 
     | 
    
         
            +
                  def test_encoding_without_charset
         
     | 
| 
      
 22 
     | 
    
         
            +
                    doc = Nokogiri::HTML File.open(SHIFT_JIS_NO_CHARSET, 'r:cp932:cp932').read
         
     | 
| 
       24 
23 
     | 
    
         | 
| 
       25 
     | 
    
         
            -
             
     | 
| 
      
 24 
     | 
    
         
            +
                    hello = "こんにちは"
         
     | 
| 
       26 
25 
     | 
    
         | 
| 
       27 
     | 
    
         
            -
             
     | 
| 
       28 
     | 
    
         
            -
             
     | 
| 
       29 
     | 
    
         
            -
             
     | 
| 
       30 
     | 
    
         
            -
             
     | 
| 
      
 26 
     | 
    
         
            +
                    assert_match hello, doc.content
         
     | 
| 
      
 27 
     | 
    
         
            +
                    assert_match hello, doc.to_html(:encoding => 'UTF-8')
         
     | 
| 
      
 28 
     | 
    
         
            +
                    assert_match 'UTF-8', doc.to_html(:encoding => 'UTF-8').encoding.name
         
     | 
| 
      
 29 
     | 
    
         
            +
                  end
         
     | 
| 
       31 
30 
     | 
    
         | 
| 
       32 
     | 
    
         
            -
             
     | 
| 
       33 
     | 
    
         
            -
             
     | 
| 
      
 31 
     | 
    
         
            +
                  def test_default_to_encoding_from_string
         
     | 
| 
      
 32 
     | 
    
         
            +
                    bad_charset = <<-eohtml
         
     | 
| 
       34 
33 
     | 
    
         
             
            <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"   "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
         
     | 
| 
       35 
34 
     | 
    
         
             
            <html>
         
     | 
| 
       36 
35 
     | 
    
         
             
            <head>
         
     | 
| 
         @@ -40,33 +39,33 @@ module Nokogiri 
     | 
|
| 
       40 
39 
     | 
    
         
             
              <a href="http://tenderlovemaking.com/">blah!</a>
         
     | 
| 
       41 
40 
     | 
    
         
             
            </body>
         
     | 
| 
       42 
41 
     | 
    
         
             
            </html>
         
     | 
| 
       43 
     | 
    
         
            -
             
     | 
| 
       44 
     | 
    
         
            -
             
     | 
| 
       45 
     | 
    
         
            -
             
     | 
| 
      
 42 
     | 
    
         
            +
                    eohtml
         
     | 
| 
      
 43 
     | 
    
         
            +
                    doc = Nokogiri::HTML(bad_charset)
         
     | 
| 
      
 44 
     | 
    
         
            +
                    assert_equal bad_charset.encoding.name, doc.encoding
         
     | 
| 
       46 
45 
     | 
    
         | 
| 
       47 
     | 
    
         
            -
             
     | 
| 
       48 
     | 
    
         
            -
             
     | 
| 
       49 
     | 
    
         
            -
             
     | 
| 
      
 46 
     | 
    
         
            +
                    doc = Nokogiri.parse(bad_charset)
         
     | 
| 
      
 47 
     | 
    
         
            +
                    assert_equal bad_charset.encoding.name, doc.encoding
         
     | 
| 
      
 48 
     | 
    
         
            +
                  end
         
     | 
| 
       50 
49 
     | 
    
         | 
| 
       51 
     | 
    
         
            -
             
     | 
| 
       52 
     | 
    
         
            -
             
     | 
| 
       53 
     | 
    
         
            -
             
     | 
| 
       54 
     | 
    
         
            -
             
     | 
| 
       55 
     | 
    
         
            -
             
     | 
| 
      
 50 
     | 
    
         
            +
                  def test_encoding_non_utf8
         
     | 
| 
      
 51 
     | 
    
         
            +
                    orig = '日本語が上手です'
         
     | 
| 
      
 52 
     | 
    
         
            +
                    bin = Encoding::ASCII_8BIT
         
     | 
| 
      
 53 
     | 
    
         
            +
                    [Encoding::Shift_JIS, Encoding::EUC_JP].each do |enc|
         
     | 
| 
      
 54 
     | 
    
         
            +
                      html = <<-eohtml.encode(enc)
         
     | 
| 
       56 
55 
     | 
    
         
             
            <html>
         
     | 
| 
       57 
56 
     | 
    
         
             
            <meta http-equiv="Content-Type" content="text/html; charset=#{enc.name}">
         
     | 
| 
       58 
57 
     | 
    
         
             
            <title xml:lang="ja">#{orig}</title></html>
         
     | 
| 
       59 
     | 
    
         
            -
             
     | 
| 
       60 
     | 
    
         
            -
             
     | 
| 
       61 
     | 
    
         
            -
             
     | 
| 
       62 
     | 
    
         
            -
             
     | 
| 
       63 
     | 
    
         
            -
             
     | 
| 
       64 
     | 
    
         
            -
             
     | 
| 
       65 
     | 
    
         
            -
                      end
         
     | 
| 
      
 58 
     | 
    
         
            +
                      eohtml
         
     | 
| 
      
 59 
     | 
    
         
            +
                      text = Nokogiri::HTML.parse(html).at('title').inner_text
         
     | 
| 
      
 60 
     | 
    
         
            +
                      assert_equal(
         
     | 
| 
      
 61 
     | 
    
         
            +
                        orig.encode(enc).force_encoding(bin),
         
     | 
| 
      
 62 
     | 
    
         
            +
                        text.encode(enc).force_encoding(bin)
         
     | 
| 
      
 63 
     | 
    
         
            +
                      )
         
     | 
| 
       66 
64 
     | 
    
         
             
                    end
         
     | 
| 
      
 65 
     | 
    
         
            +
                  end
         
     | 
| 
       67 
66 
     | 
    
         | 
| 
       68 
     | 
    
         
            -
             
     | 
| 
       69 
     | 
    
         
            -
             
     | 
| 
      
 67 
     | 
    
         
            +
                  def test_encoding_with_a_bad_name
         
     | 
| 
      
 68 
     | 
    
         
            +
                    bad_charset = <<-eohtml
         
     | 
| 
       70 
69 
     | 
    
         
             
            <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"   "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
         
     | 
| 
       71 
70 
     | 
    
         
             
            <html>
         
     | 
| 
       72 
71 
     | 
    
         
             
            <head>
         
     | 
| 
         @@ -76,23 +75,16 @@ module Nokogiri 
     | 
|
| 
       76 
75 
     | 
    
         
             
              <a href="http://tenderlovemaking.com/">blah!</a>
         
     | 
| 
       77 
76 
     | 
    
         
             
            </body>
         
     | 
| 
       78 
77 
     | 
    
         
             
            </html>
         
     | 
| 
       79 
     | 
    
         
            -
             
     | 
| 
       80 
     | 
    
         
            -
             
     | 
| 
       81 
     | 
    
         
            -
             
     | 
| 
       82 
     | 
    
         
            -
             
     | 
| 
       83 
     | 
    
         
            -
                    end
         
     | 
| 
      
 78 
     | 
    
         
            +
                    eohtml
         
     | 
| 
      
 79 
     | 
    
         
            +
                    doc = Nokogiri::HTML(bad_charset, nil, 'askldjfhalsdfjhlkasdfjh')
         
     | 
| 
      
 80 
     | 
    
         
            +
                    assert_equal ['http://tenderlovemaking.com/'],
         
     | 
| 
      
 81 
     | 
    
         
            +
                      doc.css('a').map { |a| a['href'] }
         
     | 
| 
       84 
82 
     | 
    
         
             
                  end
         
     | 
| 
       85 
83 
     | 
    
         
             
                end
         
     | 
| 
       86 
84 
     | 
    
         | 
| 
       87 
85 
     | 
    
         
             
                class TestDocumentEncodingDetection < Nokogiri::TestCase
         
     | 
| 
       88 
     | 
    
         
            -
                   
     | 
| 
       89 
     | 
    
         
            -
                     
     | 
| 
       90 
     | 
    
         
            -
                      IO.binread(file)
         
     | 
| 
       91 
     | 
    
         
            -
                    end
         
     | 
| 
       92 
     | 
    
         
            -
                  else
         
     | 
| 
       93 
     | 
    
         
            -
                    def binread(file)
         
     | 
| 
       94 
     | 
    
         
            -
                      IO.read(file)
         
     | 
| 
       95 
     | 
    
         
            -
                    end
         
     | 
| 
      
 86 
     | 
    
         
            +
                  def binread(file)
         
     | 
| 
      
 87 
     | 
    
         
            +
                    IO.binread(file)
         
     | 
| 
       96 
88 
     | 
    
         
             
                  end
         
     | 
| 
       97 
89 
     | 
    
         | 
| 
       98 
90 
     | 
    
         
             
                  def binopen(file)
         
     | 
| 
         @@ -9,19 +9,17 @@ module Nokogiri 
     | 
|
| 
       9 
9 
     | 
    
         
             
                    @html = Nokogiri::HTML.parse(File.read(HTML_FILE), HTML_FILE)
         
     | 
| 
       10 
10 
     | 
    
         
             
                  end
         
     | 
| 
       11 
11 
     | 
    
         | 
| 
       12 
     | 
    
         
            -
                   
     | 
| 
       13 
     | 
    
         
            -
                     
     | 
| 
       14 
     | 
    
         
            -
             
     | 
| 
       15 
     | 
    
         
            -
             
     | 
| 
       16 
     | 
    
         
            -
             
     | 
| 
       17 
     | 
    
         
            -
                    end
         
     | 
| 
      
 12 
     | 
    
         
            +
                  def test_inspect_encoding
         
     | 
| 
      
 13 
     | 
    
         
            +
                    fragment = "<div>こんにちは!</div>".encode('EUC-JP')
         
     | 
| 
      
 14 
     | 
    
         
            +
                    f = Nokogiri::HTML::DocumentFragment.parse fragment
         
     | 
| 
      
 15 
     | 
    
         
            +
                    assert_equal "こんにちは!", f.content
         
     | 
| 
      
 16 
     | 
    
         
            +
                  end
         
     | 
| 
       18 
17 
     | 
    
         | 
| 
       19 
     | 
    
         
            -
             
     | 
| 
       20 
     | 
    
         
            -
             
     | 
| 
       21 
     | 
    
         
            -
             
     | 
| 
       22 
     | 
    
         
            -
             
     | 
| 
       23 
     | 
    
         
            -
             
     | 
| 
       24 
     | 
    
         
            -
                    end
         
     | 
| 
      
 18 
     | 
    
         
            +
                  def test_html_parse_encoding
         
     | 
| 
      
 19 
     | 
    
         
            +
                    fragment = "<div>こんにちは!</div>".encode 'EUC-JP'
         
     | 
| 
      
 20 
     | 
    
         
            +
                    f = Nokogiri::HTML.fragment fragment
         
     | 
| 
      
 21 
     | 
    
         
            +
                    assert_equal 'EUC-JP', f.document.encoding
         
     | 
| 
      
 22 
     | 
    
         
            +
                    assert_equal "こんにちは!", f.content
         
     | 
| 
       25 
23 
     | 
    
         
             
                  end
         
     | 
| 
       26 
24 
     | 
    
         | 
| 
       27 
25 
     | 
    
         
             
                  def test_colons_are_not_removed
         
     | 
| 
         @@ -270,7 +268,7 @@ module Nokogiri 
     | 
|
| 
       270 
268 
     | 
    
         | 
| 
       271 
269 
     | 
    
         
             
                  def test_capturing_nonparse_errors_during_fragment_clone
         
     | 
| 
       272 
270 
     | 
    
         
             
                    # see https://github.com/sparklemotion/nokogiri/issues/1196 for background
         
     | 
| 
       273 
     | 
    
         
            -
                    original = Nokogiri::HTML.fragment("<div id='unique'></div>")
         
     | 
| 
      
 271 
     | 
    
         
            +
                    original = Nokogiri::HTML.fragment("<div id='unique'></div><div id='unique'></div>")
         
     | 
| 
       274 
272 
     | 
    
         
             
                    original_errors = original.errors.dup
         
     | 
| 
       275 
273 
     | 
    
         | 
| 
       276 
274 
     | 
    
         
             
                    copy = original.dup
         
     | 
| 
         @@ -278,19 +276,20 @@ module Nokogiri 
     | 
|
| 
       278 
276 
     | 
    
         
             
                  end
         
     | 
| 
       279 
277 
     | 
    
         | 
| 
       280 
278 
     | 
    
         
             
                  def test_capturing_nonparse_errors_during_node_copy_between_fragments
         
     | 
| 
       281 
     | 
    
         
            -
                     
     | 
| 
       282 
     | 
    
         
            -
             
     | 
| 
       283 
     | 
    
         
            -
                     
     | 
| 
       284 
     | 
    
         
            -
                    frag2 = Nokogiri::HTML.fragment("<div id='unique'>two</div>")
         
     | 
| 
      
 279 
     | 
    
         
            +
                    # Errors should be emitted while parsing only, and should not change when moving nodes.
         
     | 
| 
      
 280 
     | 
    
         
            +
                    frag1 = Nokogiri::HTML.fragment("<diva id='unique'>one</diva>")
         
     | 
| 
      
 281 
     | 
    
         
            +
                    frag2 = Nokogiri::HTML.fragment("<dive id='unique'>two</dive>")
         
     | 
| 
       285 
282 
     | 
    
         
             
                    node1 = frag1.at_css("#unique")
         
     | 
| 
       286 
283 
     | 
    
         
             
                    node2 = frag2.at_css("#unique")
         
     | 
| 
      
 284 
     | 
    
         
            +
                    original_errors1 = frag1.errors.dup
         
     | 
| 
      
 285 
     | 
    
         
            +
                    original_errors2 = frag2.errors.dup
         
     | 
| 
      
 286 
     | 
    
         
            +
                    assert original_errors1.any?{|e| e.to_s =~ /Tag diva invalid/ }, "it should complain about the tag name"
         
     | 
| 
      
 287 
     | 
    
         
            +
                    assert original_errors2.any?{|e| e.to_s =~ /Tag dive invalid/ }, "it should complain about the tag name"
         
     | 
| 
       287 
288 
     | 
    
         | 
| 
       288 
     | 
    
         
            -
                     
     | 
| 
       289 
     | 
    
         
            -
             
     | 
| 
       290 
     | 
    
         
            -
                    node1.add_child node2 # we should also not see an error on stderr
         
     | 
| 
      
 289 
     | 
    
         
            +
                    node1.add_child node2
         
     | 
| 
       291 
290 
     | 
    
         | 
| 
       292 
     | 
    
         
            -
                    assert_equal  
     | 
| 
       293 
     | 
    
         
            -
                     
     | 
| 
      
 291 
     | 
    
         
            +
                    assert_equal original_errors1, frag1.errors
         
     | 
| 
      
 292 
     | 
    
         
            +
                    assert_equal original_errors2, frag2.errors
         
     | 
| 
       294 
293 
     | 
    
         
             
                  end
         
     | 
| 
       295 
294 
     | 
    
         
             
                end
         
     | 
| 
       296 
295 
     | 
    
         
             
              end
         
     | 
    
        data/test/html/test_node.rb
    CHANGED
    
    | 
         @@ -192,5 +192,21 @@ module Nokogiri 
     | 
|
| 
       192 
192 
     | 
    
         
             
                    end
         
     | 
| 
       193 
193 
     | 
    
         
             
                  end
         
     | 
| 
       194 
194 
     | 
    
         
             
                end
         
     | 
| 
      
 195 
     | 
    
         
            +
             
     | 
| 
      
 196 
     | 
    
         
            +
                def test_GH_1042
         
     | 
| 
      
 197 
     | 
    
         
            +
                  file = File.join(ASSETS_DIR, 'GH_1042.html');
         
     | 
| 
      
 198 
     | 
    
         
            +
                  html = Nokogiri::HTML(File.read(file))
         
     | 
| 
      
 199 
     | 
    
         
            +
                  table = html.xpath("//table")[1]
         
     | 
| 
      
 200 
     | 
    
         
            +
                  trs = table.xpath("tr").drop(1)
         
     | 
| 
      
 201 
     | 
    
         
            +
             
     | 
| 
      
 202 
     | 
    
         
            +
                  # the jruby implementation of drop uses dup() on the IRubyObject (which
         
     | 
| 
      
 203 
     | 
    
         
            +
                  # is NOT the same dup() method on the ruby Object) which produces a
         
     | 
| 
      
 204 
     | 
    
         
            +
                  # shallow clone. a shallow clone of a valid XMLNode triggers several
         
     | 
| 
      
 205 
     | 
    
         
            +
                  # NullPointerException on inspect() since loads of invariants
         
     | 
| 
      
 206 
     | 
    
         
            +
                  # are not set. the fix for GH1042 ensures a proper working clone.
         
     | 
| 
      
 207 
     | 
    
         
            +
                  assert_nothing_raised do
         
     | 
| 
      
 208 
     | 
    
         
            +
                    trs.inspect
         
     | 
| 
      
 209 
     | 
    
         
            +
                  end
         
     | 
| 
      
 210 
     | 
    
         
            +
                end
         
     | 
| 
       195 
211 
     | 
    
         
             
              end
         
     | 
| 
       196 
212 
     | 
    
         
             
            end
         
     | 
| 
         @@ -3,24 +3,22 @@ require "helper" 
     | 
|
| 
       3 
3 
     | 
    
         | 
| 
       4 
4 
     | 
    
         
             
            module Nokogiri
         
     | 
| 
       5 
5 
     | 
    
         
             
              module HTML
         
     | 
| 
       6 
     | 
    
         
            -
                 
     | 
| 
       7 
     | 
    
         
            -
                   
     | 
| 
       8 
     | 
    
         
            -
                     
     | 
| 
       9 
     | 
    
         
            -
                      doc = Nokogiri::HTML File.open(SHIFT_JIS_HTML, 'rb')
         
     | 
| 
      
 6 
     | 
    
         
            +
                class TestNodeEncoding < Nokogiri::TestCase
         
     | 
| 
      
 7 
     | 
    
         
            +
                  def test_inner_html
         
     | 
| 
      
 8 
     | 
    
         
            +
                    doc = Nokogiri::HTML File.open(SHIFT_JIS_HTML, 'rb')
         
     | 
| 
       10 
9 
     | 
    
         | 
| 
       11 
     | 
    
         
            -
             
     | 
| 
      
 10 
     | 
    
         
            +
                    hello = "こんにちは"
         
     | 
| 
       12 
11 
     | 
    
         | 
| 
       13 
     | 
    
         
            -
             
     | 
| 
       14 
     | 
    
         
            -
             
     | 
| 
       15 
     | 
    
         
            -
             
     | 
| 
      
 12 
     | 
    
         
            +
                    contents = doc.at('h2').inner_html
         
     | 
| 
      
 13 
     | 
    
         
            +
                    assert_equal doc.encoding, contents.encoding.name
         
     | 
| 
      
 14 
     | 
    
         
            +
                    assert_match hello.encode('Shift_JIS'), contents
         
     | 
| 
       16 
15 
     | 
    
         | 
| 
       17 
     | 
    
         
            -
             
     | 
| 
       18 
     | 
    
         
            -
             
     | 
| 
      
 16 
     | 
    
         
            +
                    contents = doc.at('h2').inner_html(:encoding => 'UTF-8')
         
     | 
| 
      
 17 
     | 
    
         
            +
                    assert_match hello, contents
         
     | 
| 
       19 
18 
     | 
    
         | 
| 
       20 
     | 
    
         
            -
             
     | 
| 
       21 
     | 
    
         
            -
             
     | 
| 
       22 
     | 
    
         
            -
             
     | 
| 
       23 
     | 
    
         
            -
                    end
         
     | 
| 
      
 19 
     | 
    
         
            +
                    doc.encoding = 'UTF-8'
         
     | 
| 
      
 20 
     | 
    
         
            +
                    contents = doc.at('h2').inner_html
         
     | 
| 
      
 21 
     | 
    
         
            +
                    assert_match hello, contents
         
     | 
| 
       24 
22 
     | 
    
         
             
                  end
         
     | 
| 
       25 
23 
     | 
    
         
             
                end
         
     | 
| 
       26 
24 
     | 
    
         
             
              end
         
     | 
| 
         @@ -61,6 +61,20 @@ module Nokogiri 
     | 
|
| 
       61 
61 
     | 
    
         
             
                    ns_attrs = n.to_xml.scan(/\bxmlns(?::.+?)?=/)
         
     | 
| 
       62 
62 
     | 
    
         
             
                    assert_equal 3, ns_attrs.length
         
     | 
| 
       63 
63 
     | 
    
         
             
                  end
         
     | 
| 
      
 64 
     | 
    
         
            +
             
     | 
| 
      
 65 
     | 
    
         
            +
                  def test_namespaces_under_memory_pressure_issue1155
         
     | 
| 
      
 66 
     | 
    
         
            +
                    skip("JRuby doesn't do GC.") if Nokogiri.jruby?
         
     | 
| 
      
 67 
     | 
    
         
            +
             
     | 
| 
      
 68 
     | 
    
         
            +
                    # this test is here to emit warnings when run under valgrind
         
     | 
| 
      
 69 
     | 
    
         
            +
                    # see https://github.com/sparklemotion/nokogiri/issues/1155 for background
         
     | 
| 
      
 70 
     | 
    
         
            +
                    filename = File.join ASSETS_DIR, 'namespace_pressure_test.xml'
         
     | 
| 
      
 71 
     | 
    
         
            +
                    doc = Nokogiri::XML File.open(filename)
         
     | 
| 
      
 72 
     | 
    
         
            +
             
     | 
| 
      
 73 
     | 
    
         
            +
                    # bizarrely, can't repro without the call to #to_a
         
     | 
| 
      
 74 
     | 
    
         
            +
                    doc.xpath('//namespace::*').to_a.each do |ns|
         
     | 
| 
      
 75 
     | 
    
         
            +
                      ns.inspect
         
     | 
| 
      
 76 
     | 
    
         
            +
                    end
         
     | 
| 
      
 77 
     | 
    
         
            +
                  end
         
     | 
| 
       64 
78 
     | 
    
         
             
                end
         
     | 
| 
       65 
79 
     | 
    
         
             
              end
         
     | 
| 
       66 
80 
     | 
    
         
             
            end
         
     | 
    
        data/test/test_reader.rb
    CHANGED
    
    | 
         @@ -386,6 +386,25 @@ class TestReader < Nokogiri::TestCase 
     | 
|
| 
       386 
386 
     | 
    
         
             
                              reader.map { |n| n.namespace_uri })
         
     | 
| 
       387 
387 
     | 
    
         
             
              end
         
     | 
| 
       388 
388 
     | 
    
         | 
| 
      
 389 
     | 
    
         
            +
              def test_namespaced_attributes
         
     | 
| 
      
 390 
     | 
    
         
            +
                reader = Nokogiri::XML::Reader.from_memory(<<-eoxml)
         
     | 
| 
      
 391 
     | 
    
         
            +
                <x xmlns:edi='http://ecommerce.example.org/schema' xmlns:commons="http://rets.org/xsd/RETSCommons">
         
     | 
| 
      
 392 
     | 
    
         
            +
                  <edi:foo commons:street-number="43">hello</edi:foo>
         
     | 
| 
      
 393 
     | 
    
         
            +
                  <y edi:name="francis" bacon="87"/>
         
     | 
| 
      
 394 
     | 
    
         
            +
                </x>
         
     | 
| 
      
 395 
     | 
    
         
            +
                eoxml
         
     | 
| 
      
 396 
     | 
    
         
            +
                attr_ns = []
         
     | 
| 
      
 397 
     | 
    
         
            +
                while reader.read
         
     | 
| 
      
 398 
     | 
    
         
            +
                  if reader.node_type == Nokogiri::XML::Node::ELEMENT_NODE
         
     | 
| 
      
 399 
     | 
    
         
            +
                    reader.attribute_nodes.each {|attr| attr_ns << (attr.namespace.nil? ? nil : attr.namespace.prefix) }
         
     | 
| 
      
 400 
     | 
    
         
            +
                  end
         
     | 
| 
      
 401 
     | 
    
         
            +
                end
         
     | 
| 
      
 402 
     | 
    
         
            +
                assert_equal(['commons',
         
     | 
| 
      
 403 
     | 
    
         
            +
                              'edi',
         
     | 
| 
      
 404 
     | 
    
         
            +
                              nil],
         
     | 
| 
      
 405 
     | 
    
         
            +
                             attr_ns)
         
     | 
| 
      
 406 
     | 
    
         
            +
              end
         
     | 
| 
      
 407 
     | 
    
         
            +
             
     | 
| 
       389 
408 
     | 
    
         
             
              def test_local_name
         
     | 
| 
       390 
409 
     | 
    
         
             
                reader = Nokogiri::XML::Reader.from_memory(<<-eoxml)
         
     | 
| 
       391 
410 
     | 
    
         
             
                <x xmlns:edi='http://ecommerce.example.org/schema'>
         
     | 
| 
         @@ -32,7 +32,7 @@ class TestXsltTransforms < Nokogiri::TestCase 
     | 
|
| 
       32 
32 
     | 
    
         
             
                assert_match %r{<h1>Grandma</h1>}, result
         
     | 
| 
       33 
33 
     | 
    
         | 
| 
       34 
34 
     | 
    
         
             
                assert result = style.apply_to(@doc)
         
     | 
| 
       35 
     | 
    
         
            -
                assert_match %r{<h1></h1>}, result
     | 
| 
      
 35 
     | 
    
         
            +
                assert_match %r{<h1></h1>|<h1/>}, result
         
     | 
| 
       36 
36 
     | 
    
         
             
              end
         
     | 
| 
       37 
37 
     | 
    
         | 
| 
       38 
38 
     | 
    
         
             
              def test_transform_with_output_style
         
     | 
| 
         @@ -95,7 +95,9 @@ encoding="iso-8859-1" indent="yes"/> 
     | 
|
| 
       95 
95 
     | 
    
         
             
            </xsl:stylesheet>
         
     | 
| 
       96 
96 
     | 
    
         
             
                  eoxslt
         
     | 
| 
       97 
97 
     | 
    
         
             
                end
         
     | 
| 
       98 
     | 
    
         
            -
                 
     | 
| 
      
 98 
     | 
    
         
            +
                result = xslt.apply_to(@doc, ['title', 'foo'])
         
     | 
| 
      
 99 
     | 
    
         
            +
                assert_no_match(/<td>/, result)
         
     | 
| 
      
 100 
     | 
    
         
            +
                assert_match(/This is an adjacent/, result)
         
     | 
| 
       99 
101 
     | 
    
         
             
              end
         
     | 
| 
       100 
102 
     | 
    
         | 
| 
       101 
103 
     | 
    
         
             
              def test_transform_arg_error
         
     | 
| 
         @@ -268,7 +270,7 @@ encoding="iso-8859-1" indent="yes"/> 
     | 
|
| 
       268 
270 
     | 
    
         
             
                    <xsl:output encoding="UTF-8" indent="yes" method="xml" />
         
     | 
| 
       269 
271 
     | 
    
         | 
| 
       270 
272 
     | 
    
         
             
                    <xsl:template match="/">
         
     | 
| 
       271 
     | 
    
         
            -
                      <xsl:value-of select="/a" />
     | 
| 
      
 273 
     | 
    
         
            +
                      <a><xsl:value-of select="/a" /></a>
         
     | 
| 
       272 
274 
     | 
    
         
             
                    </xsl:template>
         
     | 
| 
       273 
275 
     | 
    
         
             
                  </xsl:stylesheet>
         
     | 
| 
       274 
276 
     | 
    
         
             
                EOXSL
         
     | 
    
        data/test/xml/sax/test_parser.rb
    CHANGED
    
    | 
         @@ -167,10 +167,8 @@ module Nokogiri 
     | 
|
| 
       167 
167 
     | 
    
         
             
                      assert @parser.document.errors
         
     | 
| 
       168 
168 
     | 
    
         
             
                      assert @parser.document.errors.length > 0
         
     | 
| 
       169 
169 
     | 
    
         | 
| 
       170 
     | 
    
         
            -
                       
     | 
| 
       171 
     | 
    
         
            -
                         
     | 
| 
       172 
     | 
    
         
            -
                          assert_equal 'UTF-8', error.message.encoding.name
         
     | 
| 
       173 
     | 
    
         
            -
                        end
         
     | 
| 
      
 170 
     | 
    
         
            +
                      doc.errors.each do |error|
         
     | 
| 
      
 171 
     | 
    
         
            +
                        assert_equal 'UTF-8', error.message.encoding.name
         
     | 
| 
       174 
172 
     | 
    
         
             
                      end
         
     | 
| 
       175 
173 
     | 
    
         | 
| 
       176 
174 
     | 
    
         
             
                      # when using JRuby Nokogiri, more errors will be generated as the DOM
         
     | 
| 
         @@ -207,42 +205,41 @@ module Nokogiri 
     | 
|
| 
       207 
205 
     | 
    
         
             
                        @parser.parse_io(f, encoding)
         
     | 
| 
       208 
206 
     | 
    
         
             
                      }
         
     | 
| 
       209 
207 
     | 
    
         
             
                      assert(@parser.document.cdata_blocks.length > 0)
         
     | 
| 
       210 
     | 
    
         
            -
             
     | 
| 
       211 
     | 
    
         
            -
             
     | 
| 
       212 
     | 
    
         
            -
             
     | 
| 
       213 
     | 
    
         
            -
             
     | 
| 
       214 
     | 
    
         
            -
             
     | 
| 
       215 
     | 
    
         
            -
             
     | 
| 
       216 
     | 
    
         
            -
             
     | 
| 
       217 
     | 
    
         
            -
             
     | 
| 
       218 
     | 
    
         
            -
             
     | 
| 
       219 
     | 
    
         
            -
             
     | 
| 
       220 
     | 
    
         
            -
             
     | 
| 
       221 
     | 
    
         
            -
             
     | 
| 
       222 
     | 
    
         
            -
             
     | 
| 
       223 
     | 
    
         
            -
             
     | 
| 
       224 
     | 
    
         
            -
             
     | 
| 
       225 
     | 
    
         
            -
             
     | 
| 
       226 
     | 
    
         
            -
             
     | 
| 
       227 
     | 
    
         
            -
             
     | 
| 
       228 
     | 
    
         
            -
             
     | 
| 
       229 
     | 
    
         
            -
             
     | 
| 
       230 
     | 
    
         
            -
             
     | 
| 
       231 
     | 
    
         
            -
             
     | 
| 
       232 
     | 
    
         
            -
             
     | 
| 
       233 
     | 
    
         
            -
             
     | 
| 
       234 
     | 
    
         
            -
             
     | 
| 
       235 
     | 
    
         
            -
             
     | 
| 
       236 
     | 
    
         
            -
             
     | 
| 
       237 
     | 
    
         
            -
             
     | 
| 
       238 
     | 
    
         
            -
             
     | 
| 
       239 
     | 
    
         
            -
             
     | 
| 
       240 
     | 
    
         
            -
             
     | 
| 
       241 
     | 
    
         
            -
             
     | 
| 
       242 
     | 
    
         
            -
             
     | 
| 
       243 
     | 
    
         
            -
                        end
         
     | 
| 
       244 
     | 
    
         
            -
                        assert called
         
     | 
| 
      
 208 
     | 
    
         
            +
             
     | 
| 
      
 209 
     | 
    
         
            +
                      called = false
         
     | 
| 
      
 210 
     | 
    
         
            +
                      @parser.document.start_elements.flatten.each do |thing|
         
     | 
| 
      
 211 
     | 
    
         
            +
                        assert_equal 'UTF-8', thing.encoding.name
         
     | 
| 
      
 212 
     | 
    
         
            +
                        called = true
         
     | 
| 
      
 213 
     | 
    
         
            +
                      end
         
     | 
| 
      
 214 
     | 
    
         
            +
                      assert called
         
     | 
| 
      
 215 
     | 
    
         
            +
             
     | 
| 
      
 216 
     | 
    
         
            +
                      called = false
         
     | 
| 
      
 217 
     | 
    
         
            +
                      @parser.document.end_elements.flatten.each do |thing|
         
     | 
| 
      
 218 
     | 
    
         
            +
                        assert_equal 'UTF-8', thing.encoding.name
         
     | 
| 
      
 219 
     | 
    
         
            +
                        called = true
         
     | 
| 
      
 220 
     | 
    
         
            +
                      end
         
     | 
| 
      
 221 
     | 
    
         
            +
                      assert called
         
     | 
| 
      
 222 
     | 
    
         
            +
             
     | 
| 
      
 223 
     | 
    
         
            +
                      called = false
         
     | 
| 
      
 224 
     | 
    
         
            +
                      @parser.document.data.each do |thing|
         
     | 
| 
      
 225 
     | 
    
         
            +
                        assert_equal 'UTF-8', thing.encoding.name
         
     | 
| 
      
 226 
     | 
    
         
            +
                        called = true
         
     | 
| 
      
 227 
     | 
    
         
            +
                      end
         
     | 
| 
      
 228 
     | 
    
         
            +
                      assert called
         
     | 
| 
      
 229 
     | 
    
         
            +
             
     | 
| 
      
 230 
     | 
    
         
            +
                      called = false
         
     | 
| 
      
 231 
     | 
    
         
            +
                      @parser.document.comments.flatten.each do |thing|
         
     | 
| 
      
 232 
     | 
    
         
            +
                        assert_equal 'UTF-8', thing.encoding.name
         
     | 
| 
      
 233 
     | 
    
         
            +
                        called = true
         
     | 
| 
      
 234 
     | 
    
         
            +
                      end
         
     | 
| 
      
 235 
     | 
    
         
            +
                      assert called
         
     | 
| 
      
 236 
     | 
    
         
            +
             
     | 
| 
      
 237 
     | 
    
         
            +
                      called = false
         
     | 
| 
      
 238 
     | 
    
         
            +
                      @parser.document.cdata_blocks.flatten.each do |thing|
         
     | 
| 
      
 239 
     | 
    
         
            +
                        assert_equal 'UTF-8', thing.encoding.name
         
     | 
| 
      
 240 
     | 
    
         
            +
                        called = true
         
     | 
| 
       245 
241 
     | 
    
         
             
                      end
         
     | 
| 
      
 242 
     | 
    
         
            +
                      assert called
         
     | 
| 
       246 
243 
     | 
    
         
             
                    end
         
     | 
| 
       247 
244 
     | 
    
         | 
| 
       248 
245 
     | 
    
         
             
                    def test_parse_file
         
     |