nokogiri 1.6.7.2-java → 1.6.8-java
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/.cross_rubies +2 -0
- data/.travis.yml +19 -9
- data/CHANGELOG.rdoc +73 -5
- data/CONTRIBUTING.md +42 -0
- data/Gemfile +10 -9
- data/LICENSE.txt +1 -1
- data/Manifest.txt +7 -2
- data/README.md +23 -27
- data/ROADMAP.md +11 -1
- data/Rakefile +36 -17
- data/bin/nokogiri +2 -2
- data/dependencies.yml +29 -4
- data/ext/java/nokogiri/HtmlElementDescription.java +5 -2
- data/ext/java/nokogiri/NokogiriService.java +19 -0
- data/ext/java/nokogiri/XmlAttr.java +3 -1
- data/ext/java/nokogiri/XmlDocumentFragment.java +0 -14
- data/ext/java/nokogiri/XmlNode.java +106 -63
- data/ext/java/nokogiri/XmlXpathContext.java +12 -12
- data/ext/java/nokogiri/XsltStylesheet.java +11 -4
- data/ext/java/nokogiri/internals/HtmlDomParserContext.java +8 -1
- data/ext/java/nokogiri/internals/NokogiriErrorHandler.java +1 -2
- data/ext/java/nokogiri/internals/NokogiriHelpers.java +7 -7
- data/ext/java/nokogiri/internals/NokogiriNonStrictErrorHandler4NekoHtml.java +1 -1
- data/ext/java/nokogiri/internals/NokogiriStrictErrorHandler.java +0 -1
- data/ext/java/nokogiri/internals/NokogiriXsltErrorListener.java +3 -3
- data/ext/java/nokogiri/internals/ParserContext.java +4 -0
- data/ext/java/nokogiri/internals/SaveContextVisitor.java +18 -13
- data/ext/nokogiri/extconf.rb +163 -79
- data/ext/nokogiri/html_document.c +6 -6
- data/ext/nokogiri/html_element_description.c +1 -1
- data/ext/nokogiri/html_entity_lookup.c +1 -1
- data/ext/nokogiri/html_sax_parser_context.c +4 -4
- data/ext/nokogiri/html_sax_push_parser.c +2 -2
- data/ext/nokogiri/nokogiri.c +0 -7
- data/ext/nokogiri/nokogiri.h +1 -34
- data/ext/nokogiri/xml_attr.c +2 -2
- data/ext/nokogiri/xml_comment.c +1 -1
- data/ext/nokogiri/xml_document.c +20 -22
- data/ext/nokogiri/xml_encoding_handler.c +3 -3
- data/ext/nokogiri/xml_entity_reference.c +1 -1
- data/ext/nokogiri/xml_namespace.c +56 -17
- data/ext/nokogiri/xml_node.c +73 -67
- data/ext/nokogiri/xml_node_set.c +164 -146
- data/ext/nokogiri/xml_node_set.h +3 -4
- data/ext/nokogiri/xml_processing_instruction.c +2 -2
- data/ext/nokogiri/xml_reader.c +5 -18
- data/ext/nokogiri/xml_sax_parser.c +9 -12
- data/ext/nokogiri/xml_sax_parser_context.c +1 -1
- data/ext/nokogiri/xml_sax_push_parser.c +1 -1
- data/ext/nokogiri/xml_schema.c +1 -1
- data/ext/nokogiri/xml_syntax_error.c +0 -4
- data/ext/nokogiri/xml_syntax_error.h +0 -1
- data/ext/nokogiri/xml_text.c +1 -1
- data/ext/nokogiri/xml_xpath_context.c +15 -24
- data/ext/nokogiri/xslt_stylesheet.c +6 -6
- data/lib/nekohtml.jar +0 -0
- data/lib/nokogiri.rb +14 -7
- data/lib/nokogiri/css/parser.rb +8 -2
- data/lib/nokogiri/css/parser.y +7 -2
- data/lib/nokogiri/html/document.rb +4 -2
- data/lib/nokogiri/nokogiri.jar +0 -0
- data/lib/nokogiri/version.rb +1 -1
- data/lib/nokogiri/xml/document.rb +7 -1
- data/lib/nokogiri/xml/dtd.rb +4 -4
- data/lib/nokogiri/xml/node.rb +6 -10
- data/lib/nokogiri/xml/node_set.rb +3 -3
- data/lib/nokogiri/xml/parse_options.rb +22 -0
- data/lib/serializer.jar +0 -0
- data/lib/xalan.jar +0 -0
- data/lib/xercesImpl.jar +0 -0
- data/lib/xml-apis.jar +0 -0
- data/tasks/test.rb +5 -0
- data/test/css/test_parser.rb +7 -1
- data/test/files/GH_1042.html +18 -0
- data/test/files/namespace_pressure_test.xml +1684 -0
- data/test/files/tlm.html +2 -1
- data/test/helper.rb +4 -0
- data/test/html/sax/test_parser.rb +2 -2
- data/test/html/test_document.rb +47 -11
- data/test/html/test_document_encoding.rb +55 -58
- data/test/html/test_document_fragment.rb +27 -23
- data/test/html/test_node.rb +16 -0
- data/test/html/test_node_encoding.rb +71 -13
- data/test/namespaces/test_namespaces_in_parsed_doc.rb +14 -0
- data/test/test_css_cache.rb +1 -1
- data/test/test_encoding_handler.rb +2 -0
- data/test/test_xslt_transforms.rb +38 -3
- data/test/xml/sax/test_parser.rb +54 -53
- data/test/xml/test_document.rb +7 -2
- data/test/xml/test_document_encoding.rb +19 -16
- data/test/xml/test_document_fragment.rb +12 -0
- data/test/xml/test_dtd_encoding.rb +0 -2
- data/test/xml/test_namespace.rb +2 -2
- data/test/xml/test_node.rb +15 -4
- data/test/xml/test_node_attributes.rb +6 -0
- data/test/xml/test_node_encoding.rb +49 -87
- data/test/xml/test_node_reparenting.rb +193 -18
- data/test/xml/test_node_set.rb +1 -1
- data/test/xml/test_reader.rb +589 -0
- data/test/xml/test_reader_encoding.rb +100 -102
- data/test/xml/test_unparented_node.rb +14 -1
- data/test/xslt/test_exception_handling.rb +1 -1
- data/test_all +47 -33
- metadata +38 -36
- data/CHANGELOG.ja.rdoc +0 -1057
- data/test/test_reader.rb +0 -558
data/test/files/tlm.html
CHANGED
@@ -46,7 +46,7 @@
|
|
46
46
|
.codesnip-container {border:1px solid #ccc; background:#eee; padding: 5px;margin:10px;}
|
47
47
|
</style>
|
48
48
|
<link rel="EditURI" type="application/rsd+xml" title="RSD" href="http://tenderlovemaking.com/xmlrpc.php?rsd" />
|
49
|
-
<link rel="wlwmanifest" type="application/wlwmanifest+xml" href="http://tenderlovemaking.com/wp-includes/wlwmanifest.xml" />
|
49
|
+
<link rel="wlwmanifest" type="application/wlwmanifest+xml" href="http://tenderlovemaking.com/wp-includes/wlwmanifest.xml" />
|
50
50
|
<meta name="generator" content="WordPress 2.6" />
|
51
51
|
|
52
52
|
<link rel="stylesheet" type="text/css" href="http://tenderlovemaking.com/wp-content/plugins/spell_checker/spell_checker.css" />
|
@@ -826,6 +826,7 @@ page.<span class="me1">body</span> =~ /<textarea<span class="br0">[</span
|
|
826
826
|
</ul>
|
827
827
|
</div>
|
828
828
|
|
829
|
+
<div id="abc.123" class='special.character'>Special character div</div>
|
829
830
|
<div id="footer">
|
830
831
|
A design by <a href="http://blog.geminigeek.com/wordpress-theme">GeminiGeek</a> • Powered by <a href="http://wordpress.org">Wordpress</a><!--• <a href="#">CSS</a> • <a href="#">xHTML 1.0</a>-->
|
831
832
|
</div>
|
data/test/helper.rb
CHANGED
@@ -7,6 +7,10 @@ require 'tempfile'
|
|
7
7
|
require 'pp'
|
8
8
|
|
9
9
|
require 'nokogiri'
|
10
|
+
if ENV['TEST_NOKOGIRI_WITH_LIBXML_RUBY']
|
11
|
+
require 'libxml'
|
12
|
+
warn "#{__FILE__}:#{__LINE__}: loaded libxml-ruby '#{LibXML::XML::VERSION}'"
|
13
|
+
end
|
10
14
|
|
11
15
|
warn "#{__FILE__}:#{__LINE__}: version info: #{Nokogiri::VERSION_INFO.inspect}"
|
12
16
|
|
@@ -29,9 +29,9 @@ module Nokogiri
|
|
29
29
|
# Take a look at the comment in test_parse_document to know
|
30
30
|
# a possible reason for this difference.
|
31
31
|
if Nokogiri.uses_libxml?
|
32
|
-
assert_equal
|
32
|
+
assert_equal 1111, @parser.document.end_elements.length
|
33
33
|
else
|
34
|
-
assert_equal
|
34
|
+
assert_equal 1120, @parser.document.end_elements.length
|
35
35
|
end
|
36
36
|
end
|
37
37
|
|
data/test/html/test_document.rb
CHANGED
@@ -97,7 +97,7 @@ module Nokogiri
|
|
97
97
|
rescue Exception => e
|
98
98
|
skip("This test needs the internet. Skips if no internet available. (#{e})")
|
99
99
|
end
|
100
|
-
doc = Nokogiri::HTML html ,"http:/foobar.foobar/"
|
100
|
+
doc = Nokogiri::HTML html ,"http:/foobar.foobar/", 'UTF-8'
|
101
101
|
refute_empty doc.to_s, "Document should not be empty"
|
102
102
|
end
|
103
103
|
|
@@ -422,7 +422,7 @@ eohtml
|
|
422
422
|
eohtml
|
423
423
|
set = html.css('p, a')
|
424
424
|
assert_equal(2, set.length)
|
425
|
-
assert_equal ['a tag', 'p tag'].sort, set.map
|
425
|
+
assert_equal ['a tag', 'p tag'].sort, set.map(&:content).sort
|
426
426
|
end
|
427
427
|
|
428
428
|
def test_inner_text
|
@@ -479,6 +479,15 @@ eohtml
|
|
479
479
|
assert_equal 1, found.length
|
480
480
|
end
|
481
481
|
|
482
|
+
def test_find_by_css_with_escaped_characters
|
483
|
+
found_without_escape = @html.css("div[@id='abc.123']")
|
484
|
+
found_by_id = @html.css('#abc\.123')
|
485
|
+
found_by_class = @html.css('.special\.character')
|
486
|
+
assert_equal 1, found_without_escape.length
|
487
|
+
assert_equal found_by_id, found_without_escape
|
488
|
+
assert_equal found_by_class, found_without_escape
|
489
|
+
end
|
490
|
+
|
482
491
|
def test_find_with_function
|
483
492
|
assert @html.css("div:awesome() h1", Class.new {
|
484
493
|
def awesome divs
|
@@ -591,7 +600,7 @@ eohtml
|
|
591
600
|
eohtml
|
592
601
|
list = doc.css('.red')
|
593
602
|
assert_equal 2, list.length
|
594
|
-
assert_equal %w{ RED RED }, list.map
|
603
|
+
assert_equal %w{ RED RED }, list.map(&:text)
|
595
604
|
end
|
596
605
|
|
597
606
|
def test_parse_can_take_io
|
@@ -628,19 +637,20 @@ eohtml
|
|
628
637
|
end
|
629
638
|
|
630
639
|
def test_capturing_nonparse_errors_during_node_copy_between_docs
|
631
|
-
|
632
|
-
|
633
|
-
|
634
|
-
doc2 = Nokogiri::HTML("<div id='unique'>two</div>")
|
640
|
+
# Errors should be emitted while parsing only, and should not change when moving nodes.
|
641
|
+
doc1 = Nokogiri::HTML("<html><body><diva id='unique'>one</diva></body></html>")
|
642
|
+
doc2 = Nokogiri::HTML("<html><body><dive id='unique'>two</dive></body></html>")
|
635
643
|
node1 = doc1.at_css("#unique")
|
636
644
|
node2 = doc2.at_css("#unique")
|
637
|
-
|
638
|
-
|
645
|
+
original_errors1 = doc1.errors.dup
|
646
|
+
original_errors2 = doc2.errors.dup
|
647
|
+
assert original_errors1.any?{|e| e.to_s =~ /Tag diva invalid/ }, "it should complain about the tag name"
|
648
|
+
assert original_errors2.any?{|e| e.to_s =~ /Tag dive invalid/ }, "it should complain about the tag name"
|
639
649
|
|
640
650
|
node1.add_child node2
|
641
651
|
|
642
|
-
assert_equal
|
643
|
-
|
652
|
+
assert_equal original_errors1, doc1.errors
|
653
|
+
assert_equal original_errors2, doc2.errors
|
644
654
|
end
|
645
655
|
|
646
656
|
def test_silencing_nonparse_errors_during_attribute_insertion_1262
|
@@ -660,6 +670,32 @@ eohtml
|
|
660
670
|
Nokogiri::XML::Element.new("div", doc).set_attribute('id', 'unique-issue-1262')
|
661
671
|
assert_equal 0, doc.errors.length
|
662
672
|
end
|
673
|
+
|
674
|
+
it "skips encoding for script tags" do
|
675
|
+
html = Nokogiri::HTML <<-EOHTML
|
676
|
+
<html>
|
677
|
+
<head>
|
678
|
+
<script>var isGreater = 4 > 5;</script>
|
679
|
+
</head>
|
680
|
+
<body></body>
|
681
|
+
</html>
|
682
|
+
EOHTML
|
683
|
+
node = html.xpath("//script").first
|
684
|
+
assert_equal("var isGreater = 4 > 5;", node.inner_html)
|
685
|
+
end
|
686
|
+
|
687
|
+
it "skips encoding for style tags" do
|
688
|
+
html = Nokogiri::HTML <<-EOHTML
|
689
|
+
<html>
|
690
|
+
<head>
|
691
|
+
<style>tr > div { display:block; }</style>
|
692
|
+
</head>
|
693
|
+
<body></body>
|
694
|
+
</html>
|
695
|
+
EOHTML
|
696
|
+
node = html.xpath("//style").first
|
697
|
+
assert_equal("tr > div { display:block; }", node.inner_html)
|
698
|
+
end
|
663
699
|
end
|
664
700
|
end
|
665
701
|
end
|
@@ -3,34 +3,33 @@ require "helper"
|
|
3
3
|
|
4
4
|
module Nokogiri
|
5
5
|
module HTML
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
doc = Nokogiri::HTML File.open(SHIFT_JIS_HTML, 'rb')
|
6
|
+
class TestDocumentEncoding < Nokogiri::TestCase
|
7
|
+
def test_encoding
|
8
|
+
doc = Nokogiri::HTML File.open(SHIFT_JIS_HTML, 'rb')
|
10
9
|
|
11
|
-
|
10
|
+
hello = "こんにちは"
|
12
11
|
|
13
|
-
|
14
|
-
|
15
|
-
|
12
|
+
assert_match doc.encoding, doc.to_html
|
13
|
+
assert_match hello.encode('Shift_JIS'), doc.to_html
|
14
|
+
assert_equal 'Shift_JIS', doc.to_html.encoding.name
|
16
15
|
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
16
|
+
assert_match hello, doc.to_html(:encoding => 'UTF-8')
|
17
|
+
assert_match 'UTF-8', doc.to_html(:encoding => 'UTF-8')
|
18
|
+
assert_match 'UTF-8', doc.to_html(:encoding => 'UTF-8').encoding.name
|
19
|
+
end
|
21
20
|
|
22
|
-
|
23
|
-
|
21
|
+
def test_encoding_without_charset
|
22
|
+
doc = Nokogiri::HTML File.open(SHIFT_JIS_NO_CHARSET, 'r:cp932:cp932').read
|
24
23
|
|
25
|
-
|
24
|
+
hello = "こんにちは"
|
26
25
|
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
26
|
+
assert_match hello, doc.content
|
27
|
+
assert_match hello, doc.to_html(:encoding => 'UTF-8')
|
28
|
+
assert_match 'UTF-8', doc.to_html(:encoding => 'UTF-8').encoding.name
|
29
|
+
end
|
31
30
|
|
32
|
-
|
33
|
-
|
31
|
+
def test_default_to_encoding_from_string
|
32
|
+
bad_charset = <<-eohtml
|
34
33
|
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
35
34
|
<html>
|
36
35
|
<head>
|
@@ -40,33 +39,33 @@ module Nokogiri
|
|
40
39
|
<a href="http://tenderlovemaking.com/">blah!</a>
|
41
40
|
</body>
|
42
41
|
</html>
|
43
|
-
|
44
|
-
|
45
|
-
|
42
|
+
eohtml
|
43
|
+
doc = Nokogiri::HTML(bad_charset)
|
44
|
+
assert_equal bad_charset.encoding.name, doc.encoding
|
46
45
|
|
47
|
-
|
48
|
-
|
49
|
-
|
46
|
+
doc = Nokogiri.parse(bad_charset)
|
47
|
+
assert_equal bad_charset.encoding.name, doc.encoding
|
48
|
+
end
|
50
49
|
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
50
|
+
def test_encoding_non_utf8
|
51
|
+
orig = '日本語が上手です'
|
52
|
+
bin = Encoding::ASCII_8BIT
|
53
|
+
[Encoding::Shift_JIS, Encoding::EUC_JP].each do |enc|
|
54
|
+
html = <<-eohtml.encode(enc)
|
56
55
|
<html>
|
57
56
|
<meta http-equiv="Content-Type" content="text/html; charset=#{enc.name}">
|
58
57
|
<title xml:lang="ja">#{orig}</title></html>
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
end
|
58
|
+
eohtml
|
59
|
+
text = Nokogiri::HTML.parse(html).at('title').inner_text
|
60
|
+
assert_equal(
|
61
|
+
orig.encode(enc).force_encoding(bin),
|
62
|
+
text.encode(enc).force_encoding(bin)
|
63
|
+
)
|
66
64
|
end
|
65
|
+
end
|
67
66
|
|
68
|
-
|
69
|
-
|
67
|
+
def test_encoding_with_a_bad_name
|
68
|
+
bad_charset = <<-eohtml
|
70
69
|
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
71
70
|
<html>
|
72
71
|
<head>
|
@@ -76,23 +75,21 @@ module Nokogiri
|
|
76
75
|
<a href="http://tenderlovemaking.com/">blah!</a>
|
77
76
|
</body>
|
78
77
|
</html>
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
78
|
+
eohtml
|
79
|
+
doc = Nokogiri::HTML(bad_charset, nil, 'askldjfhalsdfjhlkasdfjh')
|
80
|
+
assert_equal ['http://tenderlovemaking.com/'],
|
81
|
+
doc.css('a').map { |a| a['href'] }
|
82
|
+
end
|
83
|
+
|
84
|
+
def test_empty_doc_encoding
|
85
|
+
encoding = 'US-ASCII'
|
86
|
+
assert_equal encoding, Nokogiri::HTML.parse(nil, nil, encoding).encoding
|
84
87
|
end
|
85
88
|
end
|
86
89
|
|
87
90
|
class TestDocumentEncodingDetection < Nokogiri::TestCase
|
88
|
-
|
89
|
-
|
90
|
-
IO.binread(file)
|
91
|
-
end
|
92
|
-
else
|
93
|
-
def binread(file)
|
94
|
-
IO.read(file)
|
95
|
-
end
|
91
|
+
def binread(file)
|
92
|
+
IO.binread(file)
|
96
93
|
end
|
97
94
|
|
98
95
|
def binopen(file)
|
@@ -115,16 +112,16 @@ module Nokogiri
|
|
115
112
|
def test_document_xhtml_enc
|
116
113
|
[ENCODING_XHTML_FILE, ENCODING_HTML_FILE].each { |file|
|
117
114
|
doc_from_string_enc = Nokogiri::HTML(binread(file), nil, 'Shift_JIS')
|
118
|
-
ary_from_string_enc = doc_from_string_enc.xpath('//p/text()').map
|
115
|
+
ary_from_string_enc = doc_from_string_enc.xpath('//p/text()').map(&:text)
|
119
116
|
|
120
117
|
doc_from_string = Nokogiri::HTML(binread(file))
|
121
|
-
ary_from_string = doc_from_string.xpath('//p/text()').map
|
118
|
+
ary_from_string = doc_from_string.xpath('//p/text()').map(&:text)
|
122
119
|
|
123
120
|
doc_from_file_enc = Nokogiri::HTML(binopen(file), nil, 'Shift_JIS')
|
124
|
-
ary_from_file_enc = doc_from_file_enc.xpath('//p/text()').map
|
121
|
+
ary_from_file_enc = doc_from_file_enc.xpath('//p/text()').map(&:text)
|
125
122
|
|
126
123
|
doc_from_file = Nokogiri::HTML(binopen(file))
|
127
|
-
ary_from_file = doc_from_file.xpath('//p/text()').map
|
124
|
+
ary_from_file = doc_from_file.xpath('//p/text()').map(&:text)
|
128
125
|
|
129
126
|
title = 'たこ焼き仮面'
|
130
127
|
|
@@ -9,19 +9,22 @@ module Nokogiri
|
|
9
9
|
@html = Nokogiri::HTML.parse(File.read(HTML_FILE), HTML_FILE)
|
10
10
|
end
|
11
11
|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
end
|
12
|
+
def test_inspect_encoding
|
13
|
+
fragment = "<div>こんにちは!</div>".encode('EUC-JP')
|
14
|
+
f = Nokogiri::HTML::DocumentFragment.parse fragment
|
15
|
+
assert_equal "こんにちは!", f.content
|
16
|
+
end
|
18
17
|
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
18
|
+
def test_html_parse_encoding
|
19
|
+
fragment = "<div>こんにちは!</div>".encode 'EUC-JP'
|
20
|
+
f = Nokogiri::HTML.fragment fragment
|
21
|
+
assert_equal 'EUC-JP', f.document.encoding
|
22
|
+
assert_equal "こんにちは!", f.content
|
23
|
+
end
|
24
|
+
|
25
|
+
def test_unlink_empty_document
|
26
|
+
frag = Nokogiri::HTML::DocumentFragment.parse('').unlink # must_not_raise
|
27
|
+
assert_nil frag.parent
|
25
28
|
end
|
26
29
|
|
27
30
|
def test_colons_are_not_removed
|
@@ -232,7 +235,7 @@ module Nokogiri
|
|
232
235
|
|
233
236
|
def test_element_children_counts
|
234
237
|
doc = Nokogiri::HTML::DocumentFragment.parse(" <div> </div>\n ")
|
235
|
-
|
238
|
+
assert_equal 1, doc.element_children.count
|
236
239
|
end
|
237
240
|
|
238
241
|
def test_malformed_fragment_is_corrected
|
@@ -270,7 +273,7 @@ module Nokogiri
|
|
270
273
|
|
271
274
|
def test_capturing_nonparse_errors_during_fragment_clone
|
272
275
|
# see https://github.com/sparklemotion/nokogiri/issues/1196 for background
|
273
|
-
original = Nokogiri::HTML.fragment("<div id='unique'></div>")
|
276
|
+
original = Nokogiri::HTML.fragment("<div id='unique'></div><div id='unique'></div>")
|
274
277
|
original_errors = original.errors.dup
|
275
278
|
|
276
279
|
copy = original.dup
|
@@ -278,19 +281,20 @@ module Nokogiri
|
|
278
281
|
end
|
279
282
|
|
280
283
|
def test_capturing_nonparse_errors_during_node_copy_between_fragments
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
frag2 = Nokogiri::HTML.fragment("<div id='unique'>two</div>")
|
284
|
+
# Errors should be emitted while parsing only, and should not change when moving nodes.
|
285
|
+
frag1 = Nokogiri::HTML.fragment("<diva id='unique'>one</diva>")
|
286
|
+
frag2 = Nokogiri::HTML.fragment("<dive id='unique'>two</dive>")
|
285
287
|
node1 = frag1.at_css("#unique")
|
286
288
|
node2 = frag2.at_css("#unique")
|
289
|
+
original_errors1 = frag1.errors.dup
|
290
|
+
original_errors2 = frag2.errors.dup
|
291
|
+
assert original_errors1.any?{|e| e.to_s =~ /Tag diva invalid/ }, "it should complain about the tag name"
|
292
|
+
assert original_errors2.any?{|e| e.to_s =~ /Tag dive invalid/ }, "it should complain about the tag name"
|
287
293
|
|
288
|
-
|
289
|
-
|
290
|
-
node1.add_child node2 # we should also not see an error on stderr
|
294
|
+
node1.add_child node2
|
291
295
|
|
292
|
-
assert_equal
|
293
|
-
|
296
|
+
assert_equal original_errors1, frag1.errors
|
297
|
+
assert_equal original_errors2, frag2.errors
|
294
298
|
end
|
295
299
|
end
|
296
300
|
end
|
data/test/html/test_node.rb
CHANGED
@@ -192,5 +192,21 @@ module Nokogiri
|
|
192
192
|
end
|
193
193
|
end
|
194
194
|
end
|
195
|
+
|
196
|
+
def test_GH_1042
|
197
|
+
file = File.join(ASSETS_DIR, 'GH_1042.html');
|
198
|
+
html = Nokogiri::HTML(File.read(file))
|
199
|
+
table = html.xpath("//table")[1]
|
200
|
+
trs = table.xpath("tr").drop(1)
|
201
|
+
|
202
|
+
# the jruby implementation of drop uses dup() on the IRubyObject (which
|
203
|
+
# is NOT the same dup() method on the ruby Object) which produces a
|
204
|
+
# shallow clone. a shallow clone of a valid XMLNode triggers several
|
205
|
+
# NullPointerException on inspect() since loads of invariants
|
206
|
+
# are not set. the fix for GH1042 ensures a proper working clone.
|
207
|
+
assert_nothing_raised do
|
208
|
+
trs.inspect
|
209
|
+
end
|
210
|
+
end
|
195
211
|
end
|
196
212
|
end
|
@@ -3,23 +3,81 @@ require "helper"
|
|
3
3
|
|
4
4
|
module Nokogiri
|
5
5
|
module HTML
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
6
|
+
class TestNodeEncoding < Nokogiri::TestCase
|
7
|
+
def setup
|
8
|
+
super
|
9
|
+
@html = Nokogiri::HTML(File.open(NICH_FILE, "rb"))
|
10
|
+
end
|
11
|
+
|
12
|
+
def test_get_attribute
|
13
|
+
node = @html.css('a').first
|
14
|
+
assert_equal 'UTF-8', node['href'].encoding.name
|
15
|
+
end
|
16
|
+
|
17
|
+
def test_text_encoding_is_utf_8
|
18
|
+
assert_equal 'UTF-8', @html.text.encoding.name
|
19
|
+
end
|
20
|
+
|
21
|
+
def test_serialize_encoding_html
|
22
|
+
assert_equal @html.encoding.downcase,
|
23
|
+
@html.serialize.encoding.name.downcase
|
24
|
+
|
25
|
+
@doc = Nokogiri::HTML(@html.serialize)
|
26
|
+
assert_equal @html.serialize, @doc.serialize
|
27
|
+
end
|
28
|
+
|
29
|
+
def test_encode_special_chars
|
30
|
+
foo = @html.css('a').first.encode_special_chars('foo')
|
31
|
+
assert_equal 'UTF-8', foo.encoding.name
|
32
|
+
end
|
33
|
+
|
34
|
+
def test_content
|
35
|
+
node = @html.css('a').first
|
36
|
+
assert_equal 'UTF-8', node.content.encoding.name
|
37
|
+
end
|
38
|
+
|
39
|
+
def test_name
|
40
|
+
node = @html.css('a').first
|
41
|
+
assert_equal 'UTF-8', node.name.encoding.name
|
42
|
+
end
|
43
|
+
|
44
|
+
def test_path
|
45
|
+
node = @html.css('a').first
|
46
|
+
assert_equal 'UTF-8', node.path.encoding.name
|
47
|
+
end
|
48
|
+
|
49
|
+
def test_inner_html
|
50
|
+
doc = Nokogiri::HTML File.open(SHIFT_JIS_HTML, 'rb')
|
10
51
|
|
11
|
-
|
52
|
+
hello = "こんにちは"
|
12
53
|
|
13
|
-
|
14
|
-
|
15
|
-
|
54
|
+
contents = doc.at('h2').inner_html
|
55
|
+
assert_equal doc.encoding, contents.encoding.name
|
56
|
+
assert_match hello.encode('Shift_JIS'), contents
|
16
57
|
|
17
|
-
|
18
|
-
|
58
|
+
contents = doc.at('h2').inner_html(:encoding => 'UTF-8')
|
59
|
+
assert_match hello, contents
|
60
|
+
|
61
|
+
doc.encoding = 'UTF-8'
|
62
|
+
contents = doc.at('h2').inner_html
|
63
|
+
assert_match hello, contents
|
64
|
+
end
|
65
|
+
|
66
|
+
def test_encoding_GH_1113
|
67
|
+
doc = Nokogiri::HTML::Document.new
|
68
|
+
hex = '<p>&#x1f340;</p>'
|
69
|
+
decimal = '<p>&#127808;</p>'
|
70
|
+
encoded = '<p>🍀</p>'
|
71
|
+
|
72
|
+
doc.encoding = 'UTF-8'
|
73
|
+
[hex, decimal, encoded].each do |document|
|
74
|
+
assert_equal encoded, doc.fragment(document).to_s
|
75
|
+
end
|
19
76
|
|
20
|
-
|
21
|
-
|
22
|
-
|
77
|
+
doc.encoding = 'US-ASCII'
|
78
|
+
expected = Nokogiri.jruby? ? hex : decimal
|
79
|
+
[hex, decimal].each do |document|
|
80
|
+
assert_equal expected, doc.fragment(document).to_s
|
23
81
|
end
|
24
82
|
end
|
25
83
|
end
|