nokogiri 1.4.7-java → 1.5.0.beta.1-java
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- data.tar.gz.sig +0 -0
- data/CHANGELOG.ja.rdoc +8 -83
- data/CHANGELOG.rdoc +6 -80
- data/Manifest.txt +4 -74
- data/README.ja.rdoc +5 -1
- data/README.rdoc +8 -22
- data/Rakefile +79 -60
- data/bin/nokogiri +1 -6
- data/deps.rip +5 -0
- data/ext/nokogiri/extconf.rb +32 -53
- data/ext/nokogiri/nokogiri.c +0 -2
- data/ext/nokogiri/nokogiri.h +0 -9
- data/ext/nokogiri/xml_document.c +0 -14
- data/ext/nokogiri/xml_dtd.c +2 -2
- data/ext/nokogiri/xml_io.c +7 -32
- data/ext/nokogiri/xml_node.c +31 -103
- data/ext/nokogiri/xml_node_set.c +8 -8
- data/ext/nokogiri/xml_reader.c +1 -20
- data/ext/nokogiri/xml_sax_parser.c +3 -5
- data/ext/nokogiri/xml_sax_parser_context.c +0 -40
- data/ext/nokogiri/xml_xpath_context.c +2 -35
- data/ext/nokogiri/xslt_stylesheet.c +6 -124
- data/lib/isorelax.jar +0 -0
- data/lib/jing.jar +0 -0
- data/lib/nekodtd.jar +0 -0
- data/lib/nekohtml.jar +0 -0
- data/lib/nokogiri.rb +7 -3
- data/lib/nokogiri/css.rb +3 -6
- data/lib/nokogiri/css/generated_parser.rb +669 -0
- data/lib/nokogiri/css/generated_tokenizer.rb +145 -0
- data/lib/nokogiri/css/parser.rb +70 -665
- data/lib/nokogiri/css/parser.y +1 -6
- data/lib/nokogiri/css/tokenizer.rb +3 -148
- data/lib/nokogiri/css/tokenizer.rex +1 -1
- data/lib/nokogiri/css/xpath_visitor.rb +14 -16
- data/lib/nokogiri/decorators/slop.rb +3 -5
- data/lib/nokogiri/html.rb +3 -2
- data/lib/nokogiri/html/document.rb +18 -134
- data/lib/nokogiri/html/document_fragment.rb +21 -26
- data/lib/nokogiri/html/element_description_defaults.rb +671 -0
- data/lib/nokogiri/html/sax/parser.rb +2 -6
- data/lib/nokogiri/nokogiri.jar +0 -0
- data/lib/nokogiri/version.rb +4 -9
- data/lib/nokogiri/xml/attribute_decl.rb +1 -1
- data/lib/nokogiri/xml/builder.rb +1 -1
- data/lib/nokogiri/xml/document.rb +3 -27
- data/lib/nokogiri/xml/document_fragment.rb +2 -9
- data/lib/nokogiri/xml/dtd.rb +1 -12
- data/lib/nokogiri/xml/element_decl.rb +1 -1
- data/lib/nokogiri/xml/entity_decl.rb +1 -1
- data/lib/nokogiri/xml/node.rb +75 -172
- data/lib/nokogiri/xml/node/save_options.rb +0 -10
- data/lib/nokogiri/xml/node_set.rb +3 -28
- data/lib/nokogiri/xml/parse_options.rb +0 -8
- data/lib/nokogiri/xml/reader.rb +6 -44
- data/lib/nokogiri/xml/sax/document.rb +5 -9
- data/lib/nokogiri/xml/schema.rb +1 -7
- data/lib/nokogiri/xslt.rb +5 -9
- data/lib/xercesImpl.jar +0 -0
- data/tasks/cross_compile.rb +12 -27
- data/tasks/test.rb +0 -0
- data/test/css/test_parser.rb +19 -40
- data/test/css/test_tokenizer.rb +0 -8
- data/test/helper.rb +1 -4
- data/test/html/sax/test_parser.rb +21 -47
- data/test/html/sax/test_parser_context.rb +2 -2
- data/test/html/test_document.rb +3 -58
- data/test/html/test_document_encoding.rb +0 -53
- data/test/html/test_document_fragment.rb +13 -82
- data/test/html/test_element_description.rb +4 -2
- data/test/html/test_node.rb +0 -9
- data/test/test_memory_leak.rb +2 -57
- data/test/test_nokogiri.rb +14 -20
- data/test/test_reader.rb +7 -47
- data/test/test_xslt_transforms.rb +5 -8
- data/test/xml/sax/test_parser.rb +17 -34
- data/test/xml/sax/test_parser_context.rb +0 -50
- data/test/xml/sax/test_push_parser.rb +1 -18
- data/test/xml/test_attr.rb +4 -31
- data/test/xml/test_attribute_decl.rb +7 -3
- data/test/xml/test_builder.rb +5 -5
- data/test/xml/test_cdata.rb +3 -3
- data/test/xml/test_document.rb +18 -15
- data/test/xml/test_document_fragment.rb +20 -19
- data/test/xml/test_dtd.rb +13 -18
- data/test/xml/test_element_content.rb +1 -1
- data/test/xml/test_element_decl.rb +1 -1
- data/test/xml/test_entity_decl.rb +12 -10
- data/test/xml/test_namespace.rb +7 -5
- data/test/xml/test_node.rb +15 -54
- data/test/xml/test_node_reparenting.rb +42 -85
- data/test/xml/test_node_set.rb +2 -61
- data/test/xml/test_schema.rb +0 -5
- data/test/xml/test_text.rb +2 -11
- data/test/xml/test_unparented_node.rb +1 -1
- data/test/xml/test_xpath.rb +7 -43
- metadata +442 -473
- metadata.gz.sig +0 -0
- data/.gemtest +0 -0
- data/ext/nokogiri/depend +0 -358
- data/ext/nokogiri/libcharset-1.dll +0 -0
- data/ext/nokogiri/libexslt.dll +0 -0
- data/ext/nokogiri/libiconv-2.dll +0 -0
- data/ext/nokogiri/libxml2.dll +0 -0
- data/ext/nokogiri/libxslt.dll +0 -0
- data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
- data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
- data/ext/nokogiri/zlib1.dll +0 -0
- data/lib/nokogiri/css/parser_extras.rb +0 -91
- data/lib/nokogiri/ffi/encoding_handler.rb +0 -42
- data/lib/nokogiri/ffi/html/document.rb +0 -28
- data/lib/nokogiri/ffi/html/element_description.rb +0 -81
- data/lib/nokogiri/ffi/html/entity_lookup.rb +0 -16
- data/lib/nokogiri/ffi/html/sax/parser_context.rb +0 -38
- data/lib/nokogiri/ffi/io_callbacks.rb +0 -42
- data/lib/nokogiri/ffi/libxml.rb +0 -420
- data/lib/nokogiri/ffi/structs/common_node.rb +0 -38
- data/lib/nokogiri/ffi/structs/html_elem_desc.rb +0 -24
- data/lib/nokogiri/ffi/structs/html_entity_desc.rb +0 -13
- data/lib/nokogiri/ffi/structs/xml_alloc.rb +0 -16
- data/lib/nokogiri/ffi/structs/xml_attr.rb +0 -20
- data/lib/nokogiri/ffi/structs/xml_attribute.rb +0 -27
- data/lib/nokogiri/ffi/structs/xml_buffer.rb +0 -16
- data/lib/nokogiri/ffi/structs/xml_char_encoding_handler.rb +0 -11
- data/lib/nokogiri/ffi/structs/xml_document.rb +0 -117
- data/lib/nokogiri/ffi/structs/xml_dtd.rb +0 -28
- data/lib/nokogiri/ffi/structs/xml_element.rb +0 -26
- data/lib/nokogiri/ffi/structs/xml_element_content.rb +0 -17
- data/lib/nokogiri/ffi/structs/xml_entity.rb +0 -32
- data/lib/nokogiri/ffi/structs/xml_enumeration.rb +0 -12
- data/lib/nokogiri/ffi/structs/xml_node.rb +0 -28
- data/lib/nokogiri/ffi/structs/xml_node_set.rb +0 -53
- data/lib/nokogiri/ffi/structs/xml_notation.rb +0 -11
- data/lib/nokogiri/ffi/structs/xml_ns.rb +0 -15
- data/lib/nokogiri/ffi/structs/xml_parser_context.rb +0 -20
- data/lib/nokogiri/ffi/structs/xml_parser_input.rb +0 -19
- data/lib/nokogiri/ffi/structs/xml_relax_ng.rb +0 -14
- data/lib/nokogiri/ffi/structs/xml_sax_handler.rb +0 -51
- data/lib/nokogiri/ffi/structs/xml_sax_push_parser_context.rb +0 -124
- data/lib/nokogiri/ffi/structs/xml_schema.rb +0 -13
- data/lib/nokogiri/ffi/structs/xml_syntax_error.rb +0 -31
- data/lib/nokogiri/ffi/structs/xml_text_reader.rb +0 -12
- data/lib/nokogiri/ffi/structs/xml_xpath_context.rb +0 -38
- data/lib/nokogiri/ffi/structs/xml_xpath_object.rb +0 -35
- data/lib/nokogiri/ffi/structs/xml_xpath_parser_context.rb +0 -20
- data/lib/nokogiri/ffi/structs/xslt_stylesheet.rb +0 -13
- data/lib/nokogiri/ffi/weak_bucket.rb +0 -40
- data/lib/nokogiri/ffi/xml/attr.rb +0 -41
- data/lib/nokogiri/ffi/xml/attribute_decl.rb +0 -27
- data/lib/nokogiri/ffi/xml/cdata.rb +0 -19
- data/lib/nokogiri/ffi/xml/comment.rb +0 -18
- data/lib/nokogiri/ffi/xml/document.rb +0 -174
- data/lib/nokogiri/ffi/xml/document_fragment.rb +0 -21
- data/lib/nokogiri/ffi/xml/dtd.rb +0 -67
- data/lib/nokogiri/ffi/xml/element_content.rb +0 -43
- data/lib/nokogiri/ffi/xml/element_decl.rb +0 -19
- data/lib/nokogiri/ffi/xml/entity_decl.rb +0 -36
- data/lib/nokogiri/ffi/xml/entity_reference.rb +0 -19
- data/lib/nokogiri/ffi/xml/namespace.rb +0 -44
- data/lib/nokogiri/ffi/xml/node.rb +0 -559
- data/lib/nokogiri/ffi/xml/node_set.rb +0 -150
- data/lib/nokogiri/ffi/xml/processing_instruction.rb +0 -20
- data/lib/nokogiri/ffi/xml/reader.rb +0 -236
- data/lib/nokogiri/ffi/xml/relax_ng.rb +0 -85
- data/lib/nokogiri/ffi/xml/sax/parser.rb +0 -143
- data/lib/nokogiri/ffi/xml/sax/parser_context.rb +0 -79
- data/lib/nokogiri/ffi/xml/sax/push_parser.rb +0 -51
- data/lib/nokogiri/ffi/xml/schema.rb +0 -109
- data/lib/nokogiri/ffi/xml/syntax_error.rb +0 -98
- data/lib/nokogiri/ffi/xml/text.rb +0 -18
- data/lib/nokogiri/ffi/xml/xpath.rb +0 -9
- data/lib/nokogiri/ffi/xml/xpath_context.rb +0 -153
- data/lib/nokogiri/ffi/xslt/stylesheet.rb +0 -77
- data/test/decorators/test_slop.rb +0 -16
- data/test/ffi/test_document.rb +0 -35
- data/test/files/encoding.html +0 -82
- data/test/files/encoding.xhtml +0 -84
- data/test/xslt/test_custom_functions.rb +0 -94
data/test/css/test_tokenizer.rb
CHANGED
data/test/helper.rb
CHANGED
@@ -7,7 +7,7 @@ require 'pp'
|
|
7
7
|
|
8
8
|
require 'nokogiri'
|
9
9
|
|
10
|
-
warn "#{__FILE__}:#{__LINE__}:
|
10
|
+
warn "#{__FILE__}:#{__LINE__}: version info: #{Nokogiri::VERSION_INFO.inspect}"
|
11
11
|
|
12
12
|
module Nokogiri
|
13
13
|
class TestCase < MiniTest::Spec
|
@@ -20,9 +20,6 @@ module Nokogiri
|
|
20
20
|
NICH_FILE = File.join(ASSETS_DIR, '2ch.html')
|
21
21
|
SHIFT_JIS_XML = File.join(ASSETS_DIR, 'shift_jis.xml')
|
22
22
|
SHIFT_JIS_HTML = File.join(ASSETS_DIR, 'shift_jis.html')
|
23
|
-
ENCODING_XHTML_FILE = File.join(ASSETS_DIR, 'encoding.xhtml')
|
24
|
-
ENCODING_HTML_FILE = File.join(ASSETS_DIR, 'encoding.html')
|
25
|
-
NOENCODING_FILE = File.join(ASSETS_DIR, 'noencoding.html')
|
26
23
|
PO_XML_FILE = File.join(ASSETS_DIR, 'po.xml')
|
27
24
|
PO_SCHEMA_FILE = File.join(ASSETS_DIR, 'po.xsd')
|
28
25
|
ADDRESS_SCHEMA_FILE = File.join(ASSETS_DIR, 'address_book.rlx')
|
@@ -24,7 +24,14 @@ module Nokogiri
|
|
24
24
|
|
25
25
|
def test_parse_file
|
26
26
|
@parser.parse_file(HTML_FILE)
|
27
|
-
|
27
|
+
|
28
|
+
# Take a look at the comment in test_parse_document to know
|
29
|
+
# a possible reason to this difference.
|
30
|
+
if Nokogiri.uses_libxml?
|
31
|
+
assert_equal 1110, @parser.document.end_elements.length
|
32
|
+
else
|
33
|
+
assert_equal 1119, @parser.document.end_elements.length
|
34
|
+
end
|
28
35
|
end
|
29
36
|
|
30
37
|
def test_parse_file_nil_argument
|
@@ -65,53 +72,20 @@ module Nokogiri
|
|
65
72
|
<p>Paragraph 1</p>
|
66
73
|
<p>Paragraph 2</p>
|
67
74
|
eoxml
|
68
|
-
assert_equal([["html", []], ["body", []], ["p", []], ["p", []]],
|
69
|
-
@parser.document.start_elements)
|
70
|
-
end
|
71
|
-
|
72
|
-
def test_parser_attributes
|
73
|
-
html = <<-eohtml
|
74
|
-
<html>
|
75
|
-
<head>
|
76
|
-
<title>hello</title>
|
77
|
-
</head>
|
78
|
-
<body>
|
79
|
-
<img src="face.jpg" title="daddy & me">
|
80
|
-
<hr noshade size="2">
|
81
|
-
</body>
|
82
|
-
</html>
|
83
|
-
eohtml
|
84
|
-
|
85
|
-
block_called = false
|
86
|
-
@parser.parse(html) { |ctx|
|
87
|
-
block_called = true
|
88
|
-
ctx.replace_entities = true
|
89
|
-
}
|
90
|
-
|
91
|
-
assert block_called
|
92
75
|
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
[
|
104
|
-
|
105
|
-
|
106
|
-
['img', [
|
107
|
-
['src', 'face.jpg'],
|
108
|
-
['title', 'daddy & me']
|
109
|
-
]],
|
110
|
-
['hr', [
|
111
|
-
noshade_value,
|
112
|
-
['size', '2']
|
113
|
-
]]
|
114
|
-
], @parser.document.start_elements
|
76
|
+
# JRuby version is different because of the internal implementation
|
77
|
+
# JRuby version uses NekoHTML which inserts empty "head" elements.
|
78
|
+
#
|
79
|
+
# Currently following features are set:
|
80
|
+
# "http://cyberneko.org/html/properties/names/elems" => "lower"
|
81
|
+
# "http://cyberneko.org/html/properties/names/attrs" => "lower"
|
82
|
+
if Nokogiri.uses_libxml?
|
83
|
+
assert_equal([["html", []], ["body", []], ["p", []], ["p", []]],
|
84
|
+
@parser.document.start_elements)
|
85
|
+
else
|
86
|
+
assert_equal([["html", []], ["head", []], ["body", []], ["p", []], ["p", []]],
|
87
|
+
@parser.document.start_elements)
|
88
|
+
end
|
115
89
|
end
|
116
90
|
end
|
117
91
|
end
|
@@ -8,13 +8,13 @@ module Nokogiri
|
|
8
8
|
class TestParserContext < Nokogiri::SAX::TestCase
|
9
9
|
def test_from_io
|
10
10
|
assert_nothing_raised do
|
11
|
-
ParserContext.new StringIO.new('fo'), 'UTF-8'
|
11
|
+
ctx = ParserContext.new StringIO.new('fo'), 'UTF-8'
|
12
12
|
end
|
13
13
|
end
|
14
14
|
|
15
15
|
def test_from_string
|
16
16
|
assert_nothing_raised do
|
17
|
-
ParserContext.new 'blah blah'
|
17
|
+
ctx = ParserContext.new 'blah blah'
|
18
18
|
end
|
19
19
|
end
|
20
20
|
|
data/test/html/test_document.rb
CHANGED
@@ -87,7 +87,7 @@ module Nokogiri
|
|
87
87
|
assert_nil doc.root
|
88
88
|
end
|
89
89
|
|
90
|
-
unless %w[2 6] === LIBXML_VERSION.split('.')[0..1]
|
90
|
+
unless Nokogiri.uses_libxml? && %w[2 6] === LIBXML_VERSION.split('.')[0..1]
|
91
91
|
# FIXME: this is a hack around broken libxml versions
|
92
92
|
def test_to_xhtml_with_indent
|
93
93
|
doc = Nokogiri::HTML('<html><body><a>foo</a></body></html>')
|
@@ -119,18 +119,6 @@ module Nokogiri
|
|
119
119
|
|
120
120
|
def test_meta_encoding
|
121
121
|
assert_equal 'UTF-8', @html.meta_encoding
|
122
|
-
|
123
|
-
html = Nokogiri::HTML(<<-eohtml)
|
124
|
-
<html>
|
125
|
-
<head>
|
126
|
-
<meta http-equiv="X-Content-Type" content="text/html; charset=Shift_JIS">
|
127
|
-
</head>
|
128
|
-
<body>
|
129
|
-
foo
|
130
|
-
</body>
|
131
|
-
</html>
|
132
|
-
eohtml
|
133
|
-
assert_nil html.meta_encoding
|
134
122
|
end
|
135
123
|
|
136
124
|
def test_meta_encoding=
|
@@ -138,49 +126,6 @@ module Nokogiri
|
|
138
126
|
assert_equal 'EUC-JP', @html.meta_encoding
|
139
127
|
end
|
140
128
|
|
141
|
-
def test_title
|
142
|
-
assert_equal 'Tender Lovemaking ', @html.title
|
143
|
-
doc = Nokogiri::HTML('<html><body>foo</body></html>')
|
144
|
-
assert_nil doc.title
|
145
|
-
end
|
146
|
-
|
147
|
-
def test_title=()
|
148
|
-
doc = Nokogiri::HTML(<<eohtml)
|
149
|
-
<html>
|
150
|
-
<head>
|
151
|
-
<title>old</title>
|
152
|
-
</head>
|
153
|
-
<body>
|
154
|
-
foo
|
155
|
-
</body>
|
156
|
-
</html>
|
157
|
-
eohtml
|
158
|
-
doc.title = 'new'
|
159
|
-
assert_equal 'new', doc.title
|
160
|
-
|
161
|
-
doc = Nokogiri::HTML(<<eohtml)
|
162
|
-
<html>
|
163
|
-
<head>
|
164
|
-
</head>
|
165
|
-
<body>
|
166
|
-
foo
|
167
|
-
</body>
|
168
|
-
</html>
|
169
|
-
eohtml
|
170
|
-
doc.title = 'new'
|
171
|
-
assert_equal 'new', doc.title
|
172
|
-
|
173
|
-
doc = Nokogiri::HTML(<<eohtml)
|
174
|
-
<html>
|
175
|
-
<body>
|
176
|
-
foo
|
177
|
-
</body>
|
178
|
-
</html>
|
179
|
-
eohtml
|
180
|
-
doc.title = 'new'
|
181
|
-
assert_nil doc.title
|
182
|
-
end
|
183
|
-
|
184
129
|
def test_meta_encoding_without_head
|
185
130
|
html = Nokogiri::HTML('<html><body>foo</body></html>')
|
186
131
|
assert_nil html.meta_encoding
|
@@ -215,7 +160,7 @@ eohtml
|
|
215
160
|
end
|
216
161
|
|
217
162
|
def test_parse_io
|
218
|
-
assert File.open(HTML_FILE, 'rb') { |f|
|
163
|
+
assert doc = File.open(HTML_FILE, 'rb') { |f|
|
219
164
|
Document.read_io(f, nil, 'UTF-8',
|
220
165
|
XML::ParseOptions::NOERROR | XML::ParseOptions::NOWARNING
|
221
166
|
)
|
@@ -316,7 +261,7 @@ eohtml
|
|
316
261
|
end
|
317
262
|
|
318
263
|
def test_find_with_function
|
319
|
-
|
264
|
+
found = @html.css("div:awesome() h1", Class.new {
|
320
265
|
def awesome divs
|
321
266
|
[divs.first]
|
322
267
|
end
|
@@ -73,58 +73,5 @@ module Nokogiri
|
|
73
73
|
end
|
74
74
|
end
|
75
75
|
end
|
76
|
-
|
77
|
-
class TestDocumentEncodingDetection < Nokogiri::TestCase
|
78
|
-
if IO.respond_to?(:binread)
|
79
|
-
def binread(file)
|
80
|
-
IO.binread(file)
|
81
|
-
end
|
82
|
-
else
|
83
|
-
def binread(file)
|
84
|
-
IO.read(file)
|
85
|
-
end
|
86
|
-
end
|
87
|
-
|
88
|
-
def binopen(file)
|
89
|
-
File.open(file, 'rb')
|
90
|
-
end
|
91
|
-
|
92
|
-
def test_document_html_noencoding
|
93
|
-
from_stream = Nokogiri::HTML(binopen(NOENCODING_FILE))
|
94
|
-
from_string = Nokogiri::HTML(binread(NOENCODING_FILE))
|
95
|
-
|
96
|
-
assert_equal from_string.to_s.size, from_stream.to_s.size
|
97
|
-
end
|
98
|
-
|
99
|
-
def test_document_xhtml_enc
|
100
|
-
[ENCODING_XHTML_FILE, ENCODING_HTML_FILE].each { |file|
|
101
|
-
doc_from_string_enc = Nokogiri::HTML(binread(file), nil, 'Shift_JIS')
|
102
|
-
ary_from_string_enc = doc_from_string_enc.xpath('//p/text()').map { |text| text.text }
|
103
|
-
|
104
|
-
doc_from_string = Nokogiri::HTML(binread(file))
|
105
|
-
ary_from_string = doc_from_string.xpath('//p/text()').map { |text| text.text }
|
106
|
-
|
107
|
-
doc_from_file_enc = Nokogiri::HTML(binopen(file), nil, 'Shift_JIS')
|
108
|
-
ary_from_file_enc = doc_from_file_enc.xpath('//p/text()').map { |text| text.text }
|
109
|
-
|
110
|
-
doc_from_file = Nokogiri::HTML(binopen(file))
|
111
|
-
ary_from_file = doc_from_file.xpath('//p/text()').map { |text| text.text }
|
112
|
-
|
113
|
-
title = 'たこ焼き仮面'
|
114
|
-
|
115
|
-
assert_equal(title, doc_from_string_enc.at('//title/text()').text)
|
116
|
-
assert_equal(title, doc_from_string.at('//title/text()').text)
|
117
|
-
assert_equal(title, doc_from_file_enc.at('//title/text()').text)
|
118
|
-
assert_equal(title, doc_from_file.at('//title/text()').text)
|
119
|
-
|
120
|
-
evil = (0..72).map { |i| '超' * i + '悪い事を構想中。' }
|
121
|
-
|
122
|
-
assert_equal(evil, ary_from_string_enc)
|
123
|
-
assert_equal(evil, ary_from_string)
|
124
|
-
assert_equal(evil, ary_from_file_enc)
|
125
|
-
assert_equal(evil, ary_from_file)
|
126
|
-
}
|
127
|
-
end
|
128
|
-
end
|
129
76
|
end
|
130
77
|
end
|
@@ -9,46 +9,18 @@ module Nokogiri
|
|
9
9
|
@html = Nokogiri::HTML.parse(File.read(HTML_FILE), HTML_FILE)
|
10
10
|
end
|
11
11
|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
12
|
+
def test_no_contextual_parsing_on_unlinked_nodes
|
13
|
+
node = @html.css('body').first
|
14
|
+
node.unlink
|
15
|
+
assert_raises(RuntimeError) do
|
16
|
+
node.parse('<br />')
|
17
17
|
end
|
18
|
-
|
19
|
-
def test_html_parse_encoding
|
20
|
-
fragment = "<div>こんにちは!</div>".encode 'EUC-JP'
|
21
|
-
f = Nokogiri::HTML.fragment fragment
|
22
|
-
assert_equal 'EUC-JP', f.document.encoding
|
23
|
-
assert_equal "こんにちは!", f.content
|
24
|
-
end
|
25
|
-
end
|
26
|
-
|
27
|
-
def test_parse_encoding
|
28
|
-
fragment = "<div>hello world</div>"
|
29
|
-
f = Nokogiri::HTML::DocumentFragment.parse fragment, 'ISO-8859-1'
|
30
|
-
assert_equal 'ISO-8859-1', f.document.encoding
|
31
|
-
assert_equal "hello world", f.content
|
32
|
-
end
|
33
|
-
|
34
|
-
def test_html_parse_with_encoding
|
35
|
-
fragment = "<div>hello world</div>"
|
36
|
-
f = Nokogiri::HTML.fragment fragment, 'ISO-8859-1'
|
37
|
-
assert_equal 'ISO-8859-1', f.document.encoding
|
38
|
-
assert_equal "hello world", f.content
|
39
18
|
end
|
40
19
|
|
41
20
|
def test_parse_in_context
|
42
21
|
assert_equal('<br>', @html.root.parse('<br />').to_s)
|
43
22
|
end
|
44
23
|
|
45
|
-
def test_inner_html=
|
46
|
-
fragment = Nokogiri::HTML.fragment '<hr />'
|
47
|
-
|
48
|
-
fragment.inner_html = "hello"
|
49
|
-
assert_equal 'hello', fragment.inner_html
|
50
|
-
end
|
51
|
-
|
52
24
|
def test_ancestors_search
|
53
25
|
html = %q{
|
54
26
|
<div>
|
@@ -71,17 +43,7 @@ module Nokogiri
|
|
71
43
|
end
|
72
44
|
|
73
45
|
def test_new
|
74
|
-
|
75
|
-
end
|
76
|
-
|
77
|
-
def test_body_fragment_should_contain_body
|
78
|
-
fragment = Nokogiri::HTML::DocumentFragment.parse(" <body><div>foo</div></body>")
|
79
|
-
assert_match(/^<body>/, fragment.to_s)
|
80
|
-
end
|
81
|
-
|
82
|
-
def test_nonbody_fragment_should_not_contain_body
|
83
|
-
fragment = Nokogiri::HTML::DocumentFragment.parse("<div>foo</div>")
|
84
|
-
assert_match(/^<div>/, fragment.to_s)
|
46
|
+
fragment = Nokogiri::HTML::DocumentFragment.new(@html)
|
85
47
|
end
|
86
48
|
|
87
49
|
def test_fragment_should_have_document
|
@@ -133,7 +95,8 @@ module Nokogiri
|
|
133
95
|
def test_html_fragment_has_outer_text
|
134
96
|
doc = "a<div>b</div>c"
|
135
97
|
fragment = Nokogiri::HTML::Document.new.fragment(doc)
|
136
|
-
if Nokogiri
|
98
|
+
if Nokogiri.uses_libxml? &&
|
99
|
+
Nokogiri::VERSION_INFO['libxml']['loaded'] <= "2.6.16"
|
137
100
|
assert_equal "a<div>b</div><p>c</p>", fragment.to_s
|
138
101
|
else
|
139
102
|
assert_equal "a<div>b</div>c", fragment.to_s
|
@@ -149,13 +112,13 @@ module Nokogiri
|
|
149
112
|
def test_html_fragment_with_leading_whitespace
|
150
113
|
doc = " <div>b</div> "
|
151
114
|
fragment = Nokogiri::HTML::Document.new.fragment(doc)
|
152
|
-
|
115
|
+
assert_equal "<div>b</div>", fragment.to_s
|
153
116
|
end
|
154
117
|
|
155
118
|
def test_html_fragment_with_leading_whitespace_and_newline
|
156
119
|
doc = " \n<div>b</div> "
|
157
120
|
fragment = Nokogiri::HTML::Document.new.fragment(doc)
|
158
|
-
|
121
|
+
assert_equal "<div>b</div>", fragment.to_s
|
159
122
|
end
|
160
123
|
|
161
124
|
def test_html_fragment_with_leading_text_and_newline
|
@@ -165,7 +128,7 @@ module Nokogiri
|
|
165
128
|
|
166
129
|
def test_html_fragment_with_leading_whitespace_and_text_and_newline
|
167
130
|
fragment = HTML::Document.new.fragment(" First line\nSecond line<br>Broken line")
|
168
|
-
assert_equal "
|
131
|
+
assert_equal "First line\nSecond line<br>Broken line", fragment.to_s
|
169
132
|
end
|
170
133
|
|
171
134
|
def test_html_fragment_with_leading_entity
|
@@ -189,7 +152,8 @@ module Nokogiri
|
|
189
152
|
def test_to_xhtml
|
190
153
|
doc = "<span>foo<br></span><span>bar</span>"
|
191
154
|
fragment = Nokogiri::HTML::Document.new.fragment(doc)
|
192
|
-
if Nokogiri
|
155
|
+
if !Nokogiri.uses_libxml? ||
|
156
|
+
Nokogiri::VERSION_INFO['libxml']['loaded'] >= "2.7.0"
|
193
157
|
assert_equal "<span>foo<br /></span><span>bar</span>", fragment.to_xhtml
|
194
158
|
else
|
195
159
|
assert_equal "<span>foo<br></span><span>bar</span>", fragment.to_xhtml
|
@@ -215,39 +179,6 @@ module Nokogiri
|
|
215
179
|
assert_equal("<p>hello<!-- your ad here --></p>",
|
216
180
|
fragment.to_s)
|
217
181
|
end
|
218
|
-
|
219
|
-
def test_malformed_fragment_is_corrected
|
220
|
-
fragment = HTML::DocumentFragment.parse("<div </div>")
|
221
|
-
assert_equal "<div></div>", fragment.to_s
|
222
|
-
end
|
223
|
-
|
224
|
-
def test_unclosed_script_tag
|
225
|
-
# see GH#315
|
226
|
-
fragment = HTML::DocumentFragment.parse("foo <script>bar")
|
227
|
-
assert_equal "foo <script>bar</script>", fragment.to_html
|
228
|
-
end
|
229
|
-
|
230
|
-
def test_error_propagation_on_fragment_parse
|
231
|
-
frag = Nokogiri::HTML::DocumentFragment.parse "<hello>oh, hello there.</hello>"
|
232
|
-
assert frag.errors.any?{|err| err.to_s =~ /Tag hello invalid/}, "errors should be copied to the fragment"
|
233
|
-
end
|
234
|
-
|
235
|
-
def test_error_propagation_on_fragment_parse_in_node_context
|
236
|
-
doc = Nokogiri::HTML::Document.parse "<html><body><div></div></body></html>"
|
237
|
-
context_node = doc.at_css "div"
|
238
|
-
frag = Nokogiri::HTML::DocumentFragment.new doc, "<hello>oh, hello there.</hello>", context_node
|
239
|
-
assert frag.errors.any?{|err| err.to_s =~ /Tag hello invalid/}, "errors should be on the context node's document"
|
240
|
-
end
|
241
|
-
|
242
|
-
def test_error_propagation_on_fragment_parse_in_node_context_should_not_include_preexisting_errors
|
243
|
-
doc = Nokogiri::HTML::Document.parse "<html><body><div></div><jimmy></jimmy></body></html>"
|
244
|
-
assert doc.errors.any?{|err| err.to_s =~ /jimmy/}, "assert on setup"
|
245
|
-
|
246
|
-
context_node = doc.at_css "div"
|
247
|
-
frag = Nokogiri::HTML::DocumentFragment.new doc, "<hello>oh, hello there.</hello>", context_node
|
248
|
-
assert frag.errors.any?{|err| err.to_s =~ /Tag hello invalid/}, "errors should be on the context node's document"
|
249
|
-
assert ! frag.errors.any?{|err| err.to_s =~ /jimmy/}, "errors should not include pre-existing document errors"
|
250
|
-
end
|
251
182
|
end
|
252
183
|
end
|
253
184
|
end
|
@@ -56,10 +56,12 @@ module Nokogiri
|
|
56
56
|
|
57
57
|
def test_subelements
|
58
58
|
sub_elements = ElementDescription['body'].sub_elements
|
59
|
-
if Nokogiri::LIBXML_VERSION
|
59
|
+
if Nokogiri.uses_libxml? && Nokogiri::LIBXML_VERSION == '2.7.7'
|
60
60
|
assert_equal 65, sub_elements.length
|
61
|
-
|
61
|
+
elsif Nokogiri.uses_libxml?
|
62
62
|
assert_equal 61, sub_elements.length
|
63
|
+
else
|
64
|
+
assert sub_elements.length > 0
|
63
65
|
end
|
64
66
|
end
|
65
67
|
|