nokogiri 1.8.5 → 1.9.1
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/README.md +0 -1
- data/ext/nokogiri/extconf.rb +10 -6
- data/ext/nokogiri/xml_attr.c +26 -21
- data/ext/nokogiri/xml_document.c +4 -1
- data/ext/nokogiri/xml_namespace.c +3 -3
- data/ext/nokogiri/xml_namespace.h +1 -2
- data/ext/nokogiri/xml_node.c +55 -15
- data/lib/nokogiri/css/parser.rb +61 -61
- data/lib/nokogiri/version.rb +1 -1
- data/lib/nokogiri/xml/document_fragment.rb +11 -0
- data/lib/nokogiri/xml/node.rb +12 -0
- data/lib/nokogiri/xml/node_set.rb +4 -8
- metadata +17 -153
- data/.autotest +0 -22
- data/.cross_rubies +0 -8
- data/.editorconfig +0 -17
- data/.gemtest +0 -0
- data/.travis.yml +0 -63
- data/CHANGELOG.md +0 -1368
- data/CONTRIBUTING.md +0 -42
- data/C_CODING_STYLE.rdoc +0 -33
- data/Gemfile +0 -23
- data/Gemfile-libxml-ruby +0 -3
- data/Manifest.txt +0 -370
- data/ROADMAP.md +0 -111
- data/Rakefile +0 -348
- data/SECURITY.md +0 -19
- data/STANDARD_RESPONSES.md +0 -47
- data/Y_U_NO_GEMSPEC.md +0 -155
- data/appveyor.yml +0 -29
- data/build_all +0 -44
- data/patches/sort-patches-by-date +0 -25
- data/suppressions/README.txt +0 -1
- data/suppressions/nokogiri_ruby-2.supp +0 -10
- data/tasks/test.rb +0 -100
- data/test/css/test_nthiness.rb +0 -226
- data/test/css/test_parser.rb +0 -386
- data/test/css/test_tokenizer.rb +0 -215
- data/test/css/test_xpath_visitor.rb +0 -96
- data/test/decorators/test_slop.rb +0 -23
- data/test/files/2ch.html +0 -108
- data/test/files/GH_1042.html +0 -18
- data/test/files/address_book.rlx +0 -12
- data/test/files/address_book.xml +0 -10
- data/test/files/atom.xml +0 -344
- data/test/files/bar/bar.xsd +0 -4
- data/test/files/bogus.xml +0 -0
- data/test/files/dont_hurt_em_why.xml +0 -422
- data/test/files/encoding.html +0 -82
- data/test/files/encoding.xhtml +0 -84
- data/test/files/exslt.xml +0 -8
- data/test/files/exslt.xslt +0 -35
- data/test/files/foo/foo.xsd +0 -4
- data/test/files/metacharset.html +0 -10
- data/test/files/namespace_pressure_test.xml +0 -1684
- data/test/files/noencoding.html +0 -47
- data/test/files/po.xml +0 -32
- data/test/files/po.xsd +0 -66
- data/test/files/saml/saml20assertion_schema.xsd +0 -283
- data/test/files/saml/saml20protocol_schema.xsd +0 -302
- data/test/files/saml/xenc_schema.xsd +0 -146
- data/test/files/saml/xmldsig_schema.xsd +0 -318
- data/test/files/shift_jis.html +0 -10
- data/test/files/shift_jis.xml +0 -5
- data/test/files/shift_jis_no_charset.html +0 -9
- data/test/files/slow-xpath.xml +0 -25509
- data/test/files/snuggles.xml +0 -3
- data/test/files/staff.dtd +0 -10
- data/test/files/staff.xml +0 -59
- data/test/files/staff.xslt +0 -32
- data/test/files/test_document_url/bar.xml +0 -2
- data/test/files/test_document_url/document.dtd +0 -4
- data/test/files/test_document_url/document.xml +0 -6
- data/test/files/tlm.html +0 -851
- data/test/files/to_be_xincluded.xml +0 -2
- data/test/files/valid_bar.xml +0 -2
- data/test/files/xinclude.xml +0 -4
- data/test/helper.rb +0 -271
- data/test/html/sax/test_parser.rb +0 -168
- data/test/html/sax/test_parser_context.rb +0 -46
- data/test/html/sax/test_parser_text.rb +0 -163
- data/test/html/sax/test_push_parser.rb +0 -87
- data/test/html/test_attributes.rb +0 -85
- data/test/html/test_builder.rb +0 -164
- data/test/html/test_document.rb +0 -712
- data/test/html/test_document_encoding.rb +0 -143
- data/test/html/test_document_fragment.rb +0 -310
- data/test/html/test_element_description.rb +0 -105
- data/test/html/test_named_characters.rb +0 -14
- data/test/html/test_node.rb +0 -212
- data/test/html/test_node_encoding.rb +0 -91
- data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +0 -14
- data/test/namespaces/test_namespaces_aliased_default.rb +0 -24
- data/test/namespaces/test_namespaces_in_builder_doc.rb +0 -75
- data/test/namespaces/test_namespaces_in_cloned_doc.rb +0 -31
- data/test/namespaces/test_namespaces_in_created_doc.rb +0 -75
- data/test/namespaces/test_namespaces_in_parsed_doc.rb +0 -80
- data/test/namespaces/test_namespaces_preservation.rb +0 -31
- data/test/test_convert_xpath.rb +0 -135
- data/test/test_css_cache.rb +0 -47
- data/test/test_encoding_handler.rb +0 -48
- data/test/test_memory_leak.rb +0 -156
- data/test/test_nokogiri.rb +0 -138
- data/test/test_soap4r_sax.rb +0 -52
- data/test/test_xslt_transforms.rb +0 -314
- data/test/xml/node/test_save_options.rb +0 -28
- data/test/xml/node/test_subclass.rb +0 -44
- data/test/xml/sax/test_parser.rb +0 -402
- data/test/xml/sax/test_parser_context.rb +0 -115
- data/test/xml/sax/test_parser_text.rb +0 -202
- data/test/xml/sax/test_push_parser.rb +0 -265
- data/test/xml/test_attr.rb +0 -74
- data/test/xml/test_attribute_decl.rb +0 -86
- data/test/xml/test_builder.rb +0 -341
- data/test/xml/test_c14n.rb +0 -180
- data/test/xml/test_cdata.rb +0 -54
- data/test/xml/test_comment.rb +0 -40
- data/test/xml/test_document.rb +0 -982
- data/test/xml/test_document_encoding.rb +0 -31
- data/test/xml/test_document_fragment.rb +0 -298
- data/test/xml/test_dtd.rb +0 -187
- data/test/xml/test_dtd_encoding.rb +0 -31
- data/test/xml/test_element_content.rb +0 -56
- data/test/xml/test_element_decl.rb +0 -73
- data/test/xml/test_entity_decl.rb +0 -122
- data/test/xml/test_entity_reference.rb +0 -262
- data/test/xml/test_namespace.rb +0 -96
- data/test/xml/test_node.rb +0 -1325
- data/test/xml/test_node_attributes.rb +0 -115
- data/test/xml/test_node_encoding.rb +0 -75
- data/test/xml/test_node_inheritance.rb +0 -32
- data/test/xml/test_node_reparenting.rb +0 -592
- data/test/xml/test_node_set.rb +0 -809
- data/test/xml/test_parse_options.rb +0 -64
- data/test/xml/test_processing_instruction.rb +0 -30
- data/test/xml/test_reader.rb +0 -620
- data/test/xml/test_reader_encoding.rb +0 -134
- data/test/xml/test_relax_ng.rb +0 -60
- data/test/xml/test_schema.rb +0 -142
- data/test/xml/test_syntax_error.rb +0 -36
- data/test/xml/test_text.rb +0 -60
- data/test/xml/test_unparented_node.rb +0 -483
- data/test/xml/test_xinclude.rb +0 -83
- data/test/xml/test_xpath.rb +0 -470
- data/test/xslt/test_custom_functions.rb +0 -133
- data/test/xslt/test_exception_handling.rb +0 -37
data/test/files/valid_bar.xml
DELETED
data/test/files/xinclude.xml
DELETED
data/test/helper.rb
DELETED
@@ -1,271 +0,0 @@
|
|
1
|
-
#Process.setrlimit(Process::RLIMIT_CORE, Process::RLIM_INFINITY) unless RUBY_PLATFORM =~ /(java|mswin|mingw)/i
|
2
|
-
$VERBOSE = true
|
3
|
-
require 'minitest/autorun'
|
4
|
-
require 'minitest/pride'
|
5
|
-
require 'fileutils'
|
6
|
-
require 'tempfile'
|
7
|
-
require 'pp'
|
8
|
-
|
9
|
-
require 'nokogiri'
|
10
|
-
|
11
|
-
if ENV['TEST_NOKOGIRI_WITH_LIBXML_RUBY']
|
12
|
-
#
|
13
|
-
# if you'd like to test with the libxml-ruby gem loaded, it's
|
14
|
-
# recommended that you set
|
15
|
-
#
|
16
|
-
# BUNDLE_GEMFILE=Gemfile-libxml-ruby
|
17
|
-
#
|
18
|
-
# which will a) bundle that gem, and b) set the appropriate env var to
|
19
|
-
# trigger this block
|
20
|
-
#
|
21
|
-
require 'libxml'
|
22
|
-
warn "#{__FILE__}:#{__LINE__}: loaded libxml-ruby '#{LibXML::XML::VERSION}'"
|
23
|
-
end
|
24
|
-
|
25
|
-
warn "#{__FILE__}:#{__LINE__}: version info: #{Nokogiri::VERSION_INFO.inspect}"
|
26
|
-
|
27
|
-
module Nokogiri
|
28
|
-
class TestCase < MiniTest::Spec
|
29
|
-
ASSETS_DIR = File.expand_path File.join(File.dirname(__FILE__), 'files')
|
30
|
-
ADDRESS_SCHEMA_FILE = File.join(ASSETS_DIR, 'address_book.rlx')
|
31
|
-
ADDRESS_XML_FILE = File.join(ASSETS_DIR, 'address_book.xml')
|
32
|
-
ENCODING_HTML_FILE = File.join(ASSETS_DIR, 'encoding.html')
|
33
|
-
ENCODING_XHTML_FILE = File.join(ASSETS_DIR, 'encoding.xhtml')
|
34
|
-
EXML_FILE = File.join(ASSETS_DIR, 'exslt.xml')
|
35
|
-
EXSLT_FILE = File.join(ASSETS_DIR, 'exslt.xslt')
|
36
|
-
HTML_FILE = File.join(ASSETS_DIR, 'tlm.html')
|
37
|
-
METACHARSET_FILE = File.join(ASSETS_DIR, 'metacharset.html')
|
38
|
-
NICH_FILE = File.join(ASSETS_DIR, '2ch.html')
|
39
|
-
NOENCODING_FILE = File.join(ASSETS_DIR, 'noencoding.html')
|
40
|
-
PO_SCHEMA_FILE = File.join(ASSETS_DIR, 'po.xsd')
|
41
|
-
PO_XML_FILE = File.join(ASSETS_DIR, 'po.xml')
|
42
|
-
SHIFT_JIS_HTML = File.join(ASSETS_DIR, 'shift_jis.html')
|
43
|
-
SHIFT_JIS_NO_CHARSET= File.join(ASSETS_DIR, 'shift_jis_no_charset.html')
|
44
|
-
SHIFT_JIS_XML = File.join(ASSETS_DIR, 'shift_jis.xml')
|
45
|
-
SNUGGLES_FILE = File.join(ASSETS_DIR, 'snuggles.xml')
|
46
|
-
XML_FILE = File.join(ASSETS_DIR, 'staff.xml')
|
47
|
-
XML_XINCLUDE_FILE = File.join(ASSETS_DIR, 'xinclude.xml')
|
48
|
-
XML_ATOM_FILE = File.join(ASSETS_DIR, 'atom.xml')
|
49
|
-
XSLT_FILE = File.join(ASSETS_DIR, 'staff.xslt')
|
50
|
-
XPATH_FILE = File.join(ASSETS_DIR, 'slow-xpath.xml')
|
51
|
-
|
52
|
-
def teardown
|
53
|
-
if ENV['NOKOGIRI_GC']
|
54
|
-
STDOUT.putc '!'
|
55
|
-
if RUBY_PLATFORM =~ /java/
|
56
|
-
require 'java'
|
57
|
-
java.lang.System.gc
|
58
|
-
else
|
59
|
-
GC.start
|
60
|
-
end
|
61
|
-
end
|
62
|
-
end
|
63
|
-
|
64
|
-
def stress_memory_while &block
|
65
|
-
# force the test to explicitly declare a skip
|
66
|
-
raise "JRuby doesn't do GC" if Nokogiri.jruby?
|
67
|
-
|
68
|
-
old_stress = GC.stress
|
69
|
-
begin
|
70
|
-
GC.stress = true
|
71
|
-
yield
|
72
|
-
ensure
|
73
|
-
GC.stress = old_stress
|
74
|
-
end
|
75
|
-
end
|
76
|
-
|
77
|
-
def assert_indent amount, doc, message = nil
|
78
|
-
nodes = []
|
79
|
-
doc.traverse do |node|
|
80
|
-
nodes << node if node.text? && node.blank?
|
81
|
-
end
|
82
|
-
assert nodes.length > 0
|
83
|
-
nodes.each do |node|
|
84
|
-
len = node.content.gsub(/[\r\n]/, '').length
|
85
|
-
assert_equal(0, len % amount, message)
|
86
|
-
end
|
87
|
-
end
|
88
|
-
|
89
|
-
def util_decorate(document, decorator_module)
|
90
|
-
document.decorators(XML::Node) << decorator_module
|
91
|
-
document.decorators(XML::NodeSet) << decorator_module
|
92
|
-
document.decorate!
|
93
|
-
end
|
94
|
-
|
95
|
-
#
|
96
|
-
# Test::Unit backwards compatibility section
|
97
|
-
#
|
98
|
-
alias :assert_no_match :refute_match
|
99
|
-
alias :assert_not_nil :refute_nil
|
100
|
-
alias :assert_raise :assert_raises
|
101
|
-
alias :assert_not_equal :refute_equal
|
102
|
-
|
103
|
-
def assert_not_send send_ary, m = nil
|
104
|
-
recv, msg, *args = send_ary
|
105
|
-
m = message(m) {
|
106
|
-
"Expected #{mu_pp(recv)}.#{msg}(*#{mu_pp(args)}) to return false" }
|
107
|
-
assert !recv.__send__(msg, *args), m
|
108
|
-
end unless method_defined?(:assert_not_send)
|
109
|
-
end
|
110
|
-
|
111
|
-
module SAX
|
112
|
-
class TestCase < Nokogiri::TestCase
|
113
|
-
class Doc < XML::SAX::Document
|
114
|
-
attr_reader :start_elements, :start_document_called
|
115
|
-
attr_reader :end_elements, :end_document_called
|
116
|
-
attr_reader :data, :comments, :cdata_blocks, :start_elements_namespace
|
117
|
-
attr_reader :errors, :warnings, :end_elements_namespace
|
118
|
-
attr_reader :xmldecls
|
119
|
-
attr_reader :processing_instructions
|
120
|
-
|
121
|
-
def xmldecl version, encoding, standalone
|
122
|
-
@xmldecls = [version, encoding, standalone].compact
|
123
|
-
super
|
124
|
-
end
|
125
|
-
|
126
|
-
def start_document
|
127
|
-
@start_document_called = true
|
128
|
-
super
|
129
|
-
end
|
130
|
-
|
131
|
-
def end_document
|
132
|
-
@end_document_called = true
|
133
|
-
super
|
134
|
-
end
|
135
|
-
|
136
|
-
def error error
|
137
|
-
(@errors ||= []) << error
|
138
|
-
super
|
139
|
-
end
|
140
|
-
|
141
|
-
def warning warning
|
142
|
-
(@warning ||= []) << warning
|
143
|
-
super
|
144
|
-
end
|
145
|
-
|
146
|
-
def start_element *args
|
147
|
-
(@start_elements ||= []) << args
|
148
|
-
super
|
149
|
-
end
|
150
|
-
|
151
|
-
def start_element_namespace *args
|
152
|
-
(@start_elements_namespace ||= []) << args
|
153
|
-
super
|
154
|
-
end
|
155
|
-
|
156
|
-
def end_element *args
|
157
|
-
(@end_elements ||= []) << args
|
158
|
-
super
|
159
|
-
end
|
160
|
-
|
161
|
-
def end_element_namespace *args
|
162
|
-
(@end_elements_namespace ||= []) << args
|
163
|
-
super
|
164
|
-
end
|
165
|
-
|
166
|
-
def characters string
|
167
|
-
@data ||= []
|
168
|
-
@data += [string]
|
169
|
-
super
|
170
|
-
end
|
171
|
-
|
172
|
-
def comment string
|
173
|
-
@comments ||= []
|
174
|
-
@comments += [string]
|
175
|
-
super
|
176
|
-
end
|
177
|
-
|
178
|
-
def cdata_block string
|
179
|
-
@cdata_blocks ||= []
|
180
|
-
@cdata_blocks += [string]
|
181
|
-
super
|
182
|
-
end
|
183
|
-
|
184
|
-
def processing_instruction name, content
|
185
|
-
@processing_instructions ||= []
|
186
|
-
@processing_instructions << [name, content]
|
187
|
-
end
|
188
|
-
end
|
189
|
-
|
190
|
-
# This document will help us to test the strict order of items.
|
191
|
-
|
192
|
-
class DocWithOrderedItems < XML::SAX::Document
|
193
|
-
attr_reader :items
|
194
|
-
|
195
|
-
def initialize
|
196
|
-
# [
|
197
|
-
# [ :method_1, argument_1, ... ],
|
198
|
-
# [ :method_2, argument_2, ... ],
|
199
|
-
# ...
|
200
|
-
# ]
|
201
|
-
@items = Items.new
|
202
|
-
end
|
203
|
-
|
204
|
-
[
|
205
|
-
:xmldecl,
|
206
|
-
:start_document, :end_document,
|
207
|
-
:start_element, :end_element,
|
208
|
-
:start_element_namespace, :end_element_namespace,
|
209
|
-
:characters, :comment, :cdata_block,
|
210
|
-
:processing_instruction,
|
211
|
-
:error, :warning
|
212
|
-
]
|
213
|
-
.each do |name|
|
214
|
-
define_method name do |*arguments|
|
215
|
-
@items << [name, *arguments]
|
216
|
-
super *arguments
|
217
|
-
end
|
218
|
-
end
|
219
|
-
|
220
|
-
class Items < Array
|
221
|
-
def get_root_content root_name
|
222
|
-
items = clone
|
223
|
-
is_inside_root = false
|
224
|
-
|
225
|
-
items.select! do |item|
|
226
|
-
method_name = item[0]
|
227
|
-
element_name = item[1]
|
228
|
-
|
229
|
-
case method_name
|
230
|
-
when :start_element, :start_element_namespace
|
231
|
-
if element_name == root_name
|
232
|
-
is_inside_root = true
|
233
|
-
next false
|
234
|
-
end
|
235
|
-
|
236
|
-
when :end_element, :end_element_namespace
|
237
|
-
is_inside_root = false if element_name == root_name and is_inside_root
|
238
|
-
end
|
239
|
-
|
240
|
-
is_inside_root
|
241
|
-
end
|
242
|
-
|
243
|
-
items
|
244
|
-
end
|
245
|
-
|
246
|
-
def select_methods(names)
|
247
|
-
items = clone
|
248
|
-
|
249
|
-
items.select! do |item|
|
250
|
-
name = item[0]
|
251
|
-
names.include? name
|
252
|
-
end
|
253
|
-
|
254
|
-
items
|
255
|
-
end
|
256
|
-
|
257
|
-
def strip_text! method_names
|
258
|
-
each do |item|
|
259
|
-
method_name = item[0]
|
260
|
-
text = item[1]
|
261
|
-
|
262
|
-
text.strip! if method_names.include? method_name
|
263
|
-
end
|
264
|
-
|
265
|
-
nil
|
266
|
-
end
|
267
|
-
end
|
268
|
-
end
|
269
|
-
end
|
270
|
-
end
|
271
|
-
end
|
data/test/html/sax/test_parser.rb
DELETED
@@ -1,168 +0,0 @@
|
|
1
|
-
# -*- coding: utf-8 -*-
|
2
|
-
require "helper"
|
3
|
-
|
4
|
-
module Nokogiri
|
5
|
-
module HTML
|
6
|
-
module SAX
|
7
|
-
class TestParser < Nokogiri::SAX::TestCase
|
8
|
-
def setup
|
9
|
-
super
|
10
|
-
@parser = HTML::SAX::Parser.new(Doc.new)
|
11
|
-
end
|
12
|
-
|
13
|
-
def test_parse_empty_document
|
14
|
-
# This caused a segfault in libxml 2.6.x
|
15
|
-
assert_nil @parser.parse ''
|
16
|
-
end
|
17
|
-
|
18
|
-
def test_parse_empty_file
|
19
|
-
# Make sure empty files don't break stuff
|
20
|
-
empty_file_name = File.join(ASSETS_DIR, 'bogus.xml')
|
21
|
-
# assert_nothing_raised do
|
22
|
-
@parser.parse_file empty_file_name
|
23
|
-
# end
|
24
|
-
end
|
25
|
-
|
26
|
-
def test_parse_file
|
27
|
-
@parser.parse_file(HTML_FILE)
|
28
|
-
|
29
|
-
# Take a look at the comment in test_parse_document to know
|
30
|
-
# a possible reason to this difference.
|
31
|
-
if Nokogiri.uses_libxml?
|
32
|
-
assert_equal 1111, @parser.document.end_elements.length
|
33
|
-
else
|
34
|
-
assert_equal 1120, @parser.document.end_elements.length
|
35
|
-
end
|
36
|
-
end
|
37
|
-
|
38
|
-
def test_parse_file_nil_argument
|
39
|
-
assert_raises(ArgumentError) {
|
40
|
-
@parser.parse_file(nil)
|
41
|
-
}
|
42
|
-
end
|
43
|
-
|
44
|
-
def test_parse_file_non_existant
|
45
|
-
assert_raise Errno::ENOENT do
|
46
|
-
@parser.parse_file('there_is_no_reasonable_way_this_file_exists')
|
47
|
-
end
|
48
|
-
end
|
49
|
-
|
50
|
-
def test_parse_file_with_dir
|
51
|
-
assert_raise Errno::EISDIR do
|
52
|
-
@parser.parse_file(File.dirname(__FILE__))
|
53
|
-
end
|
54
|
-
end
|
55
|
-
|
56
|
-
def test_parse_memory_nil
|
57
|
-
assert_raise ArgumentError do
|
58
|
-
@parser.parse_memory(nil)
|
59
|
-
end
|
60
|
-
end
|
61
|
-
|
62
|
-
def test_parse_force_encoding
|
63
|
-
@parser.parse_memory(<<-HTML, 'UTF-8')
|
64
|
-
<meta http-equiv="Content-Type" content="text/html; charset=windows-1251">
|
65
|
-
Информация
|
66
|
-
HTML
|
67
|
-
assert_equal("Информация",
|
68
|
-
@parser.document.data.join.strip)
|
69
|
-
end
|
70
|
-
|
71
|
-
def test_parse_document
|
72
|
-
@parser.parse_memory(<<-eoxml)
|
73
|
-
<p>Paragraph 1</p>
|
74
|
-
<p>Paragraph 2</p>
|
75
|
-
eoxml
|
76
|
-
|
77
|
-
# JRuby version is different because of the internal implementation
|
78
|
-
# JRuby version uses NekoHTML which inserts empty "head" elements.
|
79
|
-
#
|
80
|
-
# Currently following features are set:
|
81
|
-
# "http://cyberneko.org/html/properties/names/elems" => "lower"
|
82
|
-
# "http://cyberneko.org/html/properties/names/attrs" => "lower"
|
83
|
-
if Nokogiri.uses_libxml?
|
84
|
-
assert_equal([["html", []], ["body", []], ["p", []], ["p", []]],
|
85
|
-
@parser.document.start_elements)
|
86
|
-
else
|
87
|
-
assert_equal([["html", []], ["head", []], ["body", []], ["p", []], ["p", []]],
|
88
|
-
@parser.document.start_elements)
|
89
|
-
end
|
90
|
-
end
|
91
|
-
|
92
|
-
def test_parser_attributes
|
93
|
-
html = <<-eohtml
|
94
|
-
<html>
|
95
|
-
<head>
|
96
|
-
<title>hello</title>
|
97
|
-
</head>
|
98
|
-
<body>
|
99
|
-
<img src="face.jpg" title="daddy & me">
|
100
|
-
<hr noshade size="2">
|
101
|
-
</body>
|
102
|
-
</html>
|
103
|
-
eohtml
|
104
|
-
|
105
|
-
block_called = false
|
106
|
-
@parser.parse(html) { |ctx|
|
107
|
-
block_called = true
|
108
|
-
ctx.replace_entities = true
|
109
|
-
}
|
110
|
-
|
111
|
-
assert block_called
|
112
|
-
|
113
|
-
noshade_value = if Nokogiri.uses_libxml? && Nokogiri::VERSION_INFO['libxml']['loaded'] < '2.7.7'
|
114
|
-
['noshade', 'noshade']
|
115
|
-
else
|
116
|
-
['noshade', nil]
|
117
|
-
end
|
118
|
-
|
119
|
-
assert_equal [
|
120
|
-
['html', []],
|
121
|
-
['head', []],
|
122
|
-
['title', []],
|
123
|
-
['body', []],
|
124
|
-
['img', [
|
125
|
-
['src', 'face.jpg'],
|
126
|
-
['title', 'daddy & me']
|
127
|
-
]],
|
128
|
-
['hr', [
|
129
|
-
noshade_value,
|
130
|
-
['size', '2']
|
131
|
-
]]
|
132
|
-
], @parser.document.start_elements
|
133
|
-
end
|
134
|
-
|
135
|
-
HTML_WITH_BR_TAG = <<-EOF
|
136
|
-
<html>
|
137
|
-
<head></head>
|
138
|
-
<body>
|
139
|
-
<div>
|
140
|
-
hello
|
141
|
-
<br>
|
142
|
-
</div>
|
143
|
-
|
144
|
-
<div>
|
145
|
-
hello again
|
146
|
-
</div>
|
147
|
-
</body>
|
148
|
-
</html>
|
149
|
-
EOF
|
150
|
-
|
151
|
-
def test_parsing_dom_error_from_string
|
152
|
-
@parser.parse(HTML_WITH_BR_TAG)
|
153
|
-
assert_equal 6, @parser.document.start_elements.length
|
154
|
-
end
|
155
|
-
|
156
|
-
def test_parsing_dom_error_from_io
|
157
|
-
@parser.parse(StringIO.new(HTML_WITH_BR_TAG))
|
158
|
-
assert_equal 6, @parser.document.start_elements.length
|
159
|
-
end
|
160
|
-
|
161
|
-
|
162
|
-
def test_empty_processing_instruction
|
163
|
-
@parser.parse_memory("<strong>this will segfault<?strong>")
|
164
|
-
end
|
165
|
-
end
|
166
|
-
end
|
167
|
-
end
|
168
|
-
end
|
data/test/html/sax/test_parser_context.rb
DELETED
@@ -1,46 +0,0 @@
|
|
1
|
-
# -*- coding: utf-8 -*-
|
2
|
-
|
3
|
-
require "helper"
|
4
|
-
|
5
|
-
module Nokogiri
|
6
|
-
module HTML
|
7
|
-
module SAX
|
8
|
-
class TestParserContext < Nokogiri::SAX::TestCase
|
9
|
-
def test_from_io
|
10
|
-
ctx = ParserContext.new StringIO.new('fo'), 'UTF-8'
|
11
|
-
assert ctx
|
12
|
-
end
|
13
|
-
|
14
|
-
def test_from_string
|
15
|
-
ctx = ParserContext.new 'blah blah'
|
16
|
-
assert ctx
|
17
|
-
end
|
18
|
-
|
19
|
-
def test_parse_with
|
20
|
-
ctx = ParserContext.new 'blah'
|
21
|
-
assert_raises ArgumentError do
|
22
|
-
ctx.parse_with nil
|
23
|
-
end
|
24
|
-
end
|
25
|
-
|
26
|
-
def test_parse_with_sax_parser
|
27
|
-
# assert_nothing_raised do
|
28
|
-
xml = "<root />"
|
29
|
-
ctx = ParserContext.new xml
|
30
|
-
parser = Parser.new Doc.new
|
31
|
-
ctx.parse_with parser
|
32
|
-
# end
|
33
|
-
end
|
34
|
-
|
35
|
-
def test_from_file
|
36
|
-
# assert_nothing_raised do
|
37
|
-
ctx = ParserContext.file HTML_FILE, 'UTF-8'
|
38
|
-
parser = Parser.new Doc.new
|
39
|
-
ctx.parse_with parser
|
40
|
-
# end
|
41
|
-
end
|
42
|
-
end
|
43
|
-
end
|
44
|
-
end
|
45
|
-
end
|
46
|
-
|
data/test/html/sax/test_parser_text.rb
DELETED
@@ -1,163 +0,0 @@
|
|
1
|
-
# -*- coding: utf-8 -*-
|
2
|
-
require "helper"
|
3
|
-
|
4
|
-
module Nokogiri
|
5
|
-
module HTML
|
6
|
-
module SAX
|
7
|
-
class TestParserText < Nokogiri::SAX::TestCase
|
8
|
-
def setup
|
9
|
-
super
|
10
|
-
@doc = DocWithOrderedItems.new
|
11
|
-
@parser = HTML::SAX::Parser.new @doc
|
12
|
-
end
|
13
|
-
|
14
|
-
def test_texts_order
|
15
|
-
html = <<-eohtml
|
16
|
-
<!DOCTYPE html>
|
17
|
-
<html>
|
18
|
-
<head></head>
|
19
|
-
<body>
|
20
|
-
text 0
|
21
|
-
<p>
|
22
|
-
text 1
|
23
|
-
<span>text 2</span>
|
24
|
-
text 3
|
25
|
-
</p>
|
26
|
-
|
27
|
-
text 4
|
28
|
-
<!--
|
29
|
-
text 5
|
30
|
-
-->
|
31
|
-
|
32
|
-
<p>
|
33
|
-
<!-- text 6 -->
|
34
|
-
<span><!-- text 7 --></span>
|
35
|
-
<!-- text 8 -->
|
36
|
-
</p>
|
37
|
-
|
38
|
-
<!-- text 9 -->
|
39
|
-
</body>
|
40
|
-
</html>
|
41
|
-
eohtml
|
42
|
-
|
43
|
-
@parser.parse html
|
44
|
-
items = @doc.items.get_root_content "body"
|
45
|
-
items = items.select_methods [
|
46
|
-
:start_element, :end_element,
|
47
|
-
:characters, :comment
|
48
|
-
]
|
49
|
-
items.strip_text! [:characters, :comment]
|
50
|
-
|
51
|
-
assert_equal [
|
52
|
-
[:characters, 'text 0'],
|
53
|
-
|
54
|
-
[:start_element, 'p', []],
|
55
|
-
[:characters, 'text 1'],
|
56
|
-
|
57
|
-
[:start_element, 'span', []],
|
58
|
-
[:characters, 'text 2'],
|
59
|
-
[:end_element, 'span'],
|
60
|
-
|
61
|
-
[:characters, 'text 3'],
|
62
|
-
[:end_element, 'p'],
|
63
|
-
|
64
|
-
[:characters, 'text 4'],
|
65
|
-
[:comment, 'text 5'],
|
66
|
-
[:characters, ''],
|
67
|
-
|
68
|
-
[:start_element, 'p', []],
|
69
|
-
[:characters, ''],
|
70
|
-
[:comment, 'text 6'],
|
71
|
-
[:characters, ''],
|
72
|
-
|
73
|
-
[:start_element, 'span', []],
|
74
|
-
[:comment, 'text 7'],
|
75
|
-
[:end_element, 'span'],
|
76
|
-
[:characters, ''],
|
77
|
-
|
78
|
-
[:comment, 'text 8'],
|
79
|
-
[:characters, ''],
|
80
|
-
[:end_element, 'p'],
|
81
|
-
[:characters, ''],
|
82
|
-
|
83
|
-
[:comment, 'text 9'],
|
84
|
-
[:characters, '']
|
85
|
-
], items
|
86
|
-
|
87
|
-
nil
|
88
|
-
end
|
89
|
-
|
90
|
-
def text_whitespace
|
91
|
-
html = <<-eohtml
|
92
|
-
<!DOCTYPE html>
|
93
|
-
<html>
|
94
|
-
<head></head>
|
95
|
-
<body>
|
96
|
-
<p>
|
97
|
-
<span></span>
|
98
|
-
<span> </span>
|
99
|
-
<span>
|
100
|
-
|
101
|
-
</span>
|
102
|
-
</p>
|
103
|
-
<p>
|
104
|
-
<!---->
|
105
|
-
<!-- -->
|
106
|
-
<!--
|
107
|
-
|
108
|
-
-->
|
109
|
-
</p>
|
110
|
-
</body>
|
111
|
-
</html>
|
112
|
-
eohtml
|
113
|
-
|
114
|
-
@parser.parse html
|
115
|
-
items = @doc.items.get_root_content "body"
|
116
|
-
items = items.select_methods [
|
117
|
-
:start_element, :end_element,
|
118
|
-
:characters, :comment
|
119
|
-
]
|
120
|
-
items.strip_text! [:characters, :comment]
|
121
|
-
|
122
|
-
assert_equal [
|
123
|
-
[:characters, ''],
|
124
|
-
[:start_element, 'p', []],
|
125
|
-
|
126
|
-
[:characters, ''],
|
127
|
-
[:start_element, 'span', []],
|
128
|
-
[:end_element, 'span'],
|
129
|
-
[:characters, ''],
|
130
|
-
|
131
|
-
[:start_element, 'span', []],
|
132
|
-
[:characters, ''],
|
133
|
-
[:end_element, 'span'],
|
134
|
-
[:characters, ''],
|
135
|
-
|
136
|
-
[:start_element, 'span', []],
|
137
|
-
[:characters, ''],
|
138
|
-
[:end_element, 'span'],
|
139
|
-
[:characters, ''],
|
140
|
-
|
141
|
-
[:end_element, 'p'],
|
142
|
-
[:characters, ''],
|
143
|
-
|
144
|
-
[:start_element, 'p', []],
|
145
|
-
[:characters, ''],
|
146
|
-
|
147
|
-
[:comment, ''],
|
148
|
-
[:characters, ''],
|
149
|
-
[:comment, ''],
|
150
|
-
[:characters, ''],
|
151
|
-
[:comment, ''],
|
152
|
-
[:characters, ''],
|
153
|
-
|
154
|
-
[:end_element, 'p'],
|
155
|
-
[:characters, '']
|
156
|
-
], items
|
157
|
-
|
158
|
-
nil
|
159
|
-
end
|
160
|
-
end
|
161
|
-
end
|
162
|
-
end
|
163
|
-
end
|