nokogiri 1.13.10-x64-mingw-ucrt → 1.14.0-x64-mingw-ucrt
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +33 -0
- data/LICENSE-DEPENDENCIES.md +830 -509
- data/LICENSE.md +1 -1
- data/README.md +18 -11
- data/dependencies.yml +25 -7
- data/ext/nokogiri/extconf.rb +80 -21
- data/ext/nokogiri/gumbo.c +19 -9
- data/ext/nokogiri/html4_document.c +1 -1
- data/ext/nokogiri/html4_entity_lookup.c +1 -1
- data/ext/nokogiri/html4_sax_parser_context.c +0 -5
- data/ext/nokogiri/include/libxslt/xsltconfig.h +1 -1
- data/ext/nokogiri/nokogiri.c +33 -51
- data/ext/nokogiri/nokogiri.h +17 -14
- data/ext/nokogiri/xml_attribute_decl.c +1 -1
- data/ext/nokogiri/xml_cdata.c +1 -1
- data/ext/nokogiri/xml_document.c +16 -11
- data/ext/nokogiri/xml_element_content.c +2 -2
- data/ext/nokogiri/xml_element_decl.c +1 -1
- data/ext/nokogiri/xml_encoding_handler.c +2 -2
- data/ext/nokogiri/xml_namespace.c +38 -8
- data/ext/nokogiri/xml_node.c +286 -26
- data/ext/nokogiri/xml_node_set.c +0 -2
- data/ext/nokogiri/xml_reader.c +40 -20
- data/ext/nokogiri/xml_relax_ng.c +0 -2
- data/ext/nokogiri/xml_sax_parser.c +22 -16
- data/ext/nokogiri/xml_sax_parser_context.c +0 -5
- data/ext/nokogiri/xml_sax_push_parser.c +0 -2
- data/ext/nokogiri/xml_schema.c +0 -2
- data/ext/nokogiri/xml_xpath_context.c +87 -83
- data/ext/nokogiri/xslt_stylesheet.c +14 -13
- data/gumbo-parser/Makefile +10 -0
- data/lib/nokogiri/3.1/nokogiri.so +0 -0
- data/lib/nokogiri/3.2/nokogiri.so +0 -0
- data/lib/nokogiri/css/node.rb +2 -2
- data/lib/nokogiri/css/xpath_visitor.rb +5 -3
- data/lib/nokogiri/css.rb +6 -0
- data/lib/nokogiri/encoding_handler.rb +57 -0
- data/lib/nokogiri/extension.rb +3 -2
- data/lib/nokogiri/html4/document.rb +2 -121
- data/lib/nokogiri/html4/element_description_defaults.rb +6 -12
- data/lib/nokogiri/html4/encoding_reader.rb +121 -0
- data/lib/nokogiri/html4.rb +1 -0
- data/lib/nokogiri/html5/document.rb +113 -36
- data/lib/nokogiri/html5/document_fragment.rb +9 -2
- data/lib/nokogiri/html5/node.rb +3 -5
- data/lib/nokogiri/html5.rb +127 -216
- data/lib/nokogiri/jruby/dependencies.rb +1 -19
- data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
- data/lib/nokogiri/version/constant.rb +1 -1
- data/lib/nokogiri/version/info.rb +11 -10
- data/lib/nokogiri/xml/attr.rb +49 -0
- data/lib/nokogiri/xml/builder.rb +1 -1
- data/lib/nokogiri/xml/document.rb +102 -54
- data/lib/nokogiri/xml/document_fragment.rb +49 -6
- data/lib/nokogiri/xml/namespace.rb +42 -0
- data/lib/nokogiri/xml/node/save_options.rb +6 -4
- data/lib/nokogiri/xml/node.rb +190 -35
- data/lib/nokogiri/xml/node_set.rb +87 -9
- data/lib/nokogiri/xml/parse_options.rb +129 -50
- data/lib/nokogiri/xml/pp/node.rb +6 -4
- data/lib/nokogiri/xml/processing_instruction.rb +2 -1
- data/lib/nokogiri/xml/sax/parser.rb +2 -3
- data/lib/nokogiri/xslt.rb +1 -1
- data/lib/nokogiri.rb +3 -11
- data/lib/xsd/xmlparser/nokogiri.rb +3 -1
- metadata +12 -246
@@ -28,6 +28,13 @@ module Nokogiri
|
|
28
28
|
attr_accessor :document
|
29
29
|
attr_accessor :errors
|
30
30
|
|
31
|
+
# Get the parser's quirks mode value. See HTML5::QuirksMode.
|
32
|
+
#
|
33
|
+
# This method returns `nil` if the parser was not invoked (e.g., `Nokogiri::HTML5::DocumentFragment.new(doc)`).
|
34
|
+
#
|
35
|
+
# Since v1.14.0
|
36
|
+
attr_reader :quirks_mode
|
37
|
+
|
31
38
|
# Create a document fragment.
|
32
39
|
def initialize(doc, tags = nil, ctx = nil, options = {})
|
33
40
|
self.document = doc
|
@@ -41,10 +48,10 @@ module Nokogiri
|
|
41
48
|
Nokogiri::Gumbo.fragment(self, tags, ctx, max_attributes, max_errors, max_depth)
|
42
49
|
end
|
43
50
|
|
44
|
-
def serialize(options = {}, &block)
|
51
|
+
def serialize(options = {}, &block) # :nodoc:
|
45
52
|
# Bypass XML::Document.serialize which doesn't support options even
|
46
53
|
# though XML::Node.serialize does!
|
47
|
-
XML::Node.instance_method(:serialize).bind(self).call(options, &block)
|
54
|
+
XML::Node.instance_method(:serialize).bind_call(self, options, &block)
|
48
55
|
end
|
49
56
|
|
50
57
|
# Parse a document fragment from +tags+, returning a Nodeset.
|
data/lib/nokogiri/html5/node.rb
CHANGED
@@ -28,7 +28,7 @@ module Nokogiri
|
|
28
28
|
def inner_html(options = {})
|
29
29
|
return super(options) unless document.is_a?(HTML5::Document)
|
30
30
|
|
31
|
-
result = options[:preserve_newline] && HTML5.prepend_newline?(self) ? +"\n" : +""
|
31
|
+
result = options[:preserve_newline] && prepend_newline? ? +"\n" : +""
|
32
32
|
result << children.map { |child| child.to_html(options) }.join
|
33
33
|
result
|
34
34
|
end
|
@@ -56,11 +56,9 @@ module Nokogiri
|
|
56
56
|
native_write_to(io, encoding, indent_string, config_options)
|
57
57
|
else
|
58
58
|
# Serialize including the current node.
|
59
|
+
html = html_standard_serialize(options[:preserve_newline] || false)
|
59
60
|
encoding ||= document.encoding || Encoding::UTF_8
|
60
|
-
|
61
|
-
preserve_newline: options[:preserve_newline] || false,
|
62
|
-
}
|
63
|
-
HTML5.serialize_node_internal(self, io, encoding, internal_ops)
|
61
|
+
io << html.encode(encoding, fallback: lambda { |c| "&#x#{c.ord.to_s(16)};" })
|
64
62
|
end
|
65
63
|
end
|
66
64
|
|
data/lib/nokogiri/html5.rb
CHANGED
@@ -227,250 +227,161 @@ module Nokogiri
|
|
227
227
|
#
|
228
228
|
# Since v1.12.0
|
229
229
|
module HTML5
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
XML_NAMESPACE = "http://www.w3.org/XML/1998/namespace"
|
236
|
-
XMLNS_NAMESPACE = "http://www.w3.org/2000/xmlns/"
|
230
|
+
class << self
|
231
|
+
# Parse an HTML 5 document. Convenience method for {Nokogiri::HTML5::Document.parse}
|
232
|
+
def parse(string, url = nil, encoding = nil, **options, &block)
|
233
|
+
Document.parse(string, url, encoding, **options, &block)
|
234
|
+
end
|
237
235
|
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
236
|
+
# Parse a fragment from +string+. Convenience method for
|
237
|
+
# {Nokogiri::HTML5::DocumentFragment.parse}.
|
238
|
+
def fragment(string, encoding = nil, **options)
|
239
|
+
DocumentFragment.parse(string, encoding, options)
|
240
|
+
end
|
242
241
|
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
242
|
+
# Fetch and parse a HTML document from the web, following redirects,
|
243
|
+
# handling https, and determining the character encoding using HTML5
|
244
|
+
# rules. +uri+ may be a +String+ or a +URI+. +options+ contains
|
245
|
+
# http headers and special options. Everything which is not a
|
246
|
+
# special option is considered a header. Special options include:
|
247
|
+
# * :follow_limit => number of redirects which are followed
|
248
|
+
# * :basic_auth => [username, password]
|
249
|
+
def get(uri, options = {})
|
250
|
+
# TODO: deprecate
|
251
|
+
warn("Nokogiri::HTML5.get is deprecated and will be removed in a future version of Nokogiri.",
|
252
|
+
uplevel: 1, category: :deprecated)
|
253
|
+
get_impl(uri, options)
|
254
|
+
end
|
248
255
|
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
256
|
+
# :nodoc:
|
257
|
+
def read_and_encode(string, encoding)
|
258
|
+
# Read the string with the given encoding.
|
259
|
+
if string.respond_to?(:read)
|
260
|
+
string = if encoding.nil?
|
261
|
+
string.read
|
262
|
+
else
|
263
|
+
string.read(encoding: encoding)
|
264
|
+
end
|
265
|
+
else
|
266
|
+
# Otherwise the string has the given encoding.
|
267
|
+
string = string.to_s
|
268
|
+
if encoding
|
269
|
+
string = string.dup
|
270
|
+
string.force_encoding(encoding)
|
271
|
+
end
|
272
|
+
end
|
262
273
|
|
263
|
-
|
274
|
+
# convert to UTF-8
|
275
|
+
if string.encoding != Encoding::UTF_8
|
276
|
+
string = reencode(string)
|
277
|
+
end
|
278
|
+
string
|
279
|
+
end
|
264
280
|
|
265
|
-
|
266
|
-
headers = options.clone
|
267
|
-
headers = { follow_limit: headers } if Numeric === headers # deprecated
|
268
|
-
limit = headers[:follow_limit] ? headers.delete(:follow_limit).to_i : 10
|
281
|
+
private
|
269
282
|
|
270
|
-
|
271
|
-
|
283
|
+
def get_impl(uri, options = {})
|
284
|
+
headers = options.clone
|
285
|
+
headers = { follow_limit: headers } if Numeric === headers # deprecated
|
286
|
+
limit = headers[:follow_limit] ? headers.delete(:follow_limit).to_i : 10
|
272
287
|
|
273
|
-
|
288
|
+
require "net/http"
|
289
|
+
uri = URI(uri) unless URI === uri
|
274
290
|
|
275
|
-
|
276
|
-
http.use_ssl = true if uri.scheme == "https"
|
291
|
+
http = Net::HTTP.new(uri.host, uri.port)
|
277
292
|
|
278
|
-
|
279
|
-
|
280
|
-
# :close_on_empty_response, :continue_timeout, :key, :open_timeout,
|
281
|
-
# :read_timeout, :ssl_timeout, :ssl_version, :use_ssl,
|
282
|
-
# :verify_callback, :verify_depth, :verify_mode
|
283
|
-
options.each do |key, _value|
|
284
|
-
http.send("#{key}=", headers.delete(key)) if http.respond_to?("#{key}=")
|
285
|
-
end
|
293
|
+
# TLS / SSL support
|
294
|
+
http.use_ssl = true if uri.scheme == "https"
|
286
295
|
|
287
|
-
|
296
|
+
# Pass through Net::HTTP override values, which currently include:
|
297
|
+
# :ca_file, :ca_path, :cert, :cert_store, :ciphers,
|
298
|
+
# :close_on_empty_response, :continue_timeout, :key, :open_timeout,
|
299
|
+
# :read_timeout, :ssl_timeout, :ssl_version, :use_ssl,
|
300
|
+
# :verify_callback, :verify_depth, :verify_mode
|
301
|
+
options.each do |key, _value|
|
302
|
+
http.send("#{key}=", headers.delete(key)) if http.respond_to?("#{key}=")
|
303
|
+
end
|
288
304
|
|
289
|
-
|
290
|
-
auth = headers.delete(:basic_auth)
|
291
|
-
auth ||= [uri.user, uri.password] if uri.user && uri.password
|
292
|
-
request.basic_auth(auth.first, auth.last) if auth
|
305
|
+
request = Net::HTTP::Get.new(uri.request_uri)
|
293
306
|
|
294
|
-
|
295
|
-
|
307
|
+
# basic authentication
|
308
|
+
auth = headers.delete(:basic_auth)
|
309
|
+
auth ||= [uri.user, uri.password] if uri.user && uri.password
|
310
|
+
request.basic_auth(auth.first, auth.last) if auth
|
296
311
|
|
297
|
-
|
312
|
+
# remaining options are treated as headers
|
313
|
+
headers.each { |key, value| request[key.to_s] = value.to_s }
|
298
314
|
|
299
|
-
|
300
|
-
when Net::HTTPSuccess
|
301
|
-
doc = parse(reencode(response.body, response["content-type"]), options)
|
302
|
-
doc.instance_variable_set("@response", response)
|
303
|
-
doc.class.send(:attr_reader, :response)
|
304
|
-
doc
|
305
|
-
when Net::HTTPRedirection
|
306
|
-
response.value if limit <= 1
|
307
|
-
location = URI.join(uri, response["location"])
|
308
|
-
get_impl(location, options.merge(follow_limit: limit - 1))
|
309
|
-
else
|
310
|
-
response.value
|
311
|
-
end
|
312
|
-
end
|
315
|
+
response = http.request(request)
|
313
316
|
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
317
|
+
case response
|
318
|
+
when Net::HTTPSuccess
|
319
|
+
doc = parse(reencode(response.body, response["content-type"]), options)
|
320
|
+
doc.instance_variable_set(:@response, response)
|
321
|
+
doc.class.send(:attr_reader, :response)
|
322
|
+
doc
|
323
|
+
when Net::HTTPRedirection
|
324
|
+
response.value if limit <= 1
|
325
|
+
location = URI.join(uri, response["location"])
|
326
|
+
get_impl(location, options.merge(follow_limit: limit - 1))
|
319
327
|
else
|
320
|
-
|
321
|
-
end
|
322
|
-
else
|
323
|
-
# Otherwise the string has the given encoding.
|
324
|
-
string = string.to_s
|
325
|
-
if encoding
|
326
|
-
string = string.dup
|
327
|
-
string.force_encoding(encoding)
|
328
|
+
response.value
|
328
329
|
end
|
329
330
|
end
|
330
331
|
|
331
|
-
#
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
initial_bytes = body[0..2].bytes
|
356
|
-
if initial_bytes[0..2] == [0xEF, 0xBB, 0xBF]
|
357
|
-
encoding = Encoding::UTF_8
|
358
|
-
elsif initial_bytes[0..1] == [0xFE, 0xFF]
|
359
|
-
encoding = Encoding::UTF_16BE
|
360
|
-
elsif initial_bytes[0..1] == [0xFF, 0xFE]
|
361
|
-
encoding = Encoding::UTF_16LE
|
362
|
-
end
|
363
|
-
|
364
|
-
# look for a charset in a content-encoding header
|
365
|
-
if content_type
|
366
|
-
encoding ||= content_type[/charset=["']?(.*?)($|["';\s])/i, 1]
|
367
|
-
end
|
368
|
-
|
369
|
-
# look for a charset in a meta tag in the first 1024 bytes
|
370
|
-
unless encoding
|
371
|
-
data = body[0..1023].gsub(/<!--.*?(-->|\Z)/m, "")
|
372
|
-
data.scan(/<meta.*?>/m).each do |meta|
|
373
|
-
encoding ||= meta[/charset=["']?([^>]*?)($|["'\s>])/im, 1]
|
332
|
+
# Charset sniffing is a complex and controversial topic that understandably isn't done _by
|
333
|
+
# default_ by the Ruby Net::HTTP library. This being said, it is a very real problem for
|
334
|
+
# consumers of HTML as the default for HTML is iso-8859-1, most "good" producers use utf-8, and
|
335
|
+
# the Gumbo parser *only* supports utf-8.
|
336
|
+
#
|
337
|
+
# Accordingly, Nokogiri::HTML4::Document.parse provides limited encoding detection. Following
|
338
|
+
# this lead, Nokogiri::HTML5 attempts to do likewise, while attempting to more closely follow
|
339
|
+
# the HTML5 standard.
|
340
|
+
#
|
341
|
+
# http://bugs.ruby-lang.org/issues/2567
|
342
|
+
# http://www.w3.org/TR/html5/syntax.html#determining-the-character-encoding
|
343
|
+
#
|
344
|
+
def reencode(body, content_type = nil)
|
345
|
+
if body.encoding == Encoding::ASCII_8BIT
|
346
|
+
encoding = nil
|
347
|
+
|
348
|
+
# look for a Byte Order Mark (BOM)
|
349
|
+
initial_bytes = body[0..2].bytes
|
350
|
+
if initial_bytes[0..2] == [0xEF, 0xBB, 0xBF]
|
351
|
+
encoding = Encoding::UTF_8
|
352
|
+
elsif initial_bytes[0..1] == [0xFE, 0xFF]
|
353
|
+
encoding = Encoding::UTF_16BE
|
354
|
+
elsif initial_bytes[0..1] == [0xFF, 0xFE]
|
355
|
+
encoding = Encoding::UTF_16LE
|
374
356
|
end
|
375
|
-
end
|
376
|
-
|
377
|
-
# if all else fails, default to the official default encoding for HTML
|
378
|
-
encoding ||= Encoding::ISO_8859_1
|
379
|
-
|
380
|
-
# change the encoding to match the detected or inferred encoding
|
381
|
-
body = body.dup
|
382
|
-
begin
|
383
|
-
body.force_encoding(encoding)
|
384
|
-
rescue ArgumentError
|
385
|
-
body.force_encoding(Encoding::ISO_8859_1)
|
386
|
-
end
|
387
|
-
end
|
388
357
|
|
389
|
-
|
390
|
-
|
358
|
+
# look for a charset in a content-encoding header
|
359
|
+
if content_type
|
360
|
+
encoding ||= content_type[/charset=["']?(.*?)($|["';\s])/i, 1]
|
361
|
+
end
|
391
362
|
|
392
|
-
|
393
|
-
|
394
|
-
|
395
|
-
|
396
|
-
|
397
|
-
# XXX(sfc): attach namespaces to all nodes, even html?
|
398
|
-
tagname = if ns_uri.nil? || ns_uri == HTML_NAMESPACE || ns_uri == MATHML_NAMESPACE || ns_uri == SVG_NAMESPACE
|
399
|
-
current_node.name
|
400
|
-
else
|
401
|
-
"#{ns.prefix}:#{current_node.name}"
|
402
|
-
end
|
403
|
-
io << "<" << tagname
|
404
|
-
current_node.attribute_nodes.each do |attr|
|
405
|
-
attr_ns = attr.namespace
|
406
|
-
if attr_ns.nil?
|
407
|
-
attr_name = attr.name
|
408
|
-
else
|
409
|
-
ns_uri = attr_ns.href
|
410
|
-
attr_name = if ns_uri == XML_NAMESPACE
|
411
|
-
"xml:" + attr.name.sub(/^[^:]*:/, "")
|
412
|
-
elsif ns_uri == XMLNS_NAMESPACE && attr.name.sub(/^[^:]*:/, "") == "xmlns"
|
413
|
-
"xmlns"
|
414
|
-
elsif ns_uri == XMLNS_NAMESPACE
|
415
|
-
"xmlns:" + attr.name.sub(/^[^:]*:/, "")
|
416
|
-
elsif ns_uri == XLINK_NAMESPACE
|
417
|
-
"xlink:" + attr.name.sub(/^[^:]*:/, "")
|
418
|
-
else
|
419
|
-
"#{attr_ns.prefix}:#{attr.name}"
|
363
|
+
# look for a charset in a meta tag in the first 1024 bytes
|
364
|
+
unless encoding
|
365
|
+
data = body[0..1023].gsub(/<!--.*?(-->|\Z)/m, "")
|
366
|
+
data.scan(/<meta.*?>/im).each do |meta|
|
367
|
+
encoding ||= meta[/charset=["']?([^>]*?)($|["'\s>])/im, 1]
|
420
368
|
end
|
421
369
|
end
|
422
|
-
|
423
|
-
|
424
|
-
|
425
|
-
|
426
|
-
|
427
|
-
|
428
|
-
|
429
|
-
|
370
|
+
|
371
|
+
# if all else fails, default to the official default encoding for HTML
|
372
|
+
encoding ||= Encoding::ISO_8859_1
|
373
|
+
|
374
|
+
# change the encoding to match the detected or inferred encoding
|
375
|
+
body = body.dup
|
376
|
+
begin
|
377
|
+
body.force_encoding(encoding)
|
378
|
+
rescue ArgumentError
|
379
|
+
body.force_encoding(Encoding::ISO_8859_1)
|
430
380
|
end
|
431
|
-
io << "</" << tagname << ">"
|
432
|
-
end
|
433
|
-
when XML::Node::TEXT_NODE
|
434
|
-
parent = current_node.parent
|
435
|
-
io << if parent.element? && ["style", "script", "xmp", "iframe", "noembed", "noframes", "plaintext", "noscript"].include?(parent.name)
|
436
|
-
current_node.content
|
437
|
-
else
|
438
|
-
escape_text(current_node.content, encoding, false)
|
439
|
-
end
|
440
|
-
when XML::Node::CDATA_SECTION_NODE
|
441
|
-
io << "<![CDATA[" << current_node.content << "]]>"
|
442
|
-
when XML::Node::COMMENT_NODE
|
443
|
-
io << "<!--" << current_node.content << "-->"
|
444
|
-
when XML::Node::PI_NODE
|
445
|
-
io << "<?" << current_node.content << ">"
|
446
|
-
when XML::Node::DOCUMENT_TYPE_NODE, XML::Node::DTD_NODE
|
447
|
-
io << "<!DOCTYPE " << current_node.name << ">"
|
448
|
-
when XML::Node::HTML_DOCUMENT_NODE, XML::Node::DOCUMENT_FRAG_NODE
|
449
|
-
current_node.children.each do |child|
|
450
|
-
serialize_node_internal(child, io, encoding, options)
|
451
381
|
end
|
452
|
-
else
|
453
|
-
raise "Unexpected node '#{current_node.name}' of type #{current_node.type}"
|
454
|
-
end
|
455
|
-
end
|
456
382
|
|
457
|
-
|
458
|
-
text = if attribute_mode
|
459
|
-
text.gsub(/[&\u00a0"]/,
|
460
|
-
"&" => "&amp;", "\u00a0" => "&nbsp;", '"' => "&quot;")
|
461
|
-
else
|
462
|
-
text.gsub(/[&\u00a0<>]/,
|
463
|
-
"&" => "&amp;", "\u00a0" => "&nbsp;", "<" => "&lt;", ">" => "&gt;")
|
383
|
+
body.encode(Encoding::UTF_8)
|
464
384
|
end
|
465
|
-
# Not part of the standard
|
466
|
-
text.encode(encoding, fallback: lambda { |c| "&\#x#{c.ord.to_s(16)};" })
|
467
|
-
end
|
468
|
-
|
469
|
-
def self.prepend_newline?(node)
|
470
|
-
return false unless ["pre", "textarea", "listing"].include?(node.name) && !node.children.empty?
|
471
|
-
|
472
|
-
first_child = node.children[0]
|
473
|
-
first_child.text? && first_child.content.start_with?("\n")
|
474
385
|
end
|
475
386
|
end
|
476
387
|
end
|
@@ -1,21 +1,3 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
|
4
|
-
# unless defined?(JRuby::Rack::VERSION) || defined?(AppEngine::ApiProxy)
|
5
|
-
#
|
6
|
-
# However, simply cutting defined?(JRuby::Rack::VERSION) off resulted in
|
7
|
-
# an unable-to-load-nokogiri problem. Thus, now, Nokogiri checks the presense
|
8
|
-
# of appengine-rack.jar in $LOAD_PATH. If Nokogiri is on GAE, Nokogiri
|
9
|
-
# should skip loading xml jars. This is because those are in WEB-INF/lib and
|
10
|
-
# already set in the classpath.
|
11
|
-
unless $LOAD_PATH.to_s.include?("appengine-rack")
|
12
|
-
require "stringio"
|
13
|
-
require "isorelax.jar"
|
14
|
-
require "jing.jar"
|
15
|
-
require "nekohtml.jar"
|
16
|
-
require "nekodtd.jar"
|
17
|
-
require "xercesImpl.jar"
|
18
|
-
require "serializer.jar"
|
19
|
-
require "xalan.jar"
|
20
|
-
require "xml-apis.jar"
|
21
|
-
end
|
3
|
+
require_relative "nokogiri_jars"
|
@@ -0,0 +1,43 @@
|
|
1
|
+
# this is a generated file, to avoid over-writing it just delete this comment
|
2
|
+
begin
|
3
|
+
require 'jar_dependencies'
|
4
|
+
rescue LoadError
|
5
|
+
require 'xalan/xalan/2.7.2/xalan-2.7.2.jar'
|
6
|
+
require 'net/sourceforge/htmlunit/neko-htmlunit/2.63.0/neko-htmlunit-2.63.0.jar'
|
7
|
+
require 'nu/validator/jing/20200702VNU/jing-20200702VNU.jar'
|
8
|
+
require 'xerces/xercesImpl/2.12.2/xercesImpl-2.12.2.jar'
|
9
|
+
require 'org/nokogiri/nekodtd/0.1.11.noko1/nekodtd-0.1.11.noko1.jar'
|
10
|
+
require 'net/sf/saxon/Saxon-HE/9.6.0-4/Saxon-HE-9.6.0-4.jar'
|
11
|
+
require 'xml-apis/xml-apis/1.4.01/xml-apis-1.4.01.jar'
|
12
|
+
require 'xalan/serializer/2.7.2/serializer-2.7.2.jar'
|
13
|
+
require 'isorelax/isorelax/20030108/isorelax-20030108.jar'
|
14
|
+
end
|
15
|
+
|
16
|
+
if defined? Jars
|
17
|
+
require_jar 'xalan', 'xalan', '2.7.2'
|
18
|
+
require_jar 'net.sourceforge.htmlunit', 'neko-htmlunit', '2.63.0'
|
19
|
+
require_jar 'nu.validator', 'jing', '20200702VNU'
|
20
|
+
require_jar 'xerces', 'xercesImpl', '2.12.2'
|
21
|
+
require_jar 'org.nokogiri', 'nekodtd', '0.1.11.noko1'
|
22
|
+
require_jar 'net.sf.saxon', 'Saxon-HE', '9.6.0-4'
|
23
|
+
require_jar 'xml-apis', 'xml-apis', '1.4.01'
|
24
|
+
require_jar 'xalan', 'serializer', '2.7.2'
|
25
|
+
require_jar 'isorelax', 'isorelax', '20030108'
|
26
|
+
end
|
27
|
+
|
28
|
+
module Nokogiri
|
29
|
+
# generated by the :vendor_jars rake task
|
30
|
+
JAR_DEPENDENCIES = {
|
31
|
+
"isorelax:isorelax" => "20030108",
|
32
|
+
"net.sf.saxon:Saxon-HE" => "9.6.0-4",
|
33
|
+
"net.sourceforge.htmlunit:neko-htmlunit" => "2.63.0",
|
34
|
+
"nu.validator:jing" => "20200702VNU",
|
35
|
+
"org.nokogiri:nekodtd" => "0.1.11.noko1",
|
36
|
+
"xalan:serializer" => "2.7.2",
|
37
|
+
"xalan:xalan" => "2.7.2",
|
38
|
+
"xerces:xercesImpl" => "2.12.2",
|
39
|
+
"xml-apis:xml-apis" => "1.4.01",
|
40
|
+
}.freeze
|
41
|
+
XERCES_VERSION = JAR_DEPENDENCIES["xerces:xercesImpl"]
|
42
|
+
NEKO_VERSION = JAR_DEPENDENCIES["net.sourceforge.htmlunit:neko-htmlunit"]
|
43
|
+
end
|
@@ -105,16 +105,16 @@ module Nokogiri
|
|
105
105
|
if libxml2_using_packaged?
|
106
106
|
cppflags << "-I#{File.join(header_directory, "include").shellescape}"
|
107
107
|
cppflags << "-I#{File.join(header_directory, "include/libxml2").shellescape}"
|
108
|
+
end
|
108
109
|
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
end
|
115
|
-
ldflags << "-L#{lib_directory.shellescape}"
|
116
|
-
ldflags << "-l:nokogiri.so"
|
110
|
+
if windows?
|
111
|
+
# on windows, nokogumbo needs to link against nokogiri.so to resolve symbols. see #2167
|
112
|
+
lib_directory = File.expand_path(File.join(File.dirname(__FILE__), "../#{ruby_minor}"))
|
113
|
+
unless File.exist?(lib_directory)
|
114
|
+
lib_directory = File.expand_path(File.join(File.dirname(__FILE__), ".."))
|
117
115
|
end
|
116
|
+
ldflags << "-L#{lib_directory.shellescape}"
|
117
|
+
ldflags << "-l:nokogiri.so"
|
118
118
|
end
|
119
119
|
|
120
120
|
nokogiri["cppflags"] = cppflags
|
@@ -169,8 +169,9 @@ module Nokogiri
|
|
169
169
|
vi["other_libraries"] = Hash[*Nokogiri::OTHER_LIBRARY_VERSIONS.split(/[,:]/)]
|
170
170
|
elsif jruby?
|
171
171
|
vi["other_libraries"] = {}.tap do |ol|
|
172
|
-
|
173
|
-
|
172
|
+
Nokogiri::JAR_DEPENDENCIES.each do |k, v|
|
173
|
+
ol[k] = v
|
174
|
+
end
|
174
175
|
end
|
175
176
|
end
|
176
177
|
end
|
data/lib/nokogiri/xml/attr.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
# coding: utf-8
|
1
2
|
# frozen_string_literal: true
|
2
3
|
|
3
4
|
module Nokogiri
|
@@ -7,6 +8,54 @@ module Nokogiri
|
|
7
8
|
alias_method :to_s, :content
|
8
9
|
alias_method :content=, :value=
|
9
10
|
|
11
|
+
#
|
12
|
+
# :call-seq: deconstruct_keys(array_of_names) → Hash
|
13
|
+
#
|
14
|
+
# Returns a hash describing the Attr, to use in pattern matching.
|
15
|
+
#
|
16
|
+
# Valid keys and their values:
|
17
|
+
# - +name+ → (String) The name of the attribute.
|
18
|
+
# - +value+ → (String) The value of the attribute.
|
19
|
+
# - +namespace+ → (Namespace, nil) The Namespace of the attribute, or +nil+ if there is no namespace.
|
20
|
+
#
|
21
|
+
# ⚡ This is an experimental feature, available since v1.14.0
|
22
|
+
#
|
23
|
+
# *Example*
|
24
|
+
#
|
25
|
+
# doc = Nokogiri::XML.parse(<<~XML)
|
26
|
+
# <?xml version="1.0"?>
|
27
|
+
# <root xmlns="http://nokogiri.org/ns/default" xmlns:noko="http://nokogiri.org/ns/noko">
|
28
|
+
# <child1 foo="abc" noko:bar="def"/>
|
29
|
+
# </root>
|
30
|
+
# XML
|
31
|
+
#
|
32
|
+
# attributes = doc.root.elements.first.attribute_nodes
|
33
|
+
# # => [#(Attr:0x35c { name = "foo", value = "abc" }),
|
34
|
+
# # #(Attr:0x370 {
|
35
|
+
# # name = "bar",
|
36
|
+
# # namespace = #(Namespace:0x384 {
|
37
|
+
# # prefix = "noko",
|
38
|
+
# # href = "http://nokogiri.org/ns/noko"
|
39
|
+
# # }),
|
40
|
+
# # value = "def"
|
41
|
+
# # })]
|
42
|
+
#
|
43
|
+
# attributes.first.deconstruct_keys([:name, :value, :namespace])
|
44
|
+
# # => {:name=>"foo", :value=>"abc", :namespace=>nil}
|
45
|
+
#
|
46
|
+
# attributes.last.deconstruct_keys([:name, :value, :namespace])
|
47
|
+
# # => {:name=>"bar",
|
48
|
+
# # :value=>"def",
|
49
|
+
# # :namespace=>
|
50
|
+
# # #(Namespace:0x384 {
|
51
|
+
# # prefix = "noko",
|
52
|
+
# # href = "http://nokogiri.org/ns/noko"
|
53
|
+
# # })}
|
54
|
+
#
|
55
|
+
def deconstruct_keys(keys)
|
56
|
+
{ name: name, value: value, namespace: namespace }
|
57
|
+
end
|
58
|
+
|
10
59
|
private
|
11
60
|
|
12
61
|
def inspect_attributes
|
data/lib/nokogiri/xml/builder.rb
CHANGED
@@ -234,7 +234,7 @@ module Nokogiri
|
|
234
234
|
#
|
235
235
|
# == Document Types
|
236
236
|
#
|
237
|
-
# To create a document type (DTD),
|
237
|
+
# To create a document type (DTD), use the Builder#doc method to get
|
238
238
|
# the current context document. Then call Node#create_internal_subset to
|
239
239
|
# create the DTD node.
|
240
240
|
#
|