nokogiri 1.13.10-x86_64-darwin → 1.14.0-x86_64-darwin

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (68) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +33 -0
  3. data/LICENSE-DEPENDENCIES.md +830 -509
  4. data/LICENSE.md +1 -1
  5. data/README.md +18 -11
  6. data/dependencies.yml +25 -7
  7. data/ext/nokogiri/extconf.rb +80 -21
  8. data/ext/nokogiri/gumbo.c +19 -9
  9. data/ext/nokogiri/html4_document.c +1 -1
  10. data/ext/nokogiri/html4_entity_lookup.c +1 -1
  11. data/ext/nokogiri/html4_sax_parser_context.c +0 -5
  12. data/ext/nokogiri/nokogiri.c +33 -51
  13. data/ext/nokogiri/nokogiri.h +17 -14
  14. data/ext/nokogiri/xml_attribute_decl.c +1 -1
  15. data/ext/nokogiri/xml_cdata.c +1 -1
  16. data/ext/nokogiri/xml_document.c +16 -11
  17. data/ext/nokogiri/xml_element_content.c +2 -2
  18. data/ext/nokogiri/xml_element_decl.c +1 -1
  19. data/ext/nokogiri/xml_encoding_handler.c +2 -2
  20. data/ext/nokogiri/xml_namespace.c +38 -8
  21. data/ext/nokogiri/xml_node.c +286 -26
  22. data/ext/nokogiri/xml_node_set.c +0 -2
  23. data/ext/nokogiri/xml_reader.c +40 -20
  24. data/ext/nokogiri/xml_relax_ng.c +0 -2
  25. data/ext/nokogiri/xml_sax_parser.c +22 -16
  26. data/ext/nokogiri/xml_sax_parser_context.c +0 -5
  27. data/ext/nokogiri/xml_sax_push_parser.c +0 -2
  28. data/ext/nokogiri/xml_schema.c +0 -2
  29. data/ext/nokogiri/xml_xpath_context.c +87 -83
  30. data/ext/nokogiri/xslt_stylesheet.c +14 -13
  31. data/gumbo-parser/Makefile +10 -0
  32. data/lib/nokogiri/2.7/nokogiri.bundle +0 -0
  33. data/lib/nokogiri/3.0/nokogiri.bundle +0 -0
  34. data/lib/nokogiri/3.1/nokogiri.bundle +0 -0
  35. data/lib/nokogiri/{2.6 → 3.2}/nokogiri.bundle +0 -0
  36. data/lib/nokogiri/css/node.rb +2 -2
  37. data/lib/nokogiri/css/xpath_visitor.rb +5 -3
  38. data/lib/nokogiri/css.rb +6 -0
  39. data/lib/nokogiri/encoding_handler.rb +57 -0
  40. data/lib/nokogiri/extension.rb +3 -2
  41. data/lib/nokogiri/html4/document.rb +2 -121
  42. data/lib/nokogiri/html4/element_description_defaults.rb +6 -12
  43. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  44. data/lib/nokogiri/html4.rb +1 -0
  45. data/lib/nokogiri/html5/document.rb +113 -36
  46. data/lib/nokogiri/html5/document_fragment.rb +9 -2
  47. data/lib/nokogiri/html5/node.rb +3 -5
  48. data/lib/nokogiri/html5.rb +127 -216
  49. data/lib/nokogiri/jruby/dependencies.rb +1 -19
  50. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  51. data/lib/nokogiri/version/constant.rb +1 -1
  52. data/lib/nokogiri/version/info.rb +11 -10
  53. data/lib/nokogiri/xml/attr.rb +49 -0
  54. data/lib/nokogiri/xml/builder.rb +1 -1
  55. data/lib/nokogiri/xml/document.rb +102 -54
  56. data/lib/nokogiri/xml/document_fragment.rb +49 -6
  57. data/lib/nokogiri/xml/namespace.rb +42 -0
  58. data/lib/nokogiri/xml/node/save_options.rb +6 -4
  59. data/lib/nokogiri/xml/node.rb +190 -35
  60. data/lib/nokogiri/xml/node_set.rb +87 -9
  61. data/lib/nokogiri/xml/parse_options.rb +129 -50
  62. data/lib/nokogiri/xml/pp/node.rb +6 -4
  63. data/lib/nokogiri/xml/processing_instruction.rb +2 -1
  64. data/lib/nokogiri/xml/sax/parser.rb +2 -3
  65. data/lib/nokogiri/xslt.rb +1 -1
  66. data/lib/nokogiri.rb +3 -11
  67. data/lib/xsd/xmlparser/nokogiri.rb +3 -1
  68. metadata +13 -248
@@ -28,6 +28,13 @@ module Nokogiri
28
28
  attr_accessor :document
29
29
  attr_accessor :errors
30
30
 
31
+ # Get the parser's quirks mode value. See HTML5::QuirksMode.
32
+ #
33
+ # This method returns `nil` if the parser was not invoked (e.g., `Nokogiri::HTML5::DocumentFragment.new(doc)`).
34
+ #
35
+ # Since v1.14.0
36
+ attr_reader :quirks_mode
37
+
31
38
  # Create a document fragment.
32
39
  def initialize(doc, tags = nil, ctx = nil, options = {})
33
40
  self.document = doc
@@ -41,10 +48,10 @@ module Nokogiri
41
48
  Nokogiri::Gumbo.fragment(self, tags, ctx, max_attributes, max_errors, max_depth)
42
49
  end
43
50
 
44
- def serialize(options = {}, &block)
51
+ def serialize(options = {}, &block) # :nodoc:
45
52
  # Bypass XML::Document.serialize which doesn't support options even
46
53
  # though XML::Node.serialize does!
47
- XML::Node.instance_method(:serialize).bind(self).call(options, &block)
54
+ XML::Node.instance_method(:serialize).bind_call(self, options, &block)
48
55
  end
49
56
 
50
57
  # Parse a document fragment from +tags+, returning a Nodeset.
@@ -28,7 +28,7 @@ module Nokogiri
28
28
  def inner_html(options = {})
29
29
  return super(options) unless document.is_a?(HTML5::Document)
30
30
 
31
- result = options[:preserve_newline] && HTML5.prepend_newline?(self) ? +"\n" : +""
31
+ result = options[:preserve_newline] && prepend_newline? ? +"\n" : +""
32
32
  result << children.map { |child| child.to_html(options) }.join
33
33
  result
34
34
  end
@@ -56,11 +56,9 @@ module Nokogiri
56
56
  native_write_to(io, encoding, indent_string, config_options)
57
57
  else
58
58
  # Serialize including the current node.
59
+ html = html_standard_serialize(options[:preserve_newline] || false)
59
60
  encoding ||= document.encoding || Encoding::UTF_8
60
- internal_ops = {
61
- preserve_newline: options[:preserve_newline] || false,
62
- }
63
- HTML5.serialize_node_internal(self, io, encoding, internal_ops)
61
+ io << html.encode(encoding, fallback: lambda { |c| "&#x#{c.ord.to_s(16)};" })
64
62
  end
65
63
  end
66
64
 
@@ -227,250 +227,161 @@ module Nokogiri
227
227
  #
228
228
  # Since v1.12.0
229
229
  module HTML5
230
- # HTML uses the XHTML namespace.
231
- HTML_NAMESPACE = "http://www.w3.org/1999/xhtml"
232
- MATHML_NAMESPACE = "http://www.w3.org/1998/Math/MathML"
233
- SVG_NAMESPACE = "http://www.w3.org/2000/svg"
234
- XLINK_NAMESPACE = "http://www.w3.org/1999/xlink"
235
- XML_NAMESPACE = "http://www.w3.org/XML/1998/namespace"
236
- XMLNS_NAMESPACE = "http://www.w3.org/2000/xmlns/"
230
+ class << self
231
+ # Parse an HTML 5 document. Convenience method for {Nokogiri::HTML5::Document.parse}
232
+ def parse(string, url = nil, encoding = nil, **options, &block)
233
+ Document.parse(string, url, encoding, **options, &block)
234
+ end
237
235
 
238
- # Parse an HTML 5 document. Convenience method for {Nokogiri::HTML5::Document.parse}
239
- def self.parse(string, url = nil, encoding = nil, **options, &block)
240
- Document.parse(string, url, encoding, **options, &block)
241
- end
236
+ # Parse a fragment from +string+. Convenience method for
237
+ # {Nokogiri::HTML5::DocumentFragment.parse}.
238
+ def fragment(string, encoding = nil, **options)
239
+ DocumentFragment.parse(string, encoding, options)
240
+ end
242
241
 
243
- # Parse a fragment from +string+. Convenience method for
244
- # {Nokogiri::HTML5::DocumentFragment.parse}.
245
- def self.fragment(string, encoding = nil, **options)
246
- DocumentFragment.parse(string, encoding, options)
247
- end
242
+ # Fetch and parse a HTML document from the web, following redirects,
243
+ # handling https, and determining the character encoding using HTML5
244
+ # rules. +uri+ may be a +String+ or a +URI+. +options+ contains
245
+ # http headers and special options. Everything which is not a
246
+ # special option is considered a header. Special options include:
247
+ # * :follow_limit => number of redirects which are followed
248
+ # * :basic_auth => [username, password]
249
+ def get(uri, options = {})
250
+ # TODO: deprecate
251
+ warn("Nokogiri::HTML5.get is deprecated and will be removed in a future version of Nokogiri.",
252
+ uplevel: 1, category: :deprecated)
253
+ get_impl(uri, options)
254
+ end
248
255
 
249
- # Fetch and parse a HTML document from the web, following redirects,
250
- # handling https, and determining the character encoding using HTML5
251
- # rules. +uri+ may be a +String+ or a +URI+. +options+ contains
252
- # http headers and special options. Everything which is not a
253
- # special option is considered a header. Special options include:
254
- # * :follow_limit => number of redirects which are followed
255
- # * :basic_auth => [username, password]
256
- def self.get(uri, options = {})
257
- # TODO: deprecate
258
- warn("Nokogiri::HTML5.get is deprecated and will be removed in a future version of Nokogiri.",
259
- uplevel: 1, category: :deprecated)
260
- get_impl(uri, options)
261
- end
256
+ # :nodoc:
257
+ def read_and_encode(string, encoding)
258
+ # Read the string with the given encoding.
259
+ if string.respond_to?(:read)
260
+ string = if encoding.nil?
261
+ string.read
262
+ else
263
+ string.read(encoding: encoding)
264
+ end
265
+ else
266
+ # Otherwise the string has the given encoding.
267
+ string = string.to_s
268
+ if encoding
269
+ string = string.dup
270
+ string.force_encoding(encoding)
271
+ end
272
+ end
262
273
 
263
- private
274
+ # convert to UTF-8
275
+ if string.encoding != Encoding::UTF_8
276
+ string = reencode(string)
277
+ end
278
+ string
279
+ end
264
280
 
265
- def self.get_impl(uri, options = {})
266
- headers = options.clone
267
- headers = { follow_limit: headers } if Numeric === headers # deprecated
268
- limit = headers[:follow_limit] ? headers.delete(:follow_limit).to_i : 10
281
+ private
269
282
 
270
- require "net/http"
271
- uri = URI(uri) unless URI === uri
283
+ def get_impl(uri, options = {})
284
+ headers = options.clone
285
+ headers = { follow_limit: headers } if Numeric === headers # deprecated
286
+ limit = headers[:follow_limit] ? headers.delete(:follow_limit).to_i : 10
272
287
 
273
- http = Net::HTTP.new(uri.host, uri.port)
288
+ require "net/http"
289
+ uri = URI(uri) unless URI === uri
274
290
 
275
- # TLS / SSL support
276
- http.use_ssl = true if uri.scheme == "https"
291
+ http = Net::HTTP.new(uri.host, uri.port)
277
292
 
278
- # Pass through Net::HTTP override values, which currently include:
279
- # :ca_file, :ca_path, :cert, :cert_store, :ciphers,
280
- # :close_on_empty_response, :continue_timeout, :key, :open_timeout,
281
- # :read_timeout, :ssl_timeout, :ssl_version, :use_ssl,
282
- # :verify_callback, :verify_depth, :verify_mode
283
- options.each do |key, _value|
284
- http.send("#{key}=", headers.delete(key)) if http.respond_to?("#{key}=")
285
- end
293
+ # TLS / SSL support
294
+ http.use_ssl = true if uri.scheme == "https"
286
295
 
287
- request = Net::HTTP::Get.new(uri.request_uri)
296
+ # Pass through Net::HTTP override values, which currently include:
297
+ # :ca_file, :ca_path, :cert, :cert_store, :ciphers,
298
+ # :close_on_empty_response, :continue_timeout, :key, :open_timeout,
299
+ # :read_timeout, :ssl_timeout, :ssl_version, :use_ssl,
300
+ # :verify_callback, :verify_depth, :verify_mode
301
+ options.each do |key, _value|
302
+ http.send("#{key}=", headers.delete(key)) if http.respond_to?("#{key}=")
303
+ end
288
304
 
289
- # basic authentication
290
- auth = headers.delete(:basic_auth)
291
- auth ||= [uri.user, uri.password] if uri.user && uri.password
292
- request.basic_auth(auth.first, auth.last) if auth
305
+ request = Net::HTTP::Get.new(uri.request_uri)
293
306
 
294
- # remaining options are treated as headers
295
- headers.each { |key, value| request[key.to_s] = value.to_s }
307
+ # basic authentication
308
+ auth = headers.delete(:basic_auth)
309
+ auth ||= [uri.user, uri.password] if uri.user && uri.password
310
+ request.basic_auth(auth.first, auth.last) if auth
296
311
 
297
- response = http.request(request)
312
+ # remaining options are treated as headers
313
+ headers.each { |key, value| request[key.to_s] = value.to_s }
298
314
 
299
- case response
300
- when Net::HTTPSuccess
301
- doc = parse(reencode(response.body, response["content-type"]), options)
302
- doc.instance_variable_set("@response", response)
303
- doc.class.send(:attr_reader, :response)
304
- doc
305
- when Net::HTTPRedirection
306
- response.value if limit <= 1
307
- location = URI.join(uri, response["location"])
308
- get_impl(location, options.merge(follow_limit: limit - 1))
309
- else
310
- response.value
311
- end
312
- end
315
+ response = http.request(request)
313
316
 
314
- def self.read_and_encode(string, encoding)
315
- # Read the string with the given encoding.
316
- if string.respond_to?(:read)
317
- string = if encoding.nil?
318
- string.read
317
+ case response
318
+ when Net::HTTPSuccess
319
+ doc = parse(reencode(response.body, response["content-type"]), options)
320
+ doc.instance_variable_set(:@response, response)
321
+ doc.class.send(:attr_reader, :response)
322
+ doc
323
+ when Net::HTTPRedirection
324
+ response.value if limit <= 1
325
+ location = URI.join(uri, response["location"])
326
+ get_impl(location, options.merge(follow_limit: limit - 1))
319
327
  else
320
- string.read(encoding: encoding)
321
- end
322
- else
323
- # Otherwise the string has the given encoding.
324
- string = string.to_s
325
- if encoding
326
- string = string.dup
327
- string.force_encoding(encoding)
328
+ response.value
328
329
  end
329
330
  end
330
331
 
331
- # convert to UTF-8
332
- if string.encoding != Encoding::UTF_8
333
- string = reencode(string)
334
- end
335
- string
336
- end
337
-
338
- # Charset sniffing is a complex and controversial topic that understandably isn't done _by
339
- # default_ by the Ruby Net::HTTP library. This being said, it is a very real problem for
340
- # consumers of HTML as the default for HTML is iso-8859-1, most "good" producers use utf-8, and
341
- # the Gumbo parser *only* supports utf-8.
342
- #
343
- # Accordingly, Nokogiri::HTML4::Document.parse provides limited encoding detection. Following
344
- # this lead, Nokogiri::HTML5 attempts to do likewise, while attempting to more closely follow
345
- # the HTML5 standard.
346
- #
347
- # http://bugs.ruby-lang.org/issues/2567
348
- # http://www.w3.org/TR/html5/syntax.html#determining-the-character-encoding
349
- #
350
- def self.reencode(body, content_type = nil)
351
- if body.encoding == Encoding::ASCII_8BIT
352
- encoding = nil
353
-
354
- # look for a Byte Order Mark (BOM)
355
- initial_bytes = body[0..2].bytes
356
- if initial_bytes[0..2] == [0xEF, 0xBB, 0xBF]
357
- encoding = Encoding::UTF_8
358
- elsif initial_bytes[0..1] == [0xFE, 0xFF]
359
- encoding = Encoding::UTF_16BE
360
- elsif initial_bytes[0..1] == [0xFF, 0xFE]
361
- encoding = Encoding::UTF_16LE
362
- end
363
-
364
- # look for a charset in a content-encoding header
365
- if content_type
366
- encoding ||= content_type[/charset=["']?(.*?)($|["';\s])/i, 1]
367
- end
368
-
369
- # look for a charset in a meta tag in the first 1024 bytes
370
- unless encoding
371
- data = body[0..1023].gsub(/<!--.*?(-->|\Z)/m, "")
372
- data.scan(/<meta.*?>/m).each do |meta|
373
- encoding ||= meta[/charset=["']?([^>]*?)($|["'\s>])/im, 1]
332
+ # Charset sniffing is a complex and controversial topic that understandably isn't done _by
333
+ # default_ by the Ruby Net::HTTP library. This being said, it is a very real problem for
334
+ # consumers of HTML as the default for HTML is iso-8859-1, most "good" producers use utf-8, and
335
+ # the Gumbo parser *only* supports utf-8.
336
+ #
337
+ # Accordingly, Nokogiri::HTML4::Document.parse provides limited encoding detection. Following
338
+ # this lead, Nokogiri::HTML5 attempts to do likewise, while attempting to more closely follow
339
+ # the HTML5 standard.
340
+ #
341
+ # http://bugs.ruby-lang.org/issues/2567
342
+ # http://www.w3.org/TR/html5/syntax.html#determining-the-character-encoding
343
+ #
344
+ def reencode(body, content_type = nil)
345
+ if body.encoding == Encoding::ASCII_8BIT
346
+ encoding = nil
347
+
348
+ # look for a Byte Order Mark (BOM)
349
+ initial_bytes = body[0..2].bytes
350
+ if initial_bytes[0..2] == [0xEF, 0xBB, 0xBF]
351
+ encoding = Encoding::UTF_8
352
+ elsif initial_bytes[0..1] == [0xFE, 0xFF]
353
+ encoding = Encoding::UTF_16BE
354
+ elsif initial_bytes[0..1] == [0xFF, 0xFE]
355
+ encoding = Encoding::UTF_16LE
374
356
  end
375
- end
376
-
377
- # if all else fails, default to the official default encoding for HTML
378
- encoding ||= Encoding::ISO_8859_1
379
-
380
- # change the encoding to match the detected or inferred encoding
381
- body = body.dup
382
- begin
383
- body.force_encoding(encoding)
384
- rescue ArgumentError
385
- body.force_encoding(Encoding::ISO_8859_1)
386
- end
387
- end
388
357
 
389
- body.encode(Encoding::UTF_8)
390
- end
358
+ # look for a charset in a content-encoding header
359
+ if content_type
360
+ encoding ||= content_type[/charset=["']?(.*?)($|["';\s])/i, 1]
361
+ end
391
362
 
392
- def self.serialize_node_internal(current_node, io, encoding, options)
393
- case current_node.type
394
- when XML::Node::ELEMENT_NODE
395
- ns = current_node.namespace
396
- ns_uri = ns.nil? ? nil : ns.href
397
- # XXX(sfc): attach namespaces to all nodes, even html?
398
- tagname = if ns_uri.nil? || ns_uri == HTML_NAMESPACE || ns_uri == MATHML_NAMESPACE || ns_uri == SVG_NAMESPACE
399
- current_node.name
400
- else
401
- "#{ns.prefix}:#{current_node.name}"
402
- end
403
- io << "<" << tagname
404
- current_node.attribute_nodes.each do |attr|
405
- attr_ns = attr.namespace
406
- if attr_ns.nil?
407
- attr_name = attr.name
408
- else
409
- ns_uri = attr_ns.href
410
- attr_name = if ns_uri == XML_NAMESPACE
411
- "xml:" + attr.name.sub(/^[^:]*:/, "")
412
- elsif ns_uri == XMLNS_NAMESPACE && attr.name.sub(/^[^:]*:/, "") == "xmlns"
413
- "xmlns"
414
- elsif ns_uri == XMLNS_NAMESPACE
415
- "xmlns:" + attr.name.sub(/^[^:]*:/, "")
416
- elsif ns_uri == XLINK_NAMESPACE
417
- "xlink:" + attr.name.sub(/^[^:]*:/, "")
418
- else
419
- "#{attr_ns.prefix}:#{attr.name}"
363
+ # look for a charset in a meta tag in the first 1024 bytes
364
+ unless encoding
365
+ data = body[0..1023].gsub(/<!--.*?(-->|\Z)/m, "")
366
+ data.scan(/<meta.*?>/im).each do |meta|
367
+ encoding ||= meta[/charset=["']?([^>]*?)($|["'\s>])/im, 1]
420
368
  end
421
369
  end
422
- io << " " << attr_name << '="' << escape_text(attr.content, encoding, true) << '"'
423
- end
424
- io << ">"
425
- unless ["area", "base", "basefont", "bgsound", "br", "col", "embed", "frame", "hr", "img", "input", "keygen", "link", "meta", "param", "source", "track", "wbr"].include?(current_node.name)
426
- io << "\n" if options[:preserve_newline] && prepend_newline?(current_node)
427
- current_node.children.each do |child|
428
- # XXX(sfc): Templates handled specially?
429
- serialize_node_internal(child, io, encoding, options)
370
+
371
+ # if all else fails, default to the official default encoding for HTML
372
+ encoding ||= Encoding::ISO_8859_1
373
+
374
+ # change the encoding to match the detected or inferred encoding
375
+ body = body.dup
376
+ begin
377
+ body.force_encoding(encoding)
378
+ rescue ArgumentError
379
+ body.force_encoding(Encoding::ISO_8859_1)
430
380
  end
431
- io << "</" << tagname << ">"
432
- end
433
- when XML::Node::TEXT_NODE
434
- parent = current_node.parent
435
- io << if parent.element? && ["style", "script", "xmp", "iframe", "noembed", "noframes", "plaintext", "noscript"].include?(parent.name)
436
- current_node.content
437
- else
438
- escape_text(current_node.content, encoding, false)
439
- end
440
- when XML::Node::CDATA_SECTION_NODE
441
- io << "<![CDATA[" << current_node.content << "]]>"
442
- when XML::Node::COMMENT_NODE
443
- io << "<!--" << current_node.content << "-->"
444
- when XML::Node::PI_NODE
445
- io << "<?" << current_node.content << ">"
446
- when XML::Node::DOCUMENT_TYPE_NODE, XML::Node::DTD_NODE
447
- io << "<!DOCTYPE " << current_node.name << ">"
448
- when XML::Node::HTML_DOCUMENT_NODE, XML::Node::DOCUMENT_FRAG_NODE
449
- current_node.children.each do |child|
450
- serialize_node_internal(child, io, encoding, options)
451
381
  end
452
- else
453
- raise "Unexpected node '#{current_node.name}' of type #{current_node.type}"
454
- end
455
- end
456
382
 
457
- def self.escape_text(text, encoding, attribute_mode)
458
- text = if attribute_mode
459
- text.gsub(/[&\u00a0"]/,
460
- "&" => "&amp;", "\u00a0" => "&nbsp;", '"' => "&quot;")
461
- else
462
- text.gsub(/[&\u00a0<>]/,
463
- "&" => "&amp;", "\u00a0" => "&nbsp;", "<" => "&lt;", ">" => "&gt;")
383
+ body.encode(Encoding::UTF_8)
464
384
  end
465
- # Not part of the standard
466
- text.encode(encoding, fallback: lambda { |c| "&\#x#{c.ord.to_s(16)};" })
467
- end
468
-
469
- def self.prepend_newline?(node)
470
- return false unless ["pre", "textarea", "listing"].include?(node.name) && !node.children.empty?
471
-
472
- first_child = node.children[0]
473
- first_child.text? && first_child.content.start_with?("\n")
474
385
  end
475
386
  end
476
387
  end
@@ -1,21 +1,3 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- # The line below caused a problem on non-GAE rack environment.
4
- # unless defined?(JRuby::Rack::VERSION) || defined?(AppEngine::ApiProxy)
5
- #
6
- # However, simply cutting defined?(JRuby::Rack::VERSION) off resulted in
7
- # an unable-to-load-nokogiri problem. Thus, now, Nokogiri checks the presense
8
- # of appengine-rack.jar in $LOAD_PATH. If Nokogiri is on GAE, Nokogiri
9
- # should skip loading xml jars. This is because those are in WEB-INF/lib and
10
- # already set in the classpath.
11
- unless $LOAD_PATH.to_s.include?("appengine-rack")
12
- require "stringio"
13
- require "isorelax.jar"
14
- require "jing.jar"
15
- require "nekohtml.jar"
16
- require "nekodtd.jar"
17
- require "xercesImpl.jar"
18
- require "serializer.jar"
19
- require "xalan.jar"
20
- require "xml-apis.jar"
21
- end
3
+ require_relative "nokogiri_jars"
@@ -0,0 +1,43 @@
1
+ # this is a generated file, to avoid over-writing it just delete this comment
2
+ begin
3
+ require 'jar_dependencies'
4
+ rescue LoadError
5
+ require 'xalan/xalan/2.7.2/xalan-2.7.2.jar'
6
+ require 'net/sourceforge/htmlunit/neko-htmlunit/2.63.0/neko-htmlunit-2.63.0.jar'
7
+ require 'nu/validator/jing/20200702VNU/jing-20200702VNU.jar'
8
+ require 'xerces/xercesImpl/2.12.2/xercesImpl-2.12.2.jar'
9
+ require 'org/nokogiri/nekodtd/0.1.11.noko1/nekodtd-0.1.11.noko1.jar'
10
+ require 'net/sf/saxon/Saxon-HE/9.6.0-4/Saxon-HE-9.6.0-4.jar'
11
+ require 'xml-apis/xml-apis/1.4.01/xml-apis-1.4.01.jar'
12
+ require 'xalan/serializer/2.7.2/serializer-2.7.2.jar'
13
+ require 'isorelax/isorelax/20030108/isorelax-20030108.jar'
14
+ end
15
+
16
+ if defined? Jars
17
+ require_jar 'xalan', 'xalan', '2.7.2'
18
+ require_jar 'net.sourceforge.htmlunit', 'neko-htmlunit', '2.63.0'
19
+ require_jar 'nu.validator', 'jing', '20200702VNU'
20
+ require_jar 'xerces', 'xercesImpl', '2.12.2'
21
+ require_jar 'org.nokogiri', 'nekodtd', '0.1.11.noko1'
22
+ require_jar 'net.sf.saxon', 'Saxon-HE', '9.6.0-4'
23
+ require_jar 'xml-apis', 'xml-apis', '1.4.01'
24
+ require_jar 'xalan', 'serializer', '2.7.2'
25
+ require_jar 'isorelax', 'isorelax', '20030108'
26
+ end
27
+
28
+ module Nokogiri
29
+ # generated by the :vendor_jars rake task
30
+ JAR_DEPENDENCIES = {
31
+ "isorelax:isorelax" => "20030108",
32
+ "net.sf.saxon:Saxon-HE" => "9.6.0-4",
33
+ "net.sourceforge.htmlunit:neko-htmlunit" => "2.63.0",
34
+ "nu.validator:jing" => "20200702VNU",
35
+ "org.nokogiri:nekodtd" => "0.1.11.noko1",
36
+ "xalan:serializer" => "2.7.2",
37
+ "xalan:xalan" => "2.7.2",
38
+ "xerces:xercesImpl" => "2.12.2",
39
+ "xml-apis:xml-apis" => "1.4.01",
40
+ }.freeze
41
+ XERCES_VERSION = JAR_DEPENDENCIES["xerces:xercesImpl"]
42
+ NEKO_VERSION = JAR_DEPENDENCIES["net.sourceforge.htmlunit:neko-htmlunit"]
43
+ end
@@ -2,5 +2,5 @@
2
2
 
3
3
  module Nokogiri
4
4
  # The version of Nokogiri you are using
5
- VERSION = "1.13.10"
5
+ VERSION = "1.14.0"
6
6
  end
@@ -105,16 +105,16 @@ module Nokogiri
105
105
  if libxml2_using_packaged?
106
106
  cppflags << "-I#{File.join(header_directory, "include").shellescape}"
107
107
  cppflags << "-I#{File.join(header_directory, "include/libxml2").shellescape}"
108
+ end
108
109
 
109
- if windows?
110
- # on windows, nokogumbo needs to link against nokogiri.so to resolve symbols. see #2167
111
- lib_directory = File.expand_path(File.join(File.dirname(__FILE__), "../#{ruby_minor}"))
112
- unless File.exist?(lib_directory)
113
- lib_directory = File.expand_path(File.join(File.dirname(__FILE__), ".."))
114
- end
115
- ldflags << "-L#{lib_directory.shellescape}"
116
- ldflags << "-l:nokogiri.so"
110
+ if windows?
111
+ # on windows, nokogumbo needs to link against nokogiri.so to resolve symbols. see #2167
112
+ lib_directory = File.expand_path(File.join(File.dirname(__FILE__), "../#{ruby_minor}"))
113
+ unless File.exist?(lib_directory)
114
+ lib_directory = File.expand_path(File.join(File.dirname(__FILE__), ".."))
117
115
  end
116
+ ldflags << "-L#{lib_directory.shellescape}"
117
+ ldflags << "-l:nokogiri.so"
118
118
  end
119
119
 
120
120
  nokogiri["cppflags"] = cppflags
@@ -169,8 +169,9 @@ module Nokogiri
169
169
  vi["other_libraries"] = Hash[*Nokogiri::OTHER_LIBRARY_VERSIONS.split(/[,:]/)]
170
170
  elsif jruby?
171
171
  vi["other_libraries"] = {}.tap do |ol|
172
- ol["xerces"] = Nokogiri::XERCES_VERSION
173
- ol["nekohtml"] = Nokogiri::NEKO_VERSION
172
+ Nokogiri::JAR_DEPENDENCIES.each do |k, v|
173
+ ol[k] = v
174
+ end
174
175
  end
175
176
  end
176
177
  end
@@ -1,3 +1,4 @@
1
+ # coding: utf-8
1
2
  # frozen_string_literal: true
2
3
 
3
4
  module Nokogiri
@@ -7,6 +8,54 @@ module Nokogiri
7
8
  alias_method :to_s, :content
8
9
  alias_method :content=, :value=
9
10
 
11
+ #
12
+ # :call-seq: deconstruct_keys(array_of_names) → Hash
13
+ #
14
+ # Returns a hash describing the Attr, to use in pattern matching.
15
+ #
16
+ # Valid keys and their values:
17
+ # - +name+ → (String) The name of the attribute.
18
+ # - +value+ → (String) The value of the attribute.
19
+ # - +namespace+ → (Namespace, nil) The Namespace of the attribute, or +nil+ if there is no namespace.
20
+ #
21
+ # ⚡ This is an experimental feature, available since v1.14.0
22
+ #
23
+ # *Example*
24
+ #
25
+ # doc = Nokogiri::XML.parse(<<~XML)
26
+ # <?xml version="1.0"?>
27
+ # <root xmlns="http://nokogiri.org/ns/default" xmlns:noko="http://nokogiri.org/ns/noko">
28
+ # <child1 foo="abc" noko:bar="def"/>
29
+ # </root>
30
+ # XML
31
+ #
32
+ # attributes = doc.root.elements.first.attribute_nodes
33
+ # # => [#(Attr:0x35c { name = "foo", value = "abc" }),
34
+ # # #(Attr:0x370 {
35
+ # # name = "bar",
36
+ # # namespace = #(Namespace:0x384 {
37
+ # # prefix = "noko",
38
+ # # href = "http://nokogiri.org/ns/noko"
39
+ # # }),
40
+ # # value = "def"
41
+ # # })]
42
+ #
43
+ # attributes.first.deconstruct_keys([:name, :value, :namespace])
44
+ # # => {:name=>"foo", :value=>"abc", :namespace=>nil}
45
+ #
46
+ # attributes.last.deconstruct_keys([:name, :value, :namespace])
47
+ # # => {:name=>"bar",
48
+ # # :value=>"def",
49
+ # # :namespace=>
50
+ # # #(Namespace:0x384 {
51
+ # # prefix = "noko",
52
+ # # href = "http://nokogiri.org/ns/noko"
53
+ # # })}
54
+ #
55
+ def deconstruct_keys(keys)
56
+ { name: name, value: value, namespace: namespace }
57
+ end
58
+
10
59
  private
11
60
 
12
61
  def inspect_attributes
@@ -234,7 +234,7 @@ module Nokogiri
234
234
  #
235
235
  # == Document Types
236
236
  #
237
- # To create a document type (DTD), access use the Builder#doc method to get
237
+ # To create a document type (DTD), use the Builder#doc method to get
238
238
  # the current context document. Then call Node#create_internal_subset to
239
239
  # create the DTD node.
240
240
  #