nokogiri 1.12.3 → 1.13.1

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (92)
  1. checksums.yaml +4 -4
  2. data/Gemfile +5 -0
  3. data/README.md +9 -7
  4. data/bin/nokogiri +63 -50
  5. data/dependencies.yml +5 -6
  6. data/ext/nokogiri/extconf.rb +47 -35
  7. data/ext/nokogiri/xml_document.c +35 -35
  8. data/ext/nokogiri/xml_document_fragment.c +0 -2
  9. data/ext/nokogiri/xml_dtd.c +2 -2
  10. data/ext/nokogiri/xml_encoding_handler.c +25 -11
  11. data/ext/nokogiri/xml_node.c +645 -333
  12. data/ext/nokogiri/xml_reader.c +37 -11
  13. data/ext/nokogiri/xml_xpath_context.c +72 -49
  14. data/ext/nokogiri/xslt_stylesheet.c +107 -9
  15. data/gumbo-parser/src/parser.c +0 -11
  16. data/lib/nokogiri/class_resolver.rb +67 -0
  17. data/lib/nokogiri/css/node.rb +9 -8
  18. data/lib/nokogiri/css/parser.rb +360 -341
  19. data/lib/nokogiri/css/parser.y +249 -244
  20. data/lib/nokogiri/css/parser_extras.rb +20 -20
  21. data/lib/nokogiri/css/syntax_error.rb +1 -0
  22. data/lib/nokogiri/css/tokenizer.rb +4 -3
  23. data/lib/nokogiri/css/tokenizer.rex +3 -2
  24. data/lib/nokogiri/css/xpath_visitor.rb +179 -82
  25. data/lib/nokogiri/css.rb +38 -6
  26. data/lib/nokogiri/decorators/slop.rb +8 -7
  27. data/lib/nokogiri/extension.rb +1 -1
  28. data/lib/nokogiri/gumbo.rb +1 -0
  29. data/lib/nokogiri/html.rb +16 -10
  30. data/lib/nokogiri/html4/builder.rb +1 -0
  31. data/lib/nokogiri/html4/document.rb +84 -75
  32. data/lib/nokogiri/html4/document_fragment.rb +11 -7
  33. data/lib/nokogiri/html4/element_description.rb +1 -0
  34. data/lib/nokogiri/html4/element_description_defaults.rb +426 -520
  35. data/lib/nokogiri/html4/entity_lookup.rb +2 -1
  36. data/lib/nokogiri/html4/sax/parser.rb +2 -1
  37. data/lib/nokogiri/html4/sax/parser_context.rb +1 -0
  38. data/lib/nokogiri/html4/sax/push_parser.rb +7 -7
  39. data/lib/nokogiri/html4.rb +11 -5
  40. data/lib/nokogiri/html5/document.rb +24 -10
  41. data/lib/nokogiri/html5/document_fragment.rb +5 -2
  42. data/lib/nokogiri/html5/node.rb +6 -3
  43. data/lib/nokogiri/html5.rb +68 -64
  44. data/lib/nokogiri/jruby/dependencies.rb +10 -9
  45. data/lib/nokogiri/syntax_error.rb +1 -0
  46. data/lib/nokogiri/version/constant.rb +2 -1
  47. data/lib/nokogiri/version/info.rb +19 -13
  48. data/lib/nokogiri/version.rb +1 -0
  49. data/lib/nokogiri/xml/attr.rb +5 -3
  50. data/lib/nokogiri/xml/attribute_decl.rb +2 -1
  51. data/lib/nokogiri/xml/builder.rb +69 -31
  52. data/lib/nokogiri/xml/cdata.rb +2 -1
  53. data/lib/nokogiri/xml/character_data.rb +1 -0
  54. data/lib/nokogiri/xml/document.rb +178 -96
  55. data/lib/nokogiri/xml/document_fragment.rb +41 -38
  56. data/lib/nokogiri/xml/dtd.rb +3 -2
  57. data/lib/nokogiri/xml/element_content.rb +1 -0
  58. data/lib/nokogiri/xml/element_decl.rb +2 -1
  59. data/lib/nokogiri/xml/entity_decl.rb +3 -2
  60. data/lib/nokogiri/xml/entity_reference.rb +1 -0
  61. data/lib/nokogiri/xml/namespace.rb +2 -0
  62. data/lib/nokogiri/xml/node/save_options.rb +7 -4
  63. data/lib/nokogiri/xml/node.rb +512 -348
  64. data/lib/nokogiri/xml/node_set.rb +46 -54
  65. data/lib/nokogiri/xml/notation.rb +12 -0
  66. data/lib/nokogiri/xml/parse_options.rb +11 -7
  67. data/lib/nokogiri/xml/pp/character_data.rb +8 -6
  68. data/lib/nokogiri/xml/pp/node.rb +24 -26
  69. data/lib/nokogiri/xml/pp.rb +1 -0
  70. data/lib/nokogiri/xml/processing_instruction.rb +2 -1
  71. data/lib/nokogiri/xml/reader.rb +17 -19
  72. data/lib/nokogiri/xml/relax_ng.rb +1 -0
  73. data/lib/nokogiri/xml/sax/document.rb +20 -19
  74. data/lib/nokogiri/xml/sax/parser.rb +36 -34
  75. data/lib/nokogiri/xml/sax/parser_context.rb +7 -3
  76. data/lib/nokogiri/xml/sax/push_parser.rb +5 -5
  77. data/lib/nokogiri/xml/sax.rb +1 -0
  78. data/lib/nokogiri/xml/schema.rb +7 -6
  79. data/lib/nokogiri/xml/searchable.rb +93 -62
  80. data/lib/nokogiri/xml/syntax_error.rb +4 -4
  81. data/lib/nokogiri/xml/text.rb +1 -0
  82. data/lib/nokogiri/xml/xpath/syntax_error.rb +2 -1
  83. data/lib/nokogiri/xml/xpath.rb +12 -0
  84. data/lib/nokogiri/xml/xpath_context.rb +2 -3
  85. data/lib/nokogiri/xml.rb +3 -3
  86. data/lib/nokogiri/xslt/stylesheet.rb +1 -0
  87. data/lib/nokogiri/xslt.rb +21 -13
  88. data/lib/nokogiri.rb +19 -16
  89. data/lib/xsd/xmlparser/nokogiri.rb +25 -24
  90. data/patches/libxml2/0008-htmlParseComment-handle-abruptly-closed-comments.patch +61 -0
  91. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  92. metadata +101 -27
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  module Nokogiri
3
4
  module HTML4
4
5
  class EntityDescription < Struct.new(:value, :name, :description); end
@@ -6,7 +7,7 @@ module Nokogiri
6
7
  class EntityLookup
7
8
  ###
8
9
  # Look up entity with +name+
9
- def [] name
10
+ def [](name)
10
11
  (val = get(name)) && val.value
11
12
  end
12
13
  end
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  module Nokogiri
3
4
  module HTML4
4
5
  ###
@@ -28,7 +29,7 @@ module Nokogiri
28
29
  # Parse html stored in +data+ using +encoding+
29
30
  def parse_memory(data, encoding = "UTF-8")
30
31
  raise ArgumentError unless data
31
- return unless data.length > 0
32
+ return if data.empty?
32
33
  ctx = ParserContext.memory(data, encoding)
33
34
  yield ctx if block_given?
34
35
  ctx.parse_with(self)
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  module Nokogiri
3
4
  module HTML4
4
5
  module SAX
@@ -1,14 +1,14 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  module Nokogiri
3
4
  module HTML4
4
5
  module SAX
5
6
  class PushParser
6
-
7
7
  # The Nokogiri::HTML4::SAX::Document on which the PushParser will be
8
8
  # operating
9
9
  attr_accessor :document
10
-
11
- def initialize(doc = HTML4::SAX::Document.new, file_name = nil, encoding = 'UTF-8')
10
+
11
+ def initialize(doc = HTML4::SAX::Document.new, file_name = nil, encoding = "UTF-8")
12
12
  @document = doc
13
13
  @encoding = encoding
14
14
  @sax_parser = HTML4::SAX::Parser.new(doc, @encoding)
@@ -16,20 +16,20 @@ module Nokogiri
16
16
  ## Create our push parser context
17
17
  initialize_native(@sax_parser, file_name, encoding)
18
18
  end
19
-
19
+
20
20
  ###
21
21
  # Write a +chunk+ of HTML to the PushParser. Any callback methods
22
22
  # that can be called will be called immediately.
23
- def write chunk, last_chunk = false
23
+ def write(chunk, last_chunk = false)
24
24
  native_write(chunk, last_chunk)
25
25
  end
26
- alias :<< :write
26
+ alias_method :<<, :write
27
27
 
28
28
  ###
29
29
  # Finish the parsing. This method is only necessary for
30
30
  # Nokogiri::HTML4::SAX::Document#end_document to be called.
31
31
  def finish
32
- write '', true
32
+ write("", true)
33
33
  end
34
34
  end
35
35
  end
@@ -1,15 +1,21 @@
1
+ # coding: utf-8
1
2
  # frozen_string_literal: true
3
+
2
4
  module Nokogiri
3
5
  class << self
4
- ###
6
+ # :call-seq:
7
+ # HTML4(input, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block) → Nokogiri::HTML4::Document
8
+ #
5
9
  # Parse HTML. Convenience method for Nokogiri::HTML4::Document.parse
6
10
  def HTML4(input, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block)
7
11
  Nokogiri::HTML4::Document.parse(input, url, encoding, options, &block)
8
12
  end
9
13
  end
10
14
 
11
- # @since v1.12.0
12
- # @note Before v1.12.0, {Nokogiri::HTML4} did not exist, and {Nokogiri::HTML} was the module/namespace for parsing HTML.
15
+ # Since v1.12.0
16
+ #
17
+ # 💡 Before v1.12.0, Nokogiri::HTML4 did not exist, and Nokogiri::HTML was the module/namespace
18
+ # for parsing HTML.
13
19
  module HTML4
14
20
  class << self
15
21
  ###
@@ -20,8 +26,8 @@ module Nokogiri
20
26
 
21
27
  ####
22
28
  # Parse a fragment from +string+ into a NodeSet.
23
- def fragment(string, encoding = nil)
24
- HTML4::DocumentFragment.parse(string, encoding)
29
+ def fragment(string, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block)
30
+ HTML4::DocumentFragment.parse(string, encoding, options, &block)
25
31
  end
26
32
  end
27
33
 
@@ -1,4 +1,6 @@
1
+ # coding: utf-8
1
2
  # frozen_string_literal: true
3
+
2
4
  #
3
5
  # Copyright 2013-2021 Sam Ruby, Stephen Checkoway
4
6
  #
@@ -19,14 +21,15 @@ require_relative "../html4/document"
19
21
 
20
22
  module Nokogiri
21
23
  module HTML5
22
- # @since v1.12.0
23
- # @note HTML5 functionality is not available when running JRuby.
24
+ # Since v1.12.0
25
+ #
26
+ # 💡 HTML5 functionality is not available when running JRuby.
24
27
  class Document < Nokogiri::HTML4::Document
25
28
  def self.parse(string_or_io, url = nil, encoding = nil, **options, &block)
26
- yield options if block_given?
27
- string_or_io = '' unless string_or_io
29
+ yield options if block
30
+ string_or_io = "" unless string_or_io
28
31
 
29
- if string_or_io.respond_to?(:encoding) && string_or_io.encoding.name != 'ASCII-8BIT'
32
+ if string_or_io.respond_to?(:encoding) && string_or_io.encoding.name != "ASCII-8BIT"
30
33
  encoding ||= string_or_io.encoding.name
31
34
  end
32
35
 
@@ -34,23 +37,23 @@ module Nokogiri
34
37
  url ||= string_or_io.path
35
38
  end
36
39
  unless string_or_io.respond_to?(:read) || string_or_io.respond_to?(:to_str)
37
- raise ArgumentError.new("not a string or IO object")
40
+ raise ArgumentError, "not a string or IO object"
38
41
  end
39
42
  do_parse(string_or_io, url, encoding, options)
40
43
  end
41
44
 
42
45
  def self.read_io(io, url = nil, encoding = nil, **options)
43
- raise ArgumentError.new("io object doesn't respond to :read") unless io.respond_to?(:read)
46
+ raise ArgumentError, "io object doesn't respond to :read" unless io.respond_to?(:read)
44
47
  do_parse(io, url, encoding, options)
45
48
  end
46
49
 
47
50
  def self.read_memory(string, url = nil, encoding = nil, **options)
48
- raise ArgumentError.new("string object doesn't respond to :to_str") unless string.respond_to?(:to_str)
51
+ raise ArgumentError, "string object doesn't respond to :to_str" unless string.respond_to?(:to_str)
49
52
  do_parse(string, url, encoding, options)
50
53
  end
51
54
 
52
55
  def fragment(tags = nil)
53
- DocumentFragment.new(self, tags, self.root)
56
+ DocumentFragment.new(self, tags, root)
54
57
  end
55
58
 
56
59
  def to_xml(options = {}, &block)
@@ -59,14 +62,25 @@ module Nokogiri
59
62
  XML::Node.instance_method(:to_xml).bind(self).call(options, &block)
60
63
  end
61
64
 
65
+ # :call-seq:
66
+ # xpath_doctype() → Nokogiri::CSS::XPathVisitor::DoctypeConfig
67
+ #
68
+ # [Returns] The document type which determines CSS-to-XPath translation.
69
+ #
70
+ # See XPathVisitor for more information.
71
+ def xpath_doctype
72
+ Nokogiri::CSS::XPathVisitor::DoctypeConfig::HTML5
73
+ end
74
+
62
75
  private
76
+
63
77
  def self.do_parse(string_or_io, url, encoding, options)
64
78
  string = HTML5.read_and_encode(string_or_io, encoding)
65
79
  max_attributes = options[:max_attributes] || Nokogiri::Gumbo::DEFAULT_MAX_ATTRIBUTES
66
80
  max_errors = options[:max_errors] || options[:max_parse_errors] || Nokogiri::Gumbo::DEFAULT_MAX_ERRORS
67
81
  max_depth = options[:max_tree_depth] || Nokogiri::Gumbo::DEFAULT_MAX_TREE_DEPTH
68
82
  doc = Nokogiri::Gumbo.parse(string, url, max_attributes, max_errors, max_depth)
69
- doc.encoding = 'UTF-8'
83
+ doc.encoding = "UTF-8"
70
84
  doc
71
85
  end
72
86
  end
@@ -1,4 +1,6 @@
1
+ # coding: utf-8
1
2
  # frozen_string_literal: true
3
+
2
4
  #
3
5
  # Copyright 2013-2021 Sam Ruby, Stephen Checkoway
4
6
  #
@@ -19,8 +21,9 @@ require_relative "../html4/document_fragment"
19
21
 
20
22
  module Nokogiri
21
23
  module HTML5
22
- # @since v1.12.0
23
- # @note HTML5 functionality is not available when running JRuby.
24
+ # Since v1.12.0
25
+ #
26
+ # 💡 HTML5 functionality is not available when running JRuby.
24
27
  class DocumentFragment < Nokogiri::HTML4::DocumentFragment
25
28
  attr_accessor :document
26
29
  attr_accessor :errors
@@ -1,4 +1,6 @@
1
+ # coding: utf-8
1
2
  # frozen_string_literal: true
3
+
2
4
  #
3
5
  # Copyright 2013-2021 Sam Ruby, Stephen Checkoway
4
6
  #
@@ -19,12 +21,13 @@ require_relative "../xml/node"
19
21
 
20
22
  module Nokogiri
21
23
  module HTML5
22
- # @since v1.12.0
23
- # @note HTML5 functionality is not available when running JRuby.
24
+ # Since v1.12.0
25
+ #
26
+ # 💡 HTML5 functionality is not available when running JRuby.
24
27
  module Node
25
28
  def inner_html(options = {})
26
29
  return super(options) unless document.is_a?(HTML5::Document)
27
- result = options[:preserve_newline] && HTML5.prepend_newline?(self) ? String.new("\n") : String.new
30
+ result = options[:preserve_newline] && HTML5.prepend_newline?(self) ? +"\n" : +""
28
31
  result << children.map { |child| child.to_html(options) }.join
29
32
  result
30
33
  end
@@ -1,5 +1,6 @@
1
1
  # coding: utf-8
2
2
  # frozen_string_literal: true
3
+
3
4
  #
4
5
  # Copyright 2013-2021 Sam Ruby, Stephen Checkoway
5
6
  #
@@ -16,13 +17,15 @@
16
17
  # limitations under the License.
17
18
  #
18
19
 
19
- require_relative 'html5/document'
20
- require_relative 'html5/document_fragment'
21
- require_relative 'html5/node'
20
+ require_relative "html5/document"
21
+ require_relative "html5/document_fragment"
22
+ require_relative "html5/node"
22
23
 
23
24
  module Nokogiri
24
- # @since v1.12.0
25
- # @note HTML5 functionality is not available when running JRuby.
25
+ # Since v1.12.0
26
+ #
27
+ # ⚠ HTML5 functionality is not available when running JRuby.
28
+ #
26
29
  # Parse an HTML5 document. Convenience method for {Nokogiri::HTML5::Document.parse}
27
30
  def self.HTML5(input, url = nil, encoding = nil, **options, &block)
28
31
  Nokogiri::HTML5::Document.parse(input, url, encoding, **options, &block)
@@ -30,6 +33,8 @@ module Nokogiri
30
33
 
31
34
  # == Usage
32
35
  #
36
+ # ⚠ HTML5 functionality is not available when running JRuby.
37
+ #
33
38
  # Parse an HTML5 document:
34
39
  #
35
40
  # doc = Nokogiri.HTML5(string)
@@ -220,16 +225,15 @@ module Nokogiri
220
225
  # * Instead of returning +unknown+ as the element name for unknown tags, the
221
226
  # original tag name is returned verbatim.
222
227
  #
223
- # @since v1.12.0
224
- # @note HTML5 functionality is not available when running JRuby.
228
+ # Since v1.12.0
225
229
  module HTML5
226
230
  # HTML uses the XHTML namespace.
227
- HTML_NAMESPACE = 'http://www.w3.org/1999/xhtml'.freeze
228
- MATHML_NAMESPACE = 'http://www.w3.org/1998/Math/MathML'.freeze
229
- SVG_NAMESPACE = 'http://www.w3.org/2000/svg'.freeze
230
- XLINK_NAMESPACE = 'http://www.w3.org/1999/xlink'.freeze
231
- XML_NAMESPACE = 'http://www.w3.org/XML/1998/namespace'.freeze
232
- XMLNS_NAMESPACE = 'http://www.w3.org/2000/xmlns/'.freeze
231
+ HTML_NAMESPACE = "http://www.w3.org/1999/xhtml"
232
+ MATHML_NAMESPACE = "http://www.w3.org/1998/Math/MathML"
233
+ SVG_NAMESPACE = "http://www.w3.org/2000/svg"
234
+ XLINK_NAMESPACE = "http://www.w3.org/1999/xlink"
235
+ XML_NAMESPACE = "http://www.w3.org/XML/1998/namespace"
236
+ XMLNS_NAMESPACE = "http://www.w3.org/2000/xmlns/"
233
237
 
234
238
  # Parse an HTML 5 document. Convenience method for {Nokogiri::HTML5::Document.parse}
235
239
  def self.parse(string, url = nil, encoding = nil, **options, &block)
@@ -249,34 +253,35 @@ module Nokogiri
249
253
  # special option is considered a header. Special options include:
250
254
  # * :follow_limit => number of redirects which are followed
251
255
  # * :basic_auth => [username, password]
252
- def self.get(uri, options={})
256
+ def self.get(uri, options = {})
257
+ # TODO: deprecate
253
258
  warn("Nokogiri::HTML5.get is deprecated and will be removed in a future version of Nokogiri.",
254
- uplevel: 1, category: :deprecated)
259
+ uplevel: 1, category: :deprecated)
255
260
  get_impl(uri, options)
256
261
  end
257
262
 
258
263
  private
259
264
 
260
- def self.get_impl(uri, options={})
265
+ def self.get_impl(uri, options = {})
261
266
  headers = options.clone
262
- headers = {:follow_limit => headers} if Numeric === headers # deprecated
263
- limit=headers[:follow_limit] ? headers.delete(:follow_limit).to_i : 10
267
+ headers = { follow_limit: headers } if Numeric === headers # deprecated
268
+ limit = headers[:follow_limit] ? headers.delete(:follow_limit).to_i : 10
264
269
 
265
- require 'net/http'
270
+ require "net/http"
266
271
  uri = URI(uri) unless URI === uri
267
272
 
268
273
  http = Net::HTTP.new(uri.host, uri.port)
269
274
 
270
275
  # TLS / SSL support
271
- http.use_ssl = true if uri.scheme == 'https'
276
+ http.use_ssl = true if uri.scheme == "https"
272
277
 
273
278
  # Pass through Net::HTTP override values, which currently include:
274
279
  # :ca_file, :ca_path, :cert, :cert_store, :ciphers,
275
280
  # :close_on_empty_response, :continue_timeout, :key, :open_timeout,
276
281
  # :read_timeout, :ssl_timeout, :ssl_version, :use_ssl,
277
282
  # :verify_callback, :verify_depth, :verify_mode
278
- options.each do |key, value|
279
- http.send "#{key}=", headers.delete(key) if http.respond_to? "#{key}="
283
+ options.each do |key, _value|
284
+ http.send("#{key}=", headers.delete(key)) if http.respond_to?("#{key}=")
280
285
  end
281
286
 
282
287
  request = Net::HTTP::Get.new(uri.request_uri)
@@ -284,23 +289,23 @@ module Nokogiri
284
289
  # basic authentication
285
290
  auth = headers.delete(:basic_auth)
286
291
  auth ||= [uri.user, uri.password] if uri.user && uri.password
287
- request.basic_auth auth.first, auth.last if auth
292
+ request.basic_auth(auth.first, auth.last) if auth
288
293
 
289
294
  # remaining options are treated as headers
290
- headers.each {|key, value| request[key.to_s] = value.to_s}
295
+ headers.each { |key, value| request[key.to_s] = value.to_s }
291
296
 
292
297
  response = http.request(request)
293
298
 
294
299
  case response
295
300
  when Net::HTTPSuccess
296
- doc = parse(reencode(response.body, response['content-type']), options)
297
- doc.instance_variable_set('@response', response)
301
+ doc = parse(reencode(response.body, response["content-type"]), options)
302
+ doc.instance_variable_set("@response", response)
298
303
  doc.class.send(:attr_reader, :response)
299
304
  doc
300
305
  when Net::HTTPRedirection
301
306
  response.value if limit <= 1
302
- location = URI.join(uri, response['location'])
303
- get_impl(location, options.merge(:follow_limit => limit-1))
307
+ location = URI.join(uri, response["location"])
308
+ get_impl(location, options.merge(follow_limit: limit - 1))
304
309
  else
305
310
  response.value
306
311
  end
@@ -309,10 +314,10 @@ module Nokogiri
309
314
  def self.read_and_encode(string, encoding)
310
315
  # Read the string with the given encoding.
311
316
  if string.respond_to?(:read)
312
- if encoding.nil?
313
- string = string.read
317
+ string = if encoding.nil?
318
+ string.read
314
319
  else
315
- string = string.read(encoding: encoding)
320
+ string.read(encoding: encoding)
316
321
  end
317
322
  else
318
323
  # Otherwise the string has the given encoding.
@@ -342,7 +347,7 @@ module Nokogiri
342
347
  # http://bugs.ruby-lang.org/issues/2567
343
348
  # http://www.w3.org/TR/html5/syntax.html#determining-the-character-encoding
344
349
  #
345
- def self.reencode(body, content_type=nil)
350
+ def self.reencode(body, content_type = nil)
346
351
  if body.encoding == Encoding::ASCII_8BIT
347
352
  encoding = nil
348
353
 
@@ -362,8 +367,8 @@ module Nokogiri
362
367
  end
363
368
 
364
369
  # look for a charset in a meta tag in the first 1024 bytes
365
- if not encoding
366
- data = body[0..1023].gsub(/<!--.*?(-->|\Z)/m, '')
370
+ unless encoding
371
+ data = body[0..1023].gsub(/<!--.*?(-->|\Z)/m, "")
367
372
  data.scan(/<meta.*?>/m).each do |meta|
368
373
  encoding ||= meta[/charset=["']?([^>]*?)($|["'\s>])/im, 1]
369
374
  end
@@ -390,57 +395,56 @@ module Nokogiri
390
395
  ns = current_node.namespace
391
396
  ns_uri = ns.nil? ? nil : ns.href
392
397
  # XXX(sfc): attach namespaces to all nodes, even html?
393
- if ns_uri.nil? || ns_uri == HTML_NAMESPACE || ns_uri == MATHML_NAMESPACE || ns_uri == SVG_NAMESPACE
394
- tagname = current_node.name
398
+ tagname = if ns_uri.nil? || ns_uri == HTML_NAMESPACE || ns_uri == MATHML_NAMESPACE || ns_uri == SVG_NAMESPACE
399
+ current_node.name
395
400
  else
396
- tagname = "#{ns.prefix}:#{current_node.name}"
401
+ "#{ns.prefix}:#{current_node.name}"
397
402
  end
398
- io << '<' << tagname
403
+ io << "<" << tagname
399
404
  current_node.attribute_nodes.each do |attr|
400
405
  attr_ns = attr.namespace
401
406
  if attr_ns.nil?
402
407
  attr_name = attr.name
403
408
  else
404
409
  ns_uri = attr_ns.href
405
- if ns_uri == XML_NAMESPACE
406
- attr_name = 'xml:' + attr.name.sub(/^[^:]*:/, '')
407
- elsif ns_uri == XMLNS_NAMESPACE && attr.name.sub(/^[^:]*:/, '') == 'xmlns'
408
- attr_name = 'xmlns'
410
+ attr_name = if ns_uri == XML_NAMESPACE
411
+ "xml:" + attr.name.sub(/^[^:]*:/, "")
412
+ elsif ns_uri == XMLNS_NAMESPACE && attr.name.sub(/^[^:]*:/, "") == "xmlns"
413
+ "xmlns"
409
414
  elsif ns_uri == XMLNS_NAMESPACE
410
- attr_name = 'xmlns:' + attr.name.sub(/^[^:]*:/, '')
415
+ "xmlns:" + attr.name.sub(/^[^:]*:/, "")
411
416
  elsif ns_uri == XLINK_NAMESPACE
412
- attr_name = 'xlink:' + attr.name.sub(/^[^:]*:/, '')
417
+ "xlink:" + attr.name.sub(/^[^:]*:/, "")
413
418
  else
414
- attr_name = "#{attr_ns.prefix}:#{attr.name}"
419
+ "#{attr_ns.prefix}:#{attr.name}"
415
420
  end
416
421
  end
417
- io << ' ' << attr_name << '="' << escape_text(attr.content, encoding, true) << '"'
422
+ io << " " << attr_name << '="' << escape_text(attr.content, encoding, true) << '"'
418
423
  end
419
- io << '>'
420
- if !%w[area base basefont bgsound br col embed frame hr img input keygen
421
- link meta param source track wbr].include?(current_node.name)
424
+ io << ">"
425
+ unless ["area", "base", "basefont", "bgsound", "br", "col", "embed", "frame", "hr", "img", "input", "keygen", "link", "meta", "param", "source", "track", "wbr"].include?(current_node.name)
422
426
  io << "\n" if options[:preserve_newline] && prepend_newline?(current_node)
423
427
  current_node.children.each do |child|
424
428
  # XXX(sfc): Templates handled specially?
425
429
  serialize_node_internal(child, io, encoding, options)
426
430
  end
427
- io << '</' << tagname << '>'
431
+ io << "</" << tagname << ">"
428
432
  end
429
433
  when XML::Node::TEXT_NODE
430
434
  parent = current_node.parent
431
- if parent.element? && %w[style script xmp iframe noembed noframes plaintext noscript].include?(parent.name)
432
- io << current_node.content
435
+ io << if parent.element? && ["style", "script", "xmp", "iframe", "noembed", "noframes", "plaintext", "noscript"].include?(parent.name)
436
+ current_node.content
433
437
  else
434
- io << escape_text(current_node.content, encoding, false)
438
+ escape_text(current_node.content, encoding, false)
435
439
  end
436
440
  when XML::Node::CDATA_SECTION_NODE
437
- io << '<![CDATA[' << current_node.content << ']]>'
441
+ io << "<![CDATA[" << current_node.content << "]]>"
438
442
  when XML::Node::COMMENT_NODE
439
- io << '<!--' << current_node.content << '-->'
443
+ io << "<!--" << current_node.content << "-->"
440
444
  when XML::Node::PI_NODE
441
- io << '<?' << current_node.content << '>'
445
+ io << "<?" << current_node.content << ">"
442
446
  when XML::Node::DOCUMENT_TYPE_NODE, XML::Node::DTD_NODE
443
- io << '<!DOCTYPE ' << current_node.name << '>'
447
+ io << "<!DOCTYPE " << current_node.name << ">"
444
448
  when XML::Node::HTML_DOCUMENT_NODE, XML::Node::DOCUMENT_FRAG_NODE
445
449
  current_node.children.each do |child|
446
450
  serialize_node_internal(child, io, encoding, options)
@@ -451,23 +455,23 @@ module Nokogiri
451
455
  end
452
456
 
453
457
  def self.escape_text(text, encoding, attribute_mode)
454
- if attribute_mode
455
- text = text.gsub(/[&\u00a0"]/,
456
- '&' => '&amp;', "\u00a0" => '&nbsp;', '"' => '&quot;')
458
+ text = if attribute_mode
459
+ text.gsub(/[&\u00a0"]/,
460
+ "&" => "&amp;", "\u00a0" => "&nbsp;", '"' => "&quot;")
457
461
  else
458
- text = text.gsub(/[&\u00a0<>]/,
459
- '&' => '&amp;', "\u00a0" => '&nbsp;', '<' => '&lt;', '>' => '&gt;')
462
+ text.gsub(/[&\u00a0<>]/,
463
+ "&" => "&amp;", "\u00a0" => "&nbsp;", "<" => "&lt;", ">" => "&gt;")
460
464
  end
461
465
  # Not part of the standard
462
466
  text.encode(encoding, fallback: lambda { |c| "&\#x#{c.ord.to_s(16)};" })
463
467
  end
464
468
 
465
469
  def self.prepend_newline?(node)
466
- return false unless %w[pre textarea listing].include?(node.name) && !node.children.empty?
470
+ return false unless ["pre", "textarea", "listing"].include?(node.name) && !node.children.empty?
467
471
  first_child = node.children[0]
468
472
  first_child.text? && first_child.content.start_with?("\n")
469
473
  end
470
474
  end
471
475
  end
472
476
 
473
- require_relative 'gumbo'
477
+ require_relative "gumbo"
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  # The line below caused a problem on non-GAE rack environment.
3
4
  # unless defined?(JRuby::Rack::VERSION) || defined?(AppEngine::ApiProxy)
4
5
  #
@@ -8,13 +9,13 @@
8
9
  # should skip loading xml jars. This is because those are in WEB-INF/lib and
9
10
  # already set in the classpath.
10
11
  unless $LOAD_PATH.to_s.include?("appengine-rack")
11
- require 'stringio'
12
- require 'isorelax.jar'
13
- require 'jing.jar'
14
- require 'nekohtml.jar'
15
- require 'nekodtd.jar'
16
- require 'xercesImpl.jar'
17
- require 'serializer.jar'
18
- require 'xalan.jar'
19
- require 'xml-apis.jar'
12
+ require "stringio"
13
+ require "isorelax.jar"
14
+ require "jing.jar"
15
+ require "nekohtml.jar"
16
+ require "nekodtd.jar"
17
+ require "xercesImpl.jar"
18
+ require "serializer.jar"
19
+ require "xalan.jar"
20
+ require "xml-apis.jar"
20
21
  end
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  module Nokogiri
3
4
  class SyntaxError < ::StandardError
4
5
  end
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  module Nokogiri
3
4
  # The version of Nokogiri you are using
4
- VERSION = "1.12.3"
5
+ VERSION = "1.13.1"
5
6
  end
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  require "singleton"
3
4
  require "shellwords"
4
5
 
@@ -102,8 +103,8 @@ module Nokogiri
102
103
  ldflags = []
103
104
 
104
105
  if libxml2_using_packaged?
105
- cppflags << "-I#{File.join(header_directory, 'include').shellescape}"
106
- cppflags << "-I#{File.join(header_directory, 'include/libxml2').shellescape}"
106
+ cppflags << "-I#{File.join(header_directory, "include").shellescape}"
107
+ cppflags << "-I#{File.join(header_directory, "include/libxml2").shellescape}"
107
108
 
108
109
  if windows?
109
110
  # on windows, nokogumbo needs to link against nokogiri.so to resolve symbols. see #2167
@@ -176,13 +177,9 @@ module Nokogiri
176
177
  end
177
178
 
178
179
  def to_markdown
179
- begin
180
- require "psych"
181
- rescue LoadError
182
- end
183
180
  require "yaml"
184
181
  "# Nokogiri (#{Nokogiri::VERSION})\n" +
185
- YAML.dump(to_hash).each_line.map { |line| " #{line}" }.join
182
+ YAML.dump(to_hash).each_line.map { |line| " #{line}" }.join
186
183
  end
187
184
 
188
185
  instance.warnings.each do |warning|
@@ -190,26 +187,35 @@ module Nokogiri
190
187
  end
191
188
  end
192
189
 
193
- def self.uses_libxml?(requirement = nil) # :nodoc:
190
+ # :nodoc:
191
+ def self.uses_libxml?(requirement = nil)
194
192
  return false unless VersionInfo.instance.libxml2?
195
193
  return true unless requirement
196
194
  Gem::Requirement.new(requirement).satisfied_by?(VersionInfo.instance.loaded_libxml_version)
197
195
  end
198
196
 
197
+ # :nodoc:
199
198
  def self.uses_gumbo?
200
199
  uses_libxml? # TODO: replace with Gumbo functionality
201
200
  end
202
201
 
203
- def self.jruby? # :nodoc:
202
+ # :nodoc:
203
+ def self.jruby?
204
204
  VersionInfo.instance.jruby?
205
205
  end
206
206
 
207
- # Ensure constants used in this file are loaded - see #1896
208
- if Nokogiri.jruby?
209
- require_relative "../jruby/dependencies"
207
+ # :nodoc:
208
+ def self.libxml2_patches
209
+ if VersionInfo.instance.libxml2_using_packaged?
210
+ Nokogiri::VERSION_INFO["libxml"]["patches"]
211
+ else
212
+ []
213
+ end
210
214
  end
215
+
216
+ require_relative "../jruby/dependencies" if Nokogiri.jruby?
211
217
  require_relative "../extension"
212
218
 
213
- # More complete version information about libxml
219
+ # Detailed version info about Nokogiri and the installed extension dependencies.
214
220
  VERSION_INFO = VersionInfo.instance.to_hash
215
221
  end
@@ -1,3 +1,4 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  require_relative "version/constant"
3
4
  require_relative "version/info"