nokogiri 1.11.3 → 1.13.8

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (179) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +2 -0
  3. data/LICENSE-DEPENDENCIES.md +243 -22
  4. data/LICENSE.md +1 -1
  5. data/README.md +14 -11
  6. data/bin/nokogiri +63 -50
  7. data/dependencies.yml +13 -64
  8. data/ext/nokogiri/depend +35 -34
  9. data/ext/nokogiri/extconf.rb +237 -133
  10. data/ext/nokogiri/gumbo.c +584 -0
  11. data/ext/nokogiri/{html_document.c → html4_document.c} +8 -8
  12. data/ext/nokogiri/{html_element_description.c → html4_element_description.c} +21 -19
  13. data/ext/nokogiri/{html_entity_lookup.c → html4_entity_lookup.c} +7 -7
  14. data/ext/nokogiri/{html_sax_parser_context.c → html4_sax_parser_context.c} +8 -8
  15. data/ext/nokogiri/{html_sax_push_parser.c → html4_sax_push_parser.c} +4 -4
  16. data/ext/nokogiri/libxml2_backwards_compat.c +30 -30
  17. data/ext/nokogiri/nokogiri.c +70 -38
  18. data/ext/nokogiri/nokogiri.h +27 -9
  19. data/ext/nokogiri/xml_attr.c +2 -2
  20. data/ext/nokogiri/xml_attribute_decl.c +3 -3
  21. data/ext/nokogiri/xml_cdata.c +1 -1
  22. data/ext/nokogiri/xml_document.c +50 -50
  23. data/ext/nokogiri/xml_document_fragment.c +0 -2
  24. data/ext/nokogiri/xml_dtd.c +10 -10
  25. data/ext/nokogiri/xml_element_content.c +2 -0
  26. data/ext/nokogiri/xml_element_decl.c +3 -3
  27. data/ext/nokogiri/xml_encoding_handler.c +31 -12
  28. data/ext/nokogiri/xml_entity_decl.c +5 -5
  29. data/ext/nokogiri/xml_namespace.c +4 -2
  30. data/ext/nokogiri/xml_node.c +833 -492
  31. data/ext/nokogiri/xml_node_set.c +24 -24
  32. data/ext/nokogiri/xml_reader.c +90 -11
  33. data/ext/nokogiri/xml_sax_parser.c +6 -6
  34. data/ext/nokogiri/xml_sax_parser_context.c +12 -3
  35. data/ext/nokogiri/xml_schema.c +5 -3
  36. data/ext/nokogiri/xml_text.c +1 -1
  37. data/ext/nokogiri/xml_xpath_context.c +110 -85
  38. data/ext/nokogiri/xslt_stylesheet.c +109 -10
  39. data/gumbo-parser/CHANGES.md +63 -0
  40. data/gumbo-parser/Makefile +101 -0
  41. data/gumbo-parser/THANKS +27 -0
  42. data/gumbo-parser/src/Makefile +34 -0
  43. data/gumbo-parser/src/README.md +41 -0
  44. data/gumbo-parser/src/ascii.c +75 -0
  45. data/gumbo-parser/src/ascii.h +115 -0
  46. data/gumbo-parser/src/attribute.c +42 -0
  47. data/gumbo-parser/src/attribute.h +17 -0
  48. data/gumbo-parser/src/char_ref.c +22225 -0
  49. data/gumbo-parser/src/char_ref.h +29 -0
  50. data/gumbo-parser/src/char_ref.rl +2154 -0
  51. data/gumbo-parser/src/error.c +626 -0
  52. data/gumbo-parser/src/error.h +148 -0
  53. data/gumbo-parser/src/foreign_attrs.c +104 -0
  54. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  55. data/gumbo-parser/src/gumbo.h +943 -0
  56. data/gumbo-parser/src/insertion_mode.h +33 -0
  57. data/gumbo-parser/src/macros.h +91 -0
  58. data/gumbo-parser/src/parser.c +4875 -0
  59. data/gumbo-parser/src/parser.h +41 -0
  60. data/gumbo-parser/src/replacement.h +33 -0
  61. data/gumbo-parser/src/string_buffer.c +103 -0
  62. data/gumbo-parser/src/string_buffer.h +68 -0
  63. data/gumbo-parser/src/string_piece.c +48 -0
  64. data/gumbo-parser/src/svg_attrs.c +174 -0
  65. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  66. data/gumbo-parser/src/svg_tags.c +137 -0
  67. data/gumbo-parser/src/svg_tags.gperf +55 -0
  68. data/gumbo-parser/src/tag.c +222 -0
  69. data/gumbo-parser/src/tag_lookup.c +382 -0
  70. data/gumbo-parser/src/tag_lookup.gperf +169 -0
  71. data/gumbo-parser/src/tag_lookup.h +13 -0
  72. data/gumbo-parser/src/token_buffer.c +79 -0
  73. data/gumbo-parser/src/token_buffer.h +71 -0
  74. data/gumbo-parser/src/token_type.h +17 -0
  75. data/gumbo-parser/src/tokenizer.c +3463 -0
  76. data/gumbo-parser/src/tokenizer.h +112 -0
  77. data/gumbo-parser/src/tokenizer_states.h +339 -0
  78. data/gumbo-parser/src/utf8.c +245 -0
  79. data/gumbo-parser/src/utf8.h +164 -0
  80. data/gumbo-parser/src/util.c +68 -0
  81. data/gumbo-parser/src/util.h +30 -0
  82. data/gumbo-parser/src/vector.c +111 -0
  83. data/gumbo-parser/src/vector.h +45 -0
  84. data/lib/nokogiri/class_resolver.rb +67 -0
  85. data/lib/nokogiri/css/node.rb +9 -8
  86. data/lib/nokogiri/css/parser.rb +361 -342
  87. data/lib/nokogiri/css/parser.y +250 -245
  88. data/lib/nokogiri/css/parser_extras.rb +22 -20
  89. data/lib/nokogiri/css/syntax_error.rb +2 -1
  90. data/lib/nokogiri/css/tokenizer.rb +4 -3
  91. data/lib/nokogiri/css/tokenizer.rex +3 -2
  92. data/lib/nokogiri/css/xpath_visitor.rb +179 -82
  93. data/lib/nokogiri/css.rb +49 -17
  94. data/lib/nokogiri/decorators/slop.rb +8 -7
  95. data/lib/nokogiri/extension.rb +8 -3
  96. data/lib/nokogiri/gumbo.rb +15 -0
  97. data/lib/nokogiri/html.rb +37 -27
  98. data/lib/nokogiri/{html → html4}/builder.rb +3 -2
  99. data/lib/nokogiri/{html → html4}/document.rb +92 -81
  100. data/lib/nokogiri/{html → html4}/document_fragment.rb +13 -9
  101. data/lib/nokogiri/{html → html4}/element_description.rb +2 -1
  102. data/lib/nokogiri/html4/element_description_defaults.rb +578 -0
  103. data/lib/nokogiri/{html → html4}/entity_lookup.rb +3 -2
  104. data/lib/nokogiri/{html → html4}/sax/parser.rb +16 -16
  105. data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
  106. data/lib/nokogiri/{html → html4}/sax/push_parser.rb +11 -11
  107. data/lib/nokogiri/html4.rb +46 -0
  108. data/lib/nokogiri/html5/document.rb +91 -0
  109. data/lib/nokogiri/html5/document_fragment.rb +83 -0
  110. data/lib/nokogiri/html5/node.rb +100 -0
  111. data/lib/nokogiri/html5.rb +478 -0
  112. data/lib/nokogiri/jruby/dependencies.rb +10 -9
  113. data/lib/nokogiri/syntax_error.rb +1 -0
  114. data/lib/nokogiri/version/constant.rb +2 -1
  115. data/lib/nokogiri/version/info.rb +31 -14
  116. data/lib/nokogiri/version.rb +1 -0
  117. data/lib/nokogiri/xml/attr.rb +5 -3
  118. data/lib/nokogiri/xml/attribute_decl.rb +2 -1
  119. data/lib/nokogiri/xml/builder.rb +71 -31
  120. data/lib/nokogiri/xml/cdata.rb +2 -1
  121. data/lib/nokogiri/xml/character_data.rb +1 -0
  122. data/lib/nokogiri/xml/document.rb +183 -96
  123. data/lib/nokogiri/xml/document_fragment.rb +41 -38
  124. data/lib/nokogiri/xml/dtd.rb +3 -2
  125. data/lib/nokogiri/xml/element_content.rb +1 -0
  126. data/lib/nokogiri/xml/element_decl.rb +2 -1
  127. data/lib/nokogiri/xml/entity_decl.rb +3 -2
  128. data/lib/nokogiri/xml/entity_reference.rb +1 -0
  129. data/lib/nokogiri/xml/namespace.rb +2 -0
  130. data/lib/nokogiri/xml/node/save_options.rb +9 -5
  131. data/lib/nokogiri/xml/node.rb +525 -354
  132. data/lib/nokogiri/xml/node_set.rb +50 -54
  133. data/lib/nokogiri/xml/notation.rb +12 -0
  134. data/lib/nokogiri/xml/parse_options.rb +13 -6
  135. data/lib/nokogiri/xml/pp/character_data.rb +8 -6
  136. data/lib/nokogiri/xml/pp/node.rb +24 -26
  137. data/lib/nokogiri/xml/pp.rb +3 -2
  138. data/lib/nokogiri/xml/processing_instruction.rb +2 -1
  139. data/lib/nokogiri/xml/reader.rb +20 -24
  140. data/lib/nokogiri/xml/relax_ng.rb +1 -0
  141. data/lib/nokogiri/xml/sax/document.rb +44 -49
  142. data/lib/nokogiri/xml/sax/parser.rb +37 -34
  143. data/lib/nokogiri/xml/sax/parser_context.rb +7 -3
  144. data/lib/nokogiri/xml/sax/push_parser.rb +5 -5
  145. data/lib/nokogiri/xml/sax.rb +5 -4
  146. data/lib/nokogiri/xml/schema.rb +7 -6
  147. data/lib/nokogiri/xml/searchable.rb +93 -62
  148. data/lib/nokogiri/xml/syntax_error.rb +5 -4
  149. data/lib/nokogiri/xml/text.rb +1 -0
  150. data/lib/nokogiri/xml/xpath/syntax_error.rb +2 -1
  151. data/lib/nokogiri/xml/xpath.rb +13 -1
  152. data/lib/nokogiri/xml/xpath_context.rb +2 -3
  153. data/lib/nokogiri/xml.rb +37 -37
  154. data/lib/nokogiri/xslt/stylesheet.rb +2 -1
  155. data/lib/nokogiri/xslt.rb +28 -20
  156. data/lib/nokogiri.rb +48 -43
  157. data/lib/xsd/xmlparser/nokogiri.rb +25 -24
  158. data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
  159. data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
  160. data/patches/libxml2/{0004-libxml2.la-is-in-top_builddir.patch → 0003-libxml2.la-is-in-top_builddir.patch} +1 -1
  161. data/patches/libxml2/{0008-use-glibc-strlen.patch → 0004-use-glibc-strlen.patch} +3 -3
  162. data/patches/libxml2/{0009-avoid-isnan-isinf.patch → 0005-avoid-isnan-isinf.patch} +4 -4
  163. data/patches/libxml2/0006-update-automake-files-for-arm64.patch +3040 -0
  164. data/patches/libxml2/0008-htmlParseComment-handle-abruptly-closed-comments.patch +61 -0
  165. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  166. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2445 -1919
  167. data/ports/archives/libxml2-2.9.14.tar.xz +0 -0
  168. data/ports/archives/libxslt-1.1.35.tar.xz +0 -0
  169. metadata +204 -93
  170. data/lib/nokogiri/html/element_description_defaults.rb +0 -672
  171. data/lib/nokogiri/html/sax/parser_context.rb +0 -17
  172. data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
  173. data/patches/libxml2/0005-Fix-infinite-loop-in-xmlStringLenDecodeEntities.patch +0 -32
  174. data/patches/libxml2/0006-htmlParseComment-treat-as-if-it-closed-the-comment.patch +0 -73
  175. data/patches/libxml2/0007-use-new-htmlParseLookupCommentEnd-to-find-comment-en.patch +0 -103
  176. data/patches/libxml2/0010-parser.c-shrink-the-input-buffer-when-appropriate.patch +0 -70
  177. data/patches/libxml2/0011-update-automake-files-for-arm64.patch +0 -2511
  178. data/ports/archives/libxml2-2.9.10.tar.gz +0 -0
  179. data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  module Nokogiri
3
4
  module XML
4
5
  ###
@@ -196,6 +197,41 @@ module Nokogiri
196
197
  #
197
198
  # Note the "foo:object" tag.
198
199
  #
200
+ # === Namespace inheritance
201
+ #
202
+ # In the Builder context, children will inherit their parent's namespace. This is the same
203
+ # behavior as if the underlying {XML::Document} set +namespace_inheritance+ to +true+:
204
+ #
205
+ # result = Nokogiri::XML::Builder.new do |xml|
206
+ # xml["soapenv"].Envelope("xmlns:soapenv" => "http://schemas.xmlsoap.org/soap/envelope/") do
207
+ # xml.Header
208
+ # end
209
+ # end
210
+ # result.doc.to_xml
211
+ # # => <?xml version="1.0" encoding="utf-8"?>
212
+ # # <soapenv:Envelope xmlns:soapenv="http://schemas.xmlsoap.org/soap/envelope/">
213
+ # # <soapenv:Header/>
214
+ # # </soapenv:Envelope>
215
+ #
216
+ # Users may turn this behavior off by passing a keyword argument +namespace_inheritance:false+
217
+ # to the initializer:
218
+ #
219
+ # result = Nokogiri::XML::Builder.new(namespace_inheritance: false) do |xml|
220
+ # xml["soapenv"].Envelope("xmlns:soapenv" => "http://schemas.xmlsoap.org/soap/envelope/") do
221
+ # xml.Header
222
+ # xml["soapenv"].Body # users may explicitly opt into the namespace
223
+ # end
224
+ # end
225
+ # result.doc.to_xml
226
+ # # => <?xml version="1.0" encoding="utf-8"?>
227
+ # # <soapenv:Envelope xmlns:soapenv="http://schemas.xmlsoap.org/soap/envelope/">
228
+ # # <Header/>
229
+ # # <soapenv:Body/>
230
+ # # </soapenv:Envelope>
231
+ #
232
+ # For more information on namespace inheritance, please see {XML::Document#namespace_inheritance}
233
+ #
234
+ #
199
235
  # == Document Types
200
236
  #
201
237
  # To create a document type (DTD), use the Builder#doc method to get
@@ -226,6 +262,10 @@ module Nokogiri
226
262
  # </root>
227
263
  #
228
264
  class Builder
265
+ include Nokogiri::ClassResolver
266
+
267
+ DEFAULT_DOCUMENT_OPTIONS = { namespace_inheritance: true }
268
+
229
269
  # The current Document object being built
230
270
  attr_accessor :doc
231
271
 
@@ -269,24 +309,19 @@ module Nokogiri
269
309
  @doc = root.document
270
310
  @parent = root
271
311
  else
272
- klassname = "::" + (self.class.name.split("::")[0..-2] + ["Document"]).join("::")
273
- klass = begin
274
- Object.const_get(klassname)
275
- rescue NameError
276
- Nokogiri::XML::Document
277
- end
278
- @parent = @doc = klass.new
312
+ @parent = @doc = related_class("Document").new
279
313
  end
280
314
 
281
315
  @context = nil
282
316
  @arity = nil
283
317
  @ns = nil
284
318
 
319
+ options = DEFAULT_DOCUMENT_OPTIONS.merge(options)
285
320
  options.each do |k, v|
286
321
  @doc.send(:"#{k}=", v)
287
322
  end
288
323
 
289
- return unless block_given?
324
+ return unless block
290
325
 
291
326
  @arity = block.arity
292
327
  if @arity <= 0
@@ -302,19 +337,19 @@ module Nokogiri
302
337
  ###
303
338
  # Create a Text Node with content of +string+
304
339
  def text(string)
305
- insert @doc.create_text_node(string)
340
+ insert(@doc.create_text_node(string))
306
341
  end
307
342
 
308
343
  ###
309
344
  # Create a CDATA Node with content of +string+
310
345
  def cdata(string)
311
- insert doc.create_cdata(string)
346
+ insert(doc.create_cdata(string))
312
347
  end
313
348
 
314
349
  ###
315
350
  # Create a Comment Node with content of +string+
316
351
  def comment(string)
317
- insert doc.create_comment(string)
352
+ insert(doc.create_comment(string))
318
353
  end
319
354
 
320
355
  ###
@@ -328,12 +363,13 @@ module Nokogiri
328
363
 
329
364
  @parent.ancestors.each do |a|
330
365
  next if a == doc
366
+
331
367
  @ns = a.namespace_definitions.find { |x| x.prefix == ns.to_s }
332
368
  return self if @ns
333
369
  end
334
370
 
335
- @ns = { :pending => ns.to_s }
336
- return self
371
+ @ns = { pending: ns.to_s }
372
+ self
337
373
  end
338
374
 
339
375
  ###
@@ -341,7 +377,7 @@ module Nokogiri
341
377
  def to_xml(*args)
342
378
  if Nokogiri.jruby?
343
379
  options = args.first.is_a?(Hash) ? args.shift : {}
344
- if !options[:save_with]
380
+ unless options[:save_with]
345
381
  options[:save_with] = Node::SaveOptions::AS_BUILDER
346
382
  end
347
383
  args.insert(0, options)
@@ -356,22 +392,23 @@ module Nokogiri
356
392
  end
357
393
 
358
394
  def method_missing(method, *args, &block) # :nodoc:
359
- if @context && @context.respond_to?(method)
395
+ if @context&.respond_to?(method)
360
396
  @context.send(method, *args, &block)
361
397
  else
362
- node = @doc.create_element(method.to_s.sub(/[_!]$/, ""), *args) { |n|
398
+ node = @doc.create_element(method.to_s.sub(/[_!]$/, ""), *args) do |n|
363
399
  # Set up the namespace
364
- if @ns.is_a? Nokogiri::XML::Namespace
400
+ if @ns.is_a?(Nokogiri::XML::Namespace)
365
401
  n.namespace = @ns
366
402
  @ns = nil
367
403
  end
368
- }
404
+ end
369
405
 
370
- if @ns.is_a? Hash
406
+ if @ns.is_a?(Hash)
371
407
  node.namespace = node.namespace_definitions.find { |x| x.prefix == @ns[:pending] }
372
408
  if node.namespace.nil?
373
409
  raise ArgumentError, "Namespace #{@ns[:pending]} has not been defined"
374
410
  end
411
+
375
412
  @ns = nil
376
413
  end
377
414
 
@@ -385,16 +422,19 @@ module Nokogiri
385
422
  # Insert +node+ as a child of the current Node
386
423
  def insert(node, &block)
387
424
  node = @parent.add_child(node)
388
- if block_given?
389
- old_parent = @parent
390
- @parent = node
391
- @arity ||= block.arity
392
- if @arity <= 0
393
- instance_eval(&block)
394
- else
395
- block.call(self)
425
+ if block
426
+ begin
427
+ old_parent = @parent
428
+ @parent = node
429
+ @arity ||= block.arity
430
+ if @arity <= 0
431
+ instance_eval(&block)
432
+ else
433
+ yield(self)
434
+ end
435
+ ensure
436
+ @parent = old_parent
396
437
  end
397
- @parent = old_parent
398
438
  end
399
439
  NodeBuilder.new(node, self)
400
440
  end
@@ -417,10 +457,10 @@ module Nokogiri
417
457
  opts = args.last.is_a?(Hash) ? args.pop : {}
418
458
  case method.to_s
419
459
  when /^(.*)!$/
420
- @node["id"] = $1
460
+ @node["id"] = Regexp.last_match(1)
421
461
  @node.content = args.first if args.first
422
462
  when /^(.*)=/
423
- @node[$1] = args.first
463
+ @node[Regexp.last_match(1)] = args.first
424
464
  else
425
465
  @node["class"] =
426
466
  ((@node["class"] || "").split(/\s/) + [method.to_s]).join(" ")
@@ -432,7 +472,7 @@ module Nokogiri
432
472
  @node[k.to_s] = ((@node[k.to_s] || "").split(/\s/) + [v]).join(" ")
433
473
  end
434
474
 
435
- if block_given?
475
+ if block
436
476
  old_parent = @doc_builder.parent
437
477
  @doc_builder.parent = @node
438
478
  value = @doc_builder.instance_eval(&block)
@@ -1,11 +1,12 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  module Nokogiri
3
4
  module XML
4
5
  class CDATA < Nokogiri::XML::Text
5
6
  ###
6
7
  # Get the name of this CDATA node
7
8
  def name
8
- '#cdata-section'
9
+ "#cdata-section"
9
10
  end
10
11
  end
11
12
  end
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  module Nokogiri
3
4
  module XML
4
5
  class CharacterData < Nokogiri::XML::Node