nokogiri 1.13.10 → 1.14.0.rc1

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (86) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +33 -0
  3. data/LICENSE-DEPENDENCIES.md +830 -509
  4. data/README.md +18 -11
  5. data/dependencies.yml +25 -7
  6. data/ext/nokogiri/extconf.rb +79 -20
  7. data/ext/nokogiri/gumbo.c +19 -9
  8. data/ext/nokogiri/html4_document.c +1 -1
  9. data/ext/nokogiri/html4_entity_lookup.c +1 -1
  10. data/ext/nokogiri/html4_sax_parser_context.c +0 -5
  11. data/ext/nokogiri/nokogiri.c +32 -51
  12. data/ext/nokogiri/nokogiri.h +17 -14
  13. data/ext/nokogiri/xml_attribute_decl.c +1 -1
  14. data/ext/nokogiri/xml_cdata.c +1 -1
  15. data/ext/nokogiri/xml_document.c +16 -11
  16. data/ext/nokogiri/xml_element_content.c +2 -2
  17. data/ext/nokogiri/xml_element_decl.c +1 -1
  18. data/ext/nokogiri/xml_encoding_handler.c +2 -2
  19. data/ext/nokogiri/xml_namespace.c +38 -8
  20. data/ext/nokogiri/xml_node.c +286 -26
  21. data/ext/nokogiri/xml_node_set.c +0 -2
  22. data/ext/nokogiri/xml_reader.c +40 -20
  23. data/ext/nokogiri/xml_relax_ng.c +0 -2
  24. data/ext/nokogiri/xml_sax_parser.c +22 -16
  25. data/ext/nokogiri/xml_sax_parser_context.c +0 -5
  26. data/ext/nokogiri/xml_sax_push_parser.c +0 -2
  27. data/ext/nokogiri/xml_schema.c +0 -2
  28. data/ext/nokogiri/xml_xpath_context.c +87 -83
  29. data/ext/nokogiri/xslt_stylesheet.c +14 -13
  30. data/gumbo-parser/Makefile +10 -0
  31. data/gumbo-parser/src/attribute.h +1 -1
  32. data/gumbo-parser/src/error.c +1 -1
  33. data/gumbo-parser/src/error.h +1 -1
  34. data/gumbo-parser/src/foreign_attrs.c +2 -2
  35. data/gumbo-parser/src/{gumbo.h → nokogiri_gumbo.h} +1 -0
  36. data/gumbo-parser/src/parser.c +7 -4
  37. data/gumbo-parser/src/replacement.h +1 -1
  38. data/gumbo-parser/src/string_buffer.h +1 -1
  39. data/gumbo-parser/src/string_piece.c +1 -1
  40. data/gumbo-parser/src/svg_attrs.c +2 -2
  41. data/gumbo-parser/src/svg_tags.c +2 -2
  42. data/gumbo-parser/src/tag.c +2 -1
  43. data/gumbo-parser/src/tag_lookup.c +7 -7
  44. data/gumbo-parser/src/tag_lookup.gperf +1 -0
  45. data/gumbo-parser/src/tag_lookup.h +1 -1
  46. data/gumbo-parser/src/token_buffer.h +1 -1
  47. data/gumbo-parser/src/tokenizer.c +1 -1
  48. data/gumbo-parser/src/tokenizer.h +1 -1
  49. data/gumbo-parser/src/utf8.c +1 -1
  50. data/gumbo-parser/src/utf8.h +1 -1
  51. data/gumbo-parser/src/util.c +1 -3
  52. data/gumbo-parser/src/util.h +4 -0
  53. data/gumbo-parser/src/vector.h +1 -1
  54. data/lib/nokogiri/css/node.rb +2 -2
  55. data/lib/nokogiri/css/xpath_visitor.rb +3 -1
  56. data/lib/nokogiri/css.rb +6 -0
  57. data/lib/nokogiri/encoding_handler.rb +57 -0
  58. data/lib/nokogiri/extension.rb +3 -2
  59. data/lib/nokogiri/html4/document.rb +2 -121
  60. data/lib/nokogiri/html4/element_description_defaults.rb +6 -12
  61. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  62. data/lib/nokogiri/html4.rb +1 -0
  63. data/lib/nokogiri/html5/document.rb +113 -36
  64. data/lib/nokogiri/html5/document_fragment.rb +9 -2
  65. data/lib/nokogiri/html5/node.rb +3 -5
  66. data/lib/nokogiri/html5.rb +127 -216
  67. data/lib/nokogiri/jruby/dependencies.rb +1 -19
  68. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  69. data/lib/nokogiri/version/constant.rb +1 -1
  70. data/lib/nokogiri/version/info.rb +11 -10
  71. data/lib/nokogiri/xml/attr.rb +49 -0
  72. data/lib/nokogiri/xml/builder.rb +1 -1
  73. data/lib/nokogiri/xml/document.rb +102 -54
  74. data/lib/nokogiri/xml/document_fragment.rb +49 -6
  75. data/lib/nokogiri/xml/namespace.rb +42 -0
  76. data/lib/nokogiri/xml/node/save_options.rb +4 -2
  77. data/lib/nokogiri/xml/node.rb +190 -35
  78. data/lib/nokogiri/xml/node_set.rb +87 -9
  79. data/lib/nokogiri/xml/parse_options.rb +127 -48
  80. data/lib/nokogiri/xml/pp/node.rb +6 -4
  81. data/lib/nokogiri/xml/processing_instruction.rb +2 -1
  82. data/lib/nokogiri/xml/sax/parser.rb +2 -3
  83. data/lib/nokogiri/xslt.rb +1 -1
  84. data/lib/nokogiri.rb +3 -11
  85. metadata +11 -247
  86. data/patches/libxml2/0005-avoid-isnan-isinf.patch +0 -81
@@ -19,63 +19,72 @@ module Nokogiri
19
19
  NCNAME_CHAR = NCNAME_START_CHAR + "\\-\\.0-9"
20
20
  NCNAME_RE = /^xmlns(?::([#{NCNAME_START_CHAR}][#{NCNAME_CHAR}]*))?$/
21
21
 
22
- ##
23
- # Parse an XML file.
24
- #
25
- # +string_or_io+ may be a String, or any object that responds to
26
- # _read_ and _close_ such as an IO, or StringIO.
27
- #
28
- # +url+ (optional) is the URI where this document is located.
29
- #
30
- # +encoding+ (optional) is the encoding that should be used when processing
31
- # the document.
32
- #
33
- # +options+ (optional) is a configuration object that sets options during
34
- # parsing, such as Nokogiri::XML::ParseOptions::RECOVER. See the
35
- # Nokogiri::XML::ParseOptions for more information.
36
- #
37
- # +block+ (optional) is passed a configuration object on which
38
- # parse options may be set.
39
- #
40
- # By default, Nokogiri treats documents as untrusted, and so
41
- # does not attempt to load DTDs or access the network. See
42
- # Nokogiri::XML::ParseOptions for a complete list of options;
43
- # and that module's DEFAULT_XML constant for what's set (and not
44
- # set) by default.
45
- #
46
- # Nokogiri.XML() is a convenience method which will call this method.
47
- #
48
- def self.parse(string_or_io, url = nil, encoding = nil, options = ParseOptions::DEFAULT_XML)
49
- options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
50
- yield options if block_given?
22
+ class << self
23
+ # Parse an XML file.
24
+ #
25
+ # +string_or_io+ may be a String, or any object that responds to
26
+ # _read_ and _close_ such as an IO, or StringIO.
27
+ #
28
+ # +url+ (optional) is the URI where this document is located.
29
+ #
30
+ # +encoding+ (optional) is the encoding that should be used when processing
31
+ # the document.
32
+ #
33
+ # +options+ (optional) is a configuration object that sets options during
34
+ # parsing, such as Nokogiri::XML::ParseOptions::RECOVER. See the
35
+ # Nokogiri::XML::ParseOptions for more information.
36
+ #
37
+ # +block+ (optional) is passed a configuration object on which
38
+ # parse options may be set.
39
+ #
40
+ # By default, Nokogiri treats documents as untrusted, and so
41
+ # does not attempt to load DTDs or access the network. See
42
+ # Nokogiri::XML::ParseOptions for a complete list of options;
43
+ # and that module's DEFAULT_XML constant for what's set (and not
44
+ # set) by default.
45
+ #
46
+ # Nokogiri.XML() is a convenience method which will call this method.
47
+ #
48
+ def parse(string_or_io, url = nil, encoding = nil, options = ParseOptions::DEFAULT_XML)
49
+ options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
50
+ yield options if block_given?
51
+
52
+ url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
53
+
54
+ if empty_doc?(string_or_io)
55
+ if options.strict?
56
+ raise Nokogiri::XML::SyntaxError, "Empty document"
57
+ else
58
+ return encoding ? new.tap { |i| i.encoding = encoding } : new
59
+ end
60
+ end
51
61
 
52
- url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
62
+ doc = if string_or_io.respond_to?(:read)
63
+ if string_or_io.is_a?(Pathname)
64
+ # resolve the Pathname to the file and open it as an IO object, see #2110
65
+ string_or_io = string_or_io.expand_path.open
66
+ url ||= string_or_io.path
67
+ end
53
68
 
54
- if empty_doc?(string_or_io)
55
- if options.strict?
56
- raise Nokogiri::XML::SyntaxError, "Empty document"
69
+ read_io(string_or_io, url, encoding, options.to_i)
57
70
  else
58
- return encoding ? new.tap { |i| i.encoding = encoding } : new
71
+ # read_memory pukes on empty docs
72
+ read_memory(string_or_io, url, encoding, options.to_i)
59
73
  end
60
- end
61
74
 
62
- doc = if string_or_io.respond_to?(:read)
63
- if string_or_io.is_a?(Pathname)
64
- # resolve the Pathname to the file and open it as an IO object, see #2110
65
- string_or_io = string_or_io.expand_path.open
66
- url ||= string_or_io.path
67
- end
75
+ # do xinclude processing
76
+ doc.do_xinclude(options) if options.xinclude?
68
77
 
69
- read_io(string_or_io, url, encoding, options.to_i)
70
- else
71
- # read_memory pukes on empty docs
72
- read_memory(string_or_io, url, encoding, options.to_i)
78
+ doc
73
79
  end
74
80
 
75
- # do xinclude processing
76
- doc.do_xinclude(options) if options.xinclude?
81
+ private
77
82
 
78
- doc
83
+ def empty_doc?(string_or_io)
84
+ string_or_io.nil? ||
85
+ (string_or_io.respond_to?(:empty?) && string_or_io.empty?) ||
86
+ (string_or_io.respond_to?(:eof?) && string_or_io.eof?)
87
+ end
79
88
  end
80
89
 
81
90
  ##
@@ -165,6 +174,7 @@ module Nokogiri
165
174
  # Since v1.12.4
166
175
  attr_accessor :namespace_inheritance
167
176
 
177
+ # rubocop:disable Lint/MissingSuper
168
178
  def initialize(*args) # :nodoc:
169
179
  @errors = []
170
180
  @decorators = nil
@@ -405,14 +415,52 @@ module Nokogiri
405
415
  Nokogiri::CSS::XPathVisitor::DoctypeConfig::XML
406
416
  end
407
417
 
408
- private
409
-
410
- def self.empty_doc?(string_or_io)
411
- string_or_io.nil? ||
412
- (string_or_io.respond_to?(:empty?) && string_or_io.empty?) ||
413
- (string_or_io.respond_to?(:eof?) && string_or_io.eof?)
418
+ #
419
+ # :call-seq: deconstruct_keys(array_of_names) → Hash
420
+ #
421
+ # Returns a hash describing the Document, to use in pattern matching.
422
+ #
423
+ # Valid keys and their values:
424
+ # - +root+ → (Node, nil) The root node of the Document, or +nil+ if the document is empty.
425
+ #
426
+ # In the future, other keys may allow accessing things like doctype and processing
427
+ # instructions. If you have a use case and would like this functionality, please let us know
428
+ # by opening an issue or a discussion on the github project.
429
+ #
430
+ # ⚡ This is an experimental feature, available since v1.14.0
431
+ #
432
+ # *Example*
433
+ #
434
+ # doc = Nokogiri::XML.parse(<<~XML)
435
+ # <?xml version="1.0"?>
436
+ # <root>
437
+ # <child>
438
+ # </root>
439
+ # XML
440
+ #
441
+ # doc.deconstruct_keys([:root])
442
+ # # => {:root=>
443
+ # # #(Element:0x35c {
444
+ # # name = "root",
445
+ # # children = [
446
+ # # #(Text "\n" + " "),
447
+ # # #(Element:0x370 { name = "child", children = [ #(Text "\n")] }),
448
+ # # #(Text "\n")]
449
+ # # })}
450
+ #
451
+ # *Example* of an empty document
452
+ #
453
+ # doc = Nokogiri::XML::Document.new
454
+ #
455
+ # doc.deconstruct_keys([:root])
456
+ # # => {:root=>nil}
457
+ #
458
+ def deconstruct_keys(keys)
459
+ { root: root }
414
460
  end
415
461
 
462
+ private
463
+
416
464
  IMPLIED_XPATH_CONTEXTS = ["//"].freeze # :nodoc:
417
465
 
418
466
  def inspect_attributes
@@ -1,3 +1,4 @@
1
+ # coding: utf-8
1
2
  # frozen_string_literal: true
2
3
 
3
4
  module Nokogiri
@@ -66,9 +67,7 @@ module Nokogiri
66
67
  def to_html(*args)
67
68
  if Nokogiri.jruby?
68
69
  options = args.first.is_a?(Hash) ? args.shift : {}
69
- unless options[:save_with]
70
- options[:save_with] = Node::SaveOptions::NO_DECLARATION | Node::SaveOptions::NO_EMPTY_TAGS | Node::SaveOptions::AS_HTML
71
- end
70
+ options[:save_with] ||= Node::SaveOptions::DEFAULT_HTML
72
71
  args.insert(0, options)
73
72
  end
74
73
  children.to_html(*args)
@@ -80,9 +79,7 @@ module Nokogiri
80
79
  def to_xhtml(*args)
81
80
  if Nokogiri.jruby?
82
81
  options = args.first.is_a?(Hash) ? args.shift : {}
83
- unless options[:save_with]
84
- options[:save_with] = Node::SaveOptions::NO_DECLARATION | Node::SaveOptions::NO_EMPTY_TAGS | Node::SaveOptions::AS_XHTML
85
- end
82
+ options[:save_with] ||= Node::SaveOptions::DEFAULT_XHTML
86
83
  args.insert(0, options)
87
84
  end
88
85
  children.to_xhtml(*args)
@@ -148,6 +145,52 @@ module Nokogiri
148
145
  document.fragment(data)
149
146
  end
150
147
 
148
+ #
149
+ # :call-seq: deconstruct() → Array
150
+ #
151
+ # Returns the root nodes of this document fragment as an array, to use in pattern matching.
152
+ #
153
+ # 💡 Note that text nodes are returned as well as elements. If you wish to operate only on
154
+ # root elements, you should deconstruct the array returned by
155
+ # <tt>DocumentFragment#elements</tt>.
156
+ #
157
+ # ⚡ This is an experimental feature, available since v1.14.0
158
+ #
159
+ # *Example*
160
+ #
161
+ # frag = Nokogiri::HTML5.fragment(<<~HTML)
162
+ # <div>Start</div>
163
+ # This is a <a href="#jump">shortcut</a> for you.
164
+ # <div>End</div>
165
+ # HTML
166
+ #
167
+ # frag.deconstruct
168
+ # # => [#(Element:0x35c { name = "div", children = [ #(Text "Start")] }),
169
+ # # #(Text "\n" + "This is a "),
170
+ # # #(Element:0x370 {
171
+ # # name = "a",
172
+ # # attributes = [ #(Attr:0x384 { name = "href", value = "#jump" })],
173
+ # # children = [ #(Text "shortcut")]
174
+ # # }),
175
+ # # #(Text " for you.\n"),
176
+ # # #(Element:0x398 { name = "div", children = [ #(Text "End")] }),
177
+ # # #(Text "\n")]
178
+ #
179
+ # *Example* only the elements, not the text nodes.
180
+ #
181
+ # frag.elements.deconstruct
182
+ # # => [#(Element:0x35c { name = "div", children = [ #(Text "Start")] }),
183
+ # # #(Element:0x370 {
184
+ # # name = "a",
185
+ # # attributes = [ #(Attr:0x384 { name = "href", value = "#jump" })],
186
+ # # children = [ #(Text "shortcut")]
187
+ # # }),
188
+ # # #(Element:0x398 { name = "div", children = [ #(Text "End")] })]
189
+ #
190
+ def deconstruct
191
+ children.to_a
192
+ end
193
+
151
194
  private
152
195
 
153
196
  # fix for issue 770
@@ -1,3 +1,4 @@
1
+ # coding: utf-8
1
2
  # frozen_string_literal: true
2
3
 
3
4
  module Nokogiri
@@ -6,6 +7,47 @@ module Nokogiri
6
7
  include Nokogiri::XML::PP::Node
7
8
  attr_reader :document
8
9
 
10
+ #
11
+ # :call-seq: deconstruct_keys(array_of_names) → Hash
12
+ #
13
+ # Returns a hash describing the Namespace, to use in pattern matching.
14
+ #
15
+ # Valid keys and their values:
16
+ # - +prefix+ → (String, nil) The namespace's prefix, or +nil+ if there is no prefix (e.g., default namespace).
17
+ # - +href+ → (String) The namespace's URI
18
+ #
19
+ # ⚡ This is an experimental feature, available since v1.14.0
20
+ #
21
+ # *Example*
22
+ #
23
+ # doc = Nokogiri::XML.parse(<<~XML)
24
+ # <?xml version="1.0"?>
25
+ # <root xmlns="http://nokogiri.org/ns/default" xmlns:noko="http://nokogiri.org/ns/noko">
26
+ # <child1 foo="abc" noko:bar="def"/>
27
+ # <noko:child2 foo="qwe" noko:bar="rty"/>
28
+ # </root>
29
+ # XML
30
+ #
31
+ # doc.root.elements.first.namespace
32
+ # # => #(Namespace:0x35c { href = "http://nokogiri.org/ns/default" })
33
+ #
34
+ # doc.root.elements.first.namespace.deconstruct_keys([:prefix, :href])
35
+ # # => {:prefix=>nil, :href=>"http://nokogiri.org/ns/default"}
36
+ #
37
+ # doc.root.elements.last.namespace
38
+ # # => #(Namespace:0x370 {
39
+ # # prefix = "noko",
40
+ # # href = "http://nokogiri.org/ns/noko"
41
+ # # })
42
+ #
43
+ # doc.root.elements.last.namespace.deconstruct_keys([:prefix, :href])
44
+ # # => {:prefix=>"noko", :href=>"http://nokogiri.org/ns/noko"}
45
+ #
46
+ #
47
+ def deconstruct_keys(keys)
48
+ { prefix: prefix, href: href }
49
+ end
50
+
9
51
  private
10
52
 
11
53
  def inspect_attributes
@@ -29,14 +29,16 @@ module Nokogiri
29
29
  DEFAULT_XML = AS_XML # https://github.com/sparklemotion/nokogiri/issues/#issue/415
30
30
  # the default for HTML document
31
31
  DEFAULT_HTML = NO_DECLARATION | NO_EMPTY_TAGS | AS_HTML
32
+ # the default for XHTML document
33
+ DEFAULT_XHTML = NO_DECLARATION | AS_XHTML
32
34
  else
33
35
  # the default for XML documents
34
36
  DEFAULT_XML = FORMAT | AS_XML
35
37
  # the default for HTML document
36
38
  DEFAULT_HTML = FORMAT | NO_DECLARATION | NO_EMPTY_TAGS | AS_HTML
39
+ # the default for XHTML document
40
+ DEFAULT_XHTML = FORMAT | NO_DECLARATION | AS_XHTML
37
41
  end
38
- # the default for XHTML document
39
- DEFAULT_XHTML = FORMAT | NO_DECLARATION | AS_XHTML
40
42
 
41
43
  # Integer representation of the SaveOptions
42
44
  attr_reader :options
@@ -137,9 +137,12 @@ module Nokogiri
137
137
 
138
138
  ###
139
139
  # Add +node_or_tags+ as a child of this Node.
140
- # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
141
140
  #
142
- # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
141
+ # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a String
142
+ # containing markup.
143
+ #
144
+ # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is
145
+ # a DocumentFragment, NodeSet, or String).
143
146
  #
144
147
  # Also see related method +<<+.
145
148
  def add_child(node_or_tags)
@@ -154,9 +157,12 @@ module Nokogiri
154
157
 
155
158
  ###
156
159
  # Add +node_or_tags+ as the first child of this Node.
157
- # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
158
160
  #
159
- # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
161
+ # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a String
162
+ # containing markup.
163
+ #
164
+ # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is
165
+ # a DocumentFragment, NodeSet, or String).
160
166
  #
161
167
  # Also see related method +add_child+.
162
168
  def prepend_child(node_or_tags)
@@ -170,22 +176,81 @@ module Nokogiri
170
176
  end
171
177
  end
172
178
 
173
- ###
174
- # Add html around this node
179
+ # :call-seq:
180
+ # wrap(markup) -> self
181
+ # wrap(node) -> self
182
+ #
183
+ # Wrap this Node with the node parsed from +markup+ or a dup of the +node+.
184
+ #
185
+ # [Parameters]
186
+ # - *markup* (String)
187
+ # Markup that is parsed and used as the wrapper. This node's parent, if it exists, is used
188
+ # as the context node for parsing; otherwise the associated document is used. If the parsed
189
+ # fragment has multiple roots, the first root node is used as the wrapper.
190
+ # - *node* (Nokogiri::XML::Node)
191
+ # An element that is `#dup`ed and used as the wrapper.
192
+ #
193
+ # [Returns] +self+, to support chaining.
194
+ #
195
+ # Also see NodeSet#wrap
196
+ #
197
+ # *Example* with a +String+ argument:
175
198
  #
176
- # Returns self
177
- def wrap(html)
178
- new_parent = document.parse(html).first
179
- add_next_sibling(new_parent)
199
+ # doc = Nokogiri::HTML5(<<~HTML)
200
+ # <html><body>
201
+ # <a>asdf</a>
202
+ # </body></html>
203
+ # HTML
204
+ # doc.at_css("a").wrap("<div></div>")
205
+ # doc.to_html
206
+ # # => <html><head></head><body>
207
+ # # <div><a>asdf</a></div>
208
+ # # </body></html>
209
+ #
210
+ # *Example* with a +Node+ argument:
211
+ #
212
+ # doc = Nokogiri::HTML5(<<~HTML)
213
+ # <html><body>
214
+ # <a>asdf</a>
215
+ # </body></html>
216
+ # HTML
217
+ # doc.at_css("a").wrap(doc.create_element("div"))
218
+ # doc.to_html
219
+ # # => <html><head></head><body>
220
+ # # <div><a>asdf</a></div>
221
+ # # </body></html>
222
+ #
223
+ def wrap(node_or_tags)
224
+ case node_or_tags
225
+ when String
226
+ context_node = parent || document
227
+ new_parent = context_node.coerce(node_or_tags).first
228
+ if new_parent.nil?
229
+ raise "Failed to parse '#{node_or_tags}' in the context of a '#{context_node.name}' element"
230
+ end
231
+ when XML::Node
232
+ new_parent = node_or_tags.dup
233
+ else
234
+ raise ArgumentError, "Requires a String or Node argument, and cannot accept a #{node_or_tags.class}"
235
+ end
236
+
237
+ if parent
238
+ add_next_sibling(new_parent)
239
+ else
240
+ new_parent.unlink
241
+ end
180
242
  new_parent.add_child(self)
243
+
181
244
  self
182
245
  end
183
246
 
184
247
  ###
185
248
  # Add +node_or_tags+ as a child of this Node.
186
- # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
187
249
  #
188
- # Returns self, to support chaining of calls (e.g., root << child1 << child2)
250
+ # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a String
251
+ # containing markup.
252
+ #
253
+ # Returns +self+, to support chaining of calls (e.g., root << child1 << child2)
189
254
  #
190
255
  # Also see related method +add_child+.
191
256
  def <<(node_or_tags)
@@ -195,9 +260,12 @@ module Nokogiri
195
260
 
196
261
  ###
197
262
  # Insert +node_or_tags+ before this Node (as a sibling).
198
- # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
199
263
  #
200
- # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
264
+ # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a String
265
+ # containing markup.
266
+ #
267
+ # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is
268
+ # a DocumentFragment, NodeSet, or String).
201
269
  #
202
270
  # Also see related method +before+.
203
271
  def add_previous_sibling(node_or_tags)
@@ -209,9 +277,12 @@ module Nokogiri
209
277
 
210
278
  ###
211
279
  # Insert +node_or_tags+ after this Node (as a sibling).
212
- # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
213
280
  #
214
- # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
281
+ # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a String
282
+ # containing markup.
283
+ #
284
+ # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is
285
+ # a DocumentFragment, NodeSet, or String).
215
286
  #
216
287
  # Also see related method +after+.
217
288
  def add_next_sibling(node_or_tags)
@@ -223,9 +294,11 @@ module Nokogiri
223
294
 
224
295
  ####
225
296
  # Insert +node_or_tags+ before this node (as a sibling).
226
- # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
227
297
  #
228
- # Returns self, to support chaining of calls.
298
+ # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a String
299
+ # containing markup.
300
+ #
301
+ # Returns +self+, to support chaining of calls.
229
302
  #
230
303
  # Also see related method +add_previous_sibling+.
231
304
  def before(node_or_tags)
@@ -235,9 +308,11 @@ module Nokogiri
235
308
 
236
309
  ####
237
310
  # Insert +node_or_tags+ after this node (as a sibling).
238
- # +node_or_tags+ can be a Nokogiri::XML::Node, a Nokogiri::XML::DocumentFragment, or a string containing markup.
239
311
  #
240
- # Returns self, to support chaining of calls.
312
+ # +node_or_tags+ can be a Nokogiri::XML::Node, a Nokogiri::XML::DocumentFragment, or a String
313
+ # containing markup.
314
+ #
315
+ # Returns +self+, to support chaining of calls.
241
316
  #
242
317
  # Also see related method +add_next_sibling+.
243
318
  def after(node_or_tags)
@@ -246,8 +321,18 @@ module Nokogiri
246
321
  end
247
322
 
248
323
  ####
249
- # Set the inner html for this Node to +node_or_tags+
250
- # +node_or_tags+ can be a Nokogiri::XML::Node, a Nokogiri::XML::DocumentFragment, or a string containing markup.
324
+ # Set the content for this Node to +node_or_tags+.
325
+ #
326
+ # +node_or_tags+ can be a Nokogiri::XML::Node, a Nokogiri::XML::DocumentFragment, or a String
327
+ # containing markup.
328
+ #
329
+ # ⚠ Please note that despite the name, this method will *not* always parse a String argument
330
+ # as HTML. A String argument will be parsed with the +DocumentFragment+ parser related to this
331
+ # node's document.
332
+ #
333
+ # For example, if the document is an HTML4::Document then the string will be parsed as HTML4
334
+ # using HTML4::DocumentFragment; but if the document is an XML::Document then it will
335
+ # parse the string as XML using XML::DocumentFragment.
251
336
  #
252
337
  # Also see related method +children=+
253
338
  def inner_html=(node_or_tags)
@@ -255,8 +340,10 @@ module Nokogiri
255
340
  end
256
341
 
257
342
  ####
258
- # Set the inner html for this Node +node_or_tags+
259
- # +node_or_tags+ can be a Nokogiri::XML::Node, a Nokogiri::XML::DocumentFragment, or a string containing markup.
343
+ # Set the content for this Node to +node_or_tags+.
344
+ #
345
+ # +node_or_tags+ can be a Nokogiri::XML::Node, a Nokogiri::XML::DocumentFragment, or a String
346
+ # containing markup.
260
347
  #
261
348
  # Also see related method +inner_html=+
262
349
  def children=(node_or_tags)
@@ -271,9 +358,12 @@ module Nokogiri
271
358
 
272
359
  ####
273
360
  # Replace this Node with +node_or_tags+.
274
- # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
275
361
  #
276
- # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
362
+ # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a String
363
+ # containing markup.
364
+ #
365
+ # Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is
366
+ # a DocumentFragment, NodeSet, or String).
277
367
  #
278
368
  # Also see related method +swap+.
279
369
  def replace(node_or_tags)
@@ -303,7 +393,9 @@ module Nokogiri
303
393
 
304
394
  ####
305
395
  # Swap this Node for +node_or_tags+
306
- # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
396
+ #
397
+ # +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a String
398
+ # containing markup.
307
399
  #
308
400
  # Returns self, to support chaining of calls.
309
401
  #
@@ -314,7 +406,8 @@ module Nokogiri
314
406
  end
315
407
 
316
408
  ####
317
- # Set the Node's content to a Text node containing +string+. The string gets XML escaped, not interpreted as markup.
409
+ # Set the Node's content to a Text node containing +string+. The string gets XML escaped, not
410
+ # interpreted as markup.
318
411
  def content=(string)
319
412
  self.native_content = encode_special_chars(string.to_s)
320
413
  end
@@ -1105,9 +1198,9 @@ module Nokogiri
1105
1198
 
1106
1199
  # Get the path to this node as a CSS expression
1107
1200
  def css_path
1108
- path.split(%r{/}).map do |part|
1201
+ path.split(%r{/}).filter_map do |part|
1109
1202
  part.empty? ? nil : part.gsub(/\[(\d+)\]/, ':nth-of-type(\1)')
1110
- end.compact.join(" > ")
1203
+ end.join(" > ")
1111
1204
  end
1112
1205
 
1113
1206
  ###
@@ -1194,12 +1287,11 @@ module Nokogiri
1194
1287
  }
1195
1288
  end
1196
1289
 
1197
- encoding = options[:encoding] || document.encoding
1198
- options[:encoding] = encoding
1290
+ options[:encoding] ||= document.encoding
1291
+ encoding = Encoding.find(options[:encoding] || "UTF-8")
1292
+
1293
+ io = StringIO.new(String.new(encoding: encoding))
1199
1294
 
1200
- outstring = +""
1201
- outstring.force_encoding(Encoding.find(encoding || "utf-8"))
1202
- io = StringIO.new(outstring)
1203
1295
  write_to(io, options, &block)
1204
1296
  io.string
1205
1297
  end
@@ -1311,6 +1403,69 @@ module Nokogiri
1311
1403
  end
1312
1404
  end
1313
1405
 
1406
+ DECONSTRUCT_KEYS = [:name, :attributes, :children, :namespace, :content, :elements, :inner_html].freeze # :nodoc:
1407
+ DECONSTRUCT_METHODS = { attributes: :attribute_nodes }.freeze # :nodoc:
1408
+
1409
+ #
1410
+ # :call-seq: deconstruct_keys(array_of_names) → Hash
1411
+ #
1412
+ # Returns a hash describing the Node, to use in pattern matching.
1413
+ #
1414
+ # Valid keys and their values:
1415
+ # - +name+ → (String) The name of this node, or "text" if it is a Text node.
1416
+ # - +namespace+ → (Namespace, nil) The namespace of this node, or nil if there is no namespace.
1417
+ # - +attributes+ → (Array<Attr>) The attributes of this node.
1418
+ # - +children+ → (Array<Node>) The children of this node. 💡 Note this includes text nodes.
1419
+ # - +elements+ → (Array<Node>) The child elements of this node. 💡 Note this does not include text nodes.
1420
+ # - +content+ → (String) The contents of all the text nodes in this node's subtree. See #content.
1421
+ # - +inner_html+ → (String) The inner markup for the children of this node. See #inner_html.
1422
+ #
1423
+ # ⚡ This is an experimental feature, available since v1.14.0
1424
+ #
1425
+ # *Example*
1426
+ #
1427
+ # doc = Nokogiri::XML.parse(<<~XML)
1428
+ # <?xml version="1.0"?>
1429
+ # <parent xmlns="http://nokogiri.org/ns/default" xmlns:noko="http://nokogiri.org/ns/noko">
1430
+ # <child1 foo="abc" noko:bar="def">First</child1>
1431
+ # <noko:child2 foo="qwe" noko:bar="rty">Second</noko:child2>
1432
+ # </parent>
1433
+ # XML
1434
+ #
1435
+ # doc.root.deconstruct_keys([:name, :namespace])
1436
+ # # => {:name=>"parent",
1437
+ # # :namespace=>
1438
+ # # #(Namespace:0x35c { href = "http://nokogiri.org/ns/default" })}
1439
+ #
1440
+ # doc.root.deconstruct_keys([:inner_html, :content])
1441
+ # # => {:content=>"\n" + " First\n" + " Second\n",
1442
+ # # :inner_html=>
1443
+ # # "\n" +
1444
+ # # " <child1 foo=\"abc\" noko:bar=\"def\">First</child1>\n" +
1445
+ # # " <noko:child2 foo=\"qwe\" noko:bar=\"rty\">Second</noko:child2>\n"}
1446
+ #
1447
+ # doc.root.elements.first.deconstruct_keys([:attributes])
1448
+ # # => {:attributes=>
1449
+ # # [#(Attr:0x370 { name = "foo", value = "abc" }),
1450
+ # # #(Attr:0x384 {
1451
+ # # name = "bar",
1452
+ # # namespace = #(Namespace:0x398 {
1453
+ # # prefix = "noko",
1454
+ # # href = "http://nokogiri.org/ns/noko"
1455
+ # # }),
1456
+ # # value = "def"
1457
+ # # })]}
1458
+ #
1459
+ def deconstruct_keys(keys)
1460
+ requested_keys = DECONSTRUCT_KEYS & keys
1461
+ {}.tap do |values|
1462
+ requested_keys.each do |key|
1463
+ method = DECONSTRUCT_METHODS[key] || key
1464
+ values[key] = send(method)
1465
+ end
1466
+ end
1467
+ end
1468
+
1314
1469
  # :section:
1315
1470
 
1316
1471
  protected