nokogiri 1.15.4 → 1.17.2

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (98) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +12 -19
  3. data/README.md +8 -1
  4. data/dependencies.yml +9 -8
  5. data/ext/nokogiri/extconf.rb +194 -141
  6. data/ext/nokogiri/gumbo.c +69 -53
  7. data/ext/nokogiri/html4_document.c +10 -4
  8. data/ext/nokogiri/html4_element_description.c +18 -18
  9. data/ext/nokogiri/html4_sax_parser.c +40 -0
  10. data/ext/nokogiri/html4_sax_parser_context.c +48 -58
  11. data/ext/nokogiri/html4_sax_push_parser.c +26 -25
  12. data/ext/nokogiri/libxml2_polyfill.c +114 -0
  13. data/ext/nokogiri/nokogiri.c +9 -2
  14. data/ext/nokogiri/nokogiri.h +25 -33
  15. data/ext/nokogiri/test_global_handlers.c +1 -1
  16. data/ext/nokogiri/xml_attr.c +1 -1
  17. data/ext/nokogiri/xml_cdata.c +3 -12
  18. data/ext/nokogiri/xml_comment.c +3 -8
  19. data/ext/nokogiri/xml_document.c +167 -156
  20. data/ext/nokogiri/xml_document_fragment.c +10 -25
  21. data/ext/nokogiri/xml_dtd.c +1 -1
  22. data/ext/nokogiri/xml_element_content.c +9 -9
  23. data/ext/nokogiri/xml_encoding_handler.c +4 -4
  24. data/ext/nokogiri/xml_namespace.c +6 -10
  25. data/ext/nokogiri/xml_node.c +142 -108
  26. data/ext/nokogiri/xml_node_set.c +46 -44
  27. data/ext/nokogiri/xml_reader.c +74 -100
  28. data/ext/nokogiri/xml_relax_ng.c +35 -56
  29. data/ext/nokogiri/xml_sax_parser.c +156 -88
  30. data/ext/nokogiri/xml_sax_parser_context.c +214 -128
  31. data/ext/nokogiri/xml_sax_push_parser.c +69 -50
  32. data/ext/nokogiri/xml_schema.c +51 -87
  33. data/ext/nokogiri/xml_syntax_error.c +19 -11
  34. data/ext/nokogiri/xml_text.c +3 -6
  35. data/ext/nokogiri/xml_xpath_context.c +4 -7
  36. data/ext/nokogiri/xslt_stylesheet.c +16 -11
  37. data/gumbo-parser/Makefile +18 -0
  38. data/gumbo-parser/src/error.c +76 -48
  39. data/gumbo-parser/src/error.h +5 -1
  40. data/gumbo-parser/src/nokogiri_gumbo.h +11 -2
  41. data/gumbo-parser/src/parser.c +64 -23
  42. data/gumbo-parser/src/tokenizer.c +7 -6
  43. data/lib/nokogiri/class_resolver.rb +1 -1
  44. data/lib/nokogiri/css/node.rb +6 -2
  45. data/lib/nokogiri/css/parser.rb +6 -4
  46. data/lib/nokogiri/css/parser.y +2 -2
  47. data/lib/nokogiri/css/parser_extras.rb +6 -66
  48. data/lib/nokogiri/css/selector_cache.rb +38 -0
  49. data/lib/nokogiri/css/tokenizer.rb +4 -4
  50. data/lib/nokogiri/css/tokenizer.rex +9 -8
  51. data/lib/nokogiri/css/xpath_visitor.rb +43 -27
  52. data/lib/nokogiri/css.rb +86 -20
  53. data/lib/nokogiri/decorators/slop.rb +3 -5
  54. data/lib/nokogiri/encoding_handler.rb +2 -2
  55. data/lib/nokogiri/html4/document.rb +45 -24
  56. data/lib/nokogiri/html4/document_fragment.rb +124 -12
  57. data/lib/nokogiri/html4/encoding_reader.rb +2 -2
  58. data/lib/nokogiri/html4/sax/parser.rb +23 -38
  59. data/lib/nokogiri/html4/sax/parser_context.rb +4 -9
  60. data/lib/nokogiri/html4.rb +9 -14
  61. data/lib/nokogiri/html5/builder.rb +40 -0
  62. data/lib/nokogiri/html5/document.rb +61 -30
  63. data/lib/nokogiri/html5/document_fragment.rb +130 -20
  64. data/lib/nokogiri/html5/node.rb +4 -4
  65. data/lib/nokogiri/html5.rb +114 -138
  66. data/lib/nokogiri/version/constant.rb +1 -1
  67. data/lib/nokogiri/version/info.rb +6 -5
  68. data/lib/nokogiri/xml/attr.rb +2 -2
  69. data/lib/nokogiri/xml/builder.rb +8 -1
  70. data/lib/nokogiri/xml/document.rb +74 -31
  71. data/lib/nokogiri/xml/document_fragment.rb +86 -15
  72. data/lib/nokogiri/xml/namespace.rb +1 -2
  73. data/lib/nokogiri/xml/node.rb +113 -35
  74. data/lib/nokogiri/xml/node_set.rb +12 -10
  75. data/lib/nokogiri/xml/parse_options.rb +1 -1
  76. data/lib/nokogiri/xml/pp/node.rb +6 -1
  77. data/lib/nokogiri/xml/reader.rb +51 -17
  78. data/lib/nokogiri/xml/relax_ng.rb +57 -20
  79. data/lib/nokogiri/xml/sax/document.rb +174 -83
  80. data/lib/nokogiri/xml/sax/parser.rb +115 -41
  81. data/lib/nokogiri/xml/sax/parser_context.rb +116 -8
  82. data/lib/nokogiri/xml/sax/push_parser.rb +3 -0
  83. data/lib/nokogiri/xml/sax.rb +48 -0
  84. data/lib/nokogiri/xml/schema.rb +112 -45
  85. data/lib/nokogiri/xml/searchable.rb +9 -11
  86. data/lib/nokogiri/xml/syntax_error.rb +23 -1
  87. data/lib/nokogiri/xml.rb +14 -25
  88. data/lib/nokogiri/xslt/stylesheet.rb +29 -7
  89. data/lib/nokogiri/xslt.rb +4 -10
  90. data/lib/nokogiri.rb +1 -1
  91. data/lib/xsd/xmlparser/nokogiri.rb +3 -4
  92. data/ports/archives/libxml2-2.13.5.tar.xz +0 -0
  93. data/ports/archives/libxslt-1.1.42.tar.xz +0 -0
  94. metadata +15 -14
  95. data/ext/nokogiri/libxml2_backwards_compat.c +0 -121
  96. data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +0 -25
  97. data/ports/archives/libxml2-2.11.5.tar.xz +0 -0
  98. data/ports/archives/libxslt-1.1.38.tar.xz +0 -0
@@ -475,7 +475,14 @@ module Nokogiri
475
475
  if block
476
476
  old_parent = @doc_builder.parent
477
477
  @doc_builder.parent = @node
478
- value = @doc_builder.instance_eval(&block)
478
+
479
+ arity = @doc_builder.arity || block.arity
480
+ value = if arity <= 0
481
+ @doc_builder.instance_eval(&block)
482
+ else
483
+ yield(@doc_builder)
484
+ end
485
+
479
486
  @doc_builder.parent = old_parent
480
487
  return value
481
488
  end
@@ -5,12 +5,12 @@ require "pathname"
5
5
 
6
6
  module Nokogiri
7
7
  module XML
8
- # Nokogiri::XML::Document is the main entry point for dealing with XML documents. The Document
9
- # is created by parsing an XML document. See Nokogiri::XML::Document.parse for more information
10
- # on parsing.
8
+ # Nokogiri::XML::Document is the main entry point for dealing with \XML documents. The Document
9
+ # is created by parsing \XML content from a String or an IO object. See
10
+ # Nokogiri::XML::Document.parse for more information on parsing.
11
11
  #
12
- # For searching a Document, see Nokogiri::XML::Searchable#css and
13
- # Nokogiri::XML::Searchable#xpath
12
+ # Document inherits a great deal of functionality from its superclass Nokogiri::XML::Node, so
13
+ # please read that class's documentation as well.
14
14
  class Document < Nokogiri::XML::Node
15
15
  # See http://www.w3.org/TR/REC-xml-names/#ns-decl for more details. Note that we're not
16
16
  # attempting to handle unicode characters partly because libxml2 doesn't handle unicode
@@ -19,33 +19,45 @@ module Nokogiri
19
19
  NCNAME_CHAR = NCNAME_START_CHAR + "\\-\\.0-9"
20
20
  NCNAME_RE = /^xmlns(?::([#{NCNAME_START_CHAR}][#{NCNAME_CHAR}]*))?$/
21
21
 
22
+ OBJECT_DUP_METHOD = Object.instance_method(:dup)
23
+ OBJECT_CLONE_METHOD = Object.instance_method(:clone)
24
+ private_constant :OBJECT_DUP_METHOD, :OBJECT_CLONE_METHOD
25
+
22
26
  class << self
23
- # Parse an XML file.
27
+ # call-seq:
28
+ # parse(input) { |options| ... } => Nokogiri::XML::Document
29
+ # parse(input, url:, encoding:, options:) => Nokogiri::XML::Document
24
30
  #
25
- # +string_or_io+ may be a String, or any object that responds to
26
- # _read_ and _close_ such as an IO, or StringIO.
31
+ # Parse \XML input from a String or IO object, and return a new XML::Document.
27
32
  #
28
- # +url+ (optional) is the URI where this document is located.
33
+ # 🛡 By default, Nokogiri treats documents as untrusted, and so does not attempt to load DTDs
34
+ # or access the network. See Nokogiri::XML::ParseOptions for a complete list of options; and
35
+ # that module's DEFAULT_XML constant for what's set (and not set) by default.
29
36
  #
30
- # +encoding+ (optional) is the encoding that should be used when processing
31
- # the document.
37
+ # [Required Parameters]
38
+ # - +input+ (String | IO) The content to be parsed.
32
39
  #
33
- # +options+ (optional) is a configuration object that sets options during
34
- # parsing, such as Nokogiri::XML::ParseOptions::RECOVER. See the
35
- # Nokogiri::XML::ParseOptions for more information.
40
+ # [Optional Keyword Arguments]
41
+ # - +url:+ (String) The base URI for this document.
36
42
  #
37
- # +block+ (optional) is passed a configuration object on which
38
- # parse options may be set.
43
+ # - +encoding:+ (String) The name of the encoding that should be used when processing the
44
+ # document. When not provided, the encoding will be determined based on the document
45
+ # content.
39
46
  #
40
- # By default, Nokogiri treats documents as untrusted, and so
41
- # does not attempt to load DTDs or access the network. See
42
- # Nokogiri::XML::ParseOptions for a complete list of options;
43
- # and that module's DEFAULT_XML constant for what's set (and not
44
- # set) by default.
47
+ # - +options:+ (Nokogiri::XML::ParseOptions) Configuration object that determines some
48
+ # behaviors during parsing. See ParseOptions for more information. The default value is
49
+ # +ParseOptions::DEFAULT_XML+.
45
50
  #
46
- # Nokogiri.XML() is a convenience method which will call this method.
51
+ # [Yields]
52
+ # If a block is given, a Nokogiri::XML::ParseOptions object is yielded to the block which
53
+ # can be configured before parsing. See Nokogiri::XML::ParseOptions for more information.
47
54
  #
48
- def parse(string_or_io, url = nil, encoding = nil, options = ParseOptions::DEFAULT_XML)
55
+ # [Returns] Nokogiri::XML::Document
56
+ def parse(
57
+ string_or_io,
58
+ url_ = nil, encoding_ = nil, options_ = XML::ParseOptions::DEFAULT_XML,
59
+ url: url_, encoding: encoding_, options: options_
60
+ )
49
61
  options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
50
62
  yield options if block_given?
51
63
 
@@ -60,6 +72,7 @@ module Nokogiri
60
72
  end
61
73
 
62
74
  doc = if string_or_io.respond_to?(:read)
75
+ # TODO: should we instead check for respond_to?(:to_path) ?
63
76
  if string_or_io.is_a?(Pathname)
64
77
  # resolve the Pathname to the file and open it as an IO object, see #2110
65
78
  string_or_io = string_or_io.expand_path.open
@@ -174,13 +187,44 @@ module Nokogiri
174
187
  # Since v1.12.4
175
188
  attr_accessor :namespace_inheritance
176
189
 
177
- # :nodoc:
178
- def initialize(*args) # rubocop:disable Lint/MissingSuper
190
+ def initialize(*args) # :nodoc: # rubocop:disable Lint/MissingSuper
179
191
  @errors = []
180
192
  @decorators = nil
181
193
  @namespace_inheritance = false
182
194
  end
183
195
 
196
+ #
197
+ # :call-seq:
198
+ # dup → Nokogiri::XML::Document
199
+ # dup(level) → Nokogiri::XML::Document
200
+ #
201
+ # Duplicate this node.
202
+ #
203
+ # [Parameters]
204
+ # - +level+ (optional Integer). 0 is a shallow copy, 1 (the default) is a deep copy.
205
+ # [Returns] The new Nokogiri::XML::Document
206
+ #
207
+ def dup(level = 1)
208
+ copy = OBJECT_DUP_METHOD.bind_call(self)
209
+ copy.initialize_copy_with_args(self, level)
210
+ end
211
+
212
+ #
213
+ # :call-seq:
214
+ # clone → Nokogiri::XML::Document
215
+ # clone(level) → Nokogiri::XML::Document
216
+ #
217
+ # Clone this node.
218
+ #
219
+ # [Parameters]
220
+ # - +level+ (optional Integer). 0 is a shallow copy, 1 (the default) is a deep copy.
221
+ # [Returns] The new Nokogiri::XML::Document
222
+ #
223
+ def clone(level = 1)
224
+ copy = OBJECT_CLONE_METHOD.bind_call(self)
225
+ copy.initialize_copy_with_args(self, level)
226
+ end
227
+
184
228
  # :call-seq:
185
229
  # create_element(name, *contents_or_attrs, &block) → Nokogiri::XML::Element
186
230
  #
@@ -327,10 +371,10 @@ module Nokogiri
327
371
  end
328
372
 
329
373
  ##
330
- # Validate this Document against it's DTD. Returns a list of errors on
374
+ # Validate this Document against its DTD. Returns a list of errors on
331
375
  # the document or +nil+ when there is no DTD.
332
376
  def validate
333
- return nil unless internal_subset
377
+ return unless internal_subset
334
378
 
335
379
  internal_subset.validate(self)
336
380
  end
@@ -368,12 +412,11 @@ module Nokogiri
368
412
  @decorators.each do |klass, list|
369
413
  next unless node.is_a?(klass)
370
414
 
371
- list.each { |moodule| node.extend(moodule) }
415
+ list.each { |mod| node.extend(mod) }
372
416
  end
373
417
  end
374
418
 
375
419
  alias_method :to_xml, :serialize
376
- alias_method :clone, :dup
377
420
 
378
421
  # Get the hash of namespaces on the root Nokogiri::XML::Node
379
422
  def namespaces
@@ -427,8 +470,6 @@ module Nokogiri
427
470
  # instructions. If you have a use case and would like this functionality, please let us know
428
471
  # by opening an issue or a discussion on the github project.
429
472
  #
430
- # ⚡ This is an experimental feature, available since v1.14.0
431
- #
432
473
  # *Example*
433
474
  #
434
475
  # doc = Nokogiri::XML.parse(<<~XML)
@@ -455,6 +496,8 @@ module Nokogiri
455
496
  # doc.deconstruct_keys([:root])
456
497
  # # => {:root=>nil}
457
498
  #
499
+ # Since v1.14.0
500
+ #
458
501
  def deconstruct_keys(keys)
459
502
  { root: root }
460
503
  end
@@ -3,32 +3,103 @@
3
3
 
4
4
  module Nokogiri
5
5
  module XML
6
+ # DocumentFragment represents a fragment of an \XML document. It provides the same functionality
7
+ # exposed by XML::Node and can be used to contain one or more \XML subtrees.
6
8
  class DocumentFragment < Nokogiri::XML::Node
7
- ####
8
- # Create a Nokogiri::XML::DocumentFragment from +tags+
9
- def self.parse(tags, options = ParseOptions::DEFAULT_XML, &block)
10
- new(XML::Document.new, tags, nil, options, &block)
9
+ # The options used to parse the document fragment. Returns the value of any options that were
10
+ # passed into the constructor as a parameter or set in a config block, else the default
11
+ # options for the specific subclass.
12
+ attr_reader :parse_options
13
+
14
+ class << self
15
+ # :call-seq:
16
+ # parse(input) { |options| ... } → XML::DocumentFragment
17
+ # parse(input, options:) → XML::DocumentFragment
18
+ #
19
+ # Parse \XML fragment input from a String, and return a new XML::DocumentFragment. This
20
+ # method creates a new, empty XML::Document to contain the fragment.
21
+ #
22
+ # [Required Parameters]
23
+ # - +input+ (String) The content to be parsed.
24
+ #
25
+ # [Optional Keyword Arguments]
26
+ # - +options+ (Nokogiri::XML::ParseOptions) Configuration object that determines some
27
+ # behaviors during parsing. See ParseOptions for more information. The default value is
28
+ # +ParseOptions::DEFAULT_XML+.
29
+ #
30
+ # [Yields]
31
+ # If a block is given, a Nokogiri::XML::ParseOptions object is yielded to the block which
32
+ # can be configured before parsing. See Nokogiri::XML::ParseOptions for more information.
33
+ #
34
+ # [Returns] Nokogiri::XML::DocumentFragment
35
+ def parse(tags, options_ = ParseOptions::DEFAULT_XML, options: options_, &block)
36
+ new(XML::Document.new, tags, options: options, &block)
37
+ end
38
+
39
+ # Wrapper method to separate the concerns of:
40
+ # - the native object allocator's parameter (it only requires `document`)
41
+ # - the initializer's parameters
42
+ def new(document, ...) # :nodoc:
43
+ instance = native_new(document)
44
+ instance.send(:initialize, document, ...)
45
+ instance
46
+ end
11
47
  end
12
48
 
13
- ##
14
- # Create a new DocumentFragment from +tags+.
49
+ # :call-seq:
50
+ # new(document, input=nil) { |options| ... } → DocumentFragment
51
+ # new(document, input=nil, context:, options:) → DocumentFragment
52
+ #
53
+ # Parse \XML fragment input from a String, and return a new DocumentFragment that is
54
+ # associated with the given +document+.
55
+ #
56
+ # 💡 It's recommended to use either XML::DocumentFragment.parse or Node#parse rather than call
57
+ # this method directly.
58
+ #
59
+ # [Required Parameters]
60
+ # - +document+ (XML::Document) The parent document to associate the returned fragment with.
61
+ #
62
+ # [Optional Parameters]
63
+ # - +input+ (String) The content to be parsed.
15
64
  #
16
- # If +ctx+ is present, it is used as a context node for the
17
- # subtree created, e.g., namespaces will be resolved relative
18
- # to +ctx+.
19
- def initialize(document, tags = nil, ctx = nil, options = ParseOptions::DEFAULT_XML) # rubocop:disable Lint/MissingSuper
65
+ # [Optional Keyword Arguments]
66
+ # - +context:+ (Nokogiri::XML::Node) The <b>context node</b> for the subtree created. See
67
+ # below for more information.
68
+ #
69
+ # - +options:+ (Nokogiri::XML::ParseOptions) Configuration object that determines some
70
+ # behaviors during parsing. See ParseOptions for more information. The default value is
71
+ # +ParseOptions::DEFAULT_XML+.
72
+ #
73
+ # [Yields]
74
+ # If a block is given, a Nokogiri::XML::ParseOptions object is yielded to the block which
75
+ # can be configured before parsing. See ParseOptions for more information.
76
+ #
77
+ # [Returns] XML::DocumentFragment
78
+ #
79
+ # === Context \Node
80
+ #
81
+ # If a context node is specified using +context:+, then the fragment will be created by
82
+ # calling Node#parse on that node, so the parser will behave as if that Node is the parent of
83
+ # the fragment subtree, and will resolve namespaces relative to that node.
84
+ #
85
+ def initialize(
86
+ document, tags = nil,
87
+ context_ = nil, options_ = ParseOptions::DEFAULT_XML,
88
+ context: context_, options: options_
89
+ ) # rubocop:disable Lint/MissingSuper
20
90
  return self unless tags
21
91
 
22
92
  options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
93
+ @parse_options = options
23
94
  yield options if block_given?
24
95
 
25
- children = if ctx
96
+ children = if context
26
97
  # Fix for issue#490
27
98
  if Nokogiri.jruby?
28
99
  # fix for issue #770
29
- ctx.parse("<root #{namespace_declarations(ctx)}>#{tags}</root>", options).children
100
+ context.parse("<root #{namespace_declarations(context)}>#{tags}</root>", options).children
30
101
  else
31
- ctx.parse(tags, options)
102
+ context.parse(tags, options)
32
103
  end
33
104
  else
34
105
  wrapper_doc = XML::Document.parse("<root>#{tags}</root>", nil, nil, options)
@@ -154,8 +225,6 @@ module Nokogiri
154
225
  # root elements, you should deconstruct the array returned by
155
226
  # <tt>DocumentFragment#elements</tt>.
156
227
  #
157
- # ⚡ This is an experimental feature, available since v1.14.0
158
- #
159
228
  # *Example*
160
229
  #
161
230
  # frag = Nokogiri::HTML5.fragment(<<~HTML)
@@ -187,6 +256,8 @@ module Nokogiri
187
256
  # # }),
188
257
  # # #(Element:0x398 { name = "div", children = [ #(Text "End")] })]
189
258
  #
259
+ # Since v1.14.0
260
+ #
190
261
  def deconstruct
191
262
  children.to_a
192
263
  end
@@ -16,8 +16,6 @@ module Nokogiri
16
16
  # - +prefix+ → (String, nil) The namespace's prefix, or +nil+ if there is no prefix (e.g., default namespace).
17
17
  # - +href+ → (String) The namespace's URI
18
18
  #
19
- # ⚡ This is an experimental feature, available since v1.14.0
20
- #
21
19
  # *Example*
22
20
  #
23
21
  # doc = Nokogiri::XML.parse(<<~XML)
@@ -43,6 +41,7 @@ module Nokogiri
43
41
  # doc.root.elements.last.namespace.deconstruct_keys([:prefix, :href])
44
42
  # # => {:prefix=>"noko", :href=>"http://nokogiri.org/ns/noko"}
45
43
  #
44
+ # Since v1.14.0
46
45
  #
47
46
  def deconstruct_keys(keys)
48
47
  { prefix: prefix, href: href }
@@ -127,6 +127,42 @@ module Nokogiri
127
127
  # This is intentionally empty, and sets the method signature for subclasses.
128
128
  end
129
129
 
130
+ #
131
+ # :call-seq:
132
+ # dup → Nokogiri::XML::Node
133
+ # dup(level) → Nokogiri::XML::Node
134
+ # dup(level, new_parent_doc) → Nokogiri::XML::Node
135
+ #
136
+ # Duplicate this node.
137
+ #
138
+ # [Parameters]
139
+ # - +level+ (optional Integer). 0 is a shallow copy, 1 (the default) is a deep copy.
140
+ # - +new_parent_doc+ (optional Nokogiri::XML::Document)
141
+ # The new node's parent Document. Defaults to the the Document of the current node.
142
+ # [Returns] The new Nokogiri::XML::Node
143
+ #
144
+ def dup(level = 1, new_parent_doc = document)
145
+ super().initialize_copy_with_args(self, level, new_parent_doc)
146
+ end
147
+
148
+ #
149
+ # :call-seq:
150
+ # clone → Nokogiri::XML::Node
151
+ # clone(level) → Nokogiri::XML::Node
152
+ # clone(level, new_parent_doc) → Nokogiri::XML::Node
153
+ #
154
+ # Clone this node.
155
+ #
156
+ # [Parameters]
157
+ # - +level+ (optional Integer). 0 is a shallow copy, 1 (the default) is a deep copy.
158
+ # - +new_parent_doc+
159
+ # The new node's parent Document. Defaults to the the Document of the current node.
160
+ # [Returns] The new Nokogiri::XML::Node
161
+ #
162
+ def clone(level = 1, new_parent_doc = document)
163
+ super().initialize_copy_with_args(self, level, new_parent_doc)
164
+ end
165
+
130
166
  ###
131
167
  # Decorate this node with the decorators set up in this node's Document
132
168
  def decorate!
@@ -228,7 +264,7 @@ module Nokogiri
228
264
  if new_parent.nil?
229
265
  raise "Failed to parse '#{node_or_tags}' in the context of a '#{context_node.name}' element"
230
266
  end
231
- when XML::Node
267
+ when Node
232
268
  new_parent = node_or_tags.dup
233
269
  else
234
270
  raise ArgumentError, "Requires a String or Node argument, and cannot accept a #{node_or_tags.class}"
@@ -406,8 +442,48 @@ module Nokogiri
406
442
  end
407
443
 
408
444
  ####
409
- # Set the Node's content to a Text node containing +string+. The string gets XML escaped, not
410
- # interpreted as markup.
445
+ # call-seq:
446
+ # content=(input)
447
+ #
448
+ # Set the content of this node to +input+.
449
+ #
450
+ # [Parameters]
451
+ # - +input+ (String) The new content for this node. Input is considered to be raw content, and
452
+ # so will be entity-escaped in the final DOM string.
453
+ #
454
+ # [Example]
455
+ # Note how entities are handled:
456
+ #
457
+ # doc = Nokogiri::HTML::Document.parse(<<~HTML)
458
+ # <html>
459
+ # <body>
460
+ # <div id="first">asdf</div>
461
+ # <div id="second">asdf</div>
462
+ # HTML
463
+ #
464
+ # text_node = doc.at_css("div#first").children.first
465
+ # div_node = doc.at_css("div#second")
466
+ #
467
+ # value = "You &amp; Me"
468
+ #
469
+ # text_node.content = value
470
+ # div_node.content = value
471
+ #
472
+ # doc.css("div").to_html
473
+ # # => "<div id=\"first\">You &amp;amp; Me</div>
474
+ # # <div id=\"second\">You &amp;amp; Me</div>"
475
+ #
476
+ # For content that is already entity-escaped, use CGI::unescapeHTML to decode it:
477
+ #
478
+ # text_node.content = CGI::unescapeHTML(value)
479
+ # div_node.content = CGI::unescapeHTML(value)
480
+ #
481
+ # doc.css("div").to_html
482
+ # # => "<div id=\"first\">You &amp; Me</div>
483
+ # # <div id=\"second\">You &amp; Me</div>"
484
+ #
485
+ # See also: #native_content=
486
+ #
411
487
  def content=(string)
412
488
  self.native_content = encode_special_chars(string.to_s)
413
489
  end
@@ -474,7 +550,6 @@ module Nokogiri
474
550
  alias_method :to_str, :content
475
551
  alias_method :name, :node_name
476
552
  alias_method :type, :node_type
477
- alias_method :clone, :dup
478
553
  alias_method :elements, :element_children
479
554
 
480
555
  # :section: Working With Node Attributes
@@ -1049,31 +1124,40 @@ module Nokogiri
1049
1124
 
1050
1125
  return Nokogiri::XML::NodeSet.new(document) if contents.empty?
1051
1126
 
1052
- # libxml2 does not obey the +recover+ option after encountering errors during +in_context+
1053
- # parsing, and so this horrible hack is here to try to emulate recovery behavior.
1054
- #
1055
- # Unfortunately, this means we're no longer parsing "in context" and so namespaces that
1056
- # would have been inherited from the context node won't be handled correctly. This hack was
1057
- # written in 2010, and I regret it, because it's silently degrading functionality in a way
1058
- # that's not easily prevented (or even detected).
1059
- #
1060
- # I think preferable behavior would be to either:
1061
- #
1062
- # a. add an error noting that we "fell back" and pointing the user to turning off the +recover+ option
1063
- # b. don't recover, but raise a sensible exception
1064
- #
1065
- # For context and background: https://github.com/sparklemotion/nokogiri/issues/313
1066
- # FIXME bug report: https://github.com/sparklemotion/nokogiri/issues/2092
1067
1127
  error_count = document.errors.length
1068
1128
  node_set = in_context(contents, options.to_i)
1069
- if node_set.empty? && (document.errors.length > error_count)
1070
- if options.recover?
1129
+
1130
+ if document.errors.length > error_count
1131
+ raise document.errors[error_count] unless options.recover?
1132
+
1133
+ # TODO: remove this block when libxml2 < 2.13 is no longer supported
1134
+ if node_set.empty?
1135
+ # libxml2 < 2.13 does not obey the +recover+ option after encountering errors during
1136
+ # +in_context+ parsing, and so this horrible hack is here to try to emulate recovery
1137
+ # behavior.
1138
+ #
1139
+ # (Note that HTML4 fragment parsing seems to have been fixed in abd74186, and XML
1140
+ # fragment parsing is fixed in 1c106edf. Both are in 2.13.)
1141
+ #
1142
+ # Unfortunately, this means we're no longer parsing "in context" and so namespaces that
1143
+ # would have been inherited from the context node won't be handled correctly. This hack
1144
+ # was written in 2010, and I regret it, because it's silently degrading functionality in
1145
+ # a way that's not easily prevented (or even detected).
1146
+ #
1147
+ # I think preferable behavior would be to either:
1148
+ #
1149
+ # a. add an error noting that we "fell back" and pointing the user to turning off the
1150
+ # +recover+ option
1151
+ # b. don't recover, but raise a sensible exception
1152
+ #
1153
+ # For context and background:
1154
+ # - https://github.com/sparklemotion/nokogiri/issues/313
1155
+ # - https://github.com/sparklemotion/nokogiri/issues/2092
1071
1156
  fragment = document.related_class("DocumentFragment").parse(contents)
1072
1157
  node_set = fragment.children
1073
- else
1074
- raise document.errors[error_count]
1075
1158
  end
1076
1159
  end
1160
+
1077
1161
  node_set
1078
1162
  end
1079
1163
 
@@ -1165,7 +1249,7 @@ module Nokogiri
1165
1249
  # Fetch the Nokogiri::HTML4::ElementDescription for this node. Returns
1166
1250
  # nil on XML documents and on unknown tags.
1167
1251
  def description
1168
- return nil if document.xml?
1252
+ return if document.xml?
1169
1253
 
1170
1254
  Nokogiri::HTML4::ElementDescription[name]
1171
1255
  end
@@ -1254,8 +1338,8 @@ module Nokogiri
1254
1338
  # Compare two Node objects with respect to their Document. Nodes from
1255
1339
  # different documents cannot be compared.
1256
1340
  def <=>(other)
1257
- return nil unless other.is_a?(Nokogiri::XML::Node)
1258
- return nil unless document == other.document
1341
+ return unless other.is_a?(Nokogiri::XML::Node)
1342
+ return unless document == other.document
1259
1343
 
1260
1344
  compare(other)
1261
1345
  end
@@ -1278,6 +1362,7 @@ module Nokogiri
1278
1362
  # end
1279
1363
  #
1280
1364
  def serialize(*args, &block)
1365
+ # TODO: deprecate non-hash options, see 46c68ed 2009-06-20 for context
1281
1366
  options = if args.first.is_a?(Hash)
1282
1367
  args.shift
1283
1368
  else
@@ -1429,8 +1514,6 @@ module Nokogiri
1429
1514
  # - +content+ → (String) The contents of all the text nodes in this node's subtree. See #content.
1430
1515
  # - +inner_html+ → (String) The inner markup for the children of this node. See #inner_html.
1431
1516
  #
1432
- # ⚡ This is an experimental feature, available since v1.14.0
1433
- #
1434
1517
  # *Example*
1435
1518
  #
1436
1519
  # doc = Nokogiri::XML.parse(<<~XML)
@@ -1465,6 +1548,8 @@ module Nokogiri
1465
1548
  # # value = "def"
1466
1549
  # # })]}
1467
1550
  #
1551
+ # Since v1.14.0
1552
+ #
1468
1553
  def deconstruct_keys(keys)
1469
1554
  requested_keys = DECONSTRUCT_KEYS & keys
1470
1555
  {}.tap do |values|
@@ -1535,19 +1620,12 @@ module Nokogiri
1535
1620
  node_or_tags
1536
1621
  end
1537
1622
 
1538
- USING_LIBXML_WITH_BROKEN_SERIALIZATION = Nokogiri.uses_libxml?("~> 2.6.0").freeze
1539
- private_constant :USING_LIBXML_WITH_BROKEN_SERIALIZATION
1540
-
1541
1623
  def to_format(save_option, options)
1542
- return dump_html if USING_LIBXML_WITH_BROKEN_SERIALIZATION
1543
-
1544
1624
  options[:save_with] = save_option unless options[:save_with]
1545
1625
  serialize(options)
1546
1626
  end
1547
1627
 
1548
1628
  def write_format_to(save_option, io, options)
1549
- return (io << dump_html) if USING_LIBXML_WITH_BROKEN_SERIALIZATION
1550
-
1551
1629
  options[:save_with] ||= save_option
1552
1630
  write_to(io, options)
1553
1631
  end
@@ -4,9 +4,13 @@
4
4
  module Nokogiri
5
5
  module XML
6
6
  ####
7
- # A NodeSet contains a list of Nokogiri::XML::Node objects. Typically
8
- # a NodeSet is return as a result of searching a Document via
9
- # Nokogiri::XML::Searchable#css or Nokogiri::XML::Searchable#xpath
7
+ # A NodeSet is an Enumerable that contains a list of Nokogiri::XML::Node objects.
8
+ #
9
+ # Typically a NodeSet is returned as a result of searching a Document via
10
+ # Nokogiri::XML::Searchable#css or Nokogiri::XML::Searchable#xpath.
11
+ #
12
+ # Note that the `#dup` and `#clone` methods perform shallow copies; these methods do not copy
13
+ # the Nodes contained in the NodeSet (similar to how Array and other Enumerable classes work).
10
14
  class NodeSet
11
15
  include Nokogiri::XML::Searchable
12
16
  include Enumerable
@@ -14,8 +18,6 @@ module Nokogiri
14
18
  # The Document this NodeSet is associated with
15
19
  attr_accessor :document
16
20
 
17
- alias_method :clone, :dup
18
-
19
21
  # Create a NodeSet with +document+ defaulting to +list+
20
22
  def initialize(document, list = [])
21
23
  @document = document
@@ -121,7 +123,7 @@ module Nokogiri
121
123
  return self[args.first]
122
124
  end
123
125
 
124
- super(*args)
126
+ super
125
127
  end
126
128
  alias_method :%, :at
127
129
 
@@ -372,7 +374,7 @@ module Nokogiri
372
374
  # Removes the last element from set and returns it, or +nil+ if
373
375
  # the set is empty
374
376
  def pop
375
- return nil if length == 0
377
+ return if length == 0
376
378
 
377
379
  delete(last)
378
380
  end
@@ -381,7 +383,7 @@ module Nokogiri
381
383
  # Returns the first element of the NodeSet and removes it. Returns
382
384
  # +nil+ if the set is empty.
383
385
  def shift
384
- return nil if length == 0
386
+ return if length == 0
385
387
 
386
388
  delete(first)
387
389
  end
@@ -423,7 +425,7 @@ module Nokogiri
423
425
  end
424
426
 
425
427
  ###
426
- # Return a nicely formated string representation
428
+ # Return a nicely formatted string representation
427
429
  def inspect
428
430
  "[#{map(&:inspect).join(", ")}]"
429
431
  end
@@ -435,7 +437,7 @@ module Nokogiri
435
437
  #
436
438
  # Returns the members of this NodeSet as an array, to use in pattern matching.
437
439
  #
438
- # This is an experimental feature, available since v1.14.0
440
+ # Since v1.14.0
439
441
  #
440
442
  def deconstruct
441
443
  to_a
@@ -140,7 +140,7 @@ module Nokogiri
140
140
 
141
141
  # Relax any hardcoded limit from the parser. Off by default.
142
142
  #
143
- # ⚠ There may be a performance penalty when this option is set.
143
+ # ⚠ <b>It is UNSAFE to set this option</b> when parsing untrusted documents.
144
144
  HUGE = 1 << 19
145
145
 
146
146
  # Support line numbers up to <code>long int</code> (default is a <code>short int</code>). On