nokogiri 1.16.8-x86-mingw32 → 1.17.0-x86-mingw32

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (113) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +11 -21
  3. data/README.md +4 -0
  4. data/dependencies.yml +6 -6
  5. data/ext/nokogiri/extconf.rb +191 -137
  6. data/ext/nokogiri/gumbo.c +69 -53
  7. data/ext/nokogiri/html4_document.c +10 -4
  8. data/ext/nokogiri/html4_element_description.c +18 -18
  9. data/ext/nokogiri/html4_sax_parser.c +40 -0
  10. data/ext/nokogiri/html4_sax_parser_context.c +48 -58
  11. data/ext/nokogiri/html4_sax_push_parser.c +25 -24
  12. data/ext/nokogiri/include/libexslt/exsltconfig.h +3 -3
  13. data/ext/nokogiri/include/libxml2/libxml/HTMLparser.h +12 -19
  14. data/ext/nokogiri/include/libxml2/libxml/c14n.h +1 -12
  15. data/ext/nokogiri/include/libxml2/libxml/debugXML.h +1 -1
  16. data/ext/nokogiri/include/libxml2/libxml/encoding.h +9 -0
  17. data/ext/nokogiri/include/libxml2/libxml/entities.h +12 -1
  18. data/ext/nokogiri/include/libxml2/libxml/hash.h +19 -0
  19. data/ext/nokogiri/include/libxml2/libxml/list.h +2 -2
  20. data/ext/nokogiri/include/libxml2/libxml/nanohttp.h +17 -0
  21. data/ext/nokogiri/include/libxml2/libxml/parser.h +60 -54
  22. data/ext/nokogiri/include/libxml2/libxml/parserInternals.h +9 -1
  23. data/ext/nokogiri/include/libxml2/libxml/pattern.h +6 -0
  24. data/ext/nokogiri/include/libxml2/libxml/tree.h +32 -12
  25. data/ext/nokogiri/include/libxml2/libxml/uri.h +11 -0
  26. data/ext/nokogiri/include/libxml2/libxml/valid.h +29 -2
  27. data/ext/nokogiri/include/libxml2/libxml/xinclude.h +7 -0
  28. data/ext/nokogiri/include/libxml2/libxml/xmlIO.h +21 -4
  29. data/ext/nokogiri/include/libxml2/libxml/xmlerror.h +14 -0
  30. data/ext/nokogiri/include/libxml2/libxml/xmlexports.h +111 -15
  31. data/ext/nokogiri/include/libxml2/libxml/xmlmemory.h +8 -45
  32. data/ext/nokogiri/include/libxml2/libxml/xmlreader.h +2 -0
  33. data/ext/nokogiri/include/libxml2/libxml/xmlsave.h +5 -0
  34. data/ext/nokogiri/include/libxml2/libxml/xmlunicode.h +165 -1
  35. data/ext/nokogiri/include/libxml2/libxml/xmlversion.h +7 -171
  36. data/ext/nokogiri/include/libxml2/libxml/xmlwriter.h +1 -0
  37. data/ext/nokogiri/include/libxml2/libxml/xpath.h +4 -0
  38. data/ext/nokogiri/include/libxslt/xsltInternals.h +3 -0
  39. data/ext/nokogiri/include/libxslt/xsltconfig.h +4 -37
  40. data/ext/nokogiri/libxml2_polyfill.c +114 -0
  41. data/ext/nokogiri/nokogiri.c +9 -2
  42. data/ext/nokogiri/nokogiri.h +18 -33
  43. data/ext/nokogiri/xml_attr.c +1 -1
  44. data/ext/nokogiri/xml_cdata.c +2 -10
  45. data/ext/nokogiri/xml_comment.c +3 -8
  46. data/ext/nokogiri/xml_document.c +163 -156
  47. data/ext/nokogiri/xml_document_fragment.c +10 -25
  48. data/ext/nokogiri/xml_dtd.c +1 -1
  49. data/ext/nokogiri/xml_element_content.c +9 -9
  50. data/ext/nokogiri/xml_encoding_handler.c +4 -4
  51. data/ext/nokogiri/xml_namespace.c +6 -6
  52. data/ext/nokogiri/xml_node.c +130 -104
  53. data/ext/nokogiri/xml_node_set.c +46 -44
  54. data/ext/nokogiri/xml_reader.c +54 -58
  55. data/ext/nokogiri/xml_relax_ng.c +35 -56
  56. data/ext/nokogiri/xml_sax_parser.c +156 -88
  57. data/ext/nokogiri/xml_sax_parser_context.c +213 -131
  58. data/ext/nokogiri/xml_sax_push_parser.c +68 -49
  59. data/ext/nokogiri/xml_schema.c +50 -85
  60. data/ext/nokogiri/xml_syntax_error.c +19 -11
  61. data/ext/nokogiri/xml_text.c +2 -4
  62. data/ext/nokogiri/xml_xpath_context.c +2 -2
  63. data/ext/nokogiri/xslt_stylesheet.c +8 -8
  64. data/lib/nokogiri/3.0/nokogiri.so +0 -0
  65. data/lib/nokogiri/3.1/nokogiri.so +0 -0
  66. data/lib/nokogiri/3.2/nokogiri.so +0 -0
  67. data/lib/nokogiri/3.3/nokogiri.so +0 -0
  68. data/lib/nokogiri/class_resolver.rb +1 -1
  69. data/lib/nokogiri/css/node.rb +6 -2
  70. data/lib/nokogiri/css/parser.rb +6 -4
  71. data/lib/nokogiri/css/parser.y +2 -2
  72. data/lib/nokogiri/css/parser_extras.rb +6 -66
  73. data/lib/nokogiri/css/selector_cache.rb +38 -0
  74. data/lib/nokogiri/css/tokenizer.rb +4 -4
  75. data/lib/nokogiri/css/tokenizer.rex +9 -8
  76. data/lib/nokogiri/css/xpath_visitor.rb +42 -6
  77. data/lib/nokogiri/css.rb +86 -20
  78. data/lib/nokogiri/decorators/slop.rb +3 -5
  79. data/lib/nokogiri/encoding_handler.rb +2 -2
  80. data/lib/nokogiri/html4/document.rb +44 -23
  81. data/lib/nokogiri/html4/document_fragment.rb +124 -12
  82. data/lib/nokogiri/html4/encoding_reader.rb +1 -1
  83. data/lib/nokogiri/html4/sax/parser.rb +23 -38
  84. data/lib/nokogiri/html4/sax/parser_context.rb +4 -9
  85. data/lib/nokogiri/html4.rb +9 -14
  86. data/lib/nokogiri/html5/builder.rb +40 -0
  87. data/lib/nokogiri/html5/document.rb +61 -30
  88. data/lib/nokogiri/html5/document_fragment.rb +130 -20
  89. data/lib/nokogiri/html5/node.rb +4 -4
  90. data/lib/nokogiri/html5.rb +114 -72
  91. data/lib/nokogiri/version/constant.rb +1 -1
  92. data/lib/nokogiri/xml/builder.rb +8 -1
  93. data/lib/nokogiri/xml/document.rb +70 -26
  94. data/lib/nokogiri/xml/document_fragment.rb +84 -13
  95. data/lib/nokogiri/xml/node.rb +82 -11
  96. data/lib/nokogiri/xml/node_set.rb +9 -7
  97. data/lib/nokogiri/xml/parse_options.rb +1 -1
  98. data/lib/nokogiri/xml/pp/node.rb +6 -1
  99. data/lib/nokogiri/xml/reader.rb +46 -13
  100. data/lib/nokogiri/xml/relax_ng.rb +57 -20
  101. data/lib/nokogiri/xml/sax/document.rb +174 -83
  102. data/lib/nokogiri/xml/sax/parser.rb +115 -41
  103. data/lib/nokogiri/xml/sax/parser_context.rb +116 -8
  104. data/lib/nokogiri/xml/sax/push_parser.rb +3 -0
  105. data/lib/nokogiri/xml/sax.rb +48 -0
  106. data/lib/nokogiri/xml/schema.rb +112 -45
  107. data/lib/nokogiri/xml/searchable.rb +6 -8
  108. data/lib/nokogiri/xml/syntax_error.rb +22 -0
  109. data/lib/nokogiri/xml.rb +13 -24
  110. data/lib/nokogiri/xslt.rb +3 -9
  111. data/lib/xsd/xmlparser/nokogiri.rb +3 -4
  112. metadata +8 -4
  113. data/ext/nokogiri/libxml2_backwards_compat.c +0 -121
@@ -5,12 +5,12 @@ require "pathname"
5
5
 
6
6
  module Nokogiri
7
7
  module XML
8
- # Nokogiri::XML::Document is the main entry point for dealing with XML documents. The Document
9
- # is created by parsing an XML document. See Nokogiri::XML::Document.parse for more information
10
- # on parsing.
8
+ # Nokogiri::XML::Document is the main entry point for dealing with \XML documents. The Document
9
+ # is created by parsing \XML content from a String or an IO object. See
10
+ # Nokogiri::XML::Document.parse for more information on parsing.
11
11
  #
12
- # For searching a Document, see Nokogiri::XML::Searchable#css and
13
- # Nokogiri::XML::Searchable#xpath
12
+ # Document inherits a great deal of functionality from its superclass Nokogiri::XML::Node, so
13
+ # please read that class's documentation as well.
14
14
  class Document < Nokogiri::XML::Node
15
15
  # See http://www.w3.org/TR/REC-xml-names/#ns-decl for more details. Note that we're not
16
16
  # attempting to handle unicode characters partly because libxml2 doesn't handle unicode
@@ -19,33 +19,45 @@ module Nokogiri
19
19
  NCNAME_CHAR = NCNAME_START_CHAR + "\\-\\.0-9"
20
20
  NCNAME_RE = /^xmlns(?::([#{NCNAME_START_CHAR}][#{NCNAME_CHAR}]*))?$/
21
21
 
22
+ OBJECT_DUP_METHOD = Object.instance_method(:dup)
23
+ OBJECT_CLONE_METHOD = Object.instance_method(:clone)
24
+ private_constant :OBJECT_DUP_METHOD, :OBJECT_CLONE_METHOD
25
+
22
26
  class << self
23
- # Parse an XML file.
27
+ # call-seq:
28
+ # parse(input) { |options| ... } => Nokogiri::XML::Document
29
+ # parse(input, url:, encoding:, options:) => Nokogiri::XML::Document
24
30
  #
25
- # +string_or_io+ may be a String, or any object that responds to
26
- # _read_ and _close_ such as an IO, or StringIO.
31
+ # Parse \XML input from a String or IO object, and return a new XML::Document.
27
32
  #
28
- # +url+ (optional) is the URI where this document is located.
33
+ # 🛡 By default, Nokogiri treats documents as untrusted, and so does not attempt to load DTDs
34
+ # or access the network. See Nokogiri::XML::ParseOptions for a complete list of options; and
35
+ # that module's DEFAULT_XML constant for what's set (and not set) by default.
29
36
  #
30
- # +encoding+ (optional) is the encoding that should be used when processing
31
- # the document.
37
+ # [Required Parameters]
38
+ # - +input+ (String | IO) The content to be parsed.
32
39
  #
33
- # +options+ (optional) is a configuration object that sets options during
34
- # parsing, such as Nokogiri::XML::ParseOptions::RECOVER. See the
35
- # Nokogiri::XML::ParseOptions for more information.
40
+ # [Optional Keyword Arguments]
41
+ # - +url:+ (String) The base URI for this document.
36
42
  #
37
- # +block+ (optional) is passed a configuration object on which
38
- # parse options may be set.
43
+ # - +encoding:+ (String) The name of the encoding that should be used when processing the
44
+ # document. When not provided, the encoding will be determined based on the document
45
+ # content.
39
46
  #
40
- # By default, Nokogiri treats documents as untrusted, and so
41
- # does not attempt to load DTDs or access the network. See
42
- # Nokogiri::XML::ParseOptions for a complete list of options;
43
- # and that module's DEFAULT_XML constant for what's set (and not
44
- # set) by default.
47
+ # - +options:+ (Nokogiri::XML::ParseOptions) Configuration object that determines some
48
+ # behaviors during parsing. See ParseOptions for more information. The default value is
49
+ # +ParseOptions::DEFAULT_XML+.
45
50
  #
46
- # Nokogiri.XML() is a convenience method which will call this method.
51
+ # [Yields]
52
+ # If a block is given, a Nokogiri::XML::ParseOptions object is yielded to the block which
53
+ # can be configured before parsing. See Nokogiri::XML::ParseOptions for more information.
47
54
  #
48
- def parse(string_or_io, url = nil, encoding = nil, options = ParseOptions::DEFAULT_XML)
55
+ # [Returns] Nokogiri::XML::Document
56
+ def parse(
57
+ string_or_io,
58
+ url_ = nil, encoding_ = nil, options_ = XML::ParseOptions::DEFAULT_XML,
59
+ url: url_, encoding: encoding_, options: options_
60
+ )
49
61
  options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
50
62
  yield options if block_given?
51
63
 
@@ -60,6 +72,7 @@ module Nokogiri
60
72
  end
61
73
 
62
74
  doc = if string_or_io.respond_to?(:read)
75
+ # TODO: should we instead check for respond_to?(:to_path) ?
63
76
  if string_or_io.is_a?(Pathname)
64
77
  # resolve the Pathname to the file and open it as an IO object, see #2110
65
78
  string_or_io = string_or_io.expand_path.open
@@ -180,6 +193,38 @@ module Nokogiri
180
193
  @namespace_inheritance = false
181
194
  end
182
195
 
196
+ #
197
+ # :call-seq:
198
+ # dup → Nokogiri::XML::Document
199
+ # dup(level) → Nokogiri::XML::Document
200
+ #
201
+ # Duplicate this node.
202
+ #
203
+ # [Parameters]
204
+ # - +level+ (optional Integer). 0 is a shallow copy, 1 (the default) is a deep copy.
205
+ # [Returns] The new Nokogiri::XML::Document
206
+ #
207
+ def dup(level = 1)
208
+ copy = OBJECT_DUP_METHOD.bind_call(self)
209
+ copy.initialize_copy_with_args(self, level)
210
+ end
211
+
212
+ #
213
+ # :call-seq:
214
+ # clone → Nokogiri::XML::Document
215
+ # clone(level) → Nokogiri::XML::Document
216
+ #
217
+ # Clone this node.
218
+ #
219
+ # [Parameters]
220
+ # - +level+ (optional Integer). 0 is a shallow copy, 1 (the default) is a deep copy.
221
+ # [Returns] The new Nokogiri::XML::Document
222
+ #
223
+ def clone(level = 1)
224
+ copy = OBJECT_CLONE_METHOD.bind_call(self)
225
+ copy.initialize_copy_with_args(self, level)
226
+ end
227
+
183
228
  # :call-seq:
184
229
  # create_element(name, *contents_or_attrs, &block) → Nokogiri::XML::Element
185
230
  #
@@ -326,7 +371,7 @@ module Nokogiri
326
371
  end
327
372
 
328
373
  ##
329
- # Validate this Document against it's DTD. Returns a list of errors on
374
+ # Validate this Document against its DTD. Returns a list of errors on
330
375
  # the document or +nil+ when there is no DTD.
331
376
  def validate
332
377
  return unless internal_subset
@@ -367,12 +412,11 @@ module Nokogiri
367
412
  @decorators.each do |klass, list|
368
413
  next unless node.is_a?(klass)
369
414
 
370
- list.each { |moodule| node.extend(moodule) }
415
+ list.each { |mod| node.extend(mod) }
371
416
  end
372
417
  end
373
418
 
374
419
  alias_method :to_xml, :serialize
375
- alias_method :clone, :dup
376
420
 
377
421
  # Get the hash of namespaces on the root Nokogiri::XML::Node
378
422
  def namespaces
@@ -3,32 +3,103 @@
3
3
 
4
4
  module Nokogiri
5
5
  module XML
6
+ # DocumentFragment represents a fragment of an \XML document. It provides the same functionality
7
+ # exposed by XML::Node and can be used to contain one or more \XML subtrees.
6
8
  class DocumentFragment < Nokogiri::XML::Node
7
- ####
8
- # Create a Nokogiri::XML::DocumentFragment from +tags+
9
- def self.parse(tags, options = ParseOptions::DEFAULT_XML, &block)
10
- new(XML::Document.new, tags, nil, options, &block)
9
+ # The options used to parse the document fragment. Returns the value of any options that were
10
+ # passed into the constructor as a parameter or set in a config block, else the default
11
+ # options for the specific subclass.
12
+ attr_reader :parse_options
13
+
14
+ class << self
15
+ # :call-seq:
16
+ # parse(input) { |options| ... } → XML::DocumentFragment
17
+ # parse(input, options:) → XML::DocumentFragment
18
+ #
19
+ # Parse \XML fragment input from a String, and return a new XML::DocumentFragment. This
20
+ # method creates a new, empty XML::Document to contain the fragment.
21
+ #
22
+ # [Required Parameters]
23
+ # - +input+ (String) The content to be parsed.
24
+ #
25
+ # [Optional Keyword Arguments]
26
+ # - +options+ (Nokogiri::XML::ParseOptions) Configuration object that determines some
27
+ # behaviors during parsing. See ParseOptions for more information. The default value is
28
+ # +ParseOptions::DEFAULT_XML+.
29
+ #
30
+ # [Yields]
31
+ # If a block is given, a Nokogiri::XML::ParseOptions object is yielded to the block which
32
+ # can be configured before parsing. See Nokogiri::XML::ParseOptions for more information.
33
+ #
34
+ # [Returns] Nokogiri::XML::DocumentFragment
35
+ def parse(tags, options_ = ParseOptions::DEFAULT_XML, options: options_, &block)
36
+ new(XML::Document.new, tags, options: options, &block)
37
+ end
38
+
39
+ # Wrapper method to separate the concerns of:
40
+ # - the native object allocator's parameter (it only requires `document`)
41
+ # - the initializer's parameters
42
+ def new(document, ...) # :nodoc:
43
+ instance = native_new(document)
44
+ instance.send(:initialize, document, ...)
45
+ instance
46
+ end
11
47
  end
12
48
 
13
- ##
14
- # Create a new DocumentFragment from +tags+.
49
+ # :call-seq:
50
+ # new(document, input=nil) { |options| ... } → DocumentFragment
51
+ # new(document, input=nil, context:, options:) → DocumentFragment
52
+ #
53
+ # Parse \XML fragment input from a String, and return a new DocumentFragment that is
54
+ # associated with the given +document+.
55
+ #
56
+ # 💡 It's recommended to use either XML::DocumentFragment.parse or Node#parse rather than call
57
+ # this method directly.
58
+ #
59
+ # [Required Parameters]
60
+ # - +document+ (XML::Document) The parent document to associate the returned fragment with.
61
+ #
62
+ # [Optional Parameters]
63
+ # - +input+ (String) The content to be parsed.
64
+ #
65
+ # [Optional Keyword Arguments]
66
+ # - +context:+ (Nokogiri::XML::Node) The <b>context node</b> for the subtree created. See
67
+ # below for more information.
68
+ #
69
+ # - +options:+ (Nokogiri::XML::ParseOptions) Configuration object that determines some
70
+ # behaviors during parsing. See ParseOptions for more information. The default value is
71
+ # +ParseOptions::DEFAULT_XML+.
72
+ #
73
+ # [Yields]
74
+ # If a block is given, a Nokogiri::XML::ParseOptions object is yielded to the block which
75
+ # can be configured before parsing. See ParseOptions for more information.
76
+ #
77
+ # [Returns] XML::DocumentFragment
78
+ #
79
+ # === Context \Node
80
+ #
81
+ # If a context node is specified using +context:+, then the fragment will be created by
82
+ # calling Node#parse on that node, so the parser will behave as if that Node is the parent of
83
+ # the fragment subtree, and will resolve namespaces relative to that node.
15
84
  #
16
- # If +ctx+ is present, it is used as a context node for the
17
- # subtree created, e.g., namespaces will be resolved relative
18
- # to +ctx+.
19
- def initialize(document, tags = nil, ctx = nil, options = ParseOptions::DEFAULT_XML) # rubocop:disable Lint/MissingSuper
85
+ def initialize(
86
+ document, tags = nil,
87
+ context_ = nil, options_ = ParseOptions::DEFAULT_XML,
88
+ context: context_, options: options_
89
+ ) # rubocop:disable Lint/MissingSuper
20
90
  return self unless tags
21
91
 
22
92
  options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
93
+ @parse_options = options
23
94
  yield options if block_given?
24
95
 
25
- children = if ctx
96
+ children = if context
26
97
  # Fix for issue#490
27
98
  if Nokogiri.jruby?
28
99
  # fix for issue #770
29
- ctx.parse("<root #{namespace_declarations(ctx)}>#{tags}</root>", options).children
100
+ context.parse("<root #{namespace_declarations(context)}>#{tags}</root>", options).children
30
101
  else
31
- ctx.parse(tags, options)
102
+ context.parse(tags, options)
32
103
  end
33
104
  else
34
105
  wrapper_doc = XML::Document.parse("<root>#{tags}</root>", nil, nil, options)
@@ -127,6 +127,42 @@ module Nokogiri
127
127
  # This is intentionally empty, and sets the method signature for subclasses.
128
128
  end
129
129
 
130
+ #
131
+ # :call-seq:
132
+ # dup → Nokogiri::XML::Node
133
+ # dup(level) → Nokogiri::XML::Node
134
+ # dup(level, new_parent_doc) → Nokogiri::XML::Node
135
+ #
136
+ # Duplicate this node.
137
+ #
138
+ # [Parameters]
139
+ # - +level+ (optional Integer). 0 is a shallow copy, 1 (the default) is a deep copy.
140
+ # - +new_parent_doc+ (optional Nokogiri::XML::Document)
141
+ # The new node's parent Document. Defaults to the Document of the current node.
142
+ # [Returns] The new Nokogiri::XML::Node
143
+ #
144
+ def dup(level = 1, new_parent_doc = document)
145
+ super().initialize_copy_with_args(self, level, new_parent_doc)
146
+ end
147
+
148
+ #
149
+ # :call-seq:
150
+ # clone → Nokogiri::XML::Node
151
+ # clone(level) → Nokogiri::XML::Node
152
+ # clone(level, new_parent_doc) → Nokogiri::XML::Node
153
+ #
154
+ # Clone this node.
155
+ #
156
+ # [Parameters]
157
+ # - +level+ (optional Integer). 0 is a shallow copy, 1 (the default) is a deep copy.
158
+ # - +new_parent_doc+
159
+ # The new node's parent Document. Defaults to the Document of the current node.
160
+ # [Returns] The new Nokogiri::XML::Node
161
+ #
162
+ def clone(level = 1, new_parent_doc = document)
163
+ super().initialize_copy_with_args(self, level, new_parent_doc)
164
+ end
165
+
130
166
  ###
131
167
  # Decorate this node with the decorators set up in this node's Document
132
168
  def decorate!
@@ -228,7 +264,7 @@ module Nokogiri
228
264
  if new_parent.nil?
229
265
  raise "Failed to parse '#{node_or_tags}' in the context of a '#{context_node.name}' element"
230
266
  end
231
- when XML::Node
267
+ when Node
232
268
  new_parent = node_or_tags.dup
233
269
  else
234
270
  raise ArgumentError, "Requires a String or Node argument, and cannot accept a #{node_or_tags.class}"
@@ -406,8 +442,48 @@ module Nokogiri
406
442
  end
407
443
 
408
444
  ####
409
- # Set the Node's content to a Text node containing +string+. The string gets XML escaped, not
410
- # interpreted as markup.
445
+ # call-seq:
446
+ # content=(input)
447
+ #
448
+ # Set the content of this node to +input+.
449
+ #
450
+ # [Parameters]
451
+ # - +input+ (String) The new content for this node. Input is considered to be raw content, and
452
+ # so will be entity-escaped in the final DOM string.
453
+ #
454
+ # [Example]
455
+ # Note how entities are handled:
456
+ #
457
+ # doc = Nokogiri::HTML::Document.parse(<<~HTML)
458
+ # <html>
459
+ # <body>
460
+ # <div id="first">asdf</div>
461
+ # <div id="second">asdf</div>
462
+ # HTML
463
+ #
464
+ # text_node = doc.at_css("div#first").children.first
465
+ # div_node = doc.at_css("div#second")
466
+ #
467
+ # value = "You &amp; Me"
468
+ #
469
+ # text_node.content = value
470
+ # div_node.content = value
471
+ #
472
+ # doc.css("div").to_html
473
+ # # => "<div id=\"first\">You &amp;amp; Me</div>
474
+ # # <div id=\"second\">You &amp;amp; Me</div>"
475
+ #
476
+ # For content that is already entity-escaped, use CGI::unescapeHTML to decode it:
477
+ #
478
+ # text_node.content = CGI::unescapeHTML(value)
479
+ # div_node.content = CGI::unescapeHTML(value)
480
+ #
481
+ # doc.css("div").to_html
482
+ # # => "<div id=\"first\">You &amp; Me</div>
483
+ # # <div id=\"second\">You &amp; Me</div>"
484
+ #
485
+ # See also: #native_content=
486
+ #
411
487
  def content=(string)
412
488
  self.native_content = encode_special_chars(string.to_s)
413
489
  end
@@ -474,7 +550,6 @@ module Nokogiri
474
550
  alias_method :to_str, :content
475
551
  alias_method :name, :node_name
476
552
  alias_method :type, :node_type
477
- alias_method :clone, :dup
478
553
  alias_method :elements, :element_children
479
554
 
480
555
  # :section: Working With Node Attributes
@@ -1051,9 +1126,11 @@ module Nokogiri
1051
1126
 
1052
1127
  error_count = document.errors.length
1053
1128
  node_set = in_context(contents, options.to_i)
1129
+
1054
1130
  if document.errors.length > error_count
1055
1131
  raise document.errors[error_count] unless options.recover?
1056
1132
 
1133
+ # TODO: remove this block when libxml2 < 2.13 is no longer supported
1057
1134
  if node_set.empty?
1058
1135
  # libxml2 < 2.13 does not obey the +recover+ option after encountering errors during
1059
1136
  # +in_context+ parsing, and so this horrible hack is here to try to emulate recovery
@@ -1080,6 +1157,7 @@ module Nokogiri
1080
1157
  node_set = fragment.children
1081
1158
  end
1082
1159
  end
1160
+
1083
1161
  node_set
1084
1162
  end
1085
1163
 
@@ -1542,19 +1620,12 @@ module Nokogiri
1542
1620
  node_or_tags
1543
1621
  end
1544
1622
 
1545
- USING_LIBXML_WITH_BROKEN_SERIALIZATION = Nokogiri.uses_libxml?("~> 2.6.0").freeze
1546
- private_constant :USING_LIBXML_WITH_BROKEN_SERIALIZATION
1547
-
1548
1623
  def to_format(save_option, options)
1549
- return dump_html if USING_LIBXML_WITH_BROKEN_SERIALIZATION
1550
-
1551
1624
  options[:save_with] = save_option unless options[:save_with]
1552
1625
  serialize(options)
1553
1626
  end
1554
1627
 
1555
1628
  def write_format_to(save_option, io, options)
1556
- return (io << dump_html) if USING_LIBXML_WITH_BROKEN_SERIALIZATION
1557
-
1558
1629
  options[:save_with] ||= save_option
1559
1630
  write_to(io, options)
1560
1631
  end
@@ -4,9 +4,13 @@
4
4
  module Nokogiri
5
5
  module XML
6
6
  ####
7
- # A NodeSet contains a list of Nokogiri::XML::Node objects. Typically
8
- # a NodeSet is return as a result of searching a Document via
9
- # Nokogiri::XML::Searchable#css or Nokogiri::XML::Searchable#xpath
7
+ # A NodeSet is an Enumerable that contains a list of Nokogiri::XML::Node objects.
8
+ #
9
+ # Typically a NodeSet is returned as a result of searching a Document via
10
+ # Nokogiri::XML::Searchable#css or Nokogiri::XML::Searchable#xpath.
11
+ #
12
+ # Note that the `#dup` and `#clone` methods perform shallow copies; these methods do not copy
13
+ # the Nodes contained in the NodeSet (similar to how Array and other Enumerable classes work).
10
14
  class NodeSet
11
15
  include Nokogiri::XML::Searchable
12
16
  include Enumerable
@@ -14,8 +18,6 @@ module Nokogiri
14
18
  # The Document this NodeSet is associated with
15
19
  attr_accessor :document
16
20
 
17
- alias_method :clone, :dup
18
-
19
21
  # Create a NodeSet with +document+ defaulting to +list+
20
22
  def initialize(document, list = [])
21
23
  @document = document
@@ -121,7 +123,7 @@ module Nokogiri
121
123
  return self[args.first]
122
124
  end
123
125
 
124
- super(*args)
126
+ super
125
127
  end
126
128
  alias_method :%, :at
127
129
 
@@ -423,7 +425,7 @@ module Nokogiri
423
425
  end
424
426
 
425
427
  ###
426
- # Return a nicely formated string representation
428
+ # Return a nicely formatted string representation
427
429
  def inspect
428
430
  "[#{map(&:inspect).join(", ")}]"
429
431
  end
@@ -140,7 +140,7 @@ module Nokogiri
140
140
 
141
141
  # Relax any hardcoded limit from the parser. Off by default.
142
142
  #
143
- # ⚠ There may be a performance penalty when this option is set.
143
+ # ⚠ <b>It is UNSAFE to set this option</b> when parsing untrusted documents.
144
144
  HUGE = 1 << 19
145
145
 
146
146
  # Support line numbers up to <code>long int</code> (default is a <code>short int</code>). On
@@ -8,6 +8,11 @@ module Nokogiri
8
8
  COLLECTIONS = [:attribute_nodes, :children]
9
9
 
10
10
  def inspect
11
+ # handle the case where an exception is thrown during object construction
12
+ if respond_to?(:data_ptr?) && !data_ptr?
13
+ return "#<#{self.class}:#{format("0x%x", object_id)} (no data)>"
14
+ end
15
+
11
16
  attributes = inspect_attributes.reject do |x|
12
17
  attribute = send(x)
13
18
  !attribute || (attribute.respond_to?(:empty?) && attribute.empty?)
@@ -21,7 +26,7 @@ module Nokogiri
21
26
  "#{attribute}=#{send(attribute).inspect}"
22
27
  end.join(" ")
23
28
  end
24
- "#<#{self.class.name}:#{format("0x%x", object_id)} #{attributes}>"
29
+ "#<#{self.class}:#{format("0x%x", object_id)} #{attributes}>"
25
30
  end
26
31
 
27
32
  def pretty_print(pp)
@@ -3,32 +3,33 @@
3
3
  module Nokogiri
4
4
  module XML
5
5
  ###
6
- # Nokogiri::XML::Reader parses an XML document similar to the way a cursor would move. The
7
- # Reader is given an XML document, and yields nodes to an each block.
6
+ # The Reader parser allows you to effectively pull parse an \XML document. Once instantiated,
7
+ # call Nokogiri::XML::Reader#each to iterate over each node.
8
8
  #
9
- # The Reader parser might be good for when you need the speed and low memory usage of the SAX
10
- # parser, but do not want to write a Document handler.
9
+ # Nokogiri::XML::Reader parses an \XML document similar to the way a cursor would move. The
10
+ # Reader is given an \XML document, and yields nodes to an each block.
11
+ #
12
+ # The Reader parser might be good for when you need the speed and low memory usage of a \SAX
13
+ # parser, but do not want to write a SAX::Document handler.
11
14
  #
12
15
  # Here is an example of usage:
13
16
  #
14
- # reader = Nokogiri::XML::Reader(<<-eoxml)
17
+ # reader = Nokogiri::XML::Reader.new <<~XML
15
18
  # <x xmlns:tenderlove='http://tenderlovemaking.com/'>
16
19
  # <tenderlove:foo awesome='true'>snuggles!</tenderlove:foo>
17
20
  # </x>
18
- # eoxml
21
+ # XML
19
22
  #
20
23
  # reader.each do |node|
21
- #
22
24
  # # node is an instance of Nokogiri::XML::Reader
23
25
  # puts node.name
24
- #
25
26
  # end
26
27
  #
27
28
  # ⚠ Nokogiri::XML::Reader#each can only be called once! Once the cursor moves through the entire
28
29
  # document, you must parse the document again. It may be better to capture all information you
29
30
  # need during a single iteration.
30
31
  #
31
- # ⚠ libxml2 does not support error recovery in the Reader parser. The `RECOVER` ParseOption is
32
+ # ⚠ libxml2 does not support error recovery in the Reader parser. The +RECOVER+ ParseOption is
32
33
  # ignored. If a syntax error is encountered during parsing, an exception will be raised.
33
34
  class Reader
34
35
  include Enumerable
@@ -66,23 +67,55 @@ module Nokogiri
66
67
  TYPE_END_ELEMENT = 15
67
68
  # Entity end node type
68
69
  TYPE_END_ENTITY = 16
69
- # XML Declaration node type
70
+ # \XML Declaration node type
70
71
  TYPE_XML_DECLARATION = 17
71
72
 
72
73
  # A list of errors encountered while parsing
73
74
  attr_accessor :errors
74
75
 
75
- # The XML source
76
+ # The \XML source
76
77
  attr_reader :source
77
78
 
78
79
  alias_method :self_closing?, :empty_element?
79
80
 
80
- def initialize(source, url = nil, encoding = nil) # :nodoc:
81
+ # :call-seq:
82
+ # Reader.new(input) { |options| ... } → Reader
83
+ # Reader.new(input, url:, encoding:, options:) { |options| ... } → Reader
84
+ #
85
+ # Create a new Reader to parse an \XML document.
86
+ #
87
+ # [Required Parameters]
88
+ # - +input+ (String | IO): The \XML document to parse.
89
+ #
90
+ # [Optional Parameters]
91
+ # - +url:+ (String) The base URL of the document.
92
+ # - +encoding:+ (String) The name of the encoding of the document.
93
+ # - +options:+ (Integer | ParseOptions) Options to control the parser behavior.
94
+ # Defaults to +ParseOptions::STRICT+.
95
+ #
96
+ # [Yields]
97
+ # If present, the block will be passed a Nokogiri::XML::ParseOptions object to modify before
98
+ # the document is parsed. See Nokogiri::XML::ParseOptions for more information.
99
+ def self.new(
100
+ string_or_io,
101
+ url_ = nil, encoding_ = nil, options_ = ParseOptions::STRICT,
102
+ url: url_, encoding: encoding_, options: options_
103
+ )
104
+ options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
105
+ yield options if block_given?
106
+
107
+ if string_or_io.respond_to?(:read)
108
+ return Reader.from_io(string_or_io, url, encoding, options.to_i)
109
+ end
110
+
111
+ Reader.from_memory(string_or_io, url, encoding, options.to_i)
112
+ end
113
+
114
+ private def initialize(source, url = nil, encoding = nil) # :nodoc:
81
115
  @source = source
82
116
  @errors = []
83
117
  @encoding = encoding
84
118
  end
85
- private :initialize
86
119
 
87
120
  # Get the attributes and namespaces of the current node as a Hash.
88
121
  #