nokogiri 1.15.4 → 1.17.2
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +12 -19
- data/README.md +8 -1
- data/dependencies.yml +9 -8
- data/ext/nokogiri/extconf.rb +194 -141
- data/ext/nokogiri/gumbo.c +69 -53
- data/ext/nokogiri/html4_document.c +10 -4
- data/ext/nokogiri/html4_element_description.c +18 -18
- data/ext/nokogiri/html4_sax_parser.c +40 -0
- data/ext/nokogiri/html4_sax_parser_context.c +48 -58
- data/ext/nokogiri/html4_sax_push_parser.c +26 -25
- data/ext/nokogiri/libxml2_polyfill.c +114 -0
- data/ext/nokogiri/nokogiri.c +9 -2
- data/ext/nokogiri/nokogiri.h +25 -33
- data/ext/nokogiri/test_global_handlers.c +1 -1
- data/ext/nokogiri/xml_attr.c +1 -1
- data/ext/nokogiri/xml_cdata.c +3 -12
- data/ext/nokogiri/xml_comment.c +3 -8
- data/ext/nokogiri/xml_document.c +167 -156
- data/ext/nokogiri/xml_document_fragment.c +10 -25
- data/ext/nokogiri/xml_dtd.c +1 -1
- data/ext/nokogiri/xml_element_content.c +9 -9
- data/ext/nokogiri/xml_encoding_handler.c +4 -4
- data/ext/nokogiri/xml_namespace.c +6 -10
- data/ext/nokogiri/xml_node.c +142 -108
- data/ext/nokogiri/xml_node_set.c +46 -44
- data/ext/nokogiri/xml_reader.c +74 -100
- data/ext/nokogiri/xml_relax_ng.c +35 -56
- data/ext/nokogiri/xml_sax_parser.c +156 -88
- data/ext/nokogiri/xml_sax_parser_context.c +214 -128
- data/ext/nokogiri/xml_sax_push_parser.c +69 -50
- data/ext/nokogiri/xml_schema.c +51 -87
- data/ext/nokogiri/xml_syntax_error.c +19 -11
- data/ext/nokogiri/xml_text.c +3 -6
- data/ext/nokogiri/xml_xpath_context.c +4 -7
- data/ext/nokogiri/xslt_stylesheet.c +16 -11
- data/gumbo-parser/Makefile +18 -0
- data/gumbo-parser/src/error.c +76 -48
- data/gumbo-parser/src/error.h +5 -1
- data/gumbo-parser/src/nokogiri_gumbo.h +11 -2
- data/gumbo-parser/src/parser.c +64 -23
- data/gumbo-parser/src/tokenizer.c +7 -6
- data/lib/nokogiri/class_resolver.rb +1 -1
- data/lib/nokogiri/css/node.rb +6 -2
- data/lib/nokogiri/css/parser.rb +6 -4
- data/lib/nokogiri/css/parser.y +2 -2
- data/lib/nokogiri/css/parser_extras.rb +6 -66
- data/lib/nokogiri/css/selector_cache.rb +38 -0
- data/lib/nokogiri/css/tokenizer.rb +4 -4
- data/lib/nokogiri/css/tokenizer.rex +9 -8
- data/lib/nokogiri/css/xpath_visitor.rb +43 -27
- data/lib/nokogiri/css.rb +86 -20
- data/lib/nokogiri/decorators/slop.rb +3 -5
- data/lib/nokogiri/encoding_handler.rb +2 -2
- data/lib/nokogiri/html4/document.rb +45 -24
- data/lib/nokogiri/html4/document_fragment.rb +124 -12
- data/lib/nokogiri/html4/encoding_reader.rb +2 -2
- data/lib/nokogiri/html4/sax/parser.rb +23 -38
- data/lib/nokogiri/html4/sax/parser_context.rb +4 -9
- data/lib/nokogiri/html4.rb +9 -14
- data/lib/nokogiri/html5/builder.rb +40 -0
- data/lib/nokogiri/html5/document.rb +61 -30
- data/lib/nokogiri/html5/document_fragment.rb +130 -20
- data/lib/nokogiri/html5/node.rb +4 -4
- data/lib/nokogiri/html5.rb +114 -138
- data/lib/nokogiri/version/constant.rb +1 -1
- data/lib/nokogiri/version/info.rb +6 -5
- data/lib/nokogiri/xml/attr.rb +2 -2
- data/lib/nokogiri/xml/builder.rb +8 -1
- data/lib/nokogiri/xml/document.rb +74 -31
- data/lib/nokogiri/xml/document_fragment.rb +86 -15
- data/lib/nokogiri/xml/namespace.rb +1 -2
- data/lib/nokogiri/xml/node.rb +113 -35
- data/lib/nokogiri/xml/node_set.rb +12 -10
- data/lib/nokogiri/xml/parse_options.rb +1 -1
- data/lib/nokogiri/xml/pp/node.rb +6 -1
- data/lib/nokogiri/xml/reader.rb +51 -17
- data/lib/nokogiri/xml/relax_ng.rb +57 -20
- data/lib/nokogiri/xml/sax/document.rb +174 -83
- data/lib/nokogiri/xml/sax/parser.rb +115 -41
- data/lib/nokogiri/xml/sax/parser_context.rb +116 -8
- data/lib/nokogiri/xml/sax/push_parser.rb +3 -0
- data/lib/nokogiri/xml/sax.rb +48 -0
- data/lib/nokogiri/xml/schema.rb +112 -45
- data/lib/nokogiri/xml/searchable.rb +9 -11
- data/lib/nokogiri/xml/syntax_error.rb +23 -1
- data/lib/nokogiri/xml.rb +14 -25
- data/lib/nokogiri/xslt/stylesheet.rb +29 -7
- data/lib/nokogiri/xslt.rb +4 -10
- data/lib/nokogiri.rb +1 -1
- data/lib/xsd/xmlparser/nokogiri.rb +3 -4
- data/ports/archives/libxml2-2.13.5.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.42.tar.xz +0 -0
- metadata +15 -14
- data/ext/nokogiri/libxml2_backwards_compat.c +0 -121
- data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +0 -25
- data/ports/archives/libxml2-2.11.5.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.38.tar.xz +0 -0
data/lib/nokogiri/xml/builder.rb
CHANGED
@@ -475,7 +475,14 @@ module Nokogiri
|
|
475
475
|
if block
|
476
476
|
old_parent = @doc_builder.parent
|
477
477
|
@doc_builder.parent = @node
|
478
|
-
|
478
|
+
|
479
|
+
arity = @doc_builder.arity || block.arity
|
480
|
+
value = if arity <= 0
|
481
|
+
@doc_builder.instance_eval(&block)
|
482
|
+
else
|
483
|
+
yield(@doc_builder)
|
484
|
+
end
|
485
|
+
|
479
486
|
@doc_builder.parent = old_parent
|
480
487
|
return value
|
481
488
|
end
|
@@ -5,12 +5,12 @@ require "pathname"
|
|
5
5
|
|
6
6
|
module Nokogiri
|
7
7
|
module XML
|
8
|
-
# Nokogiri::XML::Document is the main entry point for dealing with XML documents.
|
9
|
-
# is created by parsing
|
10
|
-
# on parsing.
|
8
|
+
# Nokogiri::XML::Document is the main entry point for dealing with \XML documents. The Document
|
9
|
+
# is created by parsing \XML content from a String or an IO object. See
|
10
|
+
# Nokogiri::XML::Document.parse for more information on parsing.
|
11
11
|
#
|
12
|
-
#
|
13
|
-
#
|
12
|
+
# Document inherits a great deal of functionality from its superclass Nokogiri::XML::Node, so
|
13
|
+
# please read that class's documentation as well.
|
14
14
|
class Document < Nokogiri::XML::Node
|
15
15
|
# See http://www.w3.org/TR/REC-xml-names/#ns-decl for more details. Note that we're not
|
16
16
|
# attempting to handle unicode characters partly because libxml2 doesn't handle unicode
|
@@ -19,33 +19,45 @@ module Nokogiri
|
|
19
19
|
NCNAME_CHAR = NCNAME_START_CHAR + "\\-\\.0-9"
|
20
20
|
NCNAME_RE = /^xmlns(?::([#{NCNAME_START_CHAR}][#{NCNAME_CHAR}]*))?$/
|
21
21
|
|
22
|
+
OBJECT_DUP_METHOD = Object.instance_method(:dup)
|
23
|
+
OBJECT_CLONE_METHOD = Object.instance_method(:clone)
|
24
|
+
private_constant :OBJECT_DUP_METHOD, :OBJECT_CLONE_METHOD
|
25
|
+
|
22
26
|
class << self
|
23
|
-
#
|
27
|
+
# call-seq:
|
28
|
+
# parse(input) { |options| ... } => Nokogiri::XML::Document
|
29
|
+
# parse(input, url:, encoding:, options:) => Nokogiri::XML::Document
|
24
30
|
#
|
25
|
-
#
|
26
|
-
# _read_ and _close_ such as an IO, or StringIO.
|
31
|
+
# Parse \XML input from a String or IO object, and return a new XML::Document.
|
27
32
|
#
|
28
|
-
#
|
33
|
+
# 🛡 By default, Nokogiri treats documents as untrusted, and so does not attempt to load DTDs
|
34
|
+
# or access the network. See Nokogiri::XML::ParseOptions for a complete list of options; and
|
35
|
+
# that module's DEFAULT_XML constant for what's set (and not set) by default.
|
29
36
|
#
|
30
|
-
#
|
31
|
-
#
|
37
|
+
# [Required Parameters]
|
38
|
+
# - +input+ (String | IO) The content to be parsed.
|
32
39
|
#
|
33
|
-
#
|
34
|
-
#
|
35
|
-
# Nokogiri::XML::ParseOptions for more information.
|
40
|
+
# [Optional Keyword Arguments]
|
41
|
+
# - +url:+ (String) The base URI for this document.
|
36
42
|
#
|
37
|
-
# +
|
38
|
-
#
|
43
|
+
# - +encoding:+ (String) The name of the encoding that should be used when processing the
|
44
|
+
# document. When not provided, the encoding will be determined based on the document
|
45
|
+
# content.
|
39
46
|
#
|
40
|
-
#
|
41
|
-
#
|
42
|
-
#
|
43
|
-
# and that module's DEFAULT_XML constant for what's set (and not
|
44
|
-
# set) by default.
|
47
|
+
# - +options:+ (Nokogiri::XML::ParseOptions) Configuration object that determines some
|
48
|
+
# behaviors during parsing. See ParseOptions for more information. The default value is
|
49
|
+
# +ParseOptions::DEFAULT_XML+.
|
45
50
|
#
|
46
|
-
#
|
51
|
+
# [Yields]
|
52
|
+
# If a block is given, a Nokogiri::XML::ParseOptions object is yielded to the block which
|
53
|
+
# can be configured before parsing. See Nokogiri::XML::ParseOptions for more information.
|
47
54
|
#
|
48
|
-
|
55
|
+
# [Returns] Nokogiri::XML::Document
|
56
|
+
def parse(
|
57
|
+
string_or_io,
|
58
|
+
url_ = nil, encoding_ = nil, options_ = XML::ParseOptions::DEFAULT_XML,
|
59
|
+
url: url_, encoding: encoding_, options: options_
|
60
|
+
)
|
49
61
|
options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
|
50
62
|
yield options if block_given?
|
51
63
|
|
@@ -60,6 +72,7 @@ module Nokogiri
|
|
60
72
|
end
|
61
73
|
|
62
74
|
doc = if string_or_io.respond_to?(:read)
|
75
|
+
# TODO: should we instead check for respond_to?(:to_path) ?
|
63
76
|
if string_or_io.is_a?(Pathname)
|
64
77
|
# resolve the Pathname to the file and open it as an IO object, see #2110
|
65
78
|
string_or_io = string_or_io.expand_path.open
|
@@ -174,13 +187,44 @@ module Nokogiri
|
|
174
187
|
# Since v1.12.4
|
175
188
|
attr_accessor :namespace_inheritance
|
176
189
|
|
177
|
-
# :nodoc:
|
178
|
-
def initialize(*args) # rubocop:disable Lint/MissingSuper
|
190
|
+
def initialize(*args) # :nodoc: # rubocop:disable Lint/MissingSuper
|
179
191
|
@errors = []
|
180
192
|
@decorators = nil
|
181
193
|
@namespace_inheritance = false
|
182
194
|
end
|
183
195
|
|
196
|
+
#
|
197
|
+
# :call-seq:
|
198
|
+
# dup → Nokogiri::XML::Document
|
199
|
+
# dup(level) → Nokogiri::XML::Document
|
200
|
+
#
|
201
|
+
# Duplicate this node.
|
202
|
+
#
|
203
|
+
# [Parameters]
|
204
|
+
# - +level+ (optional Integer). 0 is a shallow copy, 1 (the default) is a deep copy.
|
205
|
+
# [Returns] The new Nokogiri::XML::Document
|
206
|
+
#
|
207
|
+
def dup(level = 1)
|
208
|
+
copy = OBJECT_DUP_METHOD.bind_call(self)
|
209
|
+
copy.initialize_copy_with_args(self, level)
|
210
|
+
end
|
211
|
+
|
212
|
+
#
|
213
|
+
# :call-seq:
|
214
|
+
# clone → Nokogiri::XML::Document
|
215
|
+
# clone(level) → Nokogiri::XML::Document
|
216
|
+
#
|
217
|
+
# Clone this node.
|
218
|
+
#
|
219
|
+
# [Parameters]
|
220
|
+
# - +level+ (optional Integer). 0 is a shallow copy, 1 (the default) is a deep copy.
|
221
|
+
# [Returns] The new Nokogiri::XML::Document
|
222
|
+
#
|
223
|
+
def clone(level = 1)
|
224
|
+
copy = OBJECT_CLONE_METHOD.bind_call(self)
|
225
|
+
copy.initialize_copy_with_args(self, level)
|
226
|
+
end
|
227
|
+
|
184
228
|
# :call-seq:
|
185
229
|
# create_element(name, *contents_or_attrs, &block) → Nokogiri::XML::Element
|
186
230
|
#
|
@@ -327,10 +371,10 @@ module Nokogiri
|
|
327
371
|
end
|
328
372
|
|
329
373
|
##
|
330
|
-
# Validate this Document against
|
374
|
+
# Validate this Document against its DTD. Returns a list of errors on
|
331
375
|
# the document or +nil+ when there is no DTD.
|
332
376
|
def validate
|
333
|
-
return
|
377
|
+
return unless internal_subset
|
334
378
|
|
335
379
|
internal_subset.validate(self)
|
336
380
|
end
|
@@ -368,12 +412,11 @@ module Nokogiri
|
|
368
412
|
@decorators.each do |klass, list|
|
369
413
|
next unless node.is_a?(klass)
|
370
414
|
|
371
|
-
list.each { |
|
415
|
+
list.each { |mod| node.extend(mod) }
|
372
416
|
end
|
373
417
|
end
|
374
418
|
|
375
419
|
alias_method :to_xml, :serialize
|
376
|
-
alias_method :clone, :dup
|
377
420
|
|
378
421
|
# Get the hash of namespaces on the root Nokogiri::XML::Node
|
379
422
|
def namespaces
|
@@ -427,8 +470,6 @@ module Nokogiri
|
|
427
470
|
# instructions. If you have a use case and would like this functionality, please let us know
|
428
471
|
# by opening an issue or a discussion on the github project.
|
429
472
|
#
|
430
|
-
# ⚡ This is an experimental feature, available since v1.14.0
|
431
|
-
#
|
432
473
|
# *Example*
|
433
474
|
#
|
434
475
|
# doc = Nokogiri::XML.parse(<<~XML)
|
@@ -455,6 +496,8 @@ module Nokogiri
|
|
455
496
|
# doc.deconstruct_keys([:root])
|
456
497
|
# # => {:root=>nil}
|
457
498
|
#
|
499
|
+
# Since v1.14.0
|
500
|
+
#
|
458
501
|
def deconstruct_keys(keys)
|
459
502
|
{ root: root }
|
460
503
|
end
|
@@ -3,32 +3,103 @@
|
|
3
3
|
|
4
4
|
module Nokogiri
|
5
5
|
module XML
|
6
|
+
# DocumentFragment represents a fragment of an \XML document. It provides the same functionality
|
7
|
+
# exposed by XML::Node and can be used to contain one or more \XML subtrees.
|
6
8
|
class DocumentFragment < Nokogiri::XML::Node
|
7
|
-
|
8
|
-
#
|
9
|
-
|
10
|
-
|
9
|
+
# The options used to parse the document fragment. Returns the value of any options that were
|
10
|
+
# passed into the constructor as a parameter or set in a config block, else the default
|
11
|
+
# options for the specific subclass.
|
12
|
+
attr_reader :parse_options
|
13
|
+
|
14
|
+
class << self
|
15
|
+
# :call-seq:
|
16
|
+
# parse(input) { |options| ... } → XML::DocumentFragment
|
17
|
+
# parse(input, options:) → XML::DocumentFragment
|
18
|
+
#
|
19
|
+
# Parse \XML fragment input from a String, and return a new XML::DocumentFragment. This
|
20
|
+
# method creates a new, empty XML::Document to contain the fragment.
|
21
|
+
#
|
22
|
+
# [Required Parameters]
|
23
|
+
# - +input+ (String) The content to be parsed.
|
24
|
+
#
|
25
|
+
# [Optional Keyword Arguments]
|
26
|
+
# - +options+ (Nokogiri::XML::ParseOptions) Configuration object that determines some
|
27
|
+
# behaviors during parsing. See ParseOptions for more information. The default value is
|
28
|
+
# +ParseOptions::DEFAULT_XML+.
|
29
|
+
#
|
30
|
+
# [Yields]
|
31
|
+
# If a block is given, a Nokogiri::XML::ParseOptions object is yielded to the block which
|
32
|
+
# can be configured before parsing. See Nokogiri::XML::ParseOptions for more information.
|
33
|
+
#
|
34
|
+
# [Returns] Nokogiri::XML::DocumentFragment
|
35
|
+
def parse(tags, options_ = ParseOptions::DEFAULT_XML, options: options_, &block)
|
36
|
+
new(XML::Document.new, tags, options: options, &block)
|
37
|
+
end
|
38
|
+
|
39
|
+
# Wrapper method to separate the concerns of:
|
40
|
+
# - the native object allocator's parameter (it only requires `document`)
|
41
|
+
# - the initializer's parameters
|
42
|
+
def new(document, ...) # :nodoc:
|
43
|
+
instance = native_new(document)
|
44
|
+
instance.send(:initialize, document, ...)
|
45
|
+
instance
|
46
|
+
end
|
11
47
|
end
|
12
48
|
|
13
|
-
|
14
|
-
#
|
49
|
+
# :call-seq:
|
50
|
+
# new(document, input=nil) { |options| ... } → DocumentFragment
|
51
|
+
# new(document, input=nil, context:, options:) → DocumentFragment
|
52
|
+
#
|
53
|
+
# Parse \XML fragment input from a String, and return a new DocumentFragment that is
|
54
|
+
# associated with the given +document+.
|
55
|
+
#
|
56
|
+
# 💡 It's recommended to use either XML::DocumentFragment.parse or Node#parse rather than call
|
57
|
+
# this method directly.
|
58
|
+
#
|
59
|
+
# [Required Parameters]
|
60
|
+
# - +document+ (XML::Document) The parent document to associate the returned fragment with.
|
61
|
+
#
|
62
|
+
# [Optional Parameters]
|
63
|
+
# - +input+ (String) The content to be parsed.
|
15
64
|
#
|
16
|
-
#
|
17
|
-
#
|
18
|
-
#
|
19
|
-
|
65
|
+
# [Optional Keyword Arguments]
|
66
|
+
# - +context:+ (Nokogiri::XML::Node) The <b>context node</b> for the subtree created. See
|
67
|
+
# below for more information.
|
68
|
+
#
|
69
|
+
# - +options:+ (Nokogiri::XML::ParseOptions) Configuration object that determines some
|
70
|
+
# behaviors during parsing. See ParseOptions for more information. The default value is
|
71
|
+
# +ParseOptions::DEFAULT_XML+.
|
72
|
+
#
|
73
|
+
# [Yields]
|
74
|
+
# If a block is given, a Nokogiri::XML::ParseOptions object is yielded to the block which
|
75
|
+
# can be configured before parsing. See ParseOptions for more information.
|
76
|
+
#
|
77
|
+
# [Returns] XML::DocumentFragment
|
78
|
+
#
|
79
|
+
# === Context \Node
|
80
|
+
#
|
81
|
+
# If a context node is specified using +context:+, then the fragment will be created by
|
82
|
+
# calling Node#parse on that node, so the parser will behave as if that Node is the parent of
|
83
|
+
# the fragment subtree, and will resolve namespaces relative to that node.
|
84
|
+
#
|
85
|
+
def initialize(
|
86
|
+
document, tags = nil,
|
87
|
+
context_ = nil, options_ = ParseOptions::DEFAULT_XML,
|
88
|
+
context: context_, options: options_
|
89
|
+
) # rubocop:disable Lint/MissingSuper
|
20
90
|
return self unless tags
|
21
91
|
|
22
92
|
options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
|
93
|
+
@parse_options = options
|
23
94
|
yield options if block_given?
|
24
95
|
|
25
|
-
children = if
|
96
|
+
children = if context
|
26
97
|
# Fix for issue#490
|
27
98
|
if Nokogiri.jruby?
|
28
99
|
# fix for issue #770
|
29
|
-
|
100
|
+
context.parse("<root #{namespace_declarations(context)}>#{tags}</root>", options).children
|
30
101
|
else
|
31
|
-
|
102
|
+
context.parse(tags, options)
|
32
103
|
end
|
33
104
|
else
|
34
105
|
wrapper_doc = XML::Document.parse("<root>#{tags}</root>", nil, nil, options)
|
@@ -154,8 +225,6 @@ module Nokogiri
|
|
154
225
|
# root elements, you should deconstruct the array returned by
|
155
226
|
# <tt>DocumentFragment#elements</tt>.
|
156
227
|
#
|
157
|
-
# ⚡ This is an experimental feature, available since v1.14.0
|
158
|
-
#
|
159
228
|
# *Example*
|
160
229
|
#
|
161
230
|
# frag = Nokogiri::HTML5.fragment(<<~HTML)
|
@@ -187,6 +256,8 @@ module Nokogiri
|
|
187
256
|
# # }),
|
188
257
|
# # #(Element:0x398 { name = "div", children = [ #(Text "End")] })]
|
189
258
|
#
|
259
|
+
# Since v1.14.0
|
260
|
+
#
|
190
261
|
def deconstruct
|
191
262
|
children.to_a
|
192
263
|
end
|
@@ -16,8 +16,6 @@ module Nokogiri
|
|
16
16
|
# - +prefix+ → (String, nil) The namespace's prefix, or +nil+ if there is no prefix (e.g., default namespace).
|
17
17
|
# - +href+ → (String) The namespace's URI
|
18
18
|
#
|
19
|
-
# ⚡ This is an experimental feature, available since v1.14.0
|
20
|
-
#
|
21
19
|
# *Example*
|
22
20
|
#
|
23
21
|
# doc = Nokogiri::XML.parse(<<~XML)
|
@@ -43,6 +41,7 @@ module Nokogiri
|
|
43
41
|
# doc.root.elements.last.namespace.deconstruct_keys([:prefix, :href])
|
44
42
|
# # => {:prefix=>"noko", :href=>"http://nokogiri.org/ns/noko"}
|
45
43
|
#
|
44
|
+
# Since v1.14.0
|
46
45
|
#
|
47
46
|
def deconstruct_keys(keys)
|
48
47
|
{ prefix: prefix, href: href }
|
data/lib/nokogiri/xml/node.rb
CHANGED
@@ -127,6 +127,42 @@ module Nokogiri
|
|
127
127
|
# This is intentionally empty, and sets the method signature for subclasses.
|
128
128
|
end
|
129
129
|
|
130
|
+
#
|
131
|
+
# :call-seq:
|
132
|
+
# dup → Nokogiri::XML::Node
|
133
|
+
# dup(level) → Nokogiri::XML::Node
|
134
|
+
# dup(level, new_parent_doc) → Nokogiri::XML::Node
|
135
|
+
#
|
136
|
+
# Duplicate this node.
|
137
|
+
#
|
138
|
+
# [Parameters]
|
139
|
+
# - +level+ (optional Integer). 0 is a shallow copy, 1 (the default) is a deep copy.
|
140
|
+
# - +new_parent_doc+ (optional Nokogiri::XML::Document)
|
141
|
+
# The new node's parent Document. Defaults to the Document of the current node.
|
142
|
+
# [Returns] The new Nokogiri::XML::Node
|
143
|
+
#
|
144
|
+
def dup(level = 1, new_parent_doc = document)
|
145
|
+
super().initialize_copy_with_args(self, level, new_parent_doc)
|
146
|
+
end
|
147
|
+
|
148
|
+
#
|
149
|
+
# :call-seq:
|
150
|
+
# clone → Nokogiri::XML::Node
|
151
|
+
# clone(level) → Nokogiri::XML::Node
|
152
|
+
# clone(level, new_parent_doc) → Nokogiri::XML::Node
|
153
|
+
#
|
154
|
+
# Clone this node.
|
155
|
+
#
|
156
|
+
# [Parameters]
|
157
|
+
# - +level+ (optional Integer). 0 is a shallow copy, 1 (the default) is a deep copy.
|
158
|
+
# - +new_parent_doc+
|
159
|
+
# The new node's parent Document. Defaults to the Document of the current node.
|
160
|
+
# [Returns] The new Nokogiri::XML::Node
|
161
|
+
#
|
162
|
+
def clone(level = 1, new_parent_doc = document)
|
163
|
+
super().initialize_copy_with_args(self, level, new_parent_doc)
|
164
|
+
end
|
165
|
+
|
130
166
|
###
|
131
167
|
# Decorate this node with the decorators set up in this node's Document
|
132
168
|
def decorate!
|
@@ -228,7 +264,7 @@ module Nokogiri
|
|
228
264
|
if new_parent.nil?
|
229
265
|
raise "Failed to parse '#{node_or_tags}' in the context of a '#{context_node.name}' element"
|
230
266
|
end
|
231
|
-
when
|
267
|
+
when Node
|
232
268
|
new_parent = node_or_tags.dup
|
233
269
|
else
|
234
270
|
raise ArgumentError, "Requires a String or Node argument, and cannot accept a #{node_or_tags.class}"
|
@@ -406,8 +442,48 @@ module Nokogiri
|
|
406
442
|
end
|
407
443
|
|
408
444
|
####
|
409
|
-
#
|
410
|
-
#
|
445
|
+
# call-seq:
|
446
|
+
# content=(input)
|
447
|
+
#
|
448
|
+
# Set the content of this node to +input+.
|
449
|
+
#
|
450
|
+
# [Parameters]
|
451
|
+
# - +input+ (String) The new content for this node. Input is considered to be raw content, and
|
452
|
+
# so will be entity-escaped in the final DOM string.
|
453
|
+
#
|
454
|
+
# [Example]
|
455
|
+
# Note how entities are handled:
|
456
|
+
#
|
457
|
+
# doc = Nokogiri::HTML::Document.parse(<<~HTML)
|
458
|
+
# <html>
|
459
|
+
# <body>
|
460
|
+
# <div id="first">asdf</div>
|
461
|
+
# <div id="second">asdf</div>
|
462
|
+
# HTML
|
463
|
+
#
|
464
|
+
# text_node = doc.at_css("div#first").children.first
|
465
|
+
# div_node = doc.at_css("div#second")
|
466
|
+
#
|
467
|
+
# value = "You &amp; Me"
|
468
|
+
#
|
469
|
+
# text_node.content = value
|
470
|
+
# div_node.content = value
|
471
|
+
#
|
472
|
+
# doc.css("div").to_html
|
473
|
+
# # => "<div id=\"first\">You &amp;amp; Me</div>
|
474
|
+
# # <div id=\"second\">You &amp;amp; Me</div>"
|
475
|
+
#
|
476
|
+
# For content that is already entity-escaped, use CGI::unescapeHTML to decode it:
|
477
|
+
#
|
478
|
+
# text_node.content = CGI::unescapeHTML(value)
|
479
|
+
# div_node.content = CGI::unescapeHTML(value)
|
480
|
+
#
|
481
|
+
# doc.css("div").to_html
|
482
|
+
# # => "<div id=\"first\">You &amp; Me</div>
|
483
|
+
# # <div id=\"second\">You &amp; Me</div>"
|
484
|
+
#
|
485
|
+
# See also: #native_content=
|
486
|
+
#
|
411
487
|
def content=(string)
|
412
488
|
self.native_content = encode_special_chars(string.to_s)
|
413
489
|
end
|
@@ -474,7 +550,6 @@ module Nokogiri
|
|
474
550
|
alias_method :to_str, :content
|
475
551
|
alias_method :name, :node_name
|
476
552
|
alias_method :type, :node_type
|
477
|
-
alias_method :clone, :dup
|
478
553
|
alias_method :elements, :element_children
|
479
554
|
|
480
555
|
# :section: Working With Node Attributes
|
@@ -1049,31 +1124,40 @@ module Nokogiri
|
|
1049
1124
|
|
1050
1125
|
return Nokogiri::XML::NodeSet.new(document) if contents.empty?
|
1051
1126
|
|
1052
|
-
# libxml2 does not obey the +recover+ option after encountering errors during +in_context+
|
1053
|
-
# parsing, and so this horrible hack is here to try to emulate recovery behavior.
|
1054
|
-
#
|
1055
|
-
# Unfortunately, this means we're no longer parsing "in context" and so namespaces that
|
1056
|
-
# would have been inherited from the context node won't be handled correctly. This hack was
|
1057
|
-
# written in 2010, and I regret it, because it's silently degrading functionality in a way
|
1058
|
-
# that's not easily prevented (or even detected).
|
1059
|
-
#
|
1060
|
-
# I think preferable behavior would be to either:
|
1061
|
-
#
|
1062
|
-
# a. add an error noting that we "fell back" and pointing the user to turning off the +recover+ option
|
1063
|
-
# b. don't recover, but raise a sensible exception
|
1064
|
-
#
|
1065
|
-
# For context and background: https://github.com/sparklemotion/nokogiri/issues/313
|
1066
|
-
# FIXME bug report: https://github.com/sparklemotion/nokogiri/issues/2092
|
1067
1127
|
error_count = document.errors.length
|
1068
1128
|
node_set = in_context(contents, options.to_i)
|
1069
|
-
|
1070
|
-
|
1129
|
+
|
1130
|
+
if document.errors.length > error_count
|
1131
|
+
raise document.errors[error_count] unless options.recover?
|
1132
|
+
|
1133
|
+
# TODO: remove this block when libxml2 < 2.13 is no longer supported
|
1134
|
+
if node_set.empty?
|
1135
|
+
# libxml2 < 2.13 does not obey the +recover+ option after encountering errors during
|
1136
|
+
# +in_context+ parsing, and so this horrible hack is here to try to emulate recovery
|
1137
|
+
# behavior.
|
1138
|
+
#
|
1139
|
+
# (Note that HTML4 fragment parsing seems to have been fixed in abd74186, and XML
|
1140
|
+
# fragment parsing is fixed in 1c106edf. Both are in 2.13.)
|
1141
|
+
#
|
1142
|
+
# Unfortunately, this means we're no longer parsing "in context" and so namespaces that
|
1143
|
+
# would have been inherited from the context node won't be handled correctly. This hack
|
1144
|
+
# was written in 2010, and I regret it, because it's silently degrading functionality in
|
1145
|
+
# a way that's not easily prevented (or even detected).
|
1146
|
+
#
|
1147
|
+
# I think preferable behavior would be to either:
|
1148
|
+
#
|
1149
|
+
# a. add an error noting that we "fell back" and pointing the user to turning off the
|
1150
|
+
# +recover+ option
|
1151
|
+
# b. don't recover, but raise a sensible exception
|
1152
|
+
#
|
1153
|
+
# For context and background:
|
1154
|
+
# - https://github.com/sparklemotion/nokogiri/issues/313
|
1155
|
+
# - https://github.com/sparklemotion/nokogiri/issues/2092
|
1071
1156
|
fragment = document.related_class("DocumentFragment").parse(contents)
|
1072
1157
|
node_set = fragment.children
|
1073
|
-
else
|
1074
|
-
raise document.errors[error_count]
|
1075
1158
|
end
|
1076
1159
|
end
|
1160
|
+
|
1077
1161
|
node_set
|
1078
1162
|
end
|
1079
1163
|
|
@@ -1165,7 +1249,7 @@ module Nokogiri
|
|
1165
1249
|
# Fetch the Nokogiri::HTML4::ElementDescription for this node. Returns
|
1166
1250
|
# nil on XML documents and on unknown tags.
|
1167
1251
|
def description
|
1168
|
-
return
|
1252
|
+
return if document.xml?
|
1169
1253
|
|
1170
1254
|
Nokogiri::HTML4::ElementDescription[name]
|
1171
1255
|
end
|
@@ -1254,8 +1338,8 @@ module Nokogiri
|
|
1254
1338
|
# Compare two Node objects with respect to their Document. Nodes from
|
1255
1339
|
# different documents cannot be compared.
|
1256
1340
|
def <=>(other)
|
1257
|
-
return
|
1258
|
-
return
|
1341
|
+
return unless other.is_a?(Nokogiri::XML::Node)
|
1342
|
+
return unless document == other.document
|
1259
1343
|
|
1260
1344
|
compare(other)
|
1261
1345
|
end
|
@@ -1278,6 +1362,7 @@ module Nokogiri
|
|
1278
1362
|
# end
|
1279
1363
|
#
|
1280
1364
|
def serialize(*args, &block)
|
1365
|
+
# TODO: deprecate non-hash options, see 46c68ed 2009-06-20 for context
|
1281
1366
|
options = if args.first.is_a?(Hash)
|
1282
1367
|
args.shift
|
1283
1368
|
else
|
@@ -1429,8 +1514,6 @@ module Nokogiri
|
|
1429
1514
|
# - +content+ → (String) The contents of all the text nodes in this node's subtree. See #content.
|
1430
1515
|
# - +inner_html+ → (String) The inner markup for the children of this node. See #inner_html.
|
1431
1516
|
#
|
1432
|
-
# ⚡ This is an experimental feature, available since v1.14.0
|
1433
|
-
#
|
1434
1517
|
# *Example*
|
1435
1518
|
#
|
1436
1519
|
# doc = Nokogiri::XML.parse(<<~XML)
|
@@ -1465,6 +1548,8 @@ module Nokogiri
|
|
1465
1548
|
# # value = "def"
|
1466
1549
|
# # })]}
|
1467
1550
|
#
|
1551
|
+
# Since v1.14.0
|
1552
|
+
#
|
1468
1553
|
def deconstruct_keys(keys)
|
1469
1554
|
requested_keys = DECONSTRUCT_KEYS & keys
|
1470
1555
|
{}.tap do |values|
|
@@ -1535,19 +1620,12 @@ module Nokogiri
|
|
1535
1620
|
node_or_tags
|
1536
1621
|
end
|
1537
1622
|
|
1538
|
-
USING_LIBXML_WITH_BROKEN_SERIALIZATION = Nokogiri.uses_libxml?("~> 2.6.0").freeze
|
1539
|
-
private_constant :USING_LIBXML_WITH_BROKEN_SERIALIZATION
|
1540
|
-
|
1541
1623
|
def to_format(save_option, options)
|
1542
|
-
return dump_html if USING_LIBXML_WITH_BROKEN_SERIALIZATION
|
1543
|
-
|
1544
1624
|
options[:save_with] = save_option unless options[:save_with]
|
1545
1625
|
serialize(options)
|
1546
1626
|
end
|
1547
1627
|
|
1548
1628
|
def write_format_to(save_option, io, options)
|
1549
|
-
return (io << dump_html) if USING_LIBXML_WITH_BROKEN_SERIALIZATION
|
1550
|
-
|
1551
1629
|
options[:save_with] ||= save_option
|
1552
1630
|
write_to(io, options)
|
1553
1631
|
end
|
@@ -4,9 +4,13 @@
|
|
4
4
|
module Nokogiri
|
5
5
|
module XML
|
6
6
|
####
|
7
|
-
# A NodeSet contains a list of Nokogiri::XML::Node objects.
|
8
|
-
#
|
9
|
-
#
|
7
|
+
# A NodeSet is an Enumerable that contains a list of Nokogiri::XML::Node objects.
|
8
|
+
#
|
9
|
+
# Typically a NodeSet is returned as a result of searching a Document via
|
10
|
+
# Nokogiri::XML::Searchable#css or Nokogiri::XML::Searchable#xpath.
|
11
|
+
#
|
12
|
+
# Note that the `#dup` and `#clone` methods perform shallow copies; these methods do not copy
|
13
|
+
# the Nodes contained in the NodeSet (similar to how Array and other Enumerable classes work).
|
10
14
|
class NodeSet
|
11
15
|
include Nokogiri::XML::Searchable
|
12
16
|
include Enumerable
|
@@ -14,8 +18,6 @@ module Nokogiri
|
|
14
18
|
# The Document this NodeSet is associated with
|
15
19
|
attr_accessor :document
|
16
20
|
|
17
|
-
alias_method :clone, :dup
|
18
|
-
|
19
21
|
# Create a NodeSet with +document+ defaulting to +list+
|
20
22
|
def initialize(document, list = [])
|
21
23
|
@document = document
|
@@ -121,7 +123,7 @@ module Nokogiri
|
|
121
123
|
return self[args.first]
|
122
124
|
end
|
123
125
|
|
124
|
-
super
|
126
|
+
super
|
125
127
|
end
|
126
128
|
alias_method :%, :at
|
127
129
|
|
@@ -372,7 +374,7 @@ module Nokogiri
|
|
372
374
|
# Removes the last element from set and returns it, or +nil+ if
|
373
375
|
# the set is empty
|
374
376
|
def pop
|
375
|
-
return
|
377
|
+
return if length == 0
|
376
378
|
|
377
379
|
delete(last)
|
378
380
|
end
|
@@ -381,7 +383,7 @@ module Nokogiri
|
|
381
383
|
# Returns the first element of the NodeSet and removes it. Returns
|
382
384
|
# +nil+ if the set is empty.
|
383
385
|
def shift
|
384
|
-
return
|
386
|
+
return if length == 0
|
385
387
|
|
386
388
|
delete(first)
|
387
389
|
end
|
@@ -423,7 +425,7 @@ module Nokogiri
|
|
423
425
|
end
|
424
426
|
|
425
427
|
###
|
426
|
-
# Return a nicely
|
428
|
+
# Return a nicely formatted string representation
|
427
429
|
def inspect
|
428
430
|
"[#{map(&:inspect).join(", ")}]"
|
429
431
|
end
|
@@ -435,7 +437,7 @@ module Nokogiri
|
|
435
437
|
#
|
436
438
|
# Returns the members of this NodeSet as an array, to use in pattern matching.
|
437
439
|
#
|
438
|
-
#
|
440
|
+
# Since v1.14.0
|
439
441
|
#
|
440
442
|
def deconstruct
|
441
443
|
to_a
|
@@ -140,7 +140,7 @@ module Nokogiri
|
|
140
140
|
|
141
141
|
# Relax any hardcoded limit from the parser. Off by default.
|
142
142
|
#
|
143
|
-
# ⚠
|
143
|
+
# ⚠ <b>It is UNSAFE to set this option</b> when parsing untrusted documents.
|
144
144
|
HUGE = 1 << 19
|
145
145
|
|
146
146
|
# Support line numbers up to <code>long int</code> (default is a <code>short int</code>). On
|