nokogiri 1.16.8-x64-mingw-ucrt → 1.17.0-x64-mingw-ucrt
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +11 -21
- data/README.md +4 -0
- data/dependencies.yml +6 -6
- data/ext/nokogiri/extconf.rb +191 -137
- data/ext/nokogiri/gumbo.c +69 -53
- data/ext/nokogiri/html4_document.c +10 -4
- data/ext/nokogiri/html4_element_description.c +18 -18
- data/ext/nokogiri/html4_sax_parser.c +40 -0
- data/ext/nokogiri/html4_sax_parser_context.c +48 -58
- data/ext/nokogiri/html4_sax_push_parser.c +25 -24
- data/ext/nokogiri/include/libexslt/exsltconfig.h +3 -3
- data/ext/nokogiri/include/libxml2/libxml/HTMLparser.h +12 -19
- data/ext/nokogiri/include/libxml2/libxml/c14n.h +1 -12
- data/ext/nokogiri/include/libxml2/libxml/debugXML.h +1 -1
- data/ext/nokogiri/include/libxml2/libxml/encoding.h +9 -0
- data/ext/nokogiri/include/libxml2/libxml/entities.h +12 -1
- data/ext/nokogiri/include/libxml2/libxml/hash.h +19 -0
- data/ext/nokogiri/include/libxml2/libxml/list.h +2 -2
- data/ext/nokogiri/include/libxml2/libxml/nanohttp.h +17 -0
- data/ext/nokogiri/include/libxml2/libxml/parser.h +60 -54
- data/ext/nokogiri/include/libxml2/libxml/parserInternals.h +9 -1
- data/ext/nokogiri/include/libxml2/libxml/pattern.h +6 -0
- data/ext/nokogiri/include/libxml2/libxml/tree.h +32 -12
- data/ext/nokogiri/include/libxml2/libxml/uri.h +11 -0
- data/ext/nokogiri/include/libxml2/libxml/valid.h +29 -2
- data/ext/nokogiri/include/libxml2/libxml/xinclude.h +7 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlIO.h +21 -4
- data/ext/nokogiri/include/libxml2/libxml/xmlerror.h +14 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlexports.h +111 -15
- data/ext/nokogiri/include/libxml2/libxml/xmlmemory.h +8 -45
- data/ext/nokogiri/include/libxml2/libxml/xmlreader.h +2 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlsave.h +5 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlunicode.h +165 -1
- data/ext/nokogiri/include/libxml2/libxml/xmlversion.h +7 -171
- data/ext/nokogiri/include/libxml2/libxml/xmlwriter.h +1 -0
- data/ext/nokogiri/include/libxml2/libxml/xpath.h +4 -0
- data/ext/nokogiri/include/libxslt/xsltInternals.h +3 -0
- data/ext/nokogiri/include/libxslt/xsltconfig.h +4 -37
- data/ext/nokogiri/libxml2_polyfill.c +114 -0
- data/ext/nokogiri/nokogiri.c +9 -2
- data/ext/nokogiri/nokogiri.h +18 -33
- data/ext/nokogiri/xml_attr.c +1 -1
- data/ext/nokogiri/xml_cdata.c +2 -10
- data/ext/nokogiri/xml_comment.c +3 -8
- data/ext/nokogiri/xml_document.c +163 -156
- data/ext/nokogiri/xml_document_fragment.c +10 -25
- data/ext/nokogiri/xml_dtd.c +1 -1
- data/ext/nokogiri/xml_element_content.c +9 -9
- data/ext/nokogiri/xml_encoding_handler.c +4 -4
- data/ext/nokogiri/xml_namespace.c +6 -6
- data/ext/nokogiri/xml_node.c +130 -104
- data/ext/nokogiri/xml_node_set.c +46 -44
- data/ext/nokogiri/xml_reader.c +54 -58
- data/ext/nokogiri/xml_relax_ng.c +35 -56
- data/ext/nokogiri/xml_sax_parser.c +156 -88
- data/ext/nokogiri/xml_sax_parser_context.c +213 -131
- data/ext/nokogiri/xml_sax_push_parser.c +68 -49
- data/ext/nokogiri/xml_schema.c +50 -85
- data/ext/nokogiri/xml_syntax_error.c +19 -11
- data/ext/nokogiri/xml_text.c +2 -4
- data/ext/nokogiri/xml_xpath_context.c +2 -2
- data/ext/nokogiri/xslt_stylesheet.c +8 -8
- data/lib/nokogiri/3.1/nokogiri.so +0 -0
- data/lib/nokogiri/3.2/nokogiri.so +0 -0
- data/lib/nokogiri/3.3/nokogiri.so +0 -0
- data/lib/nokogiri/class_resolver.rb +1 -1
- data/lib/nokogiri/css/node.rb +6 -2
- data/lib/nokogiri/css/parser.rb +6 -4
- data/lib/nokogiri/css/parser.y +2 -2
- data/lib/nokogiri/css/parser_extras.rb +6 -66
- data/lib/nokogiri/css/selector_cache.rb +38 -0
- data/lib/nokogiri/css/tokenizer.rb +4 -4
- data/lib/nokogiri/css/tokenizer.rex +9 -8
- data/lib/nokogiri/css/xpath_visitor.rb +42 -6
- data/lib/nokogiri/css.rb +86 -20
- data/lib/nokogiri/decorators/slop.rb +3 -5
- data/lib/nokogiri/encoding_handler.rb +2 -2
- data/lib/nokogiri/html4/document.rb +44 -23
- data/lib/nokogiri/html4/document_fragment.rb +124 -12
- data/lib/nokogiri/html4/encoding_reader.rb +1 -1
- data/lib/nokogiri/html4/sax/parser.rb +23 -38
- data/lib/nokogiri/html4/sax/parser_context.rb +4 -9
- data/lib/nokogiri/html4.rb +9 -14
- data/lib/nokogiri/html5/builder.rb +40 -0
- data/lib/nokogiri/html5/document.rb +61 -30
- data/lib/nokogiri/html5/document_fragment.rb +130 -20
- data/lib/nokogiri/html5/node.rb +4 -4
- data/lib/nokogiri/html5.rb +114 -72
- data/lib/nokogiri/version/constant.rb +1 -1
- data/lib/nokogiri/xml/builder.rb +8 -1
- data/lib/nokogiri/xml/document.rb +70 -26
- data/lib/nokogiri/xml/document_fragment.rb +84 -13
- data/lib/nokogiri/xml/node.rb +82 -11
- data/lib/nokogiri/xml/node_set.rb +9 -7
- data/lib/nokogiri/xml/parse_options.rb +1 -1
- data/lib/nokogiri/xml/pp/node.rb +6 -1
- data/lib/nokogiri/xml/reader.rb +46 -13
- data/lib/nokogiri/xml/relax_ng.rb +57 -20
- data/lib/nokogiri/xml/sax/document.rb +174 -83
- data/lib/nokogiri/xml/sax/parser.rb +115 -41
- data/lib/nokogiri/xml/sax/parser_context.rb +116 -8
- data/lib/nokogiri/xml/sax/push_parser.rb +3 -0
- data/lib/nokogiri/xml/sax.rb +48 -0
- data/lib/nokogiri/xml/schema.rb +112 -45
- data/lib/nokogiri/xml/searchable.rb +6 -8
- data/lib/nokogiri/xml/syntax_error.rb +22 -0
- data/lib/nokogiri/xml.rb +13 -24
- data/lib/nokogiri/xslt.rb +3 -9
- data/lib/xsd/xmlparser/nokogiri.rb +3 -4
- metadata +8 -4
- data/ext/nokogiri/libxml2_backwards_compat.c +0 -121
@@ -5,12 +5,12 @@ require "pathname"
|
|
5
5
|
|
6
6
|
module Nokogiri
|
7
7
|
module XML
|
8
|
-
# Nokogiri::XML::Document is the main entry point for dealing with XML documents.
|
9
|
-
# is created by parsing
|
10
|
-
# on parsing.
|
8
|
+
# Nokogiri::XML::Document is the main entry point for dealing with \XML documents. The Document
|
9
|
+
# is created by parsing \XML content from a String or an IO object. See
|
10
|
+
# Nokogiri::XML::Document.parse for more information on parsing.
|
11
11
|
#
|
12
|
-
#
|
13
|
-
#
|
12
|
+
# Document inherits a great deal of functionality from its superclass Nokogiri::XML::Node, so
|
13
|
+
# please read that class's documentation as well.
|
14
14
|
class Document < Nokogiri::XML::Node
|
15
15
|
# See http://www.w3.org/TR/REC-xml-names/#ns-decl for more details. Note that we're not
|
16
16
|
# attempting to handle unicode characters partly because libxml2 doesn't handle unicode
|
@@ -19,33 +19,45 @@ module Nokogiri
|
|
19
19
|
NCNAME_CHAR = NCNAME_START_CHAR + "\\-\\.0-9"
|
20
20
|
NCNAME_RE = /^xmlns(?::([#{NCNAME_START_CHAR}][#{NCNAME_CHAR}]*))?$/
|
21
21
|
|
22
|
+
OBJECT_DUP_METHOD = Object.instance_method(:dup)
|
23
|
+
OBJECT_CLONE_METHOD = Object.instance_method(:clone)
|
24
|
+
private_constant :OBJECT_DUP_METHOD, :OBJECT_CLONE_METHOD
|
25
|
+
|
22
26
|
class << self
|
23
|
-
#
|
27
|
+
# call-seq:
|
28
|
+
# parse(input) { |options| ... } => Nokogiri::XML::Document
|
29
|
+
# parse(input, url:, encoding:, options:) => Nokogiri::XML::Document
|
24
30
|
#
|
25
|
-
#
|
26
|
-
# _read_ and _close_ such as an IO, or StringIO.
|
31
|
+
# Parse \XML input from a String or IO object, and return a new XML::Document.
|
27
32
|
#
|
28
|
-
#
|
33
|
+
# 🛡 By default, Nokogiri treats documents as untrusted, and so does not attempt to load DTDs
|
34
|
+
# or access the network. See Nokogiri::XML::ParseOptions for a complete list of options; and
|
35
|
+
# that module's DEFAULT_XML constant for what's set (and not set) by default.
|
29
36
|
#
|
30
|
-
#
|
31
|
-
#
|
37
|
+
# [Required Parameters]
|
38
|
+
# - +input+ (String | IO) The content to be parsed.
|
32
39
|
#
|
33
|
-
#
|
34
|
-
#
|
35
|
-
# Nokogiri::XML::ParseOptions for more information.
|
40
|
+
# [Optional Keyword Arguments]
|
41
|
+
# - +url:+ (String) The base URI for this document.
|
36
42
|
#
|
37
|
-
# +
|
38
|
-
#
|
43
|
+
# - +encoding:+ (String) The name of the encoding that should be used when processing the
|
44
|
+
# document. When not provided, the encoding will be determined based on the document
|
45
|
+
# content.
|
39
46
|
#
|
40
|
-
#
|
41
|
-
#
|
42
|
-
#
|
43
|
-
# and that module's DEFAULT_XML constant for what's set (and not
|
44
|
-
# set) by default.
|
47
|
+
# - +options:+ (Nokogiri::XML::ParseOptions) Configuration object that determines some
|
48
|
+
# behaviors during parsing. See ParseOptions for more information. The default value is
|
49
|
+
# +ParseOptions::DEFAULT_XML+.
|
45
50
|
#
|
46
|
-
#
|
51
|
+
# [Yields]
|
52
|
+
# If a block is given, a Nokogiri::XML::ParseOptions object is yielded to the block which
|
53
|
+
# can be configured before parsing. See Nokogiri::XML::ParseOptions for more information.
|
47
54
|
#
|
48
|
-
|
55
|
+
# [Returns] Nokogiri::XML::Document
|
56
|
+
def parse(
|
57
|
+
string_or_io,
|
58
|
+
url_ = nil, encoding_ = nil, options_ = XML::ParseOptions::DEFAULT_XML,
|
59
|
+
url: url_, encoding: encoding_, options: options_
|
60
|
+
)
|
49
61
|
options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
|
50
62
|
yield options if block_given?
|
51
63
|
|
@@ -60,6 +72,7 @@ module Nokogiri
|
|
60
72
|
end
|
61
73
|
|
62
74
|
doc = if string_or_io.respond_to?(:read)
|
75
|
+
# TODO: should we instead check for respond_to?(:to_path) ?
|
63
76
|
if string_or_io.is_a?(Pathname)
|
64
77
|
# resolve the Pathname to the file and open it as an IO object, see #2110
|
65
78
|
string_or_io = string_or_io.expand_path.open
|
@@ -180,6 +193,38 @@ module Nokogiri
|
|
180
193
|
@namespace_inheritance = false
|
181
194
|
end
|
182
195
|
|
196
|
+
#
|
197
|
+
# :call-seq:
|
198
|
+
# dup → Nokogiri::XML::Document
|
199
|
+
# dup(level) → Nokogiri::XML::Document
|
200
|
+
#
|
201
|
+
# Duplicate this node.
|
202
|
+
#
|
203
|
+
# [Parameters]
|
204
|
+
# - +level+ (optional Integer). 0 is a shallow copy, 1 (the default) is a deep copy.
|
205
|
+
# [Returns] The new Nokogiri::XML::Document
|
206
|
+
#
|
207
|
+
def dup(level = 1)
|
208
|
+
copy = OBJECT_DUP_METHOD.bind_call(self)
|
209
|
+
copy.initialize_copy_with_args(self, level)
|
210
|
+
end
|
211
|
+
|
212
|
+
#
|
213
|
+
# :call-seq:
|
214
|
+
# clone → Nokogiri::XML::Document
|
215
|
+
# clone(level) → Nokogiri::XML::Document
|
216
|
+
#
|
217
|
+
# Clone this node.
|
218
|
+
#
|
219
|
+
# [Parameters]
|
220
|
+
# - +level+ (optional Integer). 0 is a shallow copy, 1 (the default) is a deep copy.
|
221
|
+
# [Returns] The new Nokogiri::XML::Document
|
222
|
+
#
|
223
|
+
def clone(level = 1)
|
224
|
+
copy = OBJECT_CLONE_METHOD.bind_call(self)
|
225
|
+
copy.initialize_copy_with_args(self, level)
|
226
|
+
end
|
227
|
+
|
183
228
|
# :call-seq:
|
184
229
|
# create_element(name, *contents_or_attrs, &block) → Nokogiri::XML::Element
|
185
230
|
#
|
@@ -326,7 +371,7 @@ module Nokogiri
|
|
326
371
|
end
|
327
372
|
|
328
373
|
##
|
329
|
-
# Validate this Document against
|
374
|
+
# Validate this Document against its DTD. Returns a list of errors on
|
330
375
|
# the document or +nil+ when there is no DTD.
|
331
376
|
def validate
|
332
377
|
return unless internal_subset
|
@@ -367,12 +412,11 @@ module Nokogiri
|
|
367
412
|
@decorators.each do |klass, list|
|
368
413
|
next unless node.is_a?(klass)
|
369
414
|
|
370
|
-
list.each { |
|
415
|
+
list.each { |mod| node.extend(mod) }
|
371
416
|
end
|
372
417
|
end
|
373
418
|
|
374
419
|
alias_method :to_xml, :serialize
|
375
|
-
alias_method :clone, :dup
|
376
420
|
|
377
421
|
# Get the hash of namespaces on the root Nokogiri::XML::Node
|
378
422
|
def namespaces
|
@@ -3,32 +3,103 @@
|
|
3
3
|
|
4
4
|
module Nokogiri
|
5
5
|
module XML
|
6
|
+
# DocumentFragment represents a fragment of an \XML document. It provides the same functionality
|
7
|
+
# exposed by XML::Node and can be used to contain one or more \XML subtrees.
|
6
8
|
class DocumentFragment < Nokogiri::XML::Node
|
7
|
-
|
8
|
-
#
|
9
|
-
|
10
|
-
|
9
|
+
# The options used to parse the document fragment. Returns the value of any options that were
|
10
|
+
# passed into the constructor as a parameter or set in a config block, else the default
|
11
|
+
# options for the specific subclass.
|
12
|
+
attr_reader :parse_options
|
13
|
+
|
14
|
+
class << self
|
15
|
+
# :call-seq:
|
16
|
+
# parse(input) { |options| ... } → XML::DocumentFragment
|
17
|
+
# parse(input, options:) → XML::DocumentFragment
|
18
|
+
#
|
19
|
+
# Parse \XML fragment input from a String, and return a new XML::DocumentFragment. This
|
20
|
+
# method creates a new, empty XML::Document to contain the fragment.
|
21
|
+
#
|
22
|
+
# [Required Parameters]
|
23
|
+
# - +input+ (String) The content to be parsed.
|
24
|
+
#
|
25
|
+
# [Optional Keyword Arguments]
|
26
|
+
# - +options+ (Nokogiri::XML::ParseOptions) Configuration object that determines some
|
27
|
+
# behaviors during parsing. See ParseOptions for more information. The default value is
|
28
|
+
# +ParseOptions::DEFAULT_XML+.
|
29
|
+
#
|
30
|
+
# [Yields]
|
31
|
+
# If a block is given, a Nokogiri::XML::ParseOptions object is yielded to the block which
|
32
|
+
# can be configured before parsing. See Nokogiri::XML::ParseOptions for more information.
|
33
|
+
#
|
34
|
+
# [Returns] Nokogiri::XML::DocumentFragment
|
35
|
+
def parse(tags, options_ = ParseOptions::DEFAULT_XML, options: options_, &block)
|
36
|
+
new(XML::Document.new, tags, options: options, &block)
|
37
|
+
end
|
38
|
+
|
39
|
+
# Wrapper method to separate the concerns of:
|
40
|
+
# - the native object allocator's parameter (it only requires `document`)
|
41
|
+
# - the initializer's parameters
|
42
|
+
def new(document, ...) # :nodoc:
|
43
|
+
instance = native_new(document)
|
44
|
+
instance.send(:initialize, document, ...)
|
45
|
+
instance
|
46
|
+
end
|
11
47
|
end
|
12
48
|
|
13
|
-
|
14
|
-
#
|
49
|
+
# :call-seq:
|
50
|
+
# new(document, input=nil) { |options| ... } → DocumentFragment
|
51
|
+
# new(document, input=nil, context:, options:) → DocumentFragment
|
52
|
+
#
|
53
|
+
# Parse \XML fragment input from a String, and return a new DocumentFragment that is
|
54
|
+
# associated with the given +document+.
|
55
|
+
#
|
56
|
+
# 💡 It's recommended to use either XML::DocumentFragment.parse or Node#parse rather than call
|
57
|
+
# this method directly.
|
58
|
+
#
|
59
|
+
# [Required Parameters]
|
60
|
+
# - +document+ (XML::Document) The parent document to associate the returned fragment with.
|
61
|
+
#
|
62
|
+
# [Optional Parameters]
|
63
|
+
# - +input+ (String) The content to be parsed.
|
64
|
+
#
|
65
|
+
# [Optional Keyword Arguments]
|
66
|
+
# - +context:+ (Nokogiri::XML::Node) The <b>context node</b> for the subtree created. See
|
67
|
+
# below for more information.
|
68
|
+
#
|
69
|
+
# - +options:+ (Nokogiri::XML::ParseOptions) Configuration object that determines some
|
70
|
+
# behaviors during parsing. See ParseOptions for more information. The default value is
|
71
|
+
# +ParseOptions::DEFAULT_XML+.
|
72
|
+
#
|
73
|
+
# [Yields]
|
74
|
+
# If a block is given, a Nokogiri::XML::ParseOptions object is yielded to the block which
|
75
|
+
# can be configured before parsing. See ParseOptions for more information.
|
76
|
+
#
|
77
|
+
# [Returns] XML::DocumentFragment
|
78
|
+
#
|
79
|
+
# === Context \Node
|
80
|
+
#
|
81
|
+
# If a context node is specified using +context:+, then the fragment will be created by
|
82
|
+
# calling Node#parse on that node, so the parser will behave as if that Node is the parent of
|
83
|
+
# the fragment subtree, and will resolve namespaces relative to that node.
|
15
84
|
#
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
85
|
+
def initialize(
|
86
|
+
document, tags = nil,
|
87
|
+
context_ = nil, options_ = ParseOptions::DEFAULT_XML,
|
88
|
+
context: context_, options: options_
|
89
|
+
) # rubocop:disable Lint/MissingSuper
|
20
90
|
return self unless tags
|
21
91
|
|
22
92
|
options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
|
93
|
+
@parse_options = options
|
23
94
|
yield options if block_given?
|
24
95
|
|
25
|
-
children = if
|
96
|
+
children = if context
|
26
97
|
# Fix for issue#490
|
27
98
|
if Nokogiri.jruby?
|
28
99
|
# fix for issue #770
|
29
|
-
|
100
|
+
context.parse("<root #{namespace_declarations(context)}>#{tags}</root>", options).children
|
30
101
|
else
|
31
|
-
|
102
|
+
context.parse(tags, options)
|
32
103
|
end
|
33
104
|
else
|
34
105
|
wrapper_doc = XML::Document.parse("<root>#{tags}</root>", nil, nil, options)
|
data/lib/nokogiri/xml/node.rb
CHANGED
@@ -127,6 +127,42 @@ module Nokogiri
|
|
127
127
|
# This is intentionally empty, and sets the method signature for subclasses.
|
128
128
|
end
|
129
129
|
|
130
|
+
#
|
131
|
+
# :call-seq:
|
132
|
+
# dup → Nokogiri::XML::Node
|
133
|
+
# dup(level) → Nokogiri::XML::Node
|
134
|
+
# dup(level, new_parent_doc) → Nokogiri::XML::Node
|
135
|
+
#
|
136
|
+
# Duplicate this node.
|
137
|
+
#
|
138
|
+
# [Parameters]
|
139
|
+
# - +level+ (optional Integer). 0 is a shallow copy, 1 (the default) is a deep copy.
|
140
|
+
# - +new_parent_doc+ (optional Nokogiri::XML::Document)
|
141
|
+
# The new node's parent Document. Defaults to the Document of the current node.
|
142
|
+
# [Returns] The new Nokogiri::XML::Node
|
143
|
+
#
|
144
|
+
def dup(level = 1, new_parent_doc = document)
|
145
|
+
super().initialize_copy_with_args(self, level, new_parent_doc)
|
146
|
+
end
|
147
|
+
|
148
|
+
#
|
149
|
+
# :call-seq:
|
150
|
+
# clone → Nokogiri::XML::Node
|
151
|
+
# clone(level) → Nokogiri::XML::Node
|
152
|
+
# clone(level, new_parent_doc) → Nokogiri::XML::Node
|
153
|
+
#
|
154
|
+
# Clone this node.
|
155
|
+
#
|
156
|
+
# [Parameters]
|
157
|
+
# - +level+ (optional Integer). 0 is a shallow copy, 1 (the default) is a deep copy.
|
158
|
+
# - +new_parent_doc+ (optional Nokogiri::XML::Document)
|
159
|
+
# The new node's parent Document. Defaults to the Document of the current node.
|
160
|
+
# [Returns] The new Nokogiri::XML::Node
|
161
|
+
#
|
162
|
+
def clone(level = 1, new_parent_doc = document)
|
163
|
+
super().initialize_copy_with_args(self, level, new_parent_doc)
|
164
|
+
end
|
165
|
+
|
130
166
|
###
|
131
167
|
# Decorate this node with the decorators set up in this node's Document
|
132
168
|
def decorate!
|
@@ -228,7 +264,7 @@ module Nokogiri
|
|
228
264
|
if new_parent.nil?
|
229
265
|
raise "Failed to parse '#{node_or_tags}' in the context of a '#{context_node.name}' element"
|
230
266
|
end
|
231
|
-
when
|
267
|
+
when Node
|
232
268
|
new_parent = node_or_tags.dup
|
233
269
|
else
|
234
270
|
raise ArgumentError, "Requires a String or Node argument, and cannot accept a #{node_or_tags.class}"
|
@@ -406,8 +442,48 @@ module Nokogiri
|
|
406
442
|
end
|
407
443
|
|
408
444
|
####
|
409
|
-
#
|
410
|
-
#
|
445
|
+
# call-seq:
|
446
|
+
# content=(input)
|
447
|
+
#
|
448
|
+
# Set the content of this node to +input+.
|
449
|
+
#
|
450
|
+
# [Parameters]
|
451
|
+
# - +input+ (String) The new content for this node. Input is considered to be raw content, and
|
452
|
+
# so will be entity-escaped in the final DOM string.
|
453
|
+
#
|
454
|
+
# [Example]
|
455
|
+
# Note how entities are handled:
|
456
|
+
#
|
457
|
+
# doc = Nokogiri::HTML::Document.parse(<<~HTML)
|
458
|
+
# <html>
|
459
|
+
# <body>
|
460
|
+
# <div id="first">asdf</div>
|
461
|
+
# <div id="second">asdf</div>
|
462
|
+
# HTML
|
463
|
+
#
|
464
|
+
# text_node = doc.at_css("div#first").children.first
|
465
|
+
# div_node = doc.at_css("div#second")
|
466
|
+
#
|
467
|
+
# value = "You & Me"
|
468
|
+
#
|
469
|
+
# text_node.content = value
|
470
|
+
# div_node.content = value
|
471
|
+
#
|
472
|
+
# doc.css("div").to_html
|
473
|
+
# # => "<div id=\"first\">You &amp; Me</div>
|
474
|
+
# # <div id=\"second\">You &amp; Me</div>"
|
475
|
+
#
|
476
|
+
# For content that is already entity-escaped, use CGI::unescapeHTML to decode it:
|
477
|
+
#
|
478
|
+
# text_node.content = CGI::unescapeHTML(value)
|
479
|
+
# div_node.content = CGI::unescapeHTML(value)
|
480
|
+
#
|
481
|
+
# doc.css("div").to_html
|
482
|
+
# # => "<div id=\"first\">You & Me</div>
|
483
|
+
# # <div id=\"second\">You & Me</div>"
|
484
|
+
#
|
485
|
+
# See also: #native_content=
|
486
|
+
#
|
411
487
|
def content=(string)
|
412
488
|
self.native_content = encode_special_chars(string.to_s)
|
413
489
|
end
|
@@ -474,7 +550,6 @@ module Nokogiri
|
|
474
550
|
alias_method :to_str, :content
|
475
551
|
alias_method :name, :node_name
|
476
552
|
alias_method :type, :node_type
|
477
|
-
alias_method :clone, :dup
|
478
553
|
alias_method :elements, :element_children
|
479
554
|
|
480
555
|
# :section: Working With Node Attributes
|
@@ -1051,9 +1126,11 @@ module Nokogiri
|
|
1051
1126
|
|
1052
1127
|
error_count = document.errors.length
|
1053
1128
|
node_set = in_context(contents, options.to_i)
|
1129
|
+
|
1054
1130
|
if document.errors.length > error_count
|
1055
1131
|
raise document.errors[error_count] unless options.recover?
|
1056
1132
|
|
1133
|
+
# TODO: remove this block when libxml2 < 2.13 is no longer supported
|
1057
1134
|
if node_set.empty?
|
1058
1135
|
# libxml2 < 2.13 does not obey the +recover+ option after encountering errors during
|
1059
1136
|
# +in_context+ parsing, and so this horrible hack is here to try to emulate recovery
|
@@ -1080,6 +1157,7 @@ module Nokogiri
|
|
1080
1157
|
node_set = fragment.children
|
1081
1158
|
end
|
1082
1159
|
end
|
1160
|
+
|
1083
1161
|
node_set
|
1084
1162
|
end
|
1085
1163
|
|
@@ -1542,19 +1620,12 @@ module Nokogiri
|
|
1542
1620
|
node_or_tags
|
1543
1621
|
end
|
1544
1622
|
|
1545
|
-
USING_LIBXML_WITH_BROKEN_SERIALIZATION = Nokogiri.uses_libxml?("~> 2.6.0").freeze
|
1546
|
-
private_constant :USING_LIBXML_WITH_BROKEN_SERIALIZATION
|
1547
|
-
|
1548
1623
|
def to_format(save_option, options)
|
1549
|
-
return dump_html if USING_LIBXML_WITH_BROKEN_SERIALIZATION
|
1550
|
-
|
1551
1624
|
options[:save_with] = save_option unless options[:save_with]
|
1552
1625
|
serialize(options)
|
1553
1626
|
end
|
1554
1627
|
|
1555
1628
|
def write_format_to(save_option, io, options)
|
1556
|
-
return (io << dump_html) if USING_LIBXML_WITH_BROKEN_SERIALIZATION
|
1557
|
-
|
1558
1629
|
options[:save_with] ||= save_option
|
1559
1630
|
write_to(io, options)
|
1560
1631
|
end
|
@@ -4,9 +4,13 @@
|
|
4
4
|
module Nokogiri
|
5
5
|
module XML
|
6
6
|
####
|
7
|
-
# A NodeSet contains a list of Nokogiri::XML::Node objects.
|
8
|
-
#
|
9
|
-
#
|
7
|
+
# A NodeSet is an Enumerable that contains a list of Nokogiri::XML::Node objects.
|
8
|
+
#
|
9
|
+
# Typically a NodeSet is returned as a result of searching a Document via
|
10
|
+
# Nokogiri::XML::Searchable#css or Nokogiri::XML::Searchable#xpath.
|
11
|
+
#
|
12
|
+
# Note that the `#dup` and `#clone` methods perform shallow copies; these methods do not copy
|
13
|
+
# the Nodes contained in the NodeSet (similar to how Array and other Enumerable classes work).
|
10
14
|
class NodeSet
|
11
15
|
include Nokogiri::XML::Searchable
|
12
16
|
include Enumerable
|
@@ -14,8 +18,6 @@ module Nokogiri
|
|
14
18
|
# The Document this NodeSet is associated with
|
15
19
|
attr_accessor :document
|
16
20
|
|
17
|
-
alias_method :clone, :dup
|
18
|
-
|
19
21
|
# Create a NodeSet with +document+ defaulting to +list+
|
20
22
|
def initialize(document, list = [])
|
21
23
|
@document = document
|
@@ -121,7 +123,7 @@ module Nokogiri
|
|
121
123
|
return self[args.first]
|
122
124
|
end
|
123
125
|
|
124
|
-
super
|
126
|
+
super
|
125
127
|
end
|
126
128
|
alias_method :%, :at
|
127
129
|
|
@@ -423,7 +425,7 @@ module Nokogiri
|
|
423
425
|
end
|
424
426
|
|
425
427
|
###
|
426
|
-
# Return a nicely
|
428
|
+
# Return a nicely formatted string representation
|
427
429
|
def inspect
|
428
430
|
"[#{map(&:inspect).join(", ")}]"
|
429
431
|
end
|
@@ -140,7 +140,7 @@ module Nokogiri
|
|
140
140
|
|
141
141
|
# Relax any hardcoded limit from the parser. Off by default.
|
142
142
|
#
|
143
|
-
# ⚠
|
143
|
+
# ⚠ <b>It is UNSAFE to set this option</b> when parsing untrusted documents.
|
144
144
|
HUGE = 1 << 19
|
145
145
|
|
146
146
|
# Support line numbers up to <code>long int</code> (default is a <code>short int</code>). On
|
data/lib/nokogiri/xml/pp/node.rb
CHANGED
@@ -8,6 +8,11 @@ module Nokogiri
|
|
8
8
|
COLLECTIONS = [:attribute_nodes, :children]
|
9
9
|
|
10
10
|
def inspect
|
11
|
+
# handle the case where an exception is thrown during object construction
|
12
|
+
if respond_to?(:data_ptr?) && !data_ptr?
|
13
|
+
return "#<#{self.class}:#{format("0x%x", object_id)} (no data)>"
|
14
|
+
end
|
15
|
+
|
11
16
|
attributes = inspect_attributes.reject do |x|
|
12
17
|
attribute = send(x)
|
13
18
|
!attribute || (attribute.respond_to?(:empty?) && attribute.empty?)
|
@@ -21,7 +26,7 @@ module Nokogiri
|
|
21
26
|
"#{attribute}=#{send(attribute).inspect}"
|
22
27
|
end.join(" ")
|
23
28
|
end
|
24
|
-
"#<#{self.class
|
29
|
+
"#<#{self.class}:#{format("0x%x", object_id)} #{attributes}>"
|
25
30
|
end
|
26
31
|
|
27
32
|
def pretty_print(pp)
|
data/lib/nokogiri/xml/reader.rb
CHANGED
@@ -3,32 +3,33 @@
|
|
3
3
|
module Nokogiri
|
4
4
|
module XML
|
5
5
|
###
|
6
|
-
#
|
7
|
-
#
|
6
|
+
# The Reader parser allows you to effectively pull parse an \XML document. Once instantiated,
|
7
|
+
# call Nokogiri::XML::Reader#each to iterate over each node.
|
8
8
|
#
|
9
|
-
#
|
10
|
-
#
|
9
|
+
# Nokogiri::XML::Reader parses an \XML document similar to the way a cursor would move. The
|
10
|
+
# Reader is given an \XML document, and yields nodes to an each block.
|
11
|
+
#
|
12
|
+
# The Reader parser might be good for when you need the speed and low memory usage of a \SAX
|
13
|
+
# parser, but do not want to write a SAX::Document handler.
|
11
14
|
#
|
12
15
|
# Here is an example of usage:
|
13
16
|
#
|
14
|
-
# reader = Nokogiri::XML::Reader
|
17
|
+
# reader = Nokogiri::XML::Reader.new <<~XML
|
15
18
|
# <x xmlns:tenderlove='http://tenderlovemaking.com/'>
|
16
19
|
# <tenderlove:foo awesome='true'>snuggles!</tenderlove:foo>
|
17
20
|
# </x>
|
18
|
-
#
|
21
|
+
# XML
|
19
22
|
#
|
20
23
|
# reader.each do |node|
|
21
|
-
#
|
22
24
|
# # node is an instance of Nokogiri::XML::Reader
|
23
25
|
# puts node.name
|
24
|
-
#
|
25
26
|
# end
|
26
27
|
#
|
27
28
|
# ⚠ Nokogiri::XML::Reader#each can only be called once! Once the cursor moves through the entire
|
28
29
|
# document, you must parse the document again. It may be better to capture all information you
|
29
30
|
# need during a single iteration.
|
30
31
|
#
|
31
|
-
# ⚠ libxml2 does not support error recovery in the Reader parser. The
|
32
|
+
# ⚠ libxml2 does not support error recovery in the Reader parser. The +RECOVER+ ParseOption is
|
32
33
|
# ignored. If a syntax error is encountered during parsing, an exception will be raised.
|
33
34
|
class Reader
|
34
35
|
include Enumerable
|
@@ -66,23 +67,55 @@ module Nokogiri
|
|
66
67
|
TYPE_END_ELEMENT = 15
|
67
68
|
# Entity end node type
|
68
69
|
TYPE_END_ENTITY = 16
|
69
|
-
# XML Declaration node type
|
70
|
+
# \XML Declaration node type
|
70
71
|
TYPE_XML_DECLARATION = 17
|
71
72
|
|
72
73
|
# A list of errors encountered while parsing
|
73
74
|
attr_accessor :errors
|
74
75
|
|
75
|
-
# The XML source
|
76
|
+
# The \XML source
|
76
77
|
attr_reader :source
|
77
78
|
|
78
79
|
alias_method :self_closing?, :empty_element?
|
79
80
|
|
80
|
-
|
81
|
+
# :call-seq:
|
82
|
+
# Reader.new(input) { |options| ... } → Reader
|
83
|
+
# Reader.new(input, url:, encoding:, options:) { |options| ... } → Reader
|
84
|
+
#
|
85
|
+
# Create a new Reader to parse an \XML document.
|
86
|
+
#
|
87
|
+
# [Required Parameters]
|
88
|
+
# - +input+ (String | IO): The \XML document to parse.
|
89
|
+
#
|
90
|
+
# [Optional Parameters]
|
91
|
+
# - +url:+ (String) The base URL of the document.
|
92
|
+
# - +encoding:+ (String) The name of the encoding of the document.
|
93
|
+
# - +options:+ (Integer | ParseOptions) Options to control the parser behavior.
|
94
|
+
# Defaults to +ParseOptions::STRICT+.
|
95
|
+
#
|
96
|
+
# [Yields]
|
97
|
+
# If present, the block will be passed a Nokogiri::XML::ParseOptions object to modify before
|
98
|
+
# the document is parsed. See Nokogiri::XML::ParseOptions for more information.
|
99
|
+
def self.new(
|
100
|
+
string_or_io,
|
101
|
+
url_ = nil, encoding_ = nil, options_ = ParseOptions::STRICT,
|
102
|
+
url: url_, encoding: encoding_, options: options_
|
103
|
+
)
|
104
|
+
options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
|
105
|
+
yield options if block_given?
|
106
|
+
|
107
|
+
if string_or_io.respond_to?(:read)
|
108
|
+
return Reader.from_io(string_or_io, url, encoding, options.to_i)
|
109
|
+
end
|
110
|
+
|
111
|
+
Reader.from_memory(string_or_io, url, encoding, options.to_i)
|
112
|
+
end
|
113
|
+
|
114
|
+
private def initialize(source, url = nil, encoding = nil) # :nodoc:
|
81
115
|
@source = source
|
82
116
|
@errors = []
|
83
117
|
@encoding = encoding
|
84
118
|
end
|
85
|
-
private :initialize
|
86
119
|
|
87
120
|
# Get the attributes and namespaces of the current node as a Hash.
|
88
121
|
#
|