nokogiri 1.16.8-x86-mingw32 → 1.17.0-x86-mingw32
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +11 -21
- data/README.md +4 -0
- data/dependencies.yml +6 -6
- data/ext/nokogiri/extconf.rb +191 -137
- data/ext/nokogiri/gumbo.c +69 -53
- data/ext/nokogiri/html4_document.c +10 -4
- data/ext/nokogiri/html4_element_description.c +18 -18
- data/ext/nokogiri/html4_sax_parser.c +40 -0
- data/ext/nokogiri/html4_sax_parser_context.c +48 -58
- data/ext/nokogiri/html4_sax_push_parser.c +25 -24
- data/ext/nokogiri/include/libexslt/exsltconfig.h +3 -3
- data/ext/nokogiri/include/libxml2/libxml/HTMLparser.h +12 -19
- data/ext/nokogiri/include/libxml2/libxml/c14n.h +1 -12
- data/ext/nokogiri/include/libxml2/libxml/debugXML.h +1 -1
- data/ext/nokogiri/include/libxml2/libxml/encoding.h +9 -0
- data/ext/nokogiri/include/libxml2/libxml/entities.h +12 -1
- data/ext/nokogiri/include/libxml2/libxml/hash.h +19 -0
- data/ext/nokogiri/include/libxml2/libxml/list.h +2 -2
- data/ext/nokogiri/include/libxml2/libxml/nanohttp.h +17 -0
- data/ext/nokogiri/include/libxml2/libxml/parser.h +60 -54
- data/ext/nokogiri/include/libxml2/libxml/parserInternals.h +9 -1
- data/ext/nokogiri/include/libxml2/libxml/pattern.h +6 -0
- data/ext/nokogiri/include/libxml2/libxml/tree.h +32 -12
- data/ext/nokogiri/include/libxml2/libxml/uri.h +11 -0
- data/ext/nokogiri/include/libxml2/libxml/valid.h +29 -2
- data/ext/nokogiri/include/libxml2/libxml/xinclude.h +7 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlIO.h +21 -4
- data/ext/nokogiri/include/libxml2/libxml/xmlerror.h +14 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlexports.h +111 -15
- data/ext/nokogiri/include/libxml2/libxml/xmlmemory.h +8 -45
- data/ext/nokogiri/include/libxml2/libxml/xmlreader.h +2 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlsave.h +5 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlunicode.h +165 -1
- data/ext/nokogiri/include/libxml2/libxml/xmlversion.h +7 -171
- data/ext/nokogiri/include/libxml2/libxml/xmlwriter.h +1 -0
- data/ext/nokogiri/include/libxml2/libxml/xpath.h +4 -0
- data/ext/nokogiri/include/libxslt/xsltInternals.h +3 -0
- data/ext/nokogiri/include/libxslt/xsltconfig.h +4 -37
- data/ext/nokogiri/libxml2_polyfill.c +114 -0
- data/ext/nokogiri/nokogiri.c +9 -2
- data/ext/nokogiri/nokogiri.h +18 -33
- data/ext/nokogiri/xml_attr.c +1 -1
- data/ext/nokogiri/xml_cdata.c +2 -10
- data/ext/nokogiri/xml_comment.c +3 -8
- data/ext/nokogiri/xml_document.c +163 -156
- data/ext/nokogiri/xml_document_fragment.c +10 -25
- data/ext/nokogiri/xml_dtd.c +1 -1
- data/ext/nokogiri/xml_element_content.c +9 -9
- data/ext/nokogiri/xml_encoding_handler.c +4 -4
- data/ext/nokogiri/xml_namespace.c +6 -6
- data/ext/nokogiri/xml_node.c +130 -104
- data/ext/nokogiri/xml_node_set.c +46 -44
- data/ext/nokogiri/xml_reader.c +54 -58
- data/ext/nokogiri/xml_relax_ng.c +35 -56
- data/ext/nokogiri/xml_sax_parser.c +156 -88
- data/ext/nokogiri/xml_sax_parser_context.c +213 -131
- data/ext/nokogiri/xml_sax_push_parser.c +68 -49
- data/ext/nokogiri/xml_schema.c +50 -85
- data/ext/nokogiri/xml_syntax_error.c +19 -11
- data/ext/nokogiri/xml_text.c +2 -4
- data/ext/nokogiri/xml_xpath_context.c +2 -2
- data/ext/nokogiri/xslt_stylesheet.c +8 -8
- data/lib/nokogiri/3.0/nokogiri.so +0 -0
- data/lib/nokogiri/3.1/nokogiri.so +0 -0
- data/lib/nokogiri/3.2/nokogiri.so +0 -0
- data/lib/nokogiri/3.3/nokogiri.so +0 -0
- data/lib/nokogiri/class_resolver.rb +1 -1
- data/lib/nokogiri/css/node.rb +6 -2
- data/lib/nokogiri/css/parser.rb +6 -4
- data/lib/nokogiri/css/parser.y +2 -2
- data/lib/nokogiri/css/parser_extras.rb +6 -66
- data/lib/nokogiri/css/selector_cache.rb +38 -0
- data/lib/nokogiri/css/tokenizer.rb +4 -4
- data/lib/nokogiri/css/tokenizer.rex +9 -8
- data/lib/nokogiri/css/xpath_visitor.rb +42 -6
- data/lib/nokogiri/css.rb +86 -20
- data/lib/nokogiri/decorators/slop.rb +3 -5
- data/lib/nokogiri/encoding_handler.rb +2 -2
- data/lib/nokogiri/html4/document.rb +44 -23
- data/lib/nokogiri/html4/document_fragment.rb +124 -12
- data/lib/nokogiri/html4/encoding_reader.rb +1 -1
- data/lib/nokogiri/html4/sax/parser.rb +23 -38
- data/lib/nokogiri/html4/sax/parser_context.rb +4 -9
- data/lib/nokogiri/html4.rb +9 -14
- data/lib/nokogiri/html5/builder.rb +40 -0
- data/lib/nokogiri/html5/document.rb +61 -30
- data/lib/nokogiri/html5/document_fragment.rb +130 -20
- data/lib/nokogiri/html5/node.rb +4 -4
- data/lib/nokogiri/html5.rb +114 -72
- data/lib/nokogiri/version/constant.rb +1 -1
- data/lib/nokogiri/xml/builder.rb +8 -1
- data/lib/nokogiri/xml/document.rb +70 -26
- data/lib/nokogiri/xml/document_fragment.rb +84 -13
- data/lib/nokogiri/xml/node.rb +82 -11
- data/lib/nokogiri/xml/node_set.rb +9 -7
- data/lib/nokogiri/xml/parse_options.rb +1 -1
- data/lib/nokogiri/xml/pp/node.rb +6 -1
- data/lib/nokogiri/xml/reader.rb +46 -13
- data/lib/nokogiri/xml/relax_ng.rb +57 -20
- data/lib/nokogiri/xml/sax/document.rb +174 -83
- data/lib/nokogiri/xml/sax/parser.rb +115 -41
- data/lib/nokogiri/xml/sax/parser_context.rb +116 -8
- data/lib/nokogiri/xml/sax/push_parser.rb +3 -0
- data/lib/nokogiri/xml/sax.rb +48 -0
- data/lib/nokogiri/xml/schema.rb +112 -45
- data/lib/nokogiri/xml/searchable.rb +6 -8
- data/lib/nokogiri/xml/syntax_error.rb +22 -0
- data/lib/nokogiri/xml.rb +13 -24
- data/lib/nokogiri/xslt.rb +3 -9
- data/lib/xsd/xmlparser/nokogiri.rb +3 -4
- metadata +8 -4
- data/ext/nokogiri/libxml2_backwards_compat.c +0 -121
@@ -5,12 +5,12 @@ require "pathname"
|
|
5
5
|
|
6
6
|
module Nokogiri
|
7
7
|
module XML
|
8
|
-
# Nokogiri::XML::Document is the main entry point for dealing with XML documents.
|
9
|
-
# is created by parsing
|
10
|
-
# on parsing.
|
8
|
+
# Nokogiri::XML::Document is the main entry point for dealing with \XML documents. The Document
|
9
|
+
# is created by parsing \XML content from a String or an IO object. See
|
10
|
+
# Nokogiri::XML::Document.parse for more information on parsing.
|
11
11
|
#
|
12
|
-
#
|
13
|
-
#
|
12
|
+
# Document inherits a great deal of functionality from its superclass Nokogiri::XML::Node, so
|
13
|
+
# please read that class's documentation as well.
|
14
14
|
class Document < Nokogiri::XML::Node
|
15
15
|
# See http://www.w3.org/TR/REC-xml-names/#ns-decl for more details. Note that we're not
|
16
16
|
# attempting to handle unicode characters partly because libxml2 doesn't handle unicode
|
@@ -19,33 +19,45 @@ module Nokogiri
|
|
19
19
|
NCNAME_CHAR = NCNAME_START_CHAR + "\\-\\.0-9"
|
20
20
|
NCNAME_RE = /^xmlns(?::([#{NCNAME_START_CHAR}][#{NCNAME_CHAR}]*))?$/
|
21
21
|
|
22
|
+
OBJECT_DUP_METHOD = Object.instance_method(:dup)
|
23
|
+
OBJECT_CLONE_METHOD = Object.instance_method(:clone)
|
24
|
+
private_constant :OBJECT_DUP_METHOD, :OBJECT_CLONE_METHOD
|
25
|
+
|
22
26
|
class << self
|
23
|
-
#
|
27
|
+
# call-seq:
|
28
|
+
# parse(input) { |options| ... } => Nokogiri::XML::Document
|
29
|
+
# parse(input, url:, encoding:, options:) => Nokogiri::XML::Document
|
24
30
|
#
|
25
|
-
#
|
26
|
-
# _read_ and _close_ such as an IO, or StringIO.
|
31
|
+
# Parse \XML input from a String or IO object, and return a new XML::Document.
|
27
32
|
#
|
28
|
-
#
|
33
|
+
# 🛡 By default, Nokogiri treats documents as untrusted, and so does not attempt to load DTDs
|
34
|
+
# or access the network. See Nokogiri::XML::ParseOptions for a complete list of options; and
|
35
|
+
# that module's DEFAULT_XML constant for what's set (and not set) by default.
|
29
36
|
#
|
30
|
-
#
|
31
|
-
#
|
37
|
+
# [Required Parameters]
|
38
|
+
# - +input+ (String | IO) The content to be parsed.
|
32
39
|
#
|
33
|
-
#
|
34
|
-
#
|
35
|
-
# Nokogiri::XML::ParseOptions for more information.
|
40
|
+
# [Optional Keyword Arguments]
|
41
|
+
# - +url:+ (String) The base URI for this document.
|
36
42
|
#
|
37
|
-
# +
|
38
|
-
#
|
43
|
+
# - +encoding:+ (String) The name of the encoding that should be used when processing the
|
44
|
+
# document. When not provided, the encoding will be determined based on the document
|
45
|
+
# content.
|
39
46
|
#
|
40
|
-
#
|
41
|
-
#
|
42
|
-
#
|
43
|
-
# and that module's DEFAULT_XML constant for what's set (and not
|
44
|
-
# set) by default.
|
47
|
+
# - +options:+ (Nokogiri::XML::ParseOptions) Configuration object that determines some
|
48
|
+
# behaviors during parsing. See ParseOptions for more information. The default value is
|
49
|
+
# +ParseOptions::DEFAULT_XML+.
|
45
50
|
#
|
46
|
-
#
|
51
|
+
# [Yields]
|
52
|
+
# If a block is given, a Nokogiri::XML::ParseOptions object is yielded to the block which
|
53
|
+
# can be configured before parsing. See Nokogiri::XML::ParseOptions for more information.
|
47
54
|
#
|
48
|
-
|
55
|
+
# [Returns] Nokogiri::XML::Document
|
56
|
+
def parse(
|
57
|
+
string_or_io,
|
58
|
+
url_ = nil, encoding_ = nil, options_ = XML::ParseOptions::DEFAULT_XML,
|
59
|
+
url: url_, encoding: encoding_, options: options_
|
60
|
+
)
|
49
61
|
options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
|
50
62
|
yield options if block_given?
|
51
63
|
|
@@ -60,6 +72,7 @@ module Nokogiri
|
|
60
72
|
end
|
61
73
|
|
62
74
|
doc = if string_or_io.respond_to?(:read)
|
75
|
+
# TODO: should we instead check for respond_to?(:to_path) ?
|
63
76
|
if string_or_io.is_a?(Pathname)
|
64
77
|
# resolve the Pathname to the file and open it as an IO object, see #2110
|
65
78
|
string_or_io = string_or_io.expand_path.open
|
@@ -180,6 +193,38 @@ module Nokogiri
|
|
180
193
|
@namespace_inheritance = false
|
181
194
|
end
|
182
195
|
|
196
|
+
#
|
197
|
+
# :call-seq:
|
198
|
+
# dup → Nokogiri::XML::Document
|
199
|
+
# dup(level) → Nokogiri::XML::Document
|
200
|
+
#
|
201
|
+
# Duplicate this node.
|
202
|
+
#
|
203
|
+
# [Parameters]
|
204
|
+
# - +level+ (optional Integer). 0 is a shallow copy, 1 (the default) is a deep copy.
|
205
|
+
# [Returns] The new Nokogiri::XML::Document
|
206
|
+
#
|
207
|
+
def dup(level = 1)
|
208
|
+
copy = OBJECT_DUP_METHOD.bind_call(self)
|
209
|
+
copy.initialize_copy_with_args(self, level)
|
210
|
+
end
|
211
|
+
|
212
|
+
#
|
213
|
+
# :call-seq:
|
214
|
+
# clone → Nokogiri::XML::Document
|
215
|
+
# clone(level) → Nokogiri::XML::Document
|
216
|
+
#
|
217
|
+
# Clone this node.
|
218
|
+
#
|
219
|
+
# [Parameters]
|
220
|
+
# - +level+ (optional Integer). 0 is a shallow copy, 1 (the default) is a deep copy.
|
221
|
+
# [Returns] The new Nokogiri::XML::Document
|
222
|
+
#
|
223
|
+
def clone(level = 1)
|
224
|
+
copy = OBJECT_CLONE_METHOD.bind_call(self)
|
225
|
+
copy.initialize_copy_with_args(self, level)
|
226
|
+
end
|
227
|
+
|
183
228
|
# :call-seq:
|
184
229
|
# create_element(name, *contents_or_attrs, &block) → Nokogiri::XML::Element
|
185
230
|
#
|
@@ -326,7 +371,7 @@ module Nokogiri
|
|
326
371
|
end
|
327
372
|
|
328
373
|
##
|
329
|
-
# Validate this Document against
|
374
|
+
# Validate this Document against its DTD. Returns a list of errors on
|
330
375
|
# the document or +nil+ when there is no DTD.
|
331
376
|
def validate
|
332
377
|
return unless internal_subset
|
@@ -367,12 +412,11 @@ module Nokogiri
|
|
367
412
|
@decorators.each do |klass, list|
|
368
413
|
next unless node.is_a?(klass)
|
369
414
|
|
370
|
-
list.each { |
|
415
|
+
list.each { |mod| node.extend(mod) }
|
371
416
|
end
|
372
417
|
end
|
373
418
|
|
374
419
|
alias_method :to_xml, :serialize
|
375
|
-
alias_method :clone, :dup
|
376
420
|
|
377
421
|
# Get the hash of namespaces on the root Nokogiri::XML::Node
|
378
422
|
def namespaces
|
@@ -3,32 +3,103 @@
|
|
3
3
|
|
4
4
|
module Nokogiri
|
5
5
|
module XML
|
6
|
+
# DocumentFragment represents a fragment of an \XML document. It provides the same functionality
|
7
|
+
# exposed by XML::Node and can be used to contain one or more \XML subtrees.
|
6
8
|
class DocumentFragment < Nokogiri::XML::Node
|
7
|
-
|
8
|
-
#
|
9
|
-
|
10
|
-
|
9
|
+
# The options used to parse the document fragment. Returns the value of any options that were
|
10
|
+
# passed into the constructor as a parameter or set in a config block, else the default
|
11
|
+
# options for the specific subclass.
|
12
|
+
attr_reader :parse_options
|
13
|
+
|
14
|
+
class << self
|
15
|
+
# :call-seq:
|
16
|
+
# parse(input) { |options| ... } → XML::DocumentFragment
|
17
|
+
# parse(input, options:) → XML::DocumentFragment
|
18
|
+
#
|
19
|
+
# Parse \XML fragment input from a String, and return a new XML::DocumentFragment. This
|
20
|
+
# method creates a new, empty XML::Document to contain the fragment.
|
21
|
+
#
|
22
|
+
# [Required Parameters]
|
23
|
+
# - +input+ (String) The content to be parsed.
|
24
|
+
#
|
25
|
+
# [Optional Keyword Arguments]
|
26
|
+
# - +options:+ (Nokogiri::XML::ParseOptions) Configuration object that determines some
|
27
|
+
# behaviors during parsing. See ParseOptions for more information. The default value is
|
28
|
+
# +ParseOptions::DEFAULT_XML+.
|
29
|
+
#
|
30
|
+
# [Yields]
|
31
|
+
# If a block is given, a Nokogiri::XML::ParseOptions object is yielded to the block which
|
32
|
+
# can be configured before parsing. See Nokogiri::XML::ParseOptions for more information.
|
33
|
+
#
|
34
|
+
# [Returns] Nokogiri::XML::DocumentFragment
|
35
|
+
def parse(tags, options_ = ParseOptions::DEFAULT_XML, options: options_, &block)
|
36
|
+
new(XML::Document.new, tags, options: options, &block)
|
37
|
+
end
|
38
|
+
|
39
|
+
# Wrapper method to separate the concerns of:
|
40
|
+
# - the native object allocator's parameter (it only requires `document`)
|
41
|
+
# - the initializer's parameters
|
42
|
+
def new(document, ...) # :nodoc:
|
43
|
+
instance = native_new(document)
|
44
|
+
instance.send(:initialize, document, ...)
|
45
|
+
instance
|
46
|
+
end
|
11
47
|
end
|
12
48
|
|
13
|
-
|
14
|
-
#
|
49
|
+
# :call-seq:
|
50
|
+
# new(document, input=nil) { |options| ... } → DocumentFragment
|
51
|
+
# new(document, input=nil, context:, options:) → DocumentFragment
|
52
|
+
#
|
53
|
+
# Parse \XML fragment input from a String, and return a new DocumentFragment that is
|
54
|
+
# associated with the given +document+.
|
55
|
+
#
|
56
|
+
# 💡 It's recommended to use either XML::DocumentFragment.parse or Node#parse rather than call
|
57
|
+
# this method directly.
|
58
|
+
#
|
59
|
+
# [Required Parameters]
|
60
|
+
# - +document+ (XML::Document) The parent document to associate the returned fragment with.
|
61
|
+
#
|
62
|
+
# [Optional Parameters]
|
63
|
+
# - +input+ (String) The content to be parsed.
|
64
|
+
#
|
65
|
+
# [Optional Keyword Arguments]
|
66
|
+
# - +context:+ (Nokogiri::XML::Node) The <b>context node</b> for the subtree created. See
|
67
|
+
# below for more information.
|
68
|
+
#
|
69
|
+
# - +options:+ (Nokogiri::XML::ParseOptions) Configuration object that determines some
|
70
|
+
# behaviors during parsing. See ParseOptions for more information. The default value is
|
71
|
+
# +ParseOptions::DEFAULT_XML+.
|
72
|
+
#
|
73
|
+
# [Yields]
|
74
|
+
# If a block is given, a Nokogiri::XML::ParseOptions object is yielded to the block which
|
75
|
+
# can be configured before parsing. See ParseOptions for more information.
|
76
|
+
#
|
77
|
+
# [Returns] XML::DocumentFragment
|
78
|
+
#
|
79
|
+
# === Context \Node
|
80
|
+
#
|
81
|
+
# If a context node is specified using +context:+, then the fragment will be created by
|
82
|
+
# calling Node#parse on that node, so the parser will behave as if that Node is the parent of
|
83
|
+
# the fragment subtree, and will resolve namespaces relative to that node.
|
15
84
|
#
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
85
|
+
def initialize(
|
86
|
+
document, tags = nil,
|
87
|
+
context_ = nil, options_ = ParseOptions::DEFAULT_XML,
|
88
|
+
context: context_, options: options_
|
89
|
+
) # rubocop:disable Lint/MissingSuper
|
20
90
|
return self unless tags
|
21
91
|
|
22
92
|
options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
|
93
|
+
@parse_options = options
|
23
94
|
yield options if block_given?
|
24
95
|
|
25
|
-
children = if
|
96
|
+
children = if context
|
26
97
|
# Fix for issue#490
|
27
98
|
if Nokogiri.jruby?
|
28
99
|
# fix for issue #770
|
29
|
-
|
100
|
+
context.parse("<root #{namespace_declarations(context)}>#{tags}</root>", options).children
|
30
101
|
else
|
31
|
-
|
102
|
+
context.parse(tags, options)
|
32
103
|
end
|
33
104
|
else
|
34
105
|
wrapper_doc = XML::Document.parse("<root>#{tags}</root>", nil, nil, options)
|
data/lib/nokogiri/xml/node.rb
CHANGED
@@ -127,6 +127,42 @@ module Nokogiri
|
|
127
127
|
# This is intentionally empty, and sets the method signature for subclasses.
|
128
128
|
end
|
129
129
|
|
130
|
+
#
|
131
|
+
# :call-seq:
|
132
|
+
# dup → Nokogiri::XML::Node
|
133
|
+
# dup(level) → Nokogiri::XML::Node
|
134
|
+
# dup(level, new_parent_doc) → Nokogiri::XML::Node
|
135
|
+
#
|
136
|
+
# Duplicate this node.
|
137
|
+
#
|
138
|
+
# [Parameters]
|
139
|
+
# - +level+ (optional Integer). 0 is a shallow copy, 1 (the default) is a deep copy.
|
140
|
+
# - +new_parent_doc+ (optional Nokogiri::XML::Document)
|
141
|
+
# The new node's parent Document. Defaults to the Document of the current node.
|
142
|
+
# [Returns] The new Nokogiri::XML::Node
|
143
|
+
#
|
144
|
+
def dup(level = 1, new_parent_doc = document)
|
145
|
+
super().initialize_copy_with_args(self, level, new_parent_doc)
|
146
|
+
end
|
147
|
+
|
148
|
+
#
|
149
|
+
# :call-seq:
|
150
|
+
# clone → Nokogiri::XML::Node
|
151
|
+
# clone(level) → Nokogiri::XML::Node
|
152
|
+
# clone(level, new_parent_doc) → Nokogiri::XML::Node
|
153
|
+
#
|
154
|
+
# Clone this node.
|
155
|
+
#
|
156
|
+
# [Parameters]
|
157
|
+
# - +level+ (optional Integer). 0 is a shallow copy, 1 (the default) is a deep copy.
|
158
|
+
# - +new_parent_doc+ (optional Nokogiri::XML::Document)
|
159
|
+
# The new node's parent Document. Defaults to the Document of the current node.
|
160
|
+
# [Returns] The new Nokogiri::XML::Node
|
161
|
+
#
|
162
|
+
def clone(level = 1, new_parent_doc = document)
|
163
|
+
super().initialize_copy_with_args(self, level, new_parent_doc)
|
164
|
+
end
|
165
|
+
|
130
166
|
###
|
131
167
|
# Decorate this node with the decorators set up in this node's Document
|
132
168
|
def decorate!
|
@@ -228,7 +264,7 @@ module Nokogiri
|
|
228
264
|
if new_parent.nil?
|
229
265
|
raise "Failed to parse '#{node_or_tags}' in the context of a '#{context_node.name}' element"
|
230
266
|
end
|
231
|
-
when
|
267
|
+
when Node
|
232
268
|
new_parent = node_or_tags.dup
|
233
269
|
else
|
234
270
|
raise ArgumentError, "Requires a String or Node argument, and cannot accept a #{node_or_tags.class}"
|
@@ -406,8 +442,48 @@ module Nokogiri
|
|
406
442
|
end
|
407
443
|
|
408
444
|
####
|
409
|
-
#
|
410
|
-
#
|
445
|
+
# call-seq:
|
446
|
+
# content=(input)
|
447
|
+
#
|
448
|
+
# Set the content of this node to +input+.
|
449
|
+
#
|
450
|
+
# [Parameters]
|
451
|
+
# - +input+ (String) The new content for this node. Input is considered to be raw content, and
|
452
|
+
# so will be entity-escaped in the final DOM string.
|
453
|
+
#
|
454
|
+
# [Example]
|
455
|
+
# Note how entities are handled:
|
456
|
+
#
|
457
|
+
# doc = Nokogiri::HTML::Document.parse(<<~HTML)
|
458
|
+
# <html>
|
459
|
+
# <body>
|
460
|
+
# <div id="first">asdf</div>
|
461
|
+
# <div id="second">asdf</div>
|
462
|
+
# HTML
|
463
|
+
#
|
464
|
+
# text_node = doc.at_css("div#first").children.first
|
465
|
+
# div_node = doc.at_css("div#second")
|
466
|
+
#
|
467
|
+
# value = "You & Me"
|
468
|
+
#
|
469
|
+
# text_node.content = value
|
470
|
+
# div_node.content = value
|
471
|
+
#
|
472
|
+
# doc.css("div").to_html
|
473
|
+
# # => "<div id=\"first\">You &amp; Me</div>
|
474
|
+
# # <div id=\"second\">You &amp; Me</div>"
|
475
|
+
#
|
476
|
+
# For content that is already entity-escaped, use CGI::unescapeHTML to decode it:
|
477
|
+
#
|
478
|
+
# text_node.content = CGI::unescapeHTML(value)
|
479
|
+
# div_node.content = CGI::unescapeHTML(value)
|
480
|
+
#
|
481
|
+
# doc.css("div").to_html
|
482
|
+
# # => "<div id=\"first\">You & Me</div>
|
483
|
+
# # <div id=\"second\">You & Me</div>"
|
484
|
+
#
|
485
|
+
# See also: #native_content=
|
486
|
+
#
|
411
487
|
def content=(string)
|
412
488
|
self.native_content = encode_special_chars(string.to_s)
|
413
489
|
end
|
@@ -474,7 +550,6 @@ module Nokogiri
|
|
474
550
|
alias_method :to_str, :content
|
475
551
|
alias_method :name, :node_name
|
476
552
|
alias_method :type, :node_type
|
477
|
-
alias_method :clone, :dup
|
478
553
|
alias_method :elements, :element_children
|
479
554
|
|
480
555
|
# :section: Working With Node Attributes
|
@@ -1051,9 +1126,11 @@ module Nokogiri
|
|
1051
1126
|
|
1052
1127
|
error_count = document.errors.length
|
1053
1128
|
node_set = in_context(contents, options.to_i)
|
1129
|
+
|
1054
1130
|
if document.errors.length > error_count
|
1055
1131
|
raise document.errors[error_count] unless options.recover?
|
1056
1132
|
|
1133
|
+
# TODO: remove this block when libxml2 < 2.13 is no longer supported
|
1057
1134
|
if node_set.empty?
|
1058
1135
|
# libxml2 < 2.13 does not obey the +recover+ option after encountering errors during
|
1059
1136
|
# +in_context+ parsing, and so this horrible hack is here to try to emulate recovery
|
@@ -1080,6 +1157,7 @@ module Nokogiri
|
|
1080
1157
|
node_set = fragment.children
|
1081
1158
|
end
|
1082
1159
|
end
|
1160
|
+
|
1083
1161
|
node_set
|
1084
1162
|
end
|
1085
1163
|
|
@@ -1542,19 +1620,12 @@ module Nokogiri
|
|
1542
1620
|
node_or_tags
|
1543
1621
|
end
|
1544
1622
|
|
1545
|
-
USING_LIBXML_WITH_BROKEN_SERIALIZATION = Nokogiri.uses_libxml?("~> 2.6.0").freeze
|
1546
|
-
private_constant :USING_LIBXML_WITH_BROKEN_SERIALIZATION
|
1547
|
-
|
1548
1623
|
def to_format(save_option, options)
|
1549
|
-
return dump_html if USING_LIBXML_WITH_BROKEN_SERIALIZATION
|
1550
|
-
|
1551
1624
|
options[:save_with] = save_option unless options[:save_with]
|
1552
1625
|
serialize(options)
|
1553
1626
|
end
|
1554
1627
|
|
1555
1628
|
def write_format_to(save_option, io, options)
|
1556
|
-
return (io << dump_html) if USING_LIBXML_WITH_BROKEN_SERIALIZATION
|
1557
|
-
|
1558
1629
|
options[:save_with] ||= save_option
|
1559
1630
|
write_to(io, options)
|
1560
1631
|
end
|
@@ -4,9 +4,13 @@
|
|
4
4
|
module Nokogiri
|
5
5
|
module XML
|
6
6
|
####
|
7
|
-
# A NodeSet contains a list of Nokogiri::XML::Node objects.
|
8
|
-
#
|
9
|
-
#
|
7
|
+
# A NodeSet is an Enumerable that contains a list of Nokogiri::XML::Node objects.
|
8
|
+
#
|
9
|
+
# Typically a NodeSet is returned as a result of searching a Document via
|
10
|
+
# Nokogiri::XML::Searchable#css or Nokogiri::XML::Searchable#xpath.
|
11
|
+
#
|
12
|
+
# Note that the `#dup` and `#clone` methods perform shallow copies; these methods do not copy
|
13
|
+
# the Nodes contained in the NodeSet (similar to how Array and other Enumerable classes work).
|
10
14
|
class NodeSet
|
11
15
|
include Nokogiri::XML::Searchable
|
12
16
|
include Enumerable
|
@@ -14,8 +18,6 @@ module Nokogiri
|
|
14
18
|
# The Document this NodeSet is associated with
|
15
19
|
attr_accessor :document
|
16
20
|
|
17
|
-
alias_method :clone, :dup
|
18
|
-
|
19
21
|
# Create a NodeSet with +document+ defaulting to +list+
|
20
22
|
def initialize(document, list = [])
|
21
23
|
@document = document
|
@@ -121,7 +123,7 @@ module Nokogiri
|
|
121
123
|
return self[args.first]
|
122
124
|
end
|
123
125
|
|
124
|
-
super
|
126
|
+
super
|
125
127
|
end
|
126
128
|
alias_method :%, :at
|
127
129
|
|
@@ -423,7 +425,7 @@ module Nokogiri
|
|
423
425
|
end
|
424
426
|
|
425
427
|
###
|
426
|
-
# Return a nicely
|
428
|
+
# Return a nicely formatted string representation
|
427
429
|
def inspect
|
428
430
|
"[#{map(&:inspect).join(", ")}]"
|
429
431
|
end
|
@@ -140,7 +140,7 @@ module Nokogiri
|
|
140
140
|
|
141
141
|
# Relax any hardcoded limit from the parser. Off by default.
|
142
142
|
#
|
143
|
-
# ⚠
|
143
|
+
# ⚠ <b>It is UNSAFE to set this option</b> when parsing untrusted documents.
|
144
144
|
HUGE = 1 << 19
|
145
145
|
|
146
146
|
# Support line numbers up to <code>long int</code> (default is a <code>short int</code>). On
|
data/lib/nokogiri/xml/pp/node.rb
CHANGED
@@ -8,6 +8,11 @@ module Nokogiri
|
|
8
8
|
COLLECTIONS = [:attribute_nodes, :children]
|
9
9
|
|
10
10
|
def inspect
|
11
|
+
# handle the case where an exception is thrown during object construction
|
12
|
+
if respond_to?(:data_ptr?) && !data_ptr?
|
13
|
+
return "#<#{self.class}:#{format("0x%x", object_id)} (no data)>"
|
14
|
+
end
|
15
|
+
|
11
16
|
attributes = inspect_attributes.reject do |x|
|
12
17
|
attribute = send(x)
|
13
18
|
!attribute || (attribute.respond_to?(:empty?) && attribute.empty?)
|
@@ -21,7 +26,7 @@ module Nokogiri
|
|
21
26
|
"#{attribute}=#{send(attribute).inspect}"
|
22
27
|
end.join(" ")
|
23
28
|
end
|
24
|
-
"#<#{self.class
|
29
|
+
"#<#{self.class}:#{format("0x%x", object_id)} #{attributes}>"
|
25
30
|
end
|
26
31
|
|
27
32
|
def pretty_print(pp)
|
data/lib/nokogiri/xml/reader.rb
CHANGED
@@ -3,32 +3,33 @@
|
|
3
3
|
module Nokogiri
|
4
4
|
module XML
|
5
5
|
###
|
6
|
-
#
|
7
|
-
#
|
6
|
+
# The Reader parser allows you to effectively pull parse an \XML document. Once instantiated,
|
7
|
+
# call Nokogiri::XML::Reader#each to iterate over each node.
|
8
8
|
#
|
9
|
-
#
|
10
|
-
#
|
9
|
+
# Nokogiri::XML::Reader parses an \XML document similar to the way a cursor would move. The
|
10
|
+
# Reader is given an \XML document, and yields nodes to an each block.
|
11
|
+
#
|
12
|
+
# The Reader parser might be good for when you need the speed and low memory usage of a \SAX
|
13
|
+
# parser, but do not want to write a SAX::Document handler.
|
11
14
|
#
|
12
15
|
# Here is an example of usage:
|
13
16
|
#
|
14
|
-
# reader = Nokogiri::XML::Reader
|
17
|
+
# reader = Nokogiri::XML::Reader.new <<~XML
|
15
18
|
# <x xmlns:tenderlove='http://tenderlovemaking.com/'>
|
16
19
|
# <tenderlove:foo awesome='true'>snuggles!</tenderlove:foo>
|
17
20
|
# </x>
|
18
|
-
#
|
21
|
+
# XML
|
19
22
|
#
|
20
23
|
# reader.each do |node|
|
21
|
-
#
|
22
24
|
# # node is an instance of Nokogiri::XML::Reader
|
23
25
|
# puts node.name
|
24
|
-
#
|
25
26
|
# end
|
26
27
|
#
|
27
28
|
# ⚠ Nokogiri::XML::Reader#each can only be called once! Once the cursor moves through the entire
|
28
29
|
# document, you must parse the document again. It may be better to capture all information you
|
29
30
|
# need during a single iteration.
|
30
31
|
#
|
31
|
-
# ⚠ libxml2 does not support error recovery in the Reader parser. The
|
32
|
+
# ⚠ libxml2 does not support error recovery in the Reader parser. The +RECOVER+ ParseOption is
|
32
33
|
# ignored. If a syntax error is encountered during parsing, an exception will be raised.
|
33
34
|
class Reader
|
34
35
|
include Enumerable
|
@@ -66,23 +67,55 @@ module Nokogiri
|
|
66
67
|
TYPE_END_ELEMENT = 15
|
67
68
|
# Entity end node type
|
68
69
|
TYPE_END_ENTITY = 16
|
69
|
-
# XML Declaration node type
|
70
|
+
# \XML Declaration node type
|
70
71
|
TYPE_XML_DECLARATION = 17
|
71
72
|
|
72
73
|
# A list of errors encountered while parsing
|
73
74
|
attr_accessor :errors
|
74
75
|
|
75
|
-
# The XML source
|
76
|
+
# The \XML source
|
76
77
|
attr_reader :source
|
77
78
|
|
78
79
|
alias_method :self_closing?, :empty_element?
|
79
80
|
|
80
|
-
|
81
|
+
# :call-seq:
|
82
|
+
# Reader.new(input) { |options| ... } → Reader
|
83
|
+
# Reader.new(input, url:, encoding:, options:) { |options| ... } → Reader
|
84
|
+
#
|
85
|
+
# Create a new Reader to parse an \XML document.
|
86
|
+
#
|
87
|
+
# [Required Parameters]
|
88
|
+
# - +input+ (String | IO): The \XML document to parse.
|
89
|
+
#
|
90
|
+
# [Optional Keyword Arguments]
|
91
|
+
# - +url:+ (String) The base URL of the document.
|
92
|
+
# - +encoding:+ (String) The name of the encoding of the document.
|
93
|
+
# - +options:+ (Integer | ParseOptions) Options to control the parser behavior.
|
94
|
+
# Defaults to +ParseOptions::STRICT+.
|
95
|
+
#
|
96
|
+
# [Yields]
|
97
|
+
# If present, the block will be passed a Nokogiri::XML::ParseOptions object to modify before
|
98
|
+
# the document is parsed. See Nokogiri::XML::ParseOptions for more information.
|
99
|
+
def self.new(
|
100
|
+
string_or_io,
|
101
|
+
url_ = nil, encoding_ = nil, options_ = ParseOptions::STRICT,
|
102
|
+
url: url_, encoding: encoding_, options: options_
|
103
|
+
)
|
104
|
+
options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
|
105
|
+
yield options if block_given?
|
106
|
+
|
107
|
+
if string_or_io.respond_to?(:read)
|
108
|
+
return Reader.from_io(string_or_io, url, encoding, options.to_i)
|
109
|
+
end
|
110
|
+
|
111
|
+
Reader.from_memory(string_or_io, url, encoding, options.to_i)
|
112
|
+
end
|
113
|
+
|
114
|
+
private def initialize(source, url = nil, encoding = nil) # :nodoc:
|
81
115
|
@source = source
|
82
116
|
@errors = []
|
83
117
|
@encoding = encoding
|
84
118
|
end
|
85
|
-
private :initialize
|
86
119
|
|
87
120
|
# Get the attributes and namespaces of the current node as a Hash.
|
88
121
|
#
|