nokogiri 1.14.5 → 1.16.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +19 -15
- data/README.md +4 -1
- data/dependencies.yml +9 -8
- data/ext/nokogiri/extconf.rb +69 -26
- data/ext/nokogiri/html4_document.c +1 -2
- data/ext/nokogiri/html4_element_description.c +19 -14
- data/ext/nokogiri/html4_sax_parser_context.c +10 -16
- data/ext/nokogiri/html4_sax_push_parser.c +3 -3
- data/ext/nokogiri/nokogiri.c +46 -24
- data/ext/nokogiri/nokogiri.h +23 -5
- data/ext/nokogiri/test_global_handlers.c +1 -1
- data/ext/nokogiri/xml_attr.c +1 -1
- data/ext/nokogiri/xml_cdata.c +30 -17
- data/ext/nokogiri/xml_comment.c +1 -1
- data/ext/nokogiri/xml_document.c +113 -25
- data/ext/nokogiri/xml_document_fragment.c +1 -1
- data/ext/nokogiri/xml_dtd.c +1 -1
- data/ext/nokogiri/xml_element_content.c +32 -29
- data/ext/nokogiri/xml_element_decl.c +5 -5
- data/ext/nokogiri/xml_encoding_handler.c +12 -4
- data/ext/nokogiri/xml_entity_reference.c +1 -1
- data/ext/nokogiri/xml_namespace.c +11 -16
- data/ext/nokogiri/xml_node.c +13 -16
- data/ext/nokogiri/xml_node_set.c +125 -105
- data/ext/nokogiri/xml_processing_instruction.c +1 -1
- data/ext/nokogiri/xml_reader.c +61 -74
- data/ext/nokogiri/xml_relax_ng.c +66 -79
- data/ext/nokogiri/xml_sax_parser.c +24 -5
- data/ext/nokogiri/xml_sax_parser_context.c +50 -25
- data/ext/nokogiri/xml_sax_push_parser.c +30 -9
- data/ext/nokogiri/xml_schema.c +94 -115
- data/ext/nokogiri/xml_syntax_error.c +3 -3
- data/ext/nokogiri/xml_text.c +26 -13
- data/ext/nokogiri/xml_xpath_context.c +153 -83
- data/ext/nokogiri/xslt_stylesheet.c +111 -53
- data/gumbo-parser/Makefile +18 -0
- data/gumbo-parser/src/error.c +8 -4
- data/gumbo-parser/src/foreign_attrs.c +13 -14
- data/gumbo-parser/src/foreign_attrs.gperf +1 -1
- data/gumbo-parser/src/parser.c +21 -5
- data/gumbo-parser/src/tokenizer.c +1 -0
- data/lib/nokogiri/css/parser_extras.rb +1 -1
- data/lib/nokogiri/css/xpath_visitor.rb +3 -23
- data/lib/nokogiri/extension.rb +1 -1
- data/lib/nokogiri/html4/document.rb +1 -1
- data/lib/nokogiri/html4/document_fragment.rb +1 -1
- data/lib/nokogiri/html4/element_description_defaults.rb +1821 -353
- data/lib/nokogiri/html4/encoding_reader.rb +1 -1
- data/lib/nokogiri/html5/document_fragment.rb +1 -1
- data/lib/nokogiri/html5/node.rb +5 -0
- data/lib/nokogiri/html5.rb +0 -63
- data/lib/nokogiri/version/constant.rb +1 -1
- data/lib/nokogiri/version/info.rb +6 -5
- data/lib/nokogiri/xml/attr.rb +2 -2
- data/lib/nokogiri/xml/attribute_decl.rb +4 -2
- data/lib/nokogiri/xml/document.rb +4 -5
- data/lib/nokogiri/xml/document_fragment.rb +3 -3
- data/lib/nokogiri/xml/element_content.rb +10 -2
- data/lib/nokogiri/xml/element_decl.rb +4 -2
- data/lib/nokogiri/xml/entity_decl.rb +4 -2
- data/lib/nokogiri/xml/namespace.rb +1 -2
- data/lib/nokogiri/xml/node/save_options.rb +8 -0
- data/lib/nokogiri/xml/node.rb +53 -37
- data/lib/nokogiri/xml/node_set.rb +3 -3
- data/lib/nokogiri/xml/pp/node.rb +23 -12
- data/lib/nokogiri/xml/reader.rb +10 -9
- data/lib/nokogiri/xml/sax/document.rb +1 -1
- data/lib/nokogiri/xml/searchable.rb +21 -13
- data/lib/nokogiri/xml/syntax_error.rb +1 -1
- data/lib/nokogiri/xml.rb +1 -1
- data/lib/nokogiri/xslt/stylesheet.rb +29 -7
- data/lib/nokogiri/xslt.rb +74 -4
- data/lib/nokogiri.rb +13 -5
- data/lib/xsd/xmlparser/nokogiri.rb +1 -1
- data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
- data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
- data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
- data/ports/archives/libxml2-2.12.7.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.39.tar.xz +0 -0
- metadata +14 -12
- data/patches/libxslt/0001-update-automake-files-for-arm64.patch +0 -3037
- data/ports/archives/libxml2-2.10.4.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.37.tar.xz +0 -0
@@ -94,7 +94,7 @@ module Nokogiri
|
|
94
94
|
# no support for a call without len
|
95
95
|
|
96
96
|
unless @firstchunk
|
97
|
-
(@firstchunk = @io.read(len)) ||
|
97
|
+
(@firstchunk = @io.read(len)) || return
|
98
98
|
|
99
99
|
# This implementation expects that the first call from
|
100
100
|
# htmlReadIO() is made with a length long enough (~1KB) to
|
@@ -36,7 +36,7 @@ module Nokogiri
|
|
36
36
|
attr_reader :quirks_mode
|
37
37
|
|
38
38
|
# Create a document fragment.
|
39
|
-
def initialize(doc, tags = nil, ctx = nil, options = {})
|
39
|
+
def initialize(doc, tags = nil, ctx = nil, options = {}) # rubocop:disable Lint/MissingSuper
|
40
40
|
self.document = doc
|
41
41
|
self.errors = []
|
42
42
|
return self unless tags
|
data/lib/nokogiri/html5/node.rb
CHANGED
@@ -17,6 +17,9 @@
|
|
17
17
|
# limitations under the License.
|
18
18
|
#
|
19
19
|
|
20
|
+
#
|
21
|
+
# TODO: this whole file should go away. maybe make it a decorator?
|
22
|
+
#
|
20
23
|
require_relative "../xml/node"
|
21
24
|
|
22
25
|
module Nokogiri
|
@@ -50,6 +53,8 @@ module Nokogiri
|
|
50
53
|
config = XML::Node::SaveOptions.new(save_options.to_i)
|
51
54
|
yield config if block_given?
|
52
55
|
|
56
|
+
encoding = encoding.is_a?(Encoding) ? encoding.name : encoding
|
57
|
+
|
53
58
|
config_options = config.options
|
54
59
|
if config_options & (XML::Node::SaveOptions::AS_XML | XML::Node::SaveOptions::AS_XHTML) != 0
|
55
60
|
# Use Nokogiri's serializing code.
|
data/lib/nokogiri/html5.rb
CHANGED
@@ -239,20 +239,6 @@ module Nokogiri
|
|
239
239
|
DocumentFragment.parse(string, encoding, options)
|
240
240
|
end
|
241
241
|
|
242
|
-
# Fetch and parse a HTML document from the web, following redirects,
|
243
|
-
# handling https, and determining the character encoding using HTML5
|
244
|
-
# rules. +uri+ may be a +String+ or a +URI+. +options+ contains
|
245
|
-
# http headers and special options. Everything which is not a
|
246
|
-
# special option is considered a header. Special options include:
|
247
|
-
# * :follow_limit => number of redirects which are followed
|
248
|
-
# * :basic_auth => [username, password]
|
249
|
-
def get(uri, options = {})
|
250
|
-
# TODO: deprecate
|
251
|
-
warn("Nokogiri::HTML5.get is deprecated and will be removed in a future version of Nokogiri.",
|
252
|
-
uplevel: 1, category: :deprecated)
|
253
|
-
get_impl(uri, options)
|
254
|
-
end
|
255
|
-
|
256
242
|
# :nodoc:
|
257
243
|
def read_and_encode(string, encoding)
|
258
244
|
# Read the string with the given encoding.
|
@@ -280,55 +266,6 @@ module Nokogiri
|
|
280
266
|
|
281
267
|
private
|
282
268
|
|
283
|
-
def get_impl(uri, options = {})
|
284
|
-
headers = options.clone
|
285
|
-
headers = { follow_limit: headers } if Numeric === headers # deprecated
|
286
|
-
limit = headers[:follow_limit] ? headers.delete(:follow_limit).to_i : 10
|
287
|
-
|
288
|
-
require "net/http"
|
289
|
-
uri = URI(uri) unless URI === uri
|
290
|
-
|
291
|
-
http = Net::HTTP.new(uri.host, uri.port)
|
292
|
-
|
293
|
-
# TLS / SSL support
|
294
|
-
http.use_ssl = true if uri.scheme == "https"
|
295
|
-
|
296
|
-
# Pass through Net::HTTP override values, which currently include:
|
297
|
-
# :ca_file, :ca_path, :cert, :cert_store, :ciphers,
|
298
|
-
# :close_on_empty_response, :continue_timeout, :key, :open_timeout,
|
299
|
-
# :read_timeout, :ssl_timeout, :ssl_version, :use_ssl,
|
300
|
-
# :verify_callback, :verify_depth, :verify_mode
|
301
|
-
options.each do |key, _value|
|
302
|
-
http.send("#{key}=", headers.delete(key)) if http.respond_to?("#{key}=")
|
303
|
-
end
|
304
|
-
|
305
|
-
request = Net::HTTP::Get.new(uri.request_uri)
|
306
|
-
|
307
|
-
# basic authentication
|
308
|
-
auth = headers.delete(:basic_auth)
|
309
|
-
auth ||= [uri.user, uri.password] if uri.user && uri.password
|
310
|
-
request.basic_auth(auth.first, auth.last) if auth
|
311
|
-
|
312
|
-
# remaining options are treated as headers
|
313
|
-
headers.each { |key, value| request[key.to_s] = value.to_s }
|
314
|
-
|
315
|
-
response = http.request(request)
|
316
|
-
|
317
|
-
case response
|
318
|
-
when Net::HTTPSuccess
|
319
|
-
doc = parse(reencode(response.body, response["content-type"]), options)
|
320
|
-
doc.instance_variable_set(:@response, response)
|
321
|
-
doc.class.send(:attr_reader, :response)
|
322
|
-
doc
|
323
|
-
when Net::HTTPRedirection
|
324
|
-
response.value if limit <= 1
|
325
|
-
location = URI.join(uri, response["location"])
|
326
|
-
get_impl(location, options.merge(follow_limit: limit - 1))
|
327
|
-
else
|
328
|
-
response.value
|
329
|
-
end
|
330
|
-
end
|
331
|
-
|
332
269
|
# Charset sniffing is a complex and controversial topic that understandably isn't done _by
|
333
270
|
# default_ by the Ruby Net::HTTP library. This being said, it is a very real problem for
|
334
271
|
# consumers of HTML as the default for HTML is iso-8859-1, most "good" producers use utf-8, and
|
@@ -94,11 +94,14 @@ module Nokogiri
|
|
94
94
|
nokogiri["version"] = Nokogiri::VERSION
|
95
95
|
|
96
96
|
unless jruby?
|
97
|
-
# enable gems
|
97
|
+
# enable gems to build against Nokogiri with the following in their extconf.rb:
|
98
98
|
#
|
99
99
|
# append_cflags(Nokogiri::VERSION_INFO["nokogiri"]["cppflags"])
|
100
100
|
# append_ldflags(Nokogiri::VERSION_INFO["nokogiri"]["ldflags"])
|
101
101
|
#
|
102
|
+
# though, this won't work on all platform and versions of Ruby, and won't be supported
|
103
|
+
# forever, see https://github.com/sparklemotion/nokogiri/discussions/2746 for context.
|
104
|
+
#
|
102
105
|
cppflags = ["-I#{header_directory.shellescape}"]
|
103
106
|
ldflags = []
|
104
107
|
|
@@ -108,7 +111,8 @@ module Nokogiri
|
|
108
111
|
end
|
109
112
|
|
110
113
|
if windows?
|
111
|
-
# on windows,
|
114
|
+
# on windows, third party libraries that wish to link against nokogiri
|
115
|
+
# should link against nokogiri.so to resolve symbols. see #2167
|
112
116
|
lib_directory = File.expand_path(File.join(File.dirname(__FILE__), "../#{ruby_minor}"))
|
113
117
|
unless File.exist?(lib_directory)
|
114
118
|
lib_directory = File.expand_path(File.join(File.dirname(__FILE__), ".."))
|
@@ -136,9 +140,6 @@ module Nokogiri
|
|
136
140
|
libxml["source"] = "packaged"
|
137
141
|
libxml["precompiled"] = libxml2_precompiled?
|
138
142
|
libxml["patches"] = Nokogiri::LIBXML2_PATCHES
|
139
|
-
|
140
|
-
# this is for nokogumbo and shouldn't be forever
|
141
|
-
libxml["libxml2_path"] = header_directory
|
142
143
|
else
|
143
144
|
libxml["source"] = "system"
|
144
145
|
end
|
data/lib/nokogiri/xml/attr.rb
CHANGED
@@ -18,8 +18,6 @@ module Nokogiri
|
|
18
18
|
# - +value+ → (String) The value of the attribute.
|
19
19
|
# - +namespace+ → (Namespace, nil) The Namespace of the attribute, or +nil+ if there is no namespace.
|
20
20
|
#
|
21
|
-
# ⚡ This is an experimental feature, available since v1.14.0
|
22
|
-
#
|
23
21
|
# *Example*
|
24
22
|
#
|
25
23
|
# doc = Nokogiri::XML.parse(<<~XML)
|
@@ -52,6 +50,8 @@ module Nokogiri
|
|
52
50
|
# # href = "http://nokogiri.org/ns/noko"
|
53
51
|
# # })}
|
54
52
|
#
|
53
|
+
# Since v1.14.0
|
54
|
+
#
|
55
55
|
def deconstruct_keys(keys)
|
56
56
|
{ name: name, value: value, namespace: namespace }
|
57
57
|
end
|
@@ -12,8 +12,10 @@ module Nokogiri
|
|
12
12
|
undef_method :namespace_definitions
|
13
13
|
undef_method :line if method_defined?(:line)
|
14
14
|
|
15
|
-
|
16
|
-
|
15
|
+
private
|
16
|
+
|
17
|
+
def inspect_attributes
|
18
|
+
[:to_s]
|
17
19
|
end
|
18
20
|
end
|
19
21
|
end
|
@@ -174,8 +174,7 @@ module Nokogiri
|
|
174
174
|
# Since v1.12.4
|
175
175
|
attr_accessor :namespace_inheritance
|
176
176
|
|
177
|
-
# :nodoc:
|
178
|
-
def initialize(*args) # rubocop:disable Lint/MissingSuper
|
177
|
+
def initialize(*args) # :nodoc: # rubocop:disable Lint/MissingSuper
|
179
178
|
@errors = []
|
180
179
|
@decorators = nil
|
181
180
|
@namespace_inheritance = false
|
@@ -330,7 +329,7 @@ module Nokogiri
|
|
330
329
|
# Validate this Document against it's DTD. Returns a list of errors on
|
331
330
|
# the document or +nil+ when there is no DTD.
|
332
331
|
def validate
|
333
|
-
return
|
332
|
+
return unless internal_subset
|
334
333
|
|
335
334
|
internal_subset.validate(self)
|
336
335
|
end
|
@@ -427,8 +426,6 @@ module Nokogiri
|
|
427
426
|
# instructions. If you have a use case and would like this functionality, please let us know
|
428
427
|
# by opening an issue or a discussion on the github project.
|
429
428
|
#
|
430
|
-
# ⚡ This is an experimental feature, available since v1.14.0
|
431
|
-
#
|
432
429
|
# *Example*
|
433
430
|
#
|
434
431
|
# doc = Nokogiri::XML.parse(<<~XML)
|
@@ -455,6 +452,8 @@ module Nokogiri
|
|
455
452
|
# doc.deconstruct_keys([:root])
|
456
453
|
# # => {:root=>nil}
|
457
454
|
#
|
455
|
+
# Since v1.14.0
|
456
|
+
#
|
458
457
|
def deconstruct_keys(keys)
|
459
458
|
{ root: root }
|
460
459
|
end
|
@@ -16,7 +16,7 @@ module Nokogiri
|
|
16
16
|
# If +ctx+ is present, it is used as a context node for the
|
17
17
|
# subtree created, e.g., namespaces will be resolved relative
|
18
18
|
# to +ctx+.
|
19
|
-
def initialize(document, tags = nil, ctx = nil, options = ParseOptions::DEFAULT_XML)
|
19
|
+
def initialize(document, tags = nil, ctx = nil, options = ParseOptions::DEFAULT_XML) # rubocop:disable Lint/MissingSuper
|
20
20
|
return self unless tags
|
21
21
|
|
22
22
|
options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
|
@@ -154,8 +154,6 @@ module Nokogiri
|
|
154
154
|
# root elements, you should deconstruct the array returned by
|
155
155
|
# <tt>DocumentFragment#elements</tt>.
|
156
156
|
#
|
157
|
-
# ⚡ This is an experimental feature, available since v1.14.0
|
158
|
-
#
|
159
157
|
# *Example*
|
160
158
|
#
|
161
159
|
# frag = Nokogiri::HTML5.fragment(<<~HTML)
|
@@ -187,6 +185,8 @@ module Nokogiri
|
|
187
185
|
# # }),
|
188
186
|
# # #(Element:0x398 { name = "div", children = [ #(Text "End")] })]
|
189
187
|
#
|
188
|
+
# Since v1.14.0
|
189
|
+
#
|
190
190
|
def deconstruct
|
191
191
|
children.to_a
|
192
192
|
end
|
@@ -11,9 +11,11 @@ module Nokogiri
|
|
11
11
|
# ]>
|
12
12
|
# </root>
|
13
13
|
#
|
14
|
-
# ElementContent represents the tree inside the <!ELEMENT> tag shown above
|
15
|
-
#
|
14
|
+
# ElementContent represents the binary tree inside the <!ELEMENT> tag shown above that lists the
|
15
|
+
# possible content for the div1 tag.
|
16
16
|
class ElementContent
|
17
|
+
include Nokogiri::XML::PP::Node
|
18
|
+
|
17
19
|
# Possible definitions of type
|
18
20
|
PCDATA = 1
|
19
21
|
ELEMENT = 2
|
@@ -33,6 +35,12 @@ module Nokogiri
|
|
33
35
|
def children
|
34
36
|
[c1, c2].compact
|
35
37
|
end
|
38
|
+
|
39
|
+
private
|
40
|
+
|
41
|
+
def inspect_attributes
|
42
|
+
[:prefix, :name, :type, :occur, :children]
|
43
|
+
end
|
36
44
|
end
|
37
45
|
end
|
38
46
|
end
|
@@ -7,8 +7,10 @@ module Nokogiri
|
|
7
7
|
undef_method :namespace_definitions
|
8
8
|
undef_method :line if method_defined?(:line)
|
9
9
|
|
10
|
-
|
11
|
-
|
10
|
+
private
|
11
|
+
|
12
|
+
def inspect_attributes
|
13
|
+
[:to_s]
|
12
14
|
end
|
13
15
|
end
|
14
16
|
end
|
@@ -16,8 +16,6 @@ module Nokogiri
|
|
16
16
|
# - +prefix+ → (String, nil) The namespace's prefix, or +nil+ if there is no prefix (e.g., default namespace).
|
17
17
|
# - +href+ → (String) The namespace's URI
|
18
18
|
#
|
19
|
-
# ⚡ This is an experimental feature, available since v1.14.0
|
20
|
-
#
|
21
19
|
# *Example*
|
22
20
|
#
|
23
21
|
# doc = Nokogiri::XML.parse(<<~XML)
|
@@ -43,6 +41,7 @@ module Nokogiri
|
|
43
41
|
# doc.root.elements.last.namespace.deconstruct_keys([:prefix, :href])
|
44
42
|
# # => {:prefix=>"noko", :href=>"http://nokogiri.org/ns/noko"}
|
45
43
|
#
|
44
|
+
# Since v1.14.0
|
46
45
|
#
|
47
46
|
def deconstruct_keys(keys)
|
48
47
|
{ prefix: prefix, href: href }
|
@@ -62,6 +62,14 @@ module Nokogiri
|
|
62
62
|
end
|
63
63
|
|
64
64
|
alias_method :to_i, :options
|
65
|
+
|
66
|
+
def inspect
|
67
|
+
options = []
|
68
|
+
self.class.constants.each do |k|
|
69
|
+
options << k.downcase if send(:"#{k.downcase}?")
|
70
|
+
end
|
71
|
+
super.sub(/>$/, " " + options.join(", ") + ">")
|
72
|
+
end
|
65
73
|
end
|
66
74
|
end
|
67
75
|
end
|
data/lib/nokogiri/xml/node.rb
CHANGED
@@ -1049,29 +1049,35 @@ module Nokogiri
|
|
1049
1049
|
|
1050
1050
|
return Nokogiri::XML::NodeSet.new(document) if contents.empty?
|
1051
1051
|
|
1052
|
-
# libxml2 does not obey the +recover+ option after encountering errors during +in_context+
|
1053
|
-
# parsing, and so this horrible hack is here to try to emulate recovery behavior.
|
1054
|
-
#
|
1055
|
-
# Unfortunately, this means we're no longer parsing "in context" and so namespaces that
|
1056
|
-
# would have been inherited from the context node won't be handled correctly. This hack was
|
1057
|
-
# written in 2010, and I regret it, because it's silently degrading functionality in a way
|
1058
|
-
# that's not easily prevented (or even detected).
|
1059
|
-
#
|
1060
|
-
# I think preferable behavior would be to either:
|
1061
|
-
#
|
1062
|
-
# a. add an error noting that we "fell back" and pointing the user to turning off the +recover+ option
|
1063
|
-
# b. don't recover, but raise a sensible exception
|
1064
|
-
#
|
1065
|
-
# For context and background: https://github.com/sparklemotion/nokogiri/issues/313
|
1066
|
-
# FIXME bug report: https://github.com/sparklemotion/nokogiri/issues/2092
|
1067
1052
|
error_count = document.errors.length
|
1068
1053
|
node_set = in_context(contents, options.to_i)
|
1069
|
-
if
|
1070
|
-
|
1054
|
+
if document.errors.length > error_count
|
1055
|
+
raise document.errors[error_count] unless options.recover?
|
1056
|
+
|
1057
|
+
if node_set.empty?
|
1058
|
+
# libxml2 < 2.13 does not obey the +recover+ option after encountering errors during
|
1059
|
+
# +in_context+ parsing, and so this horrible hack is here to try to emulate recovery
|
1060
|
+
# behavior.
|
1061
|
+
#
|
1062
|
+
# (Note that HTML4 fragment parsing seems to have been fixed in abd74186, and XML
|
1063
|
+
# fragment parsing is fixed in 1c106edf. Both are in 2.13.)
|
1064
|
+
#
|
1065
|
+
# Unfortunately, this means we're no longer parsing "in context" and so namespaces that
|
1066
|
+
# would have been inherited from the context node won't be handled correctly. This hack
|
1067
|
+
# was written in 2010, and I regret it, because it's silently degrading functionality in
|
1068
|
+
# a way that's not easily prevented (or even detected).
|
1069
|
+
#
|
1070
|
+
# I think preferable behavior would be to either:
|
1071
|
+
#
|
1072
|
+
# a. add an error noting that we "fell back" and pointing the user to turning off the
|
1073
|
+
# +recover+ option
|
1074
|
+
# b. don't recover, but raise a sensible exception
|
1075
|
+
#
|
1076
|
+
# For context and background:
|
1077
|
+
# - https://github.com/sparklemotion/nokogiri/issues/313
|
1078
|
+
# - https://github.com/sparklemotion/nokogiri/issues/2092
|
1071
1079
|
fragment = document.related_class("DocumentFragment").parse(contents)
|
1072
1080
|
node_set = fragment.children
|
1073
|
-
else
|
1074
|
-
raise document.errors[error_count]
|
1075
1081
|
end
|
1076
1082
|
end
|
1077
1083
|
node_set
|
@@ -1165,7 +1171,7 @@ module Nokogiri
|
|
1165
1171
|
# Fetch the Nokogiri::HTML4::ElementDescription for this node. Returns
|
1166
1172
|
# nil on XML documents and on unknown tags.
|
1167
1173
|
def description
|
1168
|
-
return
|
1174
|
+
return if document.xml?
|
1169
1175
|
|
1170
1176
|
Nokogiri::HTML4::ElementDescription[name]
|
1171
1177
|
end
|
@@ -1254,8 +1260,8 @@ module Nokogiri
|
|
1254
1260
|
# Compare two Node objects with respect to their Document. Nodes from
|
1255
1261
|
# different documents cannot be compared.
|
1256
1262
|
def <=>(other)
|
1257
|
-
return
|
1258
|
-
return
|
1263
|
+
return unless other.is_a?(Nokogiri::XML::Node)
|
1264
|
+
return unless document == other.document
|
1259
1265
|
|
1260
1266
|
compare(other)
|
1261
1267
|
end
|
@@ -1269,15 +1275,16 @@ module Nokogiri
|
|
1269
1275
|
#
|
1270
1276
|
# These two statements are equivalent:
|
1271
1277
|
#
|
1272
|
-
#
|
1278
|
+
# node.serialize(encoding: 'UTF-8', save_with: FORMAT | AS_XML)
|
1273
1279
|
#
|
1274
1280
|
# or
|
1275
1281
|
#
|
1276
|
-
# node.serialize(:
|
1282
|
+
# node.serialize(encoding: 'UTF-8') do |config|
|
1277
1283
|
# config.format.as_xml
|
1278
1284
|
# end
|
1279
1285
|
#
|
1280
1286
|
def serialize(*args, &block)
|
1287
|
+
# TODO: deprecate non-hash options, see 46c68ed 2009-06-20 for context
|
1281
1288
|
options = if args.first.is_a?(Hash)
|
1282
1289
|
args.shift
|
1283
1290
|
else
|
@@ -1310,7 +1317,7 @@ module Nokogiri
|
|
1310
1317
|
###
|
1311
1318
|
# Serialize this Node to XML using +options+
|
1312
1319
|
#
|
1313
|
-
# doc.to_xml(:
|
1320
|
+
# doc.to_xml(indent: 5, encoding: 'UTF-8')
|
1314
1321
|
#
|
1315
1322
|
# See Node#write_to for a list of +options+
|
1316
1323
|
def to_xml(options = {})
|
@@ -1321,7 +1328,7 @@ module Nokogiri
|
|
1321
1328
|
###
|
1322
1329
|
# Serialize this Node to XHTML using +options+
|
1323
1330
|
#
|
1324
|
-
# doc.to_xhtml(:
|
1331
|
+
# doc.to_xhtml(indent: 5, encoding: 'UTF-8')
|
1325
1332
|
#
|
1326
1333
|
# See Node#write_to for a list of +options+
|
1327
1334
|
def to_xhtml(options = {})
|
@@ -1329,25 +1336,32 @@ module Nokogiri
|
|
1329
1336
|
end
|
1330
1337
|
|
1331
1338
|
###
|
1332
|
-
#
|
1333
|
-
#
|
1339
|
+
# :call-seq:
|
1340
|
+
# write_to(io, *options)
|
1341
|
+
#
|
1342
|
+
# Serialize this node or document to +io+.
|
1343
|
+
#
|
1344
|
+
# [Parameters]
|
1345
|
+
# - +io+ (IO) An IO-like object to which the serialized content will be written.
|
1346
|
+
# - +options+ (Hash) See below
|
1334
1347
|
#
|
1335
|
-
#
|
1336
|
-
# * +:
|
1337
|
-
# * +:
|
1338
|
-
# * +:
|
1348
|
+
# [Options]
|
1349
|
+
# * +:encoding+ (String or Encoding) specify the encoding of the output (defaults to document encoding)
|
1350
|
+
# * +:indent_text+ (String) the indentation text (defaults to <code>" "</code>)
|
1351
|
+
# * +:indent+ (Integer) the number of +:indent_text+ to use (defaults to +2+)
|
1352
|
+
# * +:save_with+ (Integer) a combination of SaveOptions constants
|
1339
1353
|
#
|
1340
1354
|
# To save with UTF-8 indented twice:
|
1341
1355
|
#
|
1342
|
-
# node.write_to(io, :
|
1356
|
+
# node.write_to(io, encoding: 'UTF-8', indent: 2)
|
1343
1357
|
#
|
1344
1358
|
# To save indented with two dashes:
|
1345
1359
|
#
|
1346
|
-
# node.write_to(io, :
|
1360
|
+
# node.write_to(io, indent_text: '-', indent: 2)
|
1347
1361
|
#
|
1348
1362
|
def write_to(io, *options)
|
1349
1363
|
options = options.first.is_a?(Hash) ? options.shift : {}
|
1350
|
-
encoding = options[:encoding] || options[0]
|
1364
|
+
encoding = options[:encoding] || options[0] || document.encoding
|
1351
1365
|
if Nokogiri.jruby?
|
1352
1366
|
save_options = options[:save_with] || options[1]
|
1353
1367
|
indent_times = options[:indent] || 0
|
@@ -1365,6 +1379,8 @@ module Nokogiri
|
|
1365
1379
|
config = SaveOptions.new(save_options.to_i)
|
1366
1380
|
yield config if block_given?
|
1367
1381
|
|
1382
|
+
encoding = encoding.is_a?(Encoding) ? encoding.name : encoding
|
1383
|
+
|
1368
1384
|
native_write_to(io, encoding, indentation, config.options)
|
1369
1385
|
end
|
1370
1386
|
|
@@ -1420,8 +1436,6 @@ module Nokogiri
|
|
1420
1436
|
# - +content+ → (String) The contents of all the text nodes in this node's subtree. See #content.
|
1421
1437
|
# - +inner_html+ → (String) The inner markup for the children of this node. See #inner_html.
|
1422
1438
|
#
|
1423
|
-
# ⚡ This is an experimental feature, available since v1.14.0
|
1424
|
-
#
|
1425
1439
|
# *Example*
|
1426
1440
|
#
|
1427
1441
|
# doc = Nokogiri::XML.parse(<<~XML)
|
@@ -1456,6 +1470,8 @@ module Nokogiri
|
|
1456
1470
|
# # value = "def"
|
1457
1471
|
# # })]}
|
1458
1472
|
#
|
1473
|
+
# Since v1.14.0
|
1474
|
+
#
|
1459
1475
|
def deconstruct_keys(keys)
|
1460
1476
|
requested_keys = DECONSTRUCT_KEYS & keys
|
1461
1477
|
{}.tap do |values|
|
@@ -372,7 +372,7 @@ module Nokogiri
|
|
372
372
|
# Removes the last element from set and returns it, or +nil+ if
|
373
373
|
# the set is empty
|
374
374
|
def pop
|
375
|
-
return
|
375
|
+
return if length == 0
|
376
376
|
|
377
377
|
delete(last)
|
378
378
|
end
|
@@ -381,7 +381,7 @@ module Nokogiri
|
|
381
381
|
# Returns the first element of the NodeSet and removes it. Returns
|
382
382
|
# +nil+ if the set is empty.
|
383
383
|
def shift
|
384
|
-
return
|
384
|
+
return if length == 0
|
385
385
|
|
386
386
|
delete(first)
|
387
387
|
end
|
@@ -435,7 +435,7 @@ module Nokogiri
|
|
435
435
|
#
|
436
436
|
# Returns the members of this NodeSet as an array, to use in pattern matching.
|
437
437
|
#
|
438
|
-
#
|
438
|
+
# Since v1.14.0
|
439
439
|
#
|
440
440
|
def deconstruct
|
441
441
|
to_a
|
data/lib/nokogiri/xml/pp/node.rb
CHANGED
@@ -13,9 +13,14 @@ module Nokogiri
|
|
13
13
|
!attribute || (attribute.respond_to?(:empty?) && attribute.empty?)
|
14
14
|
rescue NoMethodError
|
15
15
|
true
|
16
|
-
end
|
17
|
-
|
18
|
-
|
16
|
+
end
|
17
|
+
attributes = if inspect_attributes.length == 1
|
18
|
+
send(attributes.first).inspect
|
19
|
+
else
|
20
|
+
attributes.map do |attribute|
|
21
|
+
"#{attribute}=#{send(attribute).inspect}"
|
22
|
+
end.join(" ")
|
23
|
+
end
|
19
24
|
"#<#{self.class.name}:#{format("0x%x", object_id)} #{attributes}>"
|
20
25
|
end
|
21
26
|
|
@@ -23,6 +28,7 @@ module Nokogiri
|
|
23
28
|
nice_name = self.class.name.split("::").last
|
24
29
|
pp.group(2, "#(#{nice_name}:#{format("0x%x", object_id)} {", "})") do
|
25
30
|
pp.breakable
|
31
|
+
|
26
32
|
attrs = inspect_attributes.filter_map do |t|
|
27
33
|
[t, send(t)] if respond_to?(t)
|
28
34
|
end.find_all do |x|
|
@@ -35,19 +41,24 @@ module Nokogiri
|
|
35
41
|
end
|
36
42
|
end
|
37
43
|
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
+
if inspect_attributes.length == 1
|
45
|
+
pp.pp(attrs.first.last)
|
46
|
+
else
|
47
|
+
pp.seplist(attrs) do |v|
|
48
|
+
if COLLECTIONS.include?(v.first)
|
49
|
+
pp.group(2, "#{v.first} = [", "]") do
|
50
|
+
pp.breakable
|
51
|
+
pp.seplist(v.last) do |item|
|
52
|
+
pp.pp(item)
|
53
|
+
end
|
44
54
|
end
|
55
|
+
else
|
56
|
+
pp.text("#{v.first} = ")
|
57
|
+
pp.pp(v.last)
|
45
58
|
end
|
46
|
-
else
|
47
|
-
pp.text("#{v.first} = ")
|
48
|
-
pp.pp(v.last)
|
49
59
|
end
|
50
60
|
end
|
61
|
+
|
51
62
|
pp.breakable
|
52
63
|
end
|
53
64
|
end
|
data/lib/nokogiri/xml/reader.rb
CHANGED
@@ -3,9 +3,11 @@
|
|
3
3
|
module Nokogiri
|
4
4
|
module XML
|
5
5
|
###
|
6
|
-
# Nokogiri::XML::Reader parses an XML document similar to the way a cursor
|
7
|
-
#
|
8
|
-
#
|
6
|
+
# Nokogiri::XML::Reader parses an XML document similar to the way a cursor would move. The
|
7
|
+
# Reader is given an XML document, and yields nodes to an each block.
|
8
|
+
#
|
9
|
+
# The Reader parser might be good for when you need the speed and low memory usage of the SAX
|
10
|
+
# parser, but do not want to write a Document handler.
|
9
11
|
#
|
10
12
|
# Here is an example of usage:
|
11
13
|
#
|
@@ -22,13 +24,12 @@ module Nokogiri
|
|
22
24
|
#
|
23
25
|
# end
|
24
26
|
#
|
25
|
-
#
|
26
|
-
#
|
27
|
-
#
|
28
|
-
# need during the first iteration.
|
27
|
+
# ⚠ Nokogiri::XML::Reader#each can only be called once! Once the cursor moves through the entire
|
28
|
+
# document, you must parse the document again. It may be better to capture all information you
|
29
|
+
# need during a single iteration.
|
29
30
|
#
|
30
|
-
#
|
31
|
-
#
|
31
|
+
# ⚠ libxml2 does not support error recovery in the Reader parser. The `RECOVER` ParseOption is
|
32
|
+
# ignored. If a syntax error is encountered during parsing, an exception will be raised.
|
32
33
|
class Reader
|
33
34
|
include Enumerable
|
34
35
|
|
@@ -100,7 +100,7 @@ module Nokogiri
|
|
100
100
|
# +prefix+ is the namespace prefix for the element
|
101
101
|
# +uri+ is the associated namespace URI
|
102
102
|
# +ns+ is a hash of namespace prefix:urls associated with the element
|
103
|
-
def start_element_namespace(name, attrs = [], prefix = nil, uri = nil, ns = [])
|
103
|
+
def start_element_namespace(name, attrs = [], prefix = nil, uri = nil, ns = []) # rubocop:disable Metrics/ParameterLists
|
104
104
|
###
|
105
105
|
# Deal with SAX v1 interface
|
106
106
|
name = [prefix, name].compact.join(":")
|