nokogiri 1.14.2 → 1.16.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +19 -15
- data/README.md +4 -1
- data/dependencies.yml +9 -8
- data/ext/nokogiri/extconf.rb +69 -26
- data/ext/nokogiri/html4_document.c +1 -2
- data/ext/nokogiri/html4_element_description.c +19 -14
- data/ext/nokogiri/html4_sax_parser_context.c +10 -16
- data/ext/nokogiri/html4_sax_push_parser.c +3 -3
- data/ext/nokogiri/nokogiri.c +46 -24
- data/ext/nokogiri/nokogiri.h +23 -5
- data/ext/nokogiri/test_global_handlers.c +1 -1
- data/ext/nokogiri/xml_attr.c +1 -1
- data/ext/nokogiri/xml_cdata.c +30 -17
- data/ext/nokogiri/xml_comment.c +1 -1
- data/ext/nokogiri/xml_document.c +113 -25
- data/ext/nokogiri/xml_document_fragment.c +1 -1
- data/ext/nokogiri/xml_dtd.c +1 -1
- data/ext/nokogiri/xml_element_content.c +32 -29
- data/ext/nokogiri/xml_element_decl.c +5 -5
- data/ext/nokogiri/xml_encoding_handler.c +12 -4
- data/ext/nokogiri/xml_entity_reference.c +1 -1
- data/ext/nokogiri/xml_namespace.c +11 -16
- data/ext/nokogiri/xml_node.c +13 -16
- data/ext/nokogiri/xml_node_set.c +125 -105
- data/ext/nokogiri/xml_processing_instruction.c +1 -1
- data/ext/nokogiri/xml_reader.c +61 -74
- data/ext/nokogiri/xml_relax_ng.c +66 -79
- data/ext/nokogiri/xml_sax_parser.c +24 -5
- data/ext/nokogiri/xml_sax_parser_context.c +50 -25
- data/ext/nokogiri/xml_sax_push_parser.c +30 -9
- data/ext/nokogiri/xml_schema.c +94 -115
- data/ext/nokogiri/xml_syntax_error.c +3 -3
- data/ext/nokogiri/xml_text.c +26 -13
- data/ext/nokogiri/xml_xpath_context.c +153 -83
- data/ext/nokogiri/xslt_stylesheet.c +111 -53
- data/gumbo-parser/Makefile +18 -0
- data/gumbo-parser/src/error.c +8 -4
- data/gumbo-parser/src/foreign_attrs.c +13 -14
- data/gumbo-parser/src/foreign_attrs.gperf +1 -1
- data/gumbo-parser/src/parser.c +21 -5
- data/gumbo-parser/src/tokenizer.c +1 -0
- data/lib/nokogiri/css/parser_extras.rb +1 -1
- data/lib/nokogiri/css/xpath_visitor.rb +3 -23
- data/lib/nokogiri/extension.rb +1 -1
- data/lib/nokogiri/html4/document.rb +1 -1
- data/lib/nokogiri/html4/document_fragment.rb +1 -1
- data/lib/nokogiri/html4/element_description_defaults.rb +1821 -353
- data/lib/nokogiri/html4/encoding_reader.rb +1 -1
- data/lib/nokogiri/html5/document_fragment.rb +1 -1
- data/lib/nokogiri/html5/node.rb +5 -0
- data/lib/nokogiri/html5.rb +0 -63
- data/lib/nokogiri/jruby/nokogiri_jars.rb +9 -9
- data/lib/nokogiri/version/constant.rb +1 -1
- data/lib/nokogiri/version/info.rb +6 -5
- data/lib/nokogiri/xml/attr.rb +2 -2
- data/lib/nokogiri/xml/attribute_decl.rb +4 -2
- data/lib/nokogiri/xml/document.rb +4 -5
- data/lib/nokogiri/xml/document_fragment.rb +3 -3
- data/lib/nokogiri/xml/element_content.rb +10 -2
- data/lib/nokogiri/xml/element_decl.rb +4 -2
- data/lib/nokogiri/xml/entity_decl.rb +4 -2
- data/lib/nokogiri/xml/namespace.rb +1 -2
- data/lib/nokogiri/xml/node/save_options.rb +8 -0
- data/lib/nokogiri/xml/node.rb +53 -37
- data/lib/nokogiri/xml/node_set.rb +3 -3
- data/lib/nokogiri/xml/pp/node.rb +23 -12
- data/lib/nokogiri/xml/reader.rb +10 -9
- data/lib/nokogiri/xml/sax/document.rb +1 -1
- data/lib/nokogiri/xml/searchable.rb +21 -13
- data/lib/nokogiri/xml/syntax_error.rb +1 -1
- data/lib/nokogiri/xml.rb +1 -1
- data/lib/nokogiri/xslt/stylesheet.rb +29 -7
- data/lib/nokogiri/xslt.rb +74 -4
- data/lib/nokogiri.rb +13 -5
- data/lib/xsd/xmlparser/nokogiri.rb +1 -1
- data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
- data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
- data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
- data/ports/archives/libxml2-2.12.8.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.39.tar.xz +0 -0
- metadata +16 -12
- data/patches/libxslt/0001-update-automake-files-for-arm64.patch +0 -3037
- data/ports/archives/libxml2-2.10.3.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.37.tar.xz +0 -0
@@ -94,7 +94,7 @@ module Nokogiri
|
|
94
94
|
# no support for a call without len
|
95
95
|
|
96
96
|
unless @firstchunk
|
97
|
-
(@firstchunk = @io.read(len)) ||
|
97
|
+
(@firstchunk = @io.read(len)) || return
|
98
98
|
|
99
99
|
# This implementation expects that the first call from
|
100
100
|
# htmlReadIO() is made with a length long enough (~1KB) to
|
@@ -36,7 +36,7 @@ module Nokogiri
|
|
36
36
|
attr_reader :quirks_mode
|
37
37
|
|
38
38
|
# Create a document fragment.
|
39
|
-
def initialize(doc, tags = nil, ctx = nil, options = {})
|
39
|
+
def initialize(doc, tags = nil, ctx = nil, options = {}) # rubocop:disable Lint/MissingSuper
|
40
40
|
self.document = doc
|
41
41
|
self.errors = []
|
42
42
|
return self unless tags
|
data/lib/nokogiri/html5/node.rb
CHANGED
@@ -17,6 +17,9 @@
|
|
17
17
|
# limitations under the License.
|
18
18
|
#
|
19
19
|
|
20
|
+
#
|
21
|
+
# TODO: this whole file should go away. maybe make it a decorator?
|
22
|
+
#
|
20
23
|
require_relative "../xml/node"
|
21
24
|
|
22
25
|
module Nokogiri
|
@@ -50,6 +53,8 @@ module Nokogiri
|
|
50
53
|
config = XML::Node::SaveOptions.new(save_options.to_i)
|
51
54
|
yield config if block_given?
|
52
55
|
|
56
|
+
encoding = encoding.is_a?(Encoding) ? encoding.name : encoding
|
57
|
+
|
53
58
|
config_options = config.options
|
54
59
|
if config_options & (XML::Node::SaveOptions::AS_XML | XML::Node::SaveOptions::AS_XHTML) != 0
|
55
60
|
# Use Nokogiri's serializing code.
|
data/lib/nokogiri/html5.rb
CHANGED
@@ -239,20 +239,6 @@ module Nokogiri
|
|
239
239
|
DocumentFragment.parse(string, encoding, options)
|
240
240
|
end
|
241
241
|
|
242
|
-
# Fetch and parse a HTML document from the web, following redirects,
|
243
|
-
# handling https, and determining the character encoding using HTML5
|
244
|
-
# rules. +uri+ may be a +String+ or a +URI+. +options+ contains
|
245
|
-
# http headers and special options. Everything which is not a
|
246
|
-
# special option is considered a header. Special options include:
|
247
|
-
# * :follow_limit => number of redirects which are followed
|
248
|
-
# * :basic_auth => [username, password]
|
249
|
-
def get(uri, options = {})
|
250
|
-
# TODO: deprecate
|
251
|
-
warn("Nokogiri::HTML5.get is deprecated and will be removed in a future version of Nokogiri.",
|
252
|
-
uplevel: 1, category: :deprecated)
|
253
|
-
get_impl(uri, options)
|
254
|
-
end
|
255
|
-
|
256
242
|
# :nodoc:
|
257
243
|
def read_and_encode(string, encoding)
|
258
244
|
# Read the string with the given encoding.
|
@@ -280,55 +266,6 @@ module Nokogiri
|
|
280
266
|
|
281
267
|
private
|
282
268
|
|
283
|
-
def get_impl(uri, options = {})
|
284
|
-
headers = options.clone
|
285
|
-
headers = { follow_limit: headers } if Numeric === headers # deprecated
|
286
|
-
limit = headers[:follow_limit] ? headers.delete(:follow_limit).to_i : 10
|
287
|
-
|
288
|
-
require "net/http"
|
289
|
-
uri = URI(uri) unless URI === uri
|
290
|
-
|
291
|
-
http = Net::HTTP.new(uri.host, uri.port)
|
292
|
-
|
293
|
-
# TLS / SSL support
|
294
|
-
http.use_ssl = true if uri.scheme == "https"
|
295
|
-
|
296
|
-
# Pass through Net::HTTP override values, which currently include:
|
297
|
-
# :ca_file, :ca_path, :cert, :cert_store, :ciphers,
|
298
|
-
# :close_on_empty_response, :continue_timeout, :key, :open_timeout,
|
299
|
-
# :read_timeout, :ssl_timeout, :ssl_version, :use_ssl,
|
300
|
-
# :verify_callback, :verify_depth, :verify_mode
|
301
|
-
options.each do |key, _value|
|
302
|
-
http.send("#{key}=", headers.delete(key)) if http.respond_to?("#{key}=")
|
303
|
-
end
|
304
|
-
|
305
|
-
request = Net::HTTP::Get.new(uri.request_uri)
|
306
|
-
|
307
|
-
# basic authentication
|
308
|
-
auth = headers.delete(:basic_auth)
|
309
|
-
auth ||= [uri.user, uri.password] if uri.user && uri.password
|
310
|
-
request.basic_auth(auth.first, auth.last) if auth
|
311
|
-
|
312
|
-
# remaining options are treated as headers
|
313
|
-
headers.each { |key, value| request[key.to_s] = value.to_s }
|
314
|
-
|
315
|
-
response = http.request(request)
|
316
|
-
|
317
|
-
case response
|
318
|
-
when Net::HTTPSuccess
|
319
|
-
doc = parse(reencode(response.body, response["content-type"]), options)
|
320
|
-
doc.instance_variable_set(:@response, response)
|
321
|
-
doc.class.send(:attr_reader, :response)
|
322
|
-
doc
|
323
|
-
when Net::HTTPRedirection
|
324
|
-
response.value if limit <= 1
|
325
|
-
location = URI.join(uri, response["location"])
|
326
|
-
get_impl(location, options.merge(follow_limit: limit - 1))
|
327
|
-
else
|
328
|
-
response.value
|
329
|
-
end
|
330
|
-
end
|
331
|
-
|
332
269
|
# Charset sniffing is a complex and controversial topic that understandably isn't done _by
|
333
270
|
# default_ by the Ruby Net::HTTP library. This being said, it is a very real problem for
|
334
271
|
# consumers of HTML as the default for HTML is iso-8859-1, most "good" producers use utf-8, and
|
@@ -2,26 +2,26 @@
|
|
2
2
|
begin
|
3
3
|
require 'jar_dependencies'
|
4
4
|
rescue LoadError
|
5
|
-
require 'xalan/
|
5
|
+
require 'xalan/serializer/2.7.3/serializer-2.7.3.jar'
|
6
6
|
require 'net/sourceforge/htmlunit/neko-htmlunit/2.63.0/neko-htmlunit-2.63.0.jar'
|
7
7
|
require 'nu/validator/jing/20200702VNU/jing-20200702VNU.jar'
|
8
8
|
require 'xerces/xercesImpl/2.12.2/xercesImpl-2.12.2.jar'
|
9
|
-
require 'org/nokogiri/nekodtd/0.1.11.noko1/nekodtd-0.1.11.noko1.jar'
|
10
9
|
require 'net/sf/saxon/Saxon-HE/9.6.0-4/Saxon-HE-9.6.0-4.jar'
|
10
|
+
require 'xalan/xalan/2.7.3/xalan-2.7.3.jar'
|
11
11
|
require 'xml-apis/xml-apis/1.4.01/xml-apis-1.4.01.jar'
|
12
|
-
require '
|
12
|
+
require 'org/nokogiri/nekodtd/0.1.11.noko2/nekodtd-0.1.11.noko2.jar'
|
13
13
|
require 'isorelax/isorelax/20030108/isorelax-20030108.jar'
|
14
14
|
end
|
15
15
|
|
16
16
|
if defined? Jars
|
17
|
-
require_jar 'xalan', '
|
17
|
+
require_jar 'xalan', 'serializer', '2.7.3'
|
18
18
|
require_jar 'net.sourceforge.htmlunit', 'neko-htmlunit', '2.63.0'
|
19
19
|
require_jar 'nu.validator', 'jing', '20200702VNU'
|
20
20
|
require_jar 'xerces', 'xercesImpl', '2.12.2'
|
21
|
-
require_jar 'org.nokogiri', 'nekodtd', '0.1.11.noko1'
|
22
21
|
require_jar 'net.sf.saxon', 'Saxon-HE', '9.6.0-4'
|
22
|
+
require_jar 'xalan', 'xalan', '2.7.3'
|
23
23
|
require_jar 'xml-apis', 'xml-apis', '1.4.01'
|
24
|
-
require_jar '
|
24
|
+
require_jar 'org.nokogiri', 'nekodtd', '0.1.11.noko2'
|
25
25
|
require_jar 'isorelax', 'isorelax', '20030108'
|
26
26
|
end
|
27
27
|
|
@@ -32,9 +32,9 @@ module Nokogiri
|
|
32
32
|
"net.sf.saxon:Saxon-HE" => "9.6.0-4",
|
33
33
|
"net.sourceforge.htmlunit:neko-htmlunit" => "2.63.0",
|
34
34
|
"nu.validator:jing" => "20200702VNU",
|
35
|
-
"org.nokogiri:nekodtd" => "0.1.11.
|
36
|
-
"xalan:serializer" => "2.7.
|
37
|
-
"xalan:xalan" => "2.7.
|
35
|
+
"org.nokogiri:nekodtd" => "0.1.11.noko2",
|
36
|
+
"xalan:serializer" => "2.7.3",
|
37
|
+
"xalan:xalan" => "2.7.3",
|
38
38
|
"xerces:xercesImpl" => "2.12.2",
|
39
39
|
"xml-apis:xml-apis" => "1.4.01",
|
40
40
|
}.freeze
|
@@ -94,11 +94,14 @@ module Nokogiri
|
|
94
94
|
nokogiri["version"] = Nokogiri::VERSION
|
95
95
|
|
96
96
|
unless jruby?
|
97
|
-
# enable gems
|
97
|
+
# enable gems to build against Nokogiri with the following in their extconf.rb:
|
98
98
|
#
|
99
99
|
# append_cflags(Nokogiri::VERSION_INFO["nokogiri"]["cppflags"])
|
100
100
|
# append_ldflags(Nokogiri::VERSION_INFO["nokogiri"]["ldflags"])
|
101
101
|
#
|
102
|
+
# though, this won't work on all platforms and versions of Ruby, and won't be supported
|
103
|
+
# forever, see https://github.com/sparklemotion/nokogiri/discussions/2746 for context.
|
104
|
+
#
|
102
105
|
cppflags = ["-I#{header_directory.shellescape}"]
|
103
106
|
ldflags = []
|
104
107
|
|
@@ -108,7 +111,8 @@ module Nokogiri
|
|
108
111
|
end
|
109
112
|
|
110
113
|
if windows?
|
111
|
-
# on windows,
|
114
|
+
# on windows, third party libraries that wish to link against nokogiri
|
115
|
+
# should link against nokogiri.so to resolve symbols. see #2167
|
112
116
|
lib_directory = File.expand_path(File.join(File.dirname(__FILE__), "../#{ruby_minor}"))
|
113
117
|
unless File.exist?(lib_directory)
|
114
118
|
lib_directory = File.expand_path(File.join(File.dirname(__FILE__), ".."))
|
@@ -136,9 +140,6 @@ module Nokogiri
|
|
136
140
|
libxml["source"] = "packaged"
|
137
141
|
libxml["precompiled"] = libxml2_precompiled?
|
138
142
|
libxml["patches"] = Nokogiri::LIBXML2_PATCHES
|
139
|
-
|
140
|
-
# this is for nokogumbo and shouldn't be forever
|
141
|
-
libxml["libxml2_path"] = header_directory
|
142
143
|
else
|
143
144
|
libxml["source"] = "system"
|
144
145
|
end
|
data/lib/nokogiri/xml/attr.rb
CHANGED
@@ -18,8 +18,6 @@ module Nokogiri
|
|
18
18
|
# - +value+ → (String) The value of the attribute.
|
19
19
|
# - +namespace+ → (Namespace, nil) The Namespace of the attribute, or +nil+ if there is no namespace.
|
20
20
|
#
|
21
|
-
# ⚡ This is an experimental feature, available since v1.14.0
|
22
|
-
#
|
23
21
|
# *Example*
|
24
22
|
#
|
25
23
|
# doc = Nokogiri::XML.parse(<<~XML)
|
@@ -52,6 +50,8 @@ module Nokogiri
|
|
52
50
|
# # href = "http://nokogiri.org/ns/noko"
|
53
51
|
# # })}
|
54
52
|
#
|
53
|
+
# Since v1.14.0
|
54
|
+
#
|
55
55
|
def deconstruct_keys(keys)
|
56
56
|
{ name: name, value: value, namespace: namespace }
|
57
57
|
end
|
@@ -12,8 +12,10 @@ module Nokogiri
|
|
12
12
|
undef_method :namespace_definitions
|
13
13
|
undef_method :line if method_defined?(:line)
|
14
14
|
|
15
|
-
|
16
|
-
|
15
|
+
private
|
16
|
+
|
17
|
+
def inspect_attributes
|
18
|
+
[:to_s]
|
17
19
|
end
|
18
20
|
end
|
19
21
|
end
|
@@ -174,8 +174,7 @@ module Nokogiri
|
|
174
174
|
# Since v1.12.4
|
175
175
|
attr_accessor :namespace_inheritance
|
176
176
|
|
177
|
-
# :nodoc:
|
178
|
-
def initialize(*args) # rubocop:disable Lint/MissingSuper
|
177
|
+
def initialize(*args) # :nodoc: # rubocop:disable Lint/MissingSuper
|
179
178
|
@errors = []
|
180
179
|
@decorators = nil
|
181
180
|
@namespace_inheritance = false
|
@@ -330,7 +329,7 @@ module Nokogiri
|
|
330
329
|
# Validate this Document against its DTD. Returns a list of errors on
|
331
330
|
# the document or +nil+ when there is no DTD.
|
332
331
|
def validate
|
333
|
-
return
|
332
|
+
return unless internal_subset
|
334
333
|
|
335
334
|
internal_subset.validate(self)
|
336
335
|
end
|
@@ -427,8 +426,6 @@ module Nokogiri
|
|
427
426
|
# instructions. If you have a use case and would like this functionality, please let us know
|
428
427
|
# by opening an issue or a discussion on the github project.
|
429
428
|
#
|
430
|
-
# ⚡ This is an experimental feature, available since v1.14.0
|
431
|
-
#
|
432
429
|
# *Example*
|
433
430
|
#
|
434
431
|
# doc = Nokogiri::XML.parse(<<~XML)
|
@@ -455,6 +452,8 @@ module Nokogiri
|
|
455
452
|
# doc.deconstruct_keys([:root])
|
456
453
|
# # => {:root=>nil}
|
457
454
|
#
|
455
|
+
# Since v1.14.0
|
456
|
+
#
|
458
457
|
def deconstruct_keys(keys)
|
459
458
|
{ root: root }
|
460
459
|
end
|
@@ -16,7 +16,7 @@ module Nokogiri
|
|
16
16
|
# If +ctx+ is present, it is used as a context node for the
|
17
17
|
# subtree created, e.g., namespaces will be resolved relative
|
18
18
|
# to +ctx+.
|
19
|
-
def initialize(document, tags = nil, ctx = nil, options = ParseOptions::DEFAULT_XML)
|
19
|
+
def initialize(document, tags = nil, ctx = nil, options = ParseOptions::DEFAULT_XML) # rubocop:disable Lint/MissingSuper
|
20
20
|
return self unless tags
|
21
21
|
|
22
22
|
options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
|
@@ -154,8 +154,6 @@ module Nokogiri
|
|
154
154
|
# root elements, you should deconstruct the array returned by
|
155
155
|
# <tt>DocumentFragment#elements</tt>.
|
156
156
|
#
|
157
|
-
# ⚡ This is an experimental feature, available since v1.14.0
|
158
|
-
#
|
159
157
|
# *Example*
|
160
158
|
#
|
161
159
|
# frag = Nokogiri::HTML5.fragment(<<~HTML)
|
@@ -187,6 +185,8 @@ module Nokogiri
|
|
187
185
|
# # }),
|
188
186
|
# # #(Element:0x398 { name = "div", children = [ #(Text "End")] })]
|
189
187
|
#
|
188
|
+
# Since v1.14.0
|
189
|
+
#
|
190
190
|
def deconstruct
|
191
191
|
children.to_a
|
192
192
|
end
|
@@ -11,9 +11,11 @@ module Nokogiri
|
|
11
11
|
# ]>
|
12
12
|
# </root>
|
13
13
|
#
|
14
|
-
# ElementContent represents the tree inside the <!ELEMENT> tag shown above
|
15
|
-
#
|
14
|
+
# ElementContent represents the binary tree inside the <!ELEMENT> tag shown above that lists the
|
15
|
+
# possible content for the div1 tag.
|
16
16
|
class ElementContent
|
17
|
+
include Nokogiri::XML::PP::Node
|
18
|
+
|
17
19
|
# Possible definitions of type
|
18
20
|
PCDATA = 1
|
19
21
|
ELEMENT = 2
|
@@ -33,6 +35,12 @@ module Nokogiri
|
|
33
35
|
def children
|
34
36
|
[c1, c2].compact
|
35
37
|
end
|
38
|
+
|
39
|
+
private
|
40
|
+
|
41
|
+
def inspect_attributes
|
42
|
+
[:prefix, :name, :type, :occur, :children]
|
43
|
+
end
|
36
44
|
end
|
37
45
|
end
|
38
46
|
end
|
@@ -7,8 +7,10 @@ module Nokogiri
|
|
7
7
|
undef_method :namespace_definitions
|
8
8
|
undef_method :line if method_defined?(:line)
|
9
9
|
|
10
|
-
|
11
|
-
|
10
|
+
private
|
11
|
+
|
12
|
+
def inspect_attributes
|
13
|
+
[:to_s]
|
12
14
|
end
|
13
15
|
end
|
14
16
|
end
|
@@ -16,8 +16,6 @@ module Nokogiri
|
|
16
16
|
# - +prefix+ → (String, nil) The namespace's prefix, or +nil+ if there is no prefix (e.g., default namespace).
|
17
17
|
# - +href+ → (String) The namespace's URI
|
18
18
|
#
|
19
|
-
# ⚡ This is an experimental feature, available since v1.14.0
|
20
|
-
#
|
21
19
|
# *Example*
|
22
20
|
#
|
23
21
|
# doc = Nokogiri::XML.parse(<<~XML)
|
@@ -43,6 +41,7 @@ module Nokogiri
|
|
43
41
|
# doc.root.elements.last.namespace.deconstruct_keys([:prefix, :href])
|
44
42
|
# # => {:prefix=>"noko", :href=>"http://nokogiri.org/ns/noko"}
|
45
43
|
#
|
44
|
+
# Since v1.14.0
|
46
45
|
#
|
47
46
|
def deconstruct_keys(keys)
|
48
47
|
{ prefix: prefix, href: href }
|
@@ -62,6 +62,14 @@ module Nokogiri
|
|
62
62
|
end
|
63
63
|
|
64
64
|
alias_method :to_i, :options
|
65
|
+
|
66
|
+
def inspect
|
67
|
+
options = []
|
68
|
+
self.class.constants.each do |k|
|
69
|
+
options << k.downcase if send(:"#{k.downcase}?")
|
70
|
+
end
|
71
|
+
super.sub(/>$/, " " + options.join(", ") + ">")
|
72
|
+
end
|
65
73
|
end
|
66
74
|
end
|
67
75
|
end
|
data/lib/nokogiri/xml/node.rb
CHANGED
@@ -1049,29 +1049,35 @@ module Nokogiri
|
|
1049
1049
|
|
1050
1050
|
return Nokogiri::XML::NodeSet.new(document) if contents.empty?
|
1051
1051
|
|
1052
|
-
# libxml2 does not obey the +recover+ option after encountering errors during +in_context+
|
1053
|
-
# parsing, and so this horrible hack is here to try to emulate recovery behavior.
|
1054
|
-
#
|
1055
|
-
# Unfortunately, this means we're no longer parsing "in context" and so namespaces that
|
1056
|
-
# would have been inherited from the context node won't be handled correctly. This hack was
|
1057
|
-
# written in 2010, and I regret it, because it's silently degrading functionality in a way
|
1058
|
-
# that's not easily prevented (or even detected).
|
1059
|
-
#
|
1060
|
-
# I think preferable behavior would be to either:
|
1061
|
-
#
|
1062
|
-
# a. add an error noting that we "fell back" and pointing the user to turning off the +recover+ option
|
1063
|
-
# b. don't recover, but raise a sensible exception
|
1064
|
-
#
|
1065
|
-
# For context and background: https://github.com/sparklemotion/nokogiri/issues/313
|
1066
|
-
# FIXME bug report: https://github.com/sparklemotion/nokogiri/issues/2092
|
1067
1052
|
error_count = document.errors.length
|
1068
1053
|
node_set = in_context(contents, options.to_i)
|
1069
|
-
if
|
1070
|
-
|
1054
|
+
if document.errors.length > error_count
|
1055
|
+
raise document.errors[error_count] unless options.recover?
|
1056
|
+
|
1057
|
+
if node_set.empty?
|
1058
|
+
# libxml2 < 2.13 does not obey the +recover+ option after encountering errors during
|
1059
|
+
# +in_context+ parsing, and so this horrible hack is here to try to emulate recovery
|
1060
|
+
# behavior.
|
1061
|
+
#
|
1062
|
+
# (Note that HTML4 fragment parsing seems to have been fixed in abd74186, and XML
|
1063
|
+
# fragment parsing is fixed in 1c106edf. Both are in 2.13.)
|
1064
|
+
#
|
1065
|
+
# Unfortunately, this means we're no longer parsing "in context" and so namespaces that
|
1066
|
+
# would have been inherited from the context node won't be handled correctly. This hack
|
1067
|
+
# was written in 2010, and I regret it, because it's silently degrading functionality in
|
1068
|
+
# a way that's not easily prevented (or even detected).
|
1069
|
+
#
|
1070
|
+
# I think preferable behavior would be to either:
|
1071
|
+
#
|
1072
|
+
# a. add an error noting that we "fell back" and pointing the user to turning off the
|
1073
|
+
# +recover+ option
|
1074
|
+
# b. don't recover, but raise a sensible exception
|
1075
|
+
#
|
1076
|
+
# For context and background:
|
1077
|
+
# - https://github.com/sparklemotion/nokogiri/issues/313
|
1078
|
+
# - https://github.com/sparklemotion/nokogiri/issues/2092
|
1071
1079
|
fragment = document.related_class("DocumentFragment").parse(contents)
|
1072
1080
|
node_set = fragment.children
|
1073
|
-
else
|
1074
|
-
raise document.errors[error_count]
|
1075
1081
|
end
|
1076
1082
|
end
|
1077
1083
|
node_set
|
@@ -1165,7 +1171,7 @@ module Nokogiri
|
|
1165
1171
|
# Fetch the Nokogiri::HTML4::ElementDescription for this node. Returns
|
1166
1172
|
# nil on XML documents and on unknown tags.
|
1167
1173
|
def description
|
1168
|
-
return
|
1174
|
+
return if document.xml?
|
1169
1175
|
|
1170
1176
|
Nokogiri::HTML4::ElementDescription[name]
|
1171
1177
|
end
|
@@ -1254,8 +1260,8 @@ module Nokogiri
|
|
1254
1260
|
# Compare two Node objects with respect to their Document. Nodes from
|
1255
1261
|
# different documents cannot be compared.
|
1256
1262
|
def <=>(other)
|
1257
|
-
return
|
1258
|
-
return
|
1263
|
+
return unless other.is_a?(Nokogiri::XML::Node)
|
1264
|
+
return unless document == other.document
|
1259
1265
|
|
1260
1266
|
compare(other)
|
1261
1267
|
end
|
@@ -1269,15 +1275,16 @@ module Nokogiri
|
|
1269
1275
|
#
|
1270
1276
|
# These two statements are equivalent:
|
1271
1277
|
#
|
1272
|
-
#
|
1278
|
+
# node.serialize(encoding: 'UTF-8', save_with: FORMAT | AS_XML)
|
1273
1279
|
#
|
1274
1280
|
# or
|
1275
1281
|
#
|
1276
|
-
# node.serialize(:
|
1282
|
+
# node.serialize(encoding: 'UTF-8') do |config|
|
1277
1283
|
# config.format.as_xml
|
1278
1284
|
# end
|
1279
1285
|
#
|
1280
1286
|
def serialize(*args, &block)
|
1287
|
+
# TODO: deprecate non-hash options, see 46c68ed 2009-06-20 for context
|
1281
1288
|
options = if args.first.is_a?(Hash)
|
1282
1289
|
args.shift
|
1283
1290
|
else
|
@@ -1310,7 +1317,7 @@ module Nokogiri
|
|
1310
1317
|
###
|
1311
1318
|
# Serialize this Node to XML using +options+
|
1312
1319
|
#
|
1313
|
-
# doc.to_xml(:
|
1320
|
+
# doc.to_xml(indent: 5, encoding: 'UTF-8')
|
1314
1321
|
#
|
1315
1322
|
# See Node#write_to for a list of +options+
|
1316
1323
|
def to_xml(options = {})
|
@@ -1321,7 +1328,7 @@ module Nokogiri
|
|
1321
1328
|
###
|
1322
1329
|
# Serialize this Node to XHTML using +options+
|
1323
1330
|
#
|
1324
|
-
# doc.to_xhtml(:
|
1331
|
+
# doc.to_xhtml(indent: 5, encoding: 'UTF-8')
|
1325
1332
|
#
|
1326
1333
|
# See Node#write_to for a list of +options+
|
1327
1334
|
def to_xhtml(options = {})
|
@@ -1329,25 +1336,32 @@ module Nokogiri
|
|
1329
1336
|
end
|
1330
1337
|
|
1331
1338
|
###
|
1332
|
-
#
|
1333
|
-
#
|
1339
|
+
# :call-seq:
|
1340
|
+
# write_to(io, *options)
|
1341
|
+
#
|
1342
|
+
# Serialize this node or document to +io+.
|
1343
|
+
#
|
1344
|
+
# [Parameters]
|
1345
|
+
# - +io+ (IO) An IO-like object to which the serialized content will be written.
|
1346
|
+
# - +options+ (Hash) See below
|
1334
1347
|
#
|
1335
|
-
#
|
1336
|
-
# * +:
|
1337
|
-
# * +:
|
1338
|
-
# * +:
|
1348
|
+
# [Options]
|
1349
|
+
# * +:encoding+ (String or Encoding) specify the encoding of the output (defaults to document encoding)
|
1350
|
+
# * +:indent_text+ (String) the indentation text (defaults to <code>" "</code>)
|
1351
|
+
# * +:indent+ (Integer) the number of +:indent_text+ to use (defaults to +2+)
|
1352
|
+
# * +:save_with+ (Integer) a combination of SaveOptions constants
|
1339
1353
|
#
|
1340
1354
|
# To save with UTF-8 indented twice:
|
1341
1355
|
#
|
1342
|
-
# node.write_to(io, :
|
1356
|
+
# node.write_to(io, encoding: 'UTF-8', indent: 2)
|
1343
1357
|
#
|
1344
1358
|
# To save indented with two dashes:
|
1345
1359
|
#
|
1346
|
-
# node.write_to(io, :
|
1360
|
+
# node.write_to(io, indent_text: '-', indent: 2)
|
1347
1361
|
#
|
1348
1362
|
def write_to(io, *options)
|
1349
1363
|
options = options.first.is_a?(Hash) ? options.shift : {}
|
1350
|
-
encoding = options[:encoding] || options[0]
|
1364
|
+
encoding = options[:encoding] || options[0] || document.encoding
|
1351
1365
|
if Nokogiri.jruby?
|
1352
1366
|
save_options = options[:save_with] || options[1]
|
1353
1367
|
indent_times = options[:indent] || 0
|
@@ -1365,6 +1379,8 @@ module Nokogiri
|
|
1365
1379
|
config = SaveOptions.new(save_options.to_i)
|
1366
1380
|
yield config if block_given?
|
1367
1381
|
|
1382
|
+
encoding = encoding.is_a?(Encoding) ? encoding.name : encoding
|
1383
|
+
|
1368
1384
|
native_write_to(io, encoding, indentation, config.options)
|
1369
1385
|
end
|
1370
1386
|
|
@@ -1420,8 +1436,6 @@ module Nokogiri
|
|
1420
1436
|
# - +content+ → (String) The contents of all the text nodes in this node's subtree. See #content.
|
1421
1437
|
# - +inner_html+ → (String) The inner markup for the children of this node. See #inner_html.
|
1422
1438
|
#
|
1423
|
-
# ⚡ This is an experimental feature, available since v1.14.0
|
1424
|
-
#
|
1425
1439
|
# *Example*
|
1426
1440
|
#
|
1427
1441
|
# doc = Nokogiri::XML.parse(<<~XML)
|
@@ -1456,6 +1470,8 @@ module Nokogiri
|
|
1456
1470
|
# # value = "def"
|
1457
1471
|
# # })]}
|
1458
1472
|
#
|
1473
|
+
# Since v1.14.0
|
1474
|
+
#
|
1459
1475
|
def deconstruct_keys(keys)
|
1460
1476
|
requested_keys = DECONSTRUCT_KEYS & keys
|
1461
1477
|
{}.tap do |values|
|
@@ -372,7 +372,7 @@ module Nokogiri
|
|
372
372
|
# Removes the last element from set and returns it, or +nil+ if
|
373
373
|
# the set is empty
|
374
374
|
def pop
|
375
|
-
return
|
375
|
+
return if length == 0
|
376
376
|
|
377
377
|
delete(last)
|
378
378
|
end
|
@@ -381,7 +381,7 @@ module Nokogiri
|
|
381
381
|
# Returns the first element of the NodeSet and removes it. Returns
|
382
382
|
# +nil+ if the set is empty.
|
383
383
|
def shift
|
384
|
-
return
|
384
|
+
return if length == 0
|
385
385
|
|
386
386
|
delete(first)
|
387
387
|
end
|
@@ -435,7 +435,7 @@ module Nokogiri
|
|
435
435
|
#
|
436
436
|
# Returns the members of this NodeSet as an array, to use in pattern matching.
|
437
437
|
#
|
438
|
-
#
|
438
|
+
# Since v1.14.0
|
439
439
|
#
|
440
440
|
def deconstruct
|
441
441
|
to_a
|
data/lib/nokogiri/xml/pp/node.rb
CHANGED
@@ -13,9 +13,14 @@ module Nokogiri
|
|
13
13
|
!attribute || (attribute.respond_to?(:empty?) && attribute.empty?)
|
14
14
|
rescue NoMethodError
|
15
15
|
true
|
16
|
-
end
|
17
|
-
|
18
|
-
|
16
|
+
end
|
17
|
+
attributes = if inspect_attributes.length == 1
|
18
|
+
send(attributes.first).inspect
|
19
|
+
else
|
20
|
+
attributes.map do |attribute|
|
21
|
+
"#{attribute}=#{send(attribute).inspect}"
|
22
|
+
end.join(" ")
|
23
|
+
end
|
19
24
|
"#<#{self.class.name}:#{format("0x%x", object_id)} #{attributes}>"
|
20
25
|
end
|
21
26
|
|
@@ -23,6 +28,7 @@ module Nokogiri
|
|
23
28
|
nice_name = self.class.name.split("::").last
|
24
29
|
pp.group(2, "#(#{nice_name}:#{format("0x%x", object_id)} {", "})") do
|
25
30
|
pp.breakable
|
31
|
+
|
26
32
|
attrs = inspect_attributes.filter_map do |t|
|
27
33
|
[t, send(t)] if respond_to?(t)
|
28
34
|
end.find_all do |x|
|
@@ -35,19 +41,24 @@ module Nokogiri
|
|
35
41
|
end
|
36
42
|
end
|
37
43
|
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
+
if inspect_attributes.length == 1
|
45
|
+
pp.pp(attrs.first.last)
|
46
|
+
else
|
47
|
+
pp.seplist(attrs) do |v|
|
48
|
+
if COLLECTIONS.include?(v.first)
|
49
|
+
pp.group(2, "#{v.first} = [", "]") do
|
50
|
+
pp.breakable
|
51
|
+
pp.seplist(v.last) do |item|
|
52
|
+
pp.pp(item)
|
53
|
+
end
|
44
54
|
end
|
55
|
+
else
|
56
|
+
pp.text("#{v.first} = ")
|
57
|
+
pp.pp(v.last)
|
45
58
|
end
|
46
|
-
else
|
47
|
-
pp.text("#{v.first} = ")
|
48
|
-
pp.pp(v.last)
|
49
59
|
end
|
50
60
|
end
|
61
|
+
|
51
62
|
pp.breakable
|
52
63
|
end
|
53
64
|
end
|