nokogiri 1.15.2 → 1.16.2
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +17 -14
- data/README.md +4 -1
- data/dependencies.yml +9 -8
- data/ext/nokogiri/extconf.rb +3 -4
- data/ext/nokogiri/html4_sax_push_parser.c +1 -1
- data/ext/nokogiri/nokogiri.h +10 -3
- data/ext/nokogiri/test_global_handlers.c +1 -1
- data/ext/nokogiri/xml_cdata.c +28 -23
- data/ext/nokogiri/xml_document.c +13 -5
- data/ext/nokogiri/xml_namespace.c +0 -4
- data/ext/nokogiri/xml_node.c +6 -9
- data/ext/nokogiri/xml_reader.c +25 -48
- data/ext/nokogiri/xml_relax_ng.c +1 -1
- data/ext/nokogiri/xml_sax_parser_context.c +4 -0
- data/ext/nokogiri/xml_sax_push_parser.c +1 -1
- data/ext/nokogiri/xml_schema.c +13 -8
- data/ext/nokogiri/xml_syntax_error.c +3 -3
- data/ext/nokogiri/xml_text.c +24 -19
- data/ext/nokogiri/xml_xpath_context.c +2 -5
- data/ext/nokogiri/xslt_stylesheet.c +8 -3
- data/gumbo-parser/Makefile +18 -0
- data/gumbo-parser/src/error.c +1 -1
- data/gumbo-parser/src/parser.c +8 -5
- data/gumbo-parser/src/tokenizer.c +1 -0
- data/lib/nokogiri/css/parser_extras.rb +1 -1
- data/lib/nokogiri/css/xpath_visitor.rb +1 -21
- data/lib/nokogiri/html4/document.rb +1 -1
- data/lib/nokogiri/html4/encoding_reader.rb +1 -1
- data/lib/nokogiri/html5.rb +0 -66
- data/lib/nokogiri/version/constant.rb +1 -1
- data/lib/nokogiri/version/info.rb +6 -5
- data/lib/nokogiri/xml/attr.rb +2 -2
- data/lib/nokogiri/xml/document.rb +4 -5
- data/lib/nokogiri/xml/document_fragment.rb +2 -2
- data/lib/nokogiri/xml/namespace.rb +1 -2
- data/lib/nokogiri/xml/node.rb +31 -24
- data/lib/nokogiri/xml/node_set.rb +3 -3
- data/lib/nokogiri/xml/reader.rb +10 -9
- data/lib/nokogiri/xml/searchable.rb +3 -3
- data/lib/nokogiri/xml/syntax_error.rb +1 -1
- data/lib/nokogiri/xml.rb +1 -1
- data/lib/nokogiri/xslt/stylesheet.rb +29 -7
- data/lib/nokogiri/xslt.rb +1 -1
- data/lib/nokogiri.rb +1 -1
- data/ports/archives/libxml2-2.12.5.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.39.tar.xz +0 -0
- metadata +6 -6
- data/ports/archives/libxml2-2.11.4.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.38.tar.xz +0 -0
data/ext/nokogiri/xml_text.c
CHANGED
@@ -9,33 +9,38 @@ VALUE cNokogiriXmlText ;
|
|
9
9
|
* Create a new Text element on the +document+ with +content+
|
10
10
|
*/
|
11
11
|
static VALUE
|
12
|
-
|
12
|
+
rb_xml_text_s_new(int argc, VALUE *argv, VALUE klass)
|
13
13
|
{
|
14
|
-
xmlDocPtr
|
15
|
-
xmlNodePtr
|
16
|
-
VALUE
|
17
|
-
VALUE
|
18
|
-
VALUE
|
14
|
+
xmlDocPtr c_document;
|
15
|
+
xmlNodePtr c_node;
|
16
|
+
VALUE rb_string;
|
17
|
+
VALUE rb_document;
|
18
|
+
VALUE rb_rest;
|
19
19
|
VALUE rb_node;
|
20
20
|
|
21
|
-
rb_scan_args(argc, argv, "2*", &
|
21
|
+
rb_scan_args(argc, argv, "2*", &rb_string, &rb_document, &rb_rest);
|
22
22
|
|
23
|
-
if (rb_obj_is_kind_of(
|
24
|
-
|
25
|
-
|
23
|
+
if (!rb_obj_is_kind_of(rb_document, cNokogiriXmlNode)) {
|
24
|
+
rb_raise(rb_eTypeError,
|
25
|
+
"expected second parameter to be a Nokogiri::XML::Document, received %"PRIsVALUE,
|
26
|
+
rb_obj_class(rb_document));
|
27
|
+
}
|
28
|
+
|
29
|
+
if (!rb_obj_is_kind_of(rb_document, cNokogiriXmlDocument)) {
|
26
30
|
xmlNodePtr deprecated_node_type_arg;
|
27
|
-
// TODO:
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
+
NOKO_WARN_DEPRECATION("Passing a Node as the second parameter to Text.new is deprecated. Please pass a Document instead. This will become an error in Nokogiri v1.17.0."); // TODO: deprecated in v1.15.3, remove in v1.17.0
|
32
|
+
Noko_Node_Get_Struct(rb_document, xmlNode, deprecated_node_type_arg);
|
33
|
+
c_document = deprecated_node_type_arg->doc;
|
34
|
+
} else {
|
35
|
+
c_document = noko_xml_document_unwrap(rb_document);
|
31
36
|
}
|
32
37
|
|
33
|
-
|
34
|
-
|
38
|
+
c_node = xmlNewText((xmlChar *)StringValueCStr(rb_string));
|
39
|
+
c_node->doc = c_document;
|
35
40
|
|
36
|
-
noko_xml_document_pin_node(
|
41
|
+
noko_xml_document_pin_node(c_node);
|
37
42
|
|
38
|
-
rb_node = noko_xml_node_wrap(klass,
|
43
|
+
rb_node = noko_xml_node_wrap(klass, c_node) ;
|
39
44
|
rb_obj_call_init(rb_node, argc, argv);
|
40
45
|
|
41
46
|
if (rb_block_given_p()) { rb_yield(rb_node); }
|
@@ -52,5 +57,5 @@ noko_init_xml_text(void)
|
|
52
57
|
*/
|
53
58
|
cNokogiriXmlText = rb_define_class_under(mNokogiriXml, "Text", cNokogiriXmlCharacterData);
|
54
59
|
|
55
|
-
rb_define_singleton_method(cNokogiriXmlText, "new",
|
60
|
+
rb_define_singleton_method(cNokogiriXmlText, "new", rb_xml_text_s_new, -1);
|
56
61
|
}
|
@@ -321,11 +321,8 @@ handler_lookup(void *data, const xmlChar *c_name, const xmlChar *c_ns_uri)
|
|
321
321
|
VALUE rb_handler = (VALUE)data;
|
322
322
|
if (rb_respond_to(rb_handler, rb_intern((const char *)c_name))) {
|
323
323
|
if (c_ns_uri == NULL) {
|
324
|
-
NOKO_WARN_DEPRECATION(
|
325
|
-
|
326
|
-
" Please update your query to reference this function as 'nokogiri:%s'."
|
327
|
-
" Invoking custom handler functions without a namespace is deprecated and support will be removed in a future release of Nokogiri.",
|
328
|
-
c_name, c_name);
|
324
|
+
NOKO_WARN_DEPRECATION("A custom XPath or CSS handler function named '%s' is being invoked without a namespace. Please update your query to reference this function as 'nokogiri:%s'. Invoking custom handler functions without a namespace is deprecated and will become an error in Nokogiri v1.17.0.",
|
325
|
+
c_name, c_name); // deprecated in v1.15.0, remove in v1.17.0
|
329
326
|
}
|
330
327
|
return method_caller;
|
331
328
|
}
|
@@ -71,7 +71,12 @@ Nokogiri_wrap_xslt_stylesheet(xsltStylesheetPtr ss)
|
|
71
71
|
* call-seq:
|
72
72
|
* parse_stylesheet_doc(document)
|
73
73
|
*
|
74
|
-
* Parse
|
74
|
+
* Parse an XSLT::Stylesheet from +document+.
|
75
|
+
*
|
76
|
+
* [Parameters]
|
77
|
+
* - +document+ (Nokogiri::XML::Document) the document to be parsed.
|
78
|
+
*
|
79
|
+
* [Returns] Nokogiri::XSLT::Stylesheet
|
75
80
|
*/
|
76
81
|
static VALUE
|
77
82
|
parse_stylesheet_doc(VALUE klass, VALUE xmldocobj)
|
@@ -104,7 +109,7 @@ parse_stylesheet_doc(VALUE klass, VALUE xmldocobj)
|
|
104
109
|
* call-seq:
|
105
110
|
* serialize(document)
|
106
111
|
*
|
107
|
-
* Serialize +document+ to an xml string.
|
112
|
+
* Serialize +document+ to an xml string, as specified by the +method+ parameter in the Stylesheet.
|
108
113
|
*/
|
109
114
|
static VALUE
|
110
115
|
rb_xslt_stylesheet_serialize(VALUE self, VALUE xmlobj)
|
@@ -133,7 +138,7 @@ rb_xslt_stylesheet_serialize(VALUE self, VALUE xmlobj)
|
|
133
138
|
* transform(document)
|
134
139
|
* transform(document, params = {})
|
135
140
|
*
|
136
|
-
*
|
141
|
+
* Transform an XML::Document as defined by an XSLT::Stylesheet.
|
137
142
|
*
|
138
143
|
* [Parameters]
|
139
144
|
* - +document+ (Nokogiri::XML::Document) the document to be transformed.
|
data/gumbo-parser/Makefile
CHANGED
@@ -13,6 +13,23 @@ LDFLAGS := -pthread
|
|
13
13
|
|
14
14
|
all: check
|
15
15
|
|
16
|
+
oss-fuzz:
|
17
|
+
./fuzzer/build-ossfuzz.sh
|
18
|
+
|
19
|
+
fuzzers: fuzzer-normal fuzzer-asan fuzzer-ubsan fuzzer-msan
|
20
|
+
|
21
|
+
fuzzer-normal:
|
22
|
+
./fuzzer/build.sh
|
23
|
+
|
24
|
+
fuzzer-asan:
|
25
|
+
SANITIZER=asan ./fuzzer/build.sh
|
26
|
+
|
27
|
+
fuzzer-ubsan:
|
28
|
+
SANITIZER=ubsan ./fuzzer/build.sh
|
29
|
+
|
30
|
+
fuzzer-msan:
|
31
|
+
SANITIZER=msan ./fuzzer/build.sh
|
32
|
+
|
16
33
|
# don't try to regenerate ragel or gperf files in CI, that should be a development-only action and
|
17
34
|
# the generated files should be committed to SCM
|
18
35
|
ifneq ($(CI),true)
|
@@ -81,6 +98,7 @@ coverage:
|
|
81
98
|
|
82
99
|
clean:
|
83
100
|
$(RM) -r build
|
101
|
+
$(RM) -r fuzzer/build fuzzer/src-* fuzzer/gumbo_corpus
|
84
102
|
|
85
103
|
build/src/flags: | build/src
|
86
104
|
@echo 'old_CC := $(CC)' > $@
|
data/gumbo-parser/src/error.c
CHANGED
@@ -357,7 +357,7 @@ static void handle_parser_error (
|
|
357
357
|
print_tag_stack(error, output);
|
358
358
|
return;
|
359
359
|
case GUMBO_TOKEN_END_TAG:
|
360
|
-
print_message(output, "
|
360
|
+
print_message(output, "End tag '%s' isn't allowed here.",
|
361
361
|
gumbo_normalized_tagname(error->input_tag));
|
362
362
|
print_tag_stack(error, output);
|
363
363
|
return;
|
data/gumbo-parser/src/parser.c
CHANGED
@@ -4826,14 +4826,17 @@ GumboOutput* gumbo_parse_with_options (
|
|
4826
4826
|
// to a token.
|
4827
4827
|
if (token.type == GUMBO_TOKEN_END_TAG &&
|
4828
4828
|
token.v.end_tag.tag == GUMBO_TAG_UNKNOWN)
|
4829
|
+
{
|
4829
4830
|
gumbo_free(token.v.end_tag.name);
|
4831
|
+
token.v.end_tag.name = NULL;
|
4832
|
+
}
|
4833
|
+
if (unlikely(state->_open_elements.length > max_tree_depth)) {
|
4834
|
+
parser._output->status = GUMBO_STATUS_TREE_TOO_DEEP;
|
4835
|
+
gumbo_debug("Tree depth limit exceeded.\n");
|
4836
|
+
break;
|
4837
|
+
}
|
4830
4838
|
}
|
4831
4839
|
|
4832
|
-
if (unlikely(state->_open_elements.length > max_tree_depth)) {
|
4833
|
-
parser._output->status = GUMBO_STATUS_TREE_TOO_DEEP;
|
4834
|
-
gumbo_debug("Tree depth limit exceeded.\n");
|
4835
|
-
break;
|
4836
|
-
}
|
4837
4840
|
|
4838
4841
|
++loop_count;
|
4839
4842
|
assert(loop_count < 1000000000UL);
|
@@ -506,6 +506,7 @@ static void abandon_current_tag(GumboParser* parser) {
|
|
506
506
|
for (unsigned int i = 0; i < tag_state->_attributes.length; ++i) {
|
507
507
|
gumbo_destroy_attribute(tag_state->_attributes.data[i]);
|
508
508
|
}
|
509
|
+
gumbo_free(tag_state->_name);
|
509
510
|
gumbo_free(tag_state->_attributes.data);
|
510
511
|
mark_tag_state_as_empty(tag_state);
|
511
512
|
gumbo_string_buffer_destroy(&tag_state->_buffer);
|
@@ -302,7 +302,7 @@ module Nokogiri
|
|
302
302
|
end
|
303
303
|
|
304
304
|
def read_a_and_positive_b(values)
|
305
|
-
op = values[2]
|
305
|
+
op = values[2].strip
|
306
306
|
if op == "+"
|
307
307
|
a = values[0].to_i
|
308
308
|
b = values[3].to_i
|
@@ -335,25 +335,5 @@ module Nokogiri
|
|
335
335
|
end
|
336
336
|
end
|
337
337
|
end
|
338
|
-
|
339
|
-
module XPathVisitorAlwaysUseBuiltins # :nodoc:
|
340
|
-
def self.new
|
341
|
-
warn(
|
342
|
-
"Nokogiri::CSS::XPathVisitorAlwaysUseBuiltins is deprecated and will be removed in a future version of Nokogiri",
|
343
|
-
{ uplevel: 1 },
|
344
|
-
)
|
345
|
-
XPathVisitor.new(builtins: :always)
|
346
|
-
end
|
347
|
-
end
|
348
|
-
|
349
|
-
module XPathVisitorOptimallyUseBuiltins # :nodoc:
|
350
|
-
def self.new
|
351
|
-
warn(
|
352
|
-
"Nokogiri::CSS::XPathVisitorOptimallyUseBuiltins is deprecated and will be removed in a future version of Nokogiri",
|
353
|
-
{ uplevel: 1 },
|
354
|
-
)
|
355
|
-
XPathVisitor.new(builtins: :optimal)
|
356
|
-
end
|
357
|
-
end
|
358
338
|
end
|
359
339
|
end
|
@@ -92,7 +92,7 @@ module Nokogiri
|
|
92
92
|
title = XML::Node.new("title", self) << tnode
|
93
93
|
if (head = at_xpath("//head"))
|
94
94
|
head << title
|
95
|
-
elsif (meta =
|
95
|
+
elsif (meta = at_xpath("//meta[@charset]") || meta_content_type)
|
96
96
|
# better put after charset declaration
|
97
97
|
meta.add_next_sibling(title)
|
98
98
|
else
|
@@ -94,7 +94,7 @@ module Nokogiri
|
|
94
94
|
# no support for a call without len
|
95
95
|
|
96
96
|
unless @firstchunk
|
97
|
-
(@firstchunk = @io.read(len)) ||
|
97
|
+
(@firstchunk = @io.read(len)) || return
|
98
98
|
|
99
99
|
# This implementation expects that the first call from
|
100
100
|
# htmlReadIO() is made with a length long enough (~1KB) to
|
data/lib/nokogiri/html5.rb
CHANGED
@@ -239,23 +239,6 @@ module Nokogiri
|
|
239
239
|
DocumentFragment.parse(string, encoding, options)
|
240
240
|
end
|
241
241
|
|
242
|
-
# Fetch and parse a HTML document from the web, following redirects,
|
243
|
-
# handling https, and determining the character encoding using HTML5
|
244
|
-
# rules. +uri+ may be a +String+ or a +URI+. +options+ contains
|
245
|
-
# http headers and special options. Everything which is not a
|
246
|
-
# special option is considered a header. Special options include:
|
247
|
-
# * :follow_limit => number of redirects which are followed
|
248
|
-
# * :basic_auth => [username, password]
|
249
|
-
def get(uri, options = {})
|
250
|
-
# TODO: deprecate
|
251
|
-
warn(
|
252
|
-
"Nokogiri::HTML5.get is deprecated and will be removed in a future version of Nokogiri.",
|
253
|
-
uplevel: 1,
|
254
|
-
category: :deprecated,
|
255
|
-
)
|
256
|
-
get_impl(uri, options)
|
257
|
-
end
|
258
|
-
|
259
242
|
# :nodoc:
|
260
243
|
def read_and_encode(string, encoding)
|
261
244
|
# Read the string with the given encoding.
|
@@ -283,55 +266,6 @@ module Nokogiri
|
|
283
266
|
|
284
267
|
private
|
285
268
|
|
286
|
-
def get_impl(uri, options = {})
|
287
|
-
headers = options.clone
|
288
|
-
headers = { follow_limit: headers } if Numeric === headers # deprecated
|
289
|
-
limit = headers[:follow_limit] ? headers.delete(:follow_limit).to_i : 10
|
290
|
-
|
291
|
-
require "net/http"
|
292
|
-
uri = URI(uri) unless URI === uri
|
293
|
-
|
294
|
-
http = Net::HTTP.new(uri.host, uri.port)
|
295
|
-
|
296
|
-
# TLS / SSL support
|
297
|
-
http.use_ssl = true if uri.scheme == "https"
|
298
|
-
|
299
|
-
# Pass through Net::HTTP override values, which currently include:
|
300
|
-
# :ca_file, :ca_path, :cert, :cert_store, :ciphers,
|
301
|
-
# :close_on_empty_response, :continue_timeout, :key, :open_timeout,
|
302
|
-
# :read_timeout, :ssl_timeout, :ssl_version, :use_ssl,
|
303
|
-
# :verify_callback, :verify_depth, :verify_mode
|
304
|
-
options.each do |key, _value|
|
305
|
-
http.send("#{key}=", headers.delete(key)) if http.respond_to?("#{key}=")
|
306
|
-
end
|
307
|
-
|
308
|
-
request = Net::HTTP::Get.new(uri.request_uri)
|
309
|
-
|
310
|
-
# basic authentication
|
311
|
-
auth = headers.delete(:basic_auth)
|
312
|
-
auth ||= [uri.user, uri.password] if uri.user && uri.password
|
313
|
-
request.basic_auth(auth.first, auth.last) if auth
|
314
|
-
|
315
|
-
# remaining options are treated as headers
|
316
|
-
headers.each { |key, value| request[key.to_s] = value.to_s }
|
317
|
-
|
318
|
-
response = http.request(request)
|
319
|
-
|
320
|
-
case response
|
321
|
-
when Net::HTTPSuccess
|
322
|
-
doc = parse(reencode(response.body, response["content-type"]), options)
|
323
|
-
doc.instance_variable_set(:@response, response)
|
324
|
-
doc.class.send(:attr_reader, :response)
|
325
|
-
doc
|
326
|
-
when Net::HTTPRedirection
|
327
|
-
response.value if limit <= 1
|
328
|
-
location = URI.join(uri, response["location"])
|
329
|
-
get_impl(location, options.merge(follow_limit: limit - 1))
|
330
|
-
else
|
331
|
-
response.value
|
332
|
-
end
|
333
|
-
end
|
334
|
-
|
335
269
|
# Charset sniffing is a complex and controversial topic that understandably isn't done _by
|
336
270
|
# default_ by the Ruby Net::HTTP library. This being said, it is a very real problem for
|
337
271
|
# consumers of HTML as the default for HTML is iso-8859-1, most "good" producers use utf-8, and
|
@@ -94,11 +94,14 @@ module Nokogiri
|
|
94
94
|
nokogiri["version"] = Nokogiri::VERSION
|
95
95
|
|
96
96
|
unless jruby?
|
97
|
-
# enable gems
|
97
|
+
# enable gems to build against Nokogiri with the following in their extconf.rb:
|
98
98
|
#
|
99
99
|
# append_cflags(Nokogiri::VERSION_INFO["nokogiri"]["cppflags"])
|
100
100
|
# append_ldflags(Nokogiri::VERSION_INFO["nokogiri"]["ldflags"])
|
101
101
|
#
|
102
|
+
# though, this won't work on all platforms and versions of Ruby, and won't be supported
|
103
|
+
# forever, see https://github.com/sparklemotion/nokogiri/discussions/2746 for context.
|
104
|
+
#
|
102
105
|
cppflags = ["-I#{header_directory.shellescape}"]
|
103
106
|
ldflags = []
|
104
107
|
|
@@ -108,7 +111,8 @@ module Nokogiri
|
|
108
111
|
end
|
109
112
|
|
110
113
|
if windows?
|
111
|
-
# on windows,
|
114
|
+
# on windows, third party libraries that wish to link against nokogiri
|
115
|
+
# should link against nokogiri.so to resolve symbols. see #2167
|
112
116
|
lib_directory = File.expand_path(File.join(File.dirname(__FILE__), "../#{ruby_minor}"))
|
113
117
|
unless File.exist?(lib_directory)
|
114
118
|
lib_directory = File.expand_path(File.join(File.dirname(__FILE__), ".."))
|
@@ -136,9 +140,6 @@ module Nokogiri
|
|
136
140
|
libxml["source"] = "packaged"
|
137
141
|
libxml["precompiled"] = libxml2_precompiled?
|
138
142
|
libxml["patches"] = Nokogiri::LIBXML2_PATCHES
|
139
|
-
|
140
|
-
# this is for nokogumbo and shouldn't be forever
|
141
|
-
libxml["libxml2_path"] = header_directory
|
142
143
|
else
|
143
144
|
libxml["source"] = "system"
|
144
145
|
end
|
data/lib/nokogiri/xml/attr.rb
CHANGED
@@ -18,8 +18,6 @@ module Nokogiri
|
|
18
18
|
# - +value+ → (String) The value of the attribute.
|
19
19
|
# - +namespace+ → (Namespace, nil) The Namespace of the attribute, or +nil+ if there is no namespace.
|
20
20
|
#
|
21
|
-
# ⚡ This is an experimental feature, available since v1.14.0
|
22
|
-
#
|
23
21
|
# *Example*
|
24
22
|
#
|
25
23
|
# doc = Nokogiri::XML.parse(<<~XML)
|
@@ -52,6 +50,8 @@ module Nokogiri
|
|
52
50
|
# # href = "http://nokogiri.org/ns/noko"
|
53
51
|
# # })}
|
54
52
|
#
|
53
|
+
# Since v1.14.0
|
54
|
+
#
|
55
55
|
def deconstruct_keys(keys)
|
56
56
|
{ name: name, value: value, namespace: namespace }
|
57
57
|
end
|
@@ -174,8 +174,7 @@ module Nokogiri
|
|
174
174
|
# Since v1.12.4
|
175
175
|
attr_accessor :namespace_inheritance
|
176
176
|
|
177
|
-
# :nodoc:
|
178
|
-
def initialize(*args) # rubocop:disable Lint/MissingSuper
|
177
|
+
def initialize(*args) # :nodoc: # rubocop:disable Lint/MissingSuper
|
179
178
|
@errors = []
|
180
179
|
@decorators = nil
|
181
180
|
@namespace_inheritance = false
|
@@ -330,7 +329,7 @@ module Nokogiri
|
|
330
329
|
# Validate this Document against its DTD. Returns a list of errors on
|
331
330
|
# the document or +nil+ when there is no DTD.
|
332
331
|
def validate
|
333
|
-
return
|
332
|
+
return unless internal_subset
|
334
333
|
|
335
334
|
internal_subset.validate(self)
|
336
335
|
end
|
@@ -427,8 +426,6 @@ module Nokogiri
|
|
427
426
|
# instructions. If you have a use case and would like this functionality, please let us know
|
428
427
|
# by opening an issue or a discussion on the github project.
|
429
428
|
#
|
430
|
-
# ⚡ This is an experimental feature, available since v1.14.0
|
431
|
-
#
|
432
429
|
# *Example*
|
433
430
|
#
|
434
431
|
# doc = Nokogiri::XML.parse(<<~XML)
|
@@ -455,6 +452,8 @@ module Nokogiri
|
|
455
452
|
# doc.deconstruct_keys([:root])
|
456
453
|
# # => {:root=>nil}
|
457
454
|
#
|
455
|
+
# Since v1.14.0
|
456
|
+
#
|
458
457
|
def deconstruct_keys(keys)
|
459
458
|
{ root: root }
|
460
459
|
end
|
@@ -154,8 +154,6 @@ module Nokogiri
|
|
154
154
|
# root elements, you should deconstruct the array returned by
|
155
155
|
# <tt>DocumentFragment#elements</tt>.
|
156
156
|
#
|
157
|
-
# ⚡ This is an experimental feature, available since v1.14.0
|
158
|
-
#
|
159
157
|
# *Example*
|
160
158
|
#
|
161
159
|
# frag = Nokogiri::HTML5.fragment(<<~HTML)
|
@@ -187,6 +185,8 @@ module Nokogiri
|
|
187
185
|
# # }),
|
188
186
|
# # #(Element:0x398 { name = "div", children = [ #(Text "End")] })]
|
189
187
|
#
|
188
|
+
# Since v1.14.0
|
189
|
+
#
|
190
190
|
def deconstruct
|
191
191
|
children.to_a
|
192
192
|
end
|
@@ -16,8 +16,6 @@ module Nokogiri
|
|
16
16
|
# - +prefix+ → (String, nil) The namespace's prefix, or +nil+ if there is no prefix (e.g., default namespace).
|
17
17
|
# - +href+ → (String) The namespace's URI
|
18
18
|
#
|
19
|
-
# ⚡ This is an experimental feature, available since v1.14.0
|
20
|
-
#
|
21
19
|
# *Example*
|
22
20
|
#
|
23
21
|
# doc = Nokogiri::XML.parse(<<~XML)
|
@@ -43,6 +41,7 @@ module Nokogiri
|
|
43
41
|
# doc.root.elements.last.namespace.deconstruct_keys([:prefix, :href])
|
44
42
|
# # => {:prefix=>"noko", :href=>"http://nokogiri.org/ns/noko"}
|
45
43
|
#
|
44
|
+
# Since v1.14.0
|
46
45
|
#
|
47
46
|
def deconstruct_keys(keys)
|
48
47
|
{ prefix: prefix, href: href }
|
data/lib/nokogiri/xml/node.rb
CHANGED
@@ -1049,29 +1049,35 @@ module Nokogiri
|
|
1049
1049
|
|
1050
1050
|
return Nokogiri::XML::NodeSet.new(document) if contents.empty?
|
1051
1051
|
|
1052
|
-
# libxml2 does not obey the +recover+ option after encountering errors during +in_context+
|
1053
|
-
# parsing, and so this horrible hack is here to try to emulate recovery behavior.
|
1054
|
-
#
|
1055
|
-
# Unfortunately, this means we're no longer parsing "in context" and so namespaces that
|
1056
|
-
# would have been inherited from the context node won't be handled correctly. This hack was
|
1057
|
-
# written in 2010, and I regret it, because it's silently degrading functionality in a way
|
1058
|
-
# that's not easily prevented (or even detected).
|
1059
|
-
#
|
1060
|
-
# I think preferable behavior would be to either:
|
1061
|
-
#
|
1062
|
-
# a. add an error noting that we "fell back" and pointing the user to turning off the +recover+ option
|
1063
|
-
# b. don't recover, but raise a sensible exception
|
1064
|
-
#
|
1065
|
-
# For context and background: https://github.com/sparklemotion/nokogiri/issues/313
|
1066
|
-
# FIXME bug report: https://github.com/sparklemotion/nokogiri/issues/2092
|
1067
1052
|
error_count = document.errors.length
|
1068
1053
|
node_set = in_context(contents, options.to_i)
|
1069
|
-
if
|
1070
|
-
|
1054
|
+
if document.errors.length > error_count
|
1055
|
+
raise document.errors[error_count] unless options.recover?
|
1056
|
+
|
1057
|
+
if node_set.empty?
|
1058
|
+
# libxml2 < 2.13 does not obey the +recover+ option after encountering errors during
|
1059
|
+
# +in_context+ parsing, and so this horrible hack is here to try to emulate recovery
|
1060
|
+
# behavior.
|
1061
|
+
#
|
1062
|
+
# (Note that HTML4 fragment parsing seems to have been fixed in abd74186, and XML
|
1063
|
+
# fragment parsing is fixed in 1c106edf. Both are in 2.13.)
|
1064
|
+
#
|
1065
|
+
# Unfortunately, this means we're no longer parsing "in context" and so namespaces that
|
1066
|
+
# would have been inherited from the context node won't be handled correctly. This hack
|
1067
|
+
# was written in 2010, and I regret it, because it's silently degrading functionality in
|
1068
|
+
# a way that's not easily prevented (or even detected).
|
1069
|
+
#
|
1070
|
+
# I think preferable behavior would be to either:
|
1071
|
+
#
|
1072
|
+
# a. add an error noting that we "fell back" and pointing the user to turning off the
|
1073
|
+
# +recover+ option
|
1074
|
+
# b. don't recover, but raise a sensible exception
|
1075
|
+
#
|
1076
|
+
# For context and background:
|
1077
|
+
# - https://github.com/sparklemotion/nokogiri/issues/313
|
1078
|
+
# - https://github.com/sparklemotion/nokogiri/issues/2092
|
1071
1079
|
fragment = document.related_class("DocumentFragment").parse(contents)
|
1072
1080
|
node_set = fragment.children
|
1073
|
-
else
|
1074
|
-
raise document.errors[error_count]
|
1075
1081
|
end
|
1076
1082
|
end
|
1077
1083
|
node_set
|
@@ -1165,7 +1171,7 @@ module Nokogiri
|
|
1165
1171
|
# Fetch the Nokogiri::HTML4::ElementDescription for this node. Returns
|
1166
1172
|
# nil on XML documents and on unknown tags.
|
1167
1173
|
def description
|
1168
|
-
return
|
1174
|
+
return if document.xml?
|
1169
1175
|
|
1170
1176
|
Nokogiri::HTML4::ElementDescription[name]
|
1171
1177
|
end
|
@@ -1254,8 +1260,8 @@ module Nokogiri
|
|
1254
1260
|
# Compare two Node objects with respect to their Document. Nodes from
|
1255
1261
|
# different documents cannot be compared.
|
1256
1262
|
def <=>(other)
|
1257
|
-
return
|
1258
|
-
return
|
1263
|
+
return unless other.is_a?(Nokogiri::XML::Node)
|
1264
|
+
return unless document == other.document
|
1259
1265
|
|
1260
1266
|
compare(other)
|
1261
1267
|
end
|
@@ -1278,6 +1284,7 @@ module Nokogiri
|
|
1278
1284
|
# end
|
1279
1285
|
#
|
1280
1286
|
def serialize(*args, &block)
|
1287
|
+
# TODO: deprecate non-hash options, see 46c68ed 2009-06-20 for context
|
1281
1288
|
options = if args.first.is_a?(Hash)
|
1282
1289
|
args.shift
|
1283
1290
|
else
|
@@ -1429,8 +1436,6 @@ module Nokogiri
|
|
1429
1436
|
# - +content+ → (String) The contents of all the text nodes in this node's subtree. See #content.
|
1430
1437
|
# - +inner_html+ → (String) The inner markup for the children of this node. See #inner_html.
|
1431
1438
|
#
|
1432
|
-
# ⚡ This is an experimental feature, available since v1.14.0
|
1433
|
-
#
|
1434
1439
|
# *Example*
|
1435
1440
|
#
|
1436
1441
|
# doc = Nokogiri::XML.parse(<<~XML)
|
@@ -1465,6 +1470,8 @@ module Nokogiri
|
|
1465
1470
|
# # value = "def"
|
1466
1471
|
# # })]}
|
1467
1472
|
#
|
1473
|
+
# Since v1.14.0
|
1474
|
+
#
|
1468
1475
|
def deconstruct_keys(keys)
|
1469
1476
|
requested_keys = DECONSTRUCT_KEYS & keys
|
1470
1477
|
{}.tap do |values|
|
@@ -372,7 +372,7 @@ module Nokogiri
|
|
372
372
|
# Removes the last element from set and returns it, or +nil+ if
|
373
373
|
# the set is empty
|
374
374
|
def pop
|
375
|
-
return
|
375
|
+
return if length == 0
|
376
376
|
|
377
377
|
delete(last)
|
378
378
|
end
|
@@ -381,7 +381,7 @@ module Nokogiri
|
|
381
381
|
# Returns the first element of the NodeSet and removes it. Returns
|
382
382
|
# +nil+ if the set is empty.
|
383
383
|
def shift
|
384
|
-
return
|
384
|
+
return if length == 0
|
385
385
|
|
386
386
|
delete(first)
|
387
387
|
end
|
@@ -435,7 +435,7 @@ module Nokogiri
|
|
435
435
|
#
|
436
436
|
# Returns the members of this NodeSet as an array, to use in pattern matching.
|
437
437
|
#
|
438
|
-
#
|
438
|
+
# Since v1.14.0
|
439
439
|
#
|
440
440
|
def deconstruct
|
441
441
|
to_a
|
data/lib/nokogiri/xml/reader.rb
CHANGED
@@ -3,9 +3,11 @@
|
|
3
3
|
module Nokogiri
|
4
4
|
module XML
|
5
5
|
###
|
6
|
-
# Nokogiri::XML::Reader parses an XML document similar to the way a cursor
|
7
|
-
#
|
8
|
-
#
|
6
|
+
# Nokogiri::XML::Reader parses an XML document similar to the way a cursor would move. The
|
7
|
+
# Reader is given an XML document, and yields nodes to an each block.
|
8
|
+
#
|
9
|
+
# The Reader parser might be good for when you need the speed and low memory usage of the SAX
|
10
|
+
# parser, but do not want to write a Document handler.
|
9
11
|
#
|
10
12
|
# Here is an example of usage:
|
11
13
|
#
|
@@ -22,13 +24,12 @@ module Nokogiri
|
|
22
24
|
#
|
23
25
|
# end
|
24
26
|
#
|
25
|
-
#
|
26
|
-
#
|
27
|
-
#
|
28
|
-
# need during the first iteration.
|
27
|
+
# ⚠ Nokogiri::XML::Reader#each can only be called once! Once the cursor moves through the entire
|
28
|
+
# document, you must parse the document again. It may be better to capture all information you
|
29
|
+
# need during a single iteration.
|
29
30
|
#
|
30
|
-
#
|
31
|
-
#
|
31
|
+
# ⚠ libxml2 does not support error recovery in the Reader parser. The `RECOVER` ParseOption is
|
32
|
+
# ignored. If a syntax error is encountered during parsing, an exception will be raised.
|
32
33
|
class Reader
|
33
34
|
include Enumerable
|
34
35
|
|
@@ -199,7 +199,7 @@ module Nokogiri
|
|
199
199
|
#
|
200
200
|
# Search this node's immediate children using CSS selector +selector+
|
201
201
|
def >(selector) # rubocop:disable Naming/BinaryOperatorParameterName
|
202
|
-
ns =
|
202
|
+
ns = document.root&.namespaces || {}
|
203
203
|
xpath(CSS.xpath_for(selector, prefix: "./", ns: ns).first)
|
204
204
|
end
|
205
205
|
|
@@ -229,7 +229,7 @@ module Nokogiri
|
|
229
229
|
def xpath_impl(node, path, handler, ns, binds)
|
230
230
|
ctx = XPathContext.new(node)
|
231
231
|
ctx.register_namespaces(ns)
|
232
|
-
path = path.gsub(
|
232
|
+
path = path.gsub("xmlns:", " :") unless Nokogiri.uses_libxml?
|
233
233
|
|
234
234
|
binds&.each do |key, value|
|
235
235
|
ctx.register_variable(key.to_s, value)
|
@@ -269,7 +269,7 @@ module Nokogiri
|
|
269
269
|
end
|
270
270
|
ns, binds = hashes.reverse
|
271
271
|
|
272
|
-
ns ||=
|
272
|
+
ns ||= document.root&.namespaces || {}
|
273
273
|
|
274
274
|
[params, handler, ns, binds]
|
275
275
|
end
|