nokogiri 1.15.1 → 1.16.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +17 -14
- data/README.md +4 -1
- data/dependencies.yml +9 -8
- data/ext/nokogiri/extconf.rb +3 -4
- data/ext/nokogiri/html4_sax_push_parser.c +1 -1
- data/ext/nokogiri/nokogiri.h +10 -3
- data/ext/nokogiri/test_global_handlers.c +1 -1
- data/ext/nokogiri/xml_cdata.c +28 -23
- data/ext/nokogiri/xml_document.c +13 -5
- data/ext/nokogiri/xml_namespace.c +0 -4
- data/ext/nokogiri/xml_node.c +6 -9
- data/ext/nokogiri/xml_reader.c +26 -48
- data/ext/nokogiri/xml_relax_ng.c +1 -1
- data/ext/nokogiri/xml_sax_parser_context.c +4 -0
- data/ext/nokogiri/xml_sax_push_parser.c +1 -1
- data/ext/nokogiri/xml_schema.c +13 -8
- data/ext/nokogiri/xml_syntax_error.c +3 -3
- data/ext/nokogiri/xml_text.c +24 -19
- data/ext/nokogiri/xml_xpath_context.c +2 -5
- data/ext/nokogiri/xslt_stylesheet.c +8 -3
- data/gumbo-parser/Makefile +18 -0
- data/gumbo-parser/src/error.c +1 -1
- data/gumbo-parser/src/parser.c +8 -5
- data/gumbo-parser/src/tokenizer.c +1 -0
- data/lib/nokogiri/css/parser_extras.rb +1 -1
- data/lib/nokogiri/css/xpath_visitor.rb +1 -21
- data/lib/nokogiri/html4/document.rb +1 -1
- data/lib/nokogiri/html4/encoding_reader.rb +1 -1
- data/lib/nokogiri/html5.rb +0 -66
- data/lib/nokogiri/jruby/nokogiri_jars.rb +3 -3
- data/lib/nokogiri/version/constant.rb +1 -1
- data/lib/nokogiri/version/info.rb +6 -5
- data/lib/nokogiri/xml/attr.rb +2 -2
- data/lib/nokogiri/xml/document.rb +4 -5
- data/lib/nokogiri/xml/document_fragment.rb +2 -2
- data/lib/nokogiri/xml/namespace.rb +1 -2
- data/lib/nokogiri/xml/node.rb +31 -24
- data/lib/nokogiri/xml/node_set.rb +3 -3
- data/lib/nokogiri/xml/reader.rb +10 -9
- data/lib/nokogiri/xml/searchable.rb +3 -3
- data/lib/nokogiri/xml/syntax_error.rb +1 -1
- data/lib/nokogiri/xml.rb +1 -1
- data/lib/nokogiri/xslt/stylesheet.rb +29 -7
- data/lib/nokogiri/xslt.rb +1 -1
- data/lib/nokogiri.rb +1 -1
- data/ports/archives/libxml2-2.12.7.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.39.tar.xz +0 -0
- metadata +9 -9
- data/ports/archives/libxml2-2.11.4.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.38.tar.xz +0 -0
data/ext/nokogiri/xml_text.c
CHANGED
@@ -9,33 +9,38 @@ VALUE cNokogiriXmlText ;
|
|
9
9
|
* Create a new Text element on the +document+ with +content+
|
10
10
|
*/
|
11
11
|
static VALUE
|
12
|
-
|
12
|
+
rb_xml_text_s_new(int argc, VALUE *argv, VALUE klass)
|
13
13
|
{
|
14
|
-
xmlDocPtr
|
15
|
-
xmlNodePtr
|
16
|
-
VALUE
|
17
|
-
VALUE
|
18
|
-
VALUE
|
14
|
+
xmlDocPtr c_document;
|
15
|
+
xmlNodePtr c_node;
|
16
|
+
VALUE rb_string;
|
17
|
+
VALUE rb_document;
|
18
|
+
VALUE rb_rest;
|
19
19
|
VALUE rb_node;
|
20
20
|
|
21
|
-
rb_scan_args(argc, argv, "2*", &
|
21
|
+
rb_scan_args(argc, argv, "2*", &rb_string, &rb_document, &rb_rest);
|
22
22
|
|
23
|
-
if (rb_obj_is_kind_of(
|
24
|
-
|
25
|
-
|
23
|
+
if (!rb_obj_is_kind_of(rb_document, cNokogiriXmlNode)) {
|
24
|
+
rb_raise(rb_eTypeError,
|
25
|
+
"expected second parameter to be a Nokogiri::XML::Document, received %"PRIsVALUE,
|
26
|
+
rb_obj_class(rb_document));
|
27
|
+
}
|
28
|
+
|
29
|
+
if (!rb_obj_is_kind_of(rb_document, cNokogiriXmlDocument)) {
|
26
30
|
xmlNodePtr deprecated_node_type_arg;
|
27
|
-
// TODO:
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
+
NOKO_WARN_DEPRECATION("Passing a Node as the second parameter to Text.new is deprecated. Please pass a Document instead. This will become an error in Nokogiri v1.17.0."); // TODO: deprecated in v1.15.3, remove in v1.17.0
|
32
|
+
Noko_Node_Get_Struct(rb_document, xmlNode, deprecated_node_type_arg);
|
33
|
+
c_document = deprecated_node_type_arg->doc;
|
34
|
+
} else {
|
35
|
+
c_document = noko_xml_document_unwrap(rb_document);
|
31
36
|
}
|
32
37
|
|
33
|
-
|
34
|
-
|
38
|
+
c_node = xmlNewText((xmlChar *)StringValueCStr(rb_string));
|
39
|
+
c_node->doc = c_document;
|
35
40
|
|
36
|
-
noko_xml_document_pin_node(
|
41
|
+
noko_xml_document_pin_node(c_node);
|
37
42
|
|
38
|
-
rb_node = noko_xml_node_wrap(klass,
|
43
|
+
rb_node = noko_xml_node_wrap(klass, c_node) ;
|
39
44
|
rb_obj_call_init(rb_node, argc, argv);
|
40
45
|
|
41
46
|
if (rb_block_given_p()) { rb_yield(rb_node); }
|
@@ -52,5 +57,5 @@ noko_init_xml_text(void)
|
|
52
57
|
*/
|
53
58
|
cNokogiriXmlText = rb_define_class_under(mNokogiriXml, "Text", cNokogiriXmlCharacterData);
|
54
59
|
|
55
|
-
rb_define_singleton_method(cNokogiriXmlText, "new",
|
60
|
+
rb_define_singleton_method(cNokogiriXmlText, "new", rb_xml_text_s_new, -1);
|
56
61
|
}
|
@@ -321,11 +321,8 @@ handler_lookup(void *data, const xmlChar *c_name, const xmlChar *c_ns_uri)
|
|
321
321
|
VALUE rb_handler = (VALUE)data;
|
322
322
|
if (rb_respond_to(rb_handler, rb_intern((const char *)c_name))) {
|
323
323
|
if (c_ns_uri == NULL) {
|
324
|
-
NOKO_WARN_DEPRECATION(
|
325
|
-
|
326
|
-
" Please update your query to reference this function as 'nokogiri:%s'."
|
327
|
-
" Invoking custom handler functions without a namespace is deprecated and support will be removed in a future release of Nokogiri.",
|
328
|
-
c_name, c_name);
|
324
|
+
NOKO_WARN_DEPRECATION("A custom XPath or CSS handler function named '%s' is being invoked without a namespace. Please update your query to reference this function as 'nokogiri:%s'. Invoking custom handler functions without a namespace is deprecated and will become an error in Nokogiri v1.17.0.",
|
325
|
+
c_name, c_name); // deprecated in v1.15.0, remove in v1.17.0
|
329
326
|
}
|
330
327
|
return method_caller;
|
331
328
|
}
|
@@ -71,7 +71,12 @@ Nokogiri_wrap_xslt_stylesheet(xsltStylesheetPtr ss)
|
|
71
71
|
* call-seq:
|
72
72
|
* parse_stylesheet_doc(document)
|
73
73
|
*
|
74
|
-
* Parse
|
74
|
+
* Parse an XSLT::Stylesheet from +document+.
|
75
|
+
*
|
76
|
+
* [Parameters]
|
77
|
+
* - +document+ (Nokogiri::XML::Document) the document to be parsed.
|
78
|
+
*
|
79
|
+
* [Returns] Nokogiri::XSLT::Stylesheet
|
75
80
|
*/
|
76
81
|
static VALUE
|
77
82
|
parse_stylesheet_doc(VALUE klass, VALUE xmldocobj)
|
@@ -104,7 +109,7 @@ parse_stylesheet_doc(VALUE klass, VALUE xmldocobj)
|
|
104
109
|
* call-seq:
|
105
110
|
* serialize(document)
|
106
111
|
*
|
107
|
-
* Serialize +document+ to an xml string.
|
112
|
+
* Serialize +document+ to an xml string, as specified by the +method+ parameter in the Stylesheet.
|
108
113
|
*/
|
109
114
|
static VALUE
|
110
115
|
rb_xslt_stylesheet_serialize(VALUE self, VALUE xmlobj)
|
@@ -133,7 +138,7 @@ rb_xslt_stylesheet_serialize(VALUE self, VALUE xmlobj)
|
|
133
138
|
* transform(document)
|
134
139
|
* transform(document, params = {})
|
135
140
|
*
|
136
|
-
*
|
141
|
+
* Transform an XML::Document as defined by an XSLT::Stylesheet.
|
137
142
|
*
|
138
143
|
* [Parameters]
|
139
144
|
* - +document+ (Nokogiri::XML::Document) the document to be transformed.
|
data/gumbo-parser/Makefile
CHANGED
@@ -13,6 +13,23 @@ LDFLAGS := -pthread
|
|
13
13
|
|
14
14
|
all: check
|
15
15
|
|
16
|
+
oss-fuzz:
|
17
|
+
./fuzzer/build-ossfuzz.sh
|
18
|
+
|
19
|
+
fuzzers: fuzzer-normal fuzzer-asan fuzzer-ubsan fuzzer-msan
|
20
|
+
|
21
|
+
fuzzer-normal:
|
22
|
+
./fuzzer/build.sh
|
23
|
+
|
24
|
+
fuzzer-asan:
|
25
|
+
SANITIZER=asan ./fuzzer/build.sh
|
26
|
+
|
27
|
+
fuzzer-ubsan:
|
28
|
+
SANITIZER=ubsan ./fuzzer/build.sh
|
29
|
+
|
30
|
+
fuzzer-msan:
|
31
|
+
SANITIZER=msan ./fuzzer/build.sh
|
32
|
+
|
16
33
|
# don't try to regenerate ragel or gperf files in CI, that should be a development-only action and
|
17
34
|
# the generated files should be committed to SCM
|
18
35
|
ifneq ($(CI),true)
|
@@ -81,6 +98,7 @@ coverage:
|
|
81
98
|
|
82
99
|
clean:
|
83
100
|
$(RM) -r build
|
101
|
+
$(RM) -r fuzzer/build fuzzer/src-* fuzzer/gumbo_corpus
|
84
102
|
|
85
103
|
build/src/flags: | build/src
|
86
104
|
@echo 'old_CC := $(CC)' > $@
|
data/gumbo-parser/src/error.c
CHANGED
@@ -357,7 +357,7 @@ static void handle_parser_error (
|
|
357
357
|
print_tag_stack(error, output);
|
358
358
|
return;
|
359
359
|
case GUMBO_TOKEN_END_TAG:
|
360
|
-
print_message(output, "
|
360
|
+
print_message(output, "End tag '%s' isn't allowed here.",
|
361
361
|
gumbo_normalized_tagname(error->input_tag));
|
362
362
|
print_tag_stack(error, output);
|
363
363
|
return;
|
data/gumbo-parser/src/parser.c
CHANGED
@@ -4826,14 +4826,17 @@ GumboOutput* gumbo_parse_with_options (
|
|
4826
4826
|
// to a token.
|
4827
4827
|
if (token.type == GUMBO_TOKEN_END_TAG &&
|
4828
4828
|
token.v.end_tag.tag == GUMBO_TAG_UNKNOWN)
|
4829
|
+
{
|
4829
4830
|
gumbo_free(token.v.end_tag.name);
|
4831
|
+
token.v.end_tag.name = NULL;
|
4832
|
+
}
|
4833
|
+
if (unlikely(state->_open_elements.length > max_tree_depth)) {
|
4834
|
+
parser._output->status = GUMBO_STATUS_TREE_TOO_DEEP;
|
4835
|
+
gumbo_debug("Tree depth limit exceeded.\n");
|
4836
|
+
break;
|
4837
|
+
}
|
4830
4838
|
}
|
4831
4839
|
|
4832
|
-
if (unlikely(state->_open_elements.length > max_tree_depth)) {
|
4833
|
-
parser._output->status = GUMBO_STATUS_TREE_TOO_DEEP;
|
4834
|
-
gumbo_debug("Tree depth limit exceeded.\n");
|
4835
|
-
break;
|
4836
|
-
}
|
4837
4840
|
|
4838
4841
|
++loop_count;
|
4839
4842
|
assert(loop_count < 1000000000UL);
|
@@ -506,6 +506,7 @@ static void abandon_current_tag(GumboParser* parser) {
|
|
506
506
|
for (unsigned int i = 0; i < tag_state->_attributes.length; ++i) {
|
507
507
|
gumbo_destroy_attribute(tag_state->_attributes.data[i]);
|
508
508
|
}
|
509
|
+
gumbo_free(tag_state->_name);
|
509
510
|
gumbo_free(tag_state->_attributes.data);
|
510
511
|
mark_tag_state_as_empty(tag_state);
|
511
512
|
gumbo_string_buffer_destroy(&tag_state->_buffer);
|
@@ -302,7 +302,7 @@ module Nokogiri
|
|
302
302
|
end
|
303
303
|
|
304
304
|
def read_a_and_positive_b(values)
|
305
|
-
op = values[2]
|
305
|
+
op = values[2].strip
|
306
306
|
if op == "+"
|
307
307
|
a = values[0].to_i
|
308
308
|
b = values[3].to_i
|
@@ -335,25 +335,5 @@ module Nokogiri
|
|
335
335
|
end
|
336
336
|
end
|
337
337
|
end
|
338
|
-
|
339
|
-
module XPathVisitorAlwaysUseBuiltins # :nodoc:
|
340
|
-
def self.new
|
341
|
-
warn(
|
342
|
-
"Nokogiri::CSS::XPathVisitorAlwaysUseBuiltins is deprecated and will be removed in a future version of Nokogiri",
|
343
|
-
{ uplevel: 1 },
|
344
|
-
)
|
345
|
-
XPathVisitor.new(builtins: :always)
|
346
|
-
end
|
347
|
-
end
|
348
|
-
|
349
|
-
module XPathVisitorOptimallyUseBuiltins # :nodoc:
|
350
|
-
def self.new
|
351
|
-
warn(
|
352
|
-
"Nokogiri::CSS::XPathVisitorOptimallyUseBuiltins is deprecated and will be removed in a future version of Nokogiri",
|
353
|
-
{ uplevel: 1 },
|
354
|
-
)
|
355
|
-
XPathVisitor.new(builtins: :optimal)
|
356
|
-
end
|
357
|
-
end
|
358
338
|
end
|
359
339
|
end
|
@@ -92,7 +92,7 @@ module Nokogiri
|
|
92
92
|
title = XML::Node.new("title", self) << tnode
|
93
93
|
if (head = at_xpath("//head"))
|
94
94
|
head << title
|
95
|
-
elsif (meta =
|
95
|
+
elsif (meta = at_xpath("//meta[@charset]") || meta_content_type)
|
96
96
|
# better put after charset declaration
|
97
97
|
meta.add_next_sibling(title)
|
98
98
|
else
|
@@ -94,7 +94,7 @@ module Nokogiri
|
|
94
94
|
# no support for a call without len
|
95
95
|
|
96
96
|
unless @firstchunk
|
97
|
-
(@firstchunk = @io.read(len)) ||
|
97
|
+
(@firstchunk = @io.read(len)) || return
|
98
98
|
|
99
99
|
# This implementation expects that the first call from
|
100
100
|
# htmlReadIO() is made with a length long enough (~1KB) to
|
data/lib/nokogiri/html5.rb
CHANGED
@@ -239,23 +239,6 @@ module Nokogiri
|
|
239
239
|
DocumentFragment.parse(string, encoding, options)
|
240
240
|
end
|
241
241
|
|
242
|
-
# Fetch and parse a HTML document from the web, following redirects,
|
243
|
-
# handling https, and determining the character encoding using HTML5
|
244
|
-
# rules. +uri+ may be a +String+ or a +URI+. +options+ contains
|
245
|
-
# http headers and special options. Everything which is not a
|
246
|
-
# special option is considered a header. Special options include:
|
247
|
-
# * :follow_limit => number of redirects which are followed
|
248
|
-
# * :basic_auth => [username, password]
|
249
|
-
def get(uri, options = {})
|
250
|
-
# TODO: deprecate
|
251
|
-
warn(
|
252
|
-
"Nokogiri::HTML5.get is deprecated and will be removed in a future version of Nokogiri.",
|
253
|
-
uplevel: 1,
|
254
|
-
category: :deprecated,
|
255
|
-
)
|
256
|
-
get_impl(uri, options)
|
257
|
-
end
|
258
|
-
|
259
242
|
# :nodoc:
|
260
243
|
def read_and_encode(string, encoding)
|
261
244
|
# Read the string with the given encoding.
|
@@ -283,55 +266,6 @@ module Nokogiri
|
|
283
266
|
|
284
267
|
private
|
285
268
|
|
286
|
-
def get_impl(uri, options = {})
|
287
|
-
headers = options.clone
|
288
|
-
headers = { follow_limit: headers } if Numeric === headers # deprecated
|
289
|
-
limit = headers[:follow_limit] ? headers.delete(:follow_limit).to_i : 10
|
290
|
-
|
291
|
-
require "net/http"
|
292
|
-
uri = URI(uri) unless URI === uri
|
293
|
-
|
294
|
-
http = Net::HTTP.new(uri.host, uri.port)
|
295
|
-
|
296
|
-
# TLS / SSL support
|
297
|
-
http.use_ssl = true if uri.scheme == "https"
|
298
|
-
|
299
|
-
# Pass through Net::HTTP override values, which currently include:
|
300
|
-
# :ca_file, :ca_path, :cert, :cert_store, :ciphers,
|
301
|
-
# :close_on_empty_response, :continue_timeout, :key, :open_timeout,
|
302
|
-
# :read_timeout, :ssl_timeout, :ssl_version, :use_ssl,
|
303
|
-
# :verify_callback, :verify_depth, :verify_mode
|
304
|
-
options.each do |key, _value|
|
305
|
-
http.send("#{key}=", headers.delete(key)) if http.respond_to?("#{key}=")
|
306
|
-
end
|
307
|
-
|
308
|
-
request = Net::HTTP::Get.new(uri.request_uri)
|
309
|
-
|
310
|
-
# basic authentication
|
311
|
-
auth = headers.delete(:basic_auth)
|
312
|
-
auth ||= [uri.user, uri.password] if uri.user && uri.password
|
313
|
-
request.basic_auth(auth.first, auth.last) if auth
|
314
|
-
|
315
|
-
# remaining options are treated as headers
|
316
|
-
headers.each { |key, value| request[key.to_s] = value.to_s }
|
317
|
-
|
318
|
-
response = http.request(request)
|
319
|
-
|
320
|
-
case response
|
321
|
-
when Net::HTTPSuccess
|
322
|
-
doc = parse(reencode(response.body, response["content-type"]), options)
|
323
|
-
doc.instance_variable_set(:@response, response)
|
324
|
-
doc.class.send(:attr_reader, :response)
|
325
|
-
doc
|
326
|
-
when Net::HTTPRedirection
|
327
|
-
response.value if limit <= 1
|
328
|
-
location = URI.join(uri, response["location"])
|
329
|
-
get_impl(location, options.merge(follow_limit: limit - 1))
|
330
|
-
else
|
331
|
-
response.value
|
332
|
-
end
|
333
|
-
end
|
334
|
-
|
335
269
|
# Charset sniffing is a complex and controversial topic that understandably isn't done _by
|
336
270
|
# default_ by the Ruby Net::HTTP library. This being said, it is a very real problem for
|
337
271
|
# consumers of HTML as the default for HTML is iso-8859-1, most "good" producers use utf-8, and
|
@@ -6,10 +6,10 @@ rescue LoadError
|
|
6
6
|
require 'net/sourceforge/htmlunit/neko-htmlunit/2.63.0/neko-htmlunit-2.63.0.jar'
|
7
7
|
require 'nu/validator/jing/20200702VNU/jing-20200702VNU.jar'
|
8
8
|
require 'xerces/xercesImpl/2.12.2/xercesImpl-2.12.2.jar'
|
9
|
-
require 'org/nokogiri/nekodtd/0.1.11.noko1/nekodtd-0.1.11.noko1.jar'
|
10
9
|
require 'net/sf/saxon/Saxon-HE/9.6.0-4/Saxon-HE-9.6.0-4.jar'
|
11
10
|
require 'xalan/xalan/2.7.3/xalan-2.7.3.jar'
|
12
11
|
require 'xml-apis/xml-apis/1.4.01/xml-apis-1.4.01.jar'
|
12
|
+
require 'org/nokogiri/nekodtd/0.1.11.noko2/nekodtd-0.1.11.noko2.jar'
|
13
13
|
require 'isorelax/isorelax/20030108/isorelax-20030108.jar'
|
14
14
|
end
|
15
15
|
|
@@ -18,10 +18,10 @@ if defined? Jars
|
|
18
18
|
require_jar 'net.sourceforge.htmlunit', 'neko-htmlunit', '2.63.0'
|
19
19
|
require_jar 'nu.validator', 'jing', '20200702VNU'
|
20
20
|
require_jar 'xerces', 'xercesImpl', '2.12.2'
|
21
|
-
require_jar 'org.nokogiri', 'nekodtd', '0.1.11.noko1'
|
22
21
|
require_jar 'net.sf.saxon', 'Saxon-HE', '9.6.0-4'
|
23
22
|
require_jar 'xalan', 'xalan', '2.7.3'
|
24
23
|
require_jar 'xml-apis', 'xml-apis', '1.4.01'
|
24
|
+
require_jar 'org.nokogiri', 'nekodtd', '0.1.11.noko2'
|
25
25
|
require_jar 'isorelax', 'isorelax', '20030108'
|
26
26
|
end
|
27
27
|
|
@@ -32,7 +32,7 @@ module Nokogiri
|
|
32
32
|
"net.sf.saxon:Saxon-HE" => "9.6.0-4",
|
33
33
|
"net.sourceforge.htmlunit:neko-htmlunit" => "2.63.0",
|
34
34
|
"nu.validator:jing" => "20200702VNU",
|
35
|
-
"org.nokogiri:nekodtd" => "0.1.11.
|
35
|
+
"org.nokogiri:nekodtd" => "0.1.11.noko2",
|
36
36
|
"xalan:serializer" => "2.7.3",
|
37
37
|
"xalan:xalan" => "2.7.3",
|
38
38
|
"xerces:xercesImpl" => "2.12.2",
|
@@ -94,11 +94,14 @@ module Nokogiri
|
|
94
94
|
nokogiri["version"] = Nokogiri::VERSION
|
95
95
|
|
96
96
|
unless jruby?
|
97
|
-
# enable gems
|
97
|
+
# enable gems to build against Nokogiri with the following in their extconf.rb:
|
98
98
|
#
|
99
99
|
# append_cflags(Nokogiri::VERSION_INFO["nokogiri"]["cppflags"])
|
100
100
|
# append_ldflags(Nokogiri::VERSION_INFO["nokogiri"]["ldflags"])
|
101
101
|
#
|
102
|
+
# though, this won't work on all platform and versions of Ruby, and won't be supported
|
103
|
+
# forever, see https://github.com/sparklemotion/nokogiri/discussions/2746 for context.
|
104
|
+
#
|
102
105
|
cppflags = ["-I#{header_directory.shellescape}"]
|
103
106
|
ldflags = []
|
104
107
|
|
@@ -108,7 +111,8 @@ module Nokogiri
|
|
108
111
|
end
|
109
112
|
|
110
113
|
if windows?
|
111
|
-
# on windows,
|
114
|
+
# on windows, third party libraries that wish to link against nokogiri
|
115
|
+
# should link against nokogiri.so to resolve symbols. see #2167
|
112
116
|
lib_directory = File.expand_path(File.join(File.dirname(__FILE__), "../#{ruby_minor}"))
|
113
117
|
unless File.exist?(lib_directory)
|
114
118
|
lib_directory = File.expand_path(File.join(File.dirname(__FILE__), ".."))
|
@@ -136,9 +140,6 @@ module Nokogiri
|
|
136
140
|
libxml["source"] = "packaged"
|
137
141
|
libxml["precompiled"] = libxml2_precompiled?
|
138
142
|
libxml["patches"] = Nokogiri::LIBXML2_PATCHES
|
139
|
-
|
140
|
-
# this is for nokogumbo and shouldn't be forever
|
141
|
-
libxml["libxml2_path"] = header_directory
|
142
143
|
else
|
143
144
|
libxml["source"] = "system"
|
144
145
|
end
|
data/lib/nokogiri/xml/attr.rb
CHANGED
@@ -18,8 +18,6 @@ module Nokogiri
|
|
18
18
|
# - +value+ → (String) The value of the attribute.
|
19
19
|
# - +namespace+ → (Namespace, nil) The Namespace of the attribute, or +nil+ if there is no namespace.
|
20
20
|
#
|
21
|
-
# ⚡ This is an experimental feature, available since v1.14.0
|
22
|
-
#
|
23
21
|
# *Example*
|
24
22
|
#
|
25
23
|
# doc = Nokogiri::XML.parse(<<~XML)
|
@@ -52,6 +50,8 @@ module Nokogiri
|
|
52
50
|
# # href = "http://nokogiri.org/ns/noko"
|
53
51
|
# # })}
|
54
52
|
#
|
53
|
+
# Since v1.14.0
|
54
|
+
#
|
55
55
|
def deconstruct_keys(keys)
|
56
56
|
{ name: name, value: value, namespace: namespace }
|
57
57
|
end
|
@@ -174,8 +174,7 @@ module Nokogiri
|
|
174
174
|
# Since v1.12.4
|
175
175
|
attr_accessor :namespace_inheritance
|
176
176
|
|
177
|
-
# :nodoc:
|
178
|
-
def initialize(*args) # rubocop:disable Lint/MissingSuper
|
177
|
+
def initialize(*args) # :nodoc: # rubocop:disable Lint/MissingSuper
|
179
178
|
@errors = []
|
180
179
|
@decorators = nil
|
181
180
|
@namespace_inheritance = false
|
@@ -330,7 +329,7 @@ module Nokogiri
|
|
330
329
|
# Validate this Document against it's DTD. Returns a list of errors on
|
331
330
|
# the document or +nil+ when there is no DTD.
|
332
331
|
def validate
|
333
|
-
return
|
332
|
+
return unless internal_subset
|
334
333
|
|
335
334
|
internal_subset.validate(self)
|
336
335
|
end
|
@@ -427,8 +426,6 @@ module Nokogiri
|
|
427
426
|
# instructions. If you have a use case and would like this functionality, please let us know
|
428
427
|
# by opening an issue or a discussion on the github project.
|
429
428
|
#
|
430
|
-
# ⚡ This is an experimental feature, available since v1.14.0
|
431
|
-
#
|
432
429
|
# *Example*
|
433
430
|
#
|
434
431
|
# doc = Nokogiri::XML.parse(<<~XML)
|
@@ -455,6 +452,8 @@ module Nokogiri
|
|
455
452
|
# doc.deconstruct_keys([:root])
|
456
453
|
# # => {:root=>nil}
|
457
454
|
#
|
455
|
+
# Since v1.14.0
|
456
|
+
#
|
458
457
|
def deconstruct_keys(keys)
|
459
458
|
{ root: root }
|
460
459
|
end
|
@@ -154,8 +154,6 @@ module Nokogiri
|
|
154
154
|
# root elements, you should deconstruct the array returned by
|
155
155
|
# <tt>DocumentFragment#elements</tt>.
|
156
156
|
#
|
157
|
-
# ⚡ This is an experimental feature, available since v1.14.0
|
158
|
-
#
|
159
157
|
# *Example*
|
160
158
|
#
|
161
159
|
# frag = Nokogiri::HTML5.fragment(<<~HTML)
|
@@ -187,6 +185,8 @@ module Nokogiri
|
|
187
185
|
# # }),
|
188
186
|
# # #(Element:0x398 { name = "div", children = [ #(Text "End")] })]
|
189
187
|
#
|
188
|
+
# Since v1.14.0
|
189
|
+
#
|
190
190
|
def deconstruct
|
191
191
|
children.to_a
|
192
192
|
end
|
@@ -16,8 +16,6 @@ module Nokogiri
|
|
16
16
|
# - +prefix+ → (String, nil) The namespace's prefix, or +nil+ if there is no prefix (e.g., default namespace).
|
17
17
|
# - +href+ → (String) The namespace's URI
|
18
18
|
#
|
19
|
-
# ⚡ This is an experimental feature, available since v1.14.0
|
20
|
-
#
|
21
19
|
# *Example*
|
22
20
|
#
|
23
21
|
# doc = Nokogiri::XML.parse(<<~XML)
|
@@ -43,6 +41,7 @@ module Nokogiri
|
|
43
41
|
# doc.root.elements.last.namespace.deconstruct_keys([:prefix, :href])
|
44
42
|
# # => {:prefix=>"noko", :href=>"http://nokogiri.org/ns/noko"}
|
45
43
|
#
|
44
|
+
# Since v1.14.0
|
46
45
|
#
|
47
46
|
def deconstruct_keys(keys)
|
48
47
|
{ prefix: prefix, href: href }
|
data/lib/nokogiri/xml/node.rb
CHANGED
@@ -1049,29 +1049,35 @@ module Nokogiri
|
|
1049
1049
|
|
1050
1050
|
return Nokogiri::XML::NodeSet.new(document) if contents.empty?
|
1051
1051
|
|
1052
|
-
# libxml2 does not obey the +recover+ option after encountering errors during +in_context+
|
1053
|
-
# parsing, and so this horrible hack is here to try to emulate recovery behavior.
|
1054
|
-
#
|
1055
|
-
# Unfortunately, this means we're no longer parsing "in context" and so namespaces that
|
1056
|
-
# would have been inherited from the context node won't be handled correctly. This hack was
|
1057
|
-
# written in 2010, and I regret it, because it's silently degrading functionality in a way
|
1058
|
-
# that's not easily prevented (or even detected).
|
1059
|
-
#
|
1060
|
-
# I think preferable behavior would be to either:
|
1061
|
-
#
|
1062
|
-
# a. add an error noting that we "fell back" and pointing the user to turning off the +recover+ option
|
1063
|
-
# b. don't recover, but raise a sensible exception
|
1064
|
-
#
|
1065
|
-
# For context and background: https://github.com/sparklemotion/nokogiri/issues/313
|
1066
|
-
# FIXME bug report: https://github.com/sparklemotion/nokogiri/issues/2092
|
1067
1052
|
error_count = document.errors.length
|
1068
1053
|
node_set = in_context(contents, options.to_i)
|
1069
|
-
if
|
1070
|
-
|
1054
|
+
if document.errors.length > error_count
|
1055
|
+
raise document.errors[error_count] unless options.recover?
|
1056
|
+
|
1057
|
+
if node_set.empty?
|
1058
|
+
# libxml2 < 2.13 does not obey the +recover+ option after encountering errors during
|
1059
|
+
# +in_context+ parsing, and so this horrible hack is here to try to emulate recovery
|
1060
|
+
# behavior.
|
1061
|
+
#
|
1062
|
+
# (Note that HTML4 fragment parsing seems to have been fixed in abd74186, and XML
|
1063
|
+
# fragment parsing is fixed in 1c106edf. Both are in 2.13.)
|
1064
|
+
#
|
1065
|
+
# Unfortunately, this means we're no longer parsing "in context" and so namespaces that
|
1066
|
+
# would have been inherited from the context node won't be handled correctly. This hack
|
1067
|
+
# was written in 2010, and I regret it, because it's silently degrading functionality in
|
1068
|
+
# a way that's not easily prevented (or even detected).
|
1069
|
+
#
|
1070
|
+
# I think preferable behavior would be to either:
|
1071
|
+
#
|
1072
|
+
# a. add an error noting that we "fell back" and pointing the user to turning off the
|
1073
|
+
# +recover+ option
|
1074
|
+
# b. don't recover, but raise a sensible exception
|
1075
|
+
#
|
1076
|
+
# For context and background:
|
1077
|
+
# - https://github.com/sparklemotion/nokogiri/issues/313
|
1078
|
+
# - https://github.com/sparklemotion/nokogiri/issues/2092
|
1071
1079
|
fragment = document.related_class("DocumentFragment").parse(contents)
|
1072
1080
|
node_set = fragment.children
|
1073
|
-
else
|
1074
|
-
raise document.errors[error_count]
|
1075
1081
|
end
|
1076
1082
|
end
|
1077
1083
|
node_set
|
@@ -1165,7 +1171,7 @@ module Nokogiri
|
|
1165
1171
|
# Fetch the Nokogiri::HTML4::ElementDescription for this node. Returns
|
1166
1172
|
# nil on XML documents and on unknown tags.
|
1167
1173
|
def description
|
1168
|
-
return
|
1174
|
+
return if document.xml?
|
1169
1175
|
|
1170
1176
|
Nokogiri::HTML4::ElementDescription[name]
|
1171
1177
|
end
|
@@ -1254,8 +1260,8 @@ module Nokogiri
|
|
1254
1260
|
# Compare two Node objects with respect to their Document. Nodes from
|
1255
1261
|
# different documents cannot be compared.
|
1256
1262
|
def <=>(other)
|
1257
|
-
return
|
1258
|
-
return
|
1263
|
+
return unless other.is_a?(Nokogiri::XML::Node)
|
1264
|
+
return unless document == other.document
|
1259
1265
|
|
1260
1266
|
compare(other)
|
1261
1267
|
end
|
@@ -1278,6 +1284,7 @@ module Nokogiri
|
|
1278
1284
|
# end
|
1279
1285
|
#
|
1280
1286
|
def serialize(*args, &block)
|
1287
|
+
# TODO: deprecate non-hash options, see 46c68ed 2009-06-20 for context
|
1281
1288
|
options = if args.first.is_a?(Hash)
|
1282
1289
|
args.shift
|
1283
1290
|
else
|
@@ -1429,8 +1436,6 @@ module Nokogiri
|
|
1429
1436
|
# - +content+ → (String) The contents of all the text nodes in this node's subtree. See #content.
|
1430
1437
|
# - +inner_html+ → (String) The inner markup for the children of this node. See #inner_html.
|
1431
1438
|
#
|
1432
|
-
# ⚡ This is an experimental feature, available since v1.14.0
|
1433
|
-
#
|
1434
1439
|
# *Example*
|
1435
1440
|
#
|
1436
1441
|
# doc = Nokogiri::XML.parse(<<~XML)
|
@@ -1465,6 +1470,8 @@ module Nokogiri
|
|
1465
1470
|
# # value = "def"
|
1466
1471
|
# # })]}
|
1467
1472
|
#
|
1473
|
+
# Since v1.14.0
|
1474
|
+
#
|
1468
1475
|
def deconstruct_keys(keys)
|
1469
1476
|
requested_keys = DECONSTRUCT_KEYS & keys
|
1470
1477
|
{}.tap do |values|
|
@@ -372,7 +372,7 @@ module Nokogiri
|
|
372
372
|
# Removes the last element from set and returns it, or +nil+ if
|
373
373
|
# the set is empty
|
374
374
|
def pop
|
375
|
-
return
|
375
|
+
return if length == 0
|
376
376
|
|
377
377
|
delete(last)
|
378
378
|
end
|
@@ -381,7 +381,7 @@ module Nokogiri
|
|
381
381
|
# Returns the first element of the NodeSet and removes it. Returns
|
382
382
|
# +nil+ if the set is empty.
|
383
383
|
def shift
|
384
|
-
return
|
384
|
+
return if length == 0
|
385
385
|
|
386
386
|
delete(first)
|
387
387
|
end
|
@@ -435,7 +435,7 @@ module Nokogiri
|
|
435
435
|
#
|
436
436
|
# Returns the members of this NodeSet as an array, to use in pattern matching.
|
437
437
|
#
|
438
|
-
#
|
438
|
+
# Since v1.14.0
|
439
439
|
#
|
440
440
|
def deconstruct
|
441
441
|
to_a
|
data/lib/nokogiri/xml/reader.rb
CHANGED
@@ -3,9 +3,11 @@
|
|
3
3
|
module Nokogiri
|
4
4
|
module XML
|
5
5
|
###
|
6
|
-
# Nokogiri::XML::Reader parses an XML document similar to the way a cursor
|
7
|
-
#
|
8
|
-
#
|
6
|
+
# Nokogiri::XML::Reader parses an XML document similar to the way a cursor would move. The
|
7
|
+
# Reader is given an XML document, and yields nodes to an each block.
|
8
|
+
#
|
9
|
+
# The Reader parser might be good for when you need the speed and low memory usage of the SAX
|
10
|
+
# parser, but do not want to write a Document handler.
|
9
11
|
#
|
10
12
|
# Here is an example of usage:
|
11
13
|
#
|
@@ -22,13 +24,12 @@ module Nokogiri
|
|
22
24
|
#
|
23
25
|
# end
|
24
26
|
#
|
25
|
-
#
|
26
|
-
#
|
27
|
-
#
|
28
|
-
# need during the first iteration.
|
27
|
+
# ⚠ Nokogiri::XML::Reader#each can only be called once! Once the cursor moves through the entire
|
28
|
+
# document, you must parse the document again. It may be better to capture all information you
|
29
|
+
# need during a single iteration.
|
29
30
|
#
|
30
|
-
#
|
31
|
-
#
|
31
|
+
# ⚠ libxml2 does not support error recovery in the Reader parser. The `RECOVER` ParseOption is
|
32
|
+
# ignored. If a syntax error is encountered during parsing, an exception will be raised.
|
32
33
|
class Reader
|
33
34
|
include Enumerable
|
34
35
|
|