nokogiri 1.15.6 → 1.16.5
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +17 -14
- data/README.md +4 -1
- data/dependencies.yml +6 -5
- data/ext/nokogiri/extconf.rb +3 -4
- data/ext/nokogiri/html4_sax_push_parser.c +1 -1
- data/ext/nokogiri/nokogiri.h +10 -3
- data/ext/nokogiri/test_global_handlers.c +1 -1
- data/ext/nokogiri/xml_cdata.c +1 -2
- data/ext/nokogiri/xml_document.c +7 -3
- data/ext/nokogiri/xml_namespace.c +0 -4
- data/ext/nokogiri/xml_node.c +5 -8
- data/ext/nokogiri/xml_reader.c +26 -48
- data/ext/nokogiri/xml_relax_ng.c +1 -1
- data/ext/nokogiri/xml_sax_parser_context.c +4 -0
- data/ext/nokogiri/xml_sax_push_parser.c +1 -1
- data/ext/nokogiri/xml_schema.c +2 -3
- data/ext/nokogiri/xml_syntax_error.c +3 -3
- data/ext/nokogiri/xml_text.c +1 -2
- data/ext/nokogiri/xml_xpath_context.c +2 -5
- data/ext/nokogiri/xslt_stylesheet.c +8 -3
- data/gumbo-parser/Makefile +18 -0
- data/gumbo-parser/src/parser.c +8 -5
- data/gumbo-parser/src/tokenizer.c +1 -0
- data/lib/nokogiri/css/parser_extras.rb +1 -1
- data/lib/nokogiri/css/xpath_visitor.rb +1 -21
- data/lib/nokogiri/html4/document.rb +1 -1
- data/lib/nokogiri/html4/encoding_reader.rb +1 -1
- data/lib/nokogiri/html5.rb +0 -66
- data/lib/nokogiri/version/constant.rb +1 -1
- data/lib/nokogiri/version/info.rb +6 -5
- data/lib/nokogiri/xml/attr.rb +2 -2
- data/lib/nokogiri/xml/document.rb +3 -3
- data/lib/nokogiri/xml/document_fragment.rb +2 -2
- data/lib/nokogiri/xml/namespace.rb +1 -2
- data/lib/nokogiri/xml/node.rb +31 -24
- data/lib/nokogiri/xml/node_set.rb +3 -3
- data/lib/nokogiri/xml/reader.rb +10 -9
- data/lib/nokogiri/xml/searchable.rb +3 -3
- data/lib/nokogiri/xml/syntax_error.rb +1 -1
- data/lib/nokogiri/xml.rb +1 -1
- data/lib/nokogiri/xslt/stylesheet.rb +29 -7
- data/lib/nokogiri/xslt.rb +1 -1
- data/lib/nokogiri.rb +1 -1
- data/ports/archives/libxml2-2.12.7.tar.xz +0 -0
- metadata +5 -5
- data/ports/archives/libxml2-2.11.7.tar.xz +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f268b2edaa2782a3e9a64e671e36d88149ef5dc23fcf3967a8a54c4475ed7b85
|
4
|
+
data.tar.gz: b49fbf8708f4733e34c211935637ad5999d3950b89aca2da71899c71eaa14901
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e2f096bf7e99c453450c2a9d78c7274a3472d29ea8e8aeeff2dfb6976b5b009a8bfd5e673c7eb1a2575d820df26a830ce4e4043d33fb4c0239bf8098bdb98188
|
7
|
+
data.tar.gz: aa7658f376b0278150550d5426ddbe11469be8624d6fc732cf035d0ed00736e177e2848182c9077a9cc724ae6efe92666e8f3664b71282c4e74e79ad48c51373
|
data/Gemfile
CHANGED
@@ -5,35 +5,38 @@ source "https://rubygems.org"
|
|
5
5
|
gemspec
|
6
6
|
|
7
7
|
group :development do
|
8
|
+
# ruby 3.4.0-dev removed some gems from the default set
|
9
|
+
#
|
10
|
+
# TODO: we should be able to remove these as our gem dependencies sort it out and we pull them in
|
11
|
+
# transitively.
|
12
|
+
gem "mutex_m"
|
13
|
+
|
8
14
|
# bootstrapping
|
9
15
|
gem "bundler", "~> 2.3"
|
10
|
-
gem "rake", "
|
16
|
+
gem "rake", "13.1.0"
|
11
17
|
|
12
18
|
# building extensions
|
13
|
-
gem "rake-compiler", "
|
14
|
-
gem "rake-compiler-dock", "
|
15
|
-
|
16
|
-
# documentation
|
17
|
-
gem "hoe-markdown", "= 1.4.0"
|
19
|
+
gem "rake-compiler", "1.2.6"
|
20
|
+
gem "rake-compiler-dock", "1.4.0"
|
18
21
|
|
19
22
|
# parser generator
|
20
23
|
gem "rexical", "= 1.0.7"
|
21
24
|
|
22
25
|
# tests
|
23
|
-
gem "minitest", "5.
|
24
|
-
gem "minitest-
|
25
|
-
gem "ruby_memcheck", "
|
26
|
+
gem "minitest", "5.21.2"
|
27
|
+
gem "minitest-parallel_fork", "2.0.0"
|
28
|
+
gem "ruby_memcheck", "2.3.0"
|
26
29
|
gem "rubyzip", "~> 2.3.2"
|
27
30
|
gem "simplecov", "= 0.21.2"
|
28
31
|
|
29
32
|
# rubocop
|
30
33
|
if Gem::Requirement.new("~> 3.0").satisfied_by?(Gem::Version.new(RUBY_VERSION))
|
31
|
-
gem "rubocop", "1.
|
32
|
-
gem "rubocop-minitest", "0.
|
34
|
+
gem "rubocop", "1.60.2"
|
35
|
+
gem "rubocop-minitest", "0.34.5"
|
33
36
|
gem "rubocop-packaging", "0.5.2"
|
34
|
-
gem "rubocop-performance", "1.
|
37
|
+
gem "rubocop-performance", "1.20.2"
|
35
38
|
gem "rubocop-rake", "= 0.6.0"
|
36
|
-
gem "rubocop-shopify", "2.
|
39
|
+
gem "rubocop-shopify", "2.14.0"
|
37
40
|
end
|
38
41
|
end
|
39
42
|
|
@@ -41,5 +44,5 @@ end
|
|
41
44
|
# `bundle config set --local without rdoc`
|
42
45
|
# Then re-run `bundle install`.
|
43
46
|
group :rdoc do
|
44
|
-
gem "rdoc", "6.
|
47
|
+
gem "rdoc", "6.6.2"
|
45
48
|
end
|
data/README.md
CHANGED
@@ -55,6 +55,7 @@ There are a few ways to ask exploratory questions:
|
|
55
55
|
|
56
56
|
- The Nokogiri mailing list is active at https://groups.google.com/group/nokogiri-talk
|
57
57
|
- Open an issue using the "Help Request" template at https://github.com/sparklemotion/nokogiri/issues
|
58
|
+
- Open a discussion at https://github.com/sparklemotion/nokogiri/discussions
|
58
59
|
|
59
60
|
Please do not mail the maintainers at their personal addresses.
|
60
61
|
|
@@ -90,6 +91,8 @@ We bump `Major.Minor.Patch` versions following this guidance:
|
|
90
91
|
- Updating packaged libraries for non-security-related reasons.
|
91
92
|
- Dropping support for EOLed Ruby versions. [Some folks find this objectionable](https://github.com/sparklemotion/nokogiri/issues/1568), but [SemVer says this is OK if the public API hasn't changed](https://semver.org/#what-should-i-do-if-i-update-my-own-dependencies-without-changing-the-public-api).
|
92
93
|
- Backwards-incompatible changes to internal or private methods and constants. These are detailed in the "Changes" section of each changelog entry.
|
94
|
+
- Removal of deprecated methods or parameters, after a generous transition period; usually when those methods or parameters are rarely-used or dangerous to the user. Essentially, removals that do not justify a major version bump.
|
95
|
+
|
93
96
|
|
94
97
|
`Patch`:
|
95
98
|
|
@@ -111,7 +114,7 @@ You can help sponsor the maintainers of this software through one of these organ
|
|
111
114
|
|
112
115
|
Requirements:
|
113
116
|
|
114
|
-
- Ruby >=
|
117
|
+
- Ruby >= 3.0
|
115
118
|
- JRuby >= 9.4.0.0
|
116
119
|
|
117
120
|
|
data/dependencies.yml
CHANGED
@@ -1,7 +1,8 @@
|
|
1
|
+
---
|
1
2
|
libxml2:
|
2
|
-
version: "2.
|
3
|
-
sha256: "
|
4
|
-
# sha-256 hash provided in https://download.gnome.org/sources/libxml2/2.
|
3
|
+
version: "2.12.7"
|
4
|
+
sha256: "24ae78ff1363a973e6d8beba941a7945da2ac056e19b53956aeb6927fd6cfb56"
|
5
|
+
# sha-256 hash provided in https://download.gnome.org/sources/libxml2/2.12/libxml2-2.12.7.sha256sum
|
5
6
|
|
6
7
|
libxslt:
|
7
8
|
version: "1.1.39"
|
@@ -9,8 +10,8 @@ libxslt:
|
|
9
10
|
# sha-256 hash provided in https://download.gnome.org/sources/libxslt/1.1/libxslt-1.1.39.sha256sum
|
10
11
|
|
11
12
|
zlib:
|
12
|
-
version: "1.
|
13
|
-
sha256: "
|
13
|
+
version: "1.3.1"
|
14
|
+
sha256: "9a93b2b7dfdac77ceba5a558a580e74667dd6fede4585b91eefb60f03b72df23"
|
14
15
|
# SHA-256 hash provided on http://zlib.net/
|
15
16
|
|
16
17
|
libiconv:
|
data/ext/nokogiri/extconf.rb
CHANGED
@@ -443,7 +443,7 @@ def process_recipe(name, version, static_p, cross_p, cacheable_p = true)
|
|
443
443
|
# We use 'host' to set compiler prefix for cross-compiling. Prefer host_alias over host. And
|
444
444
|
# prefer i686 (what external dev tools use) to i386 (what ruby's configure.ac emits).
|
445
445
|
recipe.host = RbConfig::CONFIG["host_alias"].empty? ? RbConfig::CONFIG["host"] : RbConfig::CONFIG["host_alias"]
|
446
|
-
recipe.host = recipe.host.gsub(
|
446
|
+
recipe.host = recipe.host.gsub("i386", "i686")
|
447
447
|
|
448
448
|
recipe.target = File.join(PACKAGE_ROOT_DIR, "ports") if cacheable_p
|
449
449
|
recipe.configure_options << "--libdir=#{File.join(recipe.path, "lib")}"
|
@@ -651,8 +651,7 @@ append_cppflags(ENV["CPPFLAGS"].split) unless ENV["CPPFLAGS"].nil?
|
|
651
651
|
append_ldflags(ENV["LDFLAGS"].split) unless ENV["LDFLAGS"].nil?
|
652
652
|
$LIBS = concat_flags($LIBS, ENV["LIBS"])
|
653
653
|
|
654
|
-
#
|
655
|
-
# errors/warnings. see #2302
|
654
|
+
# libgumbo uses C90/C99 features, see #2302
|
656
655
|
append_cflags(["-std=c99", "-Wno-declaration-after-statement"])
|
657
656
|
|
658
657
|
# gumbo html5 serialization is slower with O3, let's make sure we use O2
|
@@ -919,6 +918,7 @@ else
|
|
919
918
|
"--with-c14n",
|
920
919
|
"--with-debug",
|
921
920
|
"--with-threads",
|
921
|
+
"--without-tls", # see https://github.com/sparklemotion/nokogiri/issues/3031
|
922
922
|
"CPPFLAGS=#{cppflags}",
|
923
923
|
"CFLAGS=#{cflags}",
|
924
924
|
]
|
@@ -1087,7 +1087,6 @@ have_func("xmlRelaxNGSetParserStructuredErrors") # introduced in libxml 2.6.24
|
|
1087
1087
|
have_func("xmlRelaxNGSetValidStructuredErrors") # introduced in libxml 2.6.21
|
1088
1088
|
have_func("xmlSchemaSetValidStructuredErrors") # introduced in libxml 2.6.23
|
1089
1089
|
have_func("xmlSchemaSetParserStructuredErrors") # introduced in libxml 2.6.23
|
1090
|
-
have_func("rb_gc_location") # introduced in Ruby 2.7
|
1091
1090
|
have_func("rb_category_warning") # introduced in Ruby 3.0
|
1092
1091
|
|
1093
1092
|
other_library_versions_string = OTHER_LIBRARY_VERSIONS.map { |k, v| [k, v].join(":") }.join(",")
|
@@ -32,7 +32,7 @@ native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
|
|
32
32
|
|
33
33
|
if ((status != 0) && !(ctx->options & XML_PARSE_RECOVER)) {
|
34
34
|
// TODO: there appear to be no tests for this block
|
35
|
-
|
35
|
+
xmlErrorConstPtr e = xmlCtxtGetLastError(ctx);
|
36
36
|
Nokogiri_error_raise(NULL, e);
|
37
37
|
}
|
38
38
|
|
data/ext/nokogiri/nokogiri.h
CHANGED
@@ -66,6 +66,13 @@ xmlNodePtr xmlLastElementChild(xmlNodePtr parent);
|
|
66
66
|
#define XMLNS_PREFIX "xmlns"
|
67
67
|
#define XMLNS_PREFIX_LEN 6 /* including either colon or \0 */
|
68
68
|
|
69
|
+
#ifndef xmlErrorConstPtr
|
70
|
+
# if LIBXML_VERSION >= 21200
|
71
|
+
# define xmlErrorConstPtr const xmlError *
|
72
|
+
# else
|
73
|
+
# define xmlErrorConstPtr xmlError *
|
74
|
+
# endif
|
75
|
+
#endif
|
69
76
|
|
70
77
|
#include <ruby.h>
|
71
78
|
#include <ruby/st.h>
|
@@ -227,9 +234,9 @@ void Nokogiri_structured_error_func_save(libxmlStructuredErrorHandlerState *hand
|
|
227
234
|
void Nokogiri_structured_error_func_save_and_set(libxmlStructuredErrorHandlerState *handler_state, void *user_data,
|
228
235
|
xmlStructuredErrorFunc handler);
|
229
236
|
void Nokogiri_structured_error_func_restore(libxmlStructuredErrorHandlerState *handler_state);
|
230
|
-
VALUE Nokogiri_wrap_xml_syntax_error(
|
231
|
-
void Nokogiri_error_array_pusher(void *ctx,
|
232
|
-
NORETURN_DECL void Nokogiri_error_raise(void *ctx,
|
237
|
+
VALUE Nokogiri_wrap_xml_syntax_error(xmlErrorConstPtr error);
|
238
|
+
void Nokogiri_error_array_pusher(void *ctx, xmlErrorConstPtr error);
|
239
|
+
NORETURN_DECL void Nokogiri_error_raise(void *ctx, xmlErrorConstPtr error);
|
233
240
|
void Nokogiri_marshal_xpath_funcall_and_return_values(xmlXPathParserContextPtr ctx, int nargs, VALUE handler,
|
234
241
|
const char *function_name) ;
|
235
242
|
|
@@ -3,7 +3,7 @@
|
|
3
3
|
static VALUE foreign_error_handler_block = Qnil;
|
4
4
|
|
5
5
|
static void
|
6
|
-
foreign_error_handler(void *user_data,
|
6
|
+
foreign_error_handler(void *user_data, xmlErrorConstPtr c_error)
|
7
7
|
{
|
8
8
|
rb_funcall(foreign_error_handler_block, rb_intern("call"), 0);
|
9
9
|
}
|
data/ext/nokogiri/xml_cdata.c
CHANGED
@@ -33,8 +33,7 @@ rb_xml_cdata_s_new(int argc, VALUE *argv, VALUE klass)
|
|
33
33
|
|
34
34
|
if (!rb_obj_is_kind_of(rb_document, cNokogiriXmlDocument)) {
|
35
35
|
xmlNodePtr deprecated_node_type_arg;
|
36
|
-
// TODO:
|
37
|
-
NOKO_WARN_DEPRECATION("Passing a Node as the first parameter to CDATA.new is deprecated. Please pass a Document instead. This will become an error in a future release of Nokogiri.");
|
36
|
+
NOKO_WARN_DEPRECATION("Passing a Node as the first parameter to CDATA.new is deprecated. Please pass a Document instead. This will become an error in Nokogiri v1.17.0."); // TODO: deprecated in v1.15.3, remove in v1.17.0
|
38
37
|
Noko_Node_Get_Struct(rb_document, xmlNode, deprecated_node_type_arg);
|
39
38
|
c_document = deprecated_node_type_arg->doc;
|
40
39
|
} else {
|
data/ext/nokogiri/xml_document.c
CHANGED
@@ -74,8 +74,10 @@ dealloc(void *data)
|
|
74
74
|
|
75
75
|
ruby_xfree(doc->_private);
|
76
76
|
|
77
|
+
#if defined(__GNUC__) && __GNUC__ >= 5
|
77
78
|
#pragma GCC diagnostic push
|
78
79
|
#pragma GCC diagnostic ignored "-Wdeprecated-declarations" // xmlDeregisterNodeDefault is deprecated as of libxml2 2.11.0
|
80
|
+
#endif
|
79
81
|
/*
|
80
82
|
* libxml-ruby < 3.0.0 uses xmlDeregisterNodeDefault. If the user is using one of those older
|
81
83
|
* versions, the registered callback from libxml-ruby will access the _private pointers set by
|
@@ -90,7 +92,9 @@ dealloc(void *data)
|
|
90
92
|
if (xmlDeregisterNodeDefaultValue) {
|
91
93
|
remove_private((xmlNodePtr)doc);
|
92
94
|
}
|
95
|
+
#if defined(__GNUC__) && __GNUC__ >= 5
|
93
96
|
#pragma GCC diagnostic pop
|
97
|
+
#endif
|
94
98
|
|
95
99
|
xmlFreeDoc(doc);
|
96
100
|
}
|
@@ -337,7 +341,7 @@ read_io(VALUE klass,
|
|
337
341
|
xmlSetStructuredErrorFunc(NULL, NULL);
|
338
342
|
|
339
343
|
if (doc == NULL) {
|
340
|
-
|
344
|
+
xmlErrorConstPtr error;
|
341
345
|
|
342
346
|
xmlFreeDoc(doc);
|
343
347
|
|
@@ -383,7 +387,7 @@ read_memory(VALUE klass,
|
|
383
387
|
xmlSetStructuredErrorFunc(NULL, NULL);
|
384
388
|
|
385
389
|
if (doc == NULL) {
|
386
|
-
|
390
|
+
xmlErrorConstPtr error;
|
387
391
|
|
388
392
|
xmlFreeDoc(doc);
|
389
393
|
|
@@ -537,7 +541,7 @@ create_entity(int argc, VALUE *argv, VALUE self)
|
|
537
541
|
);
|
538
542
|
|
539
543
|
if (NULL == ptr) {
|
540
|
-
|
544
|
+
xmlErrorConstPtr error = xmlGetLastError();
|
541
545
|
if (error) {
|
542
546
|
rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
|
543
547
|
} else {
|
@@ -42,7 +42,6 @@ _xml_namespace_dealloc(void *ptr)
|
|
42
42
|
xmlFree(ns);
|
43
43
|
}
|
44
44
|
|
45
|
-
#ifdef HAVE_RB_GC_LOCATION
|
46
45
|
static void
|
47
46
|
_xml_namespace_update_references(void *ptr)
|
48
47
|
{
|
@@ -51,9 +50,6 @@ _xml_namespace_update_references(void *ptr)
|
|
51
50
|
ns->_private = (void *)rb_gc_location((VALUE)ns->_private);
|
52
51
|
}
|
53
52
|
}
|
54
|
-
#else
|
55
|
-
# define _xml_namespace_update_references 0
|
56
|
-
#endif
|
57
53
|
|
58
54
|
static const rb_data_type_t nokogiri_xml_namespace_type_with_dealloc = {
|
59
55
|
.wrap_struct_name = "Nokogiri::XML::Namespace#with_dealloc",
|
data/ext/nokogiri/xml_node.c
CHANGED
@@ -28,7 +28,6 @@ _xml_node_mark(void *ptr)
|
|
28
28
|
}
|
29
29
|
}
|
30
30
|
|
31
|
-
#ifdef HAVE_RB_GC_LOCATION
|
32
31
|
static void
|
33
32
|
_xml_node_update_references(void *ptr)
|
34
33
|
{
|
@@ -38,9 +37,6 @@ _xml_node_update_references(void *ptr)
|
|
38
37
|
node->_private = (void *)rb_gc_location((VALUE)node->_private);
|
39
38
|
}
|
40
39
|
}
|
41
|
-
#else
|
42
|
-
# define _xml_node_update_references 0
|
43
|
-
#endif
|
44
40
|
|
45
41
|
static const rb_data_type_t nokogiri_node_type = {
|
46
42
|
.wrap_struct_name = "Nokogiri::XML::Node",
|
@@ -961,7 +957,7 @@ internal_subset(VALUE self)
|
|
961
957
|
* - +new_parent_doc+
|
962
958
|
* The new node's parent Document. Defaults to the this node's document.
|
963
959
|
*
|
964
|
-
* [Returns] The new
|
960
|
+
* [Returns] The new Nokogiri::XML::Node
|
965
961
|
*/
|
966
962
|
static VALUE
|
967
963
|
duplicate_node(int argc, VALUE *argv, VALUE self)
|
@@ -2058,8 +2054,7 @@ rb_xml_node_new(int argc, VALUE *argv, VALUE klass)
|
|
2058
2054
|
rb_raise(rb_eArgError, "document must be a Nokogiri::XML::Node");
|
2059
2055
|
}
|
2060
2056
|
if (!rb_obj_is_kind_of(rb_document_node, cNokogiriXmlDocument)) {
|
2061
|
-
// TODO:
|
2062
|
-
NOKO_WARN_DEPRECATION("Passing a Node as the second parameter to Node.new is deprecated. Please pass a Document instead, or prefer an alternative constructor like Node#add_child. This will become an error in a future release of Nokogiri.");
|
2057
|
+
NOKO_WARN_DEPRECATION("Passing a Node as the second parameter to Node.new is deprecated. Please pass a Document instead, or prefer an alternative constructor like Node#add_child. This will become an error in Nokogiri v1.17.0."); // TODO: deprecated in v1.13.0, remove in v1.17.0
|
2063
2058
|
}
|
2064
2059
|
Noko_Node_Get_Struct(rb_document_node, xmlNode, c_document_node);
|
2065
2060
|
|
@@ -2138,7 +2133,7 @@ process_xincludes(VALUE self, VALUE options)
|
|
2138
2133
|
xmlSetStructuredErrorFunc(NULL, NULL);
|
2139
2134
|
|
2140
2135
|
if (rcode < 0) {
|
2141
|
-
|
2136
|
+
xmlErrorConstPtr error;
|
2142
2137
|
|
2143
2138
|
error = xmlGetLastError();
|
2144
2139
|
if (error) {
|
@@ -2174,6 +2169,8 @@ in_context(VALUE self, VALUE _str, VALUE _options)
|
|
2174
2169
|
|
2175
2170
|
/* Twiddle global variable because of a bug in libxml2.
|
2176
2171
|
* http://git.gnome.org/browse/libxml2/commit/?id=e20fb5a72c83cbfc8e4a8aa3943c6be8febadab7
|
2172
|
+
*
|
2173
|
+
* TODO: this is fixed, and HTML_PARSE_NOIMPLIED is defined, in libxml2 2.7.7
|
2177
2174
|
*/
|
2178
2175
|
#ifndef HTML_PARSE_NOIMPLIED
|
2179
2176
|
htmlHandleOmittedElem(0);
|
data/ext/nokogiri/xml_reader.c
CHANGED
@@ -5,8 +5,14 @@ VALUE cNokogiriXmlReader;
|
|
5
5
|
static void
|
6
6
|
xml_reader_deallocate(void *data)
|
7
7
|
{
|
8
|
+
// free the document separately because we _may_ have triggered preservation by calling
|
9
|
+
// xmlTextReaderCurrentDoc during a read_more.
|
8
10
|
xmlTextReaderPtr reader = data;
|
11
|
+
xmlDocPtr doc = xmlTextReaderCurrentDoc(reader);
|
9
12
|
xmlFreeTextReader(reader);
|
13
|
+
if (doc) {
|
14
|
+
xmlFreeDoc(doc);
|
15
|
+
}
|
10
16
|
}
|
11
17
|
|
12
18
|
static const rb_data_type_t xml_reader_type = {
|
@@ -166,48 +172,6 @@ rb_xml_reader_namespaces(VALUE rb_reader)
|
|
166
172
|
return rb_namespaces ;
|
167
173
|
}
|
168
174
|
|
169
|
-
/*
|
170
|
-
:call-seq: attribute_nodes() → Array<Nokogiri::XML::Attr>
|
171
|
-
|
172
|
-
Get the attributes of the current node as an Array of XML:Attr
|
173
|
-
|
174
|
-
⚠ This method is deprecated and unsafe to use. It will be removed in a future version of Nokogiri.
|
175
|
-
|
176
|
-
See related: #attribute_hash, #attributes
|
177
|
-
*/
|
178
|
-
static VALUE
|
179
|
-
rb_xml_reader_attribute_nodes(VALUE rb_reader)
|
180
|
-
{
|
181
|
-
xmlTextReaderPtr c_reader;
|
182
|
-
xmlNodePtr c_node;
|
183
|
-
VALUE attr_nodes;
|
184
|
-
int j;
|
185
|
-
|
186
|
-
// TODO: deprecated, remove in Nokogiri v1.15, see https://github.com/sparklemotion/nokogiri/issues/2598
|
187
|
-
// After removal, we can also remove all the "node_has_a_document" special handling from xml_node.c
|
188
|
-
NOKO_WARN_DEPRECATION("Reader#attribute_nodes is deprecated and will be removed in a future version of Nokogiri. Please use Reader#attribute_hash instead.");
|
189
|
-
|
190
|
-
TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_reader_type, c_reader);
|
191
|
-
|
192
|
-
if (! has_attributes(c_reader)) {
|
193
|
-
return rb_ary_new() ;
|
194
|
-
}
|
195
|
-
|
196
|
-
c_node = xmlTextReaderExpand(c_reader);
|
197
|
-
if (c_node == NULL) {
|
198
|
-
return Qnil;
|
199
|
-
}
|
200
|
-
|
201
|
-
attr_nodes = noko_xml_node_attrs(c_node);
|
202
|
-
|
203
|
-
/* ensure that the Reader won't be GCed as long as a node is referenced */
|
204
|
-
for (j = 0 ; j < RARRAY_LEN(attr_nodes) ; j++) {
|
205
|
-
rb_iv_set(rb_ary_entry(attr_nodes, j), "@reader", rb_reader);
|
206
|
-
}
|
207
|
-
|
208
|
-
return attr_nodes;
|
209
|
-
}
|
210
|
-
|
211
175
|
/*
|
212
176
|
:call-seq: attribute_hash() → Hash<String ⇒ String>
|
213
177
|
|
@@ -554,9 +518,10 @@ static VALUE
|
|
554
518
|
read_more(VALUE self)
|
555
519
|
{
|
556
520
|
xmlTextReaderPtr reader;
|
557
|
-
|
521
|
+
xmlErrorConstPtr error;
|
558
522
|
VALUE error_list;
|
559
523
|
int ret;
|
524
|
+
xmlDocPtr c_document;
|
560
525
|
|
561
526
|
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
562
527
|
|
@@ -566,6 +531,17 @@ read_more(VALUE self)
|
|
566
531
|
ret = xmlTextReaderRead(reader);
|
567
532
|
xmlSetStructuredErrorFunc(NULL, NULL);
|
568
533
|
|
534
|
+
c_document = xmlTextReaderCurrentDoc(reader);
|
535
|
+
if (c_document && c_document->encoding == NULL) {
|
536
|
+
VALUE constructor_encoding = rb_iv_get(self, "@encoding");
|
537
|
+
if (RTEST(constructor_encoding)) {
|
538
|
+
c_document->encoding = xmlStrdup(BAD_CAST StringValueCStr(constructor_encoding));
|
539
|
+
} else {
|
540
|
+
rb_iv_set(self, "@encoding", NOKOGIRI_STR_NEW2("UTF-8"));
|
541
|
+
c_document->encoding = xmlStrdup(BAD_CAST "UTF-8");
|
542
|
+
}
|
543
|
+
}
|
544
|
+
|
569
545
|
if (ret == 1) { return self; }
|
570
546
|
if (ret == 0) { return Qnil; }
|
571
547
|
|
@@ -749,15 +725,18 @@ rb_xml_reader_encoding(VALUE rb_reader)
|
|
749
725
|
const char *parser_encoding;
|
750
726
|
VALUE constructor_encoding;
|
751
727
|
|
728
|
+
TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_reader_type, c_reader);
|
729
|
+
parser_encoding = (const char *)xmlTextReaderConstEncoding(c_reader);
|
730
|
+
if (parser_encoding) {
|
731
|
+
return NOKOGIRI_STR_NEW2(parser_encoding);
|
732
|
+
}
|
733
|
+
|
752
734
|
constructor_encoding = rb_iv_get(rb_reader, "@encoding");
|
753
735
|
if (RTEST(constructor_encoding)) {
|
754
736
|
return constructor_encoding;
|
755
737
|
}
|
756
738
|
|
757
|
-
|
758
|
-
parser_encoding = (const char *)xmlTextReaderConstEncoding(c_reader);
|
759
|
-
if (parser_encoding == NULL) { return Qnil; }
|
760
|
-
return NOKOGIRI_STR_NEW2(parser_encoding);
|
739
|
+
return Qnil;
|
761
740
|
}
|
762
741
|
|
763
742
|
void
|
@@ -778,7 +757,6 @@ noko_init_xml_reader(void)
|
|
778
757
|
rb_define_method(cNokogiriXmlReader, "attribute", reader_attribute, 1);
|
779
758
|
rb_define_method(cNokogiriXmlReader, "attribute_at", attribute_at, 1);
|
780
759
|
rb_define_method(cNokogiriXmlReader, "attribute_count", attribute_count, 0);
|
781
|
-
rb_define_method(cNokogiriXmlReader, "attribute_nodes", rb_xml_reader_attribute_nodes, 0);
|
782
760
|
rb_define_method(cNokogiriXmlReader, "attribute_hash", rb_xml_reader_attribute_hash, 0);
|
783
761
|
rb_define_method(cNokogiriXmlReader, "attributes?", attributes_eh, 0);
|
784
762
|
rb_define_method(cNokogiriXmlReader, "base_uri", rb_xml_reader_base_uri, 0);
|
data/ext/nokogiri/xml_relax_ng.c
CHANGED
@@ -59,6 +59,10 @@ parse_io(VALUE klass, VALUE io, VALUE encoding)
|
|
59
59
|
(xmlInputReadCallback)noko_io_read,
|
60
60
|
(xmlInputCloseCallback)noko_io_close,
|
61
61
|
(void *)io, enc);
|
62
|
+
if (!ctxt) {
|
63
|
+
rb_raise(rb_eRuntimeError, "failed to create xml sax parser context");
|
64
|
+
}
|
65
|
+
|
62
66
|
if (ctxt->sax) {
|
63
67
|
xmlFree(ctxt->sax);
|
64
68
|
ctxt->sax = NULL;
|
@@ -59,7 +59,7 @@ native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
|
|
59
59
|
|
60
60
|
if (xmlParseChunk(ctx, chunk, size, Qtrue == _last_chunk ? 1 : 0)) {
|
61
61
|
if (!(ctx->options & XML_PARSE_RECOVER)) {
|
62
|
-
|
62
|
+
xmlErrorConstPtr e = xmlCtxtGetLastError(ctx);
|
63
63
|
Nokogiri_error_raise(NULL, e);
|
64
64
|
}
|
65
65
|
}
|
data/ext/nokogiri/xml_schema.c
CHANGED
@@ -146,7 +146,7 @@ xml_schema_parse_schema(
|
|
146
146
|
xmlSchemaFreeParserCtxt(c_parser_context);
|
147
147
|
|
148
148
|
if (NULL == c_schema) {
|
149
|
-
|
149
|
+
xmlErrorConstPtr error = xmlGetLastError();
|
150
150
|
if (error) {
|
151
151
|
Nokogiri_error_raise(NULL, error);
|
152
152
|
} else {
|
@@ -222,8 +222,7 @@ rb_xml_schema_s_from_document(int argc, VALUE *argv, VALUE klass)
|
|
222
222
|
|
223
223
|
if (!rb_obj_is_kind_of(rb_document, cNokogiriXmlDocument)) {
|
224
224
|
xmlNodePtr deprecated_node_type_arg;
|
225
|
-
// TODO:
|
226
|
-
NOKO_WARN_DEPRECATION("Passing a Node as the first parameter to Schema.from_document is deprecated. Please pass a Document instead. This will become an error in a future release of Nokogiri.");
|
225
|
+
NOKO_WARN_DEPRECATION("Passing a Node as the first parameter to Schema.from_document is deprecated. Please pass a Document instead. This will become an error in Nokogiri v1.17.0."); // TODO: deprecated in v1.15.3, remove in v1.17.0
|
227
226
|
Noko_Node_Get_Struct(rb_document, xmlNode, deprecated_node_type_arg);
|
228
227
|
c_document = deprecated_node_type_arg->doc;
|
229
228
|
} else {
|
@@ -26,7 +26,7 @@ Nokogiri_structured_error_func_restore(libxmlStructuredErrorHandlerState *handle
|
|
26
26
|
}
|
27
27
|
|
28
28
|
void
|
29
|
-
Nokogiri_error_array_pusher(void *ctx,
|
29
|
+
Nokogiri_error_array_pusher(void *ctx, xmlErrorConstPtr error)
|
30
30
|
{
|
31
31
|
VALUE list = (VALUE)ctx;
|
32
32
|
Check_Type(list, T_ARRAY);
|
@@ -34,13 +34,13 @@ Nokogiri_error_array_pusher(void *ctx, xmlErrorPtr error)
|
|
34
34
|
}
|
35
35
|
|
36
36
|
void
|
37
|
-
Nokogiri_error_raise(void *ctx,
|
37
|
+
Nokogiri_error_raise(void *ctx, xmlErrorConstPtr error)
|
38
38
|
{
|
39
39
|
rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
|
40
40
|
}
|
41
41
|
|
42
42
|
VALUE
|
43
|
-
Nokogiri_wrap_xml_syntax_error(
|
43
|
+
Nokogiri_wrap_xml_syntax_error(xmlErrorConstPtr error)
|
44
44
|
{
|
45
45
|
VALUE msg, e, klass;
|
46
46
|
|
data/ext/nokogiri/xml_text.c
CHANGED
@@ -28,8 +28,7 @@ rb_xml_text_s_new(int argc, VALUE *argv, VALUE klass)
|
|
28
28
|
|
29
29
|
if (!rb_obj_is_kind_of(rb_document, cNokogiriXmlDocument)) {
|
30
30
|
xmlNodePtr deprecated_node_type_arg;
|
31
|
-
// TODO:
|
32
|
-
NOKO_WARN_DEPRECATION("Passing a Node as the second parameter to Text.new is deprecated. Please pass a Document instead. This will become an error in a future release of Nokogiri.");
|
31
|
+
NOKO_WARN_DEPRECATION("Passing a Node as the second parameter to Text.new is deprecated. Please pass a Document instead. This will become an error in Nokogiri v1.17.0."); // TODO: deprecated in v1.15.3, remove in v1.17.0
|
33
32
|
Noko_Node_Get_Struct(rb_document, xmlNode, deprecated_node_type_arg);
|
34
33
|
c_document = deprecated_node_type_arg->doc;
|
35
34
|
} else {
|
@@ -321,11 +321,8 @@ handler_lookup(void *data, const xmlChar *c_name, const xmlChar *c_ns_uri)
|
|
321
321
|
VALUE rb_handler = (VALUE)data;
|
322
322
|
if (rb_respond_to(rb_handler, rb_intern((const char *)c_name))) {
|
323
323
|
if (c_ns_uri == NULL) {
|
324
|
-
NOKO_WARN_DEPRECATION(
|
325
|
-
|
326
|
-
" Please update your query to reference this function as 'nokogiri:%s'."
|
327
|
-
" Invoking custom handler functions without a namespace is deprecated and support will be removed in a future release of Nokogiri.",
|
328
|
-
c_name, c_name);
|
324
|
+
NOKO_WARN_DEPRECATION("A custom XPath or CSS handler function named '%s' is being invoked without a namespace. Please update your query to reference this function as 'nokogiri:%s'. Invoking custom handler functions without a namespace is deprecated and will become an error in Nokogiri v1.17.0.",
|
325
|
+
c_name, c_name); // deprecated in v1.15.0, remove in v1.17.0
|
329
326
|
}
|
330
327
|
return method_caller;
|
331
328
|
}
|
@@ -71,7 +71,12 @@ Nokogiri_wrap_xslt_stylesheet(xsltStylesheetPtr ss)
|
|
71
71
|
* call-seq:
|
72
72
|
* parse_stylesheet_doc(document)
|
73
73
|
*
|
74
|
-
* Parse
|
74
|
+
* Parse an XSLT::Stylesheet from +document+.
|
75
|
+
*
|
76
|
+
* [Parameters]
|
77
|
+
* - +document+ (Nokogiri::XML::Document) the document to be parsed.
|
78
|
+
*
|
79
|
+
* [Returns] Nokogiri::XSLT::Stylesheet
|
75
80
|
*/
|
76
81
|
static VALUE
|
77
82
|
parse_stylesheet_doc(VALUE klass, VALUE xmldocobj)
|
@@ -104,7 +109,7 @@ parse_stylesheet_doc(VALUE klass, VALUE xmldocobj)
|
|
104
109
|
* call-seq:
|
105
110
|
* serialize(document)
|
106
111
|
*
|
107
|
-
* Serialize +document+ to an xml string.
|
112
|
+
* Serialize +document+ to an xml string, as specified by the +method+ parameter in the Stylesheet.
|
108
113
|
*/
|
109
114
|
static VALUE
|
110
115
|
rb_xslt_stylesheet_serialize(VALUE self, VALUE xmlobj)
|
@@ -133,7 +138,7 @@ rb_xslt_stylesheet_serialize(VALUE self, VALUE xmlobj)
|
|
133
138
|
* transform(document)
|
134
139
|
* transform(document, params = {})
|
135
140
|
*
|
136
|
-
*
|
141
|
+
* Transform an XML::Document as defined by an XSLT::Stylesheet.
|
137
142
|
*
|
138
143
|
* [Parameters]
|
139
144
|
* - +document+ (Nokogiri::XML::Document) the document to be transformed.
|
data/gumbo-parser/Makefile
CHANGED
@@ -13,6 +13,23 @@ LDFLAGS := -pthread
|
|
13
13
|
|
14
14
|
all: check
|
15
15
|
|
16
|
+
oss-fuzz:
|
17
|
+
./fuzzer/build-ossfuzz.sh
|
18
|
+
|
19
|
+
fuzzers: fuzzer-normal fuzzer-asan fuzzer-ubsan fuzzer-msan
|
20
|
+
|
21
|
+
fuzzer-normal:
|
22
|
+
./fuzzer/build.sh
|
23
|
+
|
24
|
+
fuzzer-asan:
|
25
|
+
SANITIZER=asan ./fuzzer/build.sh
|
26
|
+
|
27
|
+
fuzzer-ubsan:
|
28
|
+
SANITIZER=ubsan ./fuzzer/build.sh
|
29
|
+
|
30
|
+
fuzzer-msan:
|
31
|
+
SANITIZER=msan ./fuzzer/build.sh
|
32
|
+
|
16
33
|
# don't try to regenerate ragel or gperf files in CI, that should be a development-only action and
|
17
34
|
# the generated files should be committed to SCM
|
18
35
|
ifneq ($(CI),true)
|
@@ -81,6 +98,7 @@ coverage:
|
|
81
98
|
|
82
99
|
clean:
|
83
100
|
$(RM) -r build
|
101
|
+
$(RM) -r fuzzer/build fuzzer/src-* fuzzer/gumbo_corpus
|
84
102
|
|
85
103
|
build/src/flags: | build/src
|
86
104
|
@echo 'old_CC := $(CC)' > $@
|
data/gumbo-parser/src/parser.c
CHANGED
@@ -4826,14 +4826,17 @@ GumboOutput* gumbo_parse_with_options (
|
|
4826
4826
|
// to a token.
|
4827
4827
|
if (token.type == GUMBO_TOKEN_END_TAG &&
|
4828
4828
|
token.v.end_tag.tag == GUMBO_TAG_UNKNOWN)
|
4829
|
+
{
|
4829
4830
|
gumbo_free(token.v.end_tag.name);
|
4831
|
+
token.v.end_tag.name = NULL;
|
4832
|
+
}
|
4833
|
+
if (unlikely(state->_open_elements.length > max_tree_depth)) {
|
4834
|
+
parser._output->status = GUMBO_STATUS_TREE_TOO_DEEP;
|
4835
|
+
gumbo_debug("Tree depth limit exceeded.\n");
|
4836
|
+
break;
|
4837
|
+
}
|
4830
4838
|
}
|
4831
4839
|
|
4832
|
-
if (unlikely(state->_open_elements.length > max_tree_depth)) {
|
4833
|
-
parser._output->status = GUMBO_STATUS_TREE_TOO_DEEP;
|
4834
|
-
gumbo_debug("Tree depth limit exceeded.\n");
|
4835
|
-
break;
|
4836
|
-
}
|
4837
4840
|
|
4838
4841
|
++loop_count;
|
4839
4842
|
assert(loop_count < 1000000000UL);
|
@@ -506,6 +506,7 @@ static void abandon_current_tag(GumboParser* parser) {
|
|
506
506
|
for (unsigned int i = 0; i < tag_state->_attributes.length; ++i) {
|
507
507
|
gumbo_destroy_attribute(tag_state->_attributes.data[i]);
|
508
508
|
}
|
509
|
+
gumbo_free(tag_state->_name);
|
509
510
|
gumbo_free(tag_state->_attributes.data);
|
510
511
|
mark_tag_state_as_empty(tag_state);
|
511
512
|
gumbo_string_buffer_destroy(&tag_state->_buffer);
|