nokogiri 1.15.5 → 1.16.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +17 -14
- data/README.md +4 -1
- data/dependencies.yml +6 -6
- data/ext/nokogiri/extconf.rb +3 -4
- data/ext/nokogiri/html4_sax_push_parser.c +1 -1
- data/ext/nokogiri/nokogiri.h +10 -3
- data/ext/nokogiri/test_global_handlers.c +1 -1
- data/ext/nokogiri/xml_cdata.c +1 -2
- data/ext/nokogiri/xml_document.c +7 -3
- data/ext/nokogiri/xml_namespace.c +0 -4
- data/ext/nokogiri/xml_node.c +5 -8
- data/ext/nokogiri/xml_reader.c +26 -48
- data/ext/nokogiri/xml_relax_ng.c +1 -1
- data/ext/nokogiri/xml_sax_parser_context.c +4 -0
- data/ext/nokogiri/xml_sax_push_parser.c +1 -1
- data/ext/nokogiri/xml_schema.c +2 -3
- data/ext/nokogiri/xml_syntax_error.c +3 -3
- data/ext/nokogiri/xml_text.c +1 -2
- data/ext/nokogiri/xml_xpath_context.c +2 -5
- data/ext/nokogiri/xslt_stylesheet.c +8 -3
- data/gumbo-parser/Makefile +18 -0
- data/gumbo-parser/src/parser.c +8 -5
- data/gumbo-parser/src/tokenizer.c +1 -0
- data/lib/nokogiri/css/parser_extras.rb +1 -1
- data/lib/nokogiri/css/xpath_visitor.rb +1 -21
- data/lib/nokogiri/html4/document.rb +1 -1
- data/lib/nokogiri/html4/encoding_reader.rb +1 -1
- data/lib/nokogiri/html5.rb +0 -66
- data/lib/nokogiri/version/constant.rb +1 -1
- data/lib/nokogiri/version/info.rb +6 -5
- data/lib/nokogiri/xml/attr.rb +2 -2
- data/lib/nokogiri/xml/document.rb +3 -3
- data/lib/nokogiri/xml/document_fragment.rb +2 -2
- data/lib/nokogiri/xml/namespace.rb +1 -2
- data/lib/nokogiri/xml/node.rb +31 -24
- data/lib/nokogiri/xml/node_set.rb +3 -3
- data/lib/nokogiri/xml/reader.rb +10 -9
- data/lib/nokogiri/xml/searchable.rb +3 -3
- data/lib/nokogiri/xml/syntax_error.rb +1 -1
- data/lib/nokogiri/xml.rb +1 -1
- data/lib/nokogiri/xslt/stylesheet.rb +29 -7
- data/lib/nokogiri/xslt.rb +1 -1
- data/lib/nokogiri.rb +1 -1
- data/ports/archives/libxml2-2.12.7.tar.xz +0 -0
- metadata +8 -8
- data/ports/archives/libxml2-2.11.6.tar.xz +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f268b2edaa2782a3e9a64e671e36d88149ef5dc23fcf3967a8a54c4475ed7b85
|
4
|
+
data.tar.gz: b49fbf8708f4733e34c211935637ad5999d3950b89aca2da71899c71eaa14901
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e2f096bf7e99c453450c2a9d78c7274a3472d29ea8e8aeeff2dfb6976b5b009a8bfd5e673c7eb1a2575d820df26a830ce4e4043d33fb4c0239bf8098bdb98188
|
7
|
+
data.tar.gz: aa7658f376b0278150550d5426ddbe11469be8624d6fc732cf035d0ed00736e177e2848182c9077a9cc724ae6efe92666e8f3664b71282c4e74e79ad48c51373
|
data/Gemfile
CHANGED
@@ -5,35 +5,38 @@ source "https://rubygems.org"
|
|
5
5
|
gemspec
|
6
6
|
|
7
7
|
group :development do
|
8
|
+
# ruby 3.4.0-dev removed some gems from the default set
|
9
|
+
#
|
10
|
+
# TODO: we should be able to remove these as our gem dependencies sort it out and we pull them in
|
11
|
+
# transitively.
|
12
|
+
gem "mutex_m"
|
13
|
+
|
8
14
|
# bootstrapping
|
9
15
|
gem "bundler", "~> 2.3"
|
10
|
-
gem "rake", "
|
16
|
+
gem "rake", "13.1.0"
|
11
17
|
|
12
18
|
# building extensions
|
13
|
-
gem "rake-compiler", "
|
14
|
-
gem "rake-compiler-dock", "
|
15
|
-
|
16
|
-
# documentation
|
17
|
-
gem "hoe-markdown", "= 1.4.0"
|
19
|
+
gem "rake-compiler", "1.2.6"
|
20
|
+
gem "rake-compiler-dock", "1.4.0"
|
18
21
|
|
19
22
|
# parser generator
|
20
23
|
gem "rexical", "= 1.0.7"
|
21
24
|
|
22
25
|
# tests
|
23
|
-
gem "minitest", "5.
|
24
|
-
gem "minitest-
|
25
|
-
gem "ruby_memcheck", "
|
26
|
+
gem "minitest", "5.21.2"
|
27
|
+
gem "minitest-parallel_fork", "2.0.0"
|
28
|
+
gem "ruby_memcheck", "2.3.0"
|
26
29
|
gem "rubyzip", "~> 2.3.2"
|
27
30
|
gem "simplecov", "= 0.21.2"
|
28
31
|
|
29
32
|
# rubocop
|
30
33
|
if Gem::Requirement.new("~> 3.0").satisfied_by?(Gem::Version.new(RUBY_VERSION))
|
31
|
-
gem "rubocop", "1.
|
32
|
-
gem "rubocop-minitest", "0.
|
34
|
+
gem "rubocop", "1.60.2"
|
35
|
+
gem "rubocop-minitest", "0.34.5"
|
33
36
|
gem "rubocop-packaging", "0.5.2"
|
34
|
-
gem "rubocop-performance", "1.
|
37
|
+
gem "rubocop-performance", "1.20.2"
|
35
38
|
gem "rubocop-rake", "= 0.6.0"
|
36
|
-
gem "rubocop-shopify", "2.
|
39
|
+
gem "rubocop-shopify", "2.14.0"
|
37
40
|
end
|
38
41
|
end
|
39
42
|
|
@@ -41,5 +44,5 @@ end
|
|
41
44
|
# `bundle config set --local without rdoc`
|
42
45
|
# Then re-run `bundle install`.
|
43
46
|
group :rdoc do
|
44
|
-
gem "rdoc", "6.
|
47
|
+
gem "rdoc", "6.6.2"
|
45
48
|
end
|
data/README.md
CHANGED
@@ -55,6 +55,7 @@ There are a few ways to ask exploratory questions:
|
|
55
55
|
|
56
56
|
- The Nokogiri mailing list is active at https://groups.google.com/group/nokogiri-talk
|
57
57
|
- Open an issue using the "Help Request" template at https://github.com/sparklemotion/nokogiri/issues
|
58
|
+
- Open a discussion at https://github.com/sparklemotion/nokogiri/discussions
|
58
59
|
|
59
60
|
Please do not mail the maintainers at their personal addresses.
|
60
61
|
|
@@ -90,6 +91,8 @@ We bump `Major.Minor.Patch` versions following this guidance:
|
|
90
91
|
- Updating packaged libraries for non-security-related reasons.
|
91
92
|
- Dropping support for EOLed Ruby versions. [Some folks find this objectionable](https://github.com/sparklemotion/nokogiri/issues/1568), but [SemVer says this is OK if the public API hasn't changed](https://semver.org/#what-should-i-do-if-i-update-my-own-dependencies-without-changing-the-public-api).
|
92
93
|
- Backwards-incompatible changes to internal or private methods and constants. These are detailed in the "Changes" section of each changelog entry.
|
94
|
+
- Removal of deprecated methods or parameters, after a generous transition period; usually when those methods or parameters are rarely used or dangerous to the user. Essentially, removals that do not justify a major version bump.
|
95
|
+
|
93
96
|
|
94
97
|
`Patch`:
|
95
98
|
|
@@ -111,7 +114,7 @@ You can help sponsor the maintainers of this software through one of these organ
|
|
111
114
|
|
112
115
|
Requirements:
|
113
116
|
|
114
|
-
- Ruby >=
|
117
|
+
- Ruby >= 3.0
|
115
118
|
- JRuby >= 9.4.0.0
|
116
119
|
|
117
120
|
|
data/dependencies.yml
CHANGED
@@ -1,8 +1,8 @@
|
|
1
|
-
|
1
|
+
---
|
2
2
|
libxml2:
|
3
|
-
version: "2.
|
4
|
-
sha256: "
|
5
|
-
# sha-256 hash provided in https://download.gnome.org/sources/libxml2/2.
|
3
|
+
version: "2.12.7"
|
4
|
+
sha256: "24ae78ff1363a973e6d8beba941a7945da2ac056e19b53956aeb6927fd6cfb56"
|
5
|
+
# sha-256 hash provided in https://download.gnome.org/sources/libxml2/2.12/libxml2-2.12.7.sha256sum
|
6
6
|
|
7
7
|
libxslt:
|
8
8
|
version: "1.1.39"
|
@@ -10,8 +10,8 @@ libxslt:
|
|
10
10
|
# sha-256 hash provided in https://download.gnome.org/sources/libxslt/1.1/libxslt-1.1.39.sha256sum
|
11
11
|
|
12
12
|
zlib:
|
13
|
-
version: "1.
|
14
|
-
sha256: "
|
13
|
+
version: "1.3.1"
|
14
|
+
sha256: "9a93b2b7dfdac77ceba5a558a580e74667dd6fede4585b91eefb60f03b72df23"
|
15
15
|
# SHA-256 hash provided on http://zlib.net/
|
16
16
|
|
17
17
|
libiconv:
|
data/ext/nokogiri/extconf.rb
CHANGED
@@ -443,7 +443,7 @@ def process_recipe(name, version, static_p, cross_p, cacheable_p = true)
|
|
443
443
|
# We use 'host' to set compiler prefix for cross-compiling. Prefer host_alias over host. And
|
444
444
|
# prefer i686 (what external dev tools use) to i386 (what ruby's configure.ac emits).
|
445
445
|
recipe.host = RbConfig::CONFIG["host_alias"].empty? ? RbConfig::CONFIG["host"] : RbConfig::CONFIG["host_alias"]
|
446
|
-
recipe.host = recipe.host.gsub(
|
446
|
+
recipe.host = recipe.host.gsub("i386", "i686")
|
447
447
|
|
448
448
|
recipe.target = File.join(PACKAGE_ROOT_DIR, "ports") if cacheable_p
|
449
449
|
recipe.configure_options << "--libdir=#{File.join(recipe.path, "lib")}"
|
@@ -651,8 +651,7 @@ append_cppflags(ENV["CPPFLAGS"].split) unless ENV["CPPFLAGS"].nil?
|
|
651
651
|
append_ldflags(ENV["LDFLAGS"].split) unless ENV["LDFLAGS"].nil?
|
652
652
|
$LIBS = concat_flags($LIBS, ENV["LIBS"])
|
653
653
|
|
654
|
-
#
|
655
|
-
# errors/warnings. see #2302
|
654
|
+
# libgumbo uses C90/C99 features, see #2302
|
656
655
|
append_cflags(["-std=c99", "-Wno-declaration-after-statement"])
|
657
656
|
|
658
657
|
# gumbo html5 serialization is slower with O3, let's make sure we use O2
|
@@ -919,6 +918,7 @@ else
|
|
919
918
|
"--with-c14n",
|
920
919
|
"--with-debug",
|
921
920
|
"--with-threads",
|
921
|
+
"--without-tls", # see https://github.com/sparklemotion/nokogiri/issues/3031
|
922
922
|
"CPPFLAGS=#{cppflags}",
|
923
923
|
"CFLAGS=#{cflags}",
|
924
924
|
]
|
@@ -1087,7 +1087,6 @@ have_func("xmlRelaxNGSetParserStructuredErrors") # introduced in libxml 2.6.24
|
|
1087
1087
|
have_func("xmlRelaxNGSetValidStructuredErrors") # introduced in libxml 2.6.21
|
1088
1088
|
have_func("xmlSchemaSetValidStructuredErrors") # introduced in libxml 2.6.23
|
1089
1089
|
have_func("xmlSchemaSetParserStructuredErrors") # introduced in libxml 2.6.23
|
1090
|
-
have_func("rb_gc_location") # introduced in Ruby 2.7
|
1091
1090
|
have_func("rb_category_warning") # introduced in Ruby 3.0
|
1092
1091
|
|
1093
1092
|
other_library_versions_string = OTHER_LIBRARY_VERSIONS.map { |k, v| [k, v].join(":") }.join(",")
|
@@ -32,7 +32,7 @@ native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
|
|
32
32
|
|
33
33
|
if ((status != 0) && !(ctx->options & XML_PARSE_RECOVER)) {
|
34
34
|
// TODO: there appear to be no tests for this block
|
35
|
-
|
35
|
+
xmlErrorConstPtr e = xmlCtxtGetLastError(ctx);
|
36
36
|
Nokogiri_error_raise(NULL, e);
|
37
37
|
}
|
38
38
|
|
data/ext/nokogiri/nokogiri.h
CHANGED
@@ -66,6 +66,13 @@ xmlNodePtr xmlLastElementChild(xmlNodePtr parent);
|
|
66
66
|
#define XMLNS_PREFIX "xmlns"
|
67
67
|
#define XMLNS_PREFIX_LEN 6 /* including either colon or \0 */
|
68
68
|
|
69
|
+
#ifndef xmlErrorConstPtr
|
70
|
+
# if LIBXML_VERSION >= 21200
|
71
|
+
# define xmlErrorConstPtr const xmlError *
|
72
|
+
# else
|
73
|
+
# define xmlErrorConstPtr xmlError *
|
74
|
+
# endif
|
75
|
+
#endif
|
69
76
|
|
70
77
|
#include <ruby.h>
|
71
78
|
#include <ruby/st.h>
|
@@ -227,9 +234,9 @@ void Nokogiri_structured_error_func_save(libxmlStructuredErrorHandlerState *hand
|
|
227
234
|
void Nokogiri_structured_error_func_save_and_set(libxmlStructuredErrorHandlerState *handler_state, void *user_data,
|
228
235
|
xmlStructuredErrorFunc handler);
|
229
236
|
void Nokogiri_structured_error_func_restore(libxmlStructuredErrorHandlerState *handler_state);
|
230
|
-
VALUE Nokogiri_wrap_xml_syntax_error(
|
231
|
-
void Nokogiri_error_array_pusher(void *ctx,
|
232
|
-
NORETURN_DECL void Nokogiri_error_raise(void *ctx,
|
237
|
+
VALUE Nokogiri_wrap_xml_syntax_error(xmlErrorConstPtr error);
|
238
|
+
void Nokogiri_error_array_pusher(void *ctx, xmlErrorConstPtr error);
|
239
|
+
NORETURN_DECL void Nokogiri_error_raise(void *ctx, xmlErrorConstPtr error);
|
233
240
|
void Nokogiri_marshal_xpath_funcall_and_return_values(xmlXPathParserContextPtr ctx, int nargs, VALUE handler,
|
234
241
|
const char *function_name) ;
|
235
242
|
|
@@ -3,7 +3,7 @@
|
|
3
3
|
static VALUE foreign_error_handler_block = Qnil;
|
4
4
|
|
5
5
|
static void
|
6
|
-
foreign_error_handler(void *user_data,
|
6
|
+
foreign_error_handler(void *user_data, xmlErrorConstPtr c_error)
|
7
7
|
{
|
8
8
|
rb_funcall(foreign_error_handler_block, rb_intern("call"), 0);
|
9
9
|
}
|
data/ext/nokogiri/xml_cdata.c
CHANGED
@@ -33,8 +33,7 @@ rb_xml_cdata_s_new(int argc, VALUE *argv, VALUE klass)
|
|
33
33
|
|
34
34
|
if (!rb_obj_is_kind_of(rb_document, cNokogiriXmlDocument)) {
|
35
35
|
xmlNodePtr deprecated_node_type_arg;
|
36
|
-
// TODO:
|
37
|
-
NOKO_WARN_DEPRECATION("Passing a Node as the first parameter to CDATA.new is deprecated. Please pass a Document instead. This will become an error in a future release of Nokogiri.");
|
36
|
+
NOKO_WARN_DEPRECATION("Passing a Node as the first parameter to CDATA.new is deprecated. Please pass a Document instead. This will become an error in Nokogiri v1.17.0."); // TODO: deprecated in v1.15.3, remove in v1.17.0
|
38
37
|
Noko_Node_Get_Struct(rb_document, xmlNode, deprecated_node_type_arg);
|
39
38
|
c_document = deprecated_node_type_arg->doc;
|
40
39
|
} else {
|
data/ext/nokogiri/xml_document.c
CHANGED
@@ -74,8 +74,10 @@ dealloc(void *data)
|
|
74
74
|
|
75
75
|
ruby_xfree(doc->_private);
|
76
76
|
|
77
|
+
#if defined(__GNUC__) && __GNUC__ >= 5
|
77
78
|
#pragma GCC diagnostic push
|
78
79
|
#pragma GCC diagnostic ignored "-Wdeprecated-declarations" // xmlDeregisterNodeDefault is deprecated as of libxml2 2.11.0
|
80
|
+
#endif
|
79
81
|
/*
|
80
82
|
* libxml-ruby < 3.0.0 uses xmlDeregisterNodeDefault. If the user is using one of those older
|
81
83
|
* versions, the registered callback from libxml-ruby will access the _private pointers set by
|
@@ -90,7 +92,9 @@ dealloc(void *data)
|
|
90
92
|
if (xmlDeregisterNodeDefaultValue) {
|
91
93
|
remove_private((xmlNodePtr)doc);
|
92
94
|
}
|
95
|
+
#if defined(__GNUC__) && __GNUC__ >= 5
|
93
96
|
#pragma GCC diagnostic pop
|
97
|
+
#endif
|
94
98
|
|
95
99
|
xmlFreeDoc(doc);
|
96
100
|
}
|
@@ -337,7 +341,7 @@ read_io(VALUE klass,
|
|
337
341
|
xmlSetStructuredErrorFunc(NULL, NULL);
|
338
342
|
|
339
343
|
if (doc == NULL) {
|
340
|
-
|
344
|
+
xmlErrorConstPtr error;
|
341
345
|
|
342
346
|
xmlFreeDoc(doc);
|
343
347
|
|
@@ -383,7 +387,7 @@ read_memory(VALUE klass,
|
|
383
387
|
xmlSetStructuredErrorFunc(NULL, NULL);
|
384
388
|
|
385
389
|
if (doc == NULL) {
|
386
|
-
|
390
|
+
xmlErrorConstPtr error;
|
387
391
|
|
388
392
|
xmlFreeDoc(doc);
|
389
393
|
|
@@ -537,7 +541,7 @@ create_entity(int argc, VALUE *argv, VALUE self)
|
|
537
541
|
);
|
538
542
|
|
539
543
|
if (NULL == ptr) {
|
540
|
-
|
544
|
+
xmlErrorConstPtr error = xmlGetLastError();
|
541
545
|
if (error) {
|
542
546
|
rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
|
543
547
|
} else {
|
@@ -42,7 +42,6 @@ _xml_namespace_dealloc(void *ptr)
|
|
42
42
|
xmlFree(ns);
|
43
43
|
}
|
44
44
|
|
45
|
-
#ifdef HAVE_RB_GC_LOCATION
|
46
45
|
static void
|
47
46
|
_xml_namespace_update_references(void *ptr)
|
48
47
|
{
|
@@ -51,9 +50,6 @@ _xml_namespace_update_references(void *ptr)
|
|
51
50
|
ns->_private = (void *)rb_gc_location((VALUE)ns->_private);
|
52
51
|
}
|
53
52
|
}
|
54
|
-
#else
|
55
|
-
# define _xml_namespace_update_references 0
|
56
|
-
#endif
|
57
53
|
|
58
54
|
static const rb_data_type_t nokogiri_xml_namespace_type_with_dealloc = {
|
59
55
|
.wrap_struct_name = "Nokogiri::XML::Namespace#with_dealloc",
|
data/ext/nokogiri/xml_node.c
CHANGED
@@ -28,7 +28,6 @@ _xml_node_mark(void *ptr)
|
|
28
28
|
}
|
29
29
|
}
|
30
30
|
|
31
|
-
#ifdef HAVE_RB_GC_LOCATION
|
32
31
|
static void
|
33
32
|
_xml_node_update_references(void *ptr)
|
34
33
|
{
|
@@ -38,9 +37,6 @@ _xml_node_update_references(void *ptr)
|
|
38
37
|
node->_private = (void *)rb_gc_location((VALUE)node->_private);
|
39
38
|
}
|
40
39
|
}
|
41
|
-
#else
|
42
|
-
# define _xml_node_update_references 0
|
43
|
-
#endif
|
44
40
|
|
45
41
|
static const rb_data_type_t nokogiri_node_type = {
|
46
42
|
.wrap_struct_name = "Nokogiri::XML::Node",
|
@@ -961,7 +957,7 @@ internal_subset(VALUE self)
|
|
961
957
|
* - +new_parent_doc+
|
962
958
|
* The new node's parent Document. Defaults to this node's document.
|
963
959
|
*
|
964
|
-
* [Returns] The new
|
960
|
+
* [Returns] The new Nokogiri::XML::Node
|
965
961
|
*/
|
966
962
|
static VALUE
|
967
963
|
duplicate_node(int argc, VALUE *argv, VALUE self)
|
@@ -2058,8 +2054,7 @@ rb_xml_node_new(int argc, VALUE *argv, VALUE klass)
|
|
2058
2054
|
rb_raise(rb_eArgError, "document must be a Nokogiri::XML::Node");
|
2059
2055
|
}
|
2060
2056
|
if (!rb_obj_is_kind_of(rb_document_node, cNokogiriXmlDocument)) {
|
2061
|
-
// TODO:
|
2062
|
-
NOKO_WARN_DEPRECATION("Passing a Node as the second parameter to Node.new is deprecated. Please pass a Document instead, or prefer an alternative constructor like Node#add_child. This will become an error in a future release of Nokogiri.");
|
2057
|
+
NOKO_WARN_DEPRECATION("Passing a Node as the second parameter to Node.new is deprecated. Please pass a Document instead, or prefer an alternative constructor like Node#add_child. This will become an error in Nokogiri v1.17.0."); // TODO: deprecated in v1.13.0, remove in v1.17.0
|
2063
2058
|
}
|
2064
2059
|
Noko_Node_Get_Struct(rb_document_node, xmlNode, c_document_node);
|
2065
2060
|
|
@@ -2138,7 +2133,7 @@ process_xincludes(VALUE self, VALUE options)
|
|
2138
2133
|
xmlSetStructuredErrorFunc(NULL, NULL);
|
2139
2134
|
|
2140
2135
|
if (rcode < 0) {
|
2141
|
-
|
2136
|
+
xmlErrorConstPtr error;
|
2142
2137
|
|
2143
2138
|
error = xmlGetLastError();
|
2144
2139
|
if (error) {
|
@@ -2174,6 +2169,8 @@ in_context(VALUE self, VALUE _str, VALUE _options)
|
|
2174
2169
|
|
2175
2170
|
/* Twiddle global variable because of a bug in libxml2.
|
2176
2171
|
* http://git.gnome.org/browse/libxml2/commit/?id=e20fb5a72c83cbfc8e4a8aa3943c6be8febadab7
|
2172
|
+
*
|
2173
|
+
* TODO: this is fixed, and HTML_PARSE_NOIMPLIED is defined, in libxml2 2.7.7
|
2177
2174
|
*/
|
2178
2175
|
#ifndef HTML_PARSE_NOIMPLIED
|
2179
2176
|
htmlHandleOmittedElem(0);
|
data/ext/nokogiri/xml_reader.c
CHANGED
@@ -5,8 +5,14 @@ VALUE cNokogiriXmlReader;
|
|
5
5
|
static void
|
6
6
|
xml_reader_deallocate(void *data)
|
7
7
|
{
|
8
|
+
// free the document separately because we _may_ have triggered preservation by calling
|
9
|
+
// xmlTextReaderCurrentDoc during a read_more.
|
8
10
|
xmlTextReaderPtr reader = data;
|
11
|
+
xmlDocPtr doc = xmlTextReaderCurrentDoc(reader);
|
9
12
|
xmlFreeTextReader(reader);
|
13
|
+
if (doc) {
|
14
|
+
xmlFreeDoc(doc);
|
15
|
+
}
|
10
16
|
}
|
11
17
|
|
12
18
|
static const rb_data_type_t xml_reader_type = {
|
@@ -166,48 +172,6 @@ rb_xml_reader_namespaces(VALUE rb_reader)
|
|
166
172
|
return rb_namespaces ;
|
167
173
|
}
|
168
174
|
|
169
|
-
/*
|
170
|
-
:call-seq: attribute_nodes() → Array<Nokogiri::XML::Attr>
|
171
|
-
|
172
|
-
Get the attributes of the current node as an Array of XML:Attr
|
173
|
-
|
174
|
-
⚠ This method is deprecated and unsafe to use. It will be removed in a future version of Nokogiri.
|
175
|
-
|
176
|
-
See related: #attribute_hash, #attributes
|
177
|
-
*/
|
178
|
-
static VALUE
|
179
|
-
rb_xml_reader_attribute_nodes(VALUE rb_reader)
|
180
|
-
{
|
181
|
-
xmlTextReaderPtr c_reader;
|
182
|
-
xmlNodePtr c_node;
|
183
|
-
VALUE attr_nodes;
|
184
|
-
int j;
|
185
|
-
|
186
|
-
// TODO: deprecated, remove in Nokogiri v1.15, see https://github.com/sparklemotion/nokogiri/issues/2598
|
187
|
-
// After removal, we can also remove all the "node_has_a_document" special handling from xml_node.c
|
188
|
-
NOKO_WARN_DEPRECATION("Reader#attribute_nodes is deprecated and will be removed in a future version of Nokogiri. Please use Reader#attribute_hash instead.");
|
189
|
-
|
190
|
-
TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_reader_type, c_reader);
|
191
|
-
|
192
|
-
if (! has_attributes(c_reader)) {
|
193
|
-
return rb_ary_new() ;
|
194
|
-
}
|
195
|
-
|
196
|
-
c_node = xmlTextReaderExpand(c_reader);
|
197
|
-
if (c_node == NULL) {
|
198
|
-
return Qnil;
|
199
|
-
}
|
200
|
-
|
201
|
-
attr_nodes = noko_xml_node_attrs(c_node);
|
202
|
-
|
203
|
-
/* ensure that the Reader won't be GCed as long as a node is referenced */
|
204
|
-
for (j = 0 ; j < RARRAY_LEN(attr_nodes) ; j++) {
|
205
|
-
rb_iv_set(rb_ary_entry(attr_nodes, j), "@reader", rb_reader);
|
206
|
-
}
|
207
|
-
|
208
|
-
return attr_nodes;
|
209
|
-
}
|
210
|
-
|
211
175
|
/*
|
212
176
|
:call-seq: attribute_hash() → Hash<String ⇒ String>
|
213
177
|
|
@@ -554,9 +518,10 @@ static VALUE
|
|
554
518
|
read_more(VALUE self)
|
555
519
|
{
|
556
520
|
xmlTextReaderPtr reader;
|
557
|
-
|
521
|
+
xmlErrorConstPtr error;
|
558
522
|
VALUE error_list;
|
559
523
|
int ret;
|
524
|
+
xmlDocPtr c_document;
|
560
525
|
|
561
526
|
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
562
527
|
|
@@ -566,6 +531,17 @@ read_more(VALUE self)
|
|
566
531
|
ret = xmlTextReaderRead(reader);
|
567
532
|
xmlSetStructuredErrorFunc(NULL, NULL);
|
568
533
|
|
534
|
+
c_document = xmlTextReaderCurrentDoc(reader);
|
535
|
+
if (c_document && c_document->encoding == NULL) {
|
536
|
+
VALUE constructor_encoding = rb_iv_get(self, "@encoding");
|
537
|
+
if (RTEST(constructor_encoding)) {
|
538
|
+
c_document->encoding = xmlStrdup(BAD_CAST StringValueCStr(constructor_encoding));
|
539
|
+
} else {
|
540
|
+
rb_iv_set(self, "@encoding", NOKOGIRI_STR_NEW2("UTF-8"));
|
541
|
+
c_document->encoding = xmlStrdup(BAD_CAST "UTF-8");
|
542
|
+
}
|
543
|
+
}
|
544
|
+
|
569
545
|
if (ret == 1) { return self; }
|
570
546
|
if (ret == 0) { return Qnil; }
|
571
547
|
|
@@ -749,15 +725,18 @@ rb_xml_reader_encoding(VALUE rb_reader)
|
|
749
725
|
const char *parser_encoding;
|
750
726
|
VALUE constructor_encoding;
|
751
727
|
|
728
|
+
TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_reader_type, c_reader);
|
729
|
+
parser_encoding = (const char *)xmlTextReaderConstEncoding(c_reader);
|
730
|
+
if (parser_encoding) {
|
731
|
+
return NOKOGIRI_STR_NEW2(parser_encoding);
|
732
|
+
}
|
733
|
+
|
752
734
|
constructor_encoding = rb_iv_get(rb_reader, "@encoding");
|
753
735
|
if (RTEST(constructor_encoding)) {
|
754
736
|
return constructor_encoding;
|
755
737
|
}
|
756
738
|
|
757
|
-
|
758
|
-
parser_encoding = (const char *)xmlTextReaderConstEncoding(c_reader);
|
759
|
-
if (parser_encoding == NULL) { return Qnil; }
|
760
|
-
return NOKOGIRI_STR_NEW2(parser_encoding);
|
739
|
+
return Qnil;
|
761
740
|
}
|
762
741
|
|
763
742
|
void
|
@@ -778,7 +757,6 @@ noko_init_xml_reader(void)
|
|
778
757
|
rb_define_method(cNokogiriXmlReader, "attribute", reader_attribute, 1);
|
779
758
|
rb_define_method(cNokogiriXmlReader, "attribute_at", attribute_at, 1);
|
780
759
|
rb_define_method(cNokogiriXmlReader, "attribute_count", attribute_count, 0);
|
781
|
-
rb_define_method(cNokogiriXmlReader, "attribute_nodes", rb_xml_reader_attribute_nodes, 0);
|
782
760
|
rb_define_method(cNokogiriXmlReader, "attribute_hash", rb_xml_reader_attribute_hash, 0);
|
783
761
|
rb_define_method(cNokogiriXmlReader, "attributes?", attributes_eh, 0);
|
784
762
|
rb_define_method(cNokogiriXmlReader, "base_uri", rb_xml_reader_base_uri, 0);
|
data/ext/nokogiri/xml_relax_ng.c
CHANGED
@@ -59,6 +59,10 @@ parse_io(VALUE klass, VALUE io, VALUE encoding)
|
|
59
59
|
(xmlInputReadCallback)noko_io_read,
|
60
60
|
(xmlInputCloseCallback)noko_io_close,
|
61
61
|
(void *)io, enc);
|
62
|
+
if (!ctxt) {
|
63
|
+
rb_raise(rb_eRuntimeError, "failed to create xml sax parser context");
|
64
|
+
}
|
65
|
+
|
62
66
|
if (ctxt->sax) {
|
63
67
|
xmlFree(ctxt->sax);
|
64
68
|
ctxt->sax = NULL;
|
@@ -59,7 +59,7 @@ native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
|
|
59
59
|
|
60
60
|
if (xmlParseChunk(ctx, chunk, size, Qtrue == _last_chunk ? 1 : 0)) {
|
61
61
|
if (!(ctx->options & XML_PARSE_RECOVER)) {
|
62
|
-
|
62
|
+
xmlErrorConstPtr e = xmlCtxtGetLastError(ctx);
|
63
63
|
Nokogiri_error_raise(NULL, e);
|
64
64
|
}
|
65
65
|
}
|
data/ext/nokogiri/xml_schema.c
CHANGED
@@ -146,7 +146,7 @@ xml_schema_parse_schema(
|
|
146
146
|
xmlSchemaFreeParserCtxt(c_parser_context);
|
147
147
|
|
148
148
|
if (NULL == c_schema) {
|
149
|
-
|
149
|
+
xmlErrorConstPtr error = xmlGetLastError();
|
150
150
|
if (error) {
|
151
151
|
Nokogiri_error_raise(NULL, error);
|
152
152
|
} else {
|
@@ -222,8 +222,7 @@ rb_xml_schema_s_from_document(int argc, VALUE *argv, VALUE klass)
|
|
222
222
|
|
223
223
|
if (!rb_obj_is_kind_of(rb_document, cNokogiriXmlDocument)) {
|
224
224
|
xmlNodePtr deprecated_node_type_arg;
|
225
|
-
// TODO:
|
226
|
-
NOKO_WARN_DEPRECATION("Passing a Node as the first parameter to Schema.from_document is deprecated. Please pass a Document instead. This will become an error in a future release of Nokogiri.");
|
225
|
+
NOKO_WARN_DEPRECATION("Passing a Node as the first parameter to Schema.from_document is deprecated. Please pass a Document instead. This will become an error in Nokogiri v1.17.0."); // TODO: deprecated in v1.15.3, remove in v1.17.0
|
227
226
|
Noko_Node_Get_Struct(rb_document, xmlNode, deprecated_node_type_arg);
|
228
227
|
c_document = deprecated_node_type_arg->doc;
|
229
228
|
} else {
|
@@ -26,7 +26,7 @@ Nokogiri_structured_error_func_restore(libxmlStructuredErrorHandlerState *handle
|
|
26
26
|
}
|
27
27
|
|
28
28
|
void
|
29
|
-
Nokogiri_error_array_pusher(void *ctx,
|
29
|
+
Nokogiri_error_array_pusher(void *ctx, xmlErrorConstPtr error)
|
30
30
|
{
|
31
31
|
VALUE list = (VALUE)ctx;
|
32
32
|
Check_Type(list, T_ARRAY);
|
@@ -34,13 +34,13 @@ Nokogiri_error_array_pusher(void *ctx, xmlErrorPtr error)
|
|
34
34
|
}
|
35
35
|
|
36
36
|
void
|
37
|
-
Nokogiri_error_raise(void *ctx,
|
37
|
+
Nokogiri_error_raise(void *ctx, xmlErrorConstPtr error)
|
38
38
|
{
|
39
39
|
rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
|
40
40
|
}
|
41
41
|
|
42
42
|
VALUE
|
43
|
-
Nokogiri_wrap_xml_syntax_error(
|
43
|
+
Nokogiri_wrap_xml_syntax_error(xmlErrorConstPtr error)
|
44
44
|
{
|
45
45
|
VALUE msg, e, klass;
|
46
46
|
|
data/ext/nokogiri/xml_text.c
CHANGED
@@ -28,8 +28,7 @@ rb_xml_text_s_new(int argc, VALUE *argv, VALUE klass)
|
|
28
28
|
|
29
29
|
if (!rb_obj_is_kind_of(rb_document, cNokogiriXmlDocument)) {
|
30
30
|
xmlNodePtr deprecated_node_type_arg;
|
31
|
-
// TODO:
|
32
|
-
NOKO_WARN_DEPRECATION("Passing a Node as the second parameter to Text.new is deprecated. Please pass a Document instead. This will become an error in a future release of Nokogiri.");
|
31
|
+
NOKO_WARN_DEPRECATION("Passing a Node as the second parameter to Text.new is deprecated. Please pass a Document instead. This will become an error in Nokogiri v1.17.0."); // TODO: deprecated in v1.15.3, remove in v1.17.0
|
33
32
|
Noko_Node_Get_Struct(rb_document, xmlNode, deprecated_node_type_arg);
|
34
33
|
c_document = deprecated_node_type_arg->doc;
|
35
34
|
} else {
|
@@ -321,11 +321,8 @@ handler_lookup(void *data, const xmlChar *c_name, const xmlChar *c_ns_uri)
|
|
321
321
|
VALUE rb_handler = (VALUE)data;
|
322
322
|
if (rb_respond_to(rb_handler, rb_intern((const char *)c_name))) {
|
323
323
|
if (c_ns_uri == NULL) {
|
324
|
-
NOKO_WARN_DEPRECATION(
|
325
|
-
|
326
|
-
" Please update your query to reference this function as 'nokogiri:%s'."
|
327
|
-
" Invoking custom handler functions without a namespace is deprecated and support will be removed in a future release of Nokogiri.",
|
328
|
-
c_name, c_name);
|
324
|
+
NOKO_WARN_DEPRECATION("A custom XPath or CSS handler function named '%s' is being invoked without a namespace. Please update your query to reference this function as 'nokogiri:%s'. Invoking custom handler functions without a namespace is deprecated and will become an error in Nokogiri v1.17.0.",
|
325
|
+
c_name, c_name); // deprecated in v1.15.0, remove in v1.17.0
|
329
326
|
}
|
330
327
|
return method_caller;
|
331
328
|
}
|
@@ -71,7 +71,12 @@ Nokogiri_wrap_xslt_stylesheet(xsltStylesheetPtr ss)
|
|
71
71
|
* call-seq:
|
72
72
|
* parse_stylesheet_doc(document)
|
73
73
|
*
|
74
|
-
* Parse
|
74
|
+
* Parse an XSLT::Stylesheet from +document+.
|
75
|
+
*
|
76
|
+
* [Parameters]
|
77
|
+
* - +document+ (Nokogiri::XML::Document) the document to be parsed.
|
78
|
+
*
|
79
|
+
* [Returns] Nokogiri::XSLT::Stylesheet
|
75
80
|
*/
|
76
81
|
static VALUE
|
77
82
|
parse_stylesheet_doc(VALUE klass, VALUE xmldocobj)
|
@@ -104,7 +109,7 @@ parse_stylesheet_doc(VALUE klass, VALUE xmldocobj)
|
|
104
109
|
* call-seq:
|
105
110
|
* serialize(document)
|
106
111
|
*
|
107
|
-
* Serialize +document+ to an xml string.
|
112
|
+
* Serialize +document+ to an xml string, as specified by the +method+ parameter in the Stylesheet.
|
108
113
|
*/
|
109
114
|
static VALUE
|
110
115
|
rb_xslt_stylesheet_serialize(VALUE self, VALUE xmlobj)
|
@@ -133,7 +138,7 @@ rb_xslt_stylesheet_serialize(VALUE self, VALUE xmlobj)
|
|
133
138
|
* transform(document)
|
134
139
|
* transform(document, params = {})
|
135
140
|
*
|
136
|
-
*
|
141
|
+
* Transform an XML::Document as defined by an XSLT::Stylesheet.
|
137
142
|
*
|
138
143
|
* [Parameters]
|
139
144
|
* - +document+ (Nokogiri::XML::Document) the document to be transformed.
|
data/gumbo-parser/Makefile
CHANGED
@@ -13,6 +13,23 @@ LDFLAGS := -pthread
|
|
13
13
|
|
14
14
|
all: check
|
15
15
|
|
16
|
+
oss-fuzz:
|
17
|
+
./fuzzer/build-ossfuzz.sh
|
18
|
+
|
19
|
+
fuzzers: fuzzer-normal fuzzer-asan fuzzer-ubsan fuzzer-msan
|
20
|
+
|
21
|
+
fuzzer-normal:
|
22
|
+
./fuzzer/build.sh
|
23
|
+
|
24
|
+
fuzzer-asan:
|
25
|
+
SANITIZER=asan ./fuzzer/build.sh
|
26
|
+
|
27
|
+
fuzzer-ubsan:
|
28
|
+
SANITIZER=ubsan ./fuzzer/build.sh
|
29
|
+
|
30
|
+
fuzzer-msan:
|
31
|
+
SANITIZER=msan ./fuzzer/build.sh
|
32
|
+
|
16
33
|
# don't try to regenerate ragel or gperf files in CI, that should be a development-only action and
|
17
34
|
# the generated files should be committed to SCM
|
18
35
|
ifneq ($(CI),true)
|
@@ -81,6 +98,7 @@ coverage:
|
|
81
98
|
|
82
99
|
clean:
|
83
100
|
$(RM) -r build
|
101
|
+
$(RM) -r fuzzer/build fuzzer/src-* fuzzer/gumbo_corpus
|
84
102
|
|
85
103
|
build/src/flags: | build/src
|
86
104
|
@echo 'old_CC := $(CC)' > $@
|
data/gumbo-parser/src/parser.c
CHANGED
@@ -4826,14 +4826,17 @@ GumboOutput* gumbo_parse_with_options (
|
|
4826
4826
|
// to a token.
|
4827
4827
|
if (token.type == GUMBO_TOKEN_END_TAG &&
|
4828
4828
|
token.v.end_tag.tag == GUMBO_TAG_UNKNOWN)
|
4829
|
+
{
|
4829
4830
|
gumbo_free(token.v.end_tag.name);
|
4831
|
+
token.v.end_tag.name = NULL;
|
4832
|
+
}
|
4833
|
+
if (unlikely(state->_open_elements.length > max_tree_depth)) {
|
4834
|
+
parser._output->status = GUMBO_STATUS_TREE_TOO_DEEP;
|
4835
|
+
gumbo_debug("Tree depth limit exceeded.\n");
|
4836
|
+
break;
|
4837
|
+
}
|
4830
4838
|
}
|
4831
4839
|
|
4832
|
-
if (unlikely(state->_open_elements.length > max_tree_depth)) {
|
4833
|
-
parser._output->status = GUMBO_STATUS_TREE_TOO_DEEP;
|
4834
|
-
gumbo_debug("Tree depth limit exceeded.\n");
|
4835
|
-
break;
|
4836
|
-
}
|
4837
4840
|
|
4838
4841
|
++loop_count;
|
4839
4842
|
assert(loop_count < 1000000000UL);
|
@@ -506,6 +506,7 @@ static void abandon_current_tag(GumboParser* parser) {
|
|
506
506
|
for (unsigned int i = 0; i < tag_state->_attributes.length; ++i) {
|
507
507
|
gumbo_destroy_attribute(tag_state->_attributes.data[i]);
|
508
508
|
}
|
509
|
+
gumbo_free(tag_state->_name);
|
509
510
|
gumbo_free(tag_state->_attributes.data);
|
510
511
|
mark_tag_state_as_empty(tag_state);
|
511
512
|
gumbo_string_buffer_destroy(&tag_state->_buffer);
|