nokogiri 1.15.4 → 1.16.2

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (49)
  1. checksums.yaml +4 -4
  2. data/Gemfile +17 -14
  3. data/README.md +4 -1
  4. data/dependencies.yml +9 -8
  5. data/ext/nokogiri/extconf.rb +3 -4
  6. data/ext/nokogiri/html4_sax_push_parser.c +1 -1
  7. data/ext/nokogiri/nokogiri.h +10 -3
  8. data/ext/nokogiri/test_global_handlers.c +1 -1
  9. data/ext/nokogiri/xml_cdata.c +1 -2
  10. data/ext/nokogiri/xml_document.c +7 -3
  11. data/ext/nokogiri/xml_namespace.c +0 -4
  12. data/ext/nokogiri/xml_node.c +5 -8
  13. data/ext/nokogiri/xml_reader.c +25 -48
  14. data/ext/nokogiri/xml_relax_ng.c +1 -1
  15. data/ext/nokogiri/xml_sax_parser_context.c +4 -0
  16. data/ext/nokogiri/xml_sax_push_parser.c +1 -1
  17. data/ext/nokogiri/xml_schema.c +2 -3
  18. data/ext/nokogiri/xml_syntax_error.c +3 -3
  19. data/ext/nokogiri/xml_text.c +1 -2
  20. data/ext/nokogiri/xml_xpath_context.c +2 -5
  21. data/ext/nokogiri/xslt_stylesheet.c +8 -3
  22. data/gumbo-parser/Makefile +18 -0
  23. data/gumbo-parser/src/parser.c +8 -5
  24. data/gumbo-parser/src/tokenizer.c +1 -0
  25. data/lib/nokogiri/css/parser_extras.rb +1 -1
  26. data/lib/nokogiri/css/xpath_visitor.rb +1 -21
  27. data/lib/nokogiri/html4/document.rb +1 -1
  28. data/lib/nokogiri/html4/encoding_reader.rb +1 -1
  29. data/lib/nokogiri/html5.rb +0 -66
  30. data/lib/nokogiri/version/constant.rb +1 -1
  31. data/lib/nokogiri/version/info.rb +6 -5
  32. data/lib/nokogiri/xml/attr.rb +2 -2
  33. data/lib/nokogiri/xml/document.rb +4 -5
  34. data/lib/nokogiri/xml/document_fragment.rb +2 -2
  35. data/lib/nokogiri/xml/namespace.rb +1 -2
  36. data/lib/nokogiri/xml/node.rb +31 -24
  37. data/lib/nokogiri/xml/node_set.rb +3 -3
  38. data/lib/nokogiri/xml/reader.rb +10 -9
  39. data/lib/nokogiri/xml/searchable.rb +3 -3
  40. data/lib/nokogiri/xml/syntax_error.rb +1 -1
  41. data/lib/nokogiri/xml.rb +1 -1
  42. data/lib/nokogiri/xslt/stylesheet.rb +29 -7
  43. data/lib/nokogiri/xslt.rb +1 -1
  44. data/lib/nokogiri.rb +1 -1
  45. data/ports/archives/libxml2-2.12.5.tar.xz +0 -0
  46. data/ports/archives/libxslt-1.1.39.tar.xz +0 -0
  47. metadata +6 -6
  48. data/ports/archives/libxml2-2.11.5.tar.xz +0 -0
  49. data/ports/archives/libxslt-1.1.38.tar.xz +0 -0
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 244e673a87f9559a0aa597c75f7c8848f96f67db9d6fa8b9b21fecc466c472d2
4
- data.tar.gz: f046f870e270f8daa484bb16fc8915a7228dfee3d6c61220e78fea753a2250ee
3
+ metadata.gz: eff8843eb712576ae46f11395a29e0daf5b2b99fbec124a604c784281fab2f69
4
+ data.tar.gz: c4aa6b876e82af3b9d6620cda1040130e17e493d80e7dc84c6c7715d7783d1a1
5
5
  SHA512:
6
- metadata.gz: bce186e13d536128d231590d143bd40db88cf1c3008e8d0775b411b1ea7e26eb27ff81af47695b0b073a603bdc111361e56626dbdc7ae43f0f419f06b7128f26
7
- data.tar.gz: 252a25c25d530d20012094fa6ba73c87823ca40dd73ec42cde8f579ff14a5c8bc8a3e799e37c9e5848bcc2c4d4ae3d94b88f8b0e6d0771136549bbfd5bd4b7c8
6
+ metadata.gz: 350715c0687e8d463bc56b666f2113f21ec778998d4862ae88c13a6dbe5ab471ffff1c123870a5ceb9934b0e645ce9e01e25e9242c140244e234076a47cd1922
7
+ data.tar.gz: 5f617afb7c937a1f321d2f11c13995bd26a7344c9137a63198c88e56512f9cbb8d86d1cb4184cd67dbca70fa380fc2e35cbcd49e487bf73bde323312db547cff
data/Gemfile CHANGED
@@ -5,35 +5,38 @@ source "https://rubygems.org"
5
5
  gemspec
6
6
 
7
7
  group :development do
8
+ # ruby 3.4.0-dev removed some gems from the default set
9
+ #
10
+ # TODO: we should be able to remove these as our gem dependencies sort it out and we pull them in
11
+ # transitively.
12
+ gem "mutex_m"
13
+
8
14
  # bootstrapping
9
15
  gem "bundler", "~> 2.3"
10
- gem "rake", "= 13.0.6"
16
+ gem "rake", "13.1.0"
11
17
 
12
18
  # building extensions
13
- gem "rake-compiler", "= 1.2.1"
14
- gem "rake-compiler-dock", "= 1.3.0"
15
-
16
- # documentation
17
- gem "hoe-markdown", "= 1.4.0"
19
+ gem "rake-compiler", "1.2.6"
20
+ gem "rake-compiler-dock", "1.4.0"
18
21
 
19
22
  # parser generator
20
23
  gem "rexical", "= 1.0.7"
21
24
 
22
25
  # tests
23
- gem "minitest", "5.18.0"
24
- gem "minitest-reporters", "1.6.0"
25
- gem "ruby_memcheck", "1.3.2"
26
+ gem "minitest", "5.21.2"
27
+ gem "minitest-parallel_fork", "2.0.0"
28
+ gem "ruby_memcheck", "2.3.0"
26
29
  gem "rubyzip", "~> 2.3.2"
27
30
  gem "simplecov", "= 0.21.2"
28
31
 
29
32
  # rubocop
30
33
  if Gem::Requirement.new("~> 3.0").satisfied_by?(Gem::Version.new(RUBY_VERSION))
31
- gem "rubocop", "1.51.0"
32
- gem "rubocop-minitest", "0.31.0"
34
+ gem "rubocop", "1.60.2"
35
+ gem "rubocop-minitest", "0.34.5"
33
36
  gem "rubocop-packaging", "0.5.2"
34
- gem "rubocop-performance", "1.18.0"
37
+ gem "rubocop-performance", "1.20.2"
35
38
  gem "rubocop-rake", "= 0.6.0"
36
- gem "rubocop-shopify", "2.13.0"
39
+ gem "rubocop-shopify", "2.14.0"
37
40
  end
38
41
  end
39
42
 
@@ -41,5 +44,5 @@ end
41
44
  # `bundle config set --local without rdoc`
42
45
  # Then re-run `bundle install`.
43
46
  group :rdoc do
44
- gem "rdoc", "6.5.0"
47
+ gem "rdoc", "6.6.2"
45
48
  end
data/README.md CHANGED
@@ -55,6 +55,7 @@ There are a few ways to ask exploratory questions:
55
55
 
56
56
  - The Nokogiri mailing list is active at https://groups.google.com/group/nokogiri-talk
57
57
  - Open an issue using the "Help Request" template at https://github.com/sparklemotion/nokogiri/issues
58
+ - Open a discussion at https://github.com/sparklemotion/nokogiri/discussions
58
59
 
59
60
  Please do not mail the maintainers at their personal addresses.
60
61
 
@@ -90,6 +91,8 @@ We bump `Major.Minor.Patch` versions following this guidance:
90
91
  - Updating packaged libraries for non-security-related reasons.
91
92
  - Dropping support for EOLed Ruby versions. [Some folks find this objectionable](https://github.com/sparklemotion/nokogiri/issues/1568), but [SemVer says this is OK if the public API hasn't changed](https://semver.org/#what-should-i-do-if-i-update-my-own-dependencies-without-changing-the-public-api).
92
93
  - Backwards-incompatible changes to internal or private methods and constants. These are detailed in the "Changes" section of each changelog entry.
94
+ - Removal of deprecated methods or parameters, after a generous transition period; usually when those methods or parameters are rarely-used or dangerous to the user. Essentially, removals that do not justify a major version bump.
95
+
93
96
 
94
97
  `Patch`:
95
98
 
@@ -111,7 +114,7 @@ You can help sponsor the maintainers of this software through one of these organ
111
114
 
112
115
  Requirements:
113
116
 
114
- - Ruby >= 2.7
117
+ - Ruby >= 3.0
115
118
  - JRuby >= 9.4.0.0
116
119
 
117
120
 
data/dependencies.yml CHANGED
@@ -1,16 +1,17 @@
1
+
1
2
  libxml2:
2
- version: "2.11.5"
3
- sha256: "3727b078c360ec69fa869de14bd6f75d7ee8d36987b071e6928d4720a28df3a6"
4
- # sha-256 hash provided in https://download.gnome.org/sources/libxml2/2.11/libxml2-2.11.5.sha256sum
3
+ version: "2.12.5"
4
+ sha256: "a972796696afd38073e0f59c283c3a2f5a560b5268b4babc391b286166526b21"
5
+ # sha-256 hash provided in https://download.gnome.org/sources/libxml2/2.12/libxml2-2.12.5.sha256sum
5
6
 
6
7
  libxslt:
7
- version: "1.1.38"
8
- sha256: "1f32450425819a09acaff2ab7a5a7f8a2ec7956e505d7beeb45e843d0e1ecab1"
9
- # sha-256 hash provided in https://download.gnome.org/sources/libxslt/1.1/libxslt-1.1.38.sha256sum
8
+ version: "1.1.39"
9
+ sha256: "2a20ad621148339b0759c4d4e96719362dee64c9a096dbba625ba053846349f0"
10
+ # sha-256 hash provided in https://download.gnome.org/sources/libxslt/1.1/libxslt-1.1.39.sha256sum
10
11
 
11
12
  zlib:
12
- version: "1.2.13"
13
- sha256: "b3a24de97a8fdbc835b9833169501030b8977031bcb54b3b3ac13740f846ab30"
13
+ version: "1.3"
14
+ sha256: "ff0ba4c292013dbc27530b3a81e1f9a813cd39de01ca5e0f8bf355702efa593e"
14
15
  # SHA-256 hash provided on http://zlib.net/
15
16
 
16
17
  libiconv:
@@ -443,7 +443,7 @@ def process_recipe(name, version, static_p, cross_p, cacheable_p = true)
443
443
  # We use 'host' to set compiler prefix for cross-compiling. Prefer host_alias over host. And
444
444
  # prefer i686 (what external dev tools use) to i386 (what ruby's configure.ac emits).
445
445
  recipe.host = RbConfig::CONFIG["host_alias"].empty? ? RbConfig::CONFIG["host"] : RbConfig::CONFIG["host_alias"]
446
- recipe.host = recipe.host.gsub(/i386/, "i686")
446
+ recipe.host = recipe.host.gsub("i386", "i686")
447
447
 
448
448
  recipe.target = File.join(PACKAGE_ROOT_DIR, "ports") if cacheable_p
449
449
  recipe.configure_options << "--libdir=#{File.join(recipe.path, "lib")}"
@@ -651,8 +651,7 @@ append_cppflags(ENV["CPPFLAGS"].split) unless ENV["CPPFLAGS"].nil?
651
651
  append_ldflags(ENV["LDFLAGS"].split) unless ENV["LDFLAGS"].nil?
652
652
  $LIBS = concat_flags($LIBS, ENV["LIBS"])
653
653
 
654
- # nokogumbo code uses C90/C99 features, let's make sure older compilers won't give
655
- # errors/warnings. see #2302
654
+ # libgumbo uses C90/C99 features, see #2302
656
655
  append_cflags(["-std=c99", "-Wno-declaration-after-statement"])
657
656
 
658
657
  # gumbo html5 serialization is slower with O3, let's make sure we use O2
@@ -919,6 +918,7 @@ else
919
918
  "--with-c14n",
920
919
  "--with-debug",
921
920
  "--with-threads",
921
+ "--without-tls", # see https://github.com/sparklemotion/nokogiri/issues/3031
922
922
  "CPPFLAGS=#{cppflags}",
923
923
  "CFLAGS=#{cflags}",
924
924
  ]
@@ -1087,7 +1087,6 @@ have_func("xmlRelaxNGSetParserStructuredErrors") # introduced in libxml 2.6.24
1087
1087
  have_func("xmlRelaxNGSetValidStructuredErrors") # introduced in libxml 2.6.21
1088
1088
  have_func("xmlSchemaSetValidStructuredErrors") # introduced in libxml 2.6.23
1089
1089
  have_func("xmlSchemaSetParserStructuredErrors") # introduced in libxml 2.6.23
1090
- have_func("rb_gc_location") # introduced in Ruby 2.7
1091
1090
  have_func("rb_category_warning") # introduced in Ruby 3.0
1092
1091
 
1093
1092
  other_library_versions_string = OTHER_LIBRARY_VERSIONS.map { |k, v| [k, v].join(":") }.join(",")
@@ -32,7 +32,7 @@ native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
32
32
 
33
33
  if ((status != 0) && !(ctx->options & XML_PARSE_RECOVER)) {
34
34
  // TODO: there appear to be no tests for this block
35
- xmlErrorPtr e = xmlCtxtGetLastError(ctx);
35
+ xmlErrorConstPtr e = xmlCtxtGetLastError(ctx);
36
36
  Nokogiri_error_raise(NULL, e);
37
37
  }
38
38
 
@@ -66,6 +66,13 @@ xmlNodePtr xmlLastElementChild(xmlNodePtr parent);
66
66
  #define XMLNS_PREFIX "xmlns"
67
67
  #define XMLNS_PREFIX_LEN 6 /* including either colon or \0 */
68
68
 
69
+ #ifndef xmlErrorConstPtr
70
+ # if LIBXML_VERSION >= 21200
71
+ # define xmlErrorConstPtr const xmlError *
72
+ # else
73
+ # define xmlErrorConstPtr xmlError *
74
+ # endif
75
+ #endif
69
76
 
70
77
  #include <ruby.h>
71
78
  #include <ruby/st.h>
@@ -227,9 +234,9 @@ void Nokogiri_structured_error_func_save(libxmlStructuredErrorHandlerState *hand
227
234
  void Nokogiri_structured_error_func_save_and_set(libxmlStructuredErrorHandlerState *handler_state, void *user_data,
228
235
  xmlStructuredErrorFunc handler);
229
236
  void Nokogiri_structured_error_func_restore(libxmlStructuredErrorHandlerState *handler_state);
230
- VALUE Nokogiri_wrap_xml_syntax_error(xmlErrorPtr error);
231
- void Nokogiri_error_array_pusher(void *ctx, xmlErrorPtr error);
232
- NORETURN_DECL void Nokogiri_error_raise(void *ctx, xmlErrorPtr error);
237
+ VALUE Nokogiri_wrap_xml_syntax_error(xmlErrorConstPtr error);
238
+ void Nokogiri_error_array_pusher(void *ctx, xmlErrorConstPtr error);
239
+ NORETURN_DECL void Nokogiri_error_raise(void *ctx, xmlErrorConstPtr error);
233
240
  void Nokogiri_marshal_xpath_funcall_and_return_values(xmlXPathParserContextPtr ctx, int nargs, VALUE handler,
234
241
  const char *function_name) ;
235
242
 
@@ -3,7 +3,7 @@
3
3
  static VALUE foreign_error_handler_block = Qnil;
4
4
 
5
5
  static void
6
- foreign_error_handler(void *user_data, xmlErrorPtr c_error)
6
+ foreign_error_handler(void *user_data, xmlErrorConstPtr c_error)
7
7
  {
8
8
  rb_funcall(foreign_error_handler_block, rb_intern("call"), 0);
9
9
  }
@@ -33,8 +33,7 @@ rb_xml_cdata_s_new(int argc, VALUE *argv, VALUE klass)
33
33
 
34
34
  if (!rb_obj_is_kind_of(rb_document, cNokogiriXmlDocument)) {
35
35
  xmlNodePtr deprecated_node_type_arg;
36
- // TODO: deprecate allowing Node
37
- NOKO_WARN_DEPRECATION("Passing a Node as the first parameter to CDATA.new is deprecated. Please pass a Document instead. This will become an error in a future release of Nokogiri.");
36
+ NOKO_WARN_DEPRECATION("Passing a Node as the first parameter to CDATA.new is deprecated. Please pass a Document instead. This will become an error in Nokogiri v1.17.0."); // TODO: deprecated in v1.15.3, remove in v1.17.0
38
37
  Noko_Node_Get_Struct(rb_document, xmlNode, deprecated_node_type_arg);
39
38
  c_document = deprecated_node_type_arg->doc;
40
39
  } else {
@@ -74,8 +74,10 @@ dealloc(void *data)
74
74
 
75
75
  ruby_xfree(doc->_private);
76
76
 
77
+ #if defined(__GNUC__) && __GNUC__ >= 5
77
78
  #pragma GCC diagnostic push
78
79
  #pragma GCC diagnostic ignored "-Wdeprecated-declarations" // xmlDeregisterNodeDefault is deprecated as of libxml2 2.11.0
80
+ #endif
79
81
  /*
80
82
  * libxml-ruby < 3.0.0 uses xmlDeregisterNodeDefault. If the user is using one of those older
81
83
  * versions, the registered callback from libxml-ruby will access the _private pointers set by
@@ -90,7 +92,9 @@ dealloc(void *data)
90
92
  if (xmlDeregisterNodeDefaultValue) {
91
93
  remove_private((xmlNodePtr)doc);
92
94
  }
95
+ #if defined(__GNUC__) && __GNUC__ >= 5
93
96
  #pragma GCC diagnostic pop
97
+ #endif
94
98
 
95
99
  xmlFreeDoc(doc);
96
100
  }
@@ -337,7 +341,7 @@ read_io(VALUE klass,
337
341
  xmlSetStructuredErrorFunc(NULL, NULL);
338
342
 
339
343
  if (doc == NULL) {
340
- xmlErrorPtr error;
344
+ xmlErrorConstPtr error;
341
345
 
342
346
  xmlFreeDoc(doc);
343
347
 
@@ -383,7 +387,7 @@ read_memory(VALUE klass,
383
387
  xmlSetStructuredErrorFunc(NULL, NULL);
384
388
 
385
389
  if (doc == NULL) {
386
- xmlErrorPtr error;
390
+ xmlErrorConstPtr error;
387
391
 
388
392
  xmlFreeDoc(doc);
389
393
 
@@ -537,7 +541,7 @@ create_entity(int argc, VALUE *argv, VALUE self)
537
541
  );
538
542
 
539
543
  if (NULL == ptr) {
540
- xmlErrorPtr error = xmlGetLastError();
544
+ xmlErrorConstPtr error = xmlGetLastError();
541
545
  if (error) {
542
546
  rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
543
547
  } else {
@@ -42,7 +42,6 @@ _xml_namespace_dealloc(void *ptr)
42
42
  xmlFree(ns);
43
43
  }
44
44
 
45
- #ifdef HAVE_RB_GC_LOCATION
46
45
  static void
47
46
  _xml_namespace_update_references(void *ptr)
48
47
  {
@@ -51,9 +50,6 @@ _xml_namespace_update_references(void *ptr)
51
50
  ns->_private = (void *)rb_gc_location((VALUE)ns->_private);
52
51
  }
53
52
  }
54
- #else
55
- # define _xml_namespace_update_references 0
56
- #endif
57
53
 
58
54
  static const rb_data_type_t nokogiri_xml_namespace_type_with_dealloc = {
59
55
  .wrap_struct_name = "Nokogiri::XML::Namespace#with_dealloc",
@@ -28,7 +28,6 @@ _xml_node_mark(void *ptr)
28
28
  }
29
29
  }
30
30
 
31
- #ifdef HAVE_RB_GC_LOCATION
32
31
  static void
33
32
  _xml_node_update_references(void *ptr)
34
33
  {
@@ -38,9 +37,6 @@ _xml_node_update_references(void *ptr)
38
37
  node->_private = (void *)rb_gc_location((VALUE)node->_private);
39
38
  }
40
39
  }
41
- #else
42
- # define _xml_node_update_references 0
43
- #endif
44
40
 
45
41
  static const rb_data_type_t nokogiri_node_type = {
46
42
  .wrap_struct_name = "Nokogiri::XML::Node",
@@ -961,7 +957,7 @@ internal_subset(VALUE self)
961
957
  * - +new_parent_doc+
962
958
  * The new node's parent Document. Defaults to the this node's document.
963
959
  *
964
- * [Returns] The new Nokgiri::XML::Node
960
+ * [Returns] The new Nokogiri::XML::Node
965
961
  */
966
962
  static VALUE
967
963
  duplicate_node(int argc, VALUE *argv, VALUE self)
@@ -2058,8 +2054,7 @@ rb_xml_node_new(int argc, VALUE *argv, VALUE klass)
2058
2054
  rb_raise(rb_eArgError, "document must be a Nokogiri::XML::Node");
2059
2055
  }
2060
2056
  if (!rb_obj_is_kind_of(rb_document_node, cNokogiriXmlDocument)) {
2061
- // TODO: deprecate allowing Node
2062
- NOKO_WARN_DEPRECATION("Passing a Node as the second parameter to Node.new is deprecated. Please pass a Document instead, or prefer an alternative constructor like Node#add_child. This will become an error in a future release of Nokogiri.");
2057
+ NOKO_WARN_DEPRECATION("Passing a Node as the second parameter to Node.new is deprecated. Please pass a Document instead, or prefer an alternative constructor like Node#add_child. This will become an error in Nokogiri v1.17.0."); // TODO: deprecated in v1.13.0, remove in v1.17.0
2063
2058
  }
2064
2059
  Noko_Node_Get_Struct(rb_document_node, xmlNode, c_document_node);
2065
2060
 
@@ -2138,7 +2133,7 @@ process_xincludes(VALUE self, VALUE options)
2138
2133
  xmlSetStructuredErrorFunc(NULL, NULL);
2139
2134
 
2140
2135
  if (rcode < 0) {
2141
- xmlErrorPtr error;
2136
+ xmlErrorConstPtr error;
2142
2137
 
2143
2138
  error = xmlGetLastError();
2144
2139
  if (error) {
@@ -2174,6 +2169,8 @@ in_context(VALUE self, VALUE _str, VALUE _options)
2174
2169
 
2175
2170
  /* Twiddle global variable because of a bug in libxml2.
2176
2171
  * http://git.gnome.org/browse/libxml2/commit/?id=e20fb5a72c83cbfc8e4a8aa3943c6be8febadab7
2172
+ *
2173
+ * TODO: this is fixed, and HTML_PARSE_NOIMPLIED is defined, in libxml2 2.7.7
2177
2174
  */
2178
2175
  #ifndef HTML_PARSE_NOIMPLIED
2179
2176
  htmlHandleOmittedElem(0);
@@ -5,8 +5,14 @@ VALUE cNokogiriXmlReader;
5
5
  static void
6
6
  xml_reader_deallocate(void *data)
7
7
  {
8
+ // free the document separately because we _may_ have triggered preservation by calling
9
+ // xmlTextReaderCurrentDoc during a read_more.
8
10
  xmlTextReaderPtr reader = data;
11
+ xmlDocPtr doc = xmlTextReaderCurrentDoc(reader);
9
12
  xmlFreeTextReader(reader);
13
+ if (doc) {
14
+ xmlFreeDoc(doc);
15
+ }
10
16
  }
11
17
 
12
18
  static const rb_data_type_t xml_reader_type = {
@@ -166,48 +172,6 @@ rb_xml_reader_namespaces(VALUE rb_reader)
166
172
  return rb_namespaces ;
167
173
  }
168
174
 
169
- /*
170
- :call-seq: attribute_nodes() → Array<Nokogiri::XML::Attr>
171
-
172
- Get the attributes of the current node as an Array of XML:Attr
173
-
174
- ⚠ This method is deprecated and unsafe to use. It will be removed in a future version of Nokogiri.
175
-
176
- See related: #attribute_hash, #attributes
177
- */
178
- static VALUE
179
- rb_xml_reader_attribute_nodes(VALUE rb_reader)
180
- {
181
- xmlTextReaderPtr c_reader;
182
- xmlNodePtr c_node;
183
- VALUE attr_nodes;
184
- int j;
185
-
186
- // TODO: deprecated, remove in Nokogiri v1.15, see https://github.com/sparklemotion/nokogiri/issues/2598
187
- // After removal, we can also remove all the "node_has_a_document" special handling from xml_node.c
188
- NOKO_WARN_DEPRECATION("Reader#attribute_nodes is deprecated and will be removed in a future version of Nokogiri. Please use Reader#attribute_hash instead.");
189
-
190
- TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_reader_type, c_reader);
191
-
192
- if (! has_attributes(c_reader)) {
193
- return rb_ary_new() ;
194
- }
195
-
196
- c_node = xmlTextReaderExpand(c_reader);
197
- if (c_node == NULL) {
198
- return Qnil;
199
- }
200
-
201
- attr_nodes = noko_xml_node_attrs(c_node);
202
-
203
- /* ensure that the Reader won't be GCed as long as a node is referenced */
204
- for (j = 0 ; j < RARRAY_LEN(attr_nodes) ; j++) {
205
- rb_iv_set(rb_ary_entry(attr_nodes, j), "@reader", rb_reader);
206
- }
207
-
208
- return attr_nodes;
209
- }
210
-
211
175
  /*
212
176
  :call-seq: attribute_hash() → Hash<String ⇒ String>
213
177
 
@@ -554,9 +518,10 @@ static VALUE
554
518
  read_more(VALUE self)
555
519
  {
556
520
  xmlTextReaderPtr reader;
557
- xmlErrorPtr error;
521
+ xmlErrorConstPtr error;
558
522
  VALUE error_list;
559
523
  int ret;
524
+ xmlDocPtr c_document;
560
525
 
561
526
  TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
562
527
 
@@ -566,6 +531,16 @@ read_more(VALUE self)
566
531
  ret = xmlTextReaderRead(reader);
567
532
  xmlSetStructuredErrorFunc(NULL, NULL);
568
533
 
534
+ c_document = xmlTextReaderCurrentDoc(reader);
535
+ if (c_document && c_document->encoding == NULL) {
536
+ VALUE constructor_encoding = rb_iv_get(self, "@encoding");
537
+ if (RTEST(constructor_encoding)) {
538
+ c_document->encoding = xmlStrdup(BAD_CAST StringValueCStr(constructor_encoding));
539
+ } else {
540
+ c_document->encoding = xmlStrdup(BAD_CAST "UTF-8");
541
+ }
542
+ }
543
+
569
544
  if (ret == 1) { return self; }
570
545
  if (ret == 0) { return Qnil; }
571
546
 
@@ -749,15 +724,18 @@ rb_xml_reader_encoding(VALUE rb_reader)
749
724
  const char *parser_encoding;
750
725
  VALUE constructor_encoding;
751
726
 
727
+ TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_reader_type, c_reader);
728
+ parser_encoding = (const char *)xmlTextReaderConstEncoding(c_reader);
729
+ if (parser_encoding) {
730
+ return NOKOGIRI_STR_NEW2(parser_encoding);
731
+ }
732
+
752
733
  constructor_encoding = rb_iv_get(rb_reader, "@encoding");
753
734
  if (RTEST(constructor_encoding)) {
754
735
  return constructor_encoding;
755
736
  }
756
737
 
757
- TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_reader_type, c_reader);
758
- parser_encoding = (const char *)xmlTextReaderConstEncoding(c_reader);
759
- if (parser_encoding == NULL) { return Qnil; }
760
- return NOKOGIRI_STR_NEW2(parser_encoding);
738
+ return Qnil;
761
739
  }
762
740
 
763
741
  void
@@ -778,7 +756,6 @@ noko_init_xml_reader(void)
778
756
  rb_define_method(cNokogiriXmlReader, "attribute", reader_attribute, 1);
779
757
  rb_define_method(cNokogiriXmlReader, "attribute_at", attribute_at, 1);
780
758
  rb_define_method(cNokogiriXmlReader, "attribute_count", attribute_count, 0);
781
- rb_define_method(cNokogiriXmlReader, "attribute_nodes", rb_xml_reader_attribute_nodes, 0);
782
759
  rb_define_method(cNokogiriXmlReader, "attribute_hash", rb_xml_reader_attribute_hash, 0);
783
760
  rb_define_method(cNokogiriXmlReader, "attributes?", attributes_eh, 0);
784
761
  rb_define_method(cNokogiriXmlReader, "base_uri", rb_xml_reader_base_uri, 0);
@@ -93,7 +93,7 @@ xml_relax_ng_parse_schema(
93
93
  xmlRelaxNGFreeParserCtxt(c_parser_context);
94
94
 
95
95
  if (NULL == c_schema) {
96
- xmlErrorPtr error = xmlGetLastError();
96
+ xmlErrorConstPtr error = xmlGetLastError();
97
97
  if (error) {
98
98
  Nokogiri_error_raise(NULL, error);
99
99
  } else {
@@ -59,6 +59,10 @@ parse_io(VALUE klass, VALUE io, VALUE encoding)
59
59
  (xmlInputReadCallback)noko_io_read,
60
60
  (xmlInputCloseCallback)noko_io_close,
61
61
  (void *)io, enc);
62
+ if (!ctxt) {
63
+ rb_raise(rb_eRuntimeError, "failed to create xml sax parser context");
64
+ }
65
+
62
66
  if (ctxt->sax) {
63
67
  xmlFree(ctxt->sax);
64
68
  ctxt->sax = NULL;
@@ -59,7 +59,7 @@ native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
59
59
 
60
60
  if (xmlParseChunk(ctx, chunk, size, Qtrue == _last_chunk ? 1 : 0)) {
61
61
  if (!(ctx->options & XML_PARSE_RECOVER)) {
62
- xmlErrorPtr e = xmlCtxtGetLastError(ctx);
62
+ xmlErrorConstPtr e = xmlCtxtGetLastError(ctx);
63
63
  Nokogiri_error_raise(NULL, e);
64
64
  }
65
65
  }
@@ -146,7 +146,7 @@ xml_schema_parse_schema(
146
146
  xmlSchemaFreeParserCtxt(c_parser_context);
147
147
 
148
148
  if (NULL == c_schema) {
149
- xmlErrorPtr error = xmlGetLastError();
149
+ xmlErrorConstPtr error = xmlGetLastError();
150
150
  if (error) {
151
151
  Nokogiri_error_raise(NULL, error);
152
152
  } else {
@@ -222,8 +222,7 @@ rb_xml_schema_s_from_document(int argc, VALUE *argv, VALUE klass)
222
222
 
223
223
  if (!rb_obj_is_kind_of(rb_document, cNokogiriXmlDocument)) {
224
224
  xmlNodePtr deprecated_node_type_arg;
225
- // TODO: deprecate allowing Node
226
- NOKO_WARN_DEPRECATION("Passing a Node as the first parameter to Schema.from_document is deprecated. Please pass a Document instead. This will become an error in a future release of Nokogiri.");
225
+ NOKO_WARN_DEPRECATION("Passing a Node as the first parameter to Schema.from_document is deprecated. Please pass a Document instead. This will become an error in Nokogiri v1.17.0."); // TODO: deprecated in v1.15.3, remove in v1.17.0
227
226
  Noko_Node_Get_Struct(rb_document, xmlNode, deprecated_node_type_arg);
228
227
  c_document = deprecated_node_type_arg->doc;
229
228
  } else {
@@ -26,7 +26,7 @@ Nokogiri_structured_error_func_restore(libxmlStructuredErrorHandlerState *handle
26
26
  }
27
27
 
28
28
  void
29
- Nokogiri_error_array_pusher(void *ctx, xmlErrorPtr error)
29
+ Nokogiri_error_array_pusher(void *ctx, xmlErrorConstPtr error)
30
30
  {
31
31
  VALUE list = (VALUE)ctx;
32
32
  Check_Type(list, T_ARRAY);
@@ -34,13 +34,13 @@ Nokogiri_error_array_pusher(void *ctx, xmlErrorPtr error)
34
34
  }
35
35
 
36
36
  void
37
- Nokogiri_error_raise(void *ctx, xmlErrorPtr error)
37
+ Nokogiri_error_raise(void *ctx, xmlErrorConstPtr error)
38
38
  {
39
39
  rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
40
40
  }
41
41
 
42
42
  VALUE
43
- Nokogiri_wrap_xml_syntax_error(xmlErrorPtr error)
43
+ Nokogiri_wrap_xml_syntax_error(xmlErrorConstPtr error)
44
44
  {
45
45
  VALUE msg, e, klass;
46
46
 
@@ -28,8 +28,7 @@ rb_xml_text_s_new(int argc, VALUE *argv, VALUE klass)
28
28
 
29
29
  if (!rb_obj_is_kind_of(rb_document, cNokogiriXmlDocument)) {
30
30
  xmlNodePtr deprecated_node_type_arg;
31
- // TODO: deprecate allowing Node
32
- NOKO_WARN_DEPRECATION("Passing a Node as the second parameter to Text.new is deprecated. Please pass a Document instead. This will become an error in a future release of Nokogiri.");
31
+ NOKO_WARN_DEPRECATION("Passing a Node as the second parameter to Text.new is deprecated. Please pass a Document instead. This will become an error in Nokogiri v1.17.0."); // TODO: deprecated in v1.15.3, remove in v1.17.0
33
32
  Noko_Node_Get_Struct(rb_document, xmlNode, deprecated_node_type_arg);
34
33
  c_document = deprecated_node_type_arg->doc;
35
34
  } else {
@@ -321,11 +321,8 @@ handler_lookup(void *data, const xmlChar *c_name, const xmlChar *c_ns_uri)
321
321
  VALUE rb_handler = (VALUE)data;
322
322
  if (rb_respond_to(rb_handler, rb_intern((const char *)c_name))) {
323
323
  if (c_ns_uri == NULL) {
324
- NOKO_WARN_DEPRECATION(
325
- "A custom XPath or CSS handler function named '%s' is being invoked without a namespace."
326
- " Please update your query to reference this function as 'nokogiri:%s'."
327
- " Invoking custom handler functions without a namespace is deprecated and support will be removed in a future release of Nokogiri.",
328
- c_name, c_name);
324
+ NOKO_WARN_DEPRECATION("A custom XPath or CSS handler function named '%s' is being invoked without a namespace. Please update your query to reference this function as 'nokogiri:%s'. Invoking custom handler functions without a namespace is deprecated and will become an error in Nokogiri v1.17.0.",
325
+ c_name, c_name); // deprecated in v1.15.0, remove in v1.17.0
329
326
  }
330
327
  return method_caller;
331
328
  }
@@ -71,7 +71,12 @@ Nokogiri_wrap_xslt_stylesheet(xsltStylesheetPtr ss)
71
71
  * call-seq:
72
72
  * parse_stylesheet_doc(document)
73
73
  *
74
- * Parse a stylesheet from +document+.
74
+ * Parse an XSLT::Stylesheet from +document+.
75
+ *
76
+ * [Parameters]
77
+ * - +document+ (Nokogiri::XML::Document) the document to be parsed.
78
+ *
79
+ * [Returns] Nokogiri::XSLT::Stylesheet
75
80
  */
76
81
  static VALUE
77
82
  parse_stylesheet_doc(VALUE klass, VALUE xmldocobj)
@@ -104,7 +109,7 @@ parse_stylesheet_doc(VALUE klass, VALUE xmldocobj)
104
109
  * call-seq:
105
110
  * serialize(document)
106
111
  *
107
- * Serialize +document+ to an xml string.
112
+ * Serialize +document+ to an xml string, as specified by the +method+ parameter in the Stylesheet.
108
113
  */
109
114
  static VALUE
110
115
  rb_xslt_stylesheet_serialize(VALUE self, VALUE xmlobj)
@@ -133,7 +138,7 @@ rb_xslt_stylesheet_serialize(VALUE self, VALUE xmlobj)
133
138
  * transform(document)
134
139
  * transform(document, params = {})
135
140
  *
136
- * Apply an XSLT stylesheet to an XML::Document.
141
+ * Transform an XML::Document as defined by an XSLT::Stylesheet.
137
142
  *
138
143
  * [Parameters]
139
144
  * - +document+ (Nokogiri::XML::Document) the document to be transformed.
@@ -13,6 +13,23 @@ LDFLAGS := -pthread
13
13
 
14
14
  all: check
15
15
 
16
+ oss-fuzz:
17
+ ./fuzzer/build-ossfuzz.sh
18
+
19
+ fuzzers: fuzzer-normal fuzzer-asan fuzzer-ubsan fuzzer-msan
20
+
21
+ fuzzer-normal:
22
+ ./fuzzer/build.sh
23
+
24
+ fuzzer-asan:
25
+ SANITIZER=asan ./fuzzer/build.sh
26
+
27
+ fuzzer-ubsan:
28
+ SANITIZER=ubsan ./fuzzer/build.sh
29
+
30
+ fuzzer-msan:
31
+ SANITIZER=msan ./fuzzer/build.sh
32
+
16
33
  # don't try to regenerate ragel or gperf files in CI, that should be a development-only action and
17
34
  # the generated files should be committed to SCM
18
35
  ifneq ($(CI),true)
@@ -81,6 +98,7 @@ coverage:
81
98
 
82
99
  clean:
83
100
  $(RM) -r build
101
+ $(RM) -r fuzzer/build fuzzer/src-* fuzzer/gumbo_corpus
84
102
 
85
103
  build/src/flags: | build/src
86
104
  @echo 'old_CC := $(CC)' > $@
@@ -4826,14 +4826,17 @@ GumboOutput* gumbo_parse_with_options (
4826
4826
  // to a token.
4827
4827
  if (token.type == GUMBO_TOKEN_END_TAG &&
4828
4828
  token.v.end_tag.tag == GUMBO_TAG_UNKNOWN)
4829
+ {
4829
4830
  gumbo_free(token.v.end_tag.name);
4831
+ token.v.end_tag.name = NULL;
4832
+ }
4833
+ if (unlikely(state->_open_elements.length > max_tree_depth)) {
4834
+ parser._output->status = GUMBO_STATUS_TREE_TOO_DEEP;
4835
+ gumbo_debug("Tree depth limit exceeded.\n");
4836
+ break;
4837
+ }
4830
4838
  }
4831
4839
 
4832
- if (unlikely(state->_open_elements.length > max_tree_depth)) {
4833
- parser._output->status = GUMBO_STATUS_TREE_TOO_DEEP;
4834
- gumbo_debug("Tree depth limit exceeded.\n");
4835
- break;
4836
- }
4837
4840
 
4838
4841
  ++loop_count;
4839
4842
  assert(loop_count < 1000000000UL);
@@ -506,6 +506,7 @@ static void abandon_current_tag(GumboParser* parser) {
506
506
  for (unsigned int i = 0; i < tag_state->_attributes.length; ++i) {
507
507
  gumbo_destroy_attribute(tag_state->_attributes.data[i]);
508
508
  }
509
+ gumbo_free(tag_state->_name);
509
510
  gumbo_free(tag_state->_attributes.data);
510
511
  mark_tag_state_as_empty(tag_state);
511
512
  gumbo_string_buffer_destroy(&tag_state->_buffer);