nokogiri 1.15.4 → 1.16.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (49) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +17 -14
  3. data/README.md +4 -1
  4. data/dependencies.yml +9 -8
  5. data/ext/nokogiri/extconf.rb +3 -4
  6. data/ext/nokogiri/html4_sax_push_parser.c +1 -1
  7. data/ext/nokogiri/nokogiri.h +10 -3
  8. data/ext/nokogiri/test_global_handlers.c +1 -1
  9. data/ext/nokogiri/xml_cdata.c +1 -2
  10. data/ext/nokogiri/xml_document.c +7 -3
  11. data/ext/nokogiri/xml_namespace.c +0 -4
  12. data/ext/nokogiri/xml_node.c +5 -8
  13. data/ext/nokogiri/xml_reader.c +26 -48
  14. data/ext/nokogiri/xml_relax_ng.c +1 -1
  15. data/ext/nokogiri/xml_sax_parser_context.c +4 -0
  16. data/ext/nokogiri/xml_sax_push_parser.c +1 -1
  17. data/ext/nokogiri/xml_schema.c +2 -3
  18. data/ext/nokogiri/xml_syntax_error.c +3 -3
  19. data/ext/nokogiri/xml_text.c +1 -2
  20. data/ext/nokogiri/xml_xpath_context.c +2 -5
  21. data/ext/nokogiri/xslt_stylesheet.c +8 -3
  22. data/gumbo-parser/Makefile +18 -0
  23. data/gumbo-parser/src/parser.c +8 -5
  24. data/gumbo-parser/src/tokenizer.c +1 -0
  25. data/lib/nokogiri/css/parser_extras.rb +1 -1
  26. data/lib/nokogiri/css/xpath_visitor.rb +1 -21
  27. data/lib/nokogiri/html4/document.rb +1 -1
  28. data/lib/nokogiri/html4/encoding_reader.rb +1 -1
  29. data/lib/nokogiri/html5.rb +0 -66
  30. data/lib/nokogiri/version/constant.rb +1 -1
  31. data/lib/nokogiri/version/info.rb +6 -5
  32. data/lib/nokogiri/xml/attr.rb +2 -2
  33. data/lib/nokogiri/xml/document.rb +4 -5
  34. data/lib/nokogiri/xml/document_fragment.rb +2 -2
  35. data/lib/nokogiri/xml/namespace.rb +1 -2
  36. data/lib/nokogiri/xml/node.rb +31 -24
  37. data/lib/nokogiri/xml/node_set.rb +3 -3
  38. data/lib/nokogiri/xml/reader.rb +10 -9
  39. data/lib/nokogiri/xml/searchable.rb +3 -3
  40. data/lib/nokogiri/xml/syntax_error.rb +1 -1
  41. data/lib/nokogiri/xml.rb +1 -1
  42. data/lib/nokogiri/xslt/stylesheet.rb +29 -7
  43. data/lib/nokogiri/xslt.rb +1 -1
  44. data/lib/nokogiri.rb +1 -1
  45. data/ports/archives/libxml2-2.12.9.tar.xz +0 -0
  46. data/ports/archives/libxslt-1.1.39.tar.xz +0 -0
  47. metadata +9 -9
  48. data/ports/archives/libxml2-2.11.5.tar.xz +0 -0
  49. data/ports/archives/libxslt-1.1.38.tar.xz +0 -0
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 244e673a87f9559a0aa597c75f7c8848f96f67db9d6fa8b9b21fecc466c472d2
4
- data.tar.gz: f046f870e270f8daa484bb16fc8915a7228dfee3d6c61220e78fea753a2250ee
3
+ metadata.gz: ecac71fbec559a9b68c4009b18d8ecbc511fbfbfad6cebd462deda560134ecfc
4
+ data.tar.gz: 9d513df3dbd9e4f676b1adb46bae239b6006483dbaf0734fcc4e39db8497a3a9
5
5
  SHA512:
6
- metadata.gz: bce186e13d536128d231590d143bd40db88cf1c3008e8d0775b411b1ea7e26eb27ff81af47695b0b073a603bdc111361e56626dbdc7ae43f0f419f06b7128f26
7
- data.tar.gz: 252a25c25d530d20012094fa6ba73c87823ca40dd73ec42cde8f579ff14a5c8bc8a3e799e37c9e5848bcc2c4d4ae3d94b88f8b0e6d0771136549bbfd5bd4b7c8
6
+ metadata.gz: eefac12f8f3ec311128199c8448a234a79bb691a76a165527fe8a66522407b0e994f87270ba6de1159fc9c561c2c67645f03ad4bbb1a3c8a38e2d353fdcbdea8
7
+ data.tar.gz: 8b78cd520efbec6a0959d325f5326ae49c2e4a6cf1cbbcc67bbe7688ad8c90c7e2be412ae205734eb3cf8df54b8fac793a249cc5eaf2ad6033938d801eee15e9
data/Gemfile CHANGED
@@ -5,35 +5,38 @@ source "https://rubygems.org"
5
5
  gemspec
6
6
 
7
7
  group :development do
8
+ # ruby 3.4.0-dev removed some gems from the default set
9
+ #
10
+ # TODO: we should be able to remove these as our gem dependencies sort it out and we pull them in
11
+ # transitively.
12
+ gem "mutex_m"
13
+
8
14
  # bootstrapping
9
15
  gem "bundler", "~> 2.3"
10
- gem "rake", "= 13.0.6"
16
+ gem "rake", "13.1.0"
11
17
 
12
18
  # building extensions
13
- gem "rake-compiler", "= 1.2.1"
14
- gem "rake-compiler-dock", "= 1.3.0"
15
-
16
- # documentation
17
- gem "hoe-markdown", "= 1.4.0"
19
+ gem "rake-compiler", "1.2.6"
20
+ gem "rake-compiler-dock", "1.4.0"
18
21
 
19
22
  # parser generator
20
23
  gem "rexical", "= 1.0.7"
21
24
 
22
25
  # tests
23
- gem "minitest", "5.18.0"
24
- gem "minitest-reporters", "1.6.0"
25
- gem "ruby_memcheck", "1.3.2"
26
+ gem "minitest", "5.21.2"
27
+ gem "minitest-parallel_fork", "2.0.0"
28
+ gem "ruby_memcheck", "2.3.0"
26
29
  gem "rubyzip", "~> 2.3.2"
27
30
  gem "simplecov", "= 0.21.2"
28
31
 
29
32
  # rubocop
30
33
  if Gem::Requirement.new("~> 3.0").satisfied_by?(Gem::Version.new(RUBY_VERSION))
31
- gem "rubocop", "1.51.0"
32
- gem "rubocop-minitest", "0.31.0"
34
+ gem "rubocop", "1.60.2"
35
+ gem "rubocop-minitest", "0.34.5"
33
36
  gem "rubocop-packaging", "0.5.2"
34
- gem "rubocop-performance", "1.18.0"
37
+ gem "rubocop-performance", "1.20.2"
35
38
  gem "rubocop-rake", "= 0.6.0"
36
- gem "rubocop-shopify", "2.13.0"
39
+ gem "rubocop-shopify", "2.14.0"
37
40
  end
38
41
  end
39
42
 
@@ -41,5 +44,5 @@ end
41
44
  # `bundle config set --local without rdoc`
42
45
  # Then re-run `bundle install`.
43
46
  group :rdoc do
44
- gem "rdoc", "6.5.0"
47
+ gem "rdoc", "6.6.2"
45
48
  end
data/README.md CHANGED
@@ -55,6 +55,7 @@ There are a few ways to ask exploratory questions:
55
55
 
56
56
  - The Nokogiri mailing list is active at https://groups.google.com/group/nokogiri-talk
57
57
  - Open an issue using the "Help Request" template at https://github.com/sparklemotion/nokogiri/issues
58
+ - Open a discussion at https://github.com/sparklemotion/nokogiri/discussions
58
59
 
59
60
  Please do not mail the maintainers at their personal addresses.
60
61
 
@@ -90,6 +91,8 @@ We bump `Major.Minor.Patch` versions following this guidance:
90
91
  - Updating packaged libraries for non-security-related reasons.
91
92
  - Dropping support for EOLed Ruby versions. [Some folks find this objectionable](https://github.com/sparklemotion/nokogiri/issues/1568), but [SemVer says this is OK if the public API hasn't changed](https://semver.org/#what-should-i-do-if-i-update-my-own-dependencies-without-changing-the-public-api).
92
93
  - Backwards-incompatible changes to internal or private methods and constants. These are detailed in the "Changes" section of each changelog entry.
94
+ - Removal of deprecated methods or parameters, after a generous transition period; usually when those methods or parameters are rarely-used or dangerous to the user. Essentially, removals that do not justify a major version bump.
95
+
93
96
 
94
97
  `Patch`:
95
98
 
@@ -111,7 +114,7 @@ You can help sponsor the maintainers of this software through one of these organ
111
114
 
112
115
  Requirements:
113
116
 
114
- - Ruby >= 2.7
117
+ - Ruby >= 3.0
115
118
  - JRuby >= 9.4.0.0
116
119
 
117
120
 
data/dependencies.yml CHANGED
@@ -1,16 +1,17 @@
1
+ ---
1
2
  libxml2:
2
- version: "2.11.5"
3
- sha256: "3727b078c360ec69fa869de14bd6f75d7ee8d36987b071e6928d4720a28df3a6"
4
- # sha-256 hash provided in https://download.gnome.org/sources/libxml2/2.11/libxml2-2.11.5.sha256sum
3
+ version: "2.12.9"
4
+ sha256: "59912db536ab56a3996489ea0299768c7bcffe57169f0235e7f962a91f483590"
5
+ # sha-256 hash provided in https://download.gnome.org/sources/libxml2/2.12/libxml2-2.12.9.sha256sum
5
6
 
6
7
  libxslt:
7
- version: "1.1.38"
8
- sha256: "1f32450425819a09acaff2ab7a5a7f8a2ec7956e505d7beeb45e843d0e1ecab1"
9
- # sha-256 hash provided in https://download.gnome.org/sources/libxslt/1.1/libxslt-1.1.38.sha256sum
8
+ version: "1.1.39"
9
+ sha256: "2a20ad621148339b0759c4d4e96719362dee64c9a096dbba625ba053846349f0"
10
+ # sha-256 hash provided in https://download.gnome.org/sources/libxslt/1.1/libxslt-1.1.39.sha256sum
10
11
 
11
12
  zlib:
12
- version: "1.2.13"
13
- sha256: "b3a24de97a8fdbc835b9833169501030b8977031bcb54b3b3ac13740f846ab30"
13
+ version: "1.3.1"
14
+ sha256: "9a93b2b7dfdac77ceba5a558a580e74667dd6fede4585b91eefb60f03b72df23"
14
15
  # SHA-256 hash provided on http://zlib.net/
15
16
 
16
17
  libiconv:
@@ -443,7 +443,7 @@ def process_recipe(name, version, static_p, cross_p, cacheable_p = true)
443
443
  # We use 'host' to set compiler prefix for cross-compiling. Prefer host_alias over host. And
444
444
  # prefer i686 (what external dev tools use) to i386 (what ruby's configure.ac emits).
445
445
  recipe.host = RbConfig::CONFIG["host_alias"].empty? ? RbConfig::CONFIG["host"] : RbConfig::CONFIG["host_alias"]
446
- recipe.host = recipe.host.gsub(/i386/, "i686")
446
+ recipe.host = recipe.host.gsub("i386", "i686")
447
447
 
448
448
  recipe.target = File.join(PACKAGE_ROOT_DIR, "ports") if cacheable_p
449
449
  recipe.configure_options << "--libdir=#{File.join(recipe.path, "lib")}"
@@ -651,8 +651,7 @@ append_cppflags(ENV["CPPFLAGS"].split) unless ENV["CPPFLAGS"].nil?
651
651
  append_ldflags(ENV["LDFLAGS"].split) unless ENV["LDFLAGS"].nil?
652
652
  $LIBS = concat_flags($LIBS, ENV["LIBS"])
653
653
 
654
- # nokogumbo code uses C90/C99 features, let's make sure older compilers won't give
655
- # errors/warnings. see #2302
654
+ # libgumbo uses C90/C99 features, see #2302
656
655
  append_cflags(["-std=c99", "-Wno-declaration-after-statement"])
657
656
 
658
657
  # gumbo html5 serialization is slower with O3, let's make sure we use O2
@@ -919,6 +918,7 @@ else
919
918
  "--with-c14n",
920
919
  "--with-debug",
921
920
  "--with-threads",
921
+ "--without-tls", # see https://github.com/sparklemotion/nokogiri/issues/3031
922
922
  "CPPFLAGS=#{cppflags}",
923
923
  "CFLAGS=#{cflags}",
924
924
  ]
@@ -1087,7 +1087,6 @@ have_func("xmlRelaxNGSetParserStructuredErrors") # introduced in libxml 2.6.24
1087
1087
  have_func("xmlRelaxNGSetValidStructuredErrors") # introduced in libxml 2.6.21
1088
1088
  have_func("xmlSchemaSetValidStructuredErrors") # introduced in libxml 2.6.23
1089
1089
  have_func("xmlSchemaSetParserStructuredErrors") # introduced in libxml 2.6.23
1090
- have_func("rb_gc_location") # introduced in Ruby 2.7
1091
1090
  have_func("rb_category_warning") # introduced in Ruby 3.0
1092
1091
 
1093
1092
  other_library_versions_string = OTHER_LIBRARY_VERSIONS.map { |k, v| [k, v].join(":") }.join(",")
@@ -32,7 +32,7 @@ native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
32
32
 
33
33
  if ((status != 0) && !(ctx->options & XML_PARSE_RECOVER)) {
34
34
  // TODO: there appear to be no tests for this block
35
- xmlErrorPtr e = xmlCtxtGetLastError(ctx);
35
+ xmlErrorConstPtr e = xmlCtxtGetLastError(ctx);
36
36
  Nokogiri_error_raise(NULL, e);
37
37
  }
38
38
 
@@ -66,6 +66,13 @@ xmlNodePtr xmlLastElementChild(xmlNodePtr parent);
66
66
  #define XMLNS_PREFIX "xmlns"
67
67
  #define XMLNS_PREFIX_LEN 6 /* including either colon or \0 */
68
68
 
69
+ #ifndef xmlErrorConstPtr
70
+ # if LIBXML_VERSION >= 21200
71
+ # define xmlErrorConstPtr const xmlError *
72
+ # else
73
+ # define xmlErrorConstPtr xmlError *
74
+ # endif
75
+ #endif
69
76
 
70
77
  #include <ruby.h>
71
78
  #include <ruby/st.h>
@@ -227,9 +234,9 @@ void Nokogiri_structured_error_func_save(libxmlStructuredErrorHandlerState *hand
227
234
  void Nokogiri_structured_error_func_save_and_set(libxmlStructuredErrorHandlerState *handler_state, void *user_data,
228
235
  xmlStructuredErrorFunc handler);
229
236
  void Nokogiri_structured_error_func_restore(libxmlStructuredErrorHandlerState *handler_state);
230
- VALUE Nokogiri_wrap_xml_syntax_error(xmlErrorPtr error);
231
- void Nokogiri_error_array_pusher(void *ctx, xmlErrorPtr error);
232
- NORETURN_DECL void Nokogiri_error_raise(void *ctx, xmlErrorPtr error);
237
+ VALUE Nokogiri_wrap_xml_syntax_error(xmlErrorConstPtr error);
238
+ void Nokogiri_error_array_pusher(void *ctx, xmlErrorConstPtr error);
239
+ NORETURN_DECL void Nokogiri_error_raise(void *ctx, xmlErrorConstPtr error);
233
240
  void Nokogiri_marshal_xpath_funcall_and_return_values(xmlXPathParserContextPtr ctx, int nargs, VALUE handler,
234
241
  const char *function_name) ;
235
242
 
@@ -3,7 +3,7 @@
3
3
  static VALUE foreign_error_handler_block = Qnil;
4
4
 
5
5
  static void
6
- foreign_error_handler(void *user_data, xmlErrorPtr c_error)
6
+ foreign_error_handler(void *user_data, xmlErrorConstPtr c_error)
7
7
  {
8
8
  rb_funcall(foreign_error_handler_block, rb_intern("call"), 0);
9
9
  }
@@ -33,8 +33,7 @@ rb_xml_cdata_s_new(int argc, VALUE *argv, VALUE klass)
33
33
 
34
34
  if (!rb_obj_is_kind_of(rb_document, cNokogiriXmlDocument)) {
35
35
  xmlNodePtr deprecated_node_type_arg;
36
- // TODO: deprecate allowing Node
37
- NOKO_WARN_DEPRECATION("Passing a Node as the first parameter to CDATA.new is deprecated. Please pass a Document instead. This will become an error in a future release of Nokogiri.");
36
+ NOKO_WARN_DEPRECATION("Passing a Node as the first parameter to CDATA.new is deprecated. Please pass a Document instead. This will become an error in Nokogiri v1.17.0."); // TODO: deprecated in v1.15.3, remove in v1.17.0
38
37
  Noko_Node_Get_Struct(rb_document, xmlNode, deprecated_node_type_arg);
39
38
  c_document = deprecated_node_type_arg->doc;
40
39
  } else {
@@ -74,8 +74,10 @@ dealloc(void *data)
74
74
 
75
75
  ruby_xfree(doc->_private);
76
76
 
77
+ #if defined(__GNUC__) && __GNUC__ >= 5
77
78
  #pragma GCC diagnostic push
78
79
  #pragma GCC diagnostic ignored "-Wdeprecated-declarations" // xmlDeregisterNodeDefault is deprecated as of libxml2 2.11.0
80
+ #endif
79
81
  /*
80
82
  * libxml-ruby < 3.0.0 uses xmlDeregisterNodeDefault. If the user is using one of those older
81
83
  * versions, the registered callback from libxml-ruby will access the _private pointers set by
@@ -90,7 +92,9 @@ dealloc(void *data)
90
92
  if (xmlDeregisterNodeDefaultValue) {
91
93
  remove_private((xmlNodePtr)doc);
92
94
  }
95
+ #if defined(__GNUC__) && __GNUC__ >= 5
93
96
  #pragma GCC diagnostic pop
97
+ #endif
94
98
 
95
99
  xmlFreeDoc(doc);
96
100
  }
@@ -337,7 +341,7 @@ read_io(VALUE klass,
337
341
  xmlSetStructuredErrorFunc(NULL, NULL);
338
342
 
339
343
  if (doc == NULL) {
340
- xmlErrorPtr error;
344
+ xmlErrorConstPtr error;
341
345
 
342
346
  xmlFreeDoc(doc);
343
347
 
@@ -383,7 +387,7 @@ read_memory(VALUE klass,
383
387
  xmlSetStructuredErrorFunc(NULL, NULL);
384
388
 
385
389
  if (doc == NULL) {
386
- xmlErrorPtr error;
390
+ xmlErrorConstPtr error;
387
391
 
388
392
  xmlFreeDoc(doc);
389
393
 
@@ -537,7 +541,7 @@ create_entity(int argc, VALUE *argv, VALUE self)
537
541
  );
538
542
 
539
543
  if (NULL == ptr) {
540
- xmlErrorPtr error = xmlGetLastError();
544
+ xmlErrorConstPtr error = xmlGetLastError();
541
545
  if (error) {
542
546
  rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
543
547
  } else {
@@ -42,7 +42,6 @@ _xml_namespace_dealloc(void *ptr)
42
42
  xmlFree(ns);
43
43
  }
44
44
 
45
- #ifdef HAVE_RB_GC_LOCATION
46
45
  static void
47
46
  _xml_namespace_update_references(void *ptr)
48
47
  {
@@ -51,9 +50,6 @@ _xml_namespace_update_references(void *ptr)
51
50
  ns->_private = (void *)rb_gc_location((VALUE)ns->_private);
52
51
  }
53
52
  }
54
- #else
55
- # define _xml_namespace_update_references 0
56
- #endif
57
53
 
58
54
  static const rb_data_type_t nokogiri_xml_namespace_type_with_dealloc = {
59
55
  .wrap_struct_name = "Nokogiri::XML::Namespace#with_dealloc",
@@ -28,7 +28,6 @@ _xml_node_mark(void *ptr)
28
28
  }
29
29
  }
30
30
 
31
- #ifdef HAVE_RB_GC_LOCATION
32
31
  static void
33
32
  _xml_node_update_references(void *ptr)
34
33
  {
@@ -38,9 +37,6 @@ _xml_node_update_references(void *ptr)
38
37
  node->_private = (void *)rb_gc_location((VALUE)node->_private);
39
38
  }
40
39
  }
41
- #else
42
- # define _xml_node_update_references 0
43
- #endif
44
40
 
45
41
  static const rb_data_type_t nokogiri_node_type = {
46
42
  .wrap_struct_name = "Nokogiri::XML::Node",
@@ -961,7 +957,7 @@ internal_subset(VALUE self)
961
957
  * - +new_parent_doc+
962
958
  * The new node's parent Document. Defaults to the this node's document.
963
959
  *
964
- * [Returns] The new Nokgiri::XML::Node
960
+ * [Returns] The new Nokogiri::XML::Node
965
961
  */
966
962
  static VALUE
967
963
  duplicate_node(int argc, VALUE *argv, VALUE self)
@@ -2058,8 +2054,7 @@ rb_xml_node_new(int argc, VALUE *argv, VALUE klass)
2058
2054
  rb_raise(rb_eArgError, "document must be a Nokogiri::XML::Node");
2059
2055
  }
2060
2056
  if (!rb_obj_is_kind_of(rb_document_node, cNokogiriXmlDocument)) {
2061
- // TODO: deprecate allowing Node
2062
- NOKO_WARN_DEPRECATION("Passing a Node as the second parameter to Node.new is deprecated. Please pass a Document instead, or prefer an alternative constructor like Node#add_child. This will become an error in a future release of Nokogiri.");
2057
+ NOKO_WARN_DEPRECATION("Passing a Node as the second parameter to Node.new is deprecated. Please pass a Document instead, or prefer an alternative constructor like Node#add_child. This will become an error in Nokogiri v1.17.0."); // TODO: deprecated in v1.13.0, remove in v1.17.0
2063
2058
  }
2064
2059
  Noko_Node_Get_Struct(rb_document_node, xmlNode, c_document_node);
2065
2060
 
@@ -2138,7 +2133,7 @@ process_xincludes(VALUE self, VALUE options)
2138
2133
  xmlSetStructuredErrorFunc(NULL, NULL);
2139
2134
 
2140
2135
  if (rcode < 0) {
2141
- xmlErrorPtr error;
2136
+ xmlErrorConstPtr error;
2142
2137
 
2143
2138
  error = xmlGetLastError();
2144
2139
  if (error) {
@@ -2174,6 +2169,8 @@ in_context(VALUE self, VALUE _str, VALUE _options)
2174
2169
 
2175
2170
  /* Twiddle global variable because of a bug in libxml2.
2176
2171
  * http://git.gnome.org/browse/libxml2/commit/?id=e20fb5a72c83cbfc8e4a8aa3943c6be8febadab7
2172
+ *
2173
+ * TODO: this is fixed, and HTML_PARSE_NOIMPLIED is defined, in libxml2 2.7.7
2177
2174
  */
2178
2175
  #ifndef HTML_PARSE_NOIMPLIED
2179
2176
  htmlHandleOmittedElem(0);
@@ -5,8 +5,14 @@ VALUE cNokogiriXmlReader;
5
5
  static void
6
6
  xml_reader_deallocate(void *data)
7
7
  {
8
+ // free the document separately because we _may_ have triggered preservation by calling
9
+ // xmlTextReaderCurrentDoc during a read_more.
8
10
  xmlTextReaderPtr reader = data;
11
+ xmlDocPtr doc = xmlTextReaderCurrentDoc(reader);
9
12
  xmlFreeTextReader(reader);
13
+ if (doc) {
14
+ xmlFreeDoc(doc);
15
+ }
10
16
  }
11
17
 
12
18
  static const rb_data_type_t xml_reader_type = {
@@ -166,48 +172,6 @@ rb_xml_reader_namespaces(VALUE rb_reader)
166
172
  return rb_namespaces ;
167
173
  }
168
174
 
169
- /*
170
- :call-seq: attribute_nodes() → Array<Nokogiri::XML::Attr>
171
-
172
- Get the attributes of the current node as an Array of XML:Attr
173
-
174
- ⚠ This method is deprecated and unsafe to use. It will be removed in a future version of Nokogiri.
175
-
176
- See related: #attribute_hash, #attributes
177
- */
178
- static VALUE
179
- rb_xml_reader_attribute_nodes(VALUE rb_reader)
180
- {
181
- xmlTextReaderPtr c_reader;
182
- xmlNodePtr c_node;
183
- VALUE attr_nodes;
184
- int j;
185
-
186
- // TODO: deprecated, remove in Nokogiri v1.15, see https://github.com/sparklemotion/nokogiri/issues/2598
187
- // After removal, we can also remove all the "node_has_a_document" special handling from xml_node.c
188
- NOKO_WARN_DEPRECATION("Reader#attribute_nodes is deprecated and will be removed in a future version of Nokogiri. Please use Reader#attribute_hash instead.");
189
-
190
- TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_reader_type, c_reader);
191
-
192
- if (! has_attributes(c_reader)) {
193
- return rb_ary_new() ;
194
- }
195
-
196
- c_node = xmlTextReaderExpand(c_reader);
197
- if (c_node == NULL) {
198
- return Qnil;
199
- }
200
-
201
- attr_nodes = noko_xml_node_attrs(c_node);
202
-
203
- /* ensure that the Reader won't be GCed as long as a node is referenced */
204
- for (j = 0 ; j < RARRAY_LEN(attr_nodes) ; j++) {
205
- rb_iv_set(rb_ary_entry(attr_nodes, j), "@reader", rb_reader);
206
- }
207
-
208
- return attr_nodes;
209
- }
210
-
211
175
  /*
212
176
  :call-seq: attribute_hash() → Hash<String ⇒ String>
213
177
 
@@ -554,9 +518,10 @@ static VALUE
554
518
  read_more(VALUE self)
555
519
  {
556
520
  xmlTextReaderPtr reader;
557
- xmlErrorPtr error;
521
+ xmlErrorConstPtr error;
558
522
  VALUE error_list;
559
523
  int ret;
524
+ xmlDocPtr c_document;
560
525
 
561
526
  TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
562
527
 
@@ -566,6 +531,17 @@ read_more(VALUE self)
566
531
  ret = xmlTextReaderRead(reader);
567
532
  xmlSetStructuredErrorFunc(NULL, NULL);
568
533
 
534
+ c_document = xmlTextReaderCurrentDoc(reader);
535
+ if (c_document && c_document->encoding == NULL) {
536
+ VALUE constructor_encoding = rb_iv_get(self, "@encoding");
537
+ if (RTEST(constructor_encoding)) {
538
+ c_document->encoding = xmlStrdup(BAD_CAST StringValueCStr(constructor_encoding));
539
+ } else {
540
+ rb_iv_set(self, "@encoding", NOKOGIRI_STR_NEW2("UTF-8"));
541
+ c_document->encoding = xmlStrdup(BAD_CAST "UTF-8");
542
+ }
543
+ }
544
+
569
545
  if (ret == 1) { return self; }
570
546
  if (ret == 0) { return Qnil; }
571
547
 
@@ -749,15 +725,18 @@ rb_xml_reader_encoding(VALUE rb_reader)
749
725
  const char *parser_encoding;
750
726
  VALUE constructor_encoding;
751
727
 
728
+ TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_reader_type, c_reader);
729
+ parser_encoding = (const char *)xmlTextReaderConstEncoding(c_reader);
730
+ if (parser_encoding) {
731
+ return NOKOGIRI_STR_NEW2(parser_encoding);
732
+ }
733
+
752
734
  constructor_encoding = rb_iv_get(rb_reader, "@encoding");
753
735
  if (RTEST(constructor_encoding)) {
754
736
  return constructor_encoding;
755
737
  }
756
738
 
757
- TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_reader_type, c_reader);
758
- parser_encoding = (const char *)xmlTextReaderConstEncoding(c_reader);
759
- if (parser_encoding == NULL) { return Qnil; }
760
- return NOKOGIRI_STR_NEW2(parser_encoding);
739
+ return Qnil;
761
740
  }
762
741
 
763
742
  void
@@ -778,7 +757,6 @@ noko_init_xml_reader(void)
778
757
  rb_define_method(cNokogiriXmlReader, "attribute", reader_attribute, 1);
779
758
  rb_define_method(cNokogiriXmlReader, "attribute_at", attribute_at, 1);
780
759
  rb_define_method(cNokogiriXmlReader, "attribute_count", attribute_count, 0);
781
- rb_define_method(cNokogiriXmlReader, "attribute_nodes", rb_xml_reader_attribute_nodes, 0);
782
760
  rb_define_method(cNokogiriXmlReader, "attribute_hash", rb_xml_reader_attribute_hash, 0);
783
761
  rb_define_method(cNokogiriXmlReader, "attributes?", attributes_eh, 0);
784
762
  rb_define_method(cNokogiriXmlReader, "base_uri", rb_xml_reader_base_uri, 0);
@@ -93,7 +93,7 @@ xml_relax_ng_parse_schema(
93
93
  xmlRelaxNGFreeParserCtxt(c_parser_context);
94
94
 
95
95
  if (NULL == c_schema) {
96
- xmlErrorPtr error = xmlGetLastError();
96
+ xmlErrorConstPtr error = xmlGetLastError();
97
97
  if (error) {
98
98
  Nokogiri_error_raise(NULL, error);
99
99
  } else {
@@ -59,6 +59,10 @@ parse_io(VALUE klass, VALUE io, VALUE encoding)
59
59
  (xmlInputReadCallback)noko_io_read,
60
60
  (xmlInputCloseCallback)noko_io_close,
61
61
  (void *)io, enc);
62
+ if (!ctxt) {
63
+ rb_raise(rb_eRuntimeError, "failed to create xml sax parser context");
64
+ }
65
+
62
66
  if (ctxt->sax) {
63
67
  xmlFree(ctxt->sax);
64
68
  ctxt->sax = NULL;
@@ -59,7 +59,7 @@ native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
59
59
 
60
60
  if (xmlParseChunk(ctx, chunk, size, Qtrue == _last_chunk ? 1 : 0)) {
61
61
  if (!(ctx->options & XML_PARSE_RECOVER)) {
62
- xmlErrorPtr e = xmlCtxtGetLastError(ctx);
62
+ xmlErrorConstPtr e = xmlCtxtGetLastError(ctx);
63
63
  Nokogiri_error_raise(NULL, e);
64
64
  }
65
65
  }
@@ -146,7 +146,7 @@ xml_schema_parse_schema(
146
146
  xmlSchemaFreeParserCtxt(c_parser_context);
147
147
 
148
148
  if (NULL == c_schema) {
149
- xmlErrorPtr error = xmlGetLastError();
149
+ xmlErrorConstPtr error = xmlGetLastError();
150
150
  if (error) {
151
151
  Nokogiri_error_raise(NULL, error);
152
152
  } else {
@@ -222,8 +222,7 @@ rb_xml_schema_s_from_document(int argc, VALUE *argv, VALUE klass)
222
222
 
223
223
  if (!rb_obj_is_kind_of(rb_document, cNokogiriXmlDocument)) {
224
224
  xmlNodePtr deprecated_node_type_arg;
225
- // TODO: deprecate allowing Node
226
- NOKO_WARN_DEPRECATION("Passing a Node as the first parameter to Schema.from_document is deprecated. Please pass a Document instead. This will become an error in a future release of Nokogiri.");
225
+ NOKO_WARN_DEPRECATION("Passing a Node as the first parameter to Schema.from_document is deprecated. Please pass a Document instead. This will become an error in Nokogiri v1.17.0."); // TODO: deprecated in v1.15.3, remove in v1.17.0
227
226
  Noko_Node_Get_Struct(rb_document, xmlNode, deprecated_node_type_arg);
228
227
  c_document = deprecated_node_type_arg->doc;
229
228
  } else {
@@ -26,7 +26,7 @@ Nokogiri_structured_error_func_restore(libxmlStructuredErrorHandlerState *handle
26
26
  }
27
27
 
28
28
  void
29
- Nokogiri_error_array_pusher(void *ctx, xmlErrorPtr error)
29
+ Nokogiri_error_array_pusher(void *ctx, xmlErrorConstPtr error)
30
30
  {
31
31
  VALUE list = (VALUE)ctx;
32
32
  Check_Type(list, T_ARRAY);
@@ -34,13 +34,13 @@ Nokogiri_error_array_pusher(void *ctx, xmlErrorPtr error)
34
34
  }
35
35
 
36
36
  void
37
- Nokogiri_error_raise(void *ctx, xmlErrorPtr error)
37
+ Nokogiri_error_raise(void *ctx, xmlErrorConstPtr error)
38
38
  {
39
39
  rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
40
40
  }
41
41
 
42
42
  VALUE
43
- Nokogiri_wrap_xml_syntax_error(xmlErrorPtr error)
43
+ Nokogiri_wrap_xml_syntax_error(xmlErrorConstPtr error)
44
44
  {
45
45
  VALUE msg, e, klass;
46
46
 
@@ -28,8 +28,7 @@ rb_xml_text_s_new(int argc, VALUE *argv, VALUE klass)
28
28
 
29
29
  if (!rb_obj_is_kind_of(rb_document, cNokogiriXmlDocument)) {
30
30
  xmlNodePtr deprecated_node_type_arg;
31
- // TODO: deprecate allowing Node
32
- NOKO_WARN_DEPRECATION("Passing a Node as the second parameter to Text.new is deprecated. Please pass a Document instead. This will become an error in a future release of Nokogiri.");
31
+ NOKO_WARN_DEPRECATION("Passing a Node as the second parameter to Text.new is deprecated. Please pass a Document instead. This will become an error in Nokogiri v1.17.0."); // TODO: deprecated in v1.15.3, remove in v1.17.0
33
32
  Noko_Node_Get_Struct(rb_document, xmlNode, deprecated_node_type_arg);
34
33
  c_document = deprecated_node_type_arg->doc;
35
34
  } else {
@@ -321,11 +321,8 @@ handler_lookup(void *data, const xmlChar *c_name, const xmlChar *c_ns_uri)
321
321
  VALUE rb_handler = (VALUE)data;
322
322
  if (rb_respond_to(rb_handler, rb_intern((const char *)c_name))) {
323
323
  if (c_ns_uri == NULL) {
324
- NOKO_WARN_DEPRECATION(
325
- "A custom XPath or CSS handler function named '%s' is being invoked without a namespace."
326
- " Please update your query to reference this function as 'nokogiri:%s'."
327
- " Invoking custom handler functions without a namespace is deprecated and support will be removed in a future release of Nokogiri.",
328
- c_name, c_name);
324
+ NOKO_WARN_DEPRECATION("A custom XPath or CSS handler function named '%s' is being invoked without a namespace. Please update your query to reference this function as 'nokogiri:%s'. Invoking custom handler functions without a namespace is deprecated and will become an error in Nokogiri v1.17.0.",
325
+ c_name, c_name); // deprecated in v1.15.0, remove in v1.17.0
329
326
  }
330
327
  return method_caller;
331
328
  }
@@ -71,7 +71,12 @@ Nokogiri_wrap_xslt_stylesheet(xsltStylesheetPtr ss)
71
71
  * call-seq:
72
72
  * parse_stylesheet_doc(document)
73
73
  *
74
- * Parse a stylesheet from +document+.
74
+ * Parse an XSLT::Stylesheet from +document+.
75
+ *
76
+ * [Parameters]
77
+ * - +document+ (Nokogiri::XML::Document) the document to be parsed.
78
+ *
79
+ * [Returns] Nokogiri::XSLT::Stylesheet
75
80
  */
76
81
  static VALUE
77
82
  parse_stylesheet_doc(VALUE klass, VALUE xmldocobj)
@@ -104,7 +109,7 @@ parse_stylesheet_doc(VALUE klass, VALUE xmldocobj)
104
109
  * call-seq:
105
110
  * serialize(document)
106
111
  *
107
- * Serialize +document+ to an xml string.
112
+ * Serialize +document+ to an xml string, as specified by the +method+ parameter in the Stylesheet.
108
113
  */
109
114
  static VALUE
110
115
  rb_xslt_stylesheet_serialize(VALUE self, VALUE xmlobj)
@@ -133,7 +138,7 @@ rb_xslt_stylesheet_serialize(VALUE self, VALUE xmlobj)
133
138
  * transform(document)
134
139
  * transform(document, params = {})
135
140
  *
136
- * Apply an XSLT stylesheet to an XML::Document.
141
+ * Transform an XML::Document as defined by an XSLT::Stylesheet.
137
142
  *
138
143
  * [Parameters]
139
144
  * - +document+ (Nokogiri::XML::Document) the document to be transformed.
@@ -13,6 +13,23 @@ LDFLAGS := -pthread
13
13
 
14
14
  all: check
15
15
 
16
+ oss-fuzz:
17
+ ./fuzzer/build-ossfuzz.sh
18
+
19
+ fuzzers: fuzzer-normal fuzzer-asan fuzzer-ubsan fuzzer-msan
20
+
21
+ fuzzer-normal:
22
+ ./fuzzer/build.sh
23
+
24
+ fuzzer-asan:
25
+ SANITIZER=asan ./fuzzer/build.sh
26
+
27
+ fuzzer-ubsan:
28
+ SANITIZER=ubsan ./fuzzer/build.sh
29
+
30
+ fuzzer-msan:
31
+ SANITIZER=msan ./fuzzer/build.sh
32
+
16
33
  # don't try to regenerate ragel or gperf files in CI, that should be a development-only action and
17
34
  # the generated files should be committed to SCM
18
35
  ifneq ($(CI),true)
@@ -81,6 +98,7 @@ coverage:
81
98
 
82
99
  clean:
83
100
  $(RM) -r build
101
+ $(RM) -r fuzzer/build fuzzer/src-* fuzzer/gumbo_corpus
84
102
 
85
103
  build/src/flags: | build/src
86
104
  @echo 'old_CC := $(CC)' > $@
@@ -4826,14 +4826,17 @@ GumboOutput* gumbo_parse_with_options (
4826
4826
  // to a token.
4827
4827
  if (token.type == GUMBO_TOKEN_END_TAG &&
4828
4828
  token.v.end_tag.tag == GUMBO_TAG_UNKNOWN)
4829
+ {
4829
4830
  gumbo_free(token.v.end_tag.name);
4831
+ token.v.end_tag.name = NULL;
4832
+ }
4833
+ if (unlikely(state->_open_elements.length > max_tree_depth)) {
4834
+ parser._output->status = GUMBO_STATUS_TREE_TOO_DEEP;
4835
+ gumbo_debug("Tree depth limit exceeded.\n");
4836
+ break;
4837
+ }
4830
4838
  }
4831
4839
 
4832
- if (unlikely(state->_open_elements.length > max_tree_depth)) {
4833
- parser._output->status = GUMBO_STATUS_TREE_TOO_DEEP;
4834
- gumbo_debug("Tree depth limit exceeded.\n");
4835
- break;
4836
- }
4837
4840
 
4838
4841
  ++loop_count;
4839
4842
  assert(loop_count < 1000000000UL);