nokogiri 1.16.0 → 1.16.1

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 32e35ebb760d0c42b28ca4c33b0e2a5ce31f2ddc958e99e2d4b0db36d0ef0447
4
- data.tar.gz: 89af37b43be9f5e371ee29e1ed632ddda89a540e101da0d8b2e5f718c3cab522
3
+ metadata.gz: da609d95e9ec1de75acd8ff2b8ef217b23df9431a17c5d37d8968f90bcbd6f5b
4
+ data.tar.gz: a9573dc8e6cbd63f97104651f38c9bfa06549eab4d77c602dedba2094adc320f
5
5
  SHA512:
6
- metadata.gz: f09c5b0b8567122114285a73abcf69cea3763381c56b4d5296f238b241caa8a41ffe8bf75aaecd0545582eb4c0c8526b6df44291a40db184ed0669edaae890a0
7
- data.tar.gz: 37cd5f0c2f1473a075f2de4a6e0f1823ba91b747b3c6afec48086d23e1630134e4f28882b94ab5c0476c6406c42f462ff5fc93427c68afc3b53a398b6048fe74
6
+ metadata.gz: ceb8cb32c790d4f3b9162bc86169373c8b7084e15e7ca9991712aed69f495e8a9e58480e94536fae3072d4b0bb9ee5e553367bb41e896087e24073e667650969
7
+ data.tar.gz: '094a214ed1c7a5462a6ae2fbc73b0247496e1dd0dcc9542fd5db5cee87a5916776f1f1adb3c04af858fd8d7722cc19473dbde560f483ab3a0efcd9c881fe96ff'
data/Gemfile CHANGED
@@ -5,19 +5,25 @@ source "https://rubygems.org"
5
5
  gemspec
6
6
 
7
7
  group :development do
8
+ # ruby 3.4.0-dev removed some gems from the default set
9
+ #
10
+ # TODO: we should be able to remove these as our gem dependencies sort it out and we pull them in
11
+ # transitively.
12
+ gem "mutex_m"
13
+
8
14
  # bootstrapping
9
15
  gem "bundler", "~> 2.3"
10
16
  gem "rake", "13.1.0"
11
17
 
12
18
  # building extensions
13
- gem "rake-compiler", "1.2.5"
19
+ gem "rake-compiler", "1.2.6"
14
20
  gem "rake-compiler-dock", "1.4.0"
15
21
 
16
22
  # parser generator
17
23
  gem "rexical", "= 1.0.7"
18
24
 
19
25
  # tests
20
- gem "minitest", "5.20.0"
26
+ gem "minitest", "5.21.2"
21
27
  gem "minitest-parallel_fork", "2.0.0"
22
28
  gem "ruby_memcheck", "2.3.0"
23
29
  gem "rubyzip", "~> 2.3.2"
@@ -25,10 +31,10 @@ group :development do
25
31
 
26
32
  # rubocop
27
33
  if Gem::Requirement.new("~> 3.0").satisfied_by?(Gem::Version.new(RUBY_VERSION))
28
- gem "rubocop", "1.59.0"
29
- gem "rubocop-minitest", "0.34.2"
34
+ gem "rubocop", "1.60.2"
35
+ gem "rubocop-minitest", "0.34.5"
30
36
  gem "rubocop-packaging", "0.5.2"
31
- gem "rubocop-performance", "1.20.1"
37
+ gem "rubocop-performance", "1.20.2"
32
38
  gem "rubocop-rake", "= 0.6.0"
33
39
  gem "rubocop-shopify", "2.14.0"
34
40
  end
data/dependencies.yml CHANGED
@@ -1,8 +1,8 @@
1
1
 
2
2
  libxml2:
3
- version: "2.12.3"
4
- sha256: "8c8f1092340a89ff32bc44ad5c9693aff9bc8a7a3e161bb239666e5d15ac9aaa"
5
- # sha-256 hash provided in https://download.gnome.org/sources/libxml2/2.12/libxml2-2.12.3.sha256sum
3
+ version: "2.12.4"
4
+ sha256: "497360e423cf0bd99eacdb7c6215dea92e6d6e89ee940393c2bae0e77cb9b7d0"
5
+ # sha-256 hash provided in https://download.gnome.org/sources/libxml2/2.12/libxml2-2.12.4.sha256sum
6
6
 
7
7
  libxslt:
8
8
  version: "1.1.39"
@@ -74,8 +74,10 @@ dealloc(void *data)
74
74
 
75
75
  ruby_xfree(doc->_private);
76
76
 
77
+ #if defined(__GNUC__) && __GNUC__ >= 5
77
78
  #pragma GCC diagnostic push
78
79
  #pragma GCC diagnostic ignored "-Wdeprecated-declarations" // xmlDeregisterNodeDefault is deprecated as of libxml2 2.11.0
80
+ #endif
79
81
  /*
80
82
  * libxml-ruby < 3.0.0 uses xmlDeregisterNodeDefault. If the user is using one of those older
81
83
  * versions, the registered callback from libxml-ruby will access the _private pointers set by
@@ -90,7 +92,9 @@ dealloc(void *data)
90
92
  if (xmlDeregisterNodeDefaultValue) {
91
93
  remove_private((xmlNodePtr)doc);
92
94
  }
95
+ #if defined(__GNUC__) && __GNUC__ >= 5
93
96
  #pragma GCC diagnostic pop
97
+ #endif
94
98
 
95
99
  xmlFreeDoc(doc);
96
100
  }
@@ -5,8 +5,14 @@ VALUE cNokogiriXmlReader;
5
5
  static void
6
6
  xml_reader_deallocate(void *data)
7
7
  {
8
+ // free the document separately because we _may_ have triggered preservation by calling
9
+ // xmlTextReaderCurrentDoc during a read_more.
8
10
  xmlTextReaderPtr reader = data;
11
+ xmlDocPtr doc = xmlTextReaderCurrentDoc(reader);
9
12
  xmlFreeTextReader(reader);
13
+ if (doc) {
14
+ xmlFreeDoc(doc);
15
+ }
10
16
  }
11
17
 
12
18
  static const rb_data_type_t xml_reader_type = {
@@ -515,6 +521,7 @@ read_more(VALUE self)
515
521
  xmlErrorConstPtr error;
516
522
  VALUE error_list;
517
523
  int ret;
524
+ xmlDocPtr c_document;
518
525
 
519
526
  TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
520
527
 
@@ -524,6 +531,16 @@ read_more(VALUE self)
524
531
  ret = xmlTextReaderRead(reader);
525
532
  xmlSetStructuredErrorFunc(NULL, NULL);
526
533
 
534
+ c_document = xmlTextReaderCurrentDoc(reader);
535
+ if (c_document && c_document->encoding == NULL) {
536
+ VALUE constructor_encoding = rb_iv_get(self, "@encoding");
537
+ if (RTEST(constructor_encoding)) {
538
+ c_document->encoding = xmlStrdup(BAD_CAST StringValueCStr(constructor_encoding));
539
+ } else {
540
+ c_document->encoding = xmlStrdup(BAD_CAST "UTF-8");
541
+ }
542
+ }
543
+
527
544
  if (ret == 1) { return self; }
528
545
  if (ret == 0) { return Qnil; }
529
546
 
@@ -707,15 +724,18 @@ rb_xml_reader_encoding(VALUE rb_reader)
707
724
  const char *parser_encoding;
708
725
  VALUE constructor_encoding;
709
726
 
727
+ TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_reader_type, c_reader);
728
+ parser_encoding = (const char *)xmlTextReaderConstEncoding(c_reader);
729
+ if (parser_encoding) {
730
+ return NOKOGIRI_STR_NEW2(parser_encoding);
731
+ }
732
+
710
733
  constructor_encoding = rb_iv_get(rb_reader, "@encoding");
711
734
  if (RTEST(constructor_encoding)) {
712
735
  return constructor_encoding;
713
736
  }
714
737
 
715
- TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_reader_type, c_reader);
716
- parser_encoding = (const char *)xmlTextReaderConstEncoding(c_reader);
717
- if (parser_encoding == NULL) { return Qnil; }
718
- return NOKOGIRI_STR_NEW2(parser_encoding);
738
+ return Qnil;
719
739
  }
720
740
 
721
741
  void
@@ -71,7 +71,12 @@ Nokogiri_wrap_xslt_stylesheet(xsltStylesheetPtr ss)
71
71
  * call-seq:
72
72
  * parse_stylesheet_doc(document)
73
73
  *
74
- * Parse a stylesheet from +document+.
74
+ * Parse an XSLT::Stylesheet from +document+.
75
+ *
76
+ * [Parameters]
77
+ * - +document+ (Nokogiri::XML::Document) the document to be parsed.
78
+ *
79
+ * [Returns] Nokogiri::XSLT::Stylesheet
75
80
  */
76
81
  static VALUE
77
82
  parse_stylesheet_doc(VALUE klass, VALUE xmldocobj)
@@ -104,7 +109,7 @@ parse_stylesheet_doc(VALUE klass, VALUE xmldocobj)
104
109
  * call-seq:
105
110
  * serialize(document)
106
111
  *
107
- * Serialize +document+ to an xml string.
112
+ * Serialize +document+ to an xml string, as specified by the +method+ parameter in the Stylesheet.
108
113
  */
109
114
  static VALUE
110
115
  rb_xslt_stylesheet_serialize(VALUE self, VALUE xmlobj)
@@ -133,7 +138,7 @@ rb_xslt_stylesheet_serialize(VALUE self, VALUE xmlobj)
133
138
  * transform(document)
134
139
  * transform(document, params = {})
135
140
  *
136
- * Apply an XSLT stylesheet to an XML::Document.
141
+ * Transform an XML::Document as defined by an XSLT::Stylesheet.
137
142
  *
138
143
  * [Parameters]
139
144
  * - +document+ (Nokogiri::XML::Document) the document to be transformed.
@@ -13,6 +13,9 @@ LDFLAGS := -pthread
13
13
 
14
14
  all: check
15
15
 
16
+ oss-fuzz:
17
+ ./fuzzer/build-ossfuzz.sh
18
+
16
19
  fuzzers: fuzzer-normal fuzzer-asan fuzzer-ubsan fuzzer-msan
17
20
 
18
21
  fuzzer-normal:
@@ -4826,14 +4826,17 @@ GumboOutput* gumbo_parse_with_options (
4826
4826
  // to a token.
4827
4827
  if (token.type == GUMBO_TOKEN_END_TAG &&
4828
4828
  token.v.end_tag.tag == GUMBO_TAG_UNKNOWN)
4829
+ {
4829
4830
  gumbo_free(token.v.end_tag.name);
4831
+ token.v.end_tag.name = NULL;
4832
+ }
4833
+ if (unlikely(state->_open_elements.length > max_tree_depth)) {
4834
+ parser._output->status = GUMBO_STATUS_TREE_TOO_DEEP;
4835
+ gumbo_debug("Tree depth limit exceeded.\n");
4836
+ break;
4837
+ }
4830
4838
  }
4831
4839
 
4832
- if (unlikely(state->_open_elements.length > max_tree_depth)) {
4833
- parser._output->status = GUMBO_STATUS_TREE_TOO_DEEP;
4834
- gumbo_debug("Tree depth limit exceeded.\n");
4835
- break;
4836
- }
4837
4840
 
4838
4841
  ++loop_count;
4839
4842
  assert(loop_count < 1000000000UL);
@@ -2,5 +2,5 @@
2
2
 
3
3
  module Nokogiri
4
4
  # The version of Nokogiri you are using
5
- VERSION = "1.16.0"
5
+ VERSION = "1.16.1"
6
6
  end
@@ -3,9 +3,11 @@
3
3
  module Nokogiri
4
4
  module XML
5
5
  ###
6
- # Nokogiri::XML::Reader parses an XML document similar to the way a cursor
7
- # would move. The Reader is given an XML document, and yields nodes
8
- # to an each block.
6
+ # Nokogiri::XML::Reader parses an XML document similar to the way a cursor would move. The
7
+ # Reader is given an XML document, and yields nodes to an each block.
8
+ #
9
+ # The Reader parser might be good for when you need the speed and low memory usage of the SAX
10
+ # parser, but do not want to write a Document handler.
9
11
  #
10
12
  # Here is an example of usage:
11
13
  #
@@ -22,13 +24,12 @@ module Nokogiri
22
24
  #
23
25
  # end
24
26
  #
25
- # Note that Nokogiri::XML::Reader#each can only be called once!! Once
26
- # the cursor moves through the entire document, you must parse the
27
- # document again. So make sure that you capture any information you
28
- # need during the first iteration.
27
+ # Nokogiri::XML::Reader#each can only be called once! Once the cursor moves through the entire
28
+ # document, you must parse the document again. It may be better to capture all information you
29
+ # need during a single iteration.
29
30
  #
30
- # The Reader parser is good for when you need the speed of a SAX parser,
31
- # but do not want to write a Document handler.
31
+ # libxml2 does not support error recovery in the Reader parser. The `RECOVER` ParseOption is
32
+ # ignored. If a syntax error is encountered during parsing, an exception will be raised.
32
33
  class Reader
33
34
  include Enumerable
34
35
 
@@ -10,15 +10,37 @@ module Nokogiri
10
10
  # doc = Nokogiri::XML(File.read('some_file.xml'))
11
11
  # xslt = Nokogiri::XSLT(File.read('some_transformer.xslt'))
12
12
  #
13
- # puts xslt.transform(doc)
13
+ # xslt.transform(doc) # => Nokogiri::XML::Document
14
14
  #
15
- # See Nokogiri::XSLT::Stylesheet#transform for more transformation
16
- # information.
15
+ # Many XSLT transformations include serialization behavior to emit a non-XML document. For these
16
+ # cases, please take care to invoke the #serialize method on the result of the transformation:
17
+ #
18
+ # doc = Nokogiri::XML(File.read('some_file.xml'))
19
+ # xslt = Nokogiri::XSLT(File.read('some_transformer.xslt'))
20
+ # xslt.serialize(xslt.transform(doc)) # => String
21
+ #
22
+ # or use the #apply_to method, which is a shortcut for `serialize(transform(document))`:
23
+ #
24
+ # doc = Nokogiri::XML(File.read('some_file.xml'))
25
+ # xslt = Nokogiri::XSLT(File.read('some_transformer.xslt'))
26
+ # xslt.apply_to(doc) # => String
27
+ #
28
+ # See Nokogiri::XSLT::Stylesheet#transform for more information and examples.
17
29
  class Stylesheet
18
- ###
19
- # Apply an XSLT stylesheet to an XML::Document.
20
- # +params+ is an array of strings used as XSLT parameters.
21
- # returns serialized document
30
+ # :call-seq:
31
+ # apply_to(document, params = []) -> String
32
+ #
33
+ # Apply an XSLT stylesheet to an XML::Document and serialize it properly. This method is
34
+ # equivalent to calling #serialize on the result of #transform.
35
+ #
36
+ # [Parameters]
37
+ # - +document+ is an instance of XML::Document to transform
38
+ # - +params+ is an array of strings used as XSLT parameters, passed into #transform
39
+ #
40
+ # [Returns]
41
+ # A string containing the serialized result of the transformation.
42
+ #
43
+ # See Nokogiri::XSLT::Stylesheet#transform for more information and examples.
22
44
  def apply_to(document, params = [])
23
45
  serialize(transform(document, params))
24
46
  end
@@ -0,0 +1,33 @@
1
+ From 95f2a17440568694a6df6a326c5b411e77597be2 Mon Sep 17 00:00:00 2001
2
+ From: Nick Wellnhofer <wellnhofer@aevum.de>
3
+ Date: Tue, 30 Jan 2024 13:25:17 +0100
4
+ Subject: [PATCH] parser: Fix crash in xmlParseInNodeContext with HTML
5
+ documents
6
+
7
+ Ignore namespaces if we have an HTML document with namespaces added
8
+ manually.
9
+
10
+ Fixes #672.
11
+ ---
12
+ parser.c | 4 +++-
13
+ 1 file changed, 3 insertions(+), 1 deletion(-)
14
+
15
+ diff --git a/parser.c b/parser.c
16
+ index 1038d71b..f7842ed1 100644
17
+ --- a/parser.c
18
+ +++ b/parser.c
19
+ @@ -12415,8 +12415,10 @@ xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
20
+ }
21
+ xmlAddChild(node, fake);
22
+
23
+ - if (node->type == XML_ELEMENT_NODE) {
24
+ + if (node->type == XML_ELEMENT_NODE)
25
+ nodePush(ctxt, node);
26
+ +
27
+ + if ((ctxt->html == 0) && (node->type == XML_ELEMENT_NODE)) {
28
+ /*
29
+ * initialize the SAX2 namespaces stack
30
+ */
31
+ --
32
+ 2.42.0
33
+
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: nokogiri
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.16.0
4
+ version: 1.16.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mike Dalessio
@@ -20,7 +20,7 @@ authors:
20
20
  autorequire:
21
21
  bindir: bin
22
22
  cert_chain: []
23
- date: 2023-12-27 00:00:00.000000000 Z
23
+ date: 2024-02-03 00:00:00.000000000 Z
24
24
  dependencies:
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: mini_portile2
@@ -271,8 +271,9 @@ files:
271
271
  - patches/libxml2/0009-allow-wildcard-namespaces.patch
272
272
  - patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch
273
273
  - patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch
274
+ - patches/libxml2/0012-parser-Fix-crash-in-xmlParseInNodeContext-with-HTML.patch
274
275
  - patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch
275
- - ports/archives/libxml2-2.12.3.tar.xz
276
+ - ports/archives/libxml2-2.12.4.tar.xz
276
277
  - ports/archives/libxslt-1.1.39.tar.xz
277
278
  homepage: https://nokogiri.org
278
279
  licenses:
Binary file