nokogiri 1.16.0.rc1 → 1.16.1

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: edb35060c35ecf55ba01f20e731bb2a7866377ee2e06ef97ea6f3a7e5a1ef450
4
- data.tar.gz: c698841274ac4d10a894ac456bba6535a7825ed7a836bfbd27fe91b1734c7438
3
+ metadata.gz: da609d95e9ec1de75acd8ff2b8ef217b23df9431a17c5d37d8968f90bcbd6f5b
4
+ data.tar.gz: a9573dc8e6cbd63f97104651f38c9bfa06549eab4d77c602dedba2094adc320f
5
5
  SHA512:
6
- metadata.gz: 2e6907cf3ffe729da72be3ba21cf8af21e2fd448631572713fec65bd37ab2cfec290067f1c8ea3e00564d5b6b4eb1b7c63beb9b8bbeba3a1518599d11023cd84
7
- data.tar.gz: 7cdcc4e33d7ee32de3074ed49a55c97f55763ec6f2ab01076cd2712bd07d7df19cb292981435248b6d3fb85741ed78fd4ddcc913b9b565e5e963c38ff3091968
6
+ metadata.gz: ceb8cb32c790d4f3b9162bc86169373c8b7084e15e7ca9991712aed69f495e8a9e58480e94536fae3072d4b0bb9ee5e553367bb41e896087e24073e667650969
7
+ data.tar.gz: '094a214ed1c7a5462a6ae2fbc73b0247496e1dd0dcc9542fd5db5cee87a5916776f1f1adb3c04af858fd8d7722cc19473dbde560f483ab3a0efcd9c881fe96ff'
data/Gemfile CHANGED
@@ -5,30 +5,36 @@ source "https://rubygems.org"
5
5
  gemspec
6
6
 
7
7
  group :development do
8
+ # ruby 3.4.0-dev removed some gems from the default set
9
+ #
10
+ # TODO: we should be able to remove these as our gem dependencies sort it out and we pull them in
11
+ # transitively.
12
+ gem "mutex_m"
13
+
8
14
  # bootstrapping
9
15
  gem "bundler", "~> 2.3"
10
16
  gem "rake", "13.1.0"
11
17
 
12
18
  # building extensions
13
- gem "rake-compiler", "1.2.5"
14
- gem "rake-compiler-dock", "1.4.0.rc2"
19
+ gem "rake-compiler", "1.2.6"
20
+ gem "rake-compiler-dock", "1.4.0"
15
21
 
16
22
  # parser generator
17
23
  gem "rexical", "= 1.0.7"
18
24
 
19
25
  # tests
20
- gem "minitest", "5.20.0"
26
+ gem "minitest", "5.21.2"
21
27
  gem "minitest-parallel_fork", "2.0.0"
22
- gem "ruby_memcheck", "2.2.1"
28
+ gem "ruby_memcheck", "2.3.0"
23
29
  gem "rubyzip", "~> 2.3.2"
24
30
  gem "simplecov", "= 0.21.2"
25
31
 
26
32
  # rubocop
27
33
  if Gem::Requirement.new("~> 3.0").satisfied_by?(Gem::Version.new(RUBY_VERSION))
28
- gem "rubocop", "1.58.0"
29
- gem "rubocop-minitest", "0.33.0"
34
+ gem "rubocop", "1.60.2"
35
+ gem "rubocop-minitest", "0.34.5"
30
36
  gem "rubocop-packaging", "0.5.2"
31
- gem "rubocop-performance", "1.19.1"
37
+ gem "rubocop-performance", "1.20.2"
32
38
  gem "rubocop-rake", "= 0.6.0"
33
39
  gem "rubocop-shopify", "2.14.0"
34
40
  end
@@ -38,5 +44,5 @@ end
38
44
  # `bundle config set --local without rdoc`
39
45
  # Then re-run `bundle install`.
40
46
  group :rdoc do
41
- gem "rdoc", "6.6.1"
47
+ gem "rdoc", "6.6.2"
42
48
  end
data/README.md CHANGED
@@ -55,6 +55,7 @@ There are a few ways to ask exploratory questions:
55
55
 
56
56
  - The Nokogiri mailing list is active at https://groups.google.com/group/nokogiri-talk
57
57
  - Open an issue using the "Help Request" template at https://github.com/sparklemotion/nokogiri/issues
58
+ - Open a discussion at https://github.com/sparklemotion/nokogiri/discussions
58
59
 
59
60
  Please do not mail the maintainers at their personal addresses.
60
61
 
@@ -90,6 +91,8 @@ We bump `Major.Minor.Patch` versions following this guidance:
90
91
  - Updating packaged libraries for non-security-related reasons.
91
92
  - Dropping support for EOLed Ruby versions. [Some folks find this objectionable](https://github.com/sparklemotion/nokogiri/issues/1568), but [SemVer says this is OK if the public API hasn't changed](https://semver.org/#what-should-i-do-if-i-update-my-own-dependencies-without-changing-the-public-api).
92
93
  - Backwards-incompatible changes to internal or private methods and constants. These are detailed in the "Changes" section of each changelog entry.
94
+ - Removal of deprecated methods or parameters, after a generous transition period; usually when those methods or parameters are rarely-used or dangerous to the user. Essentially, removals that do not justify a major version bump.
95
+
93
96
 
94
97
  `Patch`:
95
98
 
data/dependencies.yml CHANGED
@@ -1,8 +1,8 @@
1
1
 
2
2
  libxml2:
3
- version: "2.12.3"
4
- sha256: "8c8f1092340a89ff32bc44ad5c9693aff9bc8a7a3e161bb239666e5d15ac9aaa"
5
- # sha-256 hash provided in https://download.gnome.org/sources/libxml2/2.12/libxml2-2.12.3.sha256sum
3
+ version: "2.12.4"
4
+ sha256: "497360e423cf0bd99eacdb7c6215dea92e6d6e89ee940393c2bae0e77cb9b7d0"
5
+ # sha-256 hash provided in https://download.gnome.org/sources/libxml2/2.12/libxml2-2.12.4.sha256sum
6
6
 
7
7
  libxslt:
8
8
  version: "1.1.39"
@@ -74,8 +74,10 @@ dealloc(void *data)
74
74
 
75
75
  ruby_xfree(doc->_private);
76
76
 
77
+ #if defined(__GNUC__) && __GNUC__ >= 5
77
78
  #pragma GCC diagnostic push
78
79
  #pragma GCC diagnostic ignored "-Wdeprecated-declarations" // xmlDeregisterNodeDefault is deprecated as of libxml2 2.11.0
80
+ #endif
79
81
  /*
80
82
  * libxml-ruby < 3.0.0 uses xmlDeregisterNodeDefault. If the user is using one of those older
81
83
  * versions, the registered callback from libxml-ruby will access the _private pointers set by
@@ -90,7 +92,9 @@ dealloc(void *data)
90
92
  if (xmlDeregisterNodeDefaultValue) {
91
93
  remove_private((xmlNodePtr)doc);
92
94
  }
95
+ #if defined(__GNUC__) && __GNUC__ >= 5
93
96
  #pragma GCC diagnostic pop
97
+ #endif
94
98
 
95
99
  xmlFreeDoc(doc);
96
100
  }
@@ -5,8 +5,14 @@ VALUE cNokogiriXmlReader;
5
5
  static void
6
6
  xml_reader_deallocate(void *data)
7
7
  {
8
+ // free the document separately because we _may_ have triggered preservation by calling
9
+ // xmlTextReaderCurrentDoc during a read_more.
8
10
  xmlTextReaderPtr reader = data;
11
+ xmlDocPtr doc = xmlTextReaderCurrentDoc(reader);
9
12
  xmlFreeTextReader(reader);
13
+ if (doc) {
14
+ xmlFreeDoc(doc);
15
+ }
10
16
  }
11
17
 
12
18
  static const rb_data_type_t xml_reader_type = {
@@ -515,6 +521,7 @@ read_more(VALUE self)
515
521
  xmlErrorConstPtr error;
516
522
  VALUE error_list;
517
523
  int ret;
524
+ xmlDocPtr c_document;
518
525
 
519
526
  TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
520
527
 
@@ -524,6 +531,16 @@ read_more(VALUE self)
524
531
  ret = xmlTextReaderRead(reader);
525
532
  xmlSetStructuredErrorFunc(NULL, NULL);
526
533
 
534
+ c_document = xmlTextReaderCurrentDoc(reader);
535
+ if (c_document && c_document->encoding == NULL) {
536
+ VALUE constructor_encoding = rb_iv_get(self, "@encoding");
537
+ if (RTEST(constructor_encoding)) {
538
+ c_document->encoding = xmlStrdup(BAD_CAST StringValueCStr(constructor_encoding));
539
+ } else {
540
+ c_document->encoding = xmlStrdup(BAD_CAST "UTF-8");
541
+ }
542
+ }
543
+
527
544
  if (ret == 1) { return self; }
528
545
  if (ret == 0) { return Qnil; }
529
546
 
@@ -707,15 +724,18 @@ rb_xml_reader_encoding(VALUE rb_reader)
707
724
  const char *parser_encoding;
708
725
  VALUE constructor_encoding;
709
726
 
727
+ TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_reader_type, c_reader);
728
+ parser_encoding = (const char *)xmlTextReaderConstEncoding(c_reader);
729
+ if (parser_encoding) {
730
+ return NOKOGIRI_STR_NEW2(parser_encoding);
731
+ }
732
+
710
733
  constructor_encoding = rb_iv_get(rb_reader, "@encoding");
711
734
  if (RTEST(constructor_encoding)) {
712
735
  return constructor_encoding;
713
736
  }
714
737
 
715
- TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_reader_type, c_reader);
716
- parser_encoding = (const char *)xmlTextReaderConstEncoding(c_reader);
717
- if (parser_encoding == NULL) { return Qnil; }
718
- return NOKOGIRI_STR_NEW2(parser_encoding);
738
+ return Qnil;
719
739
  }
720
740
 
721
741
  void
@@ -59,6 +59,10 @@ parse_io(VALUE klass, VALUE io, VALUE encoding)
59
59
  (xmlInputReadCallback)noko_io_read,
60
60
  (xmlInputCloseCallback)noko_io_close,
61
61
  (void *)io, enc);
62
+ if (!ctxt) {
63
+ rb_raise(rb_eRuntimeError, "failed to create xml sax parser context");
64
+ }
65
+
62
66
  if (ctxt->sax) {
63
67
  xmlFree(ctxt->sax);
64
68
  ctxt->sax = NULL;
@@ -71,7 +71,12 @@ Nokogiri_wrap_xslt_stylesheet(xsltStylesheetPtr ss)
71
71
  * call-seq:
72
72
  * parse_stylesheet_doc(document)
73
73
  *
74
- * Parse a stylesheet from +document+.
74
+ * Parse an XSLT::Stylesheet from +document+.
75
+ *
76
+ * [Parameters]
77
+ * - +document+ (Nokogiri::XML::Document) the document to be parsed.
78
+ *
79
+ * [Returns] Nokogiri::XSLT::Stylesheet
75
80
  */
76
81
  static VALUE
77
82
  parse_stylesheet_doc(VALUE klass, VALUE xmldocobj)
@@ -104,7 +109,7 @@ parse_stylesheet_doc(VALUE klass, VALUE xmldocobj)
104
109
  * call-seq:
105
110
  * serialize(document)
106
111
  *
107
- * Serialize +document+ to an xml string.
112
+ * Serialize +document+ to an xml string, as specified by the +method+ parameter in the Stylesheet.
108
113
  */
109
114
  static VALUE
110
115
  rb_xslt_stylesheet_serialize(VALUE self, VALUE xmlobj)
@@ -133,7 +138,7 @@ rb_xslt_stylesheet_serialize(VALUE self, VALUE xmlobj)
133
138
  * transform(document)
134
139
  * transform(document, params = {})
135
140
  *
136
- * Apply an XSLT stylesheet to an XML::Document.
141
+ * Transform an XML::Document as defined by an XSLT::Stylesheet.
137
142
  *
138
143
  * [Parameters]
139
144
  * - +document+ (Nokogiri::XML::Document) the document to be transformed.
@@ -13,6 +13,9 @@ LDFLAGS := -pthread
13
13
 
14
14
  all: check
15
15
 
16
+ oss-fuzz:
17
+ ./fuzzer/build-ossfuzz.sh
18
+
16
19
  fuzzers: fuzzer-normal fuzzer-asan fuzzer-ubsan fuzzer-msan
17
20
 
18
21
  fuzzer-normal:
@@ -4826,14 +4826,17 @@ GumboOutput* gumbo_parse_with_options (
4826
4826
  // to a token.
4827
4827
  if (token.type == GUMBO_TOKEN_END_TAG &&
4828
4828
  token.v.end_tag.tag == GUMBO_TAG_UNKNOWN)
4829
+ {
4829
4830
  gumbo_free(token.v.end_tag.name);
4831
+ token.v.end_tag.name = NULL;
4832
+ }
4833
+ if (unlikely(state->_open_elements.length > max_tree_depth)) {
4834
+ parser._output->status = GUMBO_STATUS_TREE_TOO_DEEP;
4835
+ gumbo_debug("Tree depth limit exceeded.\n");
4836
+ break;
4837
+ }
4830
4838
  }
4831
4839
 
4832
- if (unlikely(state->_open_elements.length > max_tree_depth)) {
4833
- parser._output->status = GUMBO_STATUS_TREE_TOO_DEEP;
4834
- gumbo_debug("Tree depth limit exceeded.\n");
4835
- break;
4836
- }
4837
4840
 
4838
4841
  ++loop_count;
4839
4842
  assert(loop_count < 1000000000UL);
@@ -2,5 +2,5 @@
2
2
 
3
3
  module Nokogiri
4
4
  # The version of Nokogiri you are using
5
- VERSION = "1.16.0.rc1"
5
+ VERSION = "1.16.1"
6
6
  end
@@ -1049,29 +1049,35 @@ module Nokogiri
1049
1049
 
1050
1050
  return Nokogiri::XML::NodeSet.new(document) if contents.empty?
1051
1051
 
1052
- # libxml2 does not obey the +recover+ option after encountering errors during +in_context+
1053
- # parsing, and so this horrible hack is here to try to emulate recovery behavior.
1054
- #
1055
- # Unfortunately, this means we're no longer parsing "in context" and so namespaces that
1056
- # would have been inherited from the context node won't be handled correctly. This hack was
1057
- # written in 2010, and I regret it, because it's silently degrading functionality in a way
1058
- # that's not easily prevented (or even detected).
1059
- #
1060
- # I think preferable behavior would be to either:
1061
- #
1062
- # a. add an error noting that we "fell back" and pointing the user to turning off the +recover+ option
1063
- # b. don't recover, but raise a sensible exception
1064
- #
1065
- # For context and background: https://github.com/sparklemotion/nokogiri/issues/313
1066
- # FIXME bug report: https://github.com/sparklemotion/nokogiri/issues/2092
1067
1052
  error_count = document.errors.length
1068
1053
  node_set = in_context(contents, options.to_i)
1069
- if node_set.empty? && (document.errors.length > error_count)
1070
- if options.recover?
1054
+ if document.errors.length > error_count
1055
+ raise document.errors[error_count] unless options.recover?
1056
+
1057
+ if node_set.empty?
1058
+ # libxml2 < 2.13 does not obey the +recover+ option after encountering errors during
1059
+ # +in_context+ parsing, and so this horrible hack is here to try to emulate recovery
1060
+ # behavior.
1061
+ #
1062
+ # (Note that HTML4 fragment parsing seems to have been fixed in abd74186, and XML
1063
+ # fragment parsing is fixed in 1c106edf. Both are in 2.13.)
1064
+ #
1065
+ # Unfortunately, this means we're no longer parsing "in context" and so namespaces that
1066
+ # would have been inherited from the context node won't be handled correctly. This hack
1067
+ # was written in 2010, and I regret it, because it's silently degrading functionality in
1068
+ # a way that's not easily prevented (or even detected).
1069
+ #
1070
+ # I think preferable behavior would be to either:
1071
+ #
1072
+ # a. add an error noting that we "fell back" and pointing the user to turning off the
1073
+ # +recover+ option
1074
+ # b. don't recover, but raise a sensible exception
1075
+ #
1076
+ # For context and background:
1077
+ # - https://github.com/sparklemotion/nokogiri/issues/313
1078
+ # - https://github.com/sparklemotion/nokogiri/issues/2092
1071
1079
  fragment = document.related_class("DocumentFragment").parse(contents)
1072
1080
  node_set = fragment.children
1073
- else
1074
- raise document.errors[error_count]
1075
1081
  end
1076
1082
  end
1077
1083
  node_set
@@ -3,9 +3,11 @@
3
3
  module Nokogiri
4
4
  module XML
5
5
  ###
6
- # Nokogiri::XML::Reader parses an XML document similar to the way a cursor
7
- # would move. The Reader is given an XML document, and yields nodes
8
- # to an each block.
6
+ # Nokogiri::XML::Reader parses an XML document similar to the way a cursor would move. The
7
+ # Reader is given an XML document, and yields nodes to an each block.
8
+ #
9
+ # The Reader parser might be good for when you need the speed and low memory usage of the SAX
10
+ # parser, but do not want to write a Document handler.
9
11
  #
10
12
  # Here is an example of usage:
11
13
  #
@@ -22,13 +24,12 @@ module Nokogiri
22
24
  #
23
25
  # end
24
26
  #
25
- # Note that Nokogiri::XML::Reader#each can only be called once!! Once
26
- # the cursor moves through the entire document, you must parse the
27
- # document again. So make sure that you capture any information you
28
- # need during the first iteration.
27
+ # Nokogiri::XML::Reader#each can only be called once! Once the cursor moves through the entire
28
+ # document, you must parse the document again. It may be better to capture all information you
29
+ # need during a single iteration.
29
30
  #
30
- # The Reader parser is good for when you need the speed of a SAX parser,
31
- # but do not want to write a Document handler.
31
+ # libxml2 does not support error recovery in the Reader parser. The `RECOVER` ParseOption is
32
+ # ignored. If a syntax error is encountered during parsing, an exception will be raised.
32
33
  class Reader
33
34
  include Enumerable
34
35
 
@@ -10,15 +10,37 @@ module Nokogiri
10
10
  # doc = Nokogiri::XML(File.read('some_file.xml'))
11
11
  # xslt = Nokogiri::XSLT(File.read('some_transformer.xslt'))
12
12
  #
13
- # puts xslt.transform(doc)
13
+ # xslt.transform(doc) # => Nokogiri::XML::Document
14
14
  #
15
- # See Nokogiri::XSLT::Stylesheet#transform for more transformation
16
- # information.
15
+ # Many XSLT transformations include serialization behavior to emit a non-XML document. For these
16
+ # cases, please take care to invoke the #serialize method on the result of the transformation:
17
+ #
18
+ # doc = Nokogiri::XML(File.read('some_file.xml'))
19
+ # xslt = Nokogiri::XSLT(File.read('some_transformer.xslt'))
20
+ # xslt.serialize(xslt.transform(doc)) # => String
21
+ #
22
+ # or use the #apply_to method, which is a shortcut for `serialize(transform(document))`:
23
+ #
24
+ # doc = Nokogiri::XML(File.read('some_file.xml'))
25
+ # xslt = Nokogiri::XSLT(File.read('some_transformer.xslt'))
26
+ # xslt.apply_to(doc) # => String
27
+ #
28
+ # See Nokogiri::XSLT::Stylesheet#transform for more information and examples.
17
29
  class Stylesheet
18
- ###
19
- # Apply an XSLT stylesheet to an XML::Document.
20
- # +params+ is an array of strings used as XSLT parameters.
21
- # returns serialized document
30
+ # :call-seq:
31
+ # apply_to(document, params = []) -> String
32
+ #
33
+ # Apply an XSLT stylesheet to an XML::Document and serialize it properly. This method is
34
+ # equivalent to calling #serialize on the result of #transform.
35
+ #
36
+ # [Parameters]
37
+ # - +document+ is an instance of XML::Document to transform
38
+ # - +params+ is an array of strings used as XSLT parameters, passed into #transform
39
+ #
40
+ # [Returns]
41
+ # A string containing the serialized result of the transformation.
42
+ #
43
+ # See Nokogiri::XSLT::Stylesheet#transform for more information and examples.
22
44
  def apply_to(document, params = [])
23
45
  serialize(transform(document, params))
24
46
  end
@@ -0,0 +1,33 @@
1
+ From 95f2a17440568694a6df6a326c5b411e77597be2 Mon Sep 17 00:00:00 2001
2
+ From: Nick Wellnhofer <wellnhofer@aevum.de>
3
+ Date: Tue, 30 Jan 2024 13:25:17 +0100
4
+ Subject: [PATCH] parser: Fix crash in xmlParseInNodeContext with HTML
5
+ documents
6
+
7
+ Ignore namespaces if we have an HTML document with namespaces added
8
+ manually.
9
+
10
+ Fixes #672.
11
+ ---
12
+ parser.c | 4 +++-
13
+ 1 file changed, 3 insertions(+), 1 deletion(-)
14
+
15
+ diff --git a/parser.c b/parser.c
16
+ index 1038d71b..f7842ed1 100644
17
+ --- a/parser.c
18
+ +++ b/parser.c
19
+ @@ -12415,8 +12415,10 @@ xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
20
+ }
21
+ xmlAddChild(node, fake);
22
+
23
+ - if (node->type == XML_ELEMENT_NODE) {
24
+ + if (node->type == XML_ELEMENT_NODE)
25
+ nodePush(ctxt, node);
26
+ +
27
+ + if ((ctxt->html == 0) && (node->type == XML_ELEMENT_NODE)) {
28
+ /*
29
+ * initialize the SAX2 namespaces stack
30
+ */
31
+ --
32
+ 2.42.0
33
+
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: nokogiri
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.16.0.rc1
4
+ version: 1.16.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mike Dalessio
@@ -20,7 +20,7 @@ authors:
20
20
  autorequire:
21
21
  bindir: bin
22
22
  cert_chain: []
23
- date: 2023-12-13 00:00:00.000000000 Z
23
+ date: 2024-02-03 00:00:00.000000000 Z
24
24
  dependencies:
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: mini_portile2
@@ -271,8 +271,9 @@ files:
271
271
  - patches/libxml2/0009-allow-wildcard-namespaces.patch
272
272
  - patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch
273
273
  - patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch
274
+ - patches/libxml2/0012-parser-Fix-crash-in-xmlParseInNodeContext-with-HTML.patch
274
275
  - patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch
275
- - ports/archives/libxml2-2.12.3.tar.xz
276
+ - ports/archives/libxml2-2.12.4.tar.xz
276
277
  - ports/archives/libxslt-1.1.39.tar.xz
277
278
  homepage: https://nokogiri.org
278
279
  licenses:
@@ -297,9 +298,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
297
298
  version: 3.0.0
298
299
  required_rubygems_version: !ruby/object:Gem::Requirement
299
300
  requirements:
300
- - - ">"
301
+ - - ">="
301
302
  - !ruby/object:Gem::Version
302
- version: 1.3.1
303
+ version: '0'
303
304
  requirements: []
304
305
  rubygems_version: 3.4.19
305
306
  signing_key:
Binary file