nokogiri 1.16.0 → 1.16.1
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +11 -5
- data/dependencies.yml +3 -3
- data/ext/nokogiri/xml_document.c +4 -0
- data/ext/nokogiri/xml_reader.c +24 -4
- data/ext/nokogiri/xslt_stylesheet.c +8 -3
- data/gumbo-parser/Makefile +3 -0
- data/gumbo-parser/src/parser.c +8 -5
- data/lib/nokogiri/version/constant.rb +1 -1
- data/lib/nokogiri/xml/reader.rb +10 -9
- data/lib/nokogiri/xslt/stylesheet.rb +29 -7
- data/patches/libxml2/0012-parser-Fix-crash-in-xmlParseInNodeContext-with-HTML.patch +33 -0
- data/ports/archives/libxml2-2.12.4.tar.xz +0 -0
- metadata +4 -3
- data/ports/archives/libxml2-2.12.3.tar.xz +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: da609d95e9ec1de75acd8ff2b8ef217b23df9431a17c5d37d8968f90bcbd6f5b
|
4
|
+
data.tar.gz: a9573dc8e6cbd63f97104651f38c9bfa06549eab4d77c602dedba2094adc320f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ceb8cb32c790d4f3b9162bc86169373c8b7084e15e7ca9991712aed69f495e8a9e58480e94536fae3072d4b0bb9ee5e553367bb41e896087e24073e667650969
|
7
|
+
data.tar.gz: '094a214ed1c7a5462a6ae2fbc73b0247496e1dd0dcc9542fd5db5cee87a5916776f1f1adb3c04af858fd8d7722cc19473dbde560f483ab3a0efcd9c881fe96ff'
|
data/Gemfile
CHANGED
@@ -5,19 +5,25 @@ source "https://rubygems.org"
|
|
5
5
|
gemspec
|
6
6
|
|
7
7
|
group :development do
|
8
|
+
# ruby 3.4.0-dev removed some gems from the default set
|
9
|
+
#
|
10
|
+
# TODO: we should be able to remove these as our gem dependencies sort it out and we pull them in
|
11
|
+
# transitively.
|
12
|
+
gem "mutex_m"
|
13
|
+
|
8
14
|
# bootstrapping
|
9
15
|
gem "bundler", "~> 2.3"
|
10
16
|
gem "rake", "13.1.0"
|
11
17
|
|
12
18
|
# building extensions
|
13
|
-
gem "rake-compiler", "1.2.
|
19
|
+
gem "rake-compiler", "1.2.6"
|
14
20
|
gem "rake-compiler-dock", "1.4.0"
|
15
21
|
|
16
22
|
# parser generator
|
17
23
|
gem "rexical", "= 1.0.7"
|
18
24
|
|
19
25
|
# tests
|
20
|
-
gem "minitest", "5.
|
26
|
+
gem "minitest", "5.21.2"
|
21
27
|
gem "minitest-parallel_fork", "2.0.0"
|
22
28
|
gem "ruby_memcheck", "2.3.0"
|
23
29
|
gem "rubyzip", "~> 2.3.2"
|
@@ -25,10 +31,10 @@ group :development do
|
|
25
31
|
|
26
32
|
# rubocop
|
27
33
|
if Gem::Requirement.new("~> 3.0").satisfied_by?(Gem::Version.new(RUBY_VERSION))
|
28
|
-
gem "rubocop", "1.
|
29
|
-
gem "rubocop-minitest", "0.34.
|
34
|
+
gem "rubocop", "1.60.2"
|
35
|
+
gem "rubocop-minitest", "0.34.5"
|
30
36
|
gem "rubocop-packaging", "0.5.2"
|
31
|
-
gem "rubocop-performance", "1.20.
|
37
|
+
gem "rubocop-performance", "1.20.2"
|
32
38
|
gem "rubocop-rake", "= 0.6.0"
|
33
39
|
gem "rubocop-shopify", "2.14.0"
|
34
40
|
end
|
data/dependencies.yml
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
|
2
2
|
libxml2:
|
3
|
-
version: "2.12.3"
|
4
|
-
sha256: "
|
5
|
-
# sha-256 hash provided in https://download.gnome.org/sources/libxml2/2.12/libxml2-2.12.3.sha256sum
|
3
|
+
version: "2.12.4"
|
4
|
+
sha256: "497360e423cf0bd99eacdb7c6215dea92e6d6e89ee940393c2bae0e77cb9b7d0"
|
5
|
+
# sha-256 hash provided in https://download.gnome.org/sources/libxml2/2.12/libxml2-2.12.4.sha256sum
|
6
6
|
|
7
7
|
libxslt:
|
8
8
|
version: "1.1.39"
|
data/ext/nokogiri/xml_document.c
CHANGED
@@ -74,8 +74,10 @@ dealloc(void *data)
|
|
74
74
|
|
75
75
|
ruby_xfree(doc->_private);
|
76
76
|
|
77
|
+
#if defined(__GNUC__) && __GNUC__ >= 5
|
77
78
|
#pragma GCC diagnostic push
|
78
79
|
#pragma GCC diagnostic ignored "-Wdeprecated-declarations" // xmlDeregisterNodeDefault is deprecated as of libxml2 2.11.0
|
80
|
+
#endif
|
79
81
|
/*
|
80
82
|
* libxml-ruby < 3.0.0 uses xmlDeregisterNodeDefault. If the user is using one of those older
|
81
83
|
* versions, the registered callback from libxml-ruby will access the _private pointers set by
|
@@ -90,7 +92,9 @@ dealloc(void *data)
|
|
90
92
|
if (xmlDeregisterNodeDefaultValue) {
|
91
93
|
remove_private((xmlNodePtr)doc);
|
92
94
|
}
|
95
|
+
#if defined(__GNUC__) && __GNUC__ >= 5
|
93
96
|
#pragma GCC diagnostic pop
|
97
|
+
#endif
|
94
98
|
|
95
99
|
xmlFreeDoc(doc);
|
96
100
|
}
|
data/ext/nokogiri/xml_reader.c
CHANGED
@@ -5,8 +5,14 @@ VALUE cNokogiriXmlReader;
|
|
5
5
|
static void
|
6
6
|
xml_reader_deallocate(void *data)
|
7
7
|
{
|
8
|
+
// free the document separately because we _may_ have triggered preservation by calling
|
9
|
+
// xmlTextReaderCurrentDoc during a read_more.
|
8
10
|
xmlTextReaderPtr reader = data;
|
11
|
+
xmlDocPtr doc = xmlTextReaderCurrentDoc(reader);
|
9
12
|
xmlFreeTextReader(reader);
|
13
|
+
if (doc) {
|
14
|
+
xmlFreeDoc(doc);
|
15
|
+
}
|
10
16
|
}
|
11
17
|
|
12
18
|
static const rb_data_type_t xml_reader_type = {
|
@@ -515,6 +521,7 @@ read_more(VALUE self)
|
|
515
521
|
xmlErrorConstPtr error;
|
516
522
|
VALUE error_list;
|
517
523
|
int ret;
|
524
|
+
xmlDocPtr c_document;
|
518
525
|
|
519
526
|
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
520
527
|
|
@@ -524,6 +531,16 @@ read_more(VALUE self)
|
|
524
531
|
ret = xmlTextReaderRead(reader);
|
525
532
|
xmlSetStructuredErrorFunc(NULL, NULL);
|
526
533
|
|
534
|
+
c_document = xmlTextReaderCurrentDoc(reader);
|
535
|
+
if (c_document && c_document->encoding == NULL) {
|
536
|
+
VALUE constructor_encoding = rb_iv_get(self, "@encoding");
|
537
|
+
if (RTEST(constructor_encoding)) {
|
538
|
+
c_document->encoding = xmlStrdup(BAD_CAST StringValueCStr(constructor_encoding));
|
539
|
+
} else {
|
540
|
+
c_document->encoding = xmlStrdup(BAD_CAST "UTF-8");
|
541
|
+
}
|
542
|
+
}
|
543
|
+
|
527
544
|
if (ret == 1) { return self; }
|
528
545
|
if (ret == 0) { return Qnil; }
|
529
546
|
|
@@ -707,15 +724,18 @@ rb_xml_reader_encoding(VALUE rb_reader)
|
|
707
724
|
const char *parser_encoding;
|
708
725
|
VALUE constructor_encoding;
|
709
726
|
|
727
|
+
TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_reader_type, c_reader);
|
728
|
+
parser_encoding = (const char *)xmlTextReaderConstEncoding(c_reader);
|
729
|
+
if (parser_encoding) {
|
730
|
+
return NOKOGIRI_STR_NEW2(parser_encoding);
|
731
|
+
}
|
732
|
+
|
710
733
|
constructor_encoding = rb_iv_get(rb_reader, "@encoding");
|
711
734
|
if (RTEST(constructor_encoding)) {
|
712
735
|
return constructor_encoding;
|
713
736
|
}
|
714
737
|
|
715
|
-
|
716
|
-
parser_encoding = (const char *)xmlTextReaderConstEncoding(c_reader);
|
717
|
-
if (parser_encoding == NULL) { return Qnil; }
|
718
|
-
return NOKOGIRI_STR_NEW2(parser_encoding);
|
738
|
+
return Qnil;
|
719
739
|
}
|
720
740
|
|
721
741
|
void
|
data/ext/nokogiri/xslt_stylesheet.c
CHANGED
@@ -71,7 +71,12 @@ Nokogiri_wrap_xslt_stylesheet(xsltStylesheetPtr ss)
|
|
71
71
|
* call-seq:
|
72
72
|
* parse_stylesheet_doc(document)
|
73
73
|
*
|
74
|
-
* Parse
|
74
|
+
* Parse an XSLT::Stylesheet from +document+.
|
75
|
+
*
|
76
|
+
* [Parameters]
|
77
|
+
* - +document+ (Nokogiri::XML::Document) the document to be parsed.
|
78
|
+
*
|
79
|
+
* [Returns] Nokogiri::XSLT::Stylesheet
|
75
80
|
*/
|
76
81
|
static VALUE
|
77
82
|
parse_stylesheet_doc(VALUE klass, VALUE xmldocobj)
|
@@ -104,7 +109,7 @@ parse_stylesheet_doc(VALUE klass, VALUE xmldocobj)
|
|
104
109
|
* call-seq:
|
105
110
|
* serialize(document)
|
106
111
|
*
|
107
|
-
* Serialize +document+ to an xml string.
|
112
|
+
* Serialize +document+ to an xml string, as specified by the +method+ parameter in the Stylesheet.
|
108
113
|
*/
|
109
114
|
static VALUE
|
110
115
|
rb_xslt_stylesheet_serialize(VALUE self, VALUE xmlobj)
|
@@ -133,7 +138,7 @@ rb_xslt_stylesheet_serialize(VALUE self, VALUE xmlobj)
|
|
133
138
|
* transform(document)
|
134
139
|
* transform(document, params = {})
|
135
140
|
*
|
136
|
-
*
|
141
|
+
* Transform an XML::Document as defined by an XSLT::Stylesheet.
|
137
142
|
*
|
138
143
|
* [Parameters]
|
139
144
|
* - +document+ (Nokogiri::XML::Document) the document to be transformed.
|
data/gumbo-parser/Makefile
CHANGED
data/gumbo-parser/src/parser.c
CHANGED
@@ -4826,14 +4826,17 @@ GumboOutput* gumbo_parse_with_options (
|
|
4826
4826
|
// to a token.
|
4827
4827
|
if (token.type == GUMBO_TOKEN_END_TAG &&
|
4828
4828
|
token.v.end_tag.tag == GUMBO_TAG_UNKNOWN)
|
4829
|
+
{
|
4829
4830
|
gumbo_free(token.v.end_tag.name);
|
4831
|
+
token.v.end_tag.name = NULL;
|
4832
|
+
}
|
4833
|
+
if (unlikely(state->_open_elements.length > max_tree_depth)) {
|
4834
|
+
parser._output->status = GUMBO_STATUS_TREE_TOO_DEEP;
|
4835
|
+
gumbo_debug("Tree depth limit exceeded.\n");
|
4836
|
+
break;
|
4837
|
+
}
|
4830
4838
|
}
|
4831
4839
|
|
4832
|
-
if (unlikely(state->_open_elements.length > max_tree_depth)) {
|
4833
|
-
parser._output->status = GUMBO_STATUS_TREE_TOO_DEEP;
|
4834
|
-
gumbo_debug("Tree depth limit exceeded.\n");
|
4835
|
-
break;
|
4836
|
-
}
|
4837
4840
|
|
4838
4841
|
++loop_count;
|
4839
4842
|
assert(loop_count < 1000000000UL);
|
data/lib/nokogiri/xml/reader.rb
CHANGED
@@ -3,9 +3,11 @@
|
|
3
3
|
module Nokogiri
|
4
4
|
module XML
|
5
5
|
###
|
6
|
-
# Nokogiri::XML::Reader parses an XML document similar to the way a cursor
|
7
|
-
#
|
8
|
-
#
|
6
|
+
# Nokogiri::XML::Reader parses an XML document similar to the way a cursor would move. The
|
7
|
+
# Reader is given an XML document, and yields nodes to an each block.
|
8
|
+
#
|
9
|
+
# The Reader parser might be good for when you need the speed and low memory usage of the SAX
|
10
|
+
# parser, but do not want to write a Document handler.
|
9
11
|
#
|
10
12
|
# Here is an example of usage:
|
11
13
|
#
|
@@ -22,13 +24,12 @@ module Nokogiri
|
|
22
24
|
#
|
23
25
|
# end
|
24
26
|
#
|
25
|
-
#
|
26
|
-
#
|
27
|
-
#
|
28
|
-
# need during the first iteration.
|
27
|
+
# ⚠ Nokogiri::XML::Reader#each can only be called once! Once the cursor moves through the entire
|
28
|
+
# document, you must parse the document again. It may be better to capture all information you
|
29
|
+
# need during a single iteration.
|
29
30
|
#
|
30
|
-
#
|
31
|
-
#
|
31
|
+
# ⚠ libxml2 does not support error recovery in the Reader parser. The `RECOVER` ParseOption is
|
32
|
+
# ignored. If a syntax error is encountered during parsing, an exception will be raised.
|
32
33
|
class Reader
|
33
34
|
include Enumerable
|
34
35
|
|
data/lib/nokogiri/xslt/stylesheet.rb
CHANGED
@@ -10,15 +10,37 @@ module Nokogiri
|
|
10
10
|
# doc = Nokogiri::XML(File.read('some_file.xml'))
|
11
11
|
# xslt = Nokogiri::XSLT(File.read('some_transformer.xslt'))
|
12
12
|
#
|
13
|
-
#
|
13
|
+
# xslt.transform(doc) # => Nokogiri::XML::Document
|
14
14
|
#
|
15
|
-
#
|
16
|
-
#
|
15
|
+
# Many XSLT transformations include serialization behavior to emit a non-XML document. For these
|
16
|
+
# cases, please take care to invoke the #serialize method on the result of the transformation:
|
17
|
+
#
|
18
|
+
# doc = Nokogiri::XML(File.read('some_file.xml'))
|
19
|
+
# xslt = Nokogiri::XSLT(File.read('some_transformer.xslt'))
|
20
|
+
# xslt.serialize(xslt.transform(doc)) # => String
|
21
|
+
#
|
22
|
+
# or use the #apply_to method, which is a shortcut for `serialize(transform(document))`:
|
23
|
+
#
|
24
|
+
# doc = Nokogiri::XML(File.read('some_file.xml'))
|
25
|
+
# xslt = Nokogiri::XSLT(File.read('some_transformer.xslt'))
|
26
|
+
# xslt.apply_to(doc) # => String
|
27
|
+
#
|
28
|
+
# See Nokogiri::XSLT::Stylesheet#transform for more information and examples.
|
17
29
|
class Stylesheet
|
18
|
-
|
19
|
-
#
|
20
|
-
#
|
21
|
-
#
|
30
|
+
# :call-seq:
|
31
|
+
# apply_to(document, params = []) -> String
|
32
|
+
#
|
33
|
+
# Apply an XSLT stylesheet to an XML::Document and serialize it properly. This method is
|
34
|
+
# equivalent to calling #serialize on the result of #transform.
|
35
|
+
#
|
36
|
+
# [Parameters]
|
37
|
+
# - +document+ is an instance of XML::Document to transform
|
38
|
+
# - +params+ is an array of strings used as XSLT parameters, passed into #transform
|
39
|
+
#
|
40
|
+
# [Returns]
|
41
|
+
# A string containing the serialized result of the transformation.
|
42
|
+
#
|
43
|
+
# See Nokogiri::XSLT::Stylesheet#transform for more information and examples.
|
22
44
|
def apply_to(document, params = [])
|
23
45
|
serialize(transform(document, params))
|
24
46
|
end
|
data/patches/libxml2/0012-parser-Fix-crash-in-xmlParseInNodeContext-with-HTML.patch
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
From 95f2a17440568694a6df6a326c5b411e77597be2 Mon Sep 17 00:00:00 2001
|
2
|
+
From: Nick Wellnhofer <wellnhofer@aevum.de>
|
3
|
+
Date: Tue, 30 Jan 2024 13:25:17 +0100
|
4
|
+
Subject: [PATCH] parser: Fix crash in xmlParseInNodeContext with HTML
|
5
|
+
documents
|
6
|
+
|
7
|
+
Ignore namespaces if we have an HTML document with namespaces added
|
8
|
+
manually.
|
9
|
+
|
10
|
+
Fixes #672.
|
11
|
+
---
|
12
|
+
parser.c | 4 +++-
|
13
|
+
1 file changed, 3 insertions(+), 1 deletion(-)
|
14
|
+
|
15
|
+
diff --git a/parser.c b/parser.c
|
16
|
+
index 1038d71b..f7842ed1 100644
|
17
|
+
--- a/parser.c
|
18
|
+
+++ b/parser.c
|
19
|
+
@@ -12415,8 +12415,10 @@ xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
|
20
|
+
}
|
21
|
+
xmlAddChild(node, fake);
|
22
|
+
|
23
|
+
- if (node->type == XML_ELEMENT_NODE) {
|
24
|
+
+ if (node->type == XML_ELEMENT_NODE)
|
25
|
+
nodePush(ctxt, node);
|
26
|
+
+
|
27
|
+
+ if ((ctxt->html == 0) && (node->type == XML_ELEMENT_NODE)) {
|
28
|
+
/*
|
29
|
+
* initialize the SAX2 namespaces stack
|
30
|
+
*/
|
31
|
+
--
|
32
|
+
2.42.0
|
33
|
+
|
Binary file
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: nokogiri
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.16.0
|
4
|
+
version: 1.16.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mike Dalessio
|
@@ -20,7 +20,7 @@ authors:
|
|
20
20
|
autorequire:
|
21
21
|
bindir: bin
|
22
22
|
cert_chain: []
|
23
|
-
date:
|
23
|
+
date: 2024-02-03 00:00:00.000000000 Z
|
24
24
|
dependencies:
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: mini_portile2
|
@@ -271,8 +271,9 @@ files:
|
|
271
271
|
- patches/libxml2/0009-allow-wildcard-namespaces.patch
|
272
272
|
- patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch
|
273
273
|
- patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch
|
274
|
+
- patches/libxml2/0012-parser-Fix-crash-in-xmlParseInNodeContext-with-HTML.patch
|
274
275
|
- patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch
|
275
|
-
- ports/archives/libxml2-2.12.3.tar.xz
|
276
|
+
- ports/archives/libxml2-2.12.4.tar.xz
|
276
277
|
- ports/archives/libxslt-1.1.39.tar.xz
|
277
278
|
homepage: https://nokogiri.org
|
278
279
|
licenses:
|
Binary file
|