nokogiri 1.16.0.rc1-java → 1.16.1-java
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +14 -8
- data/README.md +3 -0
- data/dependencies.yml +3 -3
- data/ext/nokogiri/xml_document.c +4 -0
- data/ext/nokogiri/xml_reader.c +24 -4
- data/ext/nokogiri/xml_sax_parser_context.c +4 -0
- data/ext/nokogiri/xslt_stylesheet.c +8 -3
- data/gumbo-parser/Makefile +3 -0
- data/lib/nokogiri/nokogiri.jar +0 -0
- data/lib/nokogiri/version/constant.rb +1 -1
- data/lib/nokogiri/xml/node.rb +25 -19
- data/lib/nokogiri/xml/reader.rb +10 -9
- data/lib/nokogiri/xslt/stylesheet.rb +29 -7
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 649140ad714baea051673112a7bd3e36742c45aa7dc44530fe3973d71fa544ed
|
4
|
+
data.tar.gz: c32d0391892041b5fcb6b76fd7a5cc43549dbc87c908deeea1910f570fb72a79
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fa8911441f0e769f43302f656666735c2ff9a024fb1024ae17e093bdca98f030bed3d8733caa98dc6e327dd5a3282e20b8cf370456122c4d3aeac314abd791c2
|
7
|
+
data.tar.gz: 5daa20423f3065fb329e37fafd74e2d6c756bf2dc22e0ba9116b122883fbe6b23c351ca1c924c502bf25cfd05fe2c5a4167decb78b0cd9ea5c646450745d6ec7
|
data/Gemfile
CHANGED
@@ -5,30 +5,36 @@ source "https://rubygems.org"
|
|
5
5
|
gemspec
|
6
6
|
|
7
7
|
group :development do
|
8
|
+
# ruby 3.4.0-dev removed some gems from the default set
|
9
|
+
#
|
10
|
+
# TODO: we should be able to remove these as our gem dependencies sort it out and we pull them in
|
11
|
+
# transitively.
|
12
|
+
gem "mutex_m"
|
13
|
+
|
8
14
|
# bootstrapping
|
9
15
|
gem "bundler", "~> 2.3"
|
10
16
|
gem "rake", "13.1.0"
|
11
17
|
|
12
18
|
# building extensions
|
13
|
-
gem "rake-compiler", "1.2.
|
14
|
-
gem "rake-compiler-dock", "1.4.0
|
19
|
+
gem "rake-compiler", "1.2.6"
|
20
|
+
gem "rake-compiler-dock", "1.4.0"
|
15
21
|
|
16
22
|
# parser generator
|
17
23
|
gem "rexical", "= 1.0.7"
|
18
24
|
|
19
25
|
# tests
|
20
|
-
gem "minitest", "5.
|
26
|
+
gem "minitest", "5.21.2"
|
21
27
|
gem "minitest-parallel_fork", "2.0.0"
|
22
|
-
gem "ruby_memcheck", "2.
|
28
|
+
gem "ruby_memcheck", "2.3.0"
|
23
29
|
gem "rubyzip", "~> 2.3.2"
|
24
30
|
gem "simplecov", "= 0.21.2"
|
25
31
|
|
26
32
|
# rubocop
|
27
33
|
if Gem::Requirement.new("~> 3.0").satisfied_by?(Gem::Version.new(RUBY_VERSION))
|
28
|
-
gem "rubocop", "1.
|
29
|
-
gem "rubocop-minitest", "0.
|
34
|
+
gem "rubocop", "1.60.2"
|
35
|
+
gem "rubocop-minitest", "0.34.5"
|
30
36
|
gem "rubocop-packaging", "0.5.2"
|
31
|
-
gem "rubocop-performance", "1.
|
37
|
+
gem "rubocop-performance", "1.20.2"
|
32
38
|
gem "rubocop-rake", "= 0.6.0"
|
33
39
|
gem "rubocop-shopify", "2.14.0"
|
34
40
|
end
|
@@ -38,5 +44,5 @@ end
|
|
38
44
|
# `bundle config set --local without rdoc`
|
39
45
|
# Then re-run `bundle install`.
|
40
46
|
group :rdoc do
|
41
|
-
gem "rdoc", "6.6.
|
47
|
+
gem "rdoc", "6.6.2"
|
42
48
|
end
|
data/README.md
CHANGED
@@ -55,6 +55,7 @@ There are a few ways to ask exploratory questions:
|
|
55
55
|
|
56
56
|
- The Nokogiri mailing list is active at https://groups.google.com/group/nokogiri-talk
|
57
57
|
- Open an issue using the "Help Request" template at https://github.com/sparklemotion/nokogiri/issues
|
58
|
+
- Open a discussion at https://github.com/sparklemotion/nokogiri/discussions
|
58
59
|
|
59
60
|
Please do not mail the maintainers at their personal addresses.
|
60
61
|
|
@@ -90,6 +91,8 @@ We bump `Major.Minor.Patch` versions following this guidance:
|
|
90
91
|
- Updating packaged libraries for non-security-related reasons.
|
91
92
|
- Dropping support for EOLed Ruby versions. [Some folks find this objectionable](https://github.com/sparklemotion/nokogiri/issues/1568), but [SemVer says this is OK if the public API hasn't changed](https://semver.org/#what-should-i-do-if-i-update-my-own-dependencies-without-changing-the-public-api).
|
92
93
|
- Backwards-incompatible changes to internal or private methods and constants. These are detailed in the "Changes" section of each changelog entry.
|
94
|
+
- Removal of deprecated methods or parameters, after a generous transition period; usually when those methods or parameters are rarely-used or dangerous to the user. Essentially, removals that do not justify a major version bump.
|
95
|
+
|
93
96
|
|
94
97
|
`Patch`:
|
95
98
|
|
data/dependencies.yml
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
|
2
2
|
libxml2:
|
3
|
-
version: "2.12.
|
4
|
-
sha256: "
|
5
|
-
# sha-256 hash provided in https://download.gnome.org/sources/libxml2/2.12/libxml2-2.12.
|
3
|
+
version: "2.12.4"
|
4
|
+
sha256: "497360e423cf0bd99eacdb7c6215dea92e6d6e89ee940393c2bae0e77cb9b7d0"
|
5
|
+
# sha-256 hash provided in https://download.gnome.org/sources/libxml2/2.12/libxml2-2.12.4.sha256sum
|
6
6
|
|
7
7
|
libxslt:
|
8
8
|
version: "1.1.39"
|
data/ext/nokogiri/xml_document.c
CHANGED
@@ -74,8 +74,10 @@ dealloc(void *data)
|
|
74
74
|
|
75
75
|
ruby_xfree(doc->_private);
|
76
76
|
|
77
|
+
#if defined(__GNUC__) && __GNUC__ >= 5
|
77
78
|
#pragma GCC diagnostic push
|
78
79
|
#pragma GCC diagnostic ignored "-Wdeprecated-declarations" // xmlDeregisterNodeDefault is deprecated as of libxml2 2.11.0
|
80
|
+
#endif
|
79
81
|
/*
|
80
82
|
* libxml-ruby < 3.0.0 uses xmlDeregisterNodeDefault. If the user is using one of those older
|
81
83
|
* versions, the registered callback from libxml-ruby will access the _private pointers set by
|
@@ -90,7 +92,9 @@ dealloc(void *data)
|
|
90
92
|
if (xmlDeregisterNodeDefaultValue) {
|
91
93
|
remove_private((xmlNodePtr)doc);
|
92
94
|
}
|
95
|
+
#if defined(__GNUC__) && __GNUC__ >= 5
|
93
96
|
#pragma GCC diagnostic pop
|
97
|
+
#endif
|
94
98
|
|
95
99
|
xmlFreeDoc(doc);
|
96
100
|
}
|
data/ext/nokogiri/xml_reader.c
CHANGED
@@ -5,8 +5,14 @@ VALUE cNokogiriXmlReader;
|
|
5
5
|
static void
|
6
6
|
xml_reader_deallocate(void *data)
|
7
7
|
{
|
8
|
+
// free the document separately because we _may_ have triggered preservation by calling
|
9
|
+
// xmlTextReaderCurrentDoc during a read_more.
|
8
10
|
xmlTextReaderPtr reader = data;
|
11
|
+
xmlDocPtr doc = xmlTextReaderCurrentDoc(reader);
|
9
12
|
xmlFreeTextReader(reader);
|
13
|
+
if (doc) {
|
14
|
+
xmlFreeDoc(doc);
|
15
|
+
}
|
10
16
|
}
|
11
17
|
|
12
18
|
static const rb_data_type_t xml_reader_type = {
|
@@ -515,6 +521,7 @@ read_more(VALUE self)
|
|
515
521
|
xmlErrorConstPtr error;
|
516
522
|
VALUE error_list;
|
517
523
|
int ret;
|
524
|
+
xmlDocPtr c_document;
|
518
525
|
|
519
526
|
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
520
527
|
|
@@ -524,6 +531,16 @@ read_more(VALUE self)
|
|
524
531
|
ret = xmlTextReaderRead(reader);
|
525
532
|
xmlSetStructuredErrorFunc(NULL, NULL);
|
526
533
|
|
534
|
+
c_document = xmlTextReaderCurrentDoc(reader);
|
535
|
+
if (c_document && c_document->encoding == NULL) {
|
536
|
+
VALUE constructor_encoding = rb_iv_get(self, "@encoding");
|
537
|
+
if (RTEST(constructor_encoding)) {
|
538
|
+
c_document->encoding = xmlStrdup(BAD_CAST StringValueCStr(constructor_encoding));
|
539
|
+
} else {
|
540
|
+
c_document->encoding = xmlStrdup(BAD_CAST "UTF-8");
|
541
|
+
}
|
542
|
+
}
|
543
|
+
|
527
544
|
if (ret == 1) { return self; }
|
528
545
|
if (ret == 0) { return Qnil; }
|
529
546
|
|
@@ -707,15 +724,18 @@ rb_xml_reader_encoding(VALUE rb_reader)
|
|
707
724
|
const char *parser_encoding;
|
708
725
|
VALUE constructor_encoding;
|
709
726
|
|
727
|
+
TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_reader_type, c_reader);
|
728
|
+
parser_encoding = (const char *)xmlTextReaderConstEncoding(c_reader);
|
729
|
+
if (parser_encoding) {
|
730
|
+
return NOKOGIRI_STR_NEW2(parser_encoding);
|
731
|
+
}
|
732
|
+
|
710
733
|
constructor_encoding = rb_iv_get(rb_reader, "@encoding");
|
711
734
|
if (RTEST(constructor_encoding)) {
|
712
735
|
return constructor_encoding;
|
713
736
|
}
|
714
737
|
|
715
|
-
|
716
|
-
parser_encoding = (const char *)xmlTextReaderConstEncoding(c_reader);
|
717
|
-
if (parser_encoding == NULL) { return Qnil; }
|
718
|
-
return NOKOGIRI_STR_NEW2(parser_encoding);
|
738
|
+
return Qnil;
|
719
739
|
}
|
720
740
|
|
721
741
|
void
|
data/ext/nokogiri/xml_sax_parser_context.c
CHANGED
@@ -59,6 +59,10 @@ parse_io(VALUE klass, VALUE io, VALUE encoding)
|
|
59
59
|
(xmlInputReadCallback)noko_io_read,
|
60
60
|
(xmlInputCloseCallback)noko_io_close,
|
61
61
|
(void *)io, enc);
|
62
|
+
if (!ctxt) {
|
63
|
+
rb_raise(rb_eRuntimeError, "failed to create xml sax parser context");
|
64
|
+
}
|
65
|
+
|
62
66
|
if (ctxt->sax) {
|
63
67
|
xmlFree(ctxt->sax);
|
64
68
|
ctxt->sax = NULL;
|
data/ext/nokogiri/xslt_stylesheet.c
CHANGED
@@ -71,7 +71,12 @@ Nokogiri_wrap_xslt_stylesheet(xsltStylesheetPtr ss)
|
|
71
71
|
* call-seq:
|
72
72
|
* parse_stylesheet_doc(document)
|
73
73
|
*
|
74
|
-
* Parse
|
74
|
+
* Parse an XSLT::Stylesheet from +document+.
|
75
|
+
*
|
76
|
+
* [Parameters]
|
77
|
+
* - +document+ (Nokogiri::XML::Document) the document to be parsed.
|
78
|
+
*
|
79
|
+
* [Returns] Nokogiri::XSLT::Stylesheet
|
75
80
|
*/
|
76
81
|
static VALUE
|
77
82
|
parse_stylesheet_doc(VALUE klass, VALUE xmldocobj)
|
@@ -104,7 +109,7 @@ parse_stylesheet_doc(VALUE klass, VALUE xmldocobj)
|
|
104
109
|
* call-seq:
|
105
110
|
* serialize(document)
|
106
111
|
*
|
107
|
-
* Serialize +document+ to an xml string.
|
112
|
+
* Serialize +document+ to an xml string, as specified by the +method+ parameter in the Stylesheet.
|
108
113
|
*/
|
109
114
|
static VALUE
|
110
115
|
rb_xslt_stylesheet_serialize(VALUE self, VALUE xmlobj)
|
@@ -133,7 +138,7 @@ rb_xslt_stylesheet_serialize(VALUE self, VALUE xmlobj)
|
|
133
138
|
* transform(document)
|
134
139
|
* transform(document, params = {})
|
135
140
|
*
|
136
|
-
*
|
141
|
+
* Transform an XML::Document as defined by an XSLT::Stylesheet.
|
137
142
|
*
|
138
143
|
* [Parameters]
|
139
144
|
* - +document+ (Nokogiri::XML::Document) the document to be transformed.
|
data/gumbo-parser/Makefile
CHANGED
data/lib/nokogiri/nokogiri.jar
CHANGED
Binary file
|
data/lib/nokogiri/xml/node.rb
CHANGED
@@ -1049,29 +1049,35 @@ module Nokogiri
|
|
1049
1049
|
|
1050
1050
|
return Nokogiri::XML::NodeSet.new(document) if contents.empty?
|
1051
1051
|
|
1052
|
-
# libxml2 does not obey the +recover+ option after encountering errors during +in_context+
|
1053
|
-
# parsing, and so this horrible hack is here to try to emulate recovery behavior.
|
1054
|
-
#
|
1055
|
-
# Unfortunately, this means we're no longer parsing "in context" and so namespaces that
|
1056
|
-
# would have been inherited from the context node won't be handled correctly. This hack was
|
1057
|
-
# written in 2010, and I regret it, because it's silently degrading functionality in a way
|
1058
|
-
# that's not easily prevented (or even detected).
|
1059
|
-
#
|
1060
|
-
# I think preferable behavior would be to either:
|
1061
|
-
#
|
1062
|
-
# a. add an error noting that we "fell back" and pointing the user to turning off the +recover+ option
|
1063
|
-
# b. don't recover, but raise a sensible exception
|
1064
|
-
#
|
1065
|
-
# For context and background: https://github.com/sparklemotion/nokogiri/issues/313
|
1066
|
-
# FIXME bug report: https://github.com/sparklemotion/nokogiri/issues/2092
|
1067
1052
|
error_count = document.errors.length
|
1068
1053
|
node_set = in_context(contents, options.to_i)
|
1069
|
-
if
|
1070
|
-
|
1054
|
+
if document.errors.length > error_count
|
1055
|
+
raise document.errors[error_count] unless options.recover?
|
1056
|
+
|
1057
|
+
if node_set.empty?
|
1058
|
+
# libxml2 < 2.13 does not obey the +recover+ option after encountering errors during
|
1059
|
+
# +in_context+ parsing, and so this horrible hack is here to try to emulate recovery
|
1060
|
+
# behavior.
|
1061
|
+
#
|
1062
|
+
# (Note that HTML4 fragment parsing seems to have been fixed in abd74186, and XML
|
1063
|
+
# fragment parsing is fixed in 1c106edf. Both are in 2.13.)
|
1064
|
+
#
|
1065
|
+
# Unfortunately, this means we're no longer parsing "in context" and so namespaces that
|
1066
|
+
# would have been inherited from the context node won't be handled correctly. This hack
|
1067
|
+
# was written in 2010, and I regret it, because it's silently degrading functionality in
|
1068
|
+
# a way that's not easily prevented (or even detected).
|
1069
|
+
#
|
1070
|
+
# I think preferable behavior would be to either:
|
1071
|
+
#
|
1072
|
+
# a. add an error noting that we "fell back" and pointing the user to turning off the
|
1073
|
+
# +recover+ option
|
1074
|
+
# b. don't recover, but raise a sensible exception
|
1075
|
+
#
|
1076
|
+
# For context and background:
|
1077
|
+
# - https://github.com/sparklemotion/nokogiri/issues/313
|
1078
|
+
# - https://github.com/sparklemotion/nokogiri/issues/2092
|
1071
1079
|
fragment = document.related_class("DocumentFragment").parse(contents)
|
1072
1080
|
node_set = fragment.children
|
1073
|
-
else
|
1074
|
-
raise document.errors[error_count]
|
1075
1081
|
end
|
1076
1082
|
end
|
1077
1083
|
node_set
|
data/lib/nokogiri/xml/reader.rb
CHANGED
@@ -3,9 +3,11 @@
|
|
3
3
|
module Nokogiri
|
4
4
|
module XML
|
5
5
|
###
|
6
|
-
# Nokogiri::XML::Reader parses an XML document similar to the way a cursor
|
7
|
-
#
|
8
|
-
#
|
6
|
+
# Nokogiri::XML::Reader parses an XML document similar to the way a cursor would move. The
|
7
|
+
# Reader is given an XML document, and yields nodes to an each block.
|
8
|
+
#
|
9
|
+
# The Reader parser might be good for when you need the speed and low memory usage of the SAX
|
10
|
+
# parser, but do not want to write a Document handler.
|
9
11
|
#
|
10
12
|
# Here is an example of usage:
|
11
13
|
#
|
@@ -22,13 +24,12 @@ module Nokogiri
|
|
22
24
|
#
|
23
25
|
# end
|
24
26
|
#
|
25
|
-
#
|
26
|
-
#
|
27
|
-
#
|
28
|
-
# need during the first iteration.
|
27
|
+
# ⚠ Nokogiri::XML::Reader#each can only be called once! Once the cursor moves through the entire
|
28
|
+
# document, you must parse the document again. It may be better to capture all information you
|
29
|
+
# need during a single iteration.
|
29
30
|
#
|
30
|
-
#
|
31
|
-
#
|
31
|
+
# ⚠ libxml2 does not support error recovery in the Reader parser. The `RECOVER` ParseOption is
|
32
|
+
# ignored. If a syntax error is encountered during parsing, an exception will be raised.
|
32
33
|
class Reader
|
33
34
|
include Enumerable
|
34
35
|
|
data/lib/nokogiri/xslt/stylesheet.rb
CHANGED
@@ -10,15 +10,37 @@ module Nokogiri
|
|
10
10
|
# doc = Nokogiri::XML(File.read('some_file.xml'))
|
11
11
|
# xslt = Nokogiri::XSLT(File.read('some_transformer.xslt'))
|
12
12
|
#
|
13
|
-
#
|
13
|
+
# xslt.transform(doc) # => Nokogiri::XML::Document
|
14
14
|
#
|
15
|
-
#
|
16
|
-
#
|
15
|
+
# Many XSLT transformations include serialization behavior to emit a non-XML document. For these
|
16
|
+
# cases, please take care to invoke the #serialize method on the result of the transformation:
|
17
|
+
#
|
18
|
+
# doc = Nokogiri::XML(File.read('some_file.xml'))
|
19
|
+
# xslt = Nokogiri::XSLT(File.read('some_transformer.xslt'))
|
20
|
+
# xslt.serialize(xslt.transform(doc)) # => String
|
21
|
+
#
|
22
|
+
# or use the #apply_to method, which is a shortcut for `serialize(transform(document))`:
|
23
|
+
#
|
24
|
+
# doc = Nokogiri::XML(File.read('some_file.xml'))
|
25
|
+
# xslt = Nokogiri::XSLT(File.read('some_transformer.xslt'))
|
26
|
+
# xslt.apply_to(doc) # => String
|
27
|
+
#
|
28
|
+
# See Nokogiri::XSLT::Stylesheet#transform for more information and examples.
|
17
29
|
class Stylesheet
|
18
|
-
|
19
|
-
#
|
20
|
-
#
|
21
|
-
#
|
30
|
+
# :call-seq:
|
31
|
+
# apply_to(document, params = []) -> String
|
32
|
+
#
|
33
|
+
# Apply an XSLT stylesheet to an XML::Document and serialize it properly. This method is
|
34
|
+
# equivalent to calling #serialize on the result of #transform.
|
35
|
+
#
|
36
|
+
# [Parameters]
|
37
|
+
# - +document+ is an instance of XML::Document to transform
|
38
|
+
# - +params+ is an array of strings used as XSLT parameters, passed into #transform
|
39
|
+
#
|
40
|
+
# [Returns]
|
41
|
+
# A string containing the serialized result of the transformation.
|
42
|
+
#
|
43
|
+
# See Nokogiri::XSLT::Stylesheet#transform for more information and examples.
|
22
44
|
def apply_to(document, params = [])
|
23
45
|
serialize(transform(document, params))
|
24
46
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: nokogiri
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.16.
|
4
|
+
version: 1.16.1
|
5
5
|
platform: java
|
6
6
|
authors:
|
7
7
|
- Mike Dalessio
|
@@ -20,7 +20,7 @@ authors:
|
|
20
20
|
autorequire:
|
21
21
|
bindir: bin
|
22
22
|
cert_chain: []
|
23
|
-
date:
|
23
|
+
date: 2024-02-03 00:00:00.000000000 Z
|
24
24
|
dependencies:
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
requirement: !ruby/object:Gem::Requirement
|
@@ -340,9 +340,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
340
340
|
version: 3.0.0
|
341
341
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
342
342
|
requirements:
|
343
|
-
- - "
|
343
|
+
- - ">="
|
344
344
|
- !ruby/object:Gem::Version
|
345
|
-
version:
|
345
|
+
version: '0'
|
346
346
|
requirements:
|
347
347
|
- jar isorelax, isorelax, 20030108
|
348
348
|
- jar org.nokogiri, nekodtd, 0.1.11.noko2
|