nokogiri 1.13.8 → 1.15.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +40 -0
- data/LICENSE-DEPENDENCIES.md +830 -509
- data/LICENSE.md +1 -1
- data/README.md +18 -11
- data/dependencies.yml +33 -15
- data/ext/nokogiri/extconf.rb +164 -46
- data/ext/nokogiri/gumbo.c +20 -10
- data/ext/nokogiri/html4_document.c +3 -4
- data/ext/nokogiri/html4_element_description.c +20 -15
- data/ext/nokogiri/html4_entity_lookup.c +2 -2
- data/ext/nokogiri/html4_sax_parser_context.c +11 -22
- data/ext/nokogiri/html4_sax_push_parser.c +3 -3
- data/ext/nokogiri/nokogiri.c +84 -75
- data/ext/nokogiri/nokogiri.h +31 -16
- data/ext/nokogiri/test_global_handlers.c +1 -1
- data/ext/nokogiri/xml_attr.c +2 -2
- data/ext/nokogiri/xml_attribute_decl.c +2 -2
- data/ext/nokogiri/xml_cdata.c +32 -18
- data/ext/nokogiri/xml_comment.c +2 -2
- data/ext/nokogiri/xml_document.c +127 -34
- data/ext/nokogiri/xml_document_fragment.c +2 -2
- data/ext/nokogiri/xml_dtd.c +2 -2
- data/ext/nokogiri/xml_element_content.c +34 -31
- data/ext/nokogiri/xml_element_decl.c +7 -7
- data/ext/nokogiri/xml_encoding_handler.c +15 -7
- data/ext/nokogiri/xml_entity_decl.c +1 -1
- data/ext/nokogiri/xml_entity_reference.c +2 -2
- data/ext/nokogiri/xml_namespace.c +79 -14
- data/ext/nokogiri/xml_node.c +300 -34
- data/ext/nokogiri/xml_node_set.c +125 -107
- data/ext/nokogiri/xml_processing_instruction.c +2 -2
- data/ext/nokogiri/xml_reader.c +81 -48
- data/ext/nokogiri/xml_relax_ng.c +66 -81
- data/ext/nokogiri/xml_sax_parser.c +45 -20
- data/ext/nokogiri/xml_sax_parser_context.c +46 -30
- data/ext/nokogiri/xml_sax_push_parser.c +30 -11
- data/ext/nokogiri/xml_schema.c +95 -117
- data/ext/nokogiri/xml_syntax_error.c +1 -1
- data/ext/nokogiri/xml_text.c +28 -14
- data/ext/nokogiri/xml_xpath_context.c +216 -136
- data/ext/nokogiri/xslt_stylesheet.c +118 -64
- data/gumbo-parser/Makefile +10 -0
- data/gumbo-parser/src/attribute.h +1 -1
- data/gumbo-parser/src/error.c +10 -6
- data/gumbo-parser/src/error.h +1 -1
- data/gumbo-parser/src/foreign_attrs.c +15 -16
- data/gumbo-parser/src/foreign_attrs.gperf +1 -1
- data/gumbo-parser/src/{gumbo.h → nokogiri_gumbo.h} +1 -0
- data/gumbo-parser/src/parser.c +21 -5
- data/gumbo-parser/src/replacement.h +1 -1
- data/gumbo-parser/src/string_buffer.h +1 -1
- data/gumbo-parser/src/string_piece.c +1 -1
- data/gumbo-parser/src/svg_attrs.c +2 -2
- data/gumbo-parser/src/svg_tags.c +2 -2
- data/gumbo-parser/src/tag.c +2 -1
- data/gumbo-parser/src/tag_lookup.c +7 -7
- data/gumbo-parser/src/tag_lookup.gperf +1 -0
- data/gumbo-parser/src/tag_lookup.h +1 -1
- data/gumbo-parser/src/token_buffer.h +1 -1
- data/gumbo-parser/src/tokenizer.c +1 -1
- data/gumbo-parser/src/tokenizer.h +1 -1
- data/gumbo-parser/src/utf8.c +1 -1
- data/gumbo-parser/src/utf8.h +1 -1
- data/gumbo-parser/src/util.c +1 -3
- data/gumbo-parser/src/util.h +4 -0
- data/gumbo-parser/src/vector.h +1 -1
- data/lib/nokogiri/css/node.rb +2 -2
- data/lib/nokogiri/css/xpath_visitor.rb +7 -5
- data/lib/nokogiri/css.rb +6 -0
- data/lib/nokogiri/decorators/slop.rb +1 -1
- data/lib/nokogiri/encoding_handler.rb +57 -0
- data/lib/nokogiri/extension.rb +4 -3
- data/lib/nokogiri/html4/document.rb +2 -121
- data/lib/nokogiri/html4/document_fragment.rb +1 -1
- data/lib/nokogiri/html4/element_description_defaults.rb +1827 -365
- data/lib/nokogiri/html4/encoding_reader.rb +121 -0
- data/lib/nokogiri/html4.rb +1 -0
- data/lib/nokogiri/html5/document.rb +113 -36
- data/lib/nokogiri/html5/document_fragment.rb +10 -3
- data/lib/nokogiri/html5/node.rb +8 -5
- data/lib/nokogiri/html5.rb +130 -216
- data/lib/nokogiri/jruby/dependencies.rb +1 -19
- data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
- data/lib/nokogiri/version/constant.rb +1 -1
- data/lib/nokogiri/version/info.rb +11 -10
- data/lib/nokogiri/xml/attr.rb +49 -0
- data/lib/nokogiri/xml/attribute_decl.rb +4 -2
- data/lib/nokogiri/xml/builder.rb +1 -1
- data/lib/nokogiri/xml/document.rb +102 -55
- data/lib/nokogiri/xml/document_fragment.rb +50 -7
- data/lib/nokogiri/xml/element_content.rb +10 -2
- data/lib/nokogiri/xml/element_decl.rb +4 -2
- data/lib/nokogiri/xml/entity_decl.rb +4 -2
- data/lib/nokogiri/xml/namespace.rb +42 -0
- data/lib/nokogiri/xml/node/save_options.rb +14 -4
- data/lib/nokogiri/xml/node.rb +212 -48
- data/lib/nokogiri/xml/node_set.rb +88 -9
- data/lib/nokogiri/xml/parse_options.rb +129 -50
- data/lib/nokogiri/xml/pp/node.rb +28 -15
- data/lib/nokogiri/xml/processing_instruction.rb +2 -1
- data/lib/nokogiri/xml/sax/document.rb +1 -1
- data/lib/nokogiri/xml/sax/parser.rb +2 -3
- data/lib/nokogiri/xml/searchable.rb +18 -10
- data/lib/nokogiri/xslt.rb +74 -4
- data/lib/nokogiri.rb +15 -15
- data/lib/xsd/xmlparser/nokogiri.rb +4 -2
- data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
- data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
- data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
- data/ports/archives/libxml2-2.11.7.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.39.tar.xz +0 -0
- metadata +19 -242
- data/patches/libxml2/0004-use-glibc-strlen.patch +0 -53
- data/patches/libxml2/0005-avoid-isnan-isinf.patch +0 -81
- data/patches/libxml2/0006-update-automake-files-for-arm64.patch +0 -3040
- data/patches/libxml2/0008-htmlParseComment-handle-abruptly-closed-comments.patch +0 -61
- data/patches/libxslt/0001-update-automake-files-for-arm64.patch +0 -3037
- data/ports/archives/libxml2-2.9.14.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.35.tar.xz +0 -0
data/ext/nokogiri/nokogiri.h
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
#ifndef NOKOGIRI_NATIVE
|
2
2
|
#define NOKOGIRI_NATIVE
|
3
3
|
|
4
|
+
#include <ruby/defines.h> // https://github.com/sparklemotion/nokogiri/issues/2696
|
5
|
+
|
4
6
|
#ifdef _MSC_VER
|
5
7
|
# ifndef WIN32_LEAN_AND_MEAN
|
6
8
|
# define WIN32_LEAN_AND_MEAN
|
@@ -23,7 +25,6 @@
|
|
23
25
|
# define NOKOPUBVAR extern
|
24
26
|
#endif
|
25
27
|
|
26
|
-
|
27
28
|
#include <stdlib.h>
|
28
29
|
#include <string.h>
|
29
30
|
#include <assert.h>
|
@@ -50,6 +51,7 @@
|
|
50
51
|
#include <libxslt/xsltconfig.h>
|
51
52
|
#include <libxslt/xsltutils.h>
|
52
53
|
#include <libxslt/transform.h>
|
54
|
+
#include <libxslt/imports.h>
|
53
55
|
#include <libxslt/xsltInternals.h>
|
54
56
|
|
55
57
|
#include <libexslt/exslt.h>
|
@@ -75,22 +77,25 @@ xmlNodePtr xmlLastElementChild(xmlNodePtr parent);
|
|
75
77
|
#define NOKOGIRI_STR_NEW(str, len) rb_external_str_new_with_enc((const char *)(str), (long)(len), rb_utf8_encoding())
|
76
78
|
#define RBSTR_OR_QNIL(_str) (_str ? NOKOGIRI_STR_NEW2(_str) : Qnil)
|
77
79
|
|
78
|
-
#
|
79
|
-
#
|
80
|
-
#
|
81
|
-
#else
|
82
|
-
#
|
83
|
-
#
|
80
|
+
#ifndef NORETURN_DECL
|
81
|
+
# if defined(__GNUC__)
|
82
|
+
# define NORETURN_DECL __attribute__ ((noreturn))
|
83
|
+
# else
|
84
|
+
# define NORETURN_DECL
|
85
|
+
# endif
|
84
86
|
#endif
|
85
87
|
|
86
|
-
#ifndef
|
88
|
+
#ifndef PRINTFLIKE_DECL
|
87
89
|
# if defined(__GNUC__)
|
88
|
-
# define
|
90
|
+
# define PRINTFLIKE_DECL(stringidx, argidx) __attribute__ ((format(printf,stringidx,argidx)))
|
89
91
|
# else
|
90
|
-
# define
|
92
|
+
# define PRINTFLIKE_DECL(stringidx, argidx)
|
91
93
|
# endif
|
92
94
|
#endif
|
93
95
|
|
96
|
+
#if defined(TRUFFLERUBY) && !defined(NOKOGIRI_PACKAGED_LIBRARIES)
|
97
|
+
# define TRUFFLERUBY_NOKOGIRI_SYSTEM_LIBRARIES
|
98
|
+
#endif
|
94
99
|
|
95
100
|
NOKOPUBVAR VALUE mNokogiri ;
|
96
101
|
NOKOPUBVAR VALUE mNokogiriGumbo ;
|
@@ -162,15 +167,16 @@ typedef struct _nokogiriXsltStylesheetTuple {
|
|
162
167
|
VALUE func_instances;
|
163
168
|
} nokogiriXsltStylesheetTuple;
|
164
169
|
|
165
|
-
int vasprintf(char **strp, const char *fmt, va_list ap);
|
166
170
|
void noko_xml_document_pin_node(xmlNodePtr);
|
167
171
|
void noko_xml_document_pin_namespace(xmlNsPtr, xmlDocPtr);
|
172
|
+
int noko_xml_document_has_wrapped_blank_nodes_p(xmlDocPtr c_document);
|
168
173
|
|
169
174
|
int noko_io_read(void *ctx, char *buffer, int len);
|
170
175
|
int noko_io_write(void *ctx, char *buffer, int len);
|
171
176
|
int noko_io_close(void *ctx);
|
172
177
|
|
173
178
|
#define Noko_Node_Get_Struct(obj,type,sval) ((sval) = (type*)DATA_PTR(obj))
|
179
|
+
#define Noko_Namespace_Get_Struct(obj,type,sval) ((sval) = (type*)DATA_PTR(obj))
|
174
180
|
|
175
181
|
VALUE noko_xml_node_wrap(VALUE klass, xmlNodePtr node) ;
|
176
182
|
VALUE noko_xml_node_wrap_node_set_result(xmlNodePtr node, VALUE node_set) ;
|
@@ -182,12 +188,21 @@ VALUE noko_xml_namespace_wrap_xpath_copy(xmlNsPtr node);
|
|
182
188
|
VALUE noko_xml_element_content_wrap(VALUE doc, xmlElementContentPtr element);
|
183
189
|
|
184
190
|
VALUE noko_xml_node_set_wrap(xmlNodeSetPtr node_set, VALUE document) ;
|
191
|
+
xmlNodeSetPtr noko_xml_node_set_unwrap(VALUE rb_node_set) ;
|
185
192
|
|
186
193
|
VALUE noko_xml_document_wrap_with_init_args(VALUE klass, xmlDocPtr doc, int argc, VALUE *argv);
|
187
194
|
VALUE noko_xml_document_wrap(VALUE klass, xmlDocPtr doc);
|
195
|
+
xmlDocPtr noko_xml_document_unwrap(VALUE rb_document);
|
188
196
|
NOKOPUBFUN VALUE Nokogiri_wrap_xml_document(VALUE klass,
|
189
197
|
xmlDocPtr doc); /* deprecated. use noko_xml_document_wrap() instead. */
|
190
198
|
|
199
|
+
xmlSAXHandlerPtr noko_sax_handler_unwrap(VALUE rb_sax_handler);
|
200
|
+
|
201
|
+
xmlParserCtxtPtr noko_xml_sax_push_parser_unwrap(VALUE rb_parser);
|
202
|
+
|
203
|
+
VALUE noko_xml_sax_parser_context_wrap(VALUE klass, xmlParserCtxtPtr c_context);
|
204
|
+
xmlParserCtxtPtr noko_xml_sax_parser_context_unwrap(VALUE rb_context);
|
205
|
+
|
191
206
|
#define DOC_RUBY_OBJECT_TEST(x) ((nokogiriTuplePtr)(x->_private))
|
192
207
|
#define DOC_RUBY_OBJECT(x) (((nokogiriTuplePtr)(x->_private))->doc)
|
193
208
|
#define DOC_UNLINKED_NODE_HASH(x) (((nokogiriTuplePtr)(x->_private))->unlinkedNodes)
|
@@ -197,15 +212,15 @@ NOKOPUBFUN VALUE Nokogiri_wrap_xml_document(VALUE klass,
|
|
197
212
|
#define NOKOGIRI_SAX_SELF(_ctxt) ((nokogiriSAXTuplePtr)(_ctxt))->self
|
198
213
|
#define NOKOGIRI_SAX_CTXT(_ctxt) ((nokogiriSAXTuplePtr)(_ctxt))->ctxt
|
199
214
|
#define NOKOGIRI_SAX_TUPLE_NEW(_ctxt, _self) nokogiri_sax_tuple_new(_ctxt, _self)
|
200
|
-
#define NOKOGIRI_SAX_TUPLE_DESTROY(_tuple)
|
215
|
+
#define NOKOGIRI_SAX_TUPLE_DESTROY(_tuple) ruby_xfree(_tuple)
|
201
216
|
|
202
217
|
#define DISCARD_CONST_QUAL(t, v) ((t)(uintptr_t)(v))
|
203
218
|
#define DISCARD_CONST_QUAL_XMLCHAR(v) DISCARD_CONST_QUAL(xmlChar *, v)
|
204
219
|
|
205
220
|
#if HAVE_RB_CATEGORY_WARNING
|
206
|
-
# define NOKO_WARN_DEPRECATION(message) rb_category_warning(RB_WARN_CATEGORY_DEPRECATED, message)
|
221
|
+
# define NOKO_WARN_DEPRECATION(message...) rb_category_warning(RB_WARN_CATEGORY_DEPRECATED, message)
|
207
222
|
#else
|
208
|
-
# define NOKO_WARN_DEPRECATION(message) rb_warning(message)
|
223
|
+
# define NOKO_WARN_DEPRECATION(message...) rb_warning(message)
|
209
224
|
#endif
|
210
225
|
|
211
226
|
void Nokogiri_structured_error_func_save(libxmlStructuredErrorHandlerState *handler_state);
|
@@ -214,7 +229,7 @@ void Nokogiri_structured_error_func_save_and_set(libxmlStructuredErrorHandlerSta
|
|
214
229
|
void Nokogiri_structured_error_func_restore(libxmlStructuredErrorHandlerState *handler_state);
|
215
230
|
VALUE Nokogiri_wrap_xml_syntax_error(xmlErrorPtr error);
|
216
231
|
void Nokogiri_error_array_pusher(void *ctx, xmlErrorPtr error);
|
217
|
-
|
232
|
+
NORETURN_DECL void Nokogiri_error_raise(void *ctx, xmlErrorPtr error);
|
218
233
|
void Nokogiri_marshal_xpath_funcall_and_return_values(xmlXPathParserContextPtr ctx, int nargs, VALUE handler,
|
219
234
|
const char *function_name) ;
|
220
235
|
|
@@ -222,7 +237,7 @@ static inline
|
|
222
237
|
nokogiriSAXTuplePtr
|
223
238
|
nokogiri_sax_tuple_new(xmlParserCtxtPtr ctxt, VALUE self)
|
224
239
|
{
|
225
|
-
nokogiriSAXTuplePtr tuple =
|
240
|
+
nokogiriSAXTuplePtr tuple = ruby_xmalloc(sizeof(nokogiriSAXTuple));
|
226
241
|
tuple->self = self;
|
227
242
|
tuple->ctxt = ctxt;
|
228
243
|
return tuple;
|
@@ -32,7 +32,7 @@ rb_foreign_error_handler(VALUE klass)
|
|
32
32
|
* Do NOT use this outside of the Nokogiri test suite.
|
33
33
|
*/
|
34
34
|
void
|
35
|
-
noko_init_test_global_handlers()
|
35
|
+
noko_init_test_global_handlers(void)
|
36
36
|
{
|
37
37
|
VALUE mNokogiriTest = rb_define_module_under(mNokogiri, "Test");
|
38
38
|
|
data/ext/nokogiri/xml_attr.c
CHANGED
@@ -68,7 +68,7 @@ new (int argc, VALUE *argv, VALUE klass)
|
|
68
68
|
rb_raise(rb_eArgError, "parameter must be a Nokogiri::XML::Document");
|
69
69
|
}
|
70
70
|
|
71
|
-
|
71
|
+
xml_doc = noko_xml_document_unwrap(document);
|
72
72
|
|
73
73
|
node = xmlNewDocProp(
|
74
74
|
xml_doc,
|
@@ -89,7 +89,7 @@ new (int argc, VALUE *argv, VALUE klass)
|
|
89
89
|
}
|
90
90
|
|
91
91
|
void
|
92
|
-
noko_init_xml_attr()
|
92
|
+
noko_init_xml_attr(void)
|
93
93
|
{
|
94
94
|
assert(cNokogiriXmlNode);
|
95
95
|
/*
|
@@ -13,7 +13,7 @@ attribute_type(VALUE self)
|
|
13
13
|
{
|
14
14
|
xmlAttributePtr node;
|
15
15
|
Noko_Node_Get_Struct(self, xmlAttribute, node);
|
16
|
-
return INT2NUM(
|
16
|
+
return INT2NUM(node->atype);
|
17
17
|
}
|
18
18
|
|
19
19
|
/*
|
@@ -59,7 +59,7 @@ enumeration(VALUE self)
|
|
59
59
|
}
|
60
60
|
|
61
61
|
void
|
62
|
-
noko_init_xml_attribute_decl()
|
62
|
+
noko_init_xml_attribute_decl(void)
|
63
63
|
{
|
64
64
|
assert(cNokogiriXmlNode);
|
65
65
|
cNokogiriXmlAttributeDecl = rb_define_class_under(mNokogiriXml, "AttributeDecl", cNokogiriXmlNode);
|
data/ext/nokogiri/xml_cdata.c
CHANGED
@@ -12,31 +12,45 @@ VALUE cNokogiriXmlCData;
|
|
12
12
|
* raise a TypeError exception.
|
13
13
|
*/
|
14
14
|
static VALUE
|
15
|
-
|
15
|
+
rb_xml_cdata_s_new(int argc, VALUE *argv, VALUE klass)
|
16
16
|
{
|
17
|
-
xmlDocPtr
|
18
|
-
xmlNodePtr
|
19
|
-
VALUE
|
20
|
-
VALUE
|
21
|
-
VALUE
|
17
|
+
xmlDocPtr c_document;
|
18
|
+
xmlNodePtr c_node;
|
19
|
+
VALUE rb_document;
|
20
|
+
VALUE rb_content;
|
21
|
+
VALUE rb_rest;
|
22
22
|
VALUE rb_node;
|
23
|
-
xmlChar *
|
24
|
-
int
|
23
|
+
xmlChar *c_content = NULL;
|
24
|
+
int c_content_len = 0;
|
25
25
|
|
26
|
-
rb_scan_args(argc, argv, "2*", &
|
26
|
+
rb_scan_args(argc, argv, "2*", &rb_document, &rb_content, &rb_rest);
|
27
27
|
|
28
|
-
|
28
|
+
if (!rb_obj_is_kind_of(rb_document, cNokogiriXmlNode)) {
|
29
|
+
rb_raise(rb_eTypeError,
|
30
|
+
"expected first parameter to be a Nokogiri::XML::Document, received %"PRIsVALUE,
|
31
|
+
rb_obj_class(rb_document));
|
32
|
+
}
|
33
|
+
|
34
|
+
if (!rb_obj_is_kind_of(rb_document, cNokogiriXmlDocument)) {
|
35
|
+
xmlNodePtr deprecated_node_type_arg;
|
36
|
+
// TODO: deprecate allowing Node
|
37
|
+
NOKO_WARN_DEPRECATION("Passing a Node as the first parameter to CDATA.new is deprecated. Please pass a Document instead. This will become an error in a future release of Nokogiri.");
|
38
|
+
Noko_Node_Get_Struct(rb_document, xmlNode, deprecated_node_type_arg);
|
39
|
+
c_document = deprecated_node_type_arg->doc;
|
40
|
+
} else {
|
41
|
+
c_document = noko_xml_document_unwrap(rb_document);
|
42
|
+
}
|
29
43
|
|
30
|
-
if (!NIL_P(
|
31
|
-
|
32
|
-
|
44
|
+
if (!NIL_P(rb_content)) {
|
45
|
+
c_content = (xmlChar *)StringValuePtr(rb_content);
|
46
|
+
c_content_len = RSTRING_LENINT(rb_content);
|
33
47
|
}
|
34
48
|
|
35
|
-
|
49
|
+
c_node = xmlNewCDataBlock(c_document, c_content, c_content_len);
|
36
50
|
|
37
|
-
noko_xml_document_pin_node(
|
51
|
+
noko_xml_document_pin_node(c_node);
|
38
52
|
|
39
|
-
rb_node = noko_xml_node_wrap(klass,
|
53
|
+
rb_node = noko_xml_node_wrap(klass, c_node);
|
40
54
|
rb_obj_call_init(rb_node, argc, argv);
|
41
55
|
|
42
56
|
if (rb_block_given_p()) { rb_yield(rb_node); }
|
@@ -45,7 +59,7 @@ new (int argc, VALUE *argv, VALUE klass)
|
|
45
59
|
}
|
46
60
|
|
47
61
|
void
|
48
|
-
noko_init_xml_cdata()
|
62
|
+
noko_init_xml_cdata(void)
|
49
63
|
{
|
50
64
|
assert(cNokogiriXmlText);
|
51
65
|
/*
|
@@ -53,5 +67,5 @@ noko_init_xml_cdata()
|
|
53
67
|
*/
|
54
68
|
cNokogiriXmlCData = rb_define_class_under(mNokogiriXml, "CDATA", cNokogiriXmlText);
|
55
69
|
|
56
|
-
rb_define_singleton_method(cNokogiriXmlCData, "new",
|
70
|
+
rb_define_singleton_method(cNokogiriXmlCData, "new", rb_xml_cdata_s_new, -1);
|
57
71
|
}
|
data/ext/nokogiri/xml_comment.c
CHANGED
@@ -30,7 +30,7 @@ new (int argc, VALUE *argv, VALUE klass)
|
|
30
30
|
rb_raise(rb_eArgError, "first argument must be a XML::Document or XML::Node");
|
31
31
|
}
|
32
32
|
|
33
|
-
|
33
|
+
xml_doc = noko_xml_document_unwrap(document);
|
34
34
|
|
35
35
|
node = xmlNewDocComment(
|
36
36
|
xml_doc,
|
@@ -48,7 +48,7 @@ new (int argc, VALUE *argv, VALUE klass)
|
|
48
48
|
}
|
49
49
|
|
50
50
|
void
|
51
|
-
noko_init_xml_comment()
|
51
|
+
noko_init_xml_comment(void)
|
52
52
|
{
|
53
53
|
assert(cNokogiriXmlCharacterData);
|
54
54
|
/*
|
data/ext/nokogiri/xml_document.c
CHANGED
@@ -51,8 +51,9 @@ remove_private(xmlNodePtr node)
|
|
51
51
|
}
|
52
52
|
|
53
53
|
static void
|
54
|
-
mark(
|
54
|
+
mark(void *data)
|
55
55
|
{
|
56
|
+
xmlDocPtr doc = (xmlDocPtr)data;
|
56
57
|
nokogiriTuplePtr tuple = (nokogiriTuplePtr)doc->_private;
|
57
58
|
if (tuple) {
|
58
59
|
rb_gc_mark(tuple->doc);
|
@@ -61,33 +62,83 @@ mark(xmlDocPtr doc)
|
|
61
62
|
}
|
62
63
|
|
63
64
|
static void
|
64
|
-
dealloc(
|
65
|
+
dealloc(void *data)
|
65
66
|
{
|
67
|
+
xmlDocPtr doc = (xmlDocPtr)data;
|
66
68
|
st_table *node_hash;
|
67
69
|
|
68
|
-
NOKOGIRI_DEBUG_START(doc);
|
69
|
-
|
70
70
|
node_hash = DOC_UNLINKED_NODE_HASH(doc);
|
71
71
|
|
72
72
|
st_foreach(node_hash, dealloc_node_i, (st_data_t)doc);
|
73
73
|
st_free_table(node_hash);
|
74
74
|
|
75
|
-
|
75
|
+
ruby_xfree(doc->_private);
|
76
76
|
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
*
|
77
|
+
#pragma GCC diagnostic push
|
78
|
+
#pragma GCC diagnostic ignored "-Wdeprecated-declarations" // xmlDeregisterNodeDefault is deprecated as of libxml2 2.11.0
|
79
|
+
/*
|
80
|
+
* libxml-ruby < 3.0.0 uses xmlDeregisterNodeDefault. If the user is using one of those older
|
81
|
+
* versions, the registered callback from libxml-ruby will access the _private pointers set by
|
82
|
+
* nokogiri, which will result in segfaults.
|
83
|
+
*
|
84
|
+
* To avoid this, we need to clear the _private pointers from all nodes in this document tree
|
85
|
+
* before that callback gets invoked.
|
86
|
+
*
|
87
|
+
* libxml-ruby 3.0.0 was released in 2017-02, so at some point we can probably safely remove this
|
88
|
+
* safeguard (though probably pairing with a runtime check on the libxml-ruby version).
|
81
89
|
*/
|
82
90
|
if (xmlDeregisterNodeDefaultValue) {
|
83
91
|
remove_private((xmlNodePtr)doc);
|
84
92
|
}
|
93
|
+
#pragma GCC diagnostic pop
|
85
94
|
|
86
95
|
xmlFreeDoc(doc);
|
96
|
+
}
|
97
|
+
|
98
|
+
static size_t
|
99
|
+
memsize_node(const xmlNodePtr node)
|
100
|
+
{
|
101
|
+
/* note we don't count namespace definitions, just going for a good-enough number here */
|
102
|
+
xmlNodePtr child;
|
103
|
+
xmlAttrPtr property;
|
104
|
+
size_t memsize = 0;
|
105
|
+
|
106
|
+
memsize += xmlStrlen(node->name);
|
107
|
+
|
108
|
+
if (node->type == XML_ELEMENT_NODE) {
|
109
|
+
for (property = node->properties; property; property = property->next) {
|
110
|
+
memsize += sizeof(xmlAttr) + memsize_node((xmlNodePtr)property);
|
111
|
+
}
|
112
|
+
}
|
113
|
+
if (node->type == XML_TEXT_NODE) {
|
114
|
+
memsize += xmlStrlen(node->content);
|
115
|
+
}
|
116
|
+
for (child = node->children; child; child = child->next) {
|
117
|
+
memsize += sizeof(xmlNode) + memsize_node(child);
|
118
|
+
}
|
119
|
+
return memsize;
|
120
|
+
}
|
87
121
|
|
88
|
-
|
122
|
+
static size_t
|
123
|
+
memsize(const void *data)
|
124
|
+
{
|
125
|
+
xmlDocPtr doc = (const xmlDocPtr)data;
|
126
|
+
size_t memsize = sizeof(xmlDoc);
|
127
|
+
/* This may not account for all memory use */
|
128
|
+
memsize += memsize_node((xmlNodePtr)doc);
|
129
|
+
return memsize;
|
89
130
|
}
|
90
131
|
|
132
|
+
static const rb_data_type_t noko_xml_document_data_type = {
|
133
|
+
.wrap_struct_name = "Nokogiri::XML::Document",
|
134
|
+
.function = {
|
135
|
+
.dmark = mark,
|
136
|
+
.dfree = dealloc,
|
137
|
+
.dsize = memsize,
|
138
|
+
},
|
139
|
+
// .flags = RUBY_TYPED_FREE_IMMEDIATELY, // TODO see https://github.com/sparklemotion/nokogiri/issues/2822
|
140
|
+
};
|
141
|
+
|
91
142
|
static void
|
92
143
|
recursively_remove_namespaces_from_node(xmlNodePtr node)
|
93
144
|
{
|
@@ -104,7 +155,11 @@ recursively_remove_namespaces_from_node(xmlNodePtr node)
|
|
104
155
|
(node->type == XML_XINCLUDE_START) ||
|
105
156
|
(node->type == XML_XINCLUDE_END)) &&
|
106
157
|
node->nsDef) {
|
107
|
-
|
158
|
+
xmlNsPtr curr = node->nsDef;
|
159
|
+
while (curr) {
|
160
|
+
noko_xml_document_pin_namespace(curr, node->doc);
|
161
|
+
curr = curr->next;
|
162
|
+
}
|
108
163
|
node->nsDef = NULL;
|
109
164
|
}
|
110
165
|
|
@@ -126,8 +181,7 @@ recursively_remove_namespaces_from_node(xmlNodePtr node)
|
|
126
181
|
static VALUE
|
127
182
|
url(VALUE self)
|
128
183
|
{
|
129
|
-
xmlDocPtr doc;
|
130
|
-
Data_Get_Struct(self, xmlDoc, doc);
|
184
|
+
xmlDocPtr doc = noko_xml_document_unwrap(self);
|
131
185
|
|
132
186
|
if (doc->URL) { return NOKOGIRI_STR_NEW2(doc->URL); }
|
133
187
|
|
@@ -146,7 +200,7 @@ rb_xml_document_root_set(VALUE self, VALUE rb_new_root)
|
|
146
200
|
xmlDocPtr c_document;
|
147
201
|
xmlNodePtr c_new_root = NULL, c_current_root;
|
148
202
|
|
149
|
-
|
203
|
+
c_document = noko_xml_document_unwrap(self);
|
150
204
|
|
151
205
|
c_current_root = xmlDocGetRootElement(c_document);
|
152
206
|
if (c_current_root) {
|
@@ -190,7 +244,7 @@ rb_xml_document_root(VALUE self)
|
|
190
244
|
xmlDocPtr c_document;
|
191
245
|
xmlNodePtr c_root;
|
192
246
|
|
193
|
-
|
247
|
+
c_document = noko_xml_document_unwrap(self);
|
194
248
|
|
195
249
|
c_root = xmlDocGetRootElement(c_document);
|
196
250
|
if (!c_root) {
|
@@ -209,8 +263,7 @@ rb_xml_document_root(VALUE self)
|
|
209
263
|
static VALUE
|
210
264
|
set_encoding(VALUE self, VALUE encoding)
|
211
265
|
{
|
212
|
-
xmlDocPtr doc;
|
213
|
-
Data_Get_Struct(self, xmlDoc, doc);
|
266
|
+
xmlDocPtr doc = noko_xml_document_unwrap(self);
|
214
267
|
|
215
268
|
if (doc->encoding) {
|
216
269
|
xmlFree(DISCARD_CONST_QUAL_XMLCHAR(doc->encoding));
|
@@ -230,8 +283,7 @@ set_encoding(VALUE self, VALUE encoding)
|
|
230
283
|
static VALUE
|
231
284
|
encoding(VALUE self)
|
232
285
|
{
|
233
|
-
xmlDocPtr doc;
|
234
|
-
Data_Get_Struct(self, xmlDoc, doc);
|
286
|
+
xmlDocPtr doc = noko_xml_document_unwrap(self);
|
235
287
|
|
236
288
|
if (!doc->encoding) { return Qnil; }
|
237
289
|
return NOKOGIRI_STR_NEW2(doc->encoding);
|
@@ -246,8 +298,7 @@ encoding(VALUE self)
|
|
246
298
|
static VALUE
|
247
299
|
version(VALUE self)
|
248
300
|
{
|
249
|
-
xmlDocPtr doc;
|
250
|
-
Data_Get_Struct(self, xmlDoc, doc);
|
301
|
+
xmlDocPtr doc = noko_xml_document_unwrap(self);
|
251
302
|
|
252
303
|
if (!doc->version) { return Qnil; }
|
253
304
|
return NOKOGIRI_STR_NEW2(doc->version);
|
@@ -369,7 +420,7 @@ duplicate_document(int argc, VALUE *argv, VALUE self)
|
|
369
420
|
level = INT2NUM((long)1);
|
370
421
|
}
|
371
422
|
|
372
|
-
|
423
|
+
doc = noko_xml_document_unwrap(self);
|
373
424
|
|
374
425
|
dup = xmlCopyDoc(doc, (int)NUM2INT(level));
|
375
426
|
|
@@ -442,8 +493,7 @@ new (int argc, VALUE *argv, VALUE klass)
|
|
442
493
|
static VALUE
|
443
494
|
remove_namespaces_bang(VALUE self)
|
444
495
|
{
|
445
|
-
xmlDocPtr doc ;
|
446
|
-
Data_Get_Struct(self, xmlDoc, doc);
|
496
|
+
xmlDocPtr doc = noko_xml_document_unwrap(self);
|
447
497
|
|
448
498
|
recursively_remove_namespaces_from_node((xmlNodePtr)doc);
|
449
499
|
return self;
|
@@ -471,7 +521,7 @@ create_entity(int argc, VALUE *argv, VALUE self)
|
|
471
521
|
xmlEntityPtr ptr;
|
472
522
|
xmlDocPtr doc ;
|
473
523
|
|
474
|
-
|
524
|
+
doc = noko_xml_document_unwrap(self);
|
475
525
|
|
476
526
|
rb_scan_args(argc, argv, "14", &name, &type, &external_id, &system_id,
|
477
527
|
&content);
|
@@ -536,6 +586,7 @@ rb_xml_document_canonicalize(int argc, VALUE *argv, VALUE self)
|
|
536
586
|
VALUE rb_mode;
|
537
587
|
VALUE rb_namespaces;
|
538
588
|
VALUE rb_comments_p;
|
589
|
+
int c_mode = 0;
|
539
590
|
xmlChar **c_namespaces;
|
540
591
|
|
541
592
|
xmlDocPtr c_doc;
|
@@ -547,10 +598,18 @@ rb_xml_document_canonicalize(int argc, VALUE *argv, VALUE self)
|
|
547
598
|
VALUE rb_io;
|
548
599
|
|
549
600
|
rb_scan_args(argc, argv, "03", &rb_mode, &rb_namespaces, &rb_comments_p);
|
550
|
-
if (!NIL_P(rb_mode)) {
|
551
|
-
|
601
|
+
if (!NIL_P(rb_mode)) {
|
602
|
+
Check_Type(rb_mode, T_FIXNUM);
|
603
|
+
c_mode = NUM2INT(rb_mode);
|
604
|
+
}
|
605
|
+
if (!NIL_P(rb_namespaces)) {
|
606
|
+
Check_Type(rb_namespaces, T_ARRAY);
|
607
|
+
if (c_mode == XML_C14N_1_0 || c_mode == XML_C14N_1_1) {
|
608
|
+
rb_raise(rb_eRuntimeError, "This canonicalizer does not support this operation");
|
609
|
+
}
|
610
|
+
}
|
552
611
|
|
553
|
-
|
612
|
+
c_doc = noko_xml_document_unwrap(self);
|
554
613
|
|
555
614
|
rb_cStringIO = rb_const_get_at(rb_cObject, rb_intern("StringIO"));
|
556
615
|
rb_io = rb_class_new_instance(0, 0, rb_cStringIO);
|
@@ -569,7 +628,7 @@ rb_xml_document_canonicalize(int argc, VALUE *argv, VALUE self)
|
|
569
628
|
c_namespaces = NULL;
|
570
629
|
} else {
|
571
630
|
long ns_len = RARRAY_LEN(rb_namespaces);
|
572
|
-
c_namespaces =
|
631
|
+
c_namespaces = ruby_xcalloc((size_t)ns_len + 1, sizeof(xmlChar *));
|
573
632
|
for (int j = 0 ; j < ns_len ; j++) {
|
574
633
|
VALUE entry = rb_ary_entry(rb_namespaces, j);
|
575
634
|
c_namespaces[j] = (xmlChar *)StringValueCStr(entry);
|
@@ -577,12 +636,12 @@ rb_xml_document_canonicalize(int argc, VALUE *argv, VALUE self)
|
|
577
636
|
}
|
578
637
|
|
579
638
|
xmlC14NExecute(c_doc, c_callback_wrapper, rb_callback,
|
580
|
-
|
639
|
+
c_mode,
|
581
640
|
c_namespaces,
|
582
641
|
(int)RTEST(rb_comments_p),
|
583
642
|
c_obuf);
|
584
643
|
|
585
|
-
|
644
|
+
ruby_xfree(c_namespaces);
|
586
645
|
xmlOutputBufferClose(c_obuf);
|
587
646
|
|
588
647
|
return rb_funcall(rb_io, rb_intern("string"), 0);
|
@@ -598,9 +657,9 @@ noko_xml_document_wrap_with_init_args(VALUE klass, xmlDocPtr c_document, int arg
|
|
598
657
|
klass = cNokogiriXmlDocument;
|
599
658
|
}
|
600
659
|
|
601
|
-
rb_document =
|
660
|
+
rb_document = TypedData_Wrap_Struct(klass, &noko_xml_document_data_type, c_document);
|
602
661
|
|
603
|
-
tuple = (nokogiriTuplePtr)
|
662
|
+
tuple = (nokogiriTuplePtr)ruby_xmalloc(sizeof(nokogiriTuple));
|
604
663
|
tuple->doc = rb_document;
|
605
664
|
tuple->unlinkedNodes = st_init_numtable_with_size(128);
|
606
665
|
tuple->node_cache = rb_ary_new();
|
@@ -631,6 +690,40 @@ noko_xml_document_wrap(VALUE klass, xmlDocPtr doc)
|
|
631
690
|
return noko_xml_document_wrap_with_init_args(klass, doc, 0, NULL);
|
632
691
|
}
|
633
692
|
|
693
|
+
xmlDocPtr
|
694
|
+
noko_xml_document_unwrap(VALUE rb_document)
|
695
|
+
{
|
696
|
+
xmlDocPtr c_document;
|
697
|
+
TypedData_Get_Struct(rb_document, xmlDoc, &noko_xml_document_data_type, c_document);
|
698
|
+
return c_document;
|
699
|
+
}
|
700
|
+
|
701
|
+
/* Schema creation will remove and deallocate "blank" nodes.
|
702
|
+
* If those blank nodes have been exposed to Ruby, they could get freed
|
703
|
+
* out from under the VALUE pointer. This function checks to see if any of
|
704
|
+
* those nodes have been exposed to Ruby, and if so we should raise an exception.
|
705
|
+
*/
|
706
|
+
int
|
707
|
+
noko_xml_document_has_wrapped_blank_nodes_p(xmlDocPtr c_document)
|
708
|
+
{
|
709
|
+
VALUE cache = DOC_NODE_CACHE(c_document);
|
710
|
+
|
711
|
+
if (NIL_P(cache)) {
|
712
|
+
return 0;
|
713
|
+
}
|
714
|
+
|
715
|
+
for (long jnode = 0; jnode < RARRAY_LEN(cache); jnode++) {
|
716
|
+
xmlNodePtr node;
|
717
|
+
VALUE element = rb_ary_entry(cache, jnode);
|
718
|
+
|
719
|
+
Noko_Node_Get_Struct(element, xmlNode, node);
|
720
|
+
if (xmlIsBlankNode(node)) {
|
721
|
+
return 1;
|
722
|
+
}
|
723
|
+
}
|
724
|
+
|
725
|
+
return 0;
|
726
|
+
}
|
634
727
|
|
635
728
|
void
|
636
729
|
noko_xml_document_pin_node(xmlNodePtr node)
|
@@ -655,7 +748,7 @@ noko_xml_document_pin_namespace(xmlNsPtr ns, xmlDocPtr doc)
|
|
655
748
|
|
656
749
|
|
657
750
|
void
|
658
|
-
noko_init_xml_document()
|
751
|
+
noko_init_xml_document(void)
|
659
752
|
{
|
660
753
|
assert(cNokogiriXmlNode);
|
661
754
|
/*
|
@@ -19,7 +19,7 @@ new (int argc, VALUE *argv, VALUE klass)
|
|
19
19
|
|
20
20
|
rb_scan_args(argc, argv, "1*", &document, &rest);
|
21
21
|
|
22
|
-
|
22
|
+
xml_doc = noko_xml_document_unwrap(document);
|
23
23
|
|
24
24
|
node = xmlNewDocFragment(xml_doc->doc);
|
25
25
|
|
@@ -32,7 +32,7 @@ new (int argc, VALUE *argv, VALUE klass)
|
|
32
32
|
}
|
33
33
|
|
34
34
|
void
|
35
|
-
noko_init_xml_document_fragment()
|
35
|
+
noko_init_xml_document_fragment(void)
|
36
36
|
{
|
37
37
|
assert(cNokogiriXmlNode);
|
38
38
|
/*
|
data/ext/nokogiri/xml_dtd.c
CHANGED
@@ -139,7 +139,7 @@ validate(VALUE self, VALUE document)
|
|
139
139
|
VALUE error_list;
|
140
140
|
|
141
141
|
Noko_Node_Get_Struct(self, xmlDtd, dtd);
|
142
|
-
|
142
|
+
doc = noko_xml_document_unwrap(document);
|
143
143
|
error_list = rb_ary_new();
|
144
144
|
|
145
145
|
ctxt = xmlNewValidCtxt();
|
@@ -190,7 +190,7 @@ external_id(VALUE self)
|
|
190
190
|
}
|
191
191
|
|
192
192
|
void
|
193
|
-
noko_init_xml_dtd()
|
193
|
+
noko_init_xml_dtd(void)
|
194
194
|
{
|
195
195
|
assert(cNokogiriXmlNode);
|
196
196
|
/*
|