nokogiri 1.13.6 → 1.16.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +43 -0
- data/LICENSE-DEPENDENCIES.md +830 -509
- data/LICENSE.md +1 -1
- data/README.md +21 -11
- data/dependencies.yml +34 -15
- data/ext/nokogiri/extconf.rb +167 -48
- data/ext/nokogiri/gumbo.c +21 -11
- data/ext/nokogiri/html4_document.c +3 -4
- data/ext/nokogiri/html4_element_description.c +20 -15
- data/ext/nokogiri/html4_entity_lookup.c +2 -2
- data/ext/nokogiri/html4_sax_parser_context.c +11 -22
- data/ext/nokogiri/html4_sax_push_parser.c +4 -4
- data/ext/nokogiri/nokogiri.c +84 -75
- data/ext/nokogiri/nokogiri.h +46 -16
- data/ext/nokogiri/test_global_handlers.c +2 -2
- data/ext/nokogiri/xml_attr.c +3 -3
- data/ext/nokogiri/xml_attribute_decl.c +5 -5
- data/ext/nokogiri/xml_cdata.c +31 -18
- data/ext/nokogiri/xml_comment.c +2 -2
- data/ext/nokogiri/xml_document.c +135 -38
- data/ext/nokogiri/xml_document_fragment.c +2 -2
- data/ext/nokogiri/xml_dtd.c +9 -9
- data/ext/nokogiri/xml_element_content.c +34 -31
- data/ext/nokogiri/xml_element_decl.c +10 -10
- data/ext/nokogiri/xml_encoding_handler.c +15 -7
- data/ext/nokogiri/xml_entity_decl.c +6 -6
- data/ext/nokogiri/xml_entity_reference.c +2 -2
- data/ext/nokogiri/xml_namespace.c +75 -14
- data/ext/nokogiri/xml_node.c +365 -87
- data/ext/nokogiri/xml_node_set.c +129 -111
- data/ext/nokogiri/xml_processing_instruction.c +2 -2
- data/ext/nokogiri/xml_reader.c +126 -64
- data/ext/nokogiri/xml_relax_ng.c +67 -82
- data/ext/nokogiri/xml_sax_parser.c +45 -20
- data/ext/nokogiri/xml_sax_parser_context.c +50 -30
- data/ext/nokogiri/xml_sax_push_parser.c +31 -12
- data/ext/nokogiri/xml_schema.c +95 -118
- data/ext/nokogiri/xml_syntax_error.c +4 -4
- data/ext/nokogiri/xml_text.c +27 -14
- data/ext/nokogiri/xml_xpath_context.c +213 -136
- data/ext/nokogiri/xslt_stylesheet.c +126 -67
- data/gumbo-parser/Makefile +28 -0
- data/gumbo-parser/src/attribute.h +1 -1
- data/gumbo-parser/src/error.c +10 -6
- data/gumbo-parser/src/error.h +1 -1
- data/gumbo-parser/src/foreign_attrs.c +15 -16
- data/gumbo-parser/src/foreign_attrs.gperf +1 -1
- data/gumbo-parser/src/{gumbo.h → nokogiri_gumbo.h} +1 -0
- data/gumbo-parser/src/parser.c +29 -10
- data/gumbo-parser/src/replacement.h +1 -1
- data/gumbo-parser/src/string_buffer.h +1 -1
- data/gumbo-parser/src/string_piece.c +1 -1
- data/gumbo-parser/src/svg_attrs.c +2 -2
- data/gumbo-parser/src/svg_tags.c +2 -2
- data/gumbo-parser/src/tag.c +2 -1
- data/gumbo-parser/src/tag_lookup.c +7 -7
- data/gumbo-parser/src/tag_lookup.gperf +1 -0
- data/gumbo-parser/src/tag_lookup.h +1 -1
- data/gumbo-parser/src/token_buffer.h +1 -1
- data/gumbo-parser/src/tokenizer.c +2 -1
- data/gumbo-parser/src/tokenizer.h +1 -1
- data/gumbo-parser/src/utf8.c +1 -1
- data/gumbo-parser/src/utf8.h +1 -1
- data/gumbo-parser/src/util.c +1 -3
- data/gumbo-parser/src/util.h +4 -0
- data/gumbo-parser/src/vector.h +1 -1
- data/lib/nokogiri/css/node.rb +2 -2
- data/lib/nokogiri/css/parser_extras.rb +1 -1
- data/lib/nokogiri/css/xpath_visitor.rb +8 -26
- data/lib/nokogiri/css.rb +6 -0
- data/lib/nokogiri/decorators/slop.rb +1 -1
- data/lib/nokogiri/encoding_handler.rb +57 -0
- data/lib/nokogiri/extension.rb +4 -3
- data/lib/nokogiri/html4/document.rb +3 -122
- data/lib/nokogiri/html4/document_fragment.rb +1 -1
- data/lib/nokogiri/html4/element_description_defaults.rb +1827 -365
- data/lib/nokogiri/html4/encoding_reader.rb +121 -0
- data/lib/nokogiri/html4.rb +1 -0
- data/lib/nokogiri/html5/document.rb +113 -36
- data/lib/nokogiri/html5/document_fragment.rb +10 -3
- data/lib/nokogiri/html5/node.rb +8 -5
- data/lib/nokogiri/html5.rb +74 -226
- data/lib/nokogiri/jruby/dependencies.rb +1 -19
- data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
- data/lib/nokogiri/version/constant.rb +1 -1
- data/lib/nokogiri/version/info.rb +16 -14
- data/lib/nokogiri/xml/attr.rb +49 -0
- data/lib/nokogiri/xml/attribute_decl.rb +4 -2
- data/lib/nokogiri/xml/builder.rb +1 -1
- data/lib/nokogiri/xml/document.rb +103 -56
- data/lib/nokogiri/xml/document_fragment.rb +50 -7
- data/lib/nokogiri/xml/element_content.rb +10 -2
- data/lib/nokogiri/xml/element_decl.rb +4 -2
- data/lib/nokogiri/xml/entity_decl.rb +4 -2
- data/lib/nokogiri/xml/namespace.rb +41 -0
- data/lib/nokogiri/xml/node/save_options.rb +14 -4
- data/lib/nokogiri/xml/node.rb +241 -70
- data/lib/nokogiri/xml/node_set.rb +90 -11
- data/lib/nokogiri/xml/parse_options.rb +129 -50
- data/lib/nokogiri/xml/pp/node.rb +28 -15
- data/lib/nokogiri/xml/processing_instruction.rb +2 -1
- data/lib/nokogiri/xml/reader.rb +16 -17
- data/lib/nokogiri/xml/sax/document.rb +1 -1
- data/lib/nokogiri/xml/sax/parser.rb +2 -3
- data/lib/nokogiri/xml/searchable.rb +21 -13
- data/lib/nokogiri/xml/syntax_error.rb +1 -1
- data/lib/nokogiri/xml.rb +1 -1
- data/lib/nokogiri/xslt/stylesheet.rb +29 -7
- data/lib/nokogiri/xslt.rb +75 -5
- data/lib/nokogiri.rb +15 -15
- data/lib/xsd/xmlparser/nokogiri.rb +4 -2
- data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
- data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
- data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
- data/ports/archives/libxml2-2.12.9.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.39.tar.xz +0 -0
- metadata +21 -248
- data/patches/libxml2/0004-use-glibc-strlen.patch +0 -53
- data/patches/libxml2/0005-avoid-isnan-isinf.patch +0 -81
- data/patches/libxml2/0006-update-automake-files-for-arm64.patch +0 -3040
- data/patches/libxml2/0008-htmlParseComment-handle-abruptly-closed-comments.patch +0 -61
- data/patches/libxslt/0001-update-automake-files-for-arm64.patch +0 -3037
- data/ports/archives/libxml2-2.9.14.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.35.tar.xz +0 -0
data/ext/nokogiri/nokogiri.h
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
#ifndef NOKOGIRI_NATIVE
|
2
2
|
#define NOKOGIRI_NATIVE
|
3
3
|
|
4
|
+
#include <ruby/defines.h> // https://github.com/sparklemotion/nokogiri/issues/2696
|
5
|
+
|
4
6
|
#ifdef _MSC_VER
|
5
7
|
# ifndef WIN32_LEAN_AND_MEAN
|
6
8
|
# define WIN32_LEAN_AND_MEAN
|
@@ -23,7 +25,6 @@
|
|
23
25
|
# define NOKOPUBVAR extern
|
24
26
|
#endif
|
25
27
|
|
26
|
-
|
27
28
|
#include <stdlib.h>
|
28
29
|
#include <string.h>
|
29
30
|
#include <assert.h>
|
@@ -50,6 +51,7 @@
|
|
50
51
|
#include <libxslt/xsltconfig.h>
|
51
52
|
#include <libxslt/xsltutils.h>
|
52
53
|
#include <libxslt/transform.h>
|
54
|
+
#include <libxslt/imports.h>
|
53
55
|
#include <libxslt/xsltInternals.h>
|
54
56
|
|
55
57
|
#include <libexslt/exslt.h>
|
@@ -64,6 +66,13 @@ xmlNodePtr xmlLastElementChild(xmlNodePtr parent);
|
|
64
66
|
#define XMLNS_PREFIX "xmlns"
|
65
67
|
#define XMLNS_PREFIX_LEN 6 /* including either colon or \0 */
|
66
68
|
|
69
|
+
#ifndef xmlErrorConstPtr
|
70
|
+
# if LIBXML_VERSION >= 21200
|
71
|
+
# define xmlErrorConstPtr const xmlError *
|
72
|
+
# else
|
73
|
+
# define xmlErrorConstPtr xmlError *
|
74
|
+
# endif
|
75
|
+
#endif
|
67
76
|
|
68
77
|
#include <ruby.h>
|
69
78
|
#include <ruby/st.h>
|
@@ -75,22 +84,25 @@ xmlNodePtr xmlLastElementChild(xmlNodePtr parent);
|
|
75
84
|
#define NOKOGIRI_STR_NEW(str, len) rb_external_str_new_with_enc((const char *)(str), (long)(len), rb_utf8_encoding())
|
76
85
|
#define RBSTR_OR_QNIL(_str) (_str ? NOKOGIRI_STR_NEW2(_str) : Qnil)
|
77
86
|
|
78
|
-
#
|
79
|
-
#
|
80
|
-
#
|
81
|
-
#else
|
82
|
-
#
|
83
|
-
#
|
87
|
+
#ifndef NORETURN_DECL
|
88
|
+
# if defined(__GNUC__)
|
89
|
+
# define NORETURN_DECL __attribute__ ((noreturn))
|
90
|
+
# else
|
91
|
+
# define NORETURN_DECL
|
92
|
+
# endif
|
84
93
|
#endif
|
85
94
|
|
86
|
-
#ifndef
|
95
|
+
#ifndef PRINTFLIKE_DECL
|
87
96
|
# if defined(__GNUC__)
|
88
|
-
# define
|
97
|
+
# define PRINTFLIKE_DECL(stringidx, argidx) __attribute__ ((format(printf,stringidx,argidx)))
|
89
98
|
# else
|
90
|
-
# define
|
99
|
+
# define PRINTFLIKE_DECL(stringidx, argidx)
|
91
100
|
# endif
|
92
101
|
#endif
|
93
102
|
|
103
|
+
#if defined(TRUFFLERUBY) && !defined(NOKOGIRI_PACKAGED_LIBRARIES)
|
104
|
+
# define TRUFFLERUBY_NOKOGIRI_SYSTEM_LIBRARIES
|
105
|
+
#endif
|
94
106
|
|
95
107
|
NOKOPUBVAR VALUE mNokogiri ;
|
96
108
|
NOKOPUBVAR VALUE mNokogiriGumbo ;
|
@@ -162,14 +174,17 @@ typedef struct _nokogiriXsltStylesheetTuple {
|
|
162
174
|
VALUE func_instances;
|
163
175
|
} nokogiriXsltStylesheetTuple;
|
164
176
|
|
165
|
-
int vasprintf(char **strp, const char *fmt, va_list ap);
|
166
177
|
void noko_xml_document_pin_node(xmlNodePtr);
|
167
178
|
void noko_xml_document_pin_namespace(xmlNsPtr, xmlDocPtr);
|
179
|
+
int noko_xml_document_has_wrapped_blank_nodes_p(xmlDocPtr c_document);
|
168
180
|
|
169
181
|
int noko_io_read(void *ctx, char *buffer, int len);
|
170
182
|
int noko_io_write(void *ctx, char *buffer, int len);
|
171
183
|
int noko_io_close(void *ctx);
|
172
184
|
|
185
|
+
#define Noko_Node_Get_Struct(obj,type,sval) ((sval) = (type*)DATA_PTR(obj))
|
186
|
+
#define Noko_Namespace_Get_Struct(obj,type,sval) ((sval) = (type*)DATA_PTR(obj))
|
187
|
+
|
173
188
|
VALUE noko_xml_node_wrap(VALUE klass, xmlNodePtr node) ;
|
174
189
|
VALUE noko_xml_node_wrap_node_set_result(xmlNodePtr node, VALUE node_set) ;
|
175
190
|
VALUE noko_xml_node_attrs(xmlNodePtr node) ;
|
@@ -180,12 +195,21 @@ VALUE noko_xml_namespace_wrap_xpath_copy(xmlNsPtr node);
|
|
180
195
|
VALUE noko_xml_element_content_wrap(VALUE doc, xmlElementContentPtr element);
|
181
196
|
|
182
197
|
VALUE noko_xml_node_set_wrap(xmlNodeSetPtr node_set, VALUE document) ;
|
198
|
+
xmlNodeSetPtr noko_xml_node_set_unwrap(VALUE rb_node_set) ;
|
183
199
|
|
184
200
|
VALUE noko_xml_document_wrap_with_init_args(VALUE klass, xmlDocPtr doc, int argc, VALUE *argv);
|
185
201
|
VALUE noko_xml_document_wrap(VALUE klass, xmlDocPtr doc);
|
202
|
+
xmlDocPtr noko_xml_document_unwrap(VALUE rb_document);
|
186
203
|
NOKOPUBFUN VALUE Nokogiri_wrap_xml_document(VALUE klass,
|
187
204
|
xmlDocPtr doc); /* deprecated. use noko_xml_document_wrap() instead. */
|
188
205
|
|
206
|
+
xmlSAXHandlerPtr noko_sax_handler_unwrap(VALUE rb_sax_handler);
|
207
|
+
|
208
|
+
xmlParserCtxtPtr noko_xml_sax_push_parser_unwrap(VALUE rb_parser);
|
209
|
+
|
210
|
+
VALUE noko_xml_sax_parser_context_wrap(VALUE klass, xmlParserCtxtPtr c_context);
|
211
|
+
xmlParserCtxtPtr noko_xml_sax_parser_context_unwrap(VALUE rb_context);
|
212
|
+
|
189
213
|
#define DOC_RUBY_OBJECT_TEST(x) ((nokogiriTuplePtr)(x->_private))
|
190
214
|
#define DOC_RUBY_OBJECT(x) (((nokogiriTuplePtr)(x->_private))->doc)
|
191
215
|
#define DOC_UNLINKED_NODE_HASH(x) (((nokogiriTuplePtr)(x->_private))->unlinkedNodes)
|
@@ -195,18 +219,24 @@ NOKOPUBFUN VALUE Nokogiri_wrap_xml_document(VALUE klass,
|
|
195
219
|
#define NOKOGIRI_SAX_SELF(_ctxt) ((nokogiriSAXTuplePtr)(_ctxt))->self
|
196
220
|
#define NOKOGIRI_SAX_CTXT(_ctxt) ((nokogiriSAXTuplePtr)(_ctxt))->ctxt
|
197
221
|
#define NOKOGIRI_SAX_TUPLE_NEW(_ctxt, _self) nokogiri_sax_tuple_new(_ctxt, _self)
|
198
|
-
#define NOKOGIRI_SAX_TUPLE_DESTROY(_tuple)
|
222
|
+
#define NOKOGIRI_SAX_TUPLE_DESTROY(_tuple) ruby_xfree(_tuple)
|
199
223
|
|
200
224
|
#define DISCARD_CONST_QUAL(t, v) ((t)(uintptr_t)(v))
|
201
225
|
#define DISCARD_CONST_QUAL_XMLCHAR(v) DISCARD_CONST_QUAL(xmlChar *, v)
|
202
226
|
|
227
|
+
#if HAVE_RB_CATEGORY_WARNING
|
228
|
+
# define NOKO_WARN_DEPRECATION(message...) rb_category_warning(RB_WARN_CATEGORY_DEPRECATED, message)
|
229
|
+
#else
|
230
|
+
# define NOKO_WARN_DEPRECATION(message...) rb_warning(message)
|
231
|
+
#endif
|
232
|
+
|
203
233
|
void Nokogiri_structured_error_func_save(libxmlStructuredErrorHandlerState *handler_state);
|
204
234
|
void Nokogiri_structured_error_func_save_and_set(libxmlStructuredErrorHandlerState *handler_state, void *user_data,
|
205
235
|
xmlStructuredErrorFunc handler);
|
206
236
|
void Nokogiri_structured_error_func_restore(libxmlStructuredErrorHandlerState *handler_state);
|
207
|
-
VALUE Nokogiri_wrap_xml_syntax_error(
|
208
|
-
void Nokogiri_error_array_pusher(void *ctx,
|
209
|
-
|
237
|
+
VALUE Nokogiri_wrap_xml_syntax_error(xmlErrorConstPtr error);
|
238
|
+
void Nokogiri_error_array_pusher(void *ctx, xmlErrorConstPtr error);
|
239
|
+
NORETURN_DECL void Nokogiri_error_raise(void *ctx, xmlErrorConstPtr error);
|
210
240
|
void Nokogiri_marshal_xpath_funcall_and_return_values(xmlXPathParserContextPtr ctx, int nargs, VALUE handler,
|
211
241
|
const char *function_name) ;
|
212
242
|
|
@@ -214,7 +244,7 @@ static inline
|
|
214
244
|
nokogiriSAXTuplePtr
|
215
245
|
nokogiri_sax_tuple_new(xmlParserCtxtPtr ctxt, VALUE self)
|
216
246
|
{
|
217
|
-
nokogiriSAXTuplePtr tuple =
|
247
|
+
nokogiriSAXTuplePtr tuple = ruby_xmalloc(sizeof(nokogiriSAXTuple));
|
218
248
|
tuple->self = self;
|
219
249
|
tuple->ctxt = ctxt;
|
220
250
|
return tuple;
|
@@ -3,7 +3,7 @@
|
|
3
3
|
static VALUE foreign_error_handler_block = Qnil;
|
4
4
|
|
5
5
|
static void
|
6
|
-
foreign_error_handler(void *user_data,
|
6
|
+
foreign_error_handler(void *user_data, xmlErrorConstPtr c_error)
|
7
7
|
{
|
8
8
|
rb_funcall(foreign_error_handler_block, rb_intern("call"), 0);
|
9
9
|
}
|
@@ -32,7 +32,7 @@ rb_foreign_error_handler(VALUE klass)
|
|
32
32
|
* Do NOT use this outside of the Nokogiri test suite.
|
33
33
|
*/
|
34
34
|
void
|
35
|
-
noko_init_test_global_handlers()
|
35
|
+
noko_init_test_global_handlers(void)
|
36
36
|
{
|
37
37
|
VALUE mNokogiriTest = rb_define_module_under(mNokogiri, "Test");
|
38
38
|
|
data/ext/nokogiri/xml_attr.c
CHANGED
@@ -16,7 +16,7 @@ set_value(VALUE self, VALUE content)
|
|
16
16
|
xmlChar *value;
|
17
17
|
xmlNode *cur;
|
18
18
|
|
19
|
-
|
19
|
+
Noko_Node_Get_Struct(self, xmlAttr, attr);
|
20
20
|
|
21
21
|
if (attr->children) {
|
22
22
|
xmlFreeNodeList(attr->children);
|
@@ -68,7 +68,7 @@ new (int argc, VALUE *argv, VALUE klass)
|
|
68
68
|
rb_raise(rb_eArgError, "parameter must be a Nokogiri::XML::Document");
|
69
69
|
}
|
70
70
|
|
71
|
-
|
71
|
+
xml_doc = noko_xml_document_unwrap(document);
|
72
72
|
|
73
73
|
node = xmlNewDocProp(
|
74
74
|
xml_doc,
|
@@ -89,7 +89,7 @@ new (int argc, VALUE *argv, VALUE klass)
|
|
89
89
|
}
|
90
90
|
|
91
91
|
void
|
92
|
-
noko_init_xml_attr()
|
92
|
+
noko_init_xml_attr(void)
|
93
93
|
{
|
94
94
|
assert(cNokogiriXmlNode);
|
95
95
|
/*
|
@@ -12,8 +12,8 @@ static VALUE
|
|
12
12
|
attribute_type(VALUE self)
|
13
13
|
{
|
14
14
|
xmlAttributePtr node;
|
15
|
-
|
16
|
-
return INT2NUM(
|
15
|
+
Noko_Node_Get_Struct(self, xmlAttribute, node);
|
16
|
+
return INT2NUM(node->atype);
|
17
17
|
}
|
18
18
|
|
19
19
|
/*
|
@@ -26,7 +26,7 @@ static VALUE
|
|
26
26
|
default_value(VALUE self)
|
27
27
|
{
|
28
28
|
xmlAttributePtr node;
|
29
|
-
|
29
|
+
Noko_Node_Get_Struct(self, xmlAttribute, node);
|
30
30
|
|
31
31
|
if (node->defaultValue) { return NOKOGIRI_STR_NEW2(node->defaultValue); }
|
32
32
|
return Qnil;
|
@@ -45,7 +45,7 @@ enumeration(VALUE self)
|
|
45
45
|
xmlEnumerationPtr enm;
|
46
46
|
VALUE list;
|
47
47
|
|
48
|
-
|
48
|
+
Noko_Node_Get_Struct(self, xmlAttribute, node);
|
49
49
|
|
50
50
|
list = rb_ary_new();
|
51
51
|
enm = node->tree;
|
@@ -59,7 +59,7 @@ enumeration(VALUE self)
|
|
59
59
|
}
|
60
60
|
|
61
61
|
void
|
62
|
-
noko_init_xml_attribute_decl()
|
62
|
+
noko_init_xml_attribute_decl(void)
|
63
63
|
{
|
64
64
|
assert(cNokogiriXmlNode);
|
65
65
|
cNokogiriXmlAttributeDecl = rb_define_class_under(mNokogiriXml, "AttributeDecl", cNokogiriXmlNode);
|
data/ext/nokogiri/xml_cdata.c
CHANGED
@@ -12,31 +12,44 @@ VALUE cNokogiriXmlCData;
|
|
12
12
|
* raise a TypeError exception.
|
13
13
|
*/
|
14
14
|
static VALUE
|
15
|
-
|
15
|
+
rb_xml_cdata_s_new(int argc, VALUE *argv, VALUE klass)
|
16
16
|
{
|
17
|
-
xmlDocPtr
|
18
|
-
xmlNodePtr
|
19
|
-
VALUE
|
20
|
-
VALUE
|
21
|
-
VALUE
|
17
|
+
xmlDocPtr c_document;
|
18
|
+
xmlNodePtr c_node;
|
19
|
+
VALUE rb_document;
|
20
|
+
VALUE rb_content;
|
21
|
+
VALUE rb_rest;
|
22
22
|
VALUE rb_node;
|
23
|
-
xmlChar *
|
24
|
-
int
|
23
|
+
xmlChar *c_content = NULL;
|
24
|
+
int c_content_len = 0;
|
25
25
|
|
26
|
-
rb_scan_args(argc, argv, "2*", &
|
26
|
+
rb_scan_args(argc, argv, "2*", &rb_document, &rb_content, &rb_rest);
|
27
27
|
|
28
|
-
|
28
|
+
if (!rb_obj_is_kind_of(rb_document, cNokogiriXmlNode)) {
|
29
|
+
rb_raise(rb_eTypeError,
|
30
|
+
"expected first parameter to be a Nokogiri::XML::Document, received %"PRIsVALUE,
|
31
|
+
rb_obj_class(rb_document));
|
32
|
+
}
|
33
|
+
|
34
|
+
if (!rb_obj_is_kind_of(rb_document, cNokogiriXmlDocument)) {
|
35
|
+
xmlNodePtr deprecated_node_type_arg;
|
36
|
+
NOKO_WARN_DEPRECATION("Passing a Node as the first parameter to CDATA.new is deprecated. Please pass a Document instead. This will become an error in Nokogiri v1.17.0."); // TODO: deprecated in v1.15.3, remove in v1.17.0
|
37
|
+
Noko_Node_Get_Struct(rb_document, xmlNode, deprecated_node_type_arg);
|
38
|
+
c_document = deprecated_node_type_arg->doc;
|
39
|
+
} else {
|
40
|
+
c_document = noko_xml_document_unwrap(rb_document);
|
41
|
+
}
|
29
42
|
|
30
|
-
if (!NIL_P(
|
31
|
-
|
32
|
-
|
43
|
+
if (!NIL_P(rb_content)) {
|
44
|
+
c_content = (xmlChar *)StringValuePtr(rb_content);
|
45
|
+
c_content_len = RSTRING_LENINT(rb_content);
|
33
46
|
}
|
34
47
|
|
35
|
-
|
48
|
+
c_node = xmlNewCDataBlock(c_document, c_content, c_content_len);
|
36
49
|
|
37
|
-
noko_xml_document_pin_node(
|
50
|
+
noko_xml_document_pin_node(c_node);
|
38
51
|
|
39
|
-
rb_node = noko_xml_node_wrap(klass,
|
52
|
+
rb_node = noko_xml_node_wrap(klass, c_node);
|
40
53
|
rb_obj_call_init(rb_node, argc, argv);
|
41
54
|
|
42
55
|
if (rb_block_given_p()) { rb_yield(rb_node); }
|
@@ -45,7 +58,7 @@ new (int argc, VALUE *argv, VALUE klass)
|
|
45
58
|
}
|
46
59
|
|
47
60
|
void
|
48
|
-
noko_init_xml_cdata()
|
61
|
+
noko_init_xml_cdata(void)
|
49
62
|
{
|
50
63
|
assert(cNokogiriXmlText);
|
51
64
|
/*
|
@@ -53,5 +66,5 @@ noko_init_xml_cdata()
|
|
53
66
|
*/
|
54
67
|
cNokogiriXmlCData = rb_define_class_under(mNokogiriXml, "CDATA", cNokogiriXmlText);
|
55
68
|
|
56
|
-
rb_define_singleton_method(cNokogiriXmlCData, "new",
|
69
|
+
rb_define_singleton_method(cNokogiriXmlCData, "new", rb_xml_cdata_s_new, -1);
|
57
70
|
}
|
data/ext/nokogiri/xml_comment.c
CHANGED
@@ -30,7 +30,7 @@ new (int argc, VALUE *argv, VALUE klass)
|
|
30
30
|
rb_raise(rb_eArgError, "first argument must be a XML::Document or XML::Node");
|
31
31
|
}
|
32
32
|
|
33
|
-
|
33
|
+
xml_doc = noko_xml_document_unwrap(document);
|
34
34
|
|
35
35
|
node = xmlNewDocComment(
|
36
36
|
xml_doc,
|
@@ -48,7 +48,7 @@ new (int argc, VALUE *argv, VALUE klass)
|
|
48
48
|
}
|
49
49
|
|
50
50
|
void
|
51
|
-
noko_init_xml_comment()
|
51
|
+
noko_init_xml_comment(void)
|
52
52
|
{
|
53
53
|
assert(cNokogiriXmlCharacterData);
|
54
54
|
/*
|
data/ext/nokogiri/xml_document.c
CHANGED
@@ -51,8 +51,9 @@ remove_private(xmlNodePtr node)
|
|
51
51
|
}
|
52
52
|
|
53
53
|
static void
|
54
|
-
mark(
|
54
|
+
mark(void *data)
|
55
55
|
{
|
56
|
+
xmlDocPtr doc = (xmlDocPtr)data;
|
56
57
|
nokogiriTuplePtr tuple = (nokogiriTuplePtr)doc->_private;
|
57
58
|
if (tuple) {
|
58
59
|
rb_gc_mark(tuple->doc);
|
@@ -61,33 +62,87 @@ mark(xmlDocPtr doc)
|
|
61
62
|
}
|
62
63
|
|
63
64
|
static void
|
64
|
-
dealloc(
|
65
|
+
dealloc(void *data)
|
65
66
|
{
|
67
|
+
xmlDocPtr doc = (xmlDocPtr)data;
|
66
68
|
st_table *node_hash;
|
67
69
|
|
68
|
-
NOKOGIRI_DEBUG_START(doc);
|
69
|
-
|
70
70
|
node_hash = DOC_UNLINKED_NODE_HASH(doc);
|
71
71
|
|
72
72
|
st_foreach(node_hash, dealloc_node_i, (st_data_t)doc);
|
73
73
|
st_free_table(node_hash);
|
74
74
|
|
75
|
-
|
75
|
+
ruby_xfree(doc->_private);
|
76
76
|
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
77
|
+
#if defined(__GNUC__) && __GNUC__ >= 5
|
78
|
+
#pragma GCC diagnostic push
|
79
|
+
#pragma GCC diagnostic ignored "-Wdeprecated-declarations" // xmlDeregisterNodeDefault is deprecated as of libxml2 2.11.0
|
80
|
+
#endif
|
81
|
+
/*
|
82
|
+
* libxml-ruby < 3.0.0 uses xmlDeregisterNodeDefault. If the user is using one of those older
|
83
|
+
* versions, the registered callback from libxml-ruby will access the _private pointers set by
|
84
|
+
* nokogiri, which will result in segfaults.
|
85
|
+
*
|
86
|
+
* To avoid this, we need to clear the _private pointers from all nodes in this document tree
|
87
|
+
* before that callback gets invoked.
|
88
|
+
*
|
89
|
+
* libxml-ruby 3.0.0 was released in 2017-02, so at some point we can probably safely remove this
|
90
|
+
* safeguard (though probably pairing with a runtime check on the libxml-ruby version).
|
81
91
|
*/
|
82
92
|
if (xmlDeregisterNodeDefaultValue) {
|
83
93
|
remove_private((xmlNodePtr)doc);
|
84
94
|
}
|
95
|
+
#if defined(__GNUC__) && __GNUC__ >= 5
|
96
|
+
#pragma GCC diagnostic pop
|
97
|
+
#endif
|
85
98
|
|
86
99
|
xmlFreeDoc(doc);
|
100
|
+
}
|
101
|
+
|
102
|
+
static size_t
|
103
|
+
memsize_node(const xmlNodePtr node)
|
104
|
+
{
|
105
|
+
/* note we don't count namespace definitions, just going for a good-enough number here */
|
106
|
+
xmlNodePtr child;
|
107
|
+
xmlAttrPtr property;
|
108
|
+
size_t memsize = 0;
|
109
|
+
|
110
|
+
memsize += xmlStrlen(node->name);
|
111
|
+
|
112
|
+
if (node->type == XML_ELEMENT_NODE) {
|
113
|
+
for (property = node->properties; property; property = property->next) {
|
114
|
+
memsize += sizeof(xmlAttr) + memsize_node((xmlNodePtr)property);
|
115
|
+
}
|
116
|
+
}
|
117
|
+
if (node->type == XML_TEXT_NODE) {
|
118
|
+
memsize += xmlStrlen(node->content);
|
119
|
+
}
|
120
|
+
for (child = node->children; child; child = child->next) {
|
121
|
+
memsize += sizeof(xmlNode) + memsize_node(child);
|
122
|
+
}
|
123
|
+
return memsize;
|
124
|
+
}
|
87
125
|
|
88
|
-
|
126
|
+
static size_t
|
127
|
+
memsize(const void *data)
|
128
|
+
{
|
129
|
+
xmlDocPtr doc = (const xmlDocPtr)data;
|
130
|
+
size_t memsize = sizeof(xmlDoc);
|
131
|
+
/* This may not account for all memory use */
|
132
|
+
memsize += memsize_node((xmlNodePtr)doc);
|
133
|
+
return memsize;
|
89
134
|
}
|
90
135
|
|
136
|
+
static const rb_data_type_t noko_xml_document_data_type = {
|
137
|
+
.wrap_struct_name = "Nokogiri::XML::Document",
|
138
|
+
.function = {
|
139
|
+
.dmark = mark,
|
140
|
+
.dfree = dealloc,
|
141
|
+
.dsize = memsize,
|
142
|
+
},
|
143
|
+
// .flags = RUBY_TYPED_FREE_IMMEDIATELY, // TODO see https://github.com/sparklemotion/nokogiri/issues/2822
|
144
|
+
};
|
145
|
+
|
91
146
|
static void
|
92
147
|
recursively_remove_namespaces_from_node(xmlNodePtr node)
|
93
148
|
{
|
@@ -104,7 +159,11 @@ recursively_remove_namespaces_from_node(xmlNodePtr node)
|
|
104
159
|
(node->type == XML_XINCLUDE_START) ||
|
105
160
|
(node->type == XML_XINCLUDE_END)) &&
|
106
161
|
node->nsDef) {
|
107
|
-
|
162
|
+
xmlNsPtr curr = node->nsDef;
|
163
|
+
while (curr) {
|
164
|
+
noko_xml_document_pin_namespace(curr, node->doc);
|
165
|
+
curr = curr->next;
|
166
|
+
}
|
108
167
|
node->nsDef = NULL;
|
109
168
|
}
|
110
169
|
|
@@ -126,8 +185,7 @@ recursively_remove_namespaces_from_node(xmlNodePtr node)
|
|
126
185
|
static VALUE
|
127
186
|
url(VALUE self)
|
128
187
|
{
|
129
|
-
xmlDocPtr doc;
|
130
|
-
Data_Get_Struct(self, xmlDoc, doc);
|
188
|
+
xmlDocPtr doc = noko_xml_document_unwrap(self);
|
131
189
|
|
132
190
|
if (doc->URL) { return NOKOGIRI_STR_NEW2(doc->URL); }
|
133
191
|
|
@@ -146,7 +204,7 @@ rb_xml_document_root_set(VALUE self, VALUE rb_new_root)
|
|
146
204
|
xmlDocPtr c_document;
|
147
205
|
xmlNodePtr c_new_root = NULL, c_current_root;
|
148
206
|
|
149
|
-
|
207
|
+
c_document = noko_xml_document_unwrap(self);
|
150
208
|
|
151
209
|
c_current_root = xmlDocGetRootElement(c_document);
|
152
210
|
if (c_current_root) {
|
@@ -161,7 +219,7 @@ rb_xml_document_root_set(VALUE self, VALUE rb_new_root)
|
|
161
219
|
rb_obj_class(rb_new_root));
|
162
220
|
}
|
163
221
|
|
164
|
-
|
222
|
+
Noko_Node_Get_Struct(rb_new_root, xmlNode, c_new_root);
|
165
223
|
|
166
224
|
/* If the new root's document is not the same as the current document,
|
167
225
|
* then we need to dup the node in to this document. */
|
@@ -190,7 +248,7 @@ rb_xml_document_root(VALUE self)
|
|
190
248
|
xmlDocPtr c_document;
|
191
249
|
xmlNodePtr c_root;
|
192
250
|
|
193
|
-
|
251
|
+
c_document = noko_xml_document_unwrap(self);
|
194
252
|
|
195
253
|
c_root = xmlDocGetRootElement(c_document);
|
196
254
|
if (!c_root) {
|
@@ -209,8 +267,7 @@ rb_xml_document_root(VALUE self)
|
|
209
267
|
static VALUE
|
210
268
|
set_encoding(VALUE self, VALUE encoding)
|
211
269
|
{
|
212
|
-
xmlDocPtr doc;
|
213
|
-
Data_Get_Struct(self, xmlDoc, doc);
|
270
|
+
xmlDocPtr doc = noko_xml_document_unwrap(self);
|
214
271
|
|
215
272
|
if (doc->encoding) {
|
216
273
|
xmlFree(DISCARD_CONST_QUAL_XMLCHAR(doc->encoding));
|
@@ -230,8 +287,7 @@ set_encoding(VALUE self, VALUE encoding)
|
|
230
287
|
static VALUE
|
231
288
|
encoding(VALUE self)
|
232
289
|
{
|
233
|
-
xmlDocPtr doc;
|
234
|
-
Data_Get_Struct(self, xmlDoc, doc);
|
290
|
+
xmlDocPtr doc = noko_xml_document_unwrap(self);
|
235
291
|
|
236
292
|
if (!doc->encoding) { return Qnil; }
|
237
293
|
return NOKOGIRI_STR_NEW2(doc->encoding);
|
@@ -246,8 +302,7 @@ encoding(VALUE self)
|
|
246
302
|
static VALUE
|
247
303
|
version(VALUE self)
|
248
304
|
{
|
249
|
-
xmlDocPtr doc;
|
250
|
-
Data_Get_Struct(self, xmlDoc, doc);
|
305
|
+
xmlDocPtr doc = noko_xml_document_unwrap(self);
|
251
306
|
|
252
307
|
if (!doc->version) { return Qnil; }
|
253
308
|
return NOKOGIRI_STR_NEW2(doc->version);
|
@@ -286,7 +341,7 @@ read_io(VALUE klass,
|
|
286
341
|
xmlSetStructuredErrorFunc(NULL, NULL);
|
287
342
|
|
288
343
|
if (doc == NULL) {
|
289
|
-
|
344
|
+
xmlErrorConstPtr error;
|
290
345
|
|
291
346
|
xmlFreeDoc(doc);
|
292
347
|
|
@@ -332,7 +387,7 @@ read_memory(VALUE klass,
|
|
332
387
|
xmlSetStructuredErrorFunc(NULL, NULL);
|
333
388
|
|
334
389
|
if (doc == NULL) {
|
335
|
-
|
390
|
+
xmlErrorConstPtr error;
|
336
391
|
|
337
392
|
xmlFreeDoc(doc);
|
338
393
|
|
@@ -369,7 +424,7 @@ duplicate_document(int argc, VALUE *argv, VALUE self)
|
|
369
424
|
level = INT2NUM((long)1);
|
370
425
|
}
|
371
426
|
|
372
|
-
|
427
|
+
doc = noko_xml_document_unwrap(self);
|
373
428
|
|
374
429
|
dup = xmlCopyDoc(doc, (int)NUM2INT(level));
|
375
430
|
|
@@ -442,8 +497,7 @@ new (int argc, VALUE *argv, VALUE klass)
|
|
442
497
|
static VALUE
|
443
498
|
remove_namespaces_bang(VALUE self)
|
444
499
|
{
|
445
|
-
xmlDocPtr doc ;
|
446
|
-
Data_Get_Struct(self, xmlDoc, doc);
|
500
|
+
xmlDocPtr doc = noko_xml_document_unwrap(self);
|
447
501
|
|
448
502
|
recursively_remove_namespaces_from_node((xmlNodePtr)doc);
|
449
503
|
return self;
|
@@ -471,7 +525,7 @@ create_entity(int argc, VALUE *argv, VALUE self)
|
|
471
525
|
xmlEntityPtr ptr;
|
472
526
|
xmlDocPtr doc ;
|
473
527
|
|
474
|
-
|
528
|
+
doc = noko_xml_document_unwrap(self);
|
475
529
|
|
476
530
|
rb_scan_args(argc, argv, "14", &name, &type, &external_id, &system_id,
|
477
531
|
&content);
|
@@ -487,7 +541,7 @@ create_entity(int argc, VALUE *argv, VALUE self)
|
|
487
541
|
);
|
488
542
|
|
489
543
|
if (NULL == ptr) {
|
490
|
-
|
544
|
+
xmlErrorConstPtr error = xmlGetLastError();
|
491
545
|
if (error) {
|
492
546
|
rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
|
493
547
|
} else {
|
@@ -536,6 +590,7 @@ rb_xml_document_canonicalize(int argc, VALUE *argv, VALUE self)
|
|
536
590
|
VALUE rb_mode;
|
537
591
|
VALUE rb_namespaces;
|
538
592
|
VALUE rb_comments_p;
|
593
|
+
int c_mode = 0;
|
539
594
|
xmlChar **c_namespaces;
|
540
595
|
|
541
596
|
xmlDocPtr c_doc;
|
@@ -547,10 +602,18 @@ rb_xml_document_canonicalize(int argc, VALUE *argv, VALUE self)
|
|
547
602
|
VALUE rb_io;
|
548
603
|
|
549
604
|
rb_scan_args(argc, argv, "03", &rb_mode, &rb_namespaces, &rb_comments_p);
|
550
|
-
if (!NIL_P(rb_mode)) {
|
551
|
-
|
605
|
+
if (!NIL_P(rb_mode)) {
|
606
|
+
Check_Type(rb_mode, T_FIXNUM);
|
607
|
+
c_mode = NUM2INT(rb_mode);
|
608
|
+
}
|
609
|
+
if (!NIL_P(rb_namespaces)) {
|
610
|
+
Check_Type(rb_namespaces, T_ARRAY);
|
611
|
+
if (c_mode == XML_C14N_1_0 || c_mode == XML_C14N_1_1) {
|
612
|
+
rb_raise(rb_eRuntimeError, "This canonicalizer does not support this operation");
|
613
|
+
}
|
614
|
+
}
|
552
615
|
|
553
|
-
|
616
|
+
c_doc = noko_xml_document_unwrap(self);
|
554
617
|
|
555
618
|
rb_cStringIO = rb_const_get_at(rb_cObject, rb_intern("StringIO"));
|
556
619
|
rb_io = rb_class_new_instance(0, 0, rb_cStringIO);
|
@@ -569,7 +632,7 @@ rb_xml_document_canonicalize(int argc, VALUE *argv, VALUE self)
|
|
569
632
|
c_namespaces = NULL;
|
570
633
|
} else {
|
571
634
|
long ns_len = RARRAY_LEN(rb_namespaces);
|
572
|
-
c_namespaces =
|
635
|
+
c_namespaces = ruby_xcalloc((size_t)ns_len + 1, sizeof(xmlChar *));
|
573
636
|
for (int j = 0 ; j < ns_len ; j++) {
|
574
637
|
VALUE entry = rb_ary_entry(rb_namespaces, j);
|
575
638
|
c_namespaces[j] = (xmlChar *)StringValueCStr(entry);
|
@@ -577,12 +640,12 @@ rb_xml_document_canonicalize(int argc, VALUE *argv, VALUE self)
|
|
577
640
|
}
|
578
641
|
|
579
642
|
xmlC14NExecute(c_doc, c_callback_wrapper, rb_callback,
|
580
|
-
|
643
|
+
c_mode,
|
581
644
|
c_namespaces,
|
582
645
|
(int)RTEST(rb_comments_p),
|
583
646
|
c_obuf);
|
584
647
|
|
585
|
-
|
648
|
+
ruby_xfree(c_namespaces);
|
586
649
|
xmlOutputBufferClose(c_obuf);
|
587
650
|
|
588
651
|
return rb_funcall(rb_io, rb_intern("string"), 0);
|
@@ -598,9 +661,9 @@ noko_xml_document_wrap_with_init_args(VALUE klass, xmlDocPtr c_document, int arg
|
|
598
661
|
klass = cNokogiriXmlDocument;
|
599
662
|
}
|
600
663
|
|
601
|
-
rb_document =
|
664
|
+
rb_document = TypedData_Wrap_Struct(klass, &noko_xml_document_data_type, c_document);
|
602
665
|
|
603
|
-
tuple = (nokogiriTuplePtr)
|
666
|
+
tuple = (nokogiriTuplePtr)ruby_xmalloc(sizeof(nokogiriTuple));
|
604
667
|
tuple->doc = rb_document;
|
605
668
|
tuple->unlinkedNodes = st_init_numtable_with_size(128);
|
606
669
|
tuple->node_cache = rb_ary_new();
|
@@ -631,6 +694,40 @@ noko_xml_document_wrap(VALUE klass, xmlDocPtr doc)
|
|
631
694
|
return noko_xml_document_wrap_with_init_args(klass, doc, 0, NULL);
|
632
695
|
}
|
633
696
|
|
697
|
+
xmlDocPtr
|
698
|
+
noko_xml_document_unwrap(VALUE rb_document)
|
699
|
+
{
|
700
|
+
xmlDocPtr c_document;
|
701
|
+
TypedData_Get_Struct(rb_document, xmlDoc, &noko_xml_document_data_type, c_document);
|
702
|
+
return c_document;
|
703
|
+
}
|
704
|
+
|
705
|
+
/* Schema creation will remove and deallocate "blank" nodes.
|
706
|
+
* If those blank nodes have been exposed to Ruby, they could get freed
|
707
|
+
* out from under the VALUE pointer. This function checks to see if any of
|
708
|
+
* those nodes have been exposed to Ruby, and if so we should raise an exception.
|
709
|
+
*/
|
710
|
+
int
|
711
|
+
noko_xml_document_has_wrapped_blank_nodes_p(xmlDocPtr c_document)
|
712
|
+
{
|
713
|
+
VALUE cache = DOC_NODE_CACHE(c_document);
|
714
|
+
|
715
|
+
if (NIL_P(cache)) {
|
716
|
+
return 0;
|
717
|
+
}
|
718
|
+
|
719
|
+
for (long jnode = 0; jnode < RARRAY_LEN(cache); jnode++) {
|
720
|
+
xmlNodePtr node;
|
721
|
+
VALUE element = rb_ary_entry(cache, jnode);
|
722
|
+
|
723
|
+
Noko_Node_Get_Struct(element, xmlNode, node);
|
724
|
+
if (xmlIsBlankNode(node)) {
|
725
|
+
return 1;
|
726
|
+
}
|
727
|
+
}
|
728
|
+
|
729
|
+
return 0;
|
730
|
+
}
|
634
731
|
|
635
732
|
void
|
636
733
|
noko_xml_document_pin_node(xmlNodePtr node)
|
@@ -655,7 +752,7 @@ noko_xml_document_pin_namespace(xmlNsPtr ns, xmlDocPtr doc)
|
|
655
752
|
|
656
753
|
|
657
754
|
void
|
658
|
-
noko_init_xml_document()
|
755
|
+
noko_init_xml_document(void)
|
659
756
|
{
|
660
757
|
assert(cNokogiriXmlNode);
|
661
758
|
/*
|