nokogiri 1.14.2 → 1.16.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +19 -15
- data/README.md +4 -1
- data/dependencies.yml +9 -8
- data/ext/nokogiri/extconf.rb +69 -26
- data/ext/nokogiri/html4_document.c +1 -2
- data/ext/nokogiri/html4_element_description.c +19 -14
- data/ext/nokogiri/html4_sax_parser_context.c +10 -16
- data/ext/nokogiri/html4_sax_push_parser.c +3 -3
- data/ext/nokogiri/nokogiri.c +46 -24
- data/ext/nokogiri/nokogiri.h +23 -5
- data/ext/nokogiri/test_global_handlers.c +1 -1
- data/ext/nokogiri/xml_attr.c +1 -1
- data/ext/nokogiri/xml_cdata.c +30 -17
- data/ext/nokogiri/xml_comment.c +1 -1
- data/ext/nokogiri/xml_document.c +113 -25
- data/ext/nokogiri/xml_document_fragment.c +1 -1
- data/ext/nokogiri/xml_dtd.c +1 -1
- data/ext/nokogiri/xml_element_content.c +32 -29
- data/ext/nokogiri/xml_element_decl.c +5 -5
- data/ext/nokogiri/xml_encoding_handler.c +12 -4
- data/ext/nokogiri/xml_entity_reference.c +1 -1
- data/ext/nokogiri/xml_namespace.c +11 -16
- data/ext/nokogiri/xml_node.c +13 -16
- data/ext/nokogiri/xml_node_set.c +125 -105
- data/ext/nokogiri/xml_processing_instruction.c +1 -1
- data/ext/nokogiri/xml_reader.c +61 -74
- data/ext/nokogiri/xml_relax_ng.c +66 -79
- data/ext/nokogiri/xml_sax_parser.c +24 -5
- data/ext/nokogiri/xml_sax_parser_context.c +50 -25
- data/ext/nokogiri/xml_sax_push_parser.c +30 -9
- data/ext/nokogiri/xml_schema.c +94 -115
- data/ext/nokogiri/xml_syntax_error.c +3 -3
- data/ext/nokogiri/xml_text.c +26 -13
- data/ext/nokogiri/xml_xpath_context.c +153 -83
- data/ext/nokogiri/xslt_stylesheet.c +111 -53
- data/gumbo-parser/Makefile +18 -0
- data/gumbo-parser/src/error.c +8 -4
- data/gumbo-parser/src/foreign_attrs.c +13 -14
- data/gumbo-parser/src/foreign_attrs.gperf +1 -1
- data/gumbo-parser/src/parser.c +21 -5
- data/gumbo-parser/src/tokenizer.c +1 -0
- data/lib/nokogiri/css/parser_extras.rb +1 -1
- data/lib/nokogiri/css/xpath_visitor.rb +3 -23
- data/lib/nokogiri/extension.rb +1 -1
- data/lib/nokogiri/html4/document.rb +1 -1
- data/lib/nokogiri/html4/document_fragment.rb +1 -1
- data/lib/nokogiri/html4/element_description_defaults.rb +1821 -353
- data/lib/nokogiri/html4/encoding_reader.rb +1 -1
- data/lib/nokogiri/html5/document_fragment.rb +1 -1
- data/lib/nokogiri/html5/node.rb +5 -0
- data/lib/nokogiri/html5.rb +0 -63
- data/lib/nokogiri/jruby/nokogiri_jars.rb +9 -9
- data/lib/nokogiri/version/constant.rb +1 -1
- data/lib/nokogiri/version/info.rb +6 -5
- data/lib/nokogiri/xml/attr.rb +2 -2
- data/lib/nokogiri/xml/attribute_decl.rb +4 -2
- data/lib/nokogiri/xml/document.rb +4 -5
- data/lib/nokogiri/xml/document_fragment.rb +3 -3
- data/lib/nokogiri/xml/element_content.rb +10 -2
- data/lib/nokogiri/xml/element_decl.rb +4 -2
- data/lib/nokogiri/xml/entity_decl.rb +4 -2
- data/lib/nokogiri/xml/namespace.rb +1 -2
- data/lib/nokogiri/xml/node/save_options.rb +8 -0
- data/lib/nokogiri/xml/node.rb +53 -37
- data/lib/nokogiri/xml/node_set.rb +3 -3
- data/lib/nokogiri/xml/pp/node.rb +23 -12
- data/lib/nokogiri/xml/reader.rb +10 -9
- data/lib/nokogiri/xml/sax/document.rb +1 -1
- data/lib/nokogiri/xml/searchable.rb +21 -13
- data/lib/nokogiri/xml/syntax_error.rb +1 -1
- data/lib/nokogiri/xml.rb +1 -1
- data/lib/nokogiri/xslt/stylesheet.rb +29 -7
- data/lib/nokogiri/xslt.rb +74 -4
- data/lib/nokogiri.rb +13 -5
- data/lib/xsd/xmlparser/nokogiri.rb +1 -1
- data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
- data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
- data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
- data/ports/archives/libxml2-2.12.8.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.39.tar.xz +0 -0
- metadata +16 -12
- data/patches/libxslt/0001-update-automake-files-for-arm64.patch +0 -3037
- data/ports/archives/libxml2-2.10.3.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.37.tar.xz +0 -0
data/ext/nokogiri/nokogiri.h
CHANGED
@@ -51,6 +51,7 @@
|
|
51
51
|
#include <libxslt/xsltconfig.h>
|
52
52
|
#include <libxslt/xsltutils.h>
|
53
53
|
#include <libxslt/transform.h>
|
54
|
+
#include <libxslt/imports.h>
|
54
55
|
#include <libxslt/xsltInternals.h>
|
55
56
|
|
56
57
|
#include <libexslt/exslt.h>
|
@@ -65,6 +66,13 @@ xmlNodePtr xmlLastElementChild(xmlNodePtr parent);
|
|
65
66
|
#define XMLNS_PREFIX "xmlns"
|
66
67
|
#define XMLNS_PREFIX_LEN 6 /* including either colon or \0 */
|
67
68
|
|
69
|
+
#ifndef xmlErrorConstPtr
|
70
|
+
# if LIBXML_VERSION >= 21200
|
71
|
+
# define xmlErrorConstPtr const xmlError *
|
72
|
+
# else
|
73
|
+
# define xmlErrorConstPtr xmlError *
|
74
|
+
# endif
|
75
|
+
#endif
|
68
76
|
|
69
77
|
#include <ruby.h>
|
70
78
|
#include <ruby/st.h>
|
@@ -168,6 +176,7 @@ typedef struct _nokogiriXsltStylesheetTuple {
|
|
168
176
|
|
169
177
|
void noko_xml_document_pin_node(xmlNodePtr);
|
170
178
|
void noko_xml_document_pin_namespace(xmlNsPtr, xmlDocPtr);
|
179
|
+
int noko_xml_document_has_wrapped_blank_nodes_p(xmlDocPtr c_document);
|
171
180
|
|
172
181
|
int noko_io_read(void *ctx, char *buffer, int len);
|
173
182
|
int noko_io_write(void *ctx, char *buffer, int len);
|
@@ -186,12 +195,21 @@ VALUE noko_xml_namespace_wrap_xpath_copy(xmlNsPtr node);
|
|
186
195
|
VALUE noko_xml_element_content_wrap(VALUE doc, xmlElementContentPtr element);
|
187
196
|
|
188
197
|
VALUE noko_xml_node_set_wrap(xmlNodeSetPtr node_set, VALUE document) ;
|
198
|
+
xmlNodeSetPtr noko_xml_node_set_unwrap(VALUE rb_node_set) ;
|
189
199
|
|
190
200
|
VALUE noko_xml_document_wrap_with_init_args(VALUE klass, xmlDocPtr doc, int argc, VALUE *argv);
|
191
201
|
VALUE noko_xml_document_wrap(VALUE klass, xmlDocPtr doc);
|
202
|
+
xmlDocPtr noko_xml_document_unwrap(VALUE rb_document);
|
192
203
|
NOKOPUBFUN VALUE Nokogiri_wrap_xml_document(VALUE klass,
|
193
204
|
xmlDocPtr doc); /* deprecated. use noko_xml_document_wrap() instead. */
|
194
205
|
|
206
|
+
xmlSAXHandlerPtr noko_sax_handler_unwrap(VALUE rb_sax_handler);
|
207
|
+
|
208
|
+
xmlParserCtxtPtr noko_xml_sax_push_parser_unwrap(VALUE rb_parser);
|
209
|
+
|
210
|
+
VALUE noko_xml_sax_parser_context_wrap(VALUE klass, xmlParserCtxtPtr c_context);
|
211
|
+
xmlParserCtxtPtr noko_xml_sax_parser_context_unwrap(VALUE rb_context);
|
212
|
+
|
195
213
|
#define DOC_RUBY_OBJECT_TEST(x) ((nokogiriTuplePtr)(x->_private))
|
196
214
|
#define DOC_RUBY_OBJECT(x) (((nokogiriTuplePtr)(x->_private))->doc)
|
197
215
|
#define DOC_UNLINKED_NODE_HASH(x) (((nokogiriTuplePtr)(x->_private))->unlinkedNodes)
|
@@ -207,18 +225,18 @@ NOKOPUBFUN VALUE Nokogiri_wrap_xml_document(VALUE klass,
|
|
207
225
|
#define DISCARD_CONST_QUAL_XMLCHAR(v) DISCARD_CONST_QUAL(xmlChar *, v)
|
208
226
|
|
209
227
|
#if HAVE_RB_CATEGORY_WARNING
|
210
|
-
# define NOKO_WARN_DEPRECATION(message) rb_category_warning(RB_WARN_CATEGORY_DEPRECATED, message)
|
228
|
+
# define NOKO_WARN_DEPRECATION(message...) rb_category_warning(RB_WARN_CATEGORY_DEPRECATED, message)
|
211
229
|
#else
|
212
|
-
# define NOKO_WARN_DEPRECATION(message) rb_warning(message)
|
230
|
+
# define NOKO_WARN_DEPRECATION(message...) rb_warning(message)
|
213
231
|
#endif
|
214
232
|
|
215
233
|
void Nokogiri_structured_error_func_save(libxmlStructuredErrorHandlerState *handler_state);
|
216
234
|
void Nokogiri_structured_error_func_save_and_set(libxmlStructuredErrorHandlerState *handler_state, void *user_data,
|
217
235
|
xmlStructuredErrorFunc handler);
|
218
236
|
void Nokogiri_structured_error_func_restore(libxmlStructuredErrorHandlerState *handler_state);
|
219
|
-
VALUE Nokogiri_wrap_xml_syntax_error(
|
220
|
-
void Nokogiri_error_array_pusher(void *ctx,
|
221
|
-
NORETURN_DECL void Nokogiri_error_raise(void *ctx,
|
237
|
+
VALUE Nokogiri_wrap_xml_syntax_error(xmlErrorConstPtr error);
|
238
|
+
void Nokogiri_error_array_pusher(void *ctx, xmlErrorConstPtr error);
|
239
|
+
NORETURN_DECL void Nokogiri_error_raise(void *ctx, xmlErrorConstPtr error);
|
222
240
|
void Nokogiri_marshal_xpath_funcall_and_return_values(xmlXPathParserContextPtr ctx, int nargs, VALUE handler,
|
223
241
|
const char *function_name) ;
|
224
242
|
|
@@ -3,7 +3,7 @@
|
|
3
3
|
static VALUE foreign_error_handler_block = Qnil;
|
4
4
|
|
5
5
|
static void
|
6
|
-
foreign_error_handler(void *user_data,
|
6
|
+
foreign_error_handler(void *user_data, xmlErrorConstPtr c_error)
|
7
7
|
{
|
8
8
|
rb_funcall(foreign_error_handler_block, rb_intern("call"), 0);
|
9
9
|
}
|
data/ext/nokogiri/xml_attr.c
CHANGED
@@ -68,7 +68,7 @@ new (int argc, VALUE *argv, VALUE klass)
|
|
68
68
|
rb_raise(rb_eArgError, "parameter must be a Nokogiri::XML::Document");
|
69
69
|
}
|
70
70
|
|
71
|
-
|
71
|
+
xml_doc = noko_xml_document_unwrap(document);
|
72
72
|
|
73
73
|
node = xmlNewDocProp(
|
74
74
|
xml_doc,
|
data/ext/nokogiri/xml_cdata.c
CHANGED
@@ -12,31 +12,44 @@ VALUE cNokogiriXmlCData;
|
|
12
12
|
* raise a TypeError exception.
|
13
13
|
*/
|
14
14
|
static VALUE
|
15
|
-
|
15
|
+
rb_xml_cdata_s_new(int argc, VALUE *argv, VALUE klass)
|
16
16
|
{
|
17
|
-
xmlDocPtr
|
18
|
-
xmlNodePtr
|
19
|
-
VALUE
|
20
|
-
VALUE
|
21
|
-
VALUE
|
17
|
+
xmlDocPtr c_document;
|
18
|
+
xmlNodePtr c_node;
|
19
|
+
VALUE rb_document;
|
20
|
+
VALUE rb_content;
|
21
|
+
VALUE rb_rest;
|
22
22
|
VALUE rb_node;
|
23
|
-
xmlChar *
|
24
|
-
int
|
23
|
+
xmlChar *c_content = NULL;
|
24
|
+
int c_content_len = 0;
|
25
25
|
|
26
|
-
rb_scan_args(argc, argv, "2*", &
|
26
|
+
rb_scan_args(argc, argv, "2*", &rb_document, &rb_content, &rb_rest);
|
27
27
|
|
28
|
-
|
28
|
+
if (!rb_obj_is_kind_of(rb_document, cNokogiriXmlNode)) {
|
29
|
+
rb_raise(rb_eTypeError,
|
30
|
+
"expected first parameter to be a Nokogiri::XML::Document, received %"PRIsVALUE,
|
31
|
+
rb_obj_class(rb_document));
|
32
|
+
}
|
33
|
+
|
34
|
+
if (!rb_obj_is_kind_of(rb_document, cNokogiriXmlDocument)) {
|
35
|
+
xmlNodePtr deprecated_node_type_arg;
|
36
|
+
NOKO_WARN_DEPRECATION("Passing a Node as the first parameter to CDATA.new is deprecated. Please pass a Document instead. This will become an error in Nokogiri v1.17.0."); // TODO: deprecated in v1.15.3, remove in v1.17.0
|
37
|
+
Noko_Node_Get_Struct(rb_document, xmlNode, deprecated_node_type_arg);
|
38
|
+
c_document = deprecated_node_type_arg->doc;
|
39
|
+
} else {
|
40
|
+
c_document = noko_xml_document_unwrap(rb_document);
|
41
|
+
}
|
29
42
|
|
30
|
-
if (!NIL_P(
|
31
|
-
|
32
|
-
|
43
|
+
if (!NIL_P(rb_content)) {
|
44
|
+
c_content = (xmlChar *)StringValuePtr(rb_content);
|
45
|
+
c_content_len = RSTRING_LENINT(rb_content);
|
33
46
|
}
|
34
47
|
|
35
|
-
|
48
|
+
c_node = xmlNewCDataBlock(c_document, c_content, c_content_len);
|
36
49
|
|
37
|
-
noko_xml_document_pin_node(
|
50
|
+
noko_xml_document_pin_node(c_node);
|
38
51
|
|
39
|
-
rb_node = noko_xml_node_wrap(klass,
|
52
|
+
rb_node = noko_xml_node_wrap(klass, c_node);
|
40
53
|
rb_obj_call_init(rb_node, argc, argv);
|
41
54
|
|
42
55
|
if (rb_block_given_p()) { rb_yield(rb_node); }
|
@@ -53,5 +66,5 @@ noko_init_xml_cdata(void)
|
|
53
66
|
*/
|
54
67
|
cNokogiriXmlCData = rb_define_class_under(mNokogiriXml, "CDATA", cNokogiriXmlText);
|
55
68
|
|
56
|
-
rb_define_singleton_method(cNokogiriXmlCData, "new",
|
69
|
+
rb_define_singleton_method(cNokogiriXmlCData, "new", rb_xml_cdata_s_new, -1);
|
57
70
|
}
|
data/ext/nokogiri/xml_comment.c
CHANGED
@@ -30,7 +30,7 @@ new (int argc, VALUE *argv, VALUE klass)
|
|
30
30
|
rb_raise(rb_eArgError, "first argument must be a XML::Document or XML::Node");
|
31
31
|
}
|
32
32
|
|
33
|
-
|
33
|
+
xml_doc = noko_xml_document_unwrap(document);
|
34
34
|
|
35
35
|
node = xmlNewDocComment(
|
36
36
|
xml_doc,
|
data/ext/nokogiri/xml_document.c
CHANGED
@@ -51,8 +51,9 @@ remove_private(xmlNodePtr node)
|
|
51
51
|
}
|
52
52
|
|
53
53
|
static void
|
54
|
-
mark(
|
54
|
+
mark(void *data)
|
55
55
|
{
|
56
|
+
xmlDocPtr doc = (xmlDocPtr)data;
|
56
57
|
nokogiriTuplePtr tuple = (nokogiriTuplePtr)doc->_private;
|
57
58
|
if (tuple) {
|
58
59
|
rb_gc_mark(tuple->doc);
|
@@ -61,8 +62,9 @@ mark(xmlDocPtr doc)
|
|
61
62
|
}
|
62
63
|
|
63
64
|
static void
|
64
|
-
dealloc(
|
65
|
+
dealloc(void *data)
|
65
66
|
{
|
67
|
+
xmlDocPtr doc = (xmlDocPtr)data;
|
66
68
|
st_table *node_hash;
|
67
69
|
|
68
70
|
node_hash = DOC_UNLINKED_NODE_HASH(doc);
|
@@ -72,18 +74,75 @@ dealloc(xmlDocPtr doc)
|
|
72
74
|
|
73
75
|
ruby_xfree(doc->_private);
|
74
76
|
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
77
|
+
#if defined(__GNUC__) && __GNUC__ >= 5
|
78
|
+
#pragma GCC diagnostic push
|
79
|
+
#pragma GCC diagnostic ignored "-Wdeprecated-declarations" // xmlDeregisterNodeDefault is deprecated as of libxml2 2.11.0
|
80
|
+
#endif
|
81
|
+
/*
|
82
|
+
* libxml-ruby < 3.0.0 uses xmlDeregisterNodeDefault. If the user is using one of those older
|
83
|
+
* versions, the registered callback from libxml-ruby will access the _private pointers set by
|
84
|
+
* nokogiri, which will result in segfaults.
|
85
|
+
*
|
86
|
+
* To avoid this, we need to clear the _private pointers from all nodes in this document tree
|
87
|
+
* before that callback gets invoked.
|
88
|
+
*
|
89
|
+
* libxml-ruby 3.0.0 was released in 2017-02, so at some point we can probably safely remove this
|
90
|
+
* safeguard (though probably pairing with a runtime check on the libxml-ruby version).
|
79
91
|
*/
|
80
92
|
if (xmlDeregisterNodeDefaultValue) {
|
81
93
|
remove_private((xmlNodePtr)doc);
|
82
94
|
}
|
95
|
+
#if defined(__GNUC__) && __GNUC__ >= 5
|
96
|
+
#pragma GCC diagnostic pop
|
97
|
+
#endif
|
83
98
|
|
84
99
|
xmlFreeDoc(doc);
|
85
100
|
}
|
86
101
|
|
102
|
+
static size_t
|
103
|
+
memsize_node(const xmlNodePtr node)
|
104
|
+
{
|
105
|
+
/* note we don't count namespace definitions, just going for a good-enough number here */
|
106
|
+
xmlNodePtr child;
|
107
|
+
xmlAttrPtr property;
|
108
|
+
size_t memsize = 0;
|
109
|
+
|
110
|
+
memsize += xmlStrlen(node->name);
|
111
|
+
|
112
|
+
if (node->type == XML_ELEMENT_NODE) {
|
113
|
+
for (property = node->properties; property; property = property->next) {
|
114
|
+
memsize += sizeof(xmlAttr) + memsize_node((xmlNodePtr)property);
|
115
|
+
}
|
116
|
+
}
|
117
|
+
if (node->type == XML_TEXT_NODE) {
|
118
|
+
memsize += xmlStrlen(node->content);
|
119
|
+
}
|
120
|
+
for (child = node->children; child; child = child->next) {
|
121
|
+
memsize += sizeof(xmlNode) + memsize_node(child);
|
122
|
+
}
|
123
|
+
return memsize;
|
124
|
+
}
|
125
|
+
|
126
|
+
static size_t
|
127
|
+
memsize(const void *data)
|
128
|
+
{
|
129
|
+
xmlDocPtr doc = (const xmlDocPtr)data;
|
130
|
+
size_t memsize = sizeof(xmlDoc);
|
131
|
+
/* This may not account for all memory use */
|
132
|
+
memsize += memsize_node((xmlNodePtr)doc);
|
133
|
+
return memsize;
|
134
|
+
}
|
135
|
+
|
136
|
+
static const rb_data_type_t noko_xml_document_data_type = {
|
137
|
+
.wrap_struct_name = "Nokogiri::XML::Document",
|
138
|
+
.function = {
|
139
|
+
.dmark = mark,
|
140
|
+
.dfree = dealloc,
|
141
|
+
.dsize = memsize,
|
142
|
+
},
|
143
|
+
// .flags = RUBY_TYPED_FREE_IMMEDIATELY, // TODO see https://github.com/sparklemotion/nokogiri/issues/2822
|
144
|
+
};
|
145
|
+
|
87
146
|
static void
|
88
147
|
recursively_remove_namespaces_from_node(xmlNodePtr node)
|
89
148
|
{
|
@@ -126,8 +185,7 @@ recursively_remove_namespaces_from_node(xmlNodePtr node)
|
|
126
185
|
static VALUE
|
127
186
|
url(VALUE self)
|
128
187
|
{
|
129
|
-
xmlDocPtr doc;
|
130
|
-
Data_Get_Struct(self, xmlDoc, doc);
|
188
|
+
xmlDocPtr doc = noko_xml_document_unwrap(self);
|
131
189
|
|
132
190
|
if (doc->URL) { return NOKOGIRI_STR_NEW2(doc->URL); }
|
133
191
|
|
@@ -146,7 +204,7 @@ rb_xml_document_root_set(VALUE self, VALUE rb_new_root)
|
|
146
204
|
xmlDocPtr c_document;
|
147
205
|
xmlNodePtr c_new_root = NULL, c_current_root;
|
148
206
|
|
149
|
-
|
207
|
+
c_document = noko_xml_document_unwrap(self);
|
150
208
|
|
151
209
|
c_current_root = xmlDocGetRootElement(c_document);
|
152
210
|
if (c_current_root) {
|
@@ -190,7 +248,7 @@ rb_xml_document_root(VALUE self)
|
|
190
248
|
xmlDocPtr c_document;
|
191
249
|
xmlNodePtr c_root;
|
192
250
|
|
193
|
-
|
251
|
+
c_document = noko_xml_document_unwrap(self);
|
194
252
|
|
195
253
|
c_root = xmlDocGetRootElement(c_document);
|
196
254
|
if (!c_root) {
|
@@ -209,8 +267,7 @@ rb_xml_document_root(VALUE self)
|
|
209
267
|
static VALUE
|
210
268
|
set_encoding(VALUE self, VALUE encoding)
|
211
269
|
{
|
212
|
-
xmlDocPtr doc;
|
213
|
-
Data_Get_Struct(self, xmlDoc, doc);
|
270
|
+
xmlDocPtr doc = noko_xml_document_unwrap(self);
|
214
271
|
|
215
272
|
if (doc->encoding) {
|
216
273
|
xmlFree(DISCARD_CONST_QUAL_XMLCHAR(doc->encoding));
|
@@ -230,8 +287,7 @@ set_encoding(VALUE self, VALUE encoding)
|
|
230
287
|
static VALUE
|
231
288
|
encoding(VALUE self)
|
232
289
|
{
|
233
|
-
xmlDocPtr doc;
|
234
|
-
Data_Get_Struct(self, xmlDoc, doc);
|
290
|
+
xmlDocPtr doc = noko_xml_document_unwrap(self);
|
235
291
|
|
236
292
|
if (!doc->encoding) { return Qnil; }
|
237
293
|
return NOKOGIRI_STR_NEW2(doc->encoding);
|
@@ -246,8 +302,7 @@ encoding(VALUE self)
|
|
246
302
|
static VALUE
|
247
303
|
version(VALUE self)
|
248
304
|
{
|
249
|
-
xmlDocPtr doc;
|
250
|
-
Data_Get_Struct(self, xmlDoc, doc);
|
305
|
+
xmlDocPtr doc = noko_xml_document_unwrap(self);
|
251
306
|
|
252
307
|
if (!doc->version) { return Qnil; }
|
253
308
|
return NOKOGIRI_STR_NEW2(doc->version);
|
@@ -286,7 +341,7 @@ read_io(VALUE klass,
|
|
286
341
|
xmlSetStructuredErrorFunc(NULL, NULL);
|
287
342
|
|
288
343
|
if (doc == NULL) {
|
289
|
-
|
344
|
+
xmlErrorConstPtr error;
|
290
345
|
|
291
346
|
xmlFreeDoc(doc);
|
292
347
|
|
@@ -332,7 +387,7 @@ read_memory(VALUE klass,
|
|
332
387
|
xmlSetStructuredErrorFunc(NULL, NULL);
|
333
388
|
|
334
389
|
if (doc == NULL) {
|
335
|
-
|
390
|
+
xmlErrorConstPtr error;
|
336
391
|
|
337
392
|
xmlFreeDoc(doc);
|
338
393
|
|
@@ -369,7 +424,7 @@ duplicate_document(int argc, VALUE *argv, VALUE self)
|
|
369
424
|
level = INT2NUM((long)1);
|
370
425
|
}
|
371
426
|
|
372
|
-
|
427
|
+
doc = noko_xml_document_unwrap(self);
|
373
428
|
|
374
429
|
dup = xmlCopyDoc(doc, (int)NUM2INT(level));
|
375
430
|
|
@@ -442,8 +497,7 @@ new (int argc, VALUE *argv, VALUE klass)
|
|
442
497
|
static VALUE
|
443
498
|
remove_namespaces_bang(VALUE self)
|
444
499
|
{
|
445
|
-
xmlDocPtr doc ;
|
446
|
-
Data_Get_Struct(self, xmlDoc, doc);
|
500
|
+
xmlDocPtr doc = noko_xml_document_unwrap(self);
|
447
501
|
|
448
502
|
recursively_remove_namespaces_from_node((xmlNodePtr)doc);
|
449
503
|
return self;
|
@@ -471,7 +525,7 @@ create_entity(int argc, VALUE *argv, VALUE self)
|
|
471
525
|
xmlEntityPtr ptr;
|
472
526
|
xmlDocPtr doc ;
|
473
527
|
|
474
|
-
|
528
|
+
doc = noko_xml_document_unwrap(self);
|
475
529
|
|
476
530
|
rb_scan_args(argc, argv, "14", &name, &type, &external_id, &system_id,
|
477
531
|
&content);
|
@@ -487,7 +541,7 @@ create_entity(int argc, VALUE *argv, VALUE self)
|
|
487
541
|
);
|
488
542
|
|
489
543
|
if (NULL == ptr) {
|
490
|
-
|
544
|
+
xmlErrorConstPtr error = xmlGetLastError();
|
491
545
|
if (error) {
|
492
546
|
rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
|
493
547
|
} else {
|
@@ -559,7 +613,7 @@ rb_xml_document_canonicalize(int argc, VALUE *argv, VALUE self)
|
|
559
613
|
}
|
560
614
|
}
|
561
615
|
|
562
|
-
|
616
|
+
c_doc = noko_xml_document_unwrap(self);
|
563
617
|
|
564
618
|
rb_cStringIO = rb_const_get_at(rb_cObject, rb_intern("StringIO"));
|
565
619
|
rb_io = rb_class_new_instance(0, 0, rb_cStringIO);
|
@@ -607,7 +661,7 @@ noko_xml_document_wrap_with_init_args(VALUE klass, xmlDocPtr c_document, int arg
|
|
607
661
|
klass = cNokogiriXmlDocument;
|
608
662
|
}
|
609
663
|
|
610
|
-
rb_document =
|
664
|
+
rb_document = TypedData_Wrap_Struct(klass, &noko_xml_document_data_type, c_document);
|
611
665
|
|
612
666
|
tuple = (nokogiriTuplePtr)ruby_xmalloc(sizeof(nokogiriTuple));
|
613
667
|
tuple->doc = rb_document;
|
@@ -640,6 +694,40 @@ noko_xml_document_wrap(VALUE klass, xmlDocPtr doc)
|
|
640
694
|
return noko_xml_document_wrap_with_init_args(klass, doc, 0, NULL);
|
641
695
|
}
|
642
696
|
|
697
|
+
xmlDocPtr
|
698
|
+
noko_xml_document_unwrap(VALUE rb_document)
|
699
|
+
{
|
700
|
+
xmlDocPtr c_document;
|
701
|
+
TypedData_Get_Struct(rb_document, xmlDoc, &noko_xml_document_data_type, c_document);
|
702
|
+
return c_document;
|
703
|
+
}
|
704
|
+
|
705
|
+
/* Schema creation will remove and deallocate "blank" nodes.
|
706
|
+
* If those blank nodes have been exposed to Ruby, they could get freed
|
707
|
+
* out from under the VALUE pointer. This function checks to see if any of
|
708
|
+
* those nodes have been exposed to Ruby, and if so we should raise an exception.
|
709
|
+
*/
|
710
|
+
int
|
711
|
+
noko_xml_document_has_wrapped_blank_nodes_p(xmlDocPtr c_document)
|
712
|
+
{
|
713
|
+
VALUE cache = DOC_NODE_CACHE(c_document);
|
714
|
+
|
715
|
+
if (NIL_P(cache)) {
|
716
|
+
return 0;
|
717
|
+
}
|
718
|
+
|
719
|
+
for (long jnode = 0; jnode < RARRAY_LEN(cache); jnode++) {
|
720
|
+
xmlNodePtr node;
|
721
|
+
VALUE element = rb_ary_entry(cache, jnode);
|
722
|
+
|
723
|
+
Noko_Node_Get_Struct(element, xmlNode, node);
|
724
|
+
if (xmlIsBlankNode(node)) {
|
725
|
+
return 1;
|
726
|
+
}
|
727
|
+
}
|
728
|
+
|
729
|
+
return 0;
|
730
|
+
}
|
643
731
|
|
644
732
|
void
|
645
733
|
noko_xml_document_pin_node(xmlNodePtr node)
|
data/ext/nokogiri/xml_dtd.c
CHANGED
@@ -139,7 +139,7 @@ validate(VALUE self, VALUE document)
|
|
139
139
|
VALUE error_list;
|
140
140
|
|
141
141
|
Noko_Node_Get_Struct(self, xmlDtd, dtd);
|
142
|
-
|
142
|
+
doc = noko_xml_document_unwrap(document);
|
143
143
|
error_list = rb_ary_new();
|
144
144
|
|
145
145
|
ctxt = xmlNewValidCtxt();
|
@@ -2,17 +2,22 @@
|
|
2
2
|
|
3
3
|
VALUE cNokogiriXmlElementContent;
|
4
4
|
|
5
|
+
static const rb_data_type_t element_content_data_type = {
|
6
|
+
.wrap_struct_name = "Nokogiri::XML::ElementContent",
|
7
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
|
8
|
+
};
|
9
|
+
|
5
10
|
/*
|
6
11
|
* call-seq:
|
7
|
-
*
|
12
|
+
* name → String
|
8
13
|
*
|
9
|
-
*
|
14
|
+
* [Returns] The content element's +name+
|
10
15
|
*/
|
11
16
|
static VALUE
|
12
17
|
get_name(VALUE self)
|
13
18
|
{
|
14
19
|
xmlElementContentPtr elem;
|
15
|
-
|
20
|
+
TypedData_Get_Struct(self, xmlElementContent, &element_content_data_type, elem);
|
16
21
|
|
17
22
|
if (!elem->name) { return Qnil; }
|
18
23
|
return NOKOGIRI_STR_NEW2(elem->name);
|
@@ -20,47 +25,40 @@ get_name(VALUE self)
|
|
20
25
|
|
21
26
|
/*
|
22
27
|
* call-seq:
|
23
|
-
*
|
28
|
+
* type → Integer
|
24
29
|
*
|
25
|
-
*
|
26
|
-
* or OR.
|
30
|
+
* [Returns] The content element's +type+. Possible values are +PCDATA+, +ELEMENT+, +SEQ+, or +OR+.
|
27
31
|
*/
|
28
32
|
static VALUE
|
29
33
|
get_type(VALUE self)
|
30
34
|
{
|
31
35
|
xmlElementContentPtr elem;
|
32
|
-
|
36
|
+
TypedData_Get_Struct(self, xmlElementContent, &element_content_data_type, elem);
|
33
37
|
|
34
38
|
return INT2NUM(elem->type);
|
35
39
|
}
|
36
40
|
|
37
41
|
/*
|
38
|
-
* call-seq:
|
39
|
-
* c1
|
40
|
-
*
|
41
42
|
* Get the first child.
|
42
43
|
*/
|
43
44
|
static VALUE
|
44
45
|
get_c1(VALUE self)
|
45
46
|
{
|
46
47
|
xmlElementContentPtr elem;
|
47
|
-
|
48
|
+
TypedData_Get_Struct(self, xmlElementContent, &element_content_data_type, elem);
|
48
49
|
|
49
50
|
if (!elem->c1) { return Qnil; }
|
50
51
|
return noko_xml_element_content_wrap(rb_iv_get(self, "@document"), elem->c1);
|
51
52
|
}
|
52
53
|
|
53
54
|
/*
|
54
|
-
*
|
55
|
-
* c2
|
56
|
-
*
|
57
|
-
* Get the first child.
|
55
|
+
* Get the second child.
|
58
56
|
*/
|
59
57
|
static VALUE
|
60
58
|
get_c2(VALUE self)
|
61
59
|
{
|
62
60
|
xmlElementContentPtr elem;
|
63
|
-
|
61
|
+
TypedData_Get_Struct(self, xmlElementContent, &element_content_data_type, elem);
|
64
62
|
|
65
63
|
if (!elem->c2) { return Qnil; }
|
66
64
|
return noko_xml_element_content_wrap(rb_iv_get(self, "@document"), elem->c2);
|
@@ -68,45 +66,50 @@ get_c2(VALUE self)
|
|
68
66
|
|
69
67
|
/*
|
70
68
|
* call-seq:
|
71
|
-
*
|
69
|
+
* occur → Integer
|
72
70
|
*
|
73
|
-
*
|
74
|
-
* or PLUS.
|
71
|
+
* [Returns] The content element's +occur+ flag. Possible values are +ONCE+, +OPT+, +MULT+ or +PLUS+.
|
75
72
|
*/
|
76
73
|
static VALUE
|
77
74
|
get_occur(VALUE self)
|
78
75
|
{
|
79
76
|
xmlElementContentPtr elem;
|
80
|
-
|
77
|
+
TypedData_Get_Struct(self, xmlElementContent, &element_content_data_type, elem);
|
81
78
|
|
82
79
|
return INT2NUM(elem->ocur);
|
83
80
|
}
|
84
81
|
|
85
82
|
/*
|
86
83
|
* call-seq:
|
87
|
-
*
|
84
|
+
* prefix → String
|
88
85
|
*
|
89
|
-
*
|
86
|
+
* [Returns] The content element's namespace +prefix+.
|
90
87
|
*/
|
91
88
|
static VALUE
|
92
89
|
get_prefix(VALUE self)
|
93
90
|
{
|
94
91
|
xmlElementContentPtr elem;
|
95
|
-
|
92
|
+
TypedData_Get_Struct(self, xmlElementContent, &element_content_data_type, elem);
|
96
93
|
|
97
94
|
if (!elem->prefix) { return Qnil; }
|
98
95
|
|
99
96
|
return NOKOGIRI_STR_NEW2(elem->prefix);
|
100
97
|
}
|
101
98
|
|
99
|
+
/*
|
100
|
+
* create a Nokogiri::XML::ElementContent object around an +element+.
|
101
|
+
*/
|
102
102
|
VALUE
|
103
|
-
noko_xml_element_content_wrap(VALUE
|
103
|
+
noko_xml_element_content_wrap(VALUE rb_document, xmlElementContentPtr c_element_content)
|
104
104
|
{
|
105
|
-
VALUE elem =
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
105
|
+
VALUE elem = TypedData_Wrap_Struct(
|
106
|
+
cNokogiriXmlElementContent,
|
107
|
+
&element_content_data_type,
|
108
|
+
c_element_content
|
109
|
+
);
|
110
|
+
|
111
|
+
/* keep a handle on the document for GC marking */
|
112
|
+
rb_iv_set(elem, "@document", rb_document);
|
110
113
|
|
111
114
|
return elem;
|
112
115
|
}
|
@@ -6,7 +6,7 @@ static ID id_document;
|
|
6
6
|
|
7
7
|
/*
|
8
8
|
* call-seq:
|
9
|
-
*
|
9
|
+
* element_type → Integer
|
10
10
|
*
|
11
11
|
* The element_type
|
12
12
|
*/
|
@@ -20,9 +20,9 @@ element_type(VALUE self)
|
|
20
20
|
|
21
21
|
/*
|
22
22
|
* call-seq:
|
23
|
-
*
|
23
|
+
* content → Nokogiri::XML::ElementContent
|
24
24
|
*
|
25
|
-
* The
|
25
|
+
* [Returns] The root of this element declaration's content tree.
|
26
26
|
*/
|
27
27
|
static VALUE
|
28
28
|
content(VALUE self)
|
@@ -40,9 +40,9 @@ content(VALUE self)
|
|
40
40
|
|
41
41
|
/*
|
42
42
|
* call-seq:
|
43
|
-
*
|
43
|
+
* prefix → String
|
44
44
|
*
|
45
|
-
* The namespace prefix for this
|
45
|
+
* [Returns] The namespace +prefix+ for this element declaration.
|
46
46
|
*/
|
47
47
|
static VALUE
|
48
48
|
prefix(VALUE self)
|
@@ -2,14 +2,22 @@
|
|
2
2
|
|
3
3
|
VALUE cNokogiriEncodingHandler;
|
4
4
|
|
5
|
-
|
6
5
|
static void
|
7
|
-
|
6
|
+
xml_encoding_handler_dealloc(void *data)
|
8
7
|
{
|
9
8
|
/* make sure iconv handlers are cleaned up and freed */
|
9
|
+
xmlCharEncodingHandlerPtr c_handler = data;
|
10
10
|
xmlCharEncCloseFunc(c_handler);
|
11
11
|
}
|
12
12
|
|
13
|
+
static const rb_data_type_t xml_encoding_handler_type = {
|
14
|
+
.wrap_struct_name = "Nokogiri::EncodingHandler",
|
15
|
+
.function = {
|
16
|
+
.dfree = xml_encoding_handler_dealloc,
|
17
|
+
},
|
18
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
|
19
|
+
};
|
20
|
+
|
13
21
|
|
14
22
|
/*
|
15
23
|
* call-seq: Nokogiri::EncodingHandler.[](name)
|
@@ -23,7 +31,7 @@ rb_xml_encoding_handler_s_get(VALUE klass, VALUE key)
|
|
23
31
|
|
24
32
|
handler = xmlFindCharEncodingHandler(StringValueCStr(key));
|
25
33
|
if (handler) {
|
26
|
-
return
|
34
|
+
return TypedData_Wrap_Struct(klass, &xml_encoding_handler_type, handler);
|
27
35
|
}
|
28
36
|
|
29
37
|
return Qnil;
|
@@ -82,7 +90,7 @@ rb_xml_encoding_handler_name(VALUE self)
|
|
82
90
|
{
|
83
91
|
xmlCharEncodingHandlerPtr handler;
|
84
92
|
|
85
|
-
|
93
|
+
TypedData_Get_Struct(self, xmlCharEncodingHandler, &xml_encoding_handler_type, handler);
|
86
94
|
|
87
95
|
return NOKOGIRI_STR_NEW2(handler->name);
|
88
96
|
}
|