nokogiri 1.14.2 → 1.16.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +19 -15
- data/README.md +4 -1
- data/dependencies.yml +9 -8
- data/ext/nokogiri/extconf.rb +69 -26
- data/ext/nokogiri/html4_document.c +1 -2
- data/ext/nokogiri/html4_element_description.c +19 -14
- data/ext/nokogiri/html4_sax_parser_context.c +10 -16
- data/ext/nokogiri/html4_sax_push_parser.c +3 -3
- data/ext/nokogiri/nokogiri.c +46 -24
- data/ext/nokogiri/nokogiri.h +23 -5
- data/ext/nokogiri/test_global_handlers.c +1 -1
- data/ext/nokogiri/xml_attr.c +1 -1
- data/ext/nokogiri/xml_cdata.c +30 -17
- data/ext/nokogiri/xml_comment.c +1 -1
- data/ext/nokogiri/xml_document.c +113 -25
- data/ext/nokogiri/xml_document_fragment.c +1 -1
- data/ext/nokogiri/xml_dtd.c +1 -1
- data/ext/nokogiri/xml_element_content.c +32 -29
- data/ext/nokogiri/xml_element_decl.c +5 -5
- data/ext/nokogiri/xml_encoding_handler.c +12 -4
- data/ext/nokogiri/xml_entity_reference.c +1 -1
- data/ext/nokogiri/xml_namespace.c +11 -16
- data/ext/nokogiri/xml_node.c +13 -16
- data/ext/nokogiri/xml_node_set.c +125 -105
- data/ext/nokogiri/xml_processing_instruction.c +1 -1
- data/ext/nokogiri/xml_reader.c +61 -74
- data/ext/nokogiri/xml_relax_ng.c +66 -79
- data/ext/nokogiri/xml_sax_parser.c +24 -5
- data/ext/nokogiri/xml_sax_parser_context.c +50 -25
- data/ext/nokogiri/xml_sax_push_parser.c +30 -9
- data/ext/nokogiri/xml_schema.c +94 -115
- data/ext/nokogiri/xml_syntax_error.c +3 -3
- data/ext/nokogiri/xml_text.c +26 -13
- data/ext/nokogiri/xml_xpath_context.c +153 -83
- data/ext/nokogiri/xslt_stylesheet.c +111 -53
- data/gumbo-parser/Makefile +18 -0
- data/gumbo-parser/src/error.c +8 -4
- data/gumbo-parser/src/foreign_attrs.c +13 -14
- data/gumbo-parser/src/foreign_attrs.gperf +1 -1
- data/gumbo-parser/src/parser.c +21 -5
- data/gumbo-parser/src/tokenizer.c +1 -0
- data/lib/nokogiri/css/parser_extras.rb +1 -1
- data/lib/nokogiri/css/xpath_visitor.rb +3 -23
- data/lib/nokogiri/extension.rb +1 -1
- data/lib/nokogiri/html4/document.rb +1 -1
- data/lib/nokogiri/html4/document_fragment.rb +1 -1
- data/lib/nokogiri/html4/element_description_defaults.rb +1821 -353
- data/lib/nokogiri/html4/encoding_reader.rb +1 -1
- data/lib/nokogiri/html5/document_fragment.rb +1 -1
- data/lib/nokogiri/html5/node.rb +5 -0
- data/lib/nokogiri/html5.rb +0 -63
- data/lib/nokogiri/jruby/nokogiri_jars.rb +9 -9
- data/lib/nokogiri/version/constant.rb +1 -1
- data/lib/nokogiri/version/info.rb +6 -5
- data/lib/nokogiri/xml/attr.rb +2 -2
- data/lib/nokogiri/xml/attribute_decl.rb +4 -2
- data/lib/nokogiri/xml/document.rb +4 -5
- data/lib/nokogiri/xml/document_fragment.rb +3 -3
- data/lib/nokogiri/xml/element_content.rb +10 -2
- data/lib/nokogiri/xml/element_decl.rb +4 -2
- data/lib/nokogiri/xml/entity_decl.rb +4 -2
- data/lib/nokogiri/xml/namespace.rb +1 -2
- data/lib/nokogiri/xml/node/save_options.rb +8 -0
- data/lib/nokogiri/xml/node.rb +53 -37
- data/lib/nokogiri/xml/node_set.rb +3 -3
- data/lib/nokogiri/xml/pp/node.rb +23 -12
- data/lib/nokogiri/xml/reader.rb +10 -9
- data/lib/nokogiri/xml/sax/document.rb +1 -1
- data/lib/nokogiri/xml/searchable.rb +21 -13
- data/lib/nokogiri/xml/syntax_error.rb +1 -1
- data/lib/nokogiri/xml.rb +1 -1
- data/lib/nokogiri/xslt/stylesheet.rb +29 -7
- data/lib/nokogiri/xslt.rb +74 -4
- data/lib/nokogiri.rb +13 -5
- data/lib/xsd/xmlparser/nokogiri.rb +1 -1
- data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
- data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
- data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
- data/ports/archives/libxml2-2.12.8.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.39.tar.xz +0 -0
- metadata +16 -12
- data/patches/libxslt/0001-update-automake-files-for-arm64.patch +0 -3037
- data/ports/archives/libxml2-2.10.3.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.37.tar.xz +0 -0
data/ext/nokogiri/nokogiri.h
CHANGED
@@ -51,6 +51,7 @@
|
|
51
51
|
#include <libxslt/xsltconfig.h>
|
52
52
|
#include <libxslt/xsltutils.h>
|
53
53
|
#include <libxslt/transform.h>
|
54
|
+
#include <libxslt/imports.h>
|
54
55
|
#include <libxslt/xsltInternals.h>
|
55
56
|
|
56
57
|
#include <libexslt/exslt.h>
|
@@ -65,6 +66,13 @@ xmlNodePtr xmlLastElementChild(xmlNodePtr parent);
|
|
65
66
|
#define XMLNS_PREFIX "xmlns"
|
66
67
|
#define XMLNS_PREFIX_LEN 6 /* including either colon or \0 */
|
67
68
|
|
69
|
+
#ifndef xmlErrorConstPtr
|
70
|
+
# if LIBXML_VERSION >= 21200
|
71
|
+
# define xmlErrorConstPtr const xmlError *
|
72
|
+
# else
|
73
|
+
# define xmlErrorConstPtr xmlError *
|
74
|
+
# endif
|
75
|
+
#endif
|
68
76
|
|
69
77
|
#include <ruby.h>
|
70
78
|
#include <ruby/st.h>
|
@@ -168,6 +176,7 @@ typedef struct _nokogiriXsltStylesheetTuple {
|
|
168
176
|
|
169
177
|
void noko_xml_document_pin_node(xmlNodePtr);
|
170
178
|
void noko_xml_document_pin_namespace(xmlNsPtr, xmlDocPtr);
|
179
|
+
int noko_xml_document_has_wrapped_blank_nodes_p(xmlDocPtr c_document);
|
171
180
|
|
172
181
|
int noko_io_read(void *ctx, char *buffer, int len);
|
173
182
|
int noko_io_write(void *ctx, char *buffer, int len);
|
@@ -186,12 +195,21 @@ VALUE noko_xml_namespace_wrap_xpath_copy(xmlNsPtr node);
|
|
186
195
|
VALUE noko_xml_element_content_wrap(VALUE doc, xmlElementContentPtr element);
|
187
196
|
|
188
197
|
VALUE noko_xml_node_set_wrap(xmlNodeSetPtr node_set, VALUE document) ;
|
198
|
+
xmlNodeSetPtr noko_xml_node_set_unwrap(VALUE rb_node_set) ;
|
189
199
|
|
190
200
|
VALUE noko_xml_document_wrap_with_init_args(VALUE klass, xmlDocPtr doc, int argc, VALUE *argv);
|
191
201
|
VALUE noko_xml_document_wrap(VALUE klass, xmlDocPtr doc);
|
202
|
+
xmlDocPtr noko_xml_document_unwrap(VALUE rb_document);
|
192
203
|
NOKOPUBFUN VALUE Nokogiri_wrap_xml_document(VALUE klass,
|
193
204
|
xmlDocPtr doc); /* deprecated. use noko_xml_document_wrap() instead. */
|
194
205
|
|
206
|
+
xmlSAXHandlerPtr noko_sax_handler_unwrap(VALUE rb_sax_handler);
|
207
|
+
|
208
|
+
xmlParserCtxtPtr noko_xml_sax_push_parser_unwrap(VALUE rb_parser);
|
209
|
+
|
210
|
+
VALUE noko_xml_sax_parser_context_wrap(VALUE klass, xmlParserCtxtPtr c_context);
|
211
|
+
xmlParserCtxtPtr noko_xml_sax_parser_context_unwrap(VALUE rb_context);
|
212
|
+
|
195
213
|
#define DOC_RUBY_OBJECT_TEST(x) ((nokogiriTuplePtr)(x->_private))
|
196
214
|
#define DOC_RUBY_OBJECT(x) (((nokogiriTuplePtr)(x->_private))->doc)
|
197
215
|
#define DOC_UNLINKED_NODE_HASH(x) (((nokogiriTuplePtr)(x->_private))->unlinkedNodes)
|
@@ -207,18 +225,18 @@ NOKOPUBFUN VALUE Nokogiri_wrap_xml_document(VALUE klass,
|
|
207
225
|
#define DISCARD_CONST_QUAL_XMLCHAR(v) DISCARD_CONST_QUAL(xmlChar *, v)
|
208
226
|
|
209
227
|
#if HAVE_RB_CATEGORY_WARNING
|
210
|
-
# define NOKO_WARN_DEPRECATION(message) rb_category_warning(RB_WARN_CATEGORY_DEPRECATED, message)
|
228
|
+
# define NOKO_WARN_DEPRECATION(message...) rb_category_warning(RB_WARN_CATEGORY_DEPRECATED, message)
|
211
229
|
#else
|
212
|
-
# define NOKO_WARN_DEPRECATION(message) rb_warning(message)
|
230
|
+
# define NOKO_WARN_DEPRECATION(message...) rb_warning(message)
|
213
231
|
#endif
|
214
232
|
|
215
233
|
void Nokogiri_structured_error_func_save(libxmlStructuredErrorHandlerState *handler_state);
|
216
234
|
void Nokogiri_structured_error_func_save_and_set(libxmlStructuredErrorHandlerState *handler_state, void *user_data,
|
217
235
|
xmlStructuredErrorFunc handler);
|
218
236
|
void Nokogiri_structured_error_func_restore(libxmlStructuredErrorHandlerState *handler_state);
|
219
|
-
VALUE Nokogiri_wrap_xml_syntax_error(
|
220
|
-
void Nokogiri_error_array_pusher(void *ctx,
|
221
|
-
NORETURN_DECL void Nokogiri_error_raise(void *ctx,
|
237
|
+
VALUE Nokogiri_wrap_xml_syntax_error(xmlErrorConstPtr error);
|
238
|
+
void Nokogiri_error_array_pusher(void *ctx, xmlErrorConstPtr error);
|
239
|
+
NORETURN_DECL void Nokogiri_error_raise(void *ctx, xmlErrorConstPtr error);
|
222
240
|
void Nokogiri_marshal_xpath_funcall_and_return_values(xmlXPathParserContextPtr ctx, int nargs, VALUE handler,
|
223
241
|
const char *function_name) ;
|
224
242
|
|
data/ext/nokogiri/test_global_handlers.c
CHANGED
@@ -3,7 +3,7 @@
|
|
3
3
|
static VALUE foreign_error_handler_block = Qnil;
|
4
4
|
|
5
5
|
static void
|
6
|
-
foreign_error_handler(void *user_data,
|
6
|
+
foreign_error_handler(void *user_data, xmlErrorConstPtr c_error)
|
7
7
|
{
|
8
8
|
rb_funcall(foreign_error_handler_block, rb_intern("call"), 0);
|
9
9
|
}
|
data/ext/nokogiri/xml_attr.c
CHANGED
@@ -68,7 +68,7 @@ new (int argc, VALUE *argv, VALUE klass)
|
|
68
68
|
rb_raise(rb_eArgError, "parameter must be a Nokogiri::XML::Document");
|
69
69
|
}
|
70
70
|
|
71
|
-
|
71
|
+
xml_doc = noko_xml_document_unwrap(document);
|
72
72
|
|
73
73
|
node = xmlNewDocProp(
|
74
74
|
xml_doc,
|
data/ext/nokogiri/xml_cdata.c
CHANGED
@@ -12,31 +12,44 @@ VALUE cNokogiriXmlCData;
|
|
12
12
|
* raise a TypeError exception.
|
13
13
|
*/
|
14
14
|
static VALUE
|
15
|
-
|
15
|
+
rb_xml_cdata_s_new(int argc, VALUE *argv, VALUE klass)
|
16
16
|
{
|
17
|
-
xmlDocPtr
|
18
|
-
xmlNodePtr
|
19
|
-
VALUE
|
20
|
-
VALUE
|
21
|
-
VALUE
|
17
|
+
xmlDocPtr c_document;
|
18
|
+
xmlNodePtr c_node;
|
19
|
+
VALUE rb_document;
|
20
|
+
VALUE rb_content;
|
21
|
+
VALUE rb_rest;
|
22
22
|
VALUE rb_node;
|
23
|
-
xmlChar *
|
24
|
-
int
|
23
|
+
xmlChar *c_content = NULL;
|
24
|
+
int c_content_len = 0;
|
25
25
|
|
26
|
-
rb_scan_args(argc, argv, "2*", &
|
26
|
+
rb_scan_args(argc, argv, "2*", &rb_document, &rb_content, &rb_rest);
|
27
27
|
|
28
|
-
|
28
|
+
if (!rb_obj_is_kind_of(rb_document, cNokogiriXmlNode)) {
|
29
|
+
rb_raise(rb_eTypeError,
|
30
|
+
"expected first parameter to be a Nokogiri::XML::Document, received %"PRIsVALUE,
|
31
|
+
rb_obj_class(rb_document));
|
32
|
+
}
|
33
|
+
|
34
|
+
if (!rb_obj_is_kind_of(rb_document, cNokogiriXmlDocument)) {
|
35
|
+
xmlNodePtr deprecated_node_type_arg;
|
36
|
+
NOKO_WARN_DEPRECATION("Passing a Node as the first parameter to CDATA.new is deprecated. Please pass a Document instead. This will become an error in Nokogiri v1.17.0."); // TODO: deprecated in v1.15.3, remove in v1.17.0
|
37
|
+
Noko_Node_Get_Struct(rb_document, xmlNode, deprecated_node_type_arg);
|
38
|
+
c_document = deprecated_node_type_arg->doc;
|
39
|
+
} else {
|
40
|
+
c_document = noko_xml_document_unwrap(rb_document);
|
41
|
+
}
|
29
42
|
|
30
|
-
if (!NIL_P(
|
31
|
-
|
32
|
-
|
43
|
+
if (!NIL_P(rb_content)) {
|
44
|
+
c_content = (xmlChar *)StringValuePtr(rb_content);
|
45
|
+
c_content_len = RSTRING_LENINT(rb_content);
|
33
46
|
}
|
34
47
|
|
35
|
-
|
48
|
+
c_node = xmlNewCDataBlock(c_document, c_content, c_content_len);
|
36
49
|
|
37
|
-
noko_xml_document_pin_node(
|
50
|
+
noko_xml_document_pin_node(c_node);
|
38
51
|
|
39
|
-
rb_node = noko_xml_node_wrap(klass,
|
52
|
+
rb_node = noko_xml_node_wrap(klass, c_node);
|
40
53
|
rb_obj_call_init(rb_node, argc, argv);
|
41
54
|
|
42
55
|
if (rb_block_given_p()) { rb_yield(rb_node); }
|
@@ -53,5 +66,5 @@ noko_init_xml_cdata(void)
|
|
53
66
|
*/
|
54
67
|
cNokogiriXmlCData = rb_define_class_under(mNokogiriXml, "CDATA", cNokogiriXmlText);
|
55
68
|
|
56
|
-
rb_define_singleton_method(cNokogiriXmlCData, "new",
|
69
|
+
rb_define_singleton_method(cNokogiriXmlCData, "new", rb_xml_cdata_s_new, -1);
|
57
70
|
}
|
data/ext/nokogiri/xml_comment.c
CHANGED
@@ -30,7 +30,7 @@ new (int argc, VALUE *argv, VALUE klass)
|
|
30
30
|
rb_raise(rb_eArgError, "first argument must be a XML::Document or XML::Node");
|
31
31
|
}
|
32
32
|
|
33
|
-
|
33
|
+
xml_doc = noko_xml_document_unwrap(document);
|
34
34
|
|
35
35
|
node = xmlNewDocComment(
|
36
36
|
xml_doc,
|
data/ext/nokogiri/xml_document.c
CHANGED
@@ -51,8 +51,9 @@ remove_private(xmlNodePtr node)
|
|
51
51
|
}
|
52
52
|
|
53
53
|
static void
|
54
|
-
mark(xmlDocPtr doc)
|
54
|
+
mark(void *data)
|
55
55
|
{
|
56
|
+
xmlDocPtr doc = (xmlDocPtr)data;
|
56
57
|
nokogiriTuplePtr tuple = (nokogiriTuplePtr)doc->_private;
|
57
58
|
if (tuple) {
|
58
59
|
rb_gc_mark(tuple->doc);
|
@@ -61,8 +62,9 @@ mark(xmlDocPtr doc)
|
|
61
62
|
}
|
62
63
|
|
63
64
|
static void
|
64
|
-
dealloc(xmlDocPtr doc)
|
65
|
+
dealloc(void *data)
|
65
66
|
{
|
67
|
+
xmlDocPtr doc = (xmlDocPtr)data;
|
66
68
|
st_table *node_hash;
|
67
69
|
|
68
70
|
node_hash = DOC_UNLINKED_NODE_HASH(doc);
|
@@ -72,18 +74,75 @@ dealloc(xmlDocPtr doc)
|
|
72
74
|
|
73
75
|
ruby_xfree(doc->_private);
|
74
76
|
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
77
|
+
#if defined(__GNUC__) && __GNUC__ >= 5
|
78
|
+
#pragma GCC diagnostic push
|
79
|
+
#pragma GCC diagnostic ignored "-Wdeprecated-declarations" // xmlDeregisterNodeDefault is deprecated as of libxml2 2.11.0
|
80
|
+
#endif
|
81
|
+
/*
|
82
|
+
* libxml-ruby < 3.0.0 uses xmlDeregisterNodeDefault. If the user is using one of those older
|
83
|
+
* versions, the registered callback from libxml-ruby will access the _private pointers set by
|
84
|
+
* nokogiri, which will result in segfaults.
|
85
|
+
*
|
86
|
+
* To avoid this, we need to clear the _private pointers from all nodes in this document tree
|
87
|
+
* before that callback gets invoked.
|
88
|
+
*
|
89
|
+
* libxml-ruby 3.0.0 was released in 2017-02, so at some point we can probably safely remove this
|
90
|
+
* safeguard (though probably pairing with a runtime check on the libxml-ruby version).
|
79
91
|
*/
|
80
92
|
if (xmlDeregisterNodeDefaultValue) {
|
81
93
|
remove_private((xmlNodePtr)doc);
|
82
94
|
}
|
95
|
+
#if defined(__GNUC__) && __GNUC__ >= 5
|
96
|
+
#pragma GCC diagnostic pop
|
97
|
+
#endif
|
83
98
|
|
84
99
|
xmlFreeDoc(doc);
|
85
100
|
}
|
86
101
|
|
102
|
+
static size_t
|
103
|
+
memsize_node(const xmlNodePtr node)
|
104
|
+
{
|
105
|
+
/* note we don't count namespace definitions, just going for a good-enough number here */
|
106
|
+
xmlNodePtr child;
|
107
|
+
xmlAttrPtr property;
|
108
|
+
size_t memsize = 0;
|
109
|
+
|
110
|
+
memsize += xmlStrlen(node->name);
|
111
|
+
|
112
|
+
if (node->type == XML_ELEMENT_NODE) {
|
113
|
+
for (property = node->properties; property; property = property->next) {
|
114
|
+
memsize += sizeof(xmlAttr) + memsize_node((xmlNodePtr)property);
|
115
|
+
}
|
116
|
+
}
|
117
|
+
if (node->type == XML_TEXT_NODE) {
|
118
|
+
memsize += xmlStrlen(node->content);
|
119
|
+
}
|
120
|
+
for (child = node->children; child; child = child->next) {
|
121
|
+
memsize += sizeof(xmlNode) + memsize_node(child);
|
122
|
+
}
|
123
|
+
return memsize;
|
124
|
+
}
|
125
|
+
|
126
|
+
static size_t
|
127
|
+
memsize(const void *data)
|
128
|
+
{
|
129
|
+
xmlDocPtr doc = (const xmlDocPtr)data;
|
130
|
+
size_t memsize = sizeof(xmlDoc);
|
131
|
+
/* This may not account for all memory use */
|
132
|
+
memsize += memsize_node((xmlNodePtr)doc);
|
133
|
+
return memsize;
|
134
|
+
}
|
135
|
+
|
136
|
+
static const rb_data_type_t noko_xml_document_data_type = {
|
137
|
+
.wrap_struct_name = "Nokogiri::XML::Document",
|
138
|
+
.function = {
|
139
|
+
.dmark = mark,
|
140
|
+
.dfree = dealloc,
|
141
|
+
.dsize = memsize,
|
142
|
+
},
|
143
|
+
// .flags = RUBY_TYPED_FREE_IMMEDIATELY, // TODO see https://github.com/sparklemotion/nokogiri/issues/2822
|
144
|
+
};
|
145
|
+
|
87
146
|
static void
|
88
147
|
recursively_remove_namespaces_from_node(xmlNodePtr node)
|
89
148
|
{
|
@@ -126,8 +185,7 @@ recursively_remove_namespaces_from_node(xmlNodePtr node)
|
|
126
185
|
static VALUE
|
127
186
|
url(VALUE self)
|
128
187
|
{
|
129
|
-
xmlDocPtr doc;
|
130
|
-
Data_Get_Struct(self, xmlDoc, doc);
|
188
|
+
xmlDocPtr doc = noko_xml_document_unwrap(self);
|
131
189
|
|
132
190
|
if (doc->URL) { return NOKOGIRI_STR_NEW2(doc->URL); }
|
133
191
|
|
@@ -146,7 +204,7 @@ rb_xml_document_root_set(VALUE self, VALUE rb_new_root)
|
|
146
204
|
xmlDocPtr c_document;
|
147
205
|
xmlNodePtr c_new_root = NULL, c_current_root;
|
148
206
|
|
149
|
-
|
207
|
+
c_document = noko_xml_document_unwrap(self);
|
150
208
|
|
151
209
|
c_current_root = xmlDocGetRootElement(c_document);
|
152
210
|
if (c_current_root) {
|
@@ -190,7 +248,7 @@ rb_xml_document_root(VALUE self)
|
|
190
248
|
xmlDocPtr c_document;
|
191
249
|
xmlNodePtr c_root;
|
192
250
|
|
193
|
-
|
251
|
+
c_document = noko_xml_document_unwrap(self);
|
194
252
|
|
195
253
|
c_root = xmlDocGetRootElement(c_document);
|
196
254
|
if (!c_root) {
|
@@ -209,8 +267,7 @@ rb_xml_document_root(VALUE self)
|
|
209
267
|
static VALUE
|
210
268
|
set_encoding(VALUE self, VALUE encoding)
|
211
269
|
{
|
212
|
-
xmlDocPtr doc;
|
213
|
-
Data_Get_Struct(self, xmlDoc, doc);
|
270
|
+
xmlDocPtr doc = noko_xml_document_unwrap(self);
|
214
271
|
|
215
272
|
if (doc->encoding) {
|
216
273
|
xmlFree(DISCARD_CONST_QUAL_XMLCHAR(doc->encoding));
|
@@ -230,8 +287,7 @@ set_encoding(VALUE self, VALUE encoding)
|
|
230
287
|
static VALUE
|
231
288
|
encoding(VALUE self)
|
232
289
|
{
|
233
|
-
xmlDocPtr doc;
|
234
|
-
Data_Get_Struct(self, xmlDoc, doc);
|
290
|
+
xmlDocPtr doc = noko_xml_document_unwrap(self);
|
235
291
|
|
236
292
|
if (!doc->encoding) { return Qnil; }
|
237
293
|
return NOKOGIRI_STR_NEW2(doc->encoding);
|
@@ -246,8 +302,7 @@ encoding(VALUE self)
|
|
246
302
|
static VALUE
|
247
303
|
version(VALUE self)
|
248
304
|
{
|
249
|
-
xmlDocPtr doc;
|
250
|
-
Data_Get_Struct(self, xmlDoc, doc);
|
305
|
+
xmlDocPtr doc = noko_xml_document_unwrap(self);
|
251
306
|
|
252
307
|
if (!doc->version) { return Qnil; }
|
253
308
|
return NOKOGIRI_STR_NEW2(doc->version);
|
@@ -286,7 +341,7 @@ read_io(VALUE klass,
|
|
286
341
|
xmlSetStructuredErrorFunc(NULL, NULL);
|
287
342
|
|
288
343
|
if (doc == NULL) {
|
289
|
-
|
344
|
+
xmlErrorConstPtr error;
|
290
345
|
|
291
346
|
xmlFreeDoc(doc);
|
292
347
|
|
@@ -332,7 +387,7 @@ read_memory(VALUE klass,
|
|
332
387
|
xmlSetStructuredErrorFunc(NULL, NULL);
|
333
388
|
|
334
389
|
if (doc == NULL) {
|
335
|
-
|
390
|
+
xmlErrorConstPtr error;
|
336
391
|
|
337
392
|
xmlFreeDoc(doc);
|
338
393
|
|
@@ -369,7 +424,7 @@ duplicate_document(int argc, VALUE *argv, VALUE self)
|
|
369
424
|
level = INT2NUM((long)1);
|
370
425
|
}
|
371
426
|
|
372
|
-
|
427
|
+
doc = noko_xml_document_unwrap(self);
|
373
428
|
|
374
429
|
dup = xmlCopyDoc(doc, (int)NUM2INT(level));
|
375
430
|
|
@@ -442,8 +497,7 @@ new (int argc, VALUE *argv, VALUE klass)
|
|
442
497
|
static VALUE
|
443
498
|
remove_namespaces_bang(VALUE self)
|
444
499
|
{
|
445
|
-
xmlDocPtr doc ;
|
446
|
-
Data_Get_Struct(self, xmlDoc, doc);
|
500
|
+
xmlDocPtr doc = noko_xml_document_unwrap(self);
|
447
501
|
|
448
502
|
recursively_remove_namespaces_from_node((xmlNodePtr)doc);
|
449
503
|
return self;
|
@@ -471,7 +525,7 @@ create_entity(int argc, VALUE *argv, VALUE self)
|
|
471
525
|
xmlEntityPtr ptr;
|
472
526
|
xmlDocPtr doc ;
|
473
527
|
|
474
|
-
|
528
|
+
doc = noko_xml_document_unwrap(self);
|
475
529
|
|
476
530
|
rb_scan_args(argc, argv, "14", &name, &type, &external_id, &system_id,
|
477
531
|
&content);
|
@@ -487,7 +541,7 @@ create_entity(int argc, VALUE *argv, VALUE self)
|
|
487
541
|
);
|
488
542
|
|
489
543
|
if (NULL == ptr) {
|
490
|
-
|
544
|
+
xmlErrorConstPtr error = xmlGetLastError();
|
491
545
|
if (error) {
|
492
546
|
rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
|
493
547
|
} else {
|
@@ -559,7 +613,7 @@ rb_xml_document_canonicalize(int argc, VALUE *argv, VALUE self)
|
|
559
613
|
}
|
560
614
|
}
|
561
615
|
|
562
|
-
|
616
|
+
c_doc = noko_xml_document_unwrap(self);
|
563
617
|
|
564
618
|
rb_cStringIO = rb_const_get_at(rb_cObject, rb_intern("StringIO"));
|
565
619
|
rb_io = rb_class_new_instance(0, 0, rb_cStringIO);
|
@@ -607,7 +661,7 @@ noko_xml_document_wrap_with_init_args(VALUE klass, xmlDocPtr c_document, int arg
|
|
607
661
|
klass = cNokogiriXmlDocument;
|
608
662
|
}
|
609
663
|
|
610
|
-
rb_document =
|
664
|
+
rb_document = TypedData_Wrap_Struct(klass, &noko_xml_document_data_type, c_document);
|
611
665
|
|
612
666
|
tuple = (nokogiriTuplePtr)ruby_xmalloc(sizeof(nokogiriTuple));
|
613
667
|
tuple->doc = rb_document;
|
@@ -640,6 +694,40 @@ noko_xml_document_wrap(VALUE klass, xmlDocPtr doc)
|
|
640
694
|
return noko_xml_document_wrap_with_init_args(klass, doc, 0, NULL);
|
641
695
|
}
|
642
696
|
|
697
|
+
xmlDocPtr
|
698
|
+
noko_xml_document_unwrap(VALUE rb_document)
|
699
|
+
{
|
700
|
+
xmlDocPtr c_document;
|
701
|
+
TypedData_Get_Struct(rb_document, xmlDoc, &noko_xml_document_data_type, c_document);
|
702
|
+
return c_document;
|
703
|
+
}
|
704
|
+
|
705
|
+
/* Schema creation will remove and deallocate "blank" nodes.
|
706
|
+
* If those blank nodes have been exposed to Ruby, they could get freed
|
707
|
+
* out from under the VALUE pointer. This function checks to see if any of
|
708
|
+
* those nodes have been exposed to Ruby, and if so we should raise an exception.
|
709
|
+
*/
|
710
|
+
int
|
711
|
+
noko_xml_document_has_wrapped_blank_nodes_p(xmlDocPtr c_document)
|
712
|
+
{
|
713
|
+
VALUE cache = DOC_NODE_CACHE(c_document);
|
714
|
+
|
715
|
+
if (NIL_P(cache)) {
|
716
|
+
return 0;
|
717
|
+
}
|
718
|
+
|
719
|
+
for (long jnode = 0; jnode < RARRAY_LEN(cache); jnode++) {
|
720
|
+
xmlNodePtr node;
|
721
|
+
VALUE element = rb_ary_entry(cache, jnode);
|
722
|
+
|
723
|
+
Noko_Node_Get_Struct(element, xmlNode, node);
|
724
|
+
if (xmlIsBlankNode(node)) {
|
725
|
+
return 1;
|
726
|
+
}
|
727
|
+
}
|
728
|
+
|
729
|
+
return 0;
|
730
|
+
}
|
643
731
|
|
644
732
|
void
|
645
733
|
noko_xml_document_pin_node(xmlNodePtr node)
|
data/ext/nokogiri/xml_dtd.c
CHANGED
@@ -139,7 +139,7 @@ validate(VALUE self, VALUE document)
|
|
139
139
|
VALUE error_list;
|
140
140
|
|
141
141
|
Noko_Node_Get_Struct(self, xmlDtd, dtd);
|
142
|
-
|
142
|
+
doc = noko_xml_document_unwrap(document);
|
143
143
|
error_list = rb_ary_new();
|
144
144
|
|
145
145
|
ctxt = xmlNewValidCtxt();
|
data/ext/nokogiri/xml_element_content.c
CHANGED
@@ -2,17 +2,22 @@
|
|
2
2
|
|
3
3
|
VALUE cNokogiriXmlElementContent;
|
4
4
|
|
5
|
+
static const rb_data_type_t element_content_data_type = {
|
6
|
+
.wrap_struct_name = "Nokogiri::XML::ElementContent",
|
7
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
|
8
|
+
};
|
9
|
+
|
5
10
|
/*
|
6
11
|
* call-seq:
|
7
|
-
*
|
12
|
+
* name → String
|
8
13
|
*
|
9
|
-
*
|
14
|
+
* [Returns] The content element's +name+
|
10
15
|
*/
|
11
16
|
static VALUE
|
12
17
|
get_name(VALUE self)
|
13
18
|
{
|
14
19
|
xmlElementContentPtr elem;
|
15
|
-
|
20
|
+
TypedData_Get_Struct(self, xmlElementContent, &element_content_data_type, elem);
|
16
21
|
|
17
22
|
if (!elem->name) { return Qnil; }
|
18
23
|
return NOKOGIRI_STR_NEW2(elem->name);
|
@@ -20,47 +25,40 @@ get_name(VALUE self)
|
|
20
25
|
|
21
26
|
/*
|
22
27
|
* call-seq:
|
23
|
-
*
|
28
|
+
* type → Integer
|
24
29
|
*
|
25
|
-
*
|
26
|
-
* or OR.
|
30
|
+
* [Returns] The content element's +type+. Possible values are +PCDATA+, +ELEMENT+, +SEQ+, or +OR+.
|
27
31
|
*/
|
28
32
|
static VALUE
|
29
33
|
get_type(VALUE self)
|
30
34
|
{
|
31
35
|
xmlElementContentPtr elem;
|
32
|
-
|
36
|
+
TypedData_Get_Struct(self, xmlElementContent, &element_content_data_type, elem);
|
33
37
|
|
34
38
|
return INT2NUM(elem->type);
|
35
39
|
}
|
36
40
|
|
37
41
|
/*
|
38
|
-
* call-seq:
|
39
|
-
* c1
|
40
|
-
*
|
41
42
|
* Get the first child.
|
42
43
|
*/
|
43
44
|
static VALUE
|
44
45
|
get_c1(VALUE self)
|
45
46
|
{
|
46
47
|
xmlElementContentPtr elem;
|
47
|
-
|
48
|
+
TypedData_Get_Struct(self, xmlElementContent, &element_content_data_type, elem);
|
48
49
|
|
49
50
|
if (!elem->c1) { return Qnil; }
|
50
51
|
return noko_xml_element_content_wrap(rb_iv_get(self, "@document"), elem->c1);
|
51
52
|
}
|
52
53
|
|
53
54
|
/*
|
54
|
-
*
|
55
|
-
* c2
|
56
|
-
*
|
57
|
-
* Get the first child.
|
55
|
+
* Get the second child.
|
58
56
|
*/
|
59
57
|
static VALUE
|
60
58
|
get_c2(VALUE self)
|
61
59
|
{
|
62
60
|
xmlElementContentPtr elem;
|
63
|
-
|
61
|
+
TypedData_Get_Struct(self, xmlElementContent, &element_content_data_type, elem);
|
64
62
|
|
65
63
|
if (!elem->c2) { return Qnil; }
|
66
64
|
return noko_xml_element_content_wrap(rb_iv_get(self, "@document"), elem->c2);
|
@@ -68,45 +66,50 @@ get_c2(VALUE self)
|
|
68
66
|
|
69
67
|
/*
|
70
68
|
* call-seq:
|
71
|
-
*
|
69
|
+
* occur → Integer
|
72
70
|
*
|
73
|
-
*
|
74
|
-
* or PLUS.
|
71
|
+
* [Returns] The content element's +occur+ flag. Possible values are +ONCE+, +OPT+, +MULT+ or +PLUS+.
|
75
72
|
*/
|
76
73
|
static VALUE
|
77
74
|
get_occur(VALUE self)
|
78
75
|
{
|
79
76
|
xmlElementContentPtr elem;
|
80
|
-
|
77
|
+
TypedData_Get_Struct(self, xmlElementContent, &element_content_data_type, elem);
|
81
78
|
|
82
79
|
return INT2NUM(elem->ocur);
|
83
80
|
}
|
84
81
|
|
85
82
|
/*
|
86
83
|
* call-seq:
|
87
|
-
*
|
84
|
+
* prefix → String
|
88
85
|
*
|
89
|
-
*
|
86
|
+
* [Returns] The content element's namespace +prefix+.
|
90
87
|
*/
|
91
88
|
static VALUE
|
92
89
|
get_prefix(VALUE self)
|
93
90
|
{
|
94
91
|
xmlElementContentPtr elem;
|
95
|
-
|
92
|
+
TypedData_Get_Struct(self, xmlElementContent, &element_content_data_type, elem);
|
96
93
|
|
97
94
|
if (!elem->prefix) { return Qnil; }
|
98
95
|
|
99
96
|
return NOKOGIRI_STR_NEW2(elem->prefix);
|
100
97
|
}
|
101
98
|
|
99
|
+
/*
|
100
|
+
* create a Nokogiri::XML::ElementContent object around an +element+.
|
101
|
+
*/
|
102
102
|
VALUE
|
103
|
-
noko_xml_element_content_wrap(VALUE
|
103
|
+
noko_xml_element_content_wrap(VALUE rb_document, xmlElementContentPtr c_element_content)
|
104
104
|
{
|
105
|
-
VALUE elem =
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
105
|
+
VALUE elem = TypedData_Wrap_Struct(
|
106
|
+
cNokogiriXmlElementContent,
|
107
|
+
&element_content_data_type,
|
108
|
+
c_element_content
|
109
|
+
);
|
110
|
+
|
111
|
+
/* keep a handle on the document for GC marking */
|
112
|
+
rb_iv_set(elem, "@document", rb_document);
|
110
113
|
|
111
114
|
return elem;
|
112
115
|
}
|
data/ext/nokogiri/xml_element_decl.c
CHANGED
@@ -6,7 +6,7 @@ static ID id_document;
|
|
6
6
|
|
7
7
|
/*
|
8
8
|
* call-seq:
|
9
|
-
*
|
9
|
+
* element_type → Integer
|
10
10
|
*
|
11
11
|
* The element_type
|
12
12
|
*/
|
@@ -20,9 +20,9 @@ element_type(VALUE self)
|
|
20
20
|
|
21
21
|
/*
|
22
22
|
* call-seq:
|
23
|
-
*
|
23
|
+
* content → Nokogiri::XML::ElementContent
|
24
24
|
*
|
25
|
-
* The
|
25
|
+
* [Returns] The root of this element declaration's content tree.
|
26
26
|
*/
|
27
27
|
static VALUE
|
28
28
|
content(VALUE self)
|
@@ -40,9 +40,9 @@ content(VALUE self)
|
|
40
40
|
|
41
41
|
/*
|
42
42
|
* call-seq:
|
43
|
-
*
|
43
|
+
* prefix → String
|
44
44
|
*
|
45
|
-
* The namespace prefix for this
|
45
|
+
* [Returns] The namespace +prefix+ for this element declaration.
|
46
46
|
*/
|
47
47
|
static VALUE
|
48
48
|
prefix(VALUE self)
|
data/ext/nokogiri/xml_encoding_handler.c
CHANGED
@@ -2,14 +2,22 @@
|
|
2
2
|
|
3
3
|
VALUE cNokogiriEncodingHandler;
|
4
4
|
|
5
|
-
|
6
5
|
static void
|
7
|
-
|
6
|
+
xml_encoding_handler_dealloc(void *data)
|
8
7
|
{
|
9
8
|
/* make sure iconv handlers are cleaned up and freed */
|
9
|
+
xmlCharEncodingHandlerPtr c_handler = data;
|
10
10
|
xmlCharEncCloseFunc(c_handler);
|
11
11
|
}
|
12
12
|
|
13
|
+
static const rb_data_type_t xml_encoding_handler_type = {
|
14
|
+
.wrap_struct_name = "Nokogiri::EncodingHandler",
|
15
|
+
.function = {
|
16
|
+
.dfree = xml_encoding_handler_dealloc,
|
17
|
+
},
|
18
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
|
19
|
+
};
|
20
|
+
|
13
21
|
|
14
22
|
/*
|
15
23
|
* call-seq: Nokogiri::EncodingHandler.[](name)
|
@@ -23,7 +31,7 @@ rb_xml_encoding_handler_s_get(VALUE klass, VALUE key)
|
|
23
31
|
|
24
32
|
handler = xmlFindCharEncodingHandler(StringValueCStr(key));
|
25
33
|
if (handler) {
|
26
|
-
return
|
34
|
+
return TypedData_Wrap_Struct(klass, &xml_encoding_handler_type, handler);
|
27
35
|
}
|
28
36
|
|
29
37
|
return Qnil;
|
@@ -82,7 +90,7 @@ rb_xml_encoding_handler_name(VALUE self)
|
|
82
90
|
{
|
83
91
|
xmlCharEncodingHandlerPtr handler;
|
84
92
|
|
85
|
-
|
93
|
+
TypedData_Get_Struct(self, xmlCharEncodingHandler, &xml_encoding_handler_type, handler);
|
86
94
|
|
87
95
|
return NOKOGIRI_STR_NEW2(handler->name);
|
88
96
|
}
|