nokogiri 1.11.1 → 1.11.2
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/LICENSE-DEPENDENCIES.md +12 -12
- data/LICENSE.md +1 -1
- data/README.md +20 -15
- data/ext/nokogiri/depend +34 -474
- data/ext/nokogiri/extconf.rb +253 -183
- data/ext/nokogiri/html_document.c +10 -15
- data/ext/nokogiri/html_element_description.c +84 -71
- data/ext/nokogiri/html_entity_lookup.c +21 -16
- data/ext/nokogiri/html_sax_parser_context.c +66 -65
- data/ext/nokogiri/html_sax_push_parser.c +29 -27
- data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
- data/ext/nokogiri/nokogiri.c +171 -63
- data/ext/nokogiri/nokogiri.h +158 -75
- data/ext/nokogiri/test_global_handlers.c +3 -4
- data/ext/nokogiri/xml_attr.c +15 -15
- data/ext/nokogiri/xml_attribute_decl.c +18 -18
- data/ext/nokogiri/xml_cdata.c +13 -18
- data/ext/nokogiri/xml_comment.c +19 -26
- data/ext/nokogiri/xml_document.c +221 -164
- data/ext/nokogiri/xml_document_fragment.c +13 -15
- data/ext/nokogiri/xml_dtd.c +54 -48
- data/ext/nokogiri/xml_element_content.c +30 -27
- data/ext/nokogiri/xml_element_decl.c +22 -22
- data/ext/nokogiri/xml_encoding_handler.c +17 -11
- data/ext/nokogiri/xml_entity_decl.c +32 -30
- data/ext/nokogiri/xml_entity_reference.c +16 -18
- data/ext/nokogiri/xml_namespace.c +56 -49
- data/ext/nokogiri/xml_node.c +338 -286
- data/ext/nokogiri/xml_node_set.c +168 -156
- data/ext/nokogiri/xml_processing_instruction.c +17 -19
- data/ext/nokogiri/xml_reader.c +191 -157
- data/ext/nokogiri/xml_relax_ng.c +29 -23
- data/ext/nokogiri/xml_sax_parser.c +117 -112
- data/ext/nokogiri/xml_sax_parser_context.c +100 -85
- data/ext/nokogiri/xml_sax_push_parser.c +34 -27
- data/ext/nokogiri/xml_schema.c +48 -42
- data/ext/nokogiri/xml_syntax_error.c +21 -23
- data/ext/nokogiri/xml_text.c +13 -17
- data/ext/nokogiri/xml_xpath_context.c +134 -127
- data/ext/nokogiri/xslt_stylesheet.c +157 -157
- data/lib/nokogiri.rb +1 -22
- data/lib/nokogiri/css/parser.rb +1 -1
- data/lib/nokogiri/extension.rb +26 -0
- data/lib/nokogiri/html/document_fragment.rb +15 -15
- data/lib/nokogiri/version/constant.rb +1 -1
- data/lib/nokogiri/version/info.rb +31 -8
- data/lib/nokogiri/xml/document.rb +31 -11
- data/lib/nokogiri/xml/node.rb +38 -42
- data/lib/nokogiri/xml/reader.rb +2 -9
- data/lib/nokogiri/xml/xpath.rb +1 -3
- data/lib/nokogiri/xml/xpath/syntax_error.rb +1 -1
- data/patches/libxml2/0010-parser.c-shrink-the-input-buffer-when-appropriate.patch +70 -0
- metadata +8 -41
- data/ext/nokogiri/html_document.h +0 -10
- data/ext/nokogiri/html_element_description.h +0 -10
- data/ext/nokogiri/html_entity_lookup.h +0 -8
- data/ext/nokogiri/html_sax_parser_context.h +0 -11
- data/ext/nokogiri/html_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_attr.h +0 -9
- data/ext/nokogiri/xml_attribute_decl.h +0 -9
- data/ext/nokogiri/xml_cdata.h +0 -9
- data/ext/nokogiri/xml_comment.h +0 -9
- data/ext/nokogiri/xml_document.h +0 -23
- data/ext/nokogiri/xml_document_fragment.h +0 -10
- data/ext/nokogiri/xml_dtd.h +0 -10
- data/ext/nokogiri/xml_element_content.h +0 -10
- data/ext/nokogiri/xml_element_decl.h +0 -9
- data/ext/nokogiri/xml_encoding_handler.h +0 -8
- data/ext/nokogiri/xml_entity_decl.h +0 -10
- data/ext/nokogiri/xml_entity_reference.h +0 -9
- data/ext/nokogiri/xml_io.c +0 -63
- data/ext/nokogiri/xml_io.h +0 -11
- data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
- data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
- data/ext/nokogiri/xml_namespace.h +0 -14
- data/ext/nokogiri/xml_node.h +0 -13
- data/ext/nokogiri/xml_node_set.h +0 -12
- data/ext/nokogiri/xml_processing_instruction.h +0 -9
- data/ext/nokogiri/xml_reader.h +0 -10
- data/ext/nokogiri/xml_relax_ng.h +0 -9
- data/ext/nokogiri/xml_sax_parser.h +0 -39
- data/ext/nokogiri/xml_sax_parser_context.h +0 -10
- data/ext/nokogiri/xml_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_schema.h +0 -9
- data/ext/nokogiri/xml_syntax_error.h +0 -25
- data/ext/nokogiri/xml_text.h +0 -9
- data/ext/nokogiri/xml_xpath_context.h +0 -10
- data/ext/nokogiri/xslt_stylesheet.h +0 -14
data/ext/nokogiri/xml_comment.c
CHANGED
@@ -1,4 +1,6 @@
|
|
1
|
-
#include <
|
1
|
+
#include <nokogiri.h>
|
2
|
+
|
3
|
+
VALUE cNokogiriXmlComment;
|
2
4
|
|
3
5
|
static ID document_id ;
|
4
6
|
|
@@ -9,7 +11,8 @@ static ID document_id ;
|
|
9
11
|
* Create a new Comment element on the +document+ with +content+.
|
10
12
|
* Alternatively, if a +node+ is passed, the +node+'s document is used.
|
11
13
|
*/
|
12
|
-
static VALUE
|
14
|
+
static VALUE
|
15
|
+
new (int argc, VALUE *argv, VALUE klass)
|
13
16
|
{
|
14
17
|
xmlDocPtr xml_doc;
|
15
18
|
xmlNodePtr node;
|
@@ -20,50 +23,40 @@ static VALUE new(int argc, VALUE *argv, VALUE klass)
|
|
20
23
|
|
21
24
|
rb_scan_args(argc, argv, "2*", &document, &content, &rest);
|
22
25
|
|
23
|
-
if (rb_obj_is_kind_of(document, cNokogiriXmlNode))
|
24
|
-
{
|
26
|
+
if (rb_obj_is_kind_of(document, cNokogiriXmlNode)) {
|
25
27
|
document = rb_funcall(document, document_id, 0);
|
26
|
-
}
|
27
|
-
|
28
|
-
&& !rb_obj_is_kind_of(document, cNokogiriXmlDocumentFragment))
|
29
|
-
{
|
28
|
+
} else if (!rb_obj_is_kind_of(document, cNokogiriXmlDocument)
|
29
|
+
&& !rb_obj_is_kind_of(document, cNokogiriXmlDocumentFragment)) {
|
30
30
|
rb_raise(rb_eArgError, "first argument must be a XML::Document or XML::Node");
|
31
31
|
}
|
32
32
|
|
33
33
|
Data_Get_Struct(document, xmlDoc, xml_doc);
|
34
34
|
|
35
35
|
node = xmlNewDocComment(
|
36
|
-
|
37
|
-
|
38
|
-
|
36
|
+
xml_doc,
|
37
|
+
(const xmlChar *)StringValueCStr(content)
|
38
|
+
);
|
39
39
|
|
40
|
-
rb_node =
|
40
|
+
rb_node = noko_xml_node_wrap(klass, node);
|
41
41
|
rb_obj_call_init(rb_node, argc, argv);
|
42
42
|
|
43
|
-
|
43
|
+
noko_xml_document_pin_node(node);
|
44
44
|
|
45
|
-
if(rb_block_given_p()) rb_yield(rb_node);
|
45
|
+
if (rb_block_given_p()) { rb_yield(rb_node); }
|
46
46
|
|
47
47
|
return rb_node;
|
48
48
|
}
|
49
49
|
|
50
|
-
|
51
|
-
|
50
|
+
void
|
51
|
+
noko_init_xml_comment()
|
52
52
|
{
|
53
|
-
|
54
|
-
VALUE xml = rb_define_module_under(nokogiri, "XML");
|
55
|
-
VALUE node = rb_define_class_under(xml, "Node", rb_cObject);
|
56
|
-
VALUE char_data = rb_define_class_under(xml, "CharacterData", node);
|
57
|
-
|
53
|
+
assert(cNokogiriXmlCharacterData);
|
58
54
|
/*
|
59
55
|
* Comment represents a comment node in an xml document.
|
60
56
|
*/
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
cNokogiriXmlComment = klass;
|
57
|
+
cNokogiriXmlComment = rb_define_class_under(mNokogiriXml, "Comment", cNokogiriXmlCharacterData);
|
65
58
|
|
66
|
-
rb_define_singleton_method(
|
59
|
+
rb_define_singleton_method(cNokogiriXmlComment, "new", new, -1);
|
67
60
|
|
68
61
|
document_id = rb_intern("document");
|
69
62
|
}
|
data/ext/nokogiri/xml_document.c
CHANGED
@@ -1,8 +1,11 @@
|
|
1
|
-
#include <
|
1
|
+
#include <nokogiri.h>
|
2
2
|
|
3
|
-
|
3
|
+
VALUE cNokogiriXmlDocument ;
|
4
|
+
|
5
|
+
static int
|
6
|
+
dealloc_node_i2(xmlNodePtr key, xmlNodePtr node, xmlDocPtr doc)
|
4
7
|
{
|
5
|
-
switch(node->type) {
|
8
|
+
switch (node->type) {
|
6
9
|
case XML_ATTRIBUTE_NODE:
|
7
10
|
xmlFreePropList((xmlAttrPtr)node);
|
8
11
|
break;
|
@@ -13,46 +16,52 @@ static int dealloc_node_i2(xmlNodePtr key, xmlNodePtr node, xmlDocPtr doc)
|
|
13
16
|
xmlFreeDtd((xmlDtdPtr)node);
|
14
17
|
break;
|
15
18
|
default:
|
16
|
-
if(node->parent == NULL) {
|
19
|
+
if (node->parent == NULL) {
|
17
20
|
xmlAddChild((xmlNodePtr)doc, node);
|
18
21
|
}
|
19
22
|
}
|
20
23
|
return ST_CONTINUE;
|
21
24
|
}
|
22
25
|
|
23
|
-
static int
|
26
|
+
static int
|
27
|
+
dealloc_node_i(st_data_t key, st_data_t node, st_data_t doc)
|
24
28
|
{
|
25
29
|
return dealloc_node_i2((xmlNodePtr)key, (xmlNodePtr)node, (xmlDocPtr)doc);
|
26
30
|
}
|
27
31
|
|
28
|
-
static void
|
32
|
+
static void
|
33
|
+
remove_private(xmlNodePtr node)
|
29
34
|
{
|
30
35
|
xmlNodePtr child;
|
31
36
|
|
32
|
-
for (child = node->children; child; child = child->next)
|
37
|
+
for (child = node->children; child; child = child->next) {
|
33
38
|
remove_private(child);
|
39
|
+
}
|
34
40
|
|
35
41
|
if ((node->type == XML_ELEMENT_NODE ||
|
36
42
|
node->type == XML_XINCLUDE_START ||
|
37
43
|
node->type == XML_XINCLUDE_END) &&
|
38
44
|
node->properties) {
|
39
|
-
for (child = (xmlNodePtr)node->properties; child; child = child->next)
|
45
|
+
for (child = (xmlNodePtr)node->properties; child; child = child->next) {
|
40
46
|
remove_private(child);
|
47
|
+
}
|
41
48
|
}
|
42
49
|
|
43
50
|
node->_private = NULL;
|
44
51
|
}
|
45
52
|
|
46
|
-
static void
|
53
|
+
static void
|
54
|
+
mark(xmlDocPtr doc)
|
47
55
|
{
|
48
56
|
nokogiriTuplePtr tuple = (nokogiriTuplePtr)doc->_private;
|
49
|
-
if(tuple) {
|
50
|
-
|
51
|
-
|
57
|
+
if (tuple) {
|
58
|
+
rb_gc_mark(tuple->doc);
|
59
|
+
rb_gc_mark(tuple->node_cache);
|
52
60
|
}
|
53
61
|
}
|
54
62
|
|
55
|
-
static void
|
63
|
+
static void
|
64
|
+
dealloc(xmlDocPtr doc)
|
56
65
|
{
|
57
66
|
st_table *node_hash;
|
58
67
|
|
@@ -70,23 +79,26 @@ static void dealloc(xmlDocPtr doc)
|
|
70
79
|
* xmlDeregisterNode callback from accessing VALUE pointers from ruby's GC
|
71
80
|
* free context, which can result in segfaults.
|
72
81
|
*/
|
73
|
-
if (xmlDeregisterNodeDefaultValue)
|
82
|
+
if (xmlDeregisterNodeDefaultValue) {
|
74
83
|
remove_private((xmlNodePtr)doc);
|
84
|
+
}
|
75
85
|
|
76
86
|
xmlFreeDoc(doc);
|
77
87
|
|
78
88
|
NOKOGIRI_DEBUG_END(doc);
|
79
89
|
}
|
80
90
|
|
81
|
-
static void
|
91
|
+
static void
|
92
|
+
recursively_remove_namespaces_from_node(xmlNodePtr node)
|
82
93
|
{
|
83
94
|
xmlNodePtr child ;
|
84
95
|
xmlAttrPtr property ;
|
85
96
|
|
86
97
|
xmlSetNs(node, NULL);
|
87
98
|
|
88
|
-
for (child = node->children ; child ; child = child->next)
|
99
|
+
for (child = node->children ; child ; child = child->next) {
|
89
100
|
recursively_remove_namespaces_from_node(child);
|
101
|
+
}
|
90
102
|
|
91
103
|
if (((node->type == XML_ELEMENT_NODE) ||
|
92
104
|
(node->type == XML_XINCLUDE_START) ||
|
@@ -99,7 +111,7 @@ static void recursively_remove_namespaces_from_node(xmlNodePtr node)
|
|
99
111
|
if (node->type == XML_ELEMENT_NODE && node->properties != NULL) {
|
100
112
|
property = node->properties ;
|
101
113
|
while (property != NULL) {
|
102
|
-
if (property->ns) property->ns = NULL ;
|
114
|
+
if (property->ns) { property->ns = NULL ; }
|
103
115
|
property = property->next ;
|
104
116
|
}
|
105
117
|
}
|
@@ -111,12 +123,13 @@ static void recursively_remove_namespaces_from_node(xmlNodePtr node)
|
|
111
123
|
*
|
112
124
|
* Get the url name for this document.
|
113
125
|
*/
|
114
|
-
static VALUE
|
126
|
+
static VALUE
|
127
|
+
url(VALUE self)
|
115
128
|
{
|
116
129
|
xmlDocPtr doc;
|
117
130
|
Data_Get_Struct(self, xmlDoc, doc);
|
118
131
|
|
119
|
-
if(doc->URL) return NOKOGIRI_STR_NEW2(doc->URL);
|
132
|
+
if (doc->URL) { return NOKOGIRI_STR_NEW2(doc->URL); }
|
120
133
|
|
121
134
|
return Qnil;
|
122
135
|
}
|
@@ -127,7 +140,8 @@ static VALUE url(VALUE self)
|
|
127
140
|
*
|
128
141
|
* Set the root element on this document
|
129
142
|
*/
|
130
|
-
static VALUE
|
143
|
+
static VALUE
|
144
|
+
set_root(VALUE self, VALUE root)
|
131
145
|
{
|
132
146
|
xmlDocPtr doc;
|
133
147
|
xmlNodePtr new_root;
|
@@ -137,12 +151,12 @@ static VALUE set_root(VALUE self, VALUE root)
|
|
137
151
|
|
138
152
|
old_root = NULL;
|
139
153
|
|
140
|
-
if(NIL_P(root)) {
|
154
|
+
if (NIL_P(root)) {
|
141
155
|
old_root = xmlDocGetRootElement(doc);
|
142
156
|
|
143
|
-
if(old_root) {
|
157
|
+
if (old_root) {
|
144
158
|
xmlUnlinkNode(old_root);
|
145
|
-
|
159
|
+
noko_xml_document_pin_node(old_root);
|
146
160
|
}
|
147
161
|
|
148
162
|
return root;
|
@@ -153,7 +167,7 @@ static VALUE set_root(VALUE self, VALUE root)
|
|
153
167
|
|
154
168
|
/* If the new root's document is not the same as the current document,
|
155
169
|
* then we need to dup the node in to this document. */
|
156
|
-
if(new_root->doc != doc) {
|
170
|
+
if (new_root->doc != doc) {
|
157
171
|
old_root = xmlDocGetRootElement(doc);
|
158
172
|
if (!(new_root = xmlDocCopyNode(new_root, doc, 1))) {
|
159
173
|
rb_raise(rb_eRuntimeError, "Could not reparent node (xmlDocCopyNode)");
|
@@ -161,7 +175,7 @@ static VALUE set_root(VALUE self, VALUE root)
|
|
161
175
|
}
|
162
176
|
|
163
177
|
xmlDocSetRootElement(doc, new_root);
|
164
|
-
if(old_root)
|
178
|
+
if (old_root) { noko_xml_document_pin_node(old_root); }
|
165
179
|
return root;
|
166
180
|
}
|
167
181
|
|
@@ -171,7 +185,8 @@ static VALUE set_root(VALUE self, VALUE root)
|
|
171
185
|
*
|
172
186
|
* Get the root node for this document.
|
173
187
|
*/
|
174
|
-
static VALUE
|
188
|
+
static VALUE
|
189
|
+
root(VALUE self)
|
175
190
|
{
|
176
191
|
xmlDocPtr doc;
|
177
192
|
xmlNodePtr root;
|
@@ -180,8 +195,8 @@ static VALUE root(VALUE self)
|
|
180
195
|
|
181
196
|
root = xmlDocGetRootElement(doc);
|
182
197
|
|
183
|
-
if(!root) return Qnil;
|
184
|
-
return
|
198
|
+
if (!root) { return Qnil; }
|
199
|
+
return noko_xml_node_wrap(Qnil, root) ;
|
185
200
|
}
|
186
201
|
|
187
202
|
/*
|
@@ -190,13 +205,15 @@ static VALUE root(VALUE self)
|
|
190
205
|
*
|
191
206
|
* Set the encoding string for this Document
|
192
207
|
*/
|
193
|
-
static VALUE
|
208
|
+
static VALUE
|
209
|
+
set_encoding(VALUE self, VALUE encoding)
|
194
210
|
{
|
195
211
|
xmlDocPtr doc;
|
196
212
|
Data_Get_Struct(self, xmlDoc, doc);
|
197
213
|
|
198
|
-
if (doc->encoding)
|
199
|
-
|
214
|
+
if (doc->encoding) {
|
215
|
+
free((char *)(uintptr_t) doc->encoding); /* avoid gcc cast warning */
|
216
|
+
}
|
200
217
|
|
201
218
|
doc->encoding = xmlStrdup((xmlChar *)StringValueCStr(encoding));
|
202
219
|
|
@@ -209,12 +226,13 @@ static VALUE set_encoding(VALUE self, VALUE encoding)
|
|
209
226
|
*
|
210
227
|
* Get the encoding for this Document
|
211
228
|
*/
|
212
|
-
static VALUE
|
229
|
+
static VALUE
|
230
|
+
encoding(VALUE self)
|
213
231
|
{
|
214
232
|
xmlDocPtr doc;
|
215
233
|
Data_Get_Struct(self, xmlDoc, doc);
|
216
234
|
|
217
|
-
if(!doc->encoding) return Qnil;
|
235
|
+
if (!doc->encoding) { return Qnil; }
|
218
236
|
return NOKOGIRI_STR_NEW2(doc->encoding);
|
219
237
|
}
|
220
238
|
|
@@ -224,12 +242,13 @@ static VALUE encoding(VALUE self)
|
|
224
242
|
*
|
225
243
|
* Get the XML version for this Document
|
226
244
|
*/
|
227
|
-
static VALUE
|
245
|
+
static VALUE
|
246
|
+
version(VALUE self)
|
228
247
|
{
|
229
248
|
xmlDocPtr doc;
|
230
249
|
Data_Get_Struct(self, xmlDoc, doc);
|
231
250
|
|
232
|
-
if(!doc->version) return Qnil;
|
251
|
+
if (!doc->version) { return Qnil; }
|
233
252
|
return NOKOGIRI_STR_NEW2(doc->version);
|
234
253
|
}
|
235
254
|
|
@@ -239,14 +258,15 @@ static VALUE version(VALUE self)
|
|
239
258
|
*
|
240
259
|
* Create a new document from an IO object
|
241
260
|
*/
|
242
|
-
static VALUE
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
261
|
+
static VALUE
|
262
|
+
read_io(VALUE klass,
|
263
|
+
VALUE io,
|
264
|
+
VALUE url,
|
265
|
+
VALUE encoding,
|
266
|
+
VALUE options)
|
247
267
|
{
|
248
|
-
const char *
|
249
|
-
const char *
|
268
|
+
const char *c_url = NIL_P(url) ? NULL : StringValueCStr(url);
|
269
|
+
const char *c_enc = NIL_P(encoding) ? NULL : StringValueCStr(encoding);
|
250
270
|
VALUE error_list = rb_ary_new();
|
251
271
|
VALUE document;
|
252
272
|
xmlDocPtr doc;
|
@@ -255,30 +275,31 @@ static VALUE read_io( VALUE klass,
|
|
255
275
|
xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);
|
256
276
|
|
257
277
|
doc = xmlReadIO(
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
278
|
+
(xmlInputReadCallback)noko_io_read,
|
279
|
+
(xmlInputCloseCallback)noko_io_close,
|
280
|
+
(void *)io,
|
281
|
+
c_url,
|
282
|
+
c_enc,
|
283
|
+
(int)NUM2INT(options)
|
284
|
+
);
|
265
285
|
xmlSetStructuredErrorFunc(NULL, NULL);
|
266
286
|
|
267
|
-
if(doc == NULL) {
|
287
|
+
if (doc == NULL) {
|
268
288
|
xmlErrorPtr error;
|
269
289
|
|
270
290
|
xmlFreeDoc(doc);
|
271
291
|
|
272
292
|
error = xmlGetLastError();
|
273
|
-
if(error)
|
293
|
+
if (error) {
|
274
294
|
rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
|
275
|
-
else
|
295
|
+
} else {
|
276
296
|
rb_raise(rb_eRuntimeError, "Could not parse document");
|
297
|
+
}
|
277
298
|
|
278
299
|
return Qnil;
|
279
300
|
}
|
280
301
|
|
281
|
-
document =
|
302
|
+
document = noko_xml_document_wrap(klass, doc);
|
282
303
|
rb_iv_set(document, "@errors", error_list);
|
283
304
|
return document;
|
284
305
|
}
|
@@ -289,15 +310,16 @@ static VALUE read_io( VALUE klass,
|
|
289
310
|
*
|
290
311
|
* Create a new document from a String
|
291
312
|
*/
|
292
|
-
static VALUE
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
313
|
+
static VALUE
|
314
|
+
read_memory(VALUE klass,
|
315
|
+
VALUE string,
|
316
|
+
VALUE url,
|
317
|
+
VALUE encoding,
|
318
|
+
VALUE options)
|
297
319
|
{
|
298
|
-
const char *
|
299
|
-
const char *
|
300
|
-
const char *
|
320
|
+
const char *c_buffer = StringValuePtr(string);
|
321
|
+
const char *c_url = NIL_P(url) ? NULL : StringValueCStr(url);
|
322
|
+
const char *c_enc = NIL_P(encoding) ? NULL : StringValueCStr(encoding);
|
301
323
|
int len = (int)RSTRING_LEN(string);
|
302
324
|
VALUE error_list = rb_ary_new();
|
303
325
|
VALUE document;
|
@@ -308,21 +330,22 @@ static VALUE read_memory( VALUE klass,
|
|
308
330
|
doc = xmlReadMemory(c_buffer, len, c_url, c_enc, (int)NUM2INT(options));
|
309
331
|
xmlSetStructuredErrorFunc(NULL, NULL);
|
310
332
|
|
311
|
-
if(doc == NULL) {
|
333
|
+
if (doc == NULL) {
|
312
334
|
xmlErrorPtr error;
|
313
335
|
|
314
336
|
xmlFreeDoc(doc);
|
315
337
|
|
316
338
|
error = xmlGetLastError();
|
317
|
-
if(error)
|
339
|
+
if (error) {
|
318
340
|
rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
|
319
|
-
else
|
341
|
+
} else {
|
320
342
|
rb_raise(rb_eRuntimeError, "Could not parse document");
|
343
|
+
}
|
321
344
|
|
322
345
|
return Qnil;
|
323
346
|
}
|
324
347
|
|
325
|
-
document =
|
348
|
+
document = noko_xml_document_wrap(klass, doc);
|
326
349
|
rb_iv_set(document, "@errors", error_list);
|
327
350
|
return document;
|
328
351
|
}
|
@@ -334,26 +357,26 @@ static VALUE read_memory( VALUE klass,
|
|
334
357
|
* Copy this Document. An optional depth may be passed in, but it defaults
|
335
358
|
* to a deep copy. 0 is a shallow copy, 1 is a deep copy.
|
336
359
|
*/
|
337
|
-
static VALUE
|
360
|
+
static VALUE
|
361
|
+
duplicate_document(int argc, VALUE *argv, VALUE self)
|
338
362
|
{
|
339
363
|
xmlDocPtr doc, dup;
|
340
364
|
VALUE copy;
|
341
365
|
VALUE level;
|
342
|
-
VALUE error_list;
|
343
366
|
|
344
|
-
if(rb_scan_args(argc, argv, "01", &level) == 0)
|
367
|
+
if (rb_scan_args(argc, argv, "01", &level) == 0) {
|
345
368
|
level = INT2NUM((long)1);
|
369
|
+
}
|
346
370
|
|
347
371
|
Data_Get_Struct(self, xmlDoc, doc);
|
348
372
|
|
349
373
|
dup = xmlCopyDoc(doc, (int)NUM2INT(level));
|
350
374
|
|
351
|
-
if(dup == NULL) return Qnil;
|
375
|
+
if (dup == NULL) { return Qnil; }
|
352
376
|
|
353
377
|
dup->type = doc->type;
|
354
|
-
copy =
|
355
|
-
|
356
|
-
rb_iv_set(copy, "@errors", error_list);
|
378
|
+
copy = noko_xml_document_wrap(rb_obj_class(self), dup);
|
379
|
+
rb_iv_set(copy, "@errors", rb_iv_get(self, "@errors"));
|
357
380
|
return copy ;
|
358
381
|
}
|
359
382
|
|
@@ -363,18 +386,18 @@ static VALUE duplicate_document(int argc, VALUE *argv, VALUE self)
|
|
363
386
|
*
|
364
387
|
* Create a new document with +version+ (defaults to "1.0")
|
365
388
|
*/
|
366
|
-
static VALUE
|
389
|
+
static VALUE
|
390
|
+
new (int argc, VALUE *argv, VALUE klass)
|
367
391
|
{
|
368
392
|
xmlDocPtr doc;
|
369
393
|
VALUE version, rest, rb_doc ;
|
370
394
|
|
371
395
|
rb_scan_args(argc, argv, "0*", &rest);
|
372
396
|
version = rb_ary_entry(rest, (long)0);
|
373
|
-
if (NIL_P(version)) version = rb_str_new2("1.0");
|
397
|
+
if (NIL_P(version)) { version = rb_str_new2("1.0"); }
|
374
398
|
|
375
399
|
doc = xmlNewDoc((xmlChar *)StringValueCStr(version));
|
376
|
-
rb_doc =
|
377
|
-
rb_obj_call_init(rb_doc, argc, argv);
|
400
|
+
rb_doc = noko_xml_document_wrap_with_init_args(klass, doc, argc, argv);
|
378
401
|
return rb_doc ;
|
379
402
|
}
|
380
403
|
|
@@ -415,7 +438,8 @@ static VALUE new(int argc, VALUE *argv, VALUE klass)
|
|
415
438
|
* please direct your browser to
|
416
439
|
* http://tenderlovemaking.com/2009/04/23/namespaces-in-xml.html
|
417
440
|
*/
|
418
|
-
|
441
|
+
static VALUE
|
442
|
+
remove_namespaces_bang(VALUE self)
|
419
443
|
{
|
420
444
|
xmlDocPtr doc ;
|
421
445
|
Data_Get_Struct(self, xmlDoc, doc);
|
@@ -435,7 +459,8 @@ VALUE remove_namespaces_bang(VALUE self)
|
|
435
459
|
* +external_id+, +system_id+, and +content+ set the External ID, System ID,
|
436
460
|
* and content respectively. All of these parameters are optional.
|
437
461
|
*/
|
438
|
-
static VALUE
|
462
|
+
static VALUE
|
463
|
+
create_entity(int argc, VALUE *argv, VALUE self)
|
439
464
|
{
|
440
465
|
VALUE name;
|
441
466
|
VALUE type;
|
@@ -448,52 +473,50 @@ static VALUE create_entity(int argc, VALUE *argv, VALUE self)
|
|
448
473
|
Data_Get_Struct(self, xmlDoc, doc);
|
449
474
|
|
450
475
|
rb_scan_args(argc, argv, "14", &name, &type, &external_id, &system_id,
|
451
|
-
|
476
|
+
&content);
|
452
477
|
|
453
478
|
xmlResetLastError();
|
454
479
|
ptr = xmlAddDocEntity(
|
455
|
-
|
456
|
-
|
457
|
-
|
458
|
-
|
459
|
-
|
460
|
-
|
461
|
-
|
462
|
-
|
463
|
-
if(NULL == ptr) {
|
480
|
+
doc,
|
481
|
+
(xmlChar *)(NIL_P(name) ? NULL : StringValueCStr(name)),
|
482
|
+
(int)(NIL_P(type) ? XML_INTERNAL_GENERAL_ENTITY : NUM2INT(type)),
|
483
|
+
(xmlChar *)(NIL_P(external_id) ? NULL : StringValueCStr(external_id)),
|
484
|
+
(xmlChar *)(NIL_P(system_id) ? NULL : StringValueCStr(system_id)),
|
485
|
+
(xmlChar *)(NIL_P(content) ? NULL : StringValueCStr(content))
|
486
|
+
);
|
487
|
+
|
488
|
+
if (NULL == ptr) {
|
464
489
|
xmlErrorPtr error = xmlGetLastError();
|
465
|
-
if(error)
|
490
|
+
if (error) {
|
466
491
|
rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
|
467
|
-
else
|
492
|
+
} else {
|
468
493
|
rb_raise(rb_eRuntimeError, "Could not create entity");
|
494
|
+
}
|
469
495
|
|
470
496
|
return Qnil;
|
471
497
|
}
|
472
498
|
|
473
|
-
return
|
499
|
+
return noko_xml_node_wrap(cNokogiriXmlEntityDecl, (xmlNodePtr)ptr);
|
474
500
|
}
|
475
501
|
|
476
|
-
static int
|
502
|
+
static int
|
503
|
+
block_caller(void *ctx, xmlNodePtr c_node, xmlNodePtr c_parent_node)
|
477
504
|
{
|
478
|
-
VALUE block;
|
479
|
-
VALUE
|
480
|
-
VALUE
|
505
|
+
VALUE block = (VALUE)ctx;
|
506
|
+
VALUE rb_node;
|
507
|
+
VALUE rb_parent_node;
|
481
508
|
VALUE ret;
|
482
509
|
|
483
|
-
if(
|
484
|
-
|
510
|
+
if (c_node->type == XML_NAMESPACE_DECL) {
|
511
|
+
rb_node = noko_xml_namespace_wrap((xmlNsPtr)c_node, c_parent_node->doc);
|
512
|
+
} else {
|
513
|
+
rb_node = noko_xml_node_wrap(Qnil, c_node);
|
485
514
|
}
|
486
|
-
|
487
|
-
node = Nokogiri_wrap_xml_node(Qnil, _node);
|
488
|
-
}
|
489
|
-
parent = _parent ? Nokogiri_wrap_xml_node(Qnil, _parent) : Qnil;
|
490
|
-
block = (VALUE)ctx;
|
491
|
-
|
492
|
-
ret = rb_funcall(block, rb_intern("call"), 2, node, parent);
|
515
|
+
rb_parent_node = c_parent_node ? noko_xml_node_wrap(Qnil, c_parent_node) : Qnil;
|
493
516
|
|
494
|
-
|
517
|
+
ret = rb_funcall(block, rb_intern("call"), 2, rb_node, rb_parent_node);
|
495
518
|
|
496
|
-
return 1;
|
519
|
+
return (Qfalse == ret || Qnil == ret) ? 0 : 1;
|
497
520
|
}
|
498
521
|
|
499
522
|
/* call-seq:
|
@@ -506,7 +529,8 @@ static int block_caller(void * ctx, xmlNodePtr _node, xmlNodePtr _parent)
|
|
506
529
|
* The block must return a non-nil, non-false value if the +obj+ passed in
|
507
530
|
* should be included in the canonicalized document.
|
508
531
|
*/
|
509
|
-
static VALUE
|
532
|
+
static VALUE
|
533
|
+
rb_xml_document_canonicalize(int argc, VALUE *argv, VALUE self)
|
510
534
|
{
|
511
535
|
VALUE mode;
|
512
536
|
VALUE incl_ns;
|
@@ -517,7 +541,7 @@ static VALUE nokogiri_xml_document_canonicalize(int argc, VALUE* argv, VALUE sel
|
|
517
541
|
xmlDocPtr doc;
|
518
542
|
xmlOutputBufferPtr buf;
|
519
543
|
xmlC14NIsVisibleCallback cb = NULL;
|
520
|
-
void *
|
544
|
+
void *ctx = NULL;
|
521
545
|
|
522
546
|
VALUE rb_cStringIO;
|
523
547
|
VALUE io;
|
@@ -530,93 +554,126 @@ static VALUE nokogiri_xml_document_canonicalize(int argc, VALUE* argv, VALUE sel
|
|
530
554
|
io = rb_class_new_instance(0, 0, rb_cStringIO);
|
531
555
|
buf = xmlAllocOutputBuffer(NULL);
|
532
556
|
|
533
|
-
buf->writecallback = (xmlOutputWriteCallback)
|
534
|
-
buf->closecallback = (xmlOutputCloseCallback)
|
557
|
+
buf->writecallback = (xmlOutputWriteCallback)noko_io_write;
|
558
|
+
buf->closecallback = (xmlOutputCloseCallback)noko_io_close;
|
535
559
|
buf->context = (void *)io;
|
536
560
|
|
537
|
-
if(rb_block_given_p()) {
|
561
|
+
if (rb_block_given_p()) {
|
538
562
|
cb = block_caller;
|
539
563
|
ctx = (void *)rb_block_proc();
|
540
564
|
}
|
541
565
|
|
542
|
-
if(NIL_P(incl_ns)){
|
566
|
+
if (NIL_P(incl_ns)) {
|
543
567
|
ns = NULL;
|
544
|
-
}
|
545
|
-
else{
|
568
|
+
} else {
|
546
569
|
Check_Type(incl_ns, T_ARRAY);
|
547
570
|
ns_len = RARRAY_LEN(incl_ns);
|
548
|
-
ns = calloc((size_t)ns_len+1, sizeof(xmlChar *));
|
571
|
+
ns = calloc((size_t)ns_len + 1, sizeof(xmlChar *));
|
549
572
|
for (i = 0 ; i < ns_len ; i++) {
|
550
573
|
VALUE entry = rb_ary_entry(incl_ns, i);
|
551
|
-
ns[i] = (xmlChar*)StringValueCStr(entry);
|
574
|
+
ns[i] = (xmlChar *)StringValueCStr(entry);
|
552
575
|
}
|
553
576
|
}
|
554
577
|
|
555
578
|
|
556
579
|
xmlC14NExecute(doc, cb, ctx,
|
557
|
-
|
558
|
-
|
559
|
-
|
560
|
-
|
580
|
+
(int)(NIL_P(mode) ? 0 : NUM2INT(mode)),
|
581
|
+
ns,
|
582
|
+
(int) RTEST(with_comments),
|
583
|
+
buf);
|
561
584
|
|
562
585
|
xmlOutputBufferClose(buf);
|
563
586
|
|
564
587
|
return rb_funcall(io, rb_intern("string"), 0);
|
565
588
|
}
|
566
589
|
|
567
|
-
VALUE
|
568
|
-
|
590
|
+
VALUE
|
591
|
+
noko_xml_document_wrap_with_init_args(VALUE klass, xmlDocPtr c_document, int argc, VALUE *argv)
|
569
592
|
{
|
570
|
-
VALUE
|
571
|
-
|
572
|
-
VALUE node = rb_define_class_under(xml, "Node", rb_cObject);
|
593
|
+
VALUE rb_document;
|
594
|
+
nokogiriTuplePtr tuple;
|
573
595
|
|
574
|
-
|
575
|
-
|
576
|
-
|
577
|
-
|
578
|
-
|
579
|
-
|
580
|
-
|
581
|
-
|
582
|
-
|
583
|
-
|
584
|
-
|
585
|
-
|
586
|
-
|
587
|
-
|
588
|
-
|
589
|
-
|
590
|
-
|
591
|
-
|
592
|
-
|
593
|
-
|
594
|
-
rb_define_method(klass, "remove_namespaces!", remove_namespaces_bang, 0);
|
596
|
+
if (!klass) {
|
597
|
+
klass = cNokogiriXmlDocument;
|
598
|
+
}
|
599
|
+
|
600
|
+
rb_document = Data_Wrap_Struct(klass, mark, dealloc, c_document);
|
601
|
+
|
602
|
+
tuple = (nokogiriTuplePtr)malloc(sizeof(nokogiriTuple));
|
603
|
+
tuple->doc = rb_document;
|
604
|
+
tuple->unlinkedNodes = st_init_numtable_with_size(128);
|
605
|
+
tuple->node_cache = rb_ary_new();
|
606
|
+
|
607
|
+
c_document->_private = tuple ;
|
608
|
+
|
609
|
+
rb_iv_set(rb_document, "@decorators", Qnil);
|
610
|
+
rb_iv_set(rb_document, "@errors", Qnil);
|
611
|
+
rb_iv_set(rb_document, "@node_cache", tuple->node_cache);
|
612
|
+
|
613
|
+
rb_obj_call_init(rb_document, argc, argv);
|
614
|
+
|
615
|
+
return rb_document ;
|
595
616
|
}
|
596
617
|
|
597
618
|
|
598
|
-
/*
|
599
|
-
VALUE
|
619
|
+
/* deprecated. use noko_xml_document_wrap() instead. */
|
620
|
+
VALUE
|
621
|
+
Nokogiri_wrap_xml_document(VALUE klass, xmlDocPtr doc)
|
600
622
|
{
|
601
|
-
|
623
|
+
/* TODO: deprecate this method in v2.0 */
|
624
|
+
return noko_xml_document_wrap_with_init_args(klass, doc, 0, NULL);
|
625
|
+
}
|
602
626
|
|
603
|
-
|
604
|
-
|
605
|
-
|
606
|
-
|
607
|
-
|
608
|
-
);
|
627
|
+
VALUE
|
628
|
+
noko_xml_document_wrap(VALUE klass, xmlDocPtr doc)
|
629
|
+
{
|
630
|
+
return noko_xml_document_wrap_with_init_args(klass, doc, 0, NULL);
|
631
|
+
}
|
609
632
|
|
610
|
-
VALUE cache = rb_ary_new();
|
611
|
-
rb_iv_set(rb_doc, "@decorators", Qnil);
|
612
|
-
rb_iv_set(rb_doc, "@node_cache", cache);
|
613
633
|
|
614
|
-
|
615
|
-
|
616
|
-
|
617
|
-
doc
|
634
|
+
void
|
635
|
+
noko_xml_document_pin_node(xmlNodePtr node)
|
636
|
+
{
|
637
|
+
xmlDocPtr doc;
|
638
|
+
nokogiriTuplePtr tuple;
|
639
|
+
|
640
|
+
doc = node->doc;
|
641
|
+
tuple = (nokogiriTuplePtr)doc->_private;
|
642
|
+
st_insert(tuple->unlinkedNodes, (st_data_t)node, (st_data_t)node);
|
643
|
+
}
|
618
644
|
|
619
|
-
rb_obj_call_init(rb_doc, 0, NULL);
|
620
645
|
|
621
|
-
|
646
|
+
void
|
647
|
+
noko_xml_document_pin_namespace(xmlNsPtr ns, xmlDocPtr doc)
|
648
|
+
{
|
649
|
+
nokogiriTuplePtr tuple;
|
650
|
+
|
651
|
+
tuple = (nokogiriTuplePtr)doc->_private;
|
652
|
+
st_insert(tuple->unlinkedNodes, (st_data_t)ns, (st_data_t)ns);
|
653
|
+
}
|
654
|
+
|
655
|
+
|
656
|
+
void
|
657
|
+
noko_init_xml_document()
|
658
|
+
{
|
659
|
+
assert(cNokogiriXmlNode);
|
660
|
+
/*
|
661
|
+
* Nokogiri::XML::Document wraps an xml document.
|
662
|
+
*/
|
663
|
+
cNokogiriXmlDocument = rb_define_class_under(mNokogiriXml, "Document", cNokogiriXmlNode);
|
664
|
+
|
665
|
+
rb_define_singleton_method(cNokogiriXmlDocument, "read_memory", read_memory, 4);
|
666
|
+
rb_define_singleton_method(cNokogiriXmlDocument, "read_io", read_io, 4);
|
667
|
+
rb_define_singleton_method(cNokogiriXmlDocument, "new", new, -1);
|
668
|
+
|
669
|
+
rb_define_method(cNokogiriXmlDocument, "root", root, 0);
|
670
|
+
rb_define_method(cNokogiriXmlDocument, "root=", set_root, 1);
|
671
|
+
rb_define_method(cNokogiriXmlDocument, "encoding", encoding, 0);
|
672
|
+
rb_define_method(cNokogiriXmlDocument, "encoding=", set_encoding, 1);
|
673
|
+
rb_define_method(cNokogiriXmlDocument, "version", version, 0);
|
674
|
+
rb_define_method(cNokogiriXmlDocument, "canonicalize", rb_xml_document_canonicalize, -1);
|
675
|
+
rb_define_method(cNokogiriXmlDocument, "dup", duplicate_document, -1);
|
676
|
+
rb_define_method(cNokogiriXmlDocument, "url", url, 0);
|
677
|
+
rb_define_method(cNokogiriXmlDocument, "create_entity", create_entity, -1);
|
678
|
+
rb_define_method(cNokogiriXmlDocument, "remove_namespaces!", remove_namespaces_bang, 0);
|
622
679
|
}
|