nokogiri 1.4.1 → 1.4.2
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- data/CHANGELOG.ja.rdoc +45 -0
- data/CHANGELOG.rdoc +53 -1
- data/Manifest.txt +3 -3
- data/README.ja.rdoc +1 -1
- data/README.rdoc +11 -5
- data/Rakefile +13 -79
- data/ext/nokogiri/extconf.rb +22 -74
- data/ext/nokogiri/html_document.c +17 -8
- data/ext/nokogiri/html_element_description.c +20 -16
- data/ext/nokogiri/html_entity_lookup.c +2 -2
- data/ext/nokogiri/html_sax_parser_context.c +10 -8
- data/ext/nokogiri/nokogiri.c +0 -1
- data/ext/nokogiri/nokogiri.h +33 -28
- data/ext/nokogiri/xml_attr.c +7 -5
- data/ext/nokogiri/xml_attribute_decl.c +5 -2
- data/ext/nokogiri/xml_cdata.c +4 -2
- data/ext/nokogiri/xml_comment.c +4 -2
- data/ext/nokogiri/xml_document.c +93 -15
- data/ext/nokogiri/xml_document.h +0 -1
- data/ext/nokogiri/xml_document_fragment.c +4 -2
- data/ext/nokogiri/xml_dtd.c +18 -8
- data/ext/nokogiri/xml_element_content.c +2 -2
- data/ext/nokogiri/xml_entity_decl.c +15 -2
- data/ext/nokogiri/xml_entity_reference.c +4 -2
- data/ext/nokogiri/xml_io.c +1 -1
- data/ext/nokogiri/xml_namespace.c +5 -3
- data/ext/nokogiri/xml_node.c +353 -114
- data/ext/nokogiri/xml_node_set.c +35 -22
- data/ext/nokogiri/xml_node_set.h +1 -1
- data/ext/nokogiri/xml_processing_instruction.c +4 -2
- data/ext/nokogiri/xml_reader.c +119 -47
- data/ext/nokogiri/xml_relax_ng.c +21 -12
- data/ext/nokogiri/xml_sax_parser.c +6 -3
- data/ext/nokogiri/xml_sax_parser.h +13 -17
- data/ext/nokogiri/xml_sax_parser_context.c +8 -6
- data/ext/nokogiri/xml_sax_push_parser.c +7 -6
- data/ext/nokogiri/xml_schema.c +62 -13
- data/ext/nokogiri/xml_syntax_error.c +18 -12
- data/ext/nokogiri/xml_syntax_error.h +1 -1
- data/ext/nokogiri/xml_text.c +4 -2
- data/ext/nokogiri/xml_xpath_context.c +60 -23
- data/ext/nokogiri/xslt_stylesheet.c +14 -3
- data/lib/nokogiri.rb +17 -0
- data/lib/nokogiri/css/generated_parser.rb +72 -62
- data/lib/nokogiri/css/generated_tokenizer.rb +23 -24
- data/lib/nokogiri/css/parser.y +3 -1
- data/lib/nokogiri/css/tokenizer.rex +3 -3
- data/lib/nokogiri/css/xpath_visitor.rb +8 -3
- data/lib/nokogiri/ffi/html/sax/parser_context.rb +3 -3
- data/lib/nokogiri/ffi/libxml.rb +16 -2
- data/lib/nokogiri/ffi/structs/common_node.rb +15 -3
- data/lib/nokogiri/ffi/structs/xml_document.rb +13 -4
- data/lib/nokogiri/ffi/structs/xml_xpath_context.rb +3 -2
- data/lib/nokogiri/ffi/weak_bucket.rb +40 -0
- data/lib/nokogiri/ffi/xml/document.rb +27 -0
- data/lib/nokogiri/ffi/xml/entity_decl.rb +9 -0
- data/lib/nokogiri/ffi/xml/node.rb +142 -61
- data/lib/nokogiri/ffi/xml/node_set.rb +15 -12
- data/lib/nokogiri/ffi/xml/reader.rb +5 -0
- data/lib/nokogiri/ffi/xml/schema.rb +17 -0
- data/lib/nokogiri/ffi/xml/syntax_error.rb +4 -4
- data/lib/nokogiri/ffi/xml/xpath.rb +0 -10
- data/lib/nokogiri/ffi/xml/xpath_context.rb +22 -9
- data/lib/nokogiri/ffi/xslt/stylesheet.rb +3 -0
- data/lib/nokogiri/html/document.rb +5 -3
- data/lib/nokogiri/html/document_fragment.rb +28 -7
- data/lib/nokogiri/version.rb +6 -2
- data/lib/nokogiri/version_warning.rb +6 -3
- data/lib/nokogiri/xml.rb +1 -1
- data/lib/nokogiri/xml/builder.rb +35 -22
- data/lib/nokogiri/xml/document.rb +44 -12
- data/lib/nokogiri/xml/document_fragment.rb +16 -12
- data/lib/nokogiri/xml/entity_decl.rb +4 -0
- data/lib/nokogiri/xml/node.rb +152 -95
- data/lib/nokogiri/xml/node_set.rb +2 -1
- data/lib/nokogiri/xml/sax/push_parser.rb +1 -1
- data/lib/nokogiri/xml/schema.rb +1 -5
- data/lib/nokogiri/xml/syntax_error.rb +4 -0
- data/lib/nokogiri/xml/text.rb +9 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +3 -0
- data/tasks/cross_compile.rb +158 -0
- data/tasks/test.rb +0 -6
- data/test/css/test_xpath_visitor.rb +9 -0
- data/test/helper.rb +49 -11
- data/test/html/sax/test_parser.rb +11 -1
- data/test/html/test_document.rb +8 -0
- data/test/html/test_document_fragment.rb +14 -2
- data/test/html/test_element_description.rb +5 -1
- data/test/html/test_node.rb +5 -66
- data/test/test_reader.rb +28 -0
- data/test/test_xslt_transforms.rb +14 -0
- data/test/xml/test_builder.rb +43 -0
- data/test/xml/test_cdata.rb +12 -0
- data/test/xml/test_document.rb +74 -39
- data/test/xml/test_document_fragment.rb +36 -0
- data/test/xml/test_entity_decl.rb +37 -0
- data/test/xml/test_node.rb +192 -65
- data/test/xml/test_node_reparenting.rb +253 -236
- data/test/xml/test_node_set.rb +67 -0
- data/test/xml/test_text.rb +8 -0
- data/test/xml/test_xpath.rb +32 -0
- metadata +103 -48
- data.tar.gz.sig +0 -0
- data/ext/nokogiri/xml_xpath.c +0 -53
- data/ext/nokogiri/xml_xpath.h +0 -11
- data/lib/nokogiri/xml/fragment_handler.rb +0 -79
- metadata.gz.sig +0 -0
data/ext/nokogiri/xml_node.c
CHANGED
@@ -14,42 +14,61 @@ static void debug_node_dealloc(xmlNodePtr x)
|
|
14
14
|
|
15
15
|
static void mark(xmlNodePtr node)
|
16
16
|
{
|
17
|
-
|
18
|
-
|
17
|
+
/* it's OK if the document isn't fully realized (as in XML::Reader). */
|
18
|
+
/* see http://github.com/tenderlove/nokogiri/issues/closed/#issue/95 */
|
19
19
|
if (DOC_RUBY_OBJECT_TEST(node->doc) && DOC_RUBY_OBJECT(node->doc))
|
20
20
|
rb_gc_mark(DOC_RUBY_OBJECT(node->doc));
|
21
21
|
}
|
22
22
|
|
23
23
|
/* :nodoc: */
|
24
|
-
typedef xmlNodePtr (*
|
24
|
+
typedef xmlNodePtr (*pivot_reparentee_func)(xmlNodePtr, xmlNodePtr);
|
25
25
|
|
26
26
|
/* :nodoc: */
|
27
27
|
static void relink_namespace(xmlNodePtr reparented)
|
28
28
|
{
|
29
|
-
|
29
|
+
xmlNodePtr child;
|
30
|
+
|
31
|
+
/* Avoid segv when relinking against unlinked nodes. */
|
30
32
|
if(!reparented->parent) return;
|
31
33
|
|
32
|
-
|
34
|
+
/* Make sure that our reparented node has the correct namespaces */
|
33
35
|
if(!reparented->ns && reparented->doc != (xmlDocPtr)reparented->parent)
|
34
36
|
xmlSetNs(reparented, reparented->parent->ns);
|
35
37
|
|
36
|
-
|
38
|
+
/* Search our parents for an existing definition */
|
37
39
|
if(reparented->nsDef) {
|
38
|
-
xmlNsPtr
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
40
|
+
xmlNsPtr curr = reparented->nsDef;
|
41
|
+
xmlNsPtr prev = NULL;
|
42
|
+
|
43
|
+
while(curr) {
|
44
|
+
xmlNsPtr ns = xmlSearchNsByHref(
|
45
|
+
reparented->doc,
|
46
|
+
reparented->parent,
|
47
|
+
curr->href
|
48
|
+
);
|
49
|
+
/* If we find the namespace is already declared, remove it from this
|
50
|
+
* definition list. */
|
51
|
+
if(ns && ns != curr) {
|
52
|
+
if (prev) {
|
53
|
+
prev->next = curr->next;
|
54
|
+
} else {
|
55
|
+
reparented->nsDef = curr->next;
|
56
|
+
}
|
57
|
+
NOKOGIRI_ROOT_NSDEF(curr, reparented->doc);
|
58
|
+
} else {
|
59
|
+
prev = curr;
|
60
|
+
}
|
61
|
+
curr = curr->next;
|
62
|
+
}
|
44
63
|
}
|
45
64
|
|
46
|
-
|
47
|
-
|
65
|
+
/* Only walk all children if there actually is a namespace we need to */
|
66
|
+
/* reparent. */
|
48
67
|
if(NULL == reparented->ns) return;
|
49
68
|
|
50
|
-
|
51
|
-
|
52
|
-
|
69
|
+
/* When a node gets reparented, walk its children to make sure that */
|
70
|
+
/* their namespaces are reparented as well. */
|
71
|
+
child = reparented->children;
|
53
72
|
while(NULL != child) {
|
54
73
|
relink_namespace(child);
|
55
74
|
child = child->next;
|
@@ -57,72 +76,88 @@ static void relink_namespace(xmlNodePtr reparented)
|
|
57
76
|
}
|
58
77
|
|
59
78
|
/* :nodoc: */
|
60
|
-
static xmlNodePtr xmlReplaceNodeWrapper(xmlNodePtr
|
79
|
+
static xmlNodePtr xmlReplaceNodeWrapper(xmlNodePtr pivot, xmlNodePtr new_node)
|
61
80
|
{
|
62
81
|
xmlNodePtr retval ;
|
63
|
-
|
64
|
-
|
65
|
-
|
82
|
+
|
83
|
+
retval = xmlReplaceNode(pivot, new_node) ;
|
84
|
+
|
85
|
+
if (retval == pivot) {
|
86
|
+
retval = new_node ; /* return semantics for reparent_node_with */
|
66
87
|
}
|
88
|
+
|
89
|
+
/* work around libxml2 issue: https://bugzilla.gnome.org/show_bug.cgi?id=615612 */
|
90
|
+
if (retval->type == XML_TEXT_NODE) {
|
91
|
+
if (retval->prev && retval->prev->type == XML_TEXT_NODE) {
|
92
|
+
retval = xmlTextMerge(retval->prev, retval);
|
93
|
+
}
|
94
|
+
if (retval->next && retval->next->type == XML_TEXT_NODE) {
|
95
|
+
retval = xmlTextMerge(retval, retval->next);
|
96
|
+
}
|
97
|
+
}
|
98
|
+
|
67
99
|
return retval ;
|
68
100
|
}
|
69
101
|
|
70
102
|
/* :nodoc: */
|
71
|
-
static VALUE reparent_node_with(VALUE
|
103
|
+
static VALUE reparent_node_with(VALUE pivot_obj, VALUE reparentee_obj, pivot_reparentee_func prf)
|
72
104
|
{
|
73
105
|
VALUE reparented_obj ;
|
74
|
-
xmlNodePtr
|
106
|
+
xmlNodePtr reparentee, pivot, reparented ;
|
75
107
|
|
76
|
-
if(!rb_obj_is_kind_of(
|
108
|
+
if(!rb_obj_is_kind_of(reparentee_obj, cNokogiriXmlNode))
|
109
|
+
rb_raise(rb_eArgError, "node must be a Nokogiri::XML::Node");
|
110
|
+
if(rb_obj_is_kind_of(reparentee_obj, cNokogiriXmlDocument))
|
77
111
|
rb_raise(rb_eArgError, "node must be a Nokogiri::XML::Node");
|
78
112
|
|
79
|
-
Data_Get_Struct(
|
80
|
-
Data_Get_Struct(
|
113
|
+
Data_Get_Struct(reparentee_obj, xmlNode, reparentee);
|
114
|
+
Data_Get_Struct(pivot_obj, xmlNode, pivot);
|
81
115
|
|
82
|
-
if(XML_DOCUMENT_NODE ==
|
116
|
+
if(XML_DOCUMENT_NODE == reparentee->type || XML_HTML_DOCUMENT_NODE == reparentee->type)
|
83
117
|
rb_raise(rb_eArgError, "cannot reparent a document node");
|
84
118
|
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
} else {
|
107
|
-
xmlNodePtr duped_node ;
|
108
|
-
// recursively copy to the new document
|
109
|
-
if (!(duped_node = xmlDocCopyNode(node, other->doc, 1))) {
|
119
|
+
xmlUnlinkNode(reparentee);
|
120
|
+
|
121
|
+
if (reparentee->doc != pivot->doc || reparentee->type == XML_TEXT_NODE) {
|
122
|
+
/*
|
123
|
+
* if the reparentee is a text node, there's a very good chance it will be
|
124
|
+
* merged with an adjacent text node after being reparented, and in that case
|
125
|
+
* libxml will free the underlying C struct.
|
126
|
+
*
|
127
|
+
* since we clearly have a ruby object which references the underlying
|
128
|
+
* memory, we can't let the C struct get freed. let's pickle the original
|
129
|
+
* reparentee by rooting it; and then we'll reparent a duplicate of the
|
130
|
+
* node that we don't care about preserving.
|
131
|
+
*
|
132
|
+
* alternatively, if the reparentee is from a different document than the
|
133
|
+
* pivot node, libxml2 is going to get confused about which document's
|
134
|
+
* "dictionary" the node's strings belong to (this is an otherwise
|
135
|
+
* uninteresting libxml2 implementation detail). as a result, we cannot
|
136
|
+
* reparent the actual reparentee, so we reparent a duplicate.
|
137
|
+
*/
|
138
|
+
NOKOGIRI_ROOT_NODE(reparentee);
|
139
|
+
if (!(reparentee = xmlDocCopyNode(reparentee, pivot->doc, 1))) {
|
110
140
|
rb_raise(rb_eRuntimeError, "Could not reparent node (xmlDocCopyNode)");
|
111
141
|
}
|
112
|
-
if(!(reparented = (*func)(other, duped_node))) {
|
113
|
-
rb_raise(rb_eRuntimeError, "Could not reparent node (2)");
|
114
|
-
}
|
115
|
-
xmlUnlinkNode(node);
|
116
|
-
NOKOGIRI_ROOT_NODE(node);
|
117
142
|
}
|
118
143
|
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
144
|
+
/* TODO: I really want to remove this. We shouldn't support 2.6.16 anymore */
|
145
|
+
if ( reparentee->type == XML_TEXT_NODE && pivot->type == XML_TEXT_NODE && is_2_6_16() ) {
|
146
|
+
/* work around a string-handling bug in libxml 2.6.16. we'd rather leak than segfault. */
|
147
|
+
pivot->content = xmlStrdup(pivot->content);
|
148
|
+
}
|
149
|
+
|
150
|
+
if(!(reparented = (*prf)(pivot, reparentee))) {
|
151
|
+
rb_raise(rb_eRuntimeError, "Could not reparent node");
|
123
152
|
}
|
124
153
|
|
125
|
-
|
154
|
+
/*
|
155
|
+
* make sure the ruby object is pointed at the just-reparented node, which
|
156
|
+
* might be a duplicate (see above) or might be the result of merging
|
157
|
+
* adjacent text nodes.
|
158
|
+
*/
|
159
|
+
DATA_PTR(reparentee_obj) = reparented ;
|
160
|
+
|
126
161
|
relink_namespace(reparented);
|
127
162
|
|
128
163
|
reparented_obj = Nokogiri_wrap_xml_node(Qnil, reparented);
|
@@ -169,13 +204,16 @@ static VALUE pointer_id(VALUE self)
|
|
169
204
|
static VALUE encode_special_chars(VALUE self, VALUE string)
|
170
205
|
{
|
171
206
|
xmlNodePtr node;
|
207
|
+
xmlChar *encoded;
|
208
|
+
VALUE encoded_str;
|
209
|
+
|
172
210
|
Data_Get_Struct(self, xmlNode, node);
|
173
|
-
|
211
|
+
encoded = xmlEncodeSpecialChars(
|
174
212
|
node->doc,
|
175
213
|
(const xmlChar *)StringValuePtr(string)
|
176
214
|
);
|
177
215
|
|
178
|
-
|
216
|
+
encoded_str = NOKOGIRI_STR_NEW2(encoded);
|
179
217
|
xmlFree(encoded);
|
180
218
|
|
181
219
|
return encoded_str;
|
@@ -191,6 +229,8 @@ static VALUE create_internal_subset(VALUE self, VALUE name, VALUE external_id, V
|
|
191
229
|
{
|
192
230
|
xmlNodePtr node;
|
193
231
|
xmlDocPtr doc;
|
232
|
+
xmlDtdPtr dtd;
|
233
|
+
|
194
234
|
Data_Get_Struct(self, xmlNode, node);
|
195
235
|
|
196
236
|
doc = node->doc;
|
@@ -198,8 +238,8 @@ static VALUE create_internal_subset(VALUE self, VALUE name, VALUE external_id, V
|
|
198
238
|
if(xmlGetIntSubset(doc))
|
199
239
|
rb_raise(rb_eRuntimeError, "Document already has an internal subset");
|
200
240
|
|
201
|
-
|
202
|
-
doc,
|
241
|
+
dtd = xmlCreateIntSubset(
|
242
|
+
doc,
|
203
243
|
NIL_P(name) ? NULL : (const xmlChar *)StringValuePtr(name),
|
204
244
|
NIL_P(external_id) ? NULL : (const xmlChar *)StringValuePtr(external_id),
|
205
245
|
NIL_P(system_id) ? NULL : (const xmlChar *)StringValuePtr(system_id)
|
@@ -220,6 +260,8 @@ static VALUE create_external_subset(VALUE self, VALUE name, VALUE external_id, V
|
|
220
260
|
{
|
221
261
|
xmlNodePtr node;
|
222
262
|
xmlDocPtr doc;
|
263
|
+
xmlDtdPtr dtd;
|
264
|
+
|
223
265
|
Data_Get_Struct(self, xmlNode, node);
|
224
266
|
|
225
267
|
doc = node->doc;
|
@@ -227,8 +269,8 @@ static VALUE create_external_subset(VALUE self, VALUE name, VALUE external_id, V
|
|
227
269
|
if(doc->extSubset)
|
228
270
|
rb_raise(rb_eRuntimeError, "Document already has an external subset");
|
229
271
|
|
230
|
-
|
231
|
-
doc,
|
272
|
+
dtd = xmlNewDtd(
|
273
|
+
doc,
|
232
274
|
NIL_P(name) ? NULL : (const xmlChar *)StringValuePtr(name),
|
233
275
|
NIL_P(external_id) ? NULL : (const xmlChar *)StringValuePtr(external_id),
|
234
276
|
NIL_P(system_id) ? NULL : (const xmlChar *)StringValuePtr(system_id)
|
@@ -249,12 +291,14 @@ static VALUE external_subset(VALUE self)
|
|
249
291
|
{
|
250
292
|
xmlNodePtr node;
|
251
293
|
xmlDocPtr doc;
|
294
|
+
xmlDtdPtr dtd;
|
295
|
+
|
252
296
|
Data_Get_Struct(self, xmlNode, node);
|
253
297
|
|
254
298
|
if(!node->doc) return Qnil;
|
255
299
|
|
256
300
|
doc = node->doc;
|
257
|
-
|
301
|
+
dtd = doc->extSubset;
|
258
302
|
|
259
303
|
if(!dtd) return Qnil;
|
260
304
|
|
@@ -271,12 +315,14 @@ static VALUE internal_subset(VALUE self)
|
|
271
315
|
{
|
272
316
|
xmlNodePtr node;
|
273
317
|
xmlDocPtr doc;
|
318
|
+
xmlDtdPtr dtd;
|
319
|
+
|
274
320
|
Data_Get_Struct(self, xmlNode, node);
|
275
321
|
|
276
322
|
if(!node->doc) return Qnil;
|
277
323
|
|
278
324
|
doc = node->doc;
|
279
|
-
|
325
|
+
dtd = xmlGetIntSubset(doc);
|
280
326
|
|
281
327
|
if(!dtd) return Qnil;
|
282
328
|
|
@@ -293,11 +339,11 @@ static VALUE internal_subset(VALUE self)
|
|
293
339
|
static VALUE duplicate_node(int argc, VALUE *argv, VALUE self)
|
294
340
|
{
|
295
341
|
VALUE level;
|
342
|
+
xmlNodePtr node, dup;
|
296
343
|
|
297
344
|
if(rb_scan_args(argc, argv, "01", &level) == 0)
|
298
345
|
level = INT2NUM((long)1);
|
299
346
|
|
300
|
-
xmlNodePtr node, dup;
|
301
347
|
Data_Get_Struct(self, xmlNode, node);
|
302
348
|
|
303
349
|
dup = xmlDocCopyNode(node, node->doc, (int)NUM2INT(level));
|
@@ -331,9 +377,7 @@ static VALUE blank_eh(VALUE self)
|
|
331
377
|
{
|
332
378
|
xmlNodePtr node;
|
333
379
|
Data_Get_Struct(self, xmlNode, node);
|
334
|
-
|
335
|
-
return Qtrue;
|
336
|
-
return Qfalse;
|
380
|
+
return (1 == xmlIsBlankNode(node)) ? Qtrue : Qfalse ;
|
337
381
|
}
|
338
382
|
|
339
383
|
/*
|
@@ -381,13 +425,10 @@ static VALUE next_element(VALUE self)
|
|
381
425
|
xmlNodePtr node, sibling;
|
382
426
|
Data_Get_Struct(self, xmlNode, node);
|
383
427
|
|
384
|
-
sibling = node
|
428
|
+
sibling = xmlNextElementSibling(node);
|
385
429
|
if(!sibling) return Qnil;
|
386
430
|
|
387
|
-
|
388
|
-
sibling = sibling->next;
|
389
|
-
|
390
|
-
return sibling ? Nokogiri_wrap_xml_node(Qnil, sibling) : Qnil ;
|
431
|
+
return Nokogiri_wrap_xml_node(Qnil, sibling);
|
391
432
|
}
|
392
433
|
|
393
434
|
/*
|
@@ -401,6 +442,9 @@ static VALUE previous_element(VALUE self)
|
|
401
442
|
xmlNodePtr node, sibling;
|
402
443
|
Data_Get_Struct(self, xmlNode, node);
|
403
444
|
|
445
|
+
/*
|
446
|
+
* note that we don't use xmlPreviousElementSibling here because it's buggy pre-2.7.7.
|
447
|
+
*/
|
404
448
|
sibling = node->prev;
|
405
449
|
if(!sibling) return Qnil;
|
406
450
|
|
@@ -411,10 +455,9 @@ static VALUE previous_element(VALUE self)
|
|
411
455
|
}
|
412
456
|
|
413
457
|
/* :nodoc: */
|
414
|
-
static VALUE replace(VALUE self, VALUE
|
458
|
+
static VALUE replace(VALUE self, VALUE new_node)
|
415
459
|
{
|
416
|
-
reparent_node_with(
|
417
|
-
return self ;
|
460
|
+
return reparent_node_with(self, new_node, xmlReplaceNodeWrapper) ;
|
418
461
|
}
|
419
462
|
|
420
463
|
/*
|
@@ -426,21 +469,66 @@ static VALUE replace(VALUE self, VALUE _new_node)
|
|
426
469
|
static VALUE children(VALUE self)
|
427
470
|
{
|
428
471
|
xmlNodePtr node;
|
472
|
+
xmlNodePtr child;
|
473
|
+
xmlNodeSetPtr set;
|
474
|
+
VALUE document;
|
475
|
+
VALUE node_set;
|
476
|
+
|
429
477
|
Data_Get_Struct(self, xmlNode, node);
|
430
478
|
|
431
|
-
|
432
|
-
|
479
|
+
child = node->children;
|
480
|
+
set = xmlXPathNodeSetCreate(child);
|
481
|
+
|
482
|
+
document = DOC_RUBY_OBJECT(node->doc);
|
433
483
|
|
434
|
-
if(!child) return Nokogiri_wrap_xml_node_set(set);
|
484
|
+
if(!child) return Nokogiri_wrap_xml_node_set(set, document);
|
435
485
|
|
436
486
|
child = child->next;
|
437
487
|
while(NULL != child) {
|
438
|
-
|
488
|
+
xmlXPathNodeSetAddUnique(set, child);
|
439
489
|
child = child->next;
|
440
490
|
}
|
441
491
|
|
442
|
-
|
443
|
-
|
492
|
+
node_set = Nokogiri_wrap_xml_node_set(set, document);
|
493
|
+
|
494
|
+
return node_set;
|
495
|
+
}
|
496
|
+
|
497
|
+
/*
|
498
|
+
* call-seq:
|
499
|
+
* element_children
|
500
|
+
*
|
501
|
+
* Get the list of children for this node as a NodeSet. All nodes will be
|
502
|
+
* element nodes.
|
503
|
+
*
|
504
|
+
* Example:
|
505
|
+
*
|
506
|
+
* @doc.root.element_children.all? { |x| x.element? } # => true
|
507
|
+
*/
|
508
|
+
static VALUE element_children(VALUE self)
|
509
|
+
{
|
510
|
+
xmlNodePtr node;
|
511
|
+
xmlNodePtr child;
|
512
|
+
xmlNodeSetPtr set;
|
513
|
+
VALUE document;
|
514
|
+
VALUE node_set;
|
515
|
+
|
516
|
+
Data_Get_Struct(self, xmlNode, node);
|
517
|
+
|
518
|
+
child = xmlFirstElementChild(node);
|
519
|
+
set = xmlXPathNodeSetCreate(child);
|
520
|
+
|
521
|
+
document = DOC_RUBY_OBJECT(node->doc);
|
522
|
+
|
523
|
+
if(!child) return Nokogiri_wrap_xml_node_set(set, document);
|
524
|
+
|
525
|
+
child = xmlNextElementSibling(child);
|
526
|
+
while(NULL != child) {
|
527
|
+
xmlXPathNodeSetAddUnique(set, child);
|
528
|
+
child = xmlNextElementSibling(child);
|
529
|
+
}
|
530
|
+
|
531
|
+
node_set = Nokogiri_wrap_xml_node_set(set, document);
|
444
532
|
|
445
533
|
return node_set;
|
446
534
|
}
|
@@ -462,6 +550,48 @@ static VALUE child(VALUE self)
|
|
462
550
|
return Nokogiri_wrap_xml_node(Qnil, child);
|
463
551
|
}
|
464
552
|
|
553
|
+
/*
|
554
|
+
* call-seq:
|
555
|
+
* first_element_child
|
556
|
+
*
|
557
|
+
* Returns the first child node of this node that is an element.
|
558
|
+
*
|
559
|
+
* Example:
|
560
|
+
*
|
561
|
+
* @doc.root.first_element_child.element? # => true
|
562
|
+
*/
|
563
|
+
static VALUE first_element_child(VALUE self)
|
564
|
+
{
|
565
|
+
xmlNodePtr node, child;
|
566
|
+
Data_Get_Struct(self, xmlNode, node);
|
567
|
+
|
568
|
+
child = xmlFirstElementChild(node);
|
569
|
+
if(!child) return Qnil;
|
570
|
+
|
571
|
+
return Nokogiri_wrap_xml_node(Qnil, child);
|
572
|
+
}
|
573
|
+
|
574
|
+
/*
|
575
|
+
* call-seq:
|
576
|
+
* last_element_child
|
577
|
+
*
|
578
|
+
* Returns the last child node of this node that is an element.
|
579
|
+
*
|
580
|
+
* Example:
|
581
|
+
*
|
582
|
+
* @doc.root.last_element_child.element? # => true
|
583
|
+
*/
|
584
|
+
static VALUE last_element_child(VALUE self)
|
585
|
+
{
|
586
|
+
xmlNodePtr node, child;
|
587
|
+
Data_Get_Struct(self, xmlNode, node);
|
588
|
+
|
589
|
+
child = xmlLastElementChild(node);
|
590
|
+
if(!child) return Qnil;
|
591
|
+
|
592
|
+
return Nokogiri_wrap_xml_node(Qnil, child);
|
593
|
+
}
|
594
|
+
|
465
595
|
/*
|
466
596
|
* call-seq:
|
467
597
|
* key?(attribute)
|
@@ -544,10 +674,12 @@ static VALUE get(VALUE self, VALUE attribute)
|
|
544
674
|
static VALUE set_namespace(VALUE self, VALUE namespace)
|
545
675
|
{
|
546
676
|
xmlNodePtr node;
|
547
|
-
xmlNsPtr ns;
|
677
|
+
xmlNsPtr ns = NULL;
|
548
678
|
|
549
679
|
Data_Get_Struct(self, xmlNode, node);
|
550
|
-
|
680
|
+
|
681
|
+
if(!NIL_P(namespace))
|
682
|
+
Data_Get_Struct(namespace, xmlNs, ns);
|
551
683
|
|
552
684
|
xmlSetNs(node, ns);
|
553
685
|
|
@@ -599,10 +731,11 @@ static VALUE attribute_nodes(VALUE self)
|
|
599
731
|
{
|
600
732
|
/* this code in the mode of xmlHasProp() */
|
601
733
|
xmlNodePtr node;
|
734
|
+
VALUE attr;
|
602
735
|
|
603
736
|
Data_Get_Struct(self, xmlNode, node);
|
604
737
|
|
605
|
-
|
738
|
+
attr = rb_ary_new();
|
606
739
|
Nokogiri_xml_node_properties(node, attr);
|
607
740
|
|
608
741
|
return attr ;
|
@@ -636,12 +769,14 @@ static VALUE namespace_definitions(VALUE self)
|
|
636
769
|
{
|
637
770
|
/* this code in the mode of xmlHasProp() */
|
638
771
|
xmlNodePtr node ;
|
772
|
+
VALUE list;
|
773
|
+
xmlNsPtr ns;
|
639
774
|
|
640
775
|
Data_Get_Struct(self, xmlNode, node);
|
641
776
|
|
642
|
-
|
777
|
+
list = rb_ary_new();
|
643
778
|
|
644
|
-
|
779
|
+
ns = node->nsDef;
|
645
780
|
|
646
781
|
if(!ns) return list;
|
647
782
|
|
@@ -653,6 +788,35 @@ static VALUE namespace_definitions(VALUE self)
|
|
653
788
|
return list;
|
654
789
|
}
|
655
790
|
|
791
|
+
/*
|
792
|
+
* call-seq:
|
793
|
+
* namespace_scopes()
|
794
|
+
*
|
795
|
+
* returns a list of Namespace nodes in scope for _self_. these are all the
|
796
|
+
* namespaces defined in the node, or in any ancestor node.
|
797
|
+
*/
|
798
|
+
static VALUE namespace_scopes(VALUE self)
|
799
|
+
{
|
800
|
+
xmlNodePtr node ;
|
801
|
+
VALUE list;
|
802
|
+
xmlNsPtr *ns_list;
|
803
|
+
int j;
|
804
|
+
|
805
|
+
Data_Get_Struct(self, xmlNode, node);
|
806
|
+
|
807
|
+
list = rb_ary_new();
|
808
|
+
ns_list = xmlGetNsList(node->doc, node);
|
809
|
+
|
810
|
+
if(!ns_list) return list;
|
811
|
+
|
812
|
+
for (j = 0 ; ns_list[j] != NULL ; ++j) {
|
813
|
+
rb_ary_push(list, Nokogiri_wrap_xml_namespace(node->doc, ns_list[j]));
|
814
|
+
}
|
815
|
+
|
816
|
+
xmlFree(ns_list);
|
817
|
+
return list;
|
818
|
+
}
|
819
|
+
|
656
820
|
/*
|
657
821
|
* call-seq:
|
658
822
|
* node_type
|
@@ -674,8 +838,17 @@ static VALUE node_type(VALUE self)
|
|
674
838
|
*/
|
675
839
|
static VALUE set_content(VALUE self, VALUE content)
|
676
840
|
{
|
677
|
-
xmlNodePtr node;
|
841
|
+
xmlNodePtr node, child, next ;
|
678
842
|
Data_Get_Struct(self, xmlNode, node);
|
843
|
+
|
844
|
+
child = node->children;
|
845
|
+
while (NULL != child) {
|
846
|
+
next = child->next ;
|
847
|
+
xmlUnlinkNode(child) ;
|
848
|
+
NOKOGIRI_ROOT_NODE(child) ;
|
849
|
+
child = next ;
|
850
|
+
}
|
851
|
+
|
679
852
|
xmlNodeSetContent(node, (xmlChar *)StringValuePtr(content));
|
680
853
|
return content;
|
681
854
|
}
|
@@ -689,9 +862,11 @@ static VALUE set_content(VALUE self, VALUE content)
|
|
689
862
|
static VALUE get_content(VALUE self)
|
690
863
|
{
|
691
864
|
xmlNodePtr node;
|
865
|
+
xmlChar * content;
|
866
|
+
|
692
867
|
Data_Get_Struct(self, xmlNode, node);
|
693
868
|
|
694
|
-
|
869
|
+
content = xmlNodeGetContent(node);
|
695
870
|
if(content) {
|
696
871
|
VALUE rval = NOKOGIRI_STR_NEW2(content);
|
697
872
|
xmlFree(content);
|
@@ -701,9 +876,9 @@ static VALUE get_content(VALUE self)
|
|
701
876
|
}
|
702
877
|
|
703
878
|
/* :nodoc: */
|
704
|
-
static VALUE add_child(VALUE self, VALUE
|
879
|
+
static VALUE add_child(VALUE self, VALUE new_child)
|
705
880
|
{
|
706
|
-
return reparent_node_with(
|
881
|
+
return reparent_node_with(self, new_child, xmlAddChild);
|
707
882
|
}
|
708
883
|
|
709
884
|
/*
|
@@ -762,24 +937,26 @@ static VALUE path(VALUE self)
|
|
762
937
|
{
|
763
938
|
xmlNodePtr node;
|
764
939
|
xmlChar *path ;
|
940
|
+
VALUE rval;
|
941
|
+
|
765
942
|
Data_Get_Struct(self, xmlNode, node);
|
766
|
-
|
943
|
+
|
767
944
|
path = xmlGetNodePath(node);
|
768
|
-
|
945
|
+
rval = NOKOGIRI_STR_NEW2(path);
|
769
946
|
xmlFree(path);
|
770
947
|
return rval ;
|
771
948
|
}
|
772
949
|
|
773
950
|
/* :nodoc: */
|
774
|
-
static VALUE add_next_sibling(VALUE self, VALUE
|
951
|
+
static VALUE add_next_sibling(VALUE self, VALUE new_sibling)
|
775
952
|
{
|
776
|
-
return reparent_node_with(
|
953
|
+
return reparent_node_with(self, new_sibling, xmlAddNextSibling) ;
|
777
954
|
}
|
778
955
|
|
779
956
|
/* :nodoc: */
|
780
|
-
static VALUE add_previous_sibling(VALUE self, VALUE
|
957
|
+
static VALUE add_previous_sibling(VALUE self, VALUE new_sibling)
|
781
958
|
{
|
782
|
-
return reparent_node_with(
|
959
|
+
return reparent_node_with(self, new_sibling, xmlAddPrevSibling) ;
|
783
960
|
}
|
784
961
|
|
785
962
|
/*
|
@@ -796,16 +973,18 @@ static VALUE native_write_to(
|
|
796
973
|
VALUE options
|
797
974
|
) {
|
798
975
|
xmlNodePtr node;
|
976
|
+
const char * before_indent;
|
977
|
+
xmlSaveCtxtPtr savectx;
|
799
978
|
|
800
979
|
Data_Get_Struct(self, xmlNode, node);
|
801
980
|
|
802
981
|
xmlIndentTreeOutput = 1;
|
803
982
|
|
804
|
-
|
983
|
+
before_indent = xmlTreeIndentString;
|
805
984
|
|
806
985
|
xmlTreeIndentString = StringValuePtr(indent_string);
|
807
986
|
|
808
|
-
|
987
|
+
savectx = xmlSaveToIO(
|
809
988
|
(xmlOutputWriteCallback)io_write_callback,
|
810
989
|
(xmlOutputCloseCallback)io_close_callback,
|
811
990
|
(void *)io,
|
@@ -843,10 +1022,11 @@ static VALUE line(VALUE self)
|
|
843
1022
|
static VALUE add_namespace_definition(VALUE self, VALUE prefix, VALUE href)
|
844
1023
|
{
|
845
1024
|
xmlNodePtr node;
|
846
|
-
|
1025
|
+
xmlNsPtr ns;
|
847
1026
|
|
1027
|
+
Data_Get_Struct(self, xmlNode, node);
|
848
1028
|
|
849
|
-
|
1029
|
+
ns = xmlNewNs(
|
850
1030
|
node,
|
851
1031
|
(const xmlChar *)StringValuePtr(href),
|
852
1032
|
(const xmlChar *)(NIL_P(prefix) ? NULL : StringValuePtr(prefix))
|
@@ -874,19 +1054,21 @@ static VALUE add_namespace_definition(VALUE self, VALUE prefix, VALUE href)
|
|
874
1054
|
static VALUE new(int argc, VALUE *argv, VALUE klass)
|
875
1055
|
{
|
876
1056
|
xmlDocPtr doc;
|
1057
|
+
xmlNodePtr node;
|
877
1058
|
VALUE name;
|
878
1059
|
VALUE document;
|
879
1060
|
VALUE rest;
|
1061
|
+
VALUE rb_node;
|
880
1062
|
|
881
1063
|
rb_scan_args(argc, argv, "2*", &name, &document, &rest);
|
882
1064
|
|
883
1065
|
Data_Get_Struct(document, xmlDoc, doc);
|
884
1066
|
|
885
|
-
|
1067
|
+
node = xmlNewNode(NULL, (xmlChar *)StringValuePtr(name));
|
886
1068
|
node->doc = doc->doc;
|
887
1069
|
NOKOGIRI_ROOT_NODE(node);
|
888
1070
|
|
889
|
-
|
1071
|
+
rb_node = Nokogiri_wrap_xml_node(
|
890
1072
|
klass == cNokogiriXmlNode ? (VALUE)NULL : klass,
|
891
1073
|
node
|
892
1074
|
);
|
@@ -907,11 +1089,13 @@ static VALUE dump_html(VALUE self)
|
|
907
1089
|
{
|
908
1090
|
xmlBufferPtr buf ;
|
909
1091
|
xmlNodePtr node ;
|
1092
|
+
VALUE html;
|
1093
|
+
|
910
1094
|
Data_Get_Struct(self, xmlNode, node);
|
911
1095
|
|
912
1096
|
buf = xmlBufferCreate() ;
|
913
1097
|
htmlNodeDump(buf, node->doc, node);
|
914
|
-
|
1098
|
+
html = NOKOGIRI_STR_NEW2(buf->content);
|
915
1099
|
xmlBufferFree(buf);
|
916
1100
|
return html ;
|
917
1101
|
}
|
@@ -931,14 +1115,64 @@ static VALUE compare(VALUE self, VALUE _other)
|
|
931
1115
|
return INT2NUM((long)xmlXPathCmpNodes(other, node));
|
932
1116
|
}
|
933
1117
|
|
934
|
-
|
1118
|
+
|
1119
|
+
/* TODO: DOCUMENT ME */
|
1120
|
+
static VALUE in_context(VALUE self, VALUE _str, VALUE _options)
|
935
1121
|
{
|
936
|
-
|
1122
|
+
xmlNodePtr node;
|
1123
|
+
xmlNodePtr list;
|
1124
|
+
xmlNodeSetPtr set;
|
1125
|
+
VALUE doc, err;
|
1126
|
+
|
1127
|
+
Data_Get_Struct(self, xmlNode, node);
|
1128
|
+
|
1129
|
+
if(!node->parent)
|
1130
|
+
rb_raise(rb_eRuntimeError, "no contextual parsing on unlinked nodes");
|
1131
|
+
|
1132
|
+
doc = DOC_RUBY_OBJECT(node->doc);
|
1133
|
+
err = rb_iv_get(doc, "@errors");
|
937
1134
|
|
1135
|
+
xmlSetStructuredErrorFunc((void *)err, Nokogiri_error_array_pusher);
|
1136
|
+
|
1137
|
+
/* Twiddle global variable because of a bug in libxml2.
|
1138
|
+
* http://git.gnome.org/browse/libxml2/commit/?id=e20fb5a72c83cbfc8e4a8aa3943c6be8febadab7
|
1139
|
+
*/
|
1140
|
+
#ifndef HTML_PARSE_NOIMPLIED
|
1141
|
+
htmlHandleOmittedElem(0);
|
1142
|
+
#endif
|
1143
|
+
|
1144
|
+
xmlParseInNodeContext(
|
1145
|
+
node,
|
1146
|
+
StringValuePtr(_str),
|
1147
|
+
(int)RSTRING_LEN(_str),
|
1148
|
+
(int)NUM2INT(_options),
|
1149
|
+
&list);
|
1150
|
+
|
1151
|
+
#ifndef HTML_PARSE_NOIMPLIED
|
1152
|
+
htmlHandleOmittedElem(1);
|
1153
|
+
#endif
|
1154
|
+
|
1155
|
+
xmlSetStructuredErrorFunc(NULL, NULL);
|
1156
|
+
|
1157
|
+
set = xmlXPathNodeSetCreate(NULL);
|
1158
|
+
|
1159
|
+
while(list) {
|
1160
|
+
xmlXPathNodeSetAddUnique(set, list);
|
1161
|
+
list = list->next;
|
1162
|
+
}
|
1163
|
+
|
1164
|
+
return Nokogiri_wrap_xml_node_set(set, doc);
|
1165
|
+
}
|
1166
|
+
|
1167
|
+
|
1168
|
+
VALUE Nokogiri_wrap_xml_node(VALUE klass, xmlNodePtr node)
|
1169
|
+
{
|
938
1170
|
VALUE document = Qnil ;
|
939
1171
|
VALUE node_cache = Qnil ;
|
940
1172
|
VALUE rb_node = Qnil ;
|
941
1173
|
|
1174
|
+
assert(node);
|
1175
|
+
|
942
1176
|
if(node->type == XML_DOCUMENT_NODE || node->type == XML_HTML_DOCUMENT_NODE)
|
943
1177
|
return DOC_RUBY_OBJECT(node->doc);
|
944
1178
|
|
@@ -994,8 +1228,8 @@ VALUE Nokogiri_wrap_xml_node(VALUE klass, xmlNodePtr node)
|
|
994
1228
|
node->_private = (void *)rb_node;
|
995
1229
|
|
996
1230
|
if (DOC_RUBY_OBJECT_TEST(node->doc) && DOC_RUBY_OBJECT(node->doc)) {
|
997
|
-
|
998
|
-
|
1231
|
+
/* it's OK if the document isn't fully realized (as in XML::Reader). */
|
1232
|
+
/* see http://github.com/tenderlove/nokogiri/issues/closed/#issue/95 */
|
999
1233
|
document = DOC_RUBY_OBJECT(node->doc);
|
1000
1234
|
node_cache = DOC_NODE_CACHE(node->doc);
|
1001
1235
|
rb_ary_push(node_cache, rb_node);
|
@@ -1037,7 +1271,10 @@ void init_xml_node()
|
|
1037
1271
|
rb_define_method(klass, "node_name=", set_name, 1);
|
1038
1272
|
rb_define_method(klass, "parent", get_parent, 0);
|
1039
1273
|
rb_define_method(klass, "child", child, 0);
|
1274
|
+
rb_define_method(klass, "first_element_child", first_element_child, 0);
|
1275
|
+
rb_define_method(klass, "last_element_child", last_element_child, 0);
|
1040
1276
|
rb_define_method(klass, "children", children, 0);
|
1277
|
+
rb_define_method(klass, "element_children", element_children, 0);
|
1041
1278
|
rb_define_method(klass, "next_sibling", next_sibling, 0);
|
1042
1279
|
rb_define_method(klass, "previous_sibling", previous_sibling, 0);
|
1043
1280
|
rb_define_method(klass, "next_element", next_element, 0);
|
@@ -1054,6 +1291,7 @@ void init_xml_node()
|
|
1054
1291
|
rb_define_method(klass, "attribute_with_ns", attribute_with_ns, 2);
|
1055
1292
|
rb_define_method(klass, "namespace", namespace, 0);
|
1056
1293
|
rb_define_method(klass, "namespace_definitions", namespace_definitions, 0);
|
1294
|
+
rb_define_method(klass, "namespace_scopes", namespace_scopes, 0);
|
1057
1295
|
rb_define_method(klass, "encode_special_chars", encode_special_chars, 1);
|
1058
1296
|
rb_define_method(klass, "dup", duplicate_node, -1);
|
1059
1297
|
rb_define_method(klass, "unlink", unlink_node, 0);
|
@@ -1064,6 +1302,7 @@ void init_xml_node()
|
|
1064
1302
|
rb_define_method(klass, "pointer_id", pointer_id, 0);
|
1065
1303
|
rb_define_method(klass, "line", line, 0);
|
1066
1304
|
|
1305
|
+
rb_define_private_method(klass, "in_context", in_context, 2);
|
1067
1306
|
rb_define_private_method(klass, "add_child_node", add_child, 1);
|
1068
1307
|
rb_define_private_method(klass, "add_previous_sibling_node", add_previous_sibling, 1);
|
1069
1308
|
rb_define_private_method(klass, "add_next_sibling_node", add_next_sibling, 1);
|