nokogiri 1.4.1 → 1.4.2

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (107)
  1. data/CHANGELOG.ja.rdoc +45 -0
  2. data/CHANGELOG.rdoc +53 -1
  3. data/Manifest.txt +3 -3
  4. data/README.ja.rdoc +1 -1
  5. data/README.rdoc +11 -5
  6. data/Rakefile +13 -79
  7. data/ext/nokogiri/extconf.rb +22 -74
  8. data/ext/nokogiri/html_document.c +17 -8
  9. data/ext/nokogiri/html_element_description.c +20 -16
  10. data/ext/nokogiri/html_entity_lookup.c +2 -2
  11. data/ext/nokogiri/html_sax_parser_context.c +10 -8
  12. data/ext/nokogiri/nokogiri.c +0 -1
  13. data/ext/nokogiri/nokogiri.h +33 -28
  14. data/ext/nokogiri/xml_attr.c +7 -5
  15. data/ext/nokogiri/xml_attribute_decl.c +5 -2
  16. data/ext/nokogiri/xml_cdata.c +4 -2
  17. data/ext/nokogiri/xml_comment.c +4 -2
  18. data/ext/nokogiri/xml_document.c +93 -15
  19. data/ext/nokogiri/xml_document.h +0 -1
  20. data/ext/nokogiri/xml_document_fragment.c +4 -2
  21. data/ext/nokogiri/xml_dtd.c +18 -8
  22. data/ext/nokogiri/xml_element_content.c +2 -2
  23. data/ext/nokogiri/xml_entity_decl.c +15 -2
  24. data/ext/nokogiri/xml_entity_reference.c +4 -2
  25. data/ext/nokogiri/xml_io.c +1 -1
  26. data/ext/nokogiri/xml_namespace.c +5 -3
  27. data/ext/nokogiri/xml_node.c +353 -114
  28. data/ext/nokogiri/xml_node_set.c +35 -22
  29. data/ext/nokogiri/xml_node_set.h +1 -1
  30. data/ext/nokogiri/xml_processing_instruction.c +4 -2
  31. data/ext/nokogiri/xml_reader.c +119 -47
  32. data/ext/nokogiri/xml_relax_ng.c +21 -12
  33. data/ext/nokogiri/xml_sax_parser.c +6 -3
  34. data/ext/nokogiri/xml_sax_parser.h +13 -17
  35. data/ext/nokogiri/xml_sax_parser_context.c +8 -6
  36. data/ext/nokogiri/xml_sax_push_parser.c +7 -6
  37. data/ext/nokogiri/xml_schema.c +62 -13
  38. data/ext/nokogiri/xml_syntax_error.c +18 -12
  39. data/ext/nokogiri/xml_syntax_error.h +1 -1
  40. data/ext/nokogiri/xml_text.c +4 -2
  41. data/ext/nokogiri/xml_xpath_context.c +60 -23
  42. data/ext/nokogiri/xslt_stylesheet.c +14 -3
  43. data/lib/nokogiri.rb +17 -0
  44. data/lib/nokogiri/css/generated_parser.rb +72 -62
  45. data/lib/nokogiri/css/generated_tokenizer.rb +23 -24
  46. data/lib/nokogiri/css/parser.y +3 -1
  47. data/lib/nokogiri/css/tokenizer.rex +3 -3
  48. data/lib/nokogiri/css/xpath_visitor.rb +8 -3
  49. data/lib/nokogiri/ffi/html/sax/parser_context.rb +3 -3
  50. data/lib/nokogiri/ffi/libxml.rb +16 -2
  51. data/lib/nokogiri/ffi/structs/common_node.rb +15 -3
  52. data/lib/nokogiri/ffi/structs/xml_document.rb +13 -4
  53. data/lib/nokogiri/ffi/structs/xml_xpath_context.rb +3 -2
  54. data/lib/nokogiri/ffi/weak_bucket.rb +40 -0
  55. data/lib/nokogiri/ffi/xml/document.rb +27 -0
  56. data/lib/nokogiri/ffi/xml/entity_decl.rb +9 -0
  57. data/lib/nokogiri/ffi/xml/node.rb +142 -61
  58. data/lib/nokogiri/ffi/xml/node_set.rb +15 -12
  59. data/lib/nokogiri/ffi/xml/reader.rb +5 -0
  60. data/lib/nokogiri/ffi/xml/schema.rb +17 -0
  61. data/lib/nokogiri/ffi/xml/syntax_error.rb +4 -4
  62. data/lib/nokogiri/ffi/xml/xpath.rb +0 -10
  63. data/lib/nokogiri/ffi/xml/xpath_context.rb +22 -9
  64. data/lib/nokogiri/ffi/xslt/stylesheet.rb +3 -0
  65. data/lib/nokogiri/html/document.rb +5 -3
  66. data/lib/nokogiri/html/document_fragment.rb +28 -7
  67. data/lib/nokogiri/version.rb +6 -2
  68. data/lib/nokogiri/version_warning.rb +6 -3
  69. data/lib/nokogiri/xml.rb +1 -1
  70. data/lib/nokogiri/xml/builder.rb +35 -22
  71. data/lib/nokogiri/xml/document.rb +44 -12
  72. data/lib/nokogiri/xml/document_fragment.rb +16 -12
  73. data/lib/nokogiri/xml/entity_decl.rb +4 -0
  74. data/lib/nokogiri/xml/node.rb +152 -95
  75. data/lib/nokogiri/xml/node_set.rb +2 -1
  76. data/lib/nokogiri/xml/sax/push_parser.rb +1 -1
  77. data/lib/nokogiri/xml/schema.rb +1 -5
  78. data/lib/nokogiri/xml/syntax_error.rb +4 -0
  79. data/lib/nokogiri/xml/text.rb +9 -0
  80. data/lib/nokogiri/xml/xpath/syntax_error.rb +3 -0
  81. data/tasks/cross_compile.rb +158 -0
  82. data/tasks/test.rb +0 -6
  83. data/test/css/test_xpath_visitor.rb +9 -0
  84. data/test/helper.rb +49 -11
  85. data/test/html/sax/test_parser.rb +11 -1
  86. data/test/html/test_document.rb +8 -0
  87. data/test/html/test_document_fragment.rb +14 -2
  88. data/test/html/test_element_description.rb +5 -1
  89. data/test/html/test_node.rb +5 -66
  90. data/test/test_reader.rb +28 -0
  91. data/test/test_xslt_transforms.rb +14 -0
  92. data/test/xml/test_builder.rb +43 -0
  93. data/test/xml/test_cdata.rb +12 -0
  94. data/test/xml/test_document.rb +74 -39
  95. data/test/xml/test_document_fragment.rb +36 -0
  96. data/test/xml/test_entity_decl.rb +37 -0
  97. data/test/xml/test_node.rb +192 -65
  98. data/test/xml/test_node_reparenting.rb +253 -236
  99. data/test/xml/test_node_set.rb +67 -0
  100. data/test/xml/test_text.rb +8 -0
  101. data/test/xml/test_xpath.rb +32 -0
  102. metadata +103 -48
  103. data.tar.gz.sig +0 -0
  104. data/ext/nokogiri/xml_xpath.c +0 -53
  105. data/ext/nokogiri/xml_xpath.h +0 -11
  106. data/lib/nokogiri/xml/fragment_handler.rb +0 -79
  107. metadata.gz.sig +0 -0
@@ -14,42 +14,61 @@ static void debug_node_dealloc(xmlNodePtr x)
14
14
 
15
15
  static void mark(xmlNodePtr node)
16
16
  {
17
- // it's OK if the document isn't fully realized (as in XML::Reader).
18
- // see http://github.com/tenderlove/nokogiri/issues/closed/#issue/95
17
+ /* it's OK if the document isn't fully realized (as in XML::Reader). */
18
+ /* see http://github.com/tenderlove/nokogiri/issues/closed/#issue/95 */
19
19
  if (DOC_RUBY_OBJECT_TEST(node->doc) && DOC_RUBY_OBJECT(node->doc))
20
20
  rb_gc_mark(DOC_RUBY_OBJECT(node->doc));
21
21
  }
22
22
 
23
23
  /* :nodoc: */
24
- typedef xmlNodePtr (*node_other_func)(xmlNodePtr, xmlNodePtr);
24
+ typedef xmlNodePtr (*pivot_reparentee_func)(xmlNodePtr, xmlNodePtr);
25
25
 
26
26
  /* :nodoc: */
27
27
  static void relink_namespace(xmlNodePtr reparented)
28
28
  {
29
- // Avoid segv when relinking against unlinked nodes.
29
+ xmlNodePtr child;
30
+
31
+ /* Avoid segv when relinking against unlinked nodes. */
30
32
  if(!reparented->parent) return;
31
33
 
32
- // Make sure that our reparented node has the correct namespaces
34
+ /* Make sure that our reparented node has the correct namespaces */
33
35
  if(!reparented->ns && reparented->doc != (xmlDocPtr)reparented->parent)
34
36
  xmlSetNs(reparented, reparented->parent->ns);
35
37
 
36
- // Search our parents for an existing definition
38
+ /* Search our parents for an existing definition */
37
39
  if(reparented->nsDef) {
38
- xmlNsPtr ns = xmlSearchNsByHref(
39
- reparented->doc,
40
- reparented->parent,
41
- reparented->nsDef->href
42
- );
43
- if(ns && ns != reparented->nsDef) reparented->nsDef = NULL;
40
+ xmlNsPtr curr = reparented->nsDef;
41
+ xmlNsPtr prev = NULL;
42
+
43
+ while(curr) {
44
+ xmlNsPtr ns = xmlSearchNsByHref(
45
+ reparented->doc,
46
+ reparented->parent,
47
+ curr->href
48
+ );
49
+ /* If we find the namespace is already declared, remove it from this
50
+ * definition list. */
51
+ if(ns && ns != curr) {
52
+ if (prev) {
53
+ prev->next = curr->next;
54
+ } else {
55
+ reparented->nsDef = curr->next;
56
+ }
57
+ NOKOGIRI_ROOT_NSDEF(curr, reparented->doc);
58
+ } else {
59
+ prev = curr;
60
+ }
61
+ curr = curr->next;
62
+ }
44
63
  }
45
64
 
46
- // Only walk all children if there actually is a namespace we need to
47
- // reparent.
65
+ /* Only walk all children if there actually is a namespace we need to */
66
+ /* reparent. */
48
67
  if(NULL == reparented->ns) return;
49
68
 
50
- // When a node gets reparented, walk it's children to make sure that
51
- // their namespaces are reparented as well.
52
- xmlNodePtr child = reparented->children;
69
+ /* When a node gets reparented, walk it's children to make sure that */
70
+ /* their namespaces are reparented as well. */
71
+ child = reparented->children;
53
72
  while(NULL != child) {
54
73
  relink_namespace(child);
55
74
  child = child->next;
@@ -57,72 +76,88 @@ static void relink_namespace(xmlNodePtr reparented)
57
76
  }
58
77
 
59
78
  /* :nodoc: */
60
- static xmlNodePtr xmlReplaceNodeWrapper(xmlNodePtr old, xmlNodePtr cur)
79
+ static xmlNodePtr xmlReplaceNodeWrapper(xmlNodePtr pivot, xmlNodePtr new_node)
61
80
  {
62
81
  xmlNodePtr retval ;
63
- retval = xmlReplaceNode(old, cur) ;
64
- if (retval == old) {
65
- return cur ; // return semantics for reparent_node_with
82
+
83
+ retval = xmlReplaceNode(pivot, new_node) ;
84
+
85
+ if (retval == pivot) {
86
+ retval = new_node ; /* return semantics for reparent_node_with */
66
87
  }
88
+
89
+ /* work around libxml2 issue: https://bugzilla.gnome.org/show_bug.cgi?id=615612 */
90
+ if (retval->type == XML_TEXT_NODE) {
91
+ if (retval->prev && retval->prev->type == XML_TEXT_NODE) {
92
+ retval = xmlTextMerge(retval->prev, retval);
93
+ }
94
+ if (retval->next && retval->next->type == XML_TEXT_NODE) {
95
+ retval = xmlTextMerge(retval, retval->next);
96
+ }
97
+ }
98
+
67
99
  return retval ;
68
100
  }
69
101
 
70
102
  /* :nodoc: */
71
- static VALUE reparent_node_with(VALUE node_obj, VALUE other_obj, node_other_func func)
103
+ static VALUE reparent_node_with(VALUE pivot_obj, VALUE reparentee_obj, pivot_reparentee_func prf)
72
104
  {
73
105
  VALUE reparented_obj ;
74
- xmlNodePtr node, other, reparented ;
106
+ xmlNodePtr reparentee, pivot, reparented ;
75
107
 
76
- if(!rb_obj_is_kind_of(node_obj, cNokogiriXmlNode))
108
+ if(!rb_obj_is_kind_of(reparentee_obj, cNokogiriXmlNode))
109
+ rb_raise(rb_eArgError, "node must be a Nokogiri::XML::Node");
110
+ if(rb_obj_is_kind_of(reparentee_obj, cNokogiriXmlDocument))
77
111
  rb_raise(rb_eArgError, "node must be a Nokogiri::XML::Node");
78
112
 
79
- Data_Get_Struct(node_obj, xmlNode, node);
80
- Data_Get_Struct(other_obj, xmlNode, other);
113
+ Data_Get_Struct(reparentee_obj, xmlNode, reparentee);
114
+ Data_Get_Struct(pivot_obj, xmlNode, pivot);
81
115
 
82
- if(XML_DOCUMENT_NODE == node->type || XML_HTML_DOCUMENT_NODE == node->type)
116
+ if(XML_DOCUMENT_NODE == reparentee->type || XML_HTML_DOCUMENT_NODE == reparentee->type)
83
117
  rb_raise(rb_eArgError, "cannot reparent a document node");
84
118
 
85
- if(node->type == XML_TEXT_NODE) {
86
- NOKOGIRI_ROOT_NODE(node);
87
- node = xmlDocCopyNode(node, other->doc, 1);
88
- }
89
-
90
- if (node->doc == other->doc) {
91
- xmlUnlinkNode(node) ;
92
-
93
- // TODO: I really want to remove this. We shouldn't support 2.6.16 anymore
94
- if ( node->type == XML_TEXT_NODE
95
- && other->type == XML_TEXT_NODE
96
- && is_2_6_16() ) {
97
-
98
- // we'd rather leak than segfault.
99
- other->content = xmlStrdup(other->content);
100
-
101
- }
102
-
103
- if(!(reparented = (*func)(other, node))) {
104
- rb_raise(rb_eRuntimeError, "Could not reparent node (1)");
105
- }
106
- } else {
107
- xmlNodePtr duped_node ;
108
- // recursively copy to the new document
109
- if (!(duped_node = xmlDocCopyNode(node, other->doc, 1))) {
119
+ xmlUnlinkNode(reparentee);
120
+
121
+ if (reparentee->doc != pivot->doc || reparentee->type == XML_TEXT_NODE) {
122
+ /*
123
+ * if the reparentee is a text node, there's a very good chance it will be
124
+ * merged with an adjacent text node after being reparented, and in that case
125
+ * libxml will free the underlying C struct.
126
+ *
127
+ * since we clearly have a ruby object which references the underlying
128
+ * memory, we can't let the C struct get freed. let's pickle the original
129
+ * reparentee by rooting it; and then we'll reparent a duplicate of the
130
+ * node that we don't care about preserving.
131
+ *
132
+ * alternatively, if the reparentee is from a different document than the
133
+ * pivot node, libxml2 is going to get confused about which document's
134
+ * "dictionary" the node's strings belong to (this is an otherwise
135
+ * uninteresting libxml2 implementation detail). as a result, we cannot
136
+ * reparent the actual reparentee, so we reparent a duplicate.
137
+ */
138
+ NOKOGIRI_ROOT_NODE(reparentee);
139
+ if (!(reparentee = xmlDocCopyNode(reparentee, pivot->doc, 1))) {
110
140
  rb_raise(rb_eRuntimeError, "Could not reparent node (xmlDocCopyNode)");
111
141
  }
112
- if(!(reparented = (*func)(other, duped_node))) {
113
- rb_raise(rb_eRuntimeError, "Could not reparent node (2)");
114
- }
115
- xmlUnlinkNode(node);
116
- NOKOGIRI_ROOT_NODE(node);
117
142
  }
118
143
 
119
- // the child was a text node that was coalesced. we need to have the object
120
- // point at SOMETHING, or we'll totally bomb out.
121
- if (reparented != node) {
122
- DATA_PTR(node_obj) = reparented ;
144
+ /* TODO: I really want to remove this. We shouldn't support 2.6.16 anymore */
145
+ if ( reparentee->type == XML_TEXT_NODE && pivot->type == XML_TEXT_NODE && is_2_6_16() ) {
146
+ /* work around a string-handling bug in libxml 2.6.16. we'd rather leak than segfault. */
147
+ pivot->content = xmlStrdup(pivot->content);
148
+ }
149
+
150
+ if(!(reparented = (*prf)(pivot, reparentee))) {
151
+ rb_raise(rb_eRuntimeError, "Could not reparent node");
123
152
  }
124
153
 
125
- // Appropriately link in namespaces
154
+ /*
155
+ * make sure the ruby object is pointed at the just-reparented node, which
156
+ * might be a duplicate (see above) or might be the result of merging
157
+ * adjacent text nodes.
158
+ */
159
+ DATA_PTR(reparentee_obj) = reparented ;
160
+
126
161
  relink_namespace(reparented);
127
162
 
128
163
  reparented_obj = Nokogiri_wrap_xml_node(Qnil, reparented);
@@ -169,13 +204,16 @@ static VALUE pointer_id(VALUE self)
169
204
  static VALUE encode_special_chars(VALUE self, VALUE string)
170
205
  {
171
206
  xmlNodePtr node;
207
+ xmlChar *encoded;
208
+ VALUE encoded_str;
209
+
172
210
  Data_Get_Struct(self, xmlNode, node);
173
- xmlChar * encoded = xmlEncodeSpecialChars(
211
+ encoded = xmlEncodeSpecialChars(
174
212
  node->doc,
175
213
  (const xmlChar *)StringValuePtr(string)
176
214
  );
177
215
 
178
- VALUE encoded_str = NOKOGIRI_STR_NEW2(encoded);
216
+ encoded_str = NOKOGIRI_STR_NEW2(encoded);
179
217
  xmlFree(encoded);
180
218
 
181
219
  return encoded_str;
@@ -191,6 +229,8 @@ static VALUE create_internal_subset(VALUE self, VALUE name, VALUE external_id, V
191
229
  {
192
230
  xmlNodePtr node;
193
231
  xmlDocPtr doc;
232
+ xmlDtdPtr dtd;
233
+
194
234
  Data_Get_Struct(self, xmlNode, node);
195
235
 
196
236
  doc = node->doc;
@@ -198,8 +238,8 @@ static VALUE create_internal_subset(VALUE self, VALUE name, VALUE external_id, V
198
238
  if(xmlGetIntSubset(doc))
199
239
  rb_raise(rb_eRuntimeError, "Document already has an internal subset");
200
240
 
201
- xmlDtdPtr dtd = xmlCreateIntSubset(
202
- doc,
241
+ dtd = xmlCreateIntSubset(
242
+ doc,
203
243
  NIL_P(name) ? NULL : (const xmlChar *)StringValuePtr(name),
204
244
  NIL_P(external_id) ? NULL : (const xmlChar *)StringValuePtr(external_id),
205
245
  NIL_P(system_id) ? NULL : (const xmlChar *)StringValuePtr(system_id)
@@ -220,6 +260,8 @@ static VALUE create_external_subset(VALUE self, VALUE name, VALUE external_id, V
220
260
  {
221
261
  xmlNodePtr node;
222
262
  xmlDocPtr doc;
263
+ xmlDtdPtr dtd;
264
+
223
265
  Data_Get_Struct(self, xmlNode, node);
224
266
 
225
267
  doc = node->doc;
@@ -227,8 +269,8 @@ static VALUE create_external_subset(VALUE self, VALUE name, VALUE external_id, V
227
269
  if(doc->extSubset)
228
270
  rb_raise(rb_eRuntimeError, "Document already has an external subset");
229
271
 
230
- xmlDtdPtr dtd = xmlNewDtd(
231
- doc,
272
+ dtd = xmlNewDtd(
273
+ doc,
232
274
  NIL_P(name) ? NULL : (const xmlChar *)StringValuePtr(name),
233
275
  NIL_P(external_id) ? NULL : (const xmlChar *)StringValuePtr(external_id),
234
276
  NIL_P(system_id) ? NULL : (const xmlChar *)StringValuePtr(system_id)
@@ -249,12 +291,14 @@ static VALUE external_subset(VALUE self)
249
291
  {
250
292
  xmlNodePtr node;
251
293
  xmlDocPtr doc;
294
+ xmlDtdPtr dtd;
295
+
252
296
  Data_Get_Struct(self, xmlNode, node);
253
297
 
254
298
  if(!node->doc) return Qnil;
255
299
 
256
300
  doc = node->doc;
257
- xmlDtdPtr dtd = doc->extSubset;
301
+ dtd = doc->extSubset;
258
302
 
259
303
  if(!dtd) return Qnil;
260
304
 
@@ -271,12 +315,14 @@ static VALUE internal_subset(VALUE self)
271
315
  {
272
316
  xmlNodePtr node;
273
317
  xmlDocPtr doc;
318
+ xmlDtdPtr dtd;
319
+
274
320
  Data_Get_Struct(self, xmlNode, node);
275
321
 
276
322
  if(!node->doc) return Qnil;
277
323
 
278
324
  doc = node->doc;
279
- xmlDtdPtr dtd = xmlGetIntSubset(doc);
325
+ dtd = xmlGetIntSubset(doc);
280
326
 
281
327
  if(!dtd) return Qnil;
282
328
 
@@ -293,11 +339,11 @@ static VALUE internal_subset(VALUE self)
293
339
  static VALUE duplicate_node(int argc, VALUE *argv, VALUE self)
294
340
  {
295
341
  VALUE level;
342
+ xmlNodePtr node, dup;
296
343
 
297
344
  if(rb_scan_args(argc, argv, "01", &level) == 0)
298
345
  level = INT2NUM((long)1);
299
346
 
300
- xmlNodePtr node, dup;
301
347
  Data_Get_Struct(self, xmlNode, node);
302
348
 
303
349
  dup = xmlDocCopyNode(node, node->doc, (int)NUM2INT(level));
@@ -331,9 +377,7 @@ static VALUE blank_eh(VALUE self)
331
377
  {
332
378
  xmlNodePtr node;
333
379
  Data_Get_Struct(self, xmlNode, node);
334
- if(1 == xmlIsBlankNode(node))
335
- return Qtrue;
336
- return Qfalse;
380
+ return (1 == xmlIsBlankNode(node)) ? Qtrue : Qfalse ;
337
381
  }
338
382
 
339
383
  /*
@@ -381,13 +425,10 @@ static VALUE next_element(VALUE self)
381
425
  xmlNodePtr node, sibling;
382
426
  Data_Get_Struct(self, xmlNode, node);
383
427
 
384
- sibling = node->next;
428
+ sibling = xmlNextElementSibling(node);
385
429
  if(!sibling) return Qnil;
386
430
 
387
- while(sibling && sibling->type != XML_ELEMENT_NODE)
388
- sibling = sibling->next;
389
-
390
- return sibling ? Nokogiri_wrap_xml_node(Qnil, sibling) : Qnil ;
431
+ return Nokogiri_wrap_xml_node(Qnil, sibling);
391
432
  }
392
433
 
393
434
  /*
@@ -401,6 +442,9 @@ static VALUE previous_element(VALUE self)
401
442
  xmlNodePtr node, sibling;
402
443
  Data_Get_Struct(self, xmlNode, node);
403
444
 
445
+ /*
446
+ * note that we don't use xmlPreviousElementSibling here because it's buggy pre-2.7.7.
447
+ */
404
448
  sibling = node->prev;
405
449
  if(!sibling) return Qnil;
406
450
 
@@ -411,10 +455,9 @@ static VALUE previous_element(VALUE self)
411
455
  }
412
456
 
413
457
  /* :nodoc: */
414
- static VALUE replace(VALUE self, VALUE _new_node)
458
+ static VALUE replace(VALUE self, VALUE new_node)
415
459
  {
416
- reparent_node_with(_new_node, self, xmlReplaceNodeWrapper) ;
417
- return self ;
460
+ return reparent_node_with(self, new_node, xmlReplaceNodeWrapper) ;
418
461
  }
419
462
 
420
463
  /*
@@ -426,21 +469,66 @@ static VALUE replace(VALUE self, VALUE _new_node)
426
469
  static VALUE children(VALUE self)
427
470
  {
428
471
  xmlNodePtr node;
472
+ xmlNodePtr child;
473
+ xmlNodeSetPtr set;
474
+ VALUE document;
475
+ VALUE node_set;
476
+
429
477
  Data_Get_Struct(self, xmlNode, node);
430
478
 
431
- xmlNodePtr child = node->children;
432
- xmlNodeSetPtr set = xmlXPathNodeSetCreate(child);
479
+ child = node->children;
480
+ set = xmlXPathNodeSetCreate(child);
481
+
482
+ document = DOC_RUBY_OBJECT(node->doc);
433
483
 
434
- if(!child) return Nokogiri_wrap_xml_node_set(set);
484
+ if(!child) return Nokogiri_wrap_xml_node_set(set, document);
435
485
 
436
486
  child = child->next;
437
487
  while(NULL != child) {
438
- xmlXPathNodeSetAdd(set, child);
488
+ xmlXPathNodeSetAddUnique(set, child);
439
489
  child = child->next;
440
490
  }
441
491
 
442
- VALUE node_set = Nokogiri_wrap_xml_node_set(set);
443
- rb_iv_set(node_set, "@document", DOC_RUBY_OBJECT(node->doc));
492
+ node_set = Nokogiri_wrap_xml_node_set(set, document);
493
+
494
+ return node_set;
495
+ }
496
+
497
+ /*
498
+ * call-seq:
499
+ * element_children
500
+ *
501
+ * Get the list of children for this node as a NodeSet. All nodes will be
502
+ * element nodes.
503
+ *
504
+ * Example:
505
+ *
506
+ * @doc.root.element_children.all? { |x| x.element? } # => true
507
+ */
508
+ static VALUE element_children(VALUE self)
509
+ {
510
+ xmlNodePtr node;
511
+ xmlNodePtr child;
512
+ xmlNodeSetPtr set;
513
+ VALUE document;
514
+ VALUE node_set;
515
+
516
+ Data_Get_Struct(self, xmlNode, node);
517
+
518
+ child = xmlFirstElementChild(node);
519
+ set = xmlXPathNodeSetCreate(child);
520
+
521
+ document = DOC_RUBY_OBJECT(node->doc);
522
+
523
+ if(!child) return Nokogiri_wrap_xml_node_set(set, document);
524
+
525
+ child = xmlNextElementSibling(child);
526
+ while(NULL != child) {
527
+ xmlXPathNodeSetAddUnique(set, child);
528
+ child = xmlNextElementSibling(child);
529
+ }
530
+
531
+ node_set = Nokogiri_wrap_xml_node_set(set, document);
444
532
 
445
533
  return node_set;
446
534
  }
@@ -462,6 +550,48 @@ static VALUE child(VALUE self)
462
550
  return Nokogiri_wrap_xml_node(Qnil, child);
463
551
  }
464
552
 
553
+ /*
554
+ * call-seq:
555
+ * first_element_child
556
+ *
557
+ * Returns the first child node of this node that is an element.
558
+ *
559
+ * Example:
560
+ *
561
+ * @doc.root.first_element_child.element? # => true
562
+ */
563
+ static VALUE first_element_child(VALUE self)
564
+ {
565
+ xmlNodePtr node, child;
566
+ Data_Get_Struct(self, xmlNode, node);
567
+
568
+ child = xmlFirstElementChild(node);
569
+ if(!child) return Qnil;
570
+
571
+ return Nokogiri_wrap_xml_node(Qnil, child);
572
+ }
573
+
574
+ /*
575
+ * call-seq:
576
+ * last_element_child
577
+ *
578
+ * Returns the last child node of this node that is an element.
579
+ *
580
+ * Example:
581
+ *
582
+ * @doc.root.last_element_child.element? # => true
583
+ */
584
+ static VALUE last_element_child(VALUE self)
585
+ {
586
+ xmlNodePtr node, child;
587
+ Data_Get_Struct(self, xmlNode, node);
588
+
589
+ child = xmlLastElementChild(node);
590
+ if(!child) return Qnil;
591
+
592
+ return Nokogiri_wrap_xml_node(Qnil, child);
593
+ }
594
+
465
595
  /*
466
596
  * call-seq:
467
597
  * key?(attribute)
@@ -544,10 +674,12 @@ static VALUE get(VALUE self, VALUE attribute)
544
674
  static VALUE set_namespace(VALUE self, VALUE namespace)
545
675
  {
546
676
  xmlNodePtr node;
547
- xmlNsPtr ns;
677
+ xmlNsPtr ns = NULL;
548
678
 
549
679
  Data_Get_Struct(self, xmlNode, node);
550
- Data_Get_Struct(namespace, xmlNs, ns);
680
+
681
+ if(!NIL_P(namespace))
682
+ Data_Get_Struct(namespace, xmlNs, ns);
551
683
 
552
684
  xmlSetNs(node, ns);
553
685
 
@@ -599,10 +731,11 @@ static VALUE attribute_nodes(VALUE self)
599
731
  {
600
732
  /* this code in the mode of xmlHasProp() */
601
733
  xmlNodePtr node;
734
+ VALUE attr;
602
735
 
603
736
  Data_Get_Struct(self, xmlNode, node);
604
737
 
605
- VALUE attr = rb_ary_new();
738
+ attr = rb_ary_new();
606
739
  Nokogiri_xml_node_properties(node, attr);
607
740
 
608
741
  return attr ;
@@ -636,12 +769,14 @@ static VALUE namespace_definitions(VALUE self)
636
769
  {
637
770
  /* this code in the mode of xmlHasProp() */
638
771
  xmlNodePtr node ;
772
+ VALUE list;
773
+ xmlNsPtr ns;
639
774
 
640
775
  Data_Get_Struct(self, xmlNode, node);
641
776
 
642
- VALUE list = rb_ary_new();
777
+ list = rb_ary_new();
643
778
 
644
- xmlNsPtr ns = node->nsDef;
779
+ ns = node->nsDef;
645
780
 
646
781
  if(!ns) return list;
647
782
 
@@ -653,6 +788,35 @@ static VALUE namespace_definitions(VALUE self)
653
788
  return list;
654
789
  }
655
790
 
791
+ /*
792
+ * call-seq:
793
+ * namespace_scopes()
794
+ *
795
+ * returns a list of Namespace nodes in scope for _self_. this is all
796
+ * namespaces defined in the node, or in any ancestor node.
797
+ */
798
+ static VALUE namespace_scopes(VALUE self)
799
+ {
800
+ xmlNodePtr node ;
801
+ VALUE list;
802
+ xmlNsPtr *ns_list;
803
+ int j;
804
+
805
+ Data_Get_Struct(self, xmlNode, node);
806
+
807
+ list = rb_ary_new();
808
+ ns_list = xmlGetNsList(node->doc, node);
809
+
810
+ if(!ns_list) return list;
811
+
812
+ for (j = 0 ; ns_list[j] != NULL ; ++j) {
813
+ rb_ary_push(list, Nokogiri_wrap_xml_namespace(node->doc, ns_list[j]));
814
+ }
815
+
816
+ xmlFree(ns_list);
817
+ return list;
818
+ }
819
+
656
820
  /*
657
821
  * call-seq:
658
822
  * node_type
@@ -674,8 +838,17 @@ static VALUE node_type(VALUE self)
674
838
  */
675
839
  static VALUE set_content(VALUE self, VALUE content)
676
840
  {
677
- xmlNodePtr node;
841
+ xmlNodePtr node, child, next ;
678
842
  Data_Get_Struct(self, xmlNode, node);
843
+
844
+ child = node->children;
845
+ while (NULL != child) {
846
+ next = child->next ;
847
+ xmlUnlinkNode(child) ;
848
+ NOKOGIRI_ROOT_NODE(child) ;
849
+ child = next ;
850
+ }
851
+
679
852
  xmlNodeSetContent(node, (xmlChar *)StringValuePtr(content));
680
853
  return content;
681
854
  }
@@ -689,9 +862,11 @@ static VALUE set_content(VALUE self, VALUE content)
689
862
  static VALUE get_content(VALUE self)
690
863
  {
691
864
  xmlNodePtr node;
865
+ xmlChar * content;
866
+
692
867
  Data_Get_Struct(self, xmlNode, node);
693
868
 
694
- xmlChar * content = xmlNodeGetContent(node);
869
+ content = xmlNodeGetContent(node);
695
870
  if(content) {
696
871
  VALUE rval = NOKOGIRI_STR_NEW2(content);
697
872
  xmlFree(content);
@@ -701,9 +876,9 @@ static VALUE get_content(VALUE self)
701
876
  }
702
877
 
703
878
  /* :nodoc: */
704
- static VALUE add_child(VALUE self, VALUE child)
879
+ static VALUE add_child(VALUE self, VALUE new_child)
705
880
  {
706
- return reparent_node_with(child, self, xmlAddChild);
881
+ return reparent_node_with(self, new_child, xmlAddChild);
707
882
  }
708
883
 
709
884
  /*
@@ -762,24 +937,26 @@ static VALUE path(VALUE self)
762
937
  {
763
938
  xmlNodePtr node;
764
939
  xmlChar *path ;
940
+ VALUE rval;
941
+
765
942
  Data_Get_Struct(self, xmlNode, node);
766
-
943
+
767
944
  path = xmlGetNodePath(node);
768
- VALUE rval = NOKOGIRI_STR_NEW2(path);
945
+ rval = NOKOGIRI_STR_NEW2(path);
769
946
  xmlFree(path);
770
947
  return rval ;
771
948
  }
772
949
 
773
950
  /* :nodoc: */
774
- static VALUE add_next_sibling(VALUE self, VALUE rb_node)
951
+ static VALUE add_next_sibling(VALUE self, VALUE new_sibling)
775
952
  {
776
- return reparent_node_with(rb_node, self, xmlAddNextSibling) ;
953
+ return reparent_node_with(self, new_sibling, xmlAddNextSibling) ;
777
954
  }
778
955
 
779
956
  /* :nodoc: */
780
- static VALUE add_previous_sibling(VALUE self, VALUE rb_node)
957
+ static VALUE add_previous_sibling(VALUE self, VALUE new_sibling)
781
958
  {
782
- return reparent_node_with(rb_node, self, xmlAddPrevSibling) ;
959
+ return reparent_node_with(self, new_sibling, xmlAddPrevSibling) ;
783
960
  }
784
961
 
785
962
  /*
@@ -796,16 +973,18 @@ static VALUE native_write_to(
796
973
  VALUE options
797
974
  ) {
798
975
  xmlNodePtr node;
976
+ const char * before_indent;
977
+ xmlSaveCtxtPtr savectx;
799
978
 
800
979
  Data_Get_Struct(self, xmlNode, node);
801
980
 
802
981
  xmlIndentTreeOutput = 1;
803
982
 
804
- const char * before_indent = xmlTreeIndentString;
983
+ before_indent = xmlTreeIndentString;
805
984
 
806
985
  xmlTreeIndentString = StringValuePtr(indent_string);
807
986
 
808
- xmlSaveCtxtPtr savectx = xmlSaveToIO(
987
+ savectx = xmlSaveToIO(
809
988
  (xmlOutputWriteCallback)io_write_callback,
810
989
  (xmlOutputCloseCallback)io_close_callback,
811
990
  (void *)io,
@@ -843,10 +1022,11 @@ static VALUE line(VALUE self)
843
1022
  static VALUE add_namespace_definition(VALUE self, VALUE prefix, VALUE href)
844
1023
  {
845
1024
  xmlNodePtr node;
846
- Data_Get_Struct(self, xmlNode, node);
1025
+ xmlNsPtr ns;
847
1026
 
1027
+ Data_Get_Struct(self, xmlNode, node);
848
1028
 
849
- xmlNsPtr ns = xmlNewNs(
1029
+ ns = xmlNewNs(
850
1030
  node,
851
1031
  (const xmlChar *)StringValuePtr(href),
852
1032
  (const xmlChar *)(NIL_P(prefix) ? NULL : StringValuePtr(prefix))
@@ -874,19 +1054,21 @@ static VALUE add_namespace_definition(VALUE self, VALUE prefix, VALUE href)
874
1054
  static VALUE new(int argc, VALUE *argv, VALUE klass)
875
1055
  {
876
1056
  xmlDocPtr doc;
1057
+ xmlNodePtr node;
877
1058
  VALUE name;
878
1059
  VALUE document;
879
1060
  VALUE rest;
1061
+ VALUE rb_node;
880
1062
 
881
1063
  rb_scan_args(argc, argv, "2*", &name, &document, &rest);
882
1064
 
883
1065
  Data_Get_Struct(document, xmlDoc, doc);
884
1066
 
885
- xmlNodePtr node = xmlNewNode(NULL, (xmlChar *)StringValuePtr(name));
1067
+ node = xmlNewNode(NULL, (xmlChar *)StringValuePtr(name));
886
1068
  node->doc = doc->doc;
887
1069
  NOKOGIRI_ROOT_NODE(node);
888
1070
 
889
- VALUE rb_node = Nokogiri_wrap_xml_node(
1071
+ rb_node = Nokogiri_wrap_xml_node(
890
1072
  klass == cNokogiriXmlNode ? (VALUE)NULL : klass,
891
1073
  node
892
1074
  );
@@ -907,11 +1089,13 @@ static VALUE dump_html(VALUE self)
907
1089
  {
908
1090
  xmlBufferPtr buf ;
909
1091
  xmlNodePtr node ;
1092
+ VALUE html;
1093
+
910
1094
  Data_Get_Struct(self, xmlNode, node);
911
1095
 
912
1096
  buf = xmlBufferCreate() ;
913
1097
  htmlNodeDump(buf, node->doc, node);
914
- VALUE html = NOKOGIRI_STR_NEW2(buf->content);
1098
+ html = NOKOGIRI_STR_NEW2(buf->content);
915
1099
  xmlBufferFree(buf);
916
1100
  return html ;
917
1101
  }
@@ -931,14 +1115,64 @@ static VALUE compare(VALUE self, VALUE _other)
931
1115
  return INT2NUM((long)xmlXPathCmpNodes(other, node));
932
1116
  }
933
1117
 
934
- VALUE Nokogiri_wrap_xml_node(VALUE klass, xmlNodePtr node)
1118
+
1119
+ /* TODO: DOCUMENT ME */
1120
+ static VALUE in_context(VALUE self, VALUE _str, VALUE _options)
935
1121
  {
936
- assert(node);
1122
+ xmlNodePtr node;
1123
+ xmlNodePtr list;
1124
+ xmlNodeSetPtr set;
1125
+ VALUE doc, err;
1126
+
1127
+ Data_Get_Struct(self, xmlNode, node);
1128
+
1129
+ if(!node->parent)
1130
+ rb_raise(rb_eRuntimeError, "no contextual parsing on unlinked nodes");
1131
+
1132
+ doc = DOC_RUBY_OBJECT(node->doc);
1133
+ err = rb_iv_get(doc, "@errors");
937
1134
 
1135
+ xmlSetStructuredErrorFunc((void *)err, Nokogiri_error_array_pusher);
1136
+
1137
+ /* Twiddle global variable because of a bug in libxml2.
1138
+ * http://git.gnome.org/browse/libxml2/commit/?id=e20fb5a72c83cbfc8e4a8aa3943c6be8febadab7
1139
+ */
1140
+ #ifndef HTML_PARSE_NOIMPLIED
1141
+ htmlHandleOmittedElem(0);
1142
+ #endif
1143
+
1144
+ xmlParseInNodeContext(
1145
+ node,
1146
+ StringValuePtr(_str),
1147
+ (int)RSTRING_LEN(_str),
1148
+ (int)NUM2INT(_options),
1149
+ &list);
1150
+
1151
+ #ifndef HTML_PARSE_NOIMPLIED
1152
+ htmlHandleOmittedElem(1);
1153
+ #endif
1154
+
1155
+ xmlSetStructuredErrorFunc(NULL, NULL);
1156
+
1157
+ set = xmlXPathNodeSetCreate(NULL);
1158
+
1159
+ while(list) {
1160
+ xmlXPathNodeSetAddUnique(set, list);
1161
+ list = list->next;
1162
+ }
1163
+
1164
+ return Nokogiri_wrap_xml_node_set(set, doc);
1165
+ }
1166
+
1167
+
1168
+ VALUE Nokogiri_wrap_xml_node(VALUE klass, xmlNodePtr node)
1169
+ {
938
1170
  VALUE document = Qnil ;
939
1171
  VALUE node_cache = Qnil ;
940
1172
  VALUE rb_node = Qnil ;
941
1173
 
1174
+ assert(node);
1175
+
942
1176
  if(node->type == XML_DOCUMENT_NODE || node->type == XML_HTML_DOCUMENT_NODE)
943
1177
  return DOC_RUBY_OBJECT(node->doc);
944
1178
 
@@ -994,8 +1228,8 @@ VALUE Nokogiri_wrap_xml_node(VALUE klass, xmlNodePtr node)
994
1228
  node->_private = (void *)rb_node;
995
1229
 
996
1230
  if (DOC_RUBY_OBJECT_TEST(node->doc) && DOC_RUBY_OBJECT(node->doc)) {
997
- // it's OK if the document isn't fully realized (as in XML::Reader).
998
- // see http://github.com/tenderlove/nokogiri/issues/closed/#issue/95
1231
+ /* it's OK if the document isn't fully realized (as in XML::Reader). */
1232
+ /* see http://github.com/tenderlove/nokogiri/issues/closed/#issue/95 */
999
1233
  document = DOC_RUBY_OBJECT(node->doc);
1000
1234
  node_cache = DOC_NODE_CACHE(node->doc);
1001
1235
  rb_ary_push(node_cache, rb_node);
@@ -1037,7 +1271,10 @@ void init_xml_node()
1037
1271
  rb_define_method(klass, "node_name=", set_name, 1);
1038
1272
  rb_define_method(klass, "parent", get_parent, 0);
1039
1273
  rb_define_method(klass, "child", child, 0);
1274
+ rb_define_method(klass, "first_element_child", first_element_child, 0);
1275
+ rb_define_method(klass, "last_element_child", last_element_child, 0);
1040
1276
  rb_define_method(klass, "children", children, 0);
1277
+ rb_define_method(klass, "element_children", element_children, 0);
1041
1278
  rb_define_method(klass, "next_sibling", next_sibling, 0);
1042
1279
  rb_define_method(klass, "previous_sibling", previous_sibling, 0);
1043
1280
  rb_define_method(klass, "next_element", next_element, 0);
@@ -1054,6 +1291,7 @@ void init_xml_node()
1054
1291
  rb_define_method(klass, "attribute_with_ns", attribute_with_ns, 2);
1055
1292
  rb_define_method(klass, "namespace", namespace, 0);
1056
1293
  rb_define_method(klass, "namespace_definitions", namespace_definitions, 0);
1294
+ rb_define_method(klass, "namespace_scopes", namespace_scopes, 0);
1057
1295
  rb_define_method(klass, "encode_special_chars", encode_special_chars, 1);
1058
1296
  rb_define_method(klass, "dup", duplicate_node, -1);
1059
1297
  rb_define_method(klass, "unlink", unlink_node, 0);
@@ -1064,6 +1302,7 @@ void init_xml_node()
1064
1302
  rb_define_method(klass, "pointer_id", pointer_id, 0);
1065
1303
  rb_define_method(klass, "line", line, 0);
1066
1304
 
1305
+ rb_define_private_method(klass, "in_context", in_context, 2);
1067
1306
  rb_define_private_method(klass, "add_child_node", add_child, 1);
1068
1307
  rb_define_private_method(klass, "add_previous_sibling_node", add_previous_sibling, 1);
1069
1308
  rb_define_private_method(klass, "add_next_sibling_node", add_next_sibling, 1);