nokogiri 1.5.0-x86-mingw32 → 1.5.1.rc1-x86-mingw32

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (63) hide show
  1. data/CHANGELOG.ja.rdoc +39 -12
  2. data/CHANGELOG.rdoc +28 -0
  3. data/C_CODING_STYLE.rdoc +27 -0
  4. data/Manifest.txt +4 -0
  5. data/README.rdoc +11 -7
  6. data/Rakefile +40 -25
  7. data/bin/nokogiri +10 -2
  8. data/ext/nokogiri/extconf.rb +9 -1
  9. data/ext/nokogiri/html_document.c +16 -0
  10. data/ext/nokogiri/html_sax_parser_context.c +59 -37
  11. data/ext/nokogiri/html_sax_push_parser.c +87 -0
  12. data/ext/nokogiri/html_sax_push_parser.h +9 -0
  13. data/ext/nokogiri/nokogiri.c +6 -8
  14. data/ext/nokogiri/nokogiri.h +3 -0
  15. data/ext/nokogiri/xml_document.c +101 -3
  16. data/ext/nokogiri/xml_document.h +3 -3
  17. data/ext/nokogiri/xml_node.c +150 -58
  18. data/ext/nokogiri/xml_node_set.c +169 -120
  19. data/ext/nokogiri/xml_node_set.h +5 -0
  20. data/ext/nokogiri/xml_sax_parser_context.c +64 -41
  21. data/ext/nokogiri/xml_text.c +2 -0
  22. data/ext/nokogiri/xml_xpath_context.c +30 -24
  23. data/ext/nokogiri/xslt_stylesheet.c +62 -16
  24. data/ext/nokogiri/xslt_stylesheet.h +5 -0
  25. data/lib/nokogiri/1.8/nokogiri.so +0 -0
  26. data/lib/nokogiri/1.9/nokogiri.so +0 -0
  27. data/lib/nokogiri/css/parser.rb +165 -159
  28. data/lib/nokogiri/css/parser.y +6 -3
  29. data/lib/nokogiri/css/tokenizer.rb +1 -1
  30. data/lib/nokogiri/css/tokenizer.rex +1 -1
  31. data/lib/nokogiri/html.rb +1 -0
  32. data/lib/nokogiri/html/document.rb +82 -42
  33. data/lib/nokogiri/html/sax/push_parser.rb +16 -0
  34. data/lib/nokogiri/version.rb +1 -1
  35. data/lib/nokogiri/xml.rb +6 -0
  36. data/lib/nokogiri/xml/builder.rb +7 -1
  37. data/lib/nokogiri/xml/document.rb +32 -17
  38. data/lib/nokogiri/xml/document_fragment.rb +6 -1
  39. data/lib/nokogiri/xml/node.rb +40 -9
  40. data/lib/nokogiri/xslt.rb +5 -1
  41. data/tasks/cross_compile.rb +1 -0
  42. data/tasks/nokogiri.org.rb +6 -0
  43. data/tasks/test.rb +1 -0
  44. data/test/css/test_xpath_visitor.rb +6 -0
  45. data/test/helper.rb +1 -0
  46. data/test/html/test_document.rb +26 -0
  47. data/test/html/test_document_fragment.rb +1 -2
  48. data/test/test_memory_leak.rb +81 -1
  49. data/test/test_xslt_transforms.rb +152 -123
  50. data/test/xml/test_builder.rb +24 -2
  51. data/test/xml/test_c14n.rb +151 -0
  52. data/test/xml/test_document.rb +48 -0
  53. data/test/xml/test_namespace.rb +5 -0
  54. data/test/xml/test_node.rb +82 -1
  55. data/test/xml/test_node_attributes.rb +19 -0
  56. data/test/xml/test_node_inheritance.rb +32 -0
  57. data/test/xml/test_node_reparenting.rb +32 -0
  58. data/test/xml/test_node_set.rb +16 -8
  59. data/test/xml/test_reader_encoding.rb +16 -0
  60. data/test/xml/test_unparented_node.rb +24 -0
  61. data/test/xml/test_xinclude.rb +83 -0
  62. data/test/xml/test_xpath.rb +22 -0
  63. metadata +159 -126
@@ -0,0 +1,87 @@
1
+ #include <html_sax_push_parser.h>
2
+
3
+ /*
4
+ * call-seq:
5
+ * native_write(chunk, last_chunk)
6
+ *
7
+ * Write +chunk+ to PushParser. +last_chunk+ triggers the end_document handler
8
+ */
9
+ static VALUE native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
10
+ {
11
+ xmlParserCtxtPtr ctx;
12
+ const char * chunk = NULL;
13
+ int size = 0;
14
+
15
+
16
+ Data_Get_Struct(self, xmlParserCtxt, ctx);
17
+
18
+ if(Qnil != _chunk) {
19
+ chunk = StringValuePtr(_chunk);
20
+ size = (int)RSTRING_LEN(_chunk);
21
+ }
22
+
23
+ if(htmlParseChunk(ctx, chunk, size, Qtrue == _last_chunk ? 1 : 0)) {
24
+ if (!(ctx->options & XML_PARSE_RECOVER)) {
25
+ xmlErrorPtr e = xmlCtxtGetLastError(ctx);
26
+ Nokogiri_error_raise(NULL, e);
27
+ }
28
+ }
29
+
30
+ return self;
31
+ }
32
+
33
+ /*
34
+ * call-seq:
35
+ * initialize_native(xml_sax, filename, encoding)
36
+ *
37
+ * Initialize the push parser with +xml_sax+ using +filename+ and +encoding+
38
+ */
39
+ static VALUE initialize_native(VALUE self, VALUE _xml_sax, VALUE _filename,
40
+ VALUE encoding)
41
+ {
42
+ htmlSAXHandlerPtr sax;
43
+ const char * filename = NULL;
44
+ htmlParserCtxtPtr ctx;
45
+ xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
46
+
47
+ Data_Get_Struct(_xml_sax, xmlSAXHandler, sax);
48
+
49
+ if(_filename != Qnil) filename = StringValuePtr(_filename);
50
+
51
+ if (!NIL_P(encoding)) {
52
+ enc = xmlParseCharEncoding(StringValuePtr(encoding));
53
+ if (enc == XML_CHAR_ENCODING_ERROR)
54
+ rb_raise(rb_eArgError, "Unsupported Encoding");
55
+ }
56
+
57
+ ctx = htmlCreatePushParserCtxt(
58
+ sax,
59
+ NULL,
60
+ NULL,
61
+ 0,
62
+ filename,
63
+ enc
64
+ );
65
+ if(ctx == NULL)
66
+ rb_raise(rb_eRuntimeError, "Could not create a parser context");
67
+
68
+ ctx->userData = NOKOGIRI_SAX_TUPLE_NEW(ctx, self);
69
+
70
+ ctx->sax2 = 1;
71
+ DATA_PTR(self) = ctx;
72
+ return self;
73
+ }
74
+
75
+ VALUE cNokogiriHtmlSaxPushParser;
76
+ void init_html_sax_push_parser()
77
+ {
78
+ VALUE nokogiri = rb_define_module("Nokogiri");
79
+ VALUE html = rb_define_module_under(nokogiri, "HTML");
80
+ VALUE sax = rb_define_module_under(html, "SAX");
81
+ VALUE klass = rb_define_class_under(sax, "PushParser", cNokogiriXmlSaxPushParser);
82
+
83
+ cNokogiriHtmlSaxPushParser = klass;
84
+
85
+ rb_define_private_method(klass, "initialize_native", initialize_native, 3);
86
+ rb_define_private_method(klass, "native_write", native_write, 2);
87
+ }
@@ -0,0 +1,9 @@
1
+ #ifndef NOKOGIRI_HTML_SAX_PUSH_PARSER
2
+ #define NOKOGIRI_HTML_SAX_PUSH_PARSER
3
+
4
+ #include <nokogiri.h>
5
+
6
+ void init_html_sax_push_parser();
7
+
8
+ extern VALUE cNokogiriHtmlSaxPushParser ;
9
+ #endif
@@ -35,15 +35,12 @@ void vasprintf_free (void *p)
35
35
  }
36
36
  #endif
37
37
 
38
+ #ifdef HAVE_RUBY_UTIL_H
39
+ #include "ruby/util.h"
40
+ #else
38
41
  #ifndef __MACRUBY__
39
- /* Allocate strdupped strings with the same memory allocator Ruby uses. */
40
- static char *ruby_strdup(const char *s)
41
- {
42
- size_t len = strlen(s);
43
- char *result = ruby_xmalloc((ssize_t) (len + 1));
44
- memcpy(result, s, len + 1);
45
- return result;
46
- }
42
+ #include "util.h"
43
+ #endif
47
44
  #endif
48
45
 
49
46
  void Init_nokogiri()
@@ -104,6 +101,7 @@ void Init_nokogiri()
104
101
  init_xml_entity_decl();
105
102
  init_xml_namespace();
106
103
  init_html_sax_parser_context();
104
+ init_html_sax_push_parser();
107
105
  init_xslt_stylesheet();
108
106
  init_xml_syntax_error();
109
107
  init_html_entity_lookup();
@@ -27,7 +27,9 @@ int vasprintf (char **strp, const char *fmt, va_list ap);
27
27
  #include <libxml/HTMLparser.h>
28
28
  #include <libxml/HTMLtree.h>
29
29
  #include <libxml/relaxng.h>
30
+ #include <libxml/xinclude.h>
30
31
  #include <libxslt/extensions.h>
32
+ #include <libxml/c14n.h>
31
33
  #include <ruby.h>
32
34
 
33
35
  #ifdef HAVE_RUBY_ENCODING_H
@@ -102,6 +104,7 @@ int vasprintf (char **strp, const char *fmt, va_list ap);
102
104
  #include <xml_sax_push_parser.h>
103
105
  #include <xml_reader.h>
104
106
  #include <html_sax_parser_context.h>
107
+ #include <html_sax_push_parser.h>
105
108
  #include <xslt_stylesheet.h>
106
109
  #include <xml_syntax_error.h>
107
110
  #include <xml_schema.h>
@@ -48,12 +48,15 @@ static void recursively_remove_namespaces_from_node(xmlNodePtr node)
48
48
  for (child = node->children ; child ; child = child->next)
49
49
  recursively_remove_namespaces_from_node(child);
50
50
 
51
- if (node->nsDef) {
51
+ if (((node->type == XML_ELEMENT_NODE) ||
52
+ (node->type == XML_XINCLUDE_START) ||
53
+ (node->type == XML_XINCLUDE_END)) &&
54
+ node->nsDef) {
52
55
  xmlFreeNsList(node->nsDef);
53
56
  node->nsDef = NULL;
54
57
  }
55
58
 
56
- if (node->properties != NULL) {
59
+ if (node->type == XML_ELEMENT_NODE && node->properties != NULL) {
57
60
  property = node->properties ;
58
61
  while (property != NULL) {
59
62
  if (property->ns) property->ns = NULL ;
@@ -152,6 +155,9 @@ static VALUE set_encoding(VALUE self, VALUE encoding)
152
155
  xmlDocPtr doc;
153
156
  Data_Get_Struct(self, xmlDoc, doc);
154
157
 
158
+ if (doc->encoding)
159
+ free((char *) doc->encoding); // this may produce a gcc cast warning
160
+
155
161
  doc->encoding = xmlStrdup((xmlChar *)StringValuePtr(encoding));
156
162
 
157
163
  return encoding;
@@ -421,6 +427,97 @@ static VALUE create_entity(int argc, VALUE *argv, VALUE self)
421
427
  return Nokogiri_wrap_xml_node(cNokogiriXmlEntityDecl, (xmlNodePtr)ptr);
422
428
  }
423
429
 
430
+ static int block_caller(void * ctx, xmlNodePtr _node, xmlNodePtr _parent)
431
+ {
432
+ VALUE block;
433
+ VALUE node;
434
+ VALUE parent;
435
+ VALUE ret;
436
+
437
+ if(_node->type == XML_NAMESPACE_DECL){
438
+ node = Nokogiri_wrap_xml_namespace(_parent->doc, (xmlNsPtr) _node);
439
+ }
440
+ else{
441
+ node = Nokogiri_wrap_xml_node(Qnil, _node);
442
+ }
443
+ parent = _parent ? Nokogiri_wrap_xml_node(Qnil, _parent) : Qnil;
444
+ block = (VALUE)ctx;
445
+
446
+ ret = rb_funcall(block, rb_intern("call"), 2, node, parent);
447
+
448
+ if(Qfalse == ret || Qnil == ret) return 0;
449
+
450
+ return 1;
451
+ }
452
+
453
+ /* call-seq:
454
+ * doc.canonicalize(mode=XML_C14N_1_0,inclusive_namespaces=nil,with_comments=false)
455
+ * doc.canonicalize { |obj, parent| ... }
456
+ *
457
+ * Canonicalize a document and return the results. Takes an optional block
458
+ * that takes two parameters: the +obj+ and that node's +parent+.
459
+ * The +obj+ will be either a Nokogiri::XML::Node or a Nokogiri::XML::Namespace.
460
+ * The block must return a non-nil, non-false value if the +obj+ passed in
461
+ * should be included in the canonicalized document.
462
+ */
463
+ static VALUE canonicalize(int argc, VALUE* argv, VALUE self)
464
+ {
465
+ VALUE mode;
466
+ VALUE incl_ns;
467
+ VALUE with_comments;
468
+ xmlChar **ns;
469
+ long ns_len, i;
470
+
471
+ rb_scan_args(argc, argv, "03", &mode, &incl_ns, &with_comments);
472
+
473
+ xmlDocPtr doc;
474
+ xmlOutputBufferPtr buf;
475
+ xmlC14NIsVisibleCallback cb = NULL;
476
+ void * ctx = NULL;
477
+
478
+ VALUE rb_cStringIO;
479
+ VALUE io;
480
+
481
+ Data_Get_Struct(self, xmlDoc, doc);
482
+
483
+ rb_cStringIO = rb_const_get_at(rb_cObject, rb_intern("StringIO"));
484
+ io = rb_class_new_instance(0, 0, rb_cStringIO);
485
+ buf = xmlAllocOutputBuffer(NULL);
486
+
487
+ buf->writecallback = (xmlOutputWriteCallback)io_write_callback;
488
+ buf->closecallback = (xmlOutputCloseCallback)io_close_callback;
489
+ buf->context = (void *)io;
490
+
491
+ if(rb_block_given_p()) {
492
+ cb = block_caller;
493
+ ctx = (void *)rb_block_proc();
494
+ }
495
+
496
+ if(NIL_P(incl_ns)){
497
+ ns = NULL;
498
+ }
499
+ else{
500
+ ns_len = RARRAY_LEN(incl_ns);
501
+ ns = calloc((size_t)ns_len+1, sizeof(xmlChar *));
502
+ for (i = 0 ; i < ns_len ; i++) {
503
+ VALUE entry = rb_ary_entry(incl_ns, i);
504
+ const char * ptr = StringValuePtr(entry);
505
+ ns[i] = (xmlChar*) ptr;
506
+ }
507
+ }
508
+
509
+
510
+ xmlC14NExecute(doc, cb, ctx,
511
+ (int) (NIL_P(mode) ? 0 : NUM2INT(mode)),
512
+ ns,
513
+ (int) (NIL_P(with_comments) ? 0 : 1),
514
+ buf);
515
+
516
+ xmlOutputBufferClose(buf);
517
+
518
+ return rb_funcall(io, rb_intern("string"), 0);
519
+ }
520
+
424
521
  VALUE cNokogiriXmlDocument ;
425
522
  void init_xml_document()
426
523
  {
@@ -444,6 +541,7 @@ void init_xml_document()
444
541
  rb_define_method(klass, "encoding", encoding, 0);
445
542
  rb_define_method(klass, "encoding=", set_encoding, 1);
446
543
  rb_define_method(klass, "version", version, 0);
544
+ rb_define_method(klass, "canonicalize", canonicalize, -1);
447
545
  rb_define_method(klass, "dup", duplicate_node, -1);
448
546
  rb_define_method(klass, "url", url, 0);
449
547
  rb_define_method(klass, "create_entity", create_entity, -1);
@@ -467,7 +565,7 @@ VALUE Nokogiri_wrap_xml_document(VALUE klass, xmlDocPtr doc)
467
565
  rb_iv_set(rb_doc, "@decorators", Qnil);
468
566
  rb_iv_set(rb_doc, "@node_cache", cache);
469
567
 
470
- tuple->doc = (void *)rb_doc;
568
+ tuple->doc = rb_doc;
471
569
  tuple->unlinkedNodes = st_init_numtable_with_size(128);
472
570
  tuple->node_cache = cache;
473
571
  doc->_private = tuple ;
@@ -4,7 +4,7 @@
4
4
  #include <nokogiri.h>
5
5
 
6
6
  struct _nokogiriTuple {
7
- xmlDocPtr doc;
7
+ VALUE doc;
8
8
  st_table *unlinkedNodes;
9
9
  VALUE node_cache;
10
10
  };
@@ -15,9 +15,9 @@ void init_xml_document();
15
15
  VALUE Nokogiri_wrap_xml_document(VALUE klass, xmlDocPtr doc);
16
16
 
17
17
  #define DOC_RUBY_OBJECT_TEST(x) ((nokogiriTuplePtr)(x->_private))
18
- #define DOC_RUBY_OBJECT(x) ((VALUE)((nokogiriTuplePtr)(x->_private))->doc)
18
+ #define DOC_RUBY_OBJECT(x) (((nokogiriTuplePtr)(x->_private))->doc)
19
19
  #define DOC_UNLINKED_NODE_HASH(x) (((nokogiriTuplePtr)(x->_private))->unlinkedNodes)
20
- #define DOC_NODE_CACHE(x) ((VALUE)((nokogiriTuplePtr)(x->_private))->node_cache)
20
+ #define DOC_NODE_CACHE(x) (((nokogiriTuplePtr)(x->_private))->node_cache)
21
21
 
22
22
  extern VALUE cNokogiriXmlDocument ;
23
23
  #endif
@@ -138,7 +138,8 @@ static VALUE reparent_node_with(VALUE pivot_obj, VALUE reparentee_obj, pivot_rep
138
138
  }
139
139
  }
140
140
 
141
- if (reparentee->type == XML_TEXT_NODE && pivot->next && pivot->next->type == XML_TEXT_NODE) {
141
+ if (prf != xmlAddPrevSibling && prf != xmlAddNextSibling
142
+ && reparentee->type == XML_TEXT_NODE && pivot->next && pivot->next->type == XML_TEXT_NODE) {
142
143
  /*
143
144
  * libxml merges text nodes in a right-to-left fashion, meaning that if
144
145
  * there are two text nodes who would be adjacent, the right (or following,
@@ -484,7 +485,13 @@ static VALUE previous_element(VALUE self)
484
485
  /* :nodoc: */
485
486
  static VALUE replace(VALUE self, VALUE new_node)
486
487
  {
487
- return reparent_node_with(self, new_node, xmlReplaceNodeWrapper) ;
488
+ VALUE reparent = reparent_node_with(self, new_node, xmlReplaceNodeWrapper);
489
+
490
+ xmlNodePtr pivot;
491
+ Data_Get_Struct(self, xmlNode, pivot);
492
+ NOKOGIRI_ROOT_NODE(pivot);
493
+
494
+ return reparent;
488
495
  }
489
496
 
490
497
  /*
@@ -658,9 +665,28 @@ static VALUE namespaced_key_eh(VALUE self, VALUE attribute, VALUE namespace)
658
665
  */
659
666
  static VALUE set(VALUE self, VALUE property, VALUE value)
660
667
  {
661
- xmlNodePtr node;
668
+ xmlNodePtr node, cur;
669
+ xmlAttrPtr prop;
662
670
  Data_Get_Struct(self, xmlNode, node);
663
671
 
672
+ /* If a matching attribute node already exists, then xmlSetProp will destroy
673
+ * the existing node's children. However, if Nokogiri has a node object
674
+ * pointing to one of those children, we are left with a broken reference.
675
+ *
676
+ * We can avoid this by unlinking these nodes first.
677
+ */
678
+ if (node->type != XML_ELEMENT_NODE)
679
+ return(Qnil);
680
+ prop = xmlHasProp(node, (xmlChar *)StringValuePtr(property));
681
+ if (prop && prop->children) {
682
+ for (cur = prop->children; cur; cur = cur->next) {
683
+ if (cur->_private) {
684
+ NOKOGIRI_ROOT_NODE(cur);
685
+ xmlUnlinkNode(cur);
686
+ }
687
+ }
688
+ }
689
+
664
690
  xmlSetProp(node, (xmlChar *)StringValuePtr(property),
665
691
  (xmlChar *)StringValuePtr(value));
666
692
 
@@ -1157,73 +1183,134 @@ static VALUE compare(VALUE self, VALUE _other)
1157
1183
  }
1158
1184
 
1159
1185
 
1160
- /* TODO: DOCUMENT ME */
1161
- static VALUE in_context(VALUE self, VALUE _str, VALUE _options)
1186
+ /*
1187
+ * call-seq:
1188
+ * process_xincludes(options)
1189
+ *
1190
+ * Loads and substitutes all xinclude elements below the node. The
1191
+ * parser context will be initialized with +options+.
1192
+ */
1193
+ static VALUE process_xincludes(VALUE self, VALUE options)
1162
1194
  {
1195
+ int rcode ;
1163
1196
  xmlNodePtr node;
1164
- xmlNodePtr list;
1165
- xmlNodePtr child_iter;
1166
- xmlNodeSetPtr set;
1167
- xmlParserErrors error;
1168
- VALUE doc, err;
1197
+ VALUE error_list = rb_ary_new();
1169
1198
 
1170
1199
  Data_Get_Struct(self, xmlNode, node);
1171
1200
 
1172
- doc = DOC_RUBY_OBJECT(node->doc);
1173
- err = rb_iv_get(doc, "@errors");
1201
+ xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);
1202
+ rcode = xmlXIncludeProcessTreeFlags(node, (int)NUM2INT(options));
1203
+ xmlSetStructuredErrorFunc(NULL, NULL);
1174
1204
 
1175
- xmlSetStructuredErrorFunc((void *)err, Nokogiri_error_array_pusher);
1205
+ if (rcode < 0) {
1206
+ xmlErrorPtr error;
1176
1207
 
1177
- /* Twiddle global variable because of a bug in libxml2.
1178
- * http://git.gnome.org/browse/libxml2/commit/?id=e20fb5a72c83cbfc8e4a8aa3943c6be8febadab7
1179
- */
1208
+ error = xmlGetLastError();
1209
+ if(error)
1210
+ rb_exc_raise(Nokogiri_wrap_xml_syntax_error((VALUE)NULL, error));
1211
+ else
1212
+ rb_raise(rb_eRuntimeError, "Could not perform xinclude substitution");
1213
+ }
1214
+
1215
+ return self;
1216
+ }
1217
+
1218
+
1219
+ /* TODO: DOCUMENT ME */
1220
+ static VALUE in_context(VALUE self, VALUE _str, VALUE _options)
1221
+ {
1222
+ xmlNodePtr node, list, child_iter, tmp, node_children, doc_children;
1223
+ xmlNodeSetPtr set;
1224
+ xmlParserErrors error;
1225
+ VALUE doc, err;
1226
+ int doc_is_empty;
1227
+
1228
+ Data_Get_Struct(self, xmlNode, node);
1229
+
1230
+ doc = DOC_RUBY_OBJECT(node->doc);
1231
+ err = rb_iv_get(doc, "@errors");
1232
+ doc_is_empty = (node->doc->children == NULL) ? 1 : 0;
1233
+ node_children = node->children;
1234
+ doc_children = node->doc->children;
1235
+
1236
+ xmlSetStructuredErrorFunc((void *)err, Nokogiri_error_array_pusher);
1237
+
1238
+ /* Twiddle global variable because of a bug in libxml2.
1239
+ * http://git.gnome.org/browse/libxml2/commit/?id=e20fb5a72c83cbfc8e4a8aa3943c6be8febadab7
1240
+ */
1180
1241
  #ifndef HTML_PARSE_NOIMPLIED
1181
- htmlHandleOmittedElem(0);
1242
+ htmlHandleOmittedElem(0);
1182
1243
  #endif
1183
1244
 
1184
- error = xmlParseInNodeContext(
1185
- node,
1186
- StringValuePtr(_str),
1187
- (int)RSTRING_LEN(_str),
1188
- (int)NUM2INT(_options),
1189
- &list);
1190
-
1191
- /* make sure parent/child pointers are coherent so an unlink will work properly (#331) */
1192
- child_iter = node->doc->children ;
1193
- while (child_iter) {
1194
- if (child_iter->parent != (xmlNodePtr)node->doc)
1195
- child_iter->parent = (xmlNodePtr)node->doc ;
1196
- child_iter = child_iter->next ;
1197
- }
1245
+ /* This function adds a fake node to the child of +node+. If the parser
1246
+ * does not exit cleanly with XML_ERR_OK, the list is freed. This can
1247
+ * leave the child pointers in a bad state if they were originally empty.
1248
+ *
1249
+ * http://git.gnome.org/browse/libxml2/tree/parser.c#n13177
1250
+ * */
1251
+ error = xmlParseInNodeContext(node, StringValuePtr(_str),
1252
+ (int)RSTRING_LEN(_str),
1253
+ (int)NUM2INT(_options), &list);
1254
+
1255
+ /* xmlParseInNodeContext should not mutate the original document or node,
1256
+ * so reassigning these pointers should be OK. The reason we're reassigning
1257
+ * is because if there were errors, it's possible for the child pointers
1258
+ * to be manipulated. */
1259
+ if (error != XML_ERR_OK) {
1260
+ node->doc->children = doc_children;
1261
+ node->children = node_children;
1262
+ }
1263
+
1264
+ /* make sure parent/child pointers are coherent so an unlink will work
1265
+ * properly (#331)
1266
+ */
1267
+ child_iter = node->doc->children ;
1268
+ while (child_iter) {
1269
+ if (child_iter->parent != (xmlNodePtr)node->doc)
1270
+ child_iter->parent = (xmlNodePtr)node->doc;
1271
+ child_iter = child_iter->next;
1272
+ }
1198
1273
 
1199
1274
  #ifndef HTML_PARSE_NOIMPLIED
1200
- htmlHandleOmittedElem(1);
1275
+ htmlHandleOmittedElem(1);
1201
1276
  #endif
1202
1277
 
1203
- xmlSetStructuredErrorFunc(NULL, NULL);
1278
+ xmlSetStructuredErrorFunc(NULL, NULL);
1204
1279
 
1205
- /* FIXME: This probably needs to handle more constants... */
1206
- switch(error) {
1207
- case XML_ERR_OK:
1208
- break;
1209
-
1210
- case XML_ERR_INTERNAL_ERROR:
1211
- case XML_ERR_NO_MEMORY:
1212
- rb_raise(rb_eRuntimeError, "error parsing fragment (%d)", error);
1213
- break;
1280
+ /* Workaround for a libxml2 bug where a parsing error may leave a broken
1281
+ * node reference in node->doc->children.
1282
+ * This workaround is limited to when a parse error occurs, the document
1283
+ * went from having no children to having children, and the context node is
1284
+ * part of a document fragment.
1285
+ * https://bugzilla.gnome.org/show_bug.cgi?id=668155
1286
+ */
1287
+ if (error != XML_ERR_OK && doc_is_empty && node->doc->children != NULL) {
1288
+ tmp = node;
1289
+ while (tmp->parent)
1290
+ tmp = tmp->parent;
1291
+
1292
+ if (tmp->type == XML_DOCUMENT_FRAG_NODE)
1293
+ node->doc->children = NULL;
1294
+ }
1214
1295
 
1215
- default:
1216
- break;
1217
- }
1296
+ /* FIXME: This probably needs to handle more constants... */
1297
+ switch (error) {
1298
+ case XML_ERR_INTERNAL_ERROR:
1299
+ case XML_ERR_NO_MEMORY:
1300
+ rb_raise(rb_eRuntimeError, "error parsing fragment (%d)", error);
1301
+ break;
1302
+ default:
1303
+ break;
1304
+ }
1218
1305
 
1219
- set = xmlXPathNodeSetCreate(NULL);
1306
+ set = xmlXPathNodeSetCreate(NULL);
1220
1307
 
1221
- while(list) {
1222
- xmlXPathNodeSetAddUnique(set, list);
1223
- list = list->next;
1224
- }
1308
+ while (list) {
1309
+ xmlXPathNodeSetAddUnique(set, list);
1310
+ list = list->next;
1311
+ }
1225
1312
 
1226
- return Nokogiri_wrap_xml_node_set(set, doc);
1313
+ return Nokogiri_wrap_xml_node_set(set, doc);
1227
1314
  }
1228
1315
 
1229
1316
 
@@ -1232,7 +1319,7 @@ VALUE Nokogiri_wrap_xml_node(VALUE klass, xmlNodePtr node)
1232
1319
  VALUE document = Qnil ;
1233
1320
  VALUE node_cache = Qnil ;
1234
1321
  VALUE rb_node = Qnil ;
1235
- int node_has_a_document = 0 ;
1322
+ nokogiriTuplePtr node_has_a_document;
1236
1323
  void (*mark_method)(xmlNodePtr) = NULL ;
1237
1324
 
1238
1325
  assert(node);
@@ -1240,7 +1327,13 @@ VALUE Nokogiri_wrap_xml_node(VALUE klass, xmlNodePtr node)
1240
1327
  if(node->type == XML_DOCUMENT_NODE || node->type == XML_HTML_DOCUMENT_NODE)
1241
1328
  return DOC_RUBY_OBJECT(node->doc);
1242
1329
 
1243
- if(NULL != node->_private) return (VALUE)node->_private;
1330
+ /* It's OK if the node doesn't have a fully-realized document (as in XML::Reader). */
1331
+ /* see https://github.com/tenderlove/nokogiri/issues/95 */
1332
+ /* and https://github.com/tenderlove/nokogiri/issues/439 */
1333
+ node_has_a_document = DOC_RUBY_OBJECT_TEST(node->doc);
1334
+
1335
+ if(node->_private && node_has_a_document)
1336
+ return (VALUE)node->_private;
1244
1337
 
1245
1338
  if(!RTEST(klass)) {
1246
1339
  switch(node->type)
@@ -1286,10 +1379,6 @@ VALUE Nokogiri_wrap_xml_node(VALUE klass, xmlNodePtr node)
1286
1379
  }
1287
1380
  }
1288
1381
 
1289
- /* It's OK if the node doesn't have a fully-realized document (as in XML::Reader). */
1290
- /* see https://github.com/tenderlove/nokogiri/issues/95 */
1291
- /* and https://github.com/tenderlove/nokogiri/issues/439 */
1292
- node_has_a_document = (DOC_RUBY_OBJECT_TEST(node->doc) && DOC_RUBY_OBJECT(node->doc)) ? 1 : 0 ;
1293
1382
  mark_method = node_has_a_document ? mark : NULL ;
1294
1383
 
1295
1384
  rb_node = Data_Wrap_Struct(klass, mark_method, debug_node_dealloc, node) ;
@@ -1351,7 +1440,6 @@ void init_xml_node()
1351
1440
  rb_define_method(klass, "key?", key_eh, 1);
1352
1441
  rb_define_method(klass, "namespaced_key?", namespaced_key_eh, 2);
1353
1442
  rb_define_method(klass, "blank?", blank_eh, 0);
1354
- rb_define_method(klass, "[]=", set, 2);
1355
1443
  rb_define_method(klass, "attribute_nodes", attribute_nodes, 0);
1356
1444
  rb_define_method(klass, "attribute", attr, 1);
1357
1445
  rb_define_method(klass, "attribute_with_ns", attribute_with_ns, 2);
@@ -1368,6 +1456,7 @@ void init_xml_node()
1368
1456
  rb_define_method(klass, "pointer_id", pointer_id, 0);
1369
1457
  rb_define_method(klass, "line", line, 0);
1370
1458
 
1459
+ rb_define_private_method(klass, "process_xincludes", process_xincludes, 1);
1371
1460
  rb_define_private_method(klass, "in_context", in_context, 2);
1372
1461
  rb_define_private_method(klass, "add_child_node", add_child, 1);
1373
1462
  rb_define_private_method(klass, "add_previous_sibling_node", add_previous_sibling, 1);
@@ -1377,9 +1466,12 @@ void init_xml_node()
1377
1466
  rb_define_private_method(klass, "native_write_to", native_write_to, 4);
1378
1467
  rb_define_private_method(klass, "native_content=", set_content, 1);
1379
1468
  rb_define_private_method(klass, "get", get, 1);
1469
+ rb_define_private_method(klass, "set", set, 2);
1380
1470
  rb_define_private_method(klass, "set_namespace", set_namespace, 1);
1381
1471
  rb_define_private_method(klass, "compare", compare, 1);
1382
1472
 
1383
1473
  decorate = rb_intern("decorate");
1384
1474
  decorate_bang = rb_intern("decorate!");
1385
1475
  }
1476
+
1477
+ /* vim: set noet sw=4 sws=4 */