nokogiri-maglev- 1.5.0.1 → 1.5.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (61) hide show
  1. data/CHANGELOG.ja.rdoc +56 -12
  2. data/CHANGELOG.rdoc +49 -0
  3. data/C_CODING_STYLE.rdoc +27 -0
  4. data/Manifest.txt +4 -0
  5. data/README.rdoc +11 -7
  6. data/Rakefile +42 -27
  7. data/bin/nokogiri +10 -2
  8. data/ext/nokogiri/extconf.rb +11 -3
  9. data/ext/nokogiri/html_document.c +16 -0
  10. data/ext/nokogiri/html_sax_parser_context.c +59 -37
  11. data/ext/nokogiri/html_sax_push_parser.c +87 -0
  12. data/ext/nokogiri/html_sax_push_parser.h +9 -0
  13. data/ext/nokogiri/nokogiri.c +7 -9
  14. data/ext/nokogiri/nokogiri.h +3 -0
  15. data/ext/nokogiri/xml_document.c +101 -3
  16. data/ext/nokogiri/xml_document.h +3 -3
  17. data/ext/nokogiri/xml_node.c +151 -58
  18. data/ext/nokogiri/xml_node_set.c +169 -120
  19. data/ext/nokogiri/xml_node_set.h +5 -0
  20. data/ext/nokogiri/xml_sax_parser_context.c +64 -41
  21. data/ext/nokogiri/xml_text.c +2 -0
  22. data/ext/nokogiri/xml_xpath_context.c +31 -25
  23. data/ext/nokogiri/xslt_stylesheet.c +62 -16
  24. data/ext/nokogiri/xslt_stylesheet.h +5 -0
  25. data/lib/nokogiri/css/parser.rb +165 -159
  26. data/lib/nokogiri/css/parser.y +6 -3
  27. data/lib/nokogiri/css/tokenizer.rb +1 -1
  28. data/lib/nokogiri/css/tokenizer.rex +1 -1
  29. data/lib/nokogiri/html.rb +1 -0
  30. data/lib/nokogiri/html/document.rb +82 -42
  31. data/lib/nokogiri/html/sax/push_parser.rb +16 -0
  32. data/lib/nokogiri/version.rb +1 -1
  33. data/lib/nokogiri/xml.rb +6 -0
  34. data/lib/nokogiri/xml/builder.rb +7 -1
  35. data/lib/nokogiri/xml/document.rb +32 -17
  36. data/lib/nokogiri/xml/document_fragment.rb +6 -1
  37. data/lib/nokogiri/xml/node.rb +40 -9
  38. data/lib/nokogiri/xslt.rb +5 -1
  39. data/tasks/cross_compile.rb +1 -0
  40. data/tasks/nokogiri.org.rb +6 -0
  41. data/tasks/test.rb +1 -0
  42. data/test/css/test_xpath_visitor.rb +6 -0
  43. data/test/helper.rb +1 -0
  44. data/test/html/test_document.rb +26 -0
  45. data/test/html/test_document_fragment.rb +1 -2
  46. data/test/test_memory_leak.rb +81 -1
  47. data/test/test_xslt_transforms.rb +152 -123
  48. data/test/xml/test_builder.rb +24 -2
  49. data/test/xml/test_c14n.rb +151 -0
  50. data/test/xml/test_document.rb +48 -0
  51. data/test/xml/test_namespace.rb +5 -0
  52. data/test/xml/test_node.rb +82 -1
  53. data/test/xml/test_node_attributes.rb +19 -0
  54. data/test/xml/test_node_inheritance.rb +32 -0
  55. data/test/xml/test_node_reparenting.rb +32 -0
  56. data/test/xml/test_node_set.rb +16 -8
  57. data/test/xml/test_reader_encoding.rb +16 -0
  58. data/test/xml/test_unparented_node.rb +32 -0
  59. data/test/xml/test_xinclude.rb +83 -0
  60. data/test/xml/test_xpath.rb +22 -0
  61. metadata +208 -241
@@ -13,31 +13,35 @@ static void deallocate(xmlParserCtxtPtr ctxt)
13
13
  NOKOGIRI_DEBUG_END(handler);
14
14
  }
15
15
 
16
- static VALUE parse_memory(VALUE klass, VALUE data, VALUE encoding)
16
+ static VALUE
17
+ parse_memory(VALUE klass, VALUE data, VALUE encoding)
17
18
  {
18
- htmlParserCtxtPtr ctxt;
19
-
20
- if(NIL_P(data)) rb_raise(rb_eArgError, "data cannot be nil");
21
- if(!(int)RSTRING_LEN(data))
22
- rb_raise(rb_eRuntimeError, "data cannot be empty");
23
-
24
- ctxt = htmlCreateMemoryParserCtxt(
25
- StringValuePtr(data),
26
- (int)RSTRING_LEN(data)
27
- );
19
+ htmlParserCtxtPtr ctxt;
20
+
21
+ if (NIL_P(data))
22
+ rb_raise(rb_eArgError, "data cannot be nil");
23
+ if (!(int)RSTRING_LEN(data))
24
+ rb_raise(rb_eRuntimeError, "data cannot be empty");
25
+
26
+ ctxt = htmlCreateMemoryParserCtxt(StringValuePtr(data),
27
+ (int)RSTRING_LEN(data));
28
+ if (ctxt->sax) {
29
+ xmlFree(ctxt->sax);
30
+ ctxt->sax = NULL;
31
+ }
28
32
 
29
- if(RTEST(encoding)) {
30
- xmlCharEncodingHandlerPtr enc = xmlFindCharEncodingHandler(StringValuePtr(encoding));
31
- if(enc != NULL) {
32
- xmlSwitchToEncoding(ctxt, enc);
33
- if(ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
34
- rb_raise(rb_eRuntimeError, "Unsupported encoding %s",
35
- StringValuePtr(encoding));
36
- }
33
+ if (RTEST(encoding)) {
34
+ xmlCharEncodingHandlerPtr enc = xmlFindCharEncodingHandler(StringValuePtr(encoding));
35
+ if (enc != NULL) {
36
+ xmlSwitchToEncoding(ctxt, enc);
37
+ if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
38
+ rb_raise(rb_eRuntimeError, "Unsupported encoding %s",
39
+ StringValuePtr(encoding));
40
+ }
41
+ }
37
42
  }
38
- }
39
43
 
40
- return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);
44
+ return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);
41
45
  }
42
46
 
43
47
  static VALUE parse_file(VALUE klass, VALUE filename, VALUE encoding)
@@ -49,30 +53,48 @@ static VALUE parse_file(VALUE klass, VALUE filename, VALUE encoding)
49
53
  return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);
50
54
  }
51
55
 
52
- static VALUE parse_with(VALUE self, VALUE sax_handler)
56
+ static VALUE
57
+ parse_doc(VALUE ctxt_val)
58
+ {
59
+ htmlParserCtxtPtr ctxt = (htmlParserCtxtPtr)ctxt_val;
60
+ htmlParseDocument(ctxt);
61
+ return Qnil;
62
+ }
63
+
64
+ static VALUE
65
+ parse_doc_finalize(VALUE ctxt_val)
53
66
  {
54
- htmlParserCtxtPtr ctxt;
55
- htmlSAXHandlerPtr sax;
67
+ htmlParserCtxtPtr ctxt = (htmlParserCtxtPtr)ctxt_val;
68
+
69
+ if (ctxt->myDoc)
70
+ xmlFreeDoc(ctxt->myDoc);
56
71
 
57
- if(!rb_obj_is_kind_of_(sax_handler, cNokogiriXmlSaxParser))
58
- rb_raise(rb_eArgError, "argument must be a Nokogiri::XML::SAX::Parser");
72
+ NOKOGIRI_SAX_TUPLE_DESTROY(ctxt->userData);
73
+ return Qnil;
74
+ }
75
+
76
+ static VALUE
77
+ parse_with(VALUE self, VALUE sax_handler)
78
+ {
79
+ htmlParserCtxtPtr ctxt;
80
+ htmlSAXHandlerPtr sax;
59
81
 
60
- Data_Get_Struct(self, htmlParserCtxt, ctxt);
61
- Data_Get_Struct(sax_handler, htmlSAXHandler, sax);
82
+ if (!RTEST(rb_obj_is_kind_of(sax_handler, cNokogiriXmlSaxParser)))
83
+ rb_raise(rb_eArgError, "argument must be a Nokogiri::XML::SAX::Parser");
62
84
 
63
- /* Free the sax handler since we'll assign our own */
64
- if(ctxt->sax && ctxt->sax != (xmlSAXHandlerPtr)&xmlDefaultSAXHandler)
65
- xmlFree(ctxt->sax);
85
+ Data_Get_Struct(self, htmlParserCtxt, ctxt);
86
+ Data_Get_Struct(sax_handler, htmlSAXHandler, sax);
66
87
 
67
- ctxt->sax = sax;
68
- ctxt->userData = (void *)NOKOGIRI_SAX_TUPLE_NEW(ctxt, sax_handler);
88
+ /* Free the sax handler since we'll assign our own */
89
+ if (ctxt->sax && ctxt->sax != (xmlSAXHandlerPtr)&xmlDefaultSAXHandler)
90
+ xmlFree(ctxt->sax);
69
91
 
70
- htmlParseDocument(ctxt);
92
+ ctxt->sax = sax;
93
+ ctxt->userData = (void *)NOKOGIRI_SAX_TUPLE_NEW(ctxt, sax_handler);
71
94
 
72
- if(NULL != ctxt->myDoc) xmlFreeDoc(ctxt->myDoc);
95
+ rb_ensure(parse_doc, (VALUE)ctxt, parse_doc_finalize, (VALUE)ctxt);
73
96
 
74
- NOKOGIRI_SAX_TUPLE_DESTROY(ctxt->userData);
75
- return self;
97
+ return self;
76
98
  }
77
99
 
78
100
  void init_html_sax_parser_context()
@@ -0,0 +1,87 @@
1
+ #include <html_sax_push_parser.h>
2
+
3
+ /*
4
+ * call-seq:
5
+ * native_write(chunk, last_chunk)
6
+ *
7
+ * Write +chunk+ to PushParser. +last_chunk+ triggers the end_document handle
8
+ */
9
+ static VALUE native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
10
+ {
11
+ xmlParserCtxtPtr ctx;
12
+ const char * chunk = NULL;
13
+ int size = 0;
14
+
15
+
16
+ Data_Get_Struct(self, xmlParserCtxt, ctx);
17
+
18
+ if(Qnil != _chunk) {
19
+ chunk = StringValuePtr(_chunk);
20
+ size = (int)RSTRING_LEN(_chunk);
21
+ }
22
+
23
+ if(htmlParseChunk(ctx, chunk, size, Qtrue == _last_chunk ? 1 : 0)) {
24
+ if (!(ctx->options & XML_PARSE_RECOVER)) {
25
+ xmlErrorPtr e = xmlCtxtGetLastError(ctx);
26
+ Nokogiri_error_raise(NULL, e);
27
+ }
28
+ }
29
+
30
+ return self;
31
+ }
32
+
33
+ /*
34
+ * call-seq:
35
+ * initialize_native(xml_sax, filename)
36
+ *
37
+ * Initialize the push parser with +xml_sax+ using +filename+
38
+ */
39
+ static VALUE initialize_native(VALUE self, VALUE _xml_sax, VALUE _filename,
40
+ VALUE encoding)
41
+ {
42
+ htmlSAXHandlerPtr sax;
43
+ const char * filename = NULL;
44
+ htmlParserCtxtPtr ctx;
45
+ xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
46
+
47
+ Data_Get_Struct(_xml_sax, xmlSAXHandler, sax);
48
+
49
+ if(_filename != Qnil) filename = StringValuePtr(_filename);
50
+
51
+ if (!NIL_P(encoding)) {
52
+ enc = xmlParseCharEncoding(StringValuePtr(encoding));
53
+ if (enc == XML_CHAR_ENCODING_ERROR)
54
+ rb_raise(rb_eArgError, "Unsupported Encoding");
55
+ }
56
+
57
+ ctx = htmlCreatePushParserCtxt(
58
+ sax,
59
+ NULL,
60
+ NULL,
61
+ 0,
62
+ filename,
63
+ enc
64
+ );
65
+ if(ctx == NULL)
66
+ rb_raise(rb_eRuntimeError, "Could not create a parser context");
67
+
68
+ ctx->userData = NOKOGIRI_SAX_TUPLE_NEW(ctx, self);
69
+
70
+ ctx->sax2 = 1;
71
+ rb_rdata_store(self, ctx); // DATA_PTR(self) = ctx;
72
+ return self;
73
+ }
74
+
75
+ VALUE cNokogiriHtmlSaxPushParser;
76
+ void init_html_sax_push_parser()
77
+ {
78
+ VALUE nokogiri = rb_define_module("Nokogiri");
79
+ VALUE html = rb_define_module_under(nokogiri, "HTML");
80
+ VALUE sax = rb_define_module_under(html, "SAX");
81
+ VALUE klass = rb_define_class_under(sax, "PushParser", cNokogiriXmlSaxPushParser);
82
+
83
+ cNokogiriHtmlSaxPushParser = klass;
84
+
85
+ rb_define_private_method(klass, "initialize_native", initialize_native, 3);
86
+ rb_define_private_method(klass, "native_write", native_write, 2);
87
+ }
@@ -0,0 +1,9 @@
1
+ #ifndef NOKOGIRI_HTML_SAX_PUSH_PARSER
2
+ #define NOKOGIRI_HTML_SAX_PUSH_PARSER
3
+
4
+ #include <nokogiri.h>
5
+
6
+ void init_html_sax_push_parser();
7
+
8
+ extern VALUE cNokogiriHtmlSaxPushParser ;
9
+ #endif
@@ -35,20 +35,17 @@ void vasprintf_free (void *p)
35
35
  }
36
36
  #endif
37
37
 
38
+ #ifdef HAVE_RUBY_UTIL_H
39
+ #include "ruby/util.h"
40
+ #else
38
41
  #ifndef __MACRUBY__
39
- /* Allocate strdupped strings with the same memory allocator Ruby uses. */
40
- static char *ruby_strdup(const char *s)
41
- {
42
- size_t len = strlen(s);
43
- char *result = ruby_xmalloc((ssize_t) (len + 1));
44
- memcpy(result, s, len + 1);
45
- return result;
46
- }
42
+ #include "util.h"
43
+ #endif
47
44
  #endif
48
45
 
49
46
  void Init_nokogiri()
50
47
  {
51
- #ifndef __MACRUBY__
48
+ #if !(defined __MACRUBY__) && !(defined MAGLEV)
52
49
  xmlMemSetup(
53
50
  (xmlFreeFunc)ruby_xfree,
54
51
  (xmlMallocFunc)ruby_xmalloc,
@@ -104,6 +101,7 @@ void Init_nokogiri()
104
101
  init_xml_entity_decl();
105
102
  init_xml_namespace();
106
103
  init_html_sax_parser_context();
104
+ init_html_sax_push_parser();
107
105
  init_xslt_stylesheet();
108
106
  init_xml_syntax_error();
109
107
  init_html_entity_lookup();
@@ -27,7 +27,9 @@ int vasprintf (char **strp, const char *fmt, va_list ap);
27
27
  #include <libxml/HTMLparser.h>
28
28
  #include <libxml/HTMLtree.h>
29
29
  #include <libxml/relaxng.h>
30
+ #include <libxml/xinclude.h>
30
31
  #include <libxslt/extensions.h>
32
+ #include <libxml/c14n.h>
31
33
  #include <ruby.h>
32
34
 
33
35
  #ifdef HAVE_RUBY_ENCODING_H
@@ -102,6 +104,7 @@ int vasprintf (char **strp, const char *fmt, va_list ap);
102
104
  #include <xml_sax_push_parser.h>
103
105
  #include <xml_reader.h>
104
106
  #include <html_sax_parser_context.h>
107
+ #include <html_sax_push_parser.h>
105
108
  #include <xslt_stylesheet.h>
106
109
  #include <xml_syntax_error.h>
107
110
  #include <xml_schema.h>
@@ -48,12 +48,15 @@ static void recursively_remove_namespaces_from_node(xmlNodePtr node)
48
48
  for (child = node->children ; child ; child = child->next)
49
49
  recursively_remove_namespaces_from_node(child);
50
50
 
51
- if (node->nsDef) {
51
+ if (((node->type == XML_ELEMENT_NODE) ||
52
+ (node->type == XML_XINCLUDE_START) ||
53
+ (node->type == XML_XINCLUDE_END)) &&
54
+ node->nsDef) {
52
55
  xmlFreeNsList(node->nsDef);
53
56
  node->nsDef = NULL;
54
57
  }
55
58
 
56
- if (node->properties != NULL) {
59
+ if (node->type == XML_ELEMENT_NODE && node->properties != NULL) {
57
60
  property = node->properties ;
58
61
  while (property != NULL) {
59
62
  if (property->ns) property->ns = NULL ;
@@ -152,6 +155,9 @@ static VALUE set_encoding(VALUE self, VALUE encoding)
152
155
  xmlDocPtr doc;
153
156
  Data_Get_Struct(self, xmlDoc, doc);
154
157
 
158
+ if (doc->encoding)
159
+ free((char *) doc->encoding); // this may produce a gcc cast warning
160
+
155
161
  doc->encoding = xmlStrdup((xmlChar *)StringValuePtr(encoding));
156
162
 
157
163
  return encoding;
@@ -421,6 +427,97 @@ static VALUE create_entity(int argc, VALUE *argv, VALUE self)
421
427
  return Nokogiri_wrap_xml_node(cNokogiriXmlEntityDecl, (xmlNodePtr)ptr);
422
428
  }
423
429
 
430
+ static int block_caller(void * ctx, xmlNodePtr _node, xmlNodePtr _parent)
431
+ {
432
+ VALUE block;
433
+ VALUE node;
434
+ VALUE parent;
435
+ VALUE ret;
436
+
437
+ if(_node->type == XML_NAMESPACE_DECL){
438
+ node = Nokogiri_wrap_xml_namespace(_parent->doc, (xmlNsPtr) _node);
439
+ }
440
+ else{
441
+ node = Nokogiri_wrap_xml_node(Qnil, _node);
442
+ }
443
+ parent = _parent ? Nokogiri_wrap_xml_node(Qnil, _parent) : Qnil;
444
+ block = (VALUE)ctx;
445
+
446
+ ret = rb_funcall(block, rb_intern("call"), 2, node, parent);
447
+
448
+ if(Qfalse == ret || Qnil == ret) return 0;
449
+
450
+ return 1;
451
+ }
452
+
453
+ /* call-seq:
454
+ * doc.canonicalize(mode=XML_C14N_1_0,inclusive_namespaces=nil,with_comments=false)
455
+ * doc.canonicalize { |obj, parent| ... }
456
+ *
457
+ * Canonicalize a document and return the results. Takes an optional block
458
+ * that takes two parameters: the +obj+ and that node's +parent+.
459
+ * The +obj+ will be either a Nokogiri::XML::Node, or a Nokogiri::XML::Namespace
460
+ * The block must return a non-nil, non-false value if the +obj+ passed in
461
+ * should be included in the canonicalized document.
462
+ */
463
+ static VALUE canonicalize(int argc, VALUE* argv, VALUE self)
464
+ {
465
+ VALUE mode;
466
+ VALUE incl_ns;
467
+ VALUE with_comments;
468
+ xmlChar **ns;
469
+ long ns_len, i;
470
+
471
+ xmlDocPtr doc;
472
+ xmlOutputBufferPtr buf;
473
+ xmlC14NIsVisibleCallback cb = NULL;
474
+ void * ctx = NULL;
475
+
476
+ VALUE rb_cStringIO;
477
+ VALUE io;
478
+
479
+ rb_scan_args(argc, argv, "03", &mode, &incl_ns, &with_comments);
480
+
481
+ Data_Get_Struct(self, xmlDoc, doc);
482
+
483
+ rb_cStringIO = rb_const_get_at(rb_cObject, rb_intern("StringIO"));
484
+ io = rb_class_new_instance(0, 0, rb_cStringIO);
485
+ buf = xmlAllocOutputBuffer(NULL);
486
+
487
+ buf->writecallback = (xmlOutputWriteCallback)io_write_callback;
488
+ buf->closecallback = (xmlOutputCloseCallback)io_close_callback;
489
+ buf->context = (void *)io;
490
+
491
+ if(rb_block_given_p()) {
492
+ cb = block_caller;
493
+ ctx = (void *)rb_block_proc();
494
+ }
495
+
496
+ if(NIL_P(incl_ns)){
497
+ ns = NULL;
498
+ }
499
+ else{
500
+ ns_len = RARRAY_LEN(incl_ns);
501
+ ns = calloc((size_t)ns_len+1, sizeof(xmlChar *));
502
+ for (i = 0 ; i < ns_len ; i++) {
503
+ VALUE entry = rb_ary_entry(incl_ns, i);
504
+ const char * ptr = StringValuePtr(entry);
505
+ ns[i] = (xmlChar*) ptr;
506
+ }
507
+ }
508
+
509
+
510
+ xmlC14NExecute(doc, cb, ctx,
511
+ (int) (NIL_P(mode) ? 0 : NUM2INT(mode)),
512
+ ns,
513
+ (int) (NIL_P(with_comments) ? 0 : 1),
514
+ buf);
515
+
516
+ xmlOutputBufferClose(buf);
517
+
518
+ return rb_funcall(io, rb_intern("string"), 0);
519
+ }
520
+
424
521
  VALUE cNokogiriXmlDocument ;
425
522
  void init_xml_document()
426
523
  {
@@ -444,6 +541,7 @@ void init_xml_document()
444
541
  rb_define_method(klass, "encoding", encoding, 0);
445
542
  rb_define_method(klass, "encoding=", set_encoding, 1);
446
543
  rb_define_method(klass, "version", version, 0);
544
+ rb_define_method(klass, "canonicalize", canonicalize, -1);
447
545
  rb_define_method(klass, "dup", duplicate_node, -1);
448
546
  rb_define_method(klass, "url", url, 0);
449
547
  rb_define_method(klass, "create_entity", create_entity, -1);
@@ -467,7 +565,7 @@ VALUE Nokogiri_wrap_xml_document(VALUE klass, xmlDocPtr doc)
467
565
  rb_iv_set(rb_doc, "@decorators", Qnil);
468
566
  rb_iv_set(rb_doc, "@node_cache", cache);
469
567
 
470
- tuple->doc = (void *)rb_doc;
568
+ tuple->doc = rb_doc;
471
569
  tuple->unlinkedNodes = st_init_numtable_with_size(128);
472
570
  tuple->node_cache = cache;
473
571
  doc->_private = tuple ;
@@ -4,7 +4,7 @@
4
4
  #include <nokogiri.h>
5
5
 
6
6
  struct _nokogiriTuple {
7
- xmlDocPtr doc;
7
+ VALUE doc;
8
8
  st_table *unlinkedNodes;
9
9
  VALUE node_cache;
10
10
  };
@@ -15,9 +15,9 @@ void init_xml_document();
15
15
  VALUE Nokogiri_wrap_xml_document(VALUE klass, xmlDocPtr doc);
16
16
 
17
17
  #define DOC_RUBY_OBJECT_TEST(x) ((nokogiriTuplePtr)(x->_private))
18
- #define DOC_RUBY_OBJECT(x) ((VALUE)((nokogiriTuplePtr)(x->_private))->doc)
18
+ #define DOC_RUBY_OBJECT(x) (((nokogiriTuplePtr)(x->_private))->doc)
19
19
  #define DOC_UNLINKED_NODE_HASH(x) (((nokogiriTuplePtr)(x->_private))->unlinkedNodes)
20
- #define DOC_NODE_CACHE(x) ((VALUE)((nokogiriTuplePtr)(x->_private))->node_cache)
20
+ #define DOC_NODE_CACHE(x) (((nokogiriTuplePtr)(x->_private))->node_cache)
21
21
 
22
22
  extern VALUE cNokogiriXmlDocument ;
23
23
  #endif
@@ -138,7 +138,8 @@ static VALUE reparent_node_with(VALUE pivot_obj, VALUE reparentee_obj, pivot_rep
138
138
  }
139
139
  }
140
140
 
141
- if (reparentee->type == XML_TEXT_NODE && pivot->next && pivot->next->type == XML_TEXT_NODE) {
141
+ if (prf != xmlAddPrevSibling && prf != xmlAddNextSibling
142
+ && reparentee->type == XML_TEXT_NODE && pivot->next && pivot->next->type == XML_TEXT_NODE) {
142
143
  /*
143
144
  * libxml merges text nodes in a right-to-left fashion, meaning that if
144
145
  * there are two text nodes who would be adjacent, the right (or following,
@@ -484,7 +485,13 @@ static VALUE previous_element(VALUE self)
484
485
  /* :nodoc: */
485
486
  static VALUE replace(VALUE self, VALUE new_node)
486
487
  {
487
- return reparent_node_with(self, new_node, xmlReplaceNodeWrapper) ;
488
+ VALUE reparent = reparent_node_with(self, new_node, xmlReplaceNodeWrapper);
489
+
490
+ xmlNodePtr pivot;
491
+ Data_Get_Struct(self, xmlNode, pivot);
492
+ NOKOGIRI_ROOT_NODE(pivot);
493
+
494
+ return reparent;
488
495
  }
489
496
 
490
497
  /*
@@ -658,9 +665,28 @@ static VALUE namespaced_key_eh(VALUE self, VALUE attribute, VALUE namespace)
658
665
  */
659
666
  static VALUE set(VALUE self, VALUE property, VALUE value)
660
667
  {
661
- xmlNodePtr node;
668
+ xmlNodePtr node, cur;
669
+ xmlAttrPtr prop;
662
670
  Data_Get_Struct(self, xmlNode, node);
663
671
 
672
+ /* If a matching attribute node already exists, then xmlSetProp will destroy
673
+ * the existing node's children. However, if Nokogiri has a node object
674
+ * pointing to one of those children, we are left with a broken reference.
675
+ *
676
+ * We can avoid this by unlinking these nodes first.
677
+ */
678
+ if (node->type != XML_ELEMENT_NODE)
679
+ return(Qnil);
680
+ prop = xmlHasProp(node, (xmlChar *)StringValuePtr(property));
681
+ if (prop && prop->children) {
682
+ for (cur = prop->children; cur; cur = cur->next) {
683
+ if (cur->_private) {
684
+ NOKOGIRI_ROOT_NODE(cur);
685
+ xmlUnlinkNode(cur);
686
+ }
687
+ }
688
+ }
689
+
664
690
  xmlSetProp(node, (xmlChar *)StringValuePtr(property),
665
691
  (xmlChar *)StringValuePtr(value));
666
692
 
@@ -1157,73 +1183,134 @@ static VALUE compare(VALUE self, VALUE _other)
1157
1183
  }
1158
1184
 
1159
1185
 
1160
- /* TODO: DOCUMENT ME */
1161
- static VALUE in_context(VALUE self, VALUE _str, VALUE _options)
1186
+ /*
1187
+ * call-seq:
1188
+ * process_xincludes(options)
1189
+ *
1190
+ * Loads and substitutes all xinclude elements below the node. The
1191
+ * parser context will be initialized with +options+.
1192
+ */
1193
+ static VALUE process_xincludes(VALUE self, VALUE options)
1162
1194
  {
1195
+ int rcode ;
1163
1196
  xmlNodePtr node;
1164
- xmlNodePtr list;
1165
- xmlNodePtr child_iter;
1166
- xmlNodeSetPtr set;
1167
- xmlParserErrors error;
1168
- VALUE doc, err;
1197
+ VALUE error_list = rb_ary_new();
1169
1198
 
1170
1199
  Data_Get_Struct(self, xmlNode, node);
1171
1200
 
1172
- doc = DOC_RUBY_OBJECT(node->doc);
1173
- err = rb_iv_get(doc, "@errors");
1201
+ xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);
1202
+ rcode = xmlXIncludeProcessTreeFlags(node, (int)NUM2INT(options));
1203
+ xmlSetStructuredErrorFunc(NULL, NULL);
1174
1204
 
1175
- xmlSetStructuredErrorFunc((void *)err, Nokogiri_error_array_pusher);
1205
+ if (rcode < 0) {
1206
+ xmlErrorPtr error;
1176
1207
 
1177
- /* Twiddle global variable because of a bug in libxml2.
1178
- * http://git.gnome.org/browse/libxml2/commit/?id=e20fb5a72c83cbfc8e4a8aa3943c6be8febadab7
1179
- */
1208
+ error = xmlGetLastError();
1209
+ if(error)
1210
+ rb_exc_raise(Nokogiri_wrap_xml_syntax_error((VALUE)NULL, error));
1211
+ else
1212
+ rb_raise(rb_eRuntimeError, "Could not perform xinclude substitution");
1213
+ }
1214
+
1215
+ return self;
1216
+ }
1217
+
1218
+
1219
+ /* TODO: DOCUMENT ME */
1220
+ static VALUE in_context(VALUE self, VALUE _str, VALUE _options)
1221
+ {
1222
+ xmlNodePtr node, list, child_iter, tmp, node_children, doc_children;
1223
+ xmlNodeSetPtr set;
1224
+ xmlParserErrors error;
1225
+ VALUE doc, err;
1226
+ int doc_is_empty;
1227
+
1228
+ Data_Get_Struct(self, xmlNode, node);
1229
+
1230
+ doc = DOC_RUBY_OBJECT(node->doc);
1231
+ err = rb_iv_get(doc, "@errors");
1232
+ doc_is_empty = (node->doc->children == NULL) ? 1 : 0;
1233
+ node_children = node->children;
1234
+ doc_children = node->doc->children;
1235
+
1236
+ xmlSetStructuredErrorFunc((void *)err, Nokogiri_error_array_pusher);
1237
+
1238
+ /* Twiddle global variable because of a bug in libxml2.
1239
+ * http://git.gnome.org/browse/libxml2/commit/?id=e20fb5a72c83cbfc8e4a8aa3943c6be8febadab7
1240
+ */
1180
1241
  #ifndef HTML_PARSE_NOIMPLIED
1181
- htmlHandleOmittedElem(0);
1242
+ htmlHandleOmittedElem(0);
1182
1243
  #endif
1183
1244
 
1184
- error = xmlParseInNodeContext(
1185
- node,
1186
- StringValuePtr(_str),
1187
- (int)RSTRING_LEN(_str),
1188
- (int)NUM2INT(_options),
1189
- &list);
1190
-
1191
- /* make sure parent/child pointers are coherent so an unlink will work properly (#331) */
1192
- child_iter = node->doc->children ;
1193
- while (child_iter) {
1194
- if (child_iter->parent != (xmlNodePtr)node->doc)
1195
- child_iter->parent = (xmlNodePtr)node->doc ;
1196
- child_iter = child_iter->next ;
1197
- }
1245
+ /* This function adds a fake node to the child of +node+. If the parser
1246
+ * does not exit cleanly with XML_ERR_OK, the list is freed. This can
1247
+ * leave the child pointers in a bad state if they were originally empty.
1248
+ *
1249
+ * http://git.gnome.org/browse/libxml2/tree/parser.c#n13177
1250
+ * */
1251
+ error = xmlParseInNodeContext(node, StringValuePtr(_str),
1252
+ (int)RSTRING_LEN(_str),
1253
+ (int)NUM2INT(_options), &list);
1254
+
1255
+ /* xmlParseInNodeContext should not mutate the original document or node,
1256
+ * so reassigning these pointers should be OK. The reason we're reassigning
1257
+ * is because if there were errors, it's possible for the child pointers
1258
+ * to be manipulated. */
1259
+ if (error != XML_ERR_OK) {
1260
+ node->doc->children = doc_children;
1261
+ node->children = node_children;
1262
+ }
1263
+
1264
+ /* make sure parent/child pointers are coherent so an unlink will work
1265
+ * properly (#331)
1266
+ */
1267
+ child_iter = node->doc->children ;
1268
+ while (child_iter) {
1269
+ if (child_iter->parent != (xmlNodePtr)node->doc)
1270
+ child_iter->parent = (xmlNodePtr)node->doc;
1271
+ child_iter = child_iter->next;
1272
+ }
1198
1273
 
1199
1274
  #ifndef HTML_PARSE_NOIMPLIED
1200
- htmlHandleOmittedElem(1);
1275
+ htmlHandleOmittedElem(1);
1201
1276
  #endif
1202
1277
 
1203
- xmlSetStructuredErrorFunc(NULL, NULL);
1278
+ xmlSetStructuredErrorFunc(NULL, NULL);
1204
1279
 
1205
- /* FIXME: This probably needs to handle more constants... */
1206
- switch(error) {
1207
- case XML_ERR_OK:
1208
- break;
1209
-
1210
- case XML_ERR_INTERNAL_ERROR:
1211
- case XML_ERR_NO_MEMORY:
1212
- rb_raise(rb_eRuntimeError, "error parsing fragment (%d)", error);
1213
- break;
1280
+ /* Workaround for a libxml2 bug where a parsing error may leave a broken
1281
+ * node reference in node->doc->children.
1282
+ * This workaround is limited to when a parse error occurs, the document
1283
+ * went from having no children to having children, and the context node is
1284
+ * part of a document fragment.
1285
+ * https://bugzilla.gnome.org/show_bug.cgi?id=668155
1286
+ */
1287
+ if (error != XML_ERR_OK && doc_is_empty && node->doc->children != NULL) {
1288
+ tmp = node;
1289
+ while (tmp->parent)
1290
+ tmp = tmp->parent;
1291
+
1292
+ if (tmp->type == XML_DOCUMENT_FRAG_NODE)
1293
+ node->doc->children = NULL;
1294
+ }
1214
1295
 
1215
- default:
1216
- break;
1217
- }
1296
+ /* FIXME: This probably needs to handle more constants... */
1297
+ switch (error) {
1298
+ case XML_ERR_INTERNAL_ERROR:
1299
+ case XML_ERR_NO_MEMORY:
1300
+ rb_raise(rb_eRuntimeError, "error parsing fragment (%d)", error);
1301
+ break;
1302
+ default:
1303
+ break;
1304
+ }
1218
1305
 
1219
- set = xmlXPathNodeSetCreate(NULL);
1306
+ set = xmlXPathNodeSetCreate(NULL);
1220
1307
 
1221
- while(list) {
1222
- xmlXPathNodeSetAddUnique(set, list);
1223
- list = list->next;
1224
- }
1308
+ while (list) {
1309
+ xmlXPathNodeSetAddUnique(set, list);
1310
+ list = list->next;
1311
+ }
1225
1312
 
1226
- return Nokogiri_wrap_xml_node_set(set, doc);
1313
+ return Nokogiri_wrap_xml_node_set(set, doc);
1227
1314
  }
1228
1315
 
1229
1316
  static VALUE sym_iv_doc = Qnil;
@@ -1234,7 +1321,7 @@ VALUE Nokogiri_wrap_xml_node(VALUE klass, xmlNodePtr node)
1234
1321
  VALUE document = Qnil ;
1235
1322
  VALUE node_cache = Qnil ;
1236
1323
  VALUE rb_node = Qnil ;
1237
- int node_has_a_document = 0 ;
1324
+ nokogiriTuplePtr node_has_a_document;
1238
1325
  void (*mark_method)(xmlNodePtr) = NULL ;
1239
1326
 
1240
1327
  assert(node);
@@ -1242,7 +1329,13 @@ VALUE Nokogiri_wrap_xml_node(VALUE klass, xmlNodePtr node)
1242
1329
  if(node->type == XML_DOCUMENT_NODE || node->type == XML_HTML_DOCUMENT_NODE)
1243
1330
  return DOC_RUBY_OBJECT(node->doc);
1244
1331
 
1245
- if(NULL != node->_private) return (VALUE)node->_private;
1332
+ /* It's OK if the node doesn't have a fully-realized document (as in XML::Reader). */
1333
+ /* see https://github.com/tenderlove/nokogiri/issues/95 */
1334
+ /* and https://github.com/tenderlove/nokogiri/issues/439 */
1335
+ node_has_a_document = DOC_RUBY_OBJECT_TEST(node->doc);
1336
+
1337
+ if(node->_private && node_has_a_document)
1338
+ return (VALUE)node->_private;
1246
1339
 
1247
1340
  if (!RTEST(klass)) {
1248
1341
  switch(node->type) {
@@ -1288,10 +1381,7 @@ VALUE Nokogiri_wrap_xml_node(VALUE klass, xmlNodePtr node)
1288
1381
  }
1289
1382
  }
1290
1383
 
1291
- /* It's OK if the node doesn't have a fully-realized document (as in XML::Reader). */
1292
- /* see https://github.com/tenderlove/nokogiri/issues/95 */
1293
- /* and https://github.com/tenderlove/nokogiri/issues/439 */
1294
- node_has_a_document = (DOC_RUBY_OBJECT_TEST(node->doc) && DOC_RUBY_OBJECT(node->doc)) ? 1 : 0 ;
1384
+ mark_method = node_has_a_document ? mark : NULL ;
1295
1385
 
1296
1386
  if (DOC_RUBY_OBJECT_TEST(node->doc)) { // maglev workaround , no gc mark
1297
1387
  VALUE ref = DOC_RUBY_OBJECT(node->doc);
@@ -1363,7 +1453,6 @@ void init_xml_node()
1363
1453
  rb_define_method(klass, "key?", key_eh, 1);
1364
1454
  rb_define_method(klass, "namespaced_key?", namespaced_key_eh, 2);
1365
1455
  rb_define_method(klass, "blank?", blank_eh, 0);
1366
- rb_define_method(klass, "[]=", set, 2);
1367
1456
  rb_define_method(klass, "attribute_nodes", attribute_nodes, 0);
1368
1457
  rb_define_method(klass, "attribute", attr, 1);
1369
1458
  rb_define_method(klass, "attribute_with_ns", attribute_with_ns, 2);
@@ -1380,6 +1469,7 @@ void init_xml_node()
1380
1469
  rb_define_method(klass, "pointer_id", pointer_id, 0);
1381
1470
  rb_define_method(klass, "line", line, 0);
1382
1471
 
1472
+ rb_define_private_method(klass, "process_xincludes", process_xincludes, 1);
1383
1473
  rb_define_private_method(klass, "in_context", in_context, 2);
1384
1474
  rb_define_private_method(klass, "add_child_node", add_child, 1);
1385
1475
  rb_define_private_method(klass, "add_previous_sibling_node", add_previous_sibling, 1);
@@ -1389,9 +1479,12 @@ void init_xml_node()
1389
1479
  rb_define_private_method(klass, "native_write_to", native_write_to, 4);
1390
1480
  rb_define_private_method(klass, "native_content=", set_content, 1);
1391
1481
  rb_define_private_method(klass, "get", get, 1);
1482
+ rb_define_private_method(klass, "set", set, 2);
1392
1483
  rb_define_private_method(klass, "set_namespace", set_namespace, 1);
1393
1484
  rb_define_private_method(klass, "compare", compare, 1);
1394
1485
 
1395
1486
  decorate = rb_intern("decorate");
1396
1487
  decorate_bang = rb_intern("decorate!");
1397
1488
  }
1489
+
1490
+ /* vim: set noet sw=4 sws=4 */