nokogiri-maglev- 1.5.0.1 → 1.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG.ja.rdoc +56 -12
- data/CHANGELOG.rdoc +49 -0
- data/C_CODING_STYLE.rdoc +27 -0
- data/Manifest.txt +4 -0
- data/README.rdoc +11 -7
- data/Rakefile +42 -27
- data/bin/nokogiri +10 -2
- data/ext/nokogiri/extconf.rb +11 -3
- data/ext/nokogiri/html_document.c +16 -0
- data/ext/nokogiri/html_sax_parser_context.c +59 -37
- data/ext/nokogiri/html_sax_push_parser.c +87 -0
- data/ext/nokogiri/html_sax_push_parser.h +9 -0
- data/ext/nokogiri/nokogiri.c +7 -9
- data/ext/nokogiri/nokogiri.h +3 -0
- data/ext/nokogiri/xml_document.c +101 -3
- data/ext/nokogiri/xml_document.h +3 -3
- data/ext/nokogiri/xml_node.c +151 -58
- data/ext/nokogiri/xml_node_set.c +169 -120
- data/ext/nokogiri/xml_node_set.h +5 -0
- data/ext/nokogiri/xml_sax_parser_context.c +64 -41
- data/ext/nokogiri/xml_text.c +2 -0
- data/ext/nokogiri/xml_xpath_context.c +31 -25
- data/ext/nokogiri/xslt_stylesheet.c +62 -16
- data/ext/nokogiri/xslt_stylesheet.h +5 -0
- data/lib/nokogiri/css/parser.rb +165 -159
- data/lib/nokogiri/css/parser.y +6 -3
- data/lib/nokogiri/css/tokenizer.rb +1 -1
- data/lib/nokogiri/css/tokenizer.rex +1 -1
- data/lib/nokogiri/html.rb +1 -0
- data/lib/nokogiri/html/document.rb +82 -42
- data/lib/nokogiri/html/sax/push_parser.rb +16 -0
- data/lib/nokogiri/version.rb +1 -1
- data/lib/nokogiri/xml.rb +6 -0
- data/lib/nokogiri/xml/builder.rb +7 -1
- data/lib/nokogiri/xml/document.rb +32 -17
- data/lib/nokogiri/xml/document_fragment.rb +6 -1
- data/lib/nokogiri/xml/node.rb +40 -9
- data/lib/nokogiri/xslt.rb +5 -1
- data/tasks/cross_compile.rb +1 -0
- data/tasks/nokogiri.org.rb +6 -0
- data/tasks/test.rb +1 -0
- data/test/css/test_xpath_visitor.rb +6 -0
- data/test/helper.rb +1 -0
- data/test/html/test_document.rb +26 -0
- data/test/html/test_document_fragment.rb +1 -2
- data/test/test_memory_leak.rb +81 -1
- data/test/test_xslt_transforms.rb +152 -123
- data/test/xml/test_builder.rb +24 -2
- data/test/xml/test_c14n.rb +151 -0
- data/test/xml/test_document.rb +48 -0
- data/test/xml/test_namespace.rb +5 -0
- data/test/xml/test_node.rb +82 -1
- data/test/xml/test_node_attributes.rb +19 -0
- data/test/xml/test_node_inheritance.rb +32 -0
- data/test/xml/test_node_reparenting.rb +32 -0
- data/test/xml/test_node_set.rb +16 -8
- data/test/xml/test_reader_encoding.rb +16 -0
- data/test/xml/test_unparented_node.rb +32 -0
- data/test/xml/test_xinclude.rb +83 -0
- data/test/xml/test_xpath.rb +22 -0
- metadata +208 -241
@@ -13,31 +13,35 @@ static void deallocate(xmlParserCtxtPtr ctxt)
|
|
13
13
|
NOKOGIRI_DEBUG_END(handler);
|
14
14
|
}
|
15
15
|
|
16
|
-
static VALUE
|
16
|
+
static VALUE
|
17
|
+
parse_memory(VALUE klass, VALUE data, VALUE encoding)
|
17
18
|
{
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
19
|
+
htmlParserCtxtPtr ctxt;
|
20
|
+
|
21
|
+
if (NIL_P(data))
|
22
|
+
rb_raise(rb_eArgError, "data cannot be nil");
|
23
|
+
if (!(int)RSTRING_LEN(data))
|
24
|
+
rb_raise(rb_eRuntimeError, "data cannot be empty");
|
25
|
+
|
26
|
+
ctxt = htmlCreateMemoryParserCtxt(StringValuePtr(data),
|
27
|
+
(int)RSTRING_LEN(data));
|
28
|
+
if (ctxt->sax) {
|
29
|
+
xmlFree(ctxt->sax);
|
30
|
+
ctxt->sax = NULL;
|
31
|
+
}
|
28
32
|
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
33
|
+
if (RTEST(encoding)) {
|
34
|
+
xmlCharEncodingHandlerPtr enc = xmlFindCharEncodingHandler(StringValuePtr(encoding));
|
35
|
+
if (enc != NULL) {
|
36
|
+
xmlSwitchToEncoding(ctxt, enc);
|
37
|
+
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
|
38
|
+
rb_raise(rb_eRuntimeError, "Unsupported encoding %s",
|
39
|
+
StringValuePtr(encoding));
|
40
|
+
}
|
41
|
+
}
|
37
42
|
}
|
38
|
-
}
|
39
43
|
|
40
|
-
|
44
|
+
return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);
|
41
45
|
}
|
42
46
|
|
43
47
|
static VALUE parse_file(VALUE klass, VALUE filename, VALUE encoding)
|
@@ -49,30 +53,48 @@ static VALUE parse_file(VALUE klass, VALUE filename, VALUE encoding)
|
|
49
53
|
return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);
|
50
54
|
}
|
51
55
|
|
52
|
-
static VALUE
|
56
|
+
static VALUE
|
57
|
+
parse_doc(VALUE ctxt_val)
|
58
|
+
{
|
59
|
+
htmlParserCtxtPtr ctxt = (htmlParserCtxtPtr)ctxt_val;
|
60
|
+
htmlParseDocument(ctxt);
|
61
|
+
return Qnil;
|
62
|
+
}
|
63
|
+
|
64
|
+
static VALUE
|
65
|
+
parse_doc_finalize(VALUE ctxt_val)
|
53
66
|
{
|
54
|
-
|
55
|
-
|
67
|
+
htmlParserCtxtPtr ctxt = (htmlParserCtxtPtr)ctxt_val;
|
68
|
+
|
69
|
+
if (ctxt->myDoc)
|
70
|
+
xmlFreeDoc(ctxt->myDoc);
|
56
71
|
|
57
|
-
|
58
|
-
|
72
|
+
NOKOGIRI_SAX_TUPLE_DESTROY(ctxt->userData);
|
73
|
+
return Qnil;
|
74
|
+
}
|
75
|
+
|
76
|
+
static VALUE
|
77
|
+
parse_with(VALUE self, VALUE sax_handler)
|
78
|
+
{
|
79
|
+
htmlParserCtxtPtr ctxt;
|
80
|
+
htmlSAXHandlerPtr sax;
|
59
81
|
|
60
|
-
|
61
|
-
|
82
|
+
if (!RTEST(rb_obj_is_kind_of(sax_handler, cNokogiriXmlSaxParser)))
|
83
|
+
rb_raise(rb_eArgError, "argument must be a Nokogiri::XML::SAX::Parser");
|
62
84
|
|
63
|
-
|
64
|
-
|
65
|
-
xmlFree(ctxt->sax);
|
85
|
+
Data_Get_Struct(self, htmlParserCtxt, ctxt);
|
86
|
+
Data_Get_Struct(sax_handler, htmlSAXHandler, sax);
|
66
87
|
|
67
|
-
|
68
|
-
|
88
|
+
/* Free the sax handler since we'll assign our own */
|
89
|
+
if (ctxt->sax && ctxt->sax != (xmlSAXHandlerPtr)&xmlDefaultSAXHandler)
|
90
|
+
xmlFree(ctxt->sax);
|
69
91
|
|
70
|
-
|
92
|
+
ctxt->sax = sax;
|
93
|
+
ctxt->userData = (void *)NOKOGIRI_SAX_TUPLE_NEW(ctxt, sax_handler);
|
71
94
|
|
72
|
-
|
95
|
+
rb_ensure(parse_doc, (VALUE)ctxt, parse_doc_finalize, (VALUE)ctxt);
|
73
96
|
|
74
|
-
|
75
|
-
return self;
|
97
|
+
return self;
|
76
98
|
}
|
77
99
|
|
78
100
|
void init_html_sax_parser_context()
|
@@ -0,0 +1,87 @@
|
|
1
|
+
#include <html_sax_push_parser.h>
|
2
|
+
|
3
|
+
/*
|
4
|
+
* call-seq:
|
5
|
+
* native_write(chunk, last_chunk)
|
6
|
+
*
|
7
|
+
* Write +chunk+ to PushParser. +last_chunk+ triggers the end_document handler
|
8
|
+
*/
|
9
|
+
static VALUE native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
|
10
|
+
{
|
11
|
+
xmlParserCtxtPtr ctx;
|
12
|
+
const char * chunk = NULL;
|
13
|
+
int size = 0;
|
14
|
+
|
15
|
+
|
16
|
+
Data_Get_Struct(self, xmlParserCtxt, ctx);
|
17
|
+
|
18
|
+
if(Qnil != _chunk) {
|
19
|
+
chunk = StringValuePtr(_chunk);
|
20
|
+
size = (int)RSTRING_LEN(_chunk);
|
21
|
+
}
|
22
|
+
|
23
|
+
if(htmlParseChunk(ctx, chunk, size, Qtrue == _last_chunk ? 1 : 0)) {
|
24
|
+
if (!(ctx->options & XML_PARSE_RECOVER)) {
|
25
|
+
xmlErrorPtr e = xmlCtxtGetLastError(ctx);
|
26
|
+
Nokogiri_error_raise(NULL, e);
|
27
|
+
}
|
28
|
+
}
|
29
|
+
|
30
|
+
return self;
|
31
|
+
}
|
32
|
+
|
33
|
+
/*
|
34
|
+
* call-seq:
|
35
|
+
* initialize_native(xml_sax, filename, encoding)
|
36
|
+
*
|
37
|
+
* Initialize the push parser with +xml_sax+ using +filename+ and +encoding+
|
38
|
+
*/
|
39
|
+
static VALUE initialize_native(VALUE self, VALUE _xml_sax, VALUE _filename,
|
40
|
+
VALUE encoding)
|
41
|
+
{
|
42
|
+
htmlSAXHandlerPtr sax;
|
43
|
+
const char * filename = NULL;
|
44
|
+
htmlParserCtxtPtr ctx;
|
45
|
+
xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
|
46
|
+
|
47
|
+
Data_Get_Struct(_xml_sax, xmlSAXHandler, sax);
|
48
|
+
|
49
|
+
if(_filename != Qnil) filename = StringValuePtr(_filename);
|
50
|
+
|
51
|
+
if (!NIL_P(encoding)) {
|
52
|
+
enc = xmlParseCharEncoding(StringValuePtr(encoding));
|
53
|
+
if (enc == XML_CHAR_ENCODING_ERROR)
|
54
|
+
rb_raise(rb_eArgError, "Unsupported Encoding");
|
55
|
+
}
|
56
|
+
|
57
|
+
ctx = htmlCreatePushParserCtxt(
|
58
|
+
sax,
|
59
|
+
NULL,
|
60
|
+
NULL,
|
61
|
+
0,
|
62
|
+
filename,
|
63
|
+
enc
|
64
|
+
);
|
65
|
+
if(ctx == NULL)
|
66
|
+
rb_raise(rb_eRuntimeError, "Could not create a parser context");
|
67
|
+
|
68
|
+
ctx->userData = NOKOGIRI_SAX_TUPLE_NEW(ctx, self);
|
69
|
+
|
70
|
+
ctx->sax2 = 1;
|
71
|
+
rb_rdata_store(self, ctx); // DATA_PTR(self) = ctx;
|
72
|
+
return self;
|
73
|
+
}
|
74
|
+
|
75
|
+
VALUE cNokogiriHtmlSaxPushParser;
|
76
|
+
void init_html_sax_push_parser()
|
77
|
+
{
|
78
|
+
VALUE nokogiri = rb_define_module("Nokogiri");
|
79
|
+
VALUE html = rb_define_module_under(nokogiri, "HTML");
|
80
|
+
VALUE sax = rb_define_module_under(html, "SAX");
|
81
|
+
VALUE klass = rb_define_class_under(sax, "PushParser", cNokogiriXmlSaxPushParser);
|
82
|
+
|
83
|
+
cNokogiriHtmlSaxPushParser = klass;
|
84
|
+
|
85
|
+
rb_define_private_method(klass, "initialize_native", initialize_native, 3);
|
86
|
+
rb_define_private_method(klass, "native_write", native_write, 2);
|
87
|
+
}
|
data/ext/nokogiri/nokogiri.c
CHANGED
@@ -35,20 +35,17 @@ void vasprintf_free (void *p)
|
|
35
35
|
}
|
36
36
|
#endif
|
37
37
|
|
38
|
+
#ifdef HAVE_RUBY_UTIL_H
|
39
|
+
#include "ruby/util.h"
|
40
|
+
#else
|
38
41
|
#ifndef __MACRUBY__
|
39
|
-
|
40
|
-
|
41
|
-
{
|
42
|
-
size_t len = strlen(s);
|
43
|
-
char *result = ruby_xmalloc((ssize_t) (len + 1));
|
44
|
-
memcpy(result, s, len + 1);
|
45
|
-
return result;
|
46
|
-
}
|
42
|
+
#include "util.h"
|
43
|
+
#endif
|
47
44
|
#endif
|
48
45
|
|
49
46
|
void Init_nokogiri()
|
50
47
|
{
|
51
|
-
#
|
48
|
+
#if !(defined __MACRUBY__) && !(defined MAGLEV)
|
52
49
|
xmlMemSetup(
|
53
50
|
(xmlFreeFunc)ruby_xfree,
|
54
51
|
(xmlMallocFunc)ruby_xmalloc,
|
@@ -104,6 +101,7 @@ void Init_nokogiri()
|
|
104
101
|
init_xml_entity_decl();
|
105
102
|
init_xml_namespace();
|
106
103
|
init_html_sax_parser_context();
|
104
|
+
init_html_sax_push_parser();
|
107
105
|
init_xslt_stylesheet();
|
108
106
|
init_xml_syntax_error();
|
109
107
|
init_html_entity_lookup();
|
data/ext/nokogiri/nokogiri.h
CHANGED
@@ -27,7 +27,9 @@ int vasprintf (char **strp, const char *fmt, va_list ap);
|
|
27
27
|
#include <libxml/HTMLparser.h>
|
28
28
|
#include <libxml/HTMLtree.h>
|
29
29
|
#include <libxml/relaxng.h>
|
30
|
+
#include <libxml/xinclude.h>
|
30
31
|
#include <libxslt/extensions.h>
|
32
|
+
#include <libxml/c14n.h>
|
31
33
|
#include <ruby.h>
|
32
34
|
|
33
35
|
#ifdef HAVE_RUBY_ENCODING_H
|
@@ -102,6 +104,7 @@ int vasprintf (char **strp, const char *fmt, va_list ap);
|
|
102
104
|
#include <xml_sax_push_parser.h>
|
103
105
|
#include <xml_reader.h>
|
104
106
|
#include <html_sax_parser_context.h>
|
107
|
+
#include <html_sax_push_parser.h>
|
105
108
|
#include <xslt_stylesheet.h>
|
106
109
|
#include <xml_syntax_error.h>
|
107
110
|
#include <xml_schema.h>
|
data/ext/nokogiri/xml_document.c
CHANGED
@@ -48,12 +48,15 @@ static void recursively_remove_namespaces_from_node(xmlNodePtr node)
|
|
48
48
|
for (child = node->children ; child ; child = child->next)
|
49
49
|
recursively_remove_namespaces_from_node(child);
|
50
50
|
|
51
|
-
if (node->
|
51
|
+
if (((node->type == XML_ELEMENT_NODE) ||
|
52
|
+
(node->type == XML_XINCLUDE_START) ||
|
53
|
+
(node->type == XML_XINCLUDE_END)) &&
|
54
|
+
node->nsDef) {
|
52
55
|
xmlFreeNsList(node->nsDef);
|
53
56
|
node->nsDef = NULL;
|
54
57
|
}
|
55
58
|
|
56
|
-
if (node->properties != NULL) {
|
59
|
+
if (node->type == XML_ELEMENT_NODE && node->properties != NULL) {
|
57
60
|
property = node->properties ;
|
58
61
|
while (property != NULL) {
|
59
62
|
if (property->ns) property->ns = NULL ;
|
@@ -152,6 +155,9 @@ static VALUE set_encoding(VALUE self, VALUE encoding)
|
|
152
155
|
xmlDocPtr doc;
|
153
156
|
Data_Get_Struct(self, xmlDoc, doc);
|
154
157
|
|
158
|
+
if (doc->encoding)
|
159
|
+
free((char *) doc->encoding); // this may produce a gcc cast warning
|
160
|
+
|
155
161
|
doc->encoding = xmlStrdup((xmlChar *)StringValuePtr(encoding));
|
156
162
|
|
157
163
|
return encoding;
|
@@ -421,6 +427,97 @@ static VALUE create_entity(int argc, VALUE *argv, VALUE self)
|
|
421
427
|
return Nokogiri_wrap_xml_node(cNokogiriXmlEntityDecl, (xmlNodePtr)ptr);
|
422
428
|
}
|
423
429
|
|
430
|
+
static int block_caller(void * ctx, xmlNodePtr _node, xmlNodePtr _parent)
|
431
|
+
{
|
432
|
+
VALUE block;
|
433
|
+
VALUE node;
|
434
|
+
VALUE parent;
|
435
|
+
VALUE ret;
|
436
|
+
|
437
|
+
if(_node->type == XML_NAMESPACE_DECL){
|
438
|
+
node = Nokogiri_wrap_xml_namespace(_parent->doc, (xmlNsPtr) _node);
|
439
|
+
}
|
440
|
+
else{
|
441
|
+
node = Nokogiri_wrap_xml_node(Qnil, _node);
|
442
|
+
}
|
443
|
+
parent = _parent ? Nokogiri_wrap_xml_node(Qnil, _parent) : Qnil;
|
444
|
+
block = (VALUE)ctx;
|
445
|
+
|
446
|
+
ret = rb_funcall(block, rb_intern("call"), 2, node, parent);
|
447
|
+
|
448
|
+
if(Qfalse == ret || Qnil == ret) return 0;
|
449
|
+
|
450
|
+
return 1;
|
451
|
+
}
|
452
|
+
|
453
|
+
/* call-seq:
|
454
|
+
* doc.canonicalize(mode=XML_C14N_1_0,inclusive_namespaces=nil,with_comments=false)
|
455
|
+
* doc.canonicalize { |obj, parent| ... }
|
456
|
+
*
|
457
|
+
* Canonicalize a document and return the results. Takes an optional block
|
458
|
+
* that takes two parameters: the +obj+ and that node's +parent+.
|
459
|
+
* The +obj+ will be either a Nokogiri::XML::Node, or a Nokogiri::XML::Namespace
|
460
|
+
* The block must return a non-nil, non-false value if the +obj+ passed in
|
461
|
+
* should be included in the canonicalized document.
|
462
|
+
*/
|
463
|
+
static VALUE canonicalize(int argc, VALUE* argv, VALUE self)
|
464
|
+
{
|
465
|
+
VALUE mode;
|
466
|
+
VALUE incl_ns;
|
467
|
+
VALUE with_comments;
|
468
|
+
xmlChar **ns;
|
469
|
+
long ns_len, i;
|
470
|
+
|
471
|
+
xmlDocPtr doc;
|
472
|
+
xmlOutputBufferPtr buf;
|
473
|
+
xmlC14NIsVisibleCallback cb = NULL;
|
474
|
+
void * ctx = NULL;
|
475
|
+
|
476
|
+
VALUE rb_cStringIO;
|
477
|
+
VALUE io;
|
478
|
+
|
479
|
+
rb_scan_args(argc, argv, "03", &mode, &incl_ns, &with_comments);
|
480
|
+
|
481
|
+
Data_Get_Struct(self, xmlDoc, doc);
|
482
|
+
|
483
|
+
rb_cStringIO = rb_const_get_at(rb_cObject, rb_intern("StringIO"));
|
484
|
+
io = rb_class_new_instance(0, 0, rb_cStringIO);
|
485
|
+
buf = xmlAllocOutputBuffer(NULL);
|
486
|
+
|
487
|
+
buf->writecallback = (xmlOutputWriteCallback)io_write_callback;
|
488
|
+
buf->closecallback = (xmlOutputCloseCallback)io_close_callback;
|
489
|
+
buf->context = (void *)io;
|
490
|
+
|
491
|
+
if(rb_block_given_p()) {
|
492
|
+
cb = block_caller;
|
493
|
+
ctx = (void *)rb_block_proc();
|
494
|
+
}
|
495
|
+
|
496
|
+
if(NIL_P(incl_ns)){
|
497
|
+
ns = NULL;
|
498
|
+
}
|
499
|
+
else{
|
500
|
+
ns_len = RARRAY_LEN(incl_ns);
|
501
|
+
ns = calloc((size_t)ns_len+1, sizeof(xmlChar *));
|
502
|
+
for (i = 0 ; i < ns_len ; i++) {
|
503
|
+
VALUE entry = rb_ary_entry(incl_ns, i);
|
504
|
+
const char * ptr = StringValuePtr(entry);
|
505
|
+
ns[i] = (xmlChar*) ptr;
|
506
|
+
}
|
507
|
+
}
|
508
|
+
|
509
|
+
|
510
|
+
xmlC14NExecute(doc, cb, ctx,
|
511
|
+
(int) (NIL_P(mode) ? 0 : NUM2INT(mode)),
|
512
|
+
ns,
|
513
|
+
(int) (NIL_P(with_comments) ? 0 : 1),
|
514
|
+
buf);
|
515
|
+
|
516
|
+
xmlOutputBufferClose(buf);
|
517
|
+
|
518
|
+
return rb_funcall(io, rb_intern("string"), 0);
|
519
|
+
}
|
520
|
+
|
424
521
|
VALUE cNokogiriXmlDocument ;
|
425
522
|
void init_xml_document()
|
426
523
|
{
|
@@ -444,6 +541,7 @@ void init_xml_document()
|
|
444
541
|
rb_define_method(klass, "encoding", encoding, 0);
|
445
542
|
rb_define_method(klass, "encoding=", set_encoding, 1);
|
446
543
|
rb_define_method(klass, "version", version, 0);
|
544
|
+
rb_define_method(klass, "canonicalize", canonicalize, -1);
|
447
545
|
rb_define_method(klass, "dup", duplicate_node, -1);
|
448
546
|
rb_define_method(klass, "url", url, 0);
|
449
547
|
rb_define_method(klass, "create_entity", create_entity, -1);
|
@@ -467,7 +565,7 @@ VALUE Nokogiri_wrap_xml_document(VALUE klass, xmlDocPtr doc)
|
|
467
565
|
rb_iv_set(rb_doc, "@decorators", Qnil);
|
468
566
|
rb_iv_set(rb_doc, "@node_cache", cache);
|
469
567
|
|
470
|
-
tuple->doc =
|
568
|
+
tuple->doc = rb_doc;
|
471
569
|
tuple->unlinkedNodes = st_init_numtable_with_size(128);
|
472
570
|
tuple->node_cache = cache;
|
473
571
|
doc->_private = tuple ;
|
data/ext/nokogiri/xml_document.h
CHANGED
@@ -4,7 +4,7 @@
|
|
4
4
|
#include <nokogiri.h>
|
5
5
|
|
6
6
|
struct _nokogiriTuple {
|
7
|
-
|
7
|
+
VALUE doc;
|
8
8
|
st_table *unlinkedNodes;
|
9
9
|
VALUE node_cache;
|
10
10
|
};
|
@@ -15,9 +15,9 @@ void init_xml_document();
|
|
15
15
|
VALUE Nokogiri_wrap_xml_document(VALUE klass, xmlDocPtr doc);
|
16
16
|
|
17
17
|
#define DOC_RUBY_OBJECT_TEST(x) ((nokogiriTuplePtr)(x->_private))
|
18
|
-
#define DOC_RUBY_OBJECT(x) ((
|
18
|
+
#define DOC_RUBY_OBJECT(x) (((nokogiriTuplePtr)(x->_private))->doc)
|
19
19
|
#define DOC_UNLINKED_NODE_HASH(x) (((nokogiriTuplePtr)(x->_private))->unlinkedNodes)
|
20
|
-
#define DOC_NODE_CACHE(x) ((
|
20
|
+
#define DOC_NODE_CACHE(x) (((nokogiriTuplePtr)(x->_private))->node_cache)
|
21
21
|
|
22
22
|
extern VALUE cNokogiriXmlDocument ;
|
23
23
|
#endif
|
data/ext/nokogiri/xml_node.c
CHANGED
@@ -138,7 +138,8 @@ static VALUE reparent_node_with(VALUE pivot_obj, VALUE reparentee_obj, pivot_rep
|
|
138
138
|
}
|
139
139
|
}
|
140
140
|
|
141
|
-
if (
|
141
|
+
if (prf != xmlAddPrevSibling && prf != xmlAddNextSibling
|
142
|
+
&& reparentee->type == XML_TEXT_NODE && pivot->next && pivot->next->type == XML_TEXT_NODE) {
|
142
143
|
/*
|
143
144
|
* libxml merges text nodes in a right-to-left fashion, meaning that if
|
144
145
|
* there are two text nodes who would be adjacent, the right (or following,
|
@@ -484,7 +485,13 @@ static VALUE previous_element(VALUE self)
|
|
484
485
|
/* :nodoc: */
|
485
486
|
static VALUE replace(VALUE self, VALUE new_node)
|
486
487
|
{
|
487
|
-
|
488
|
+
VALUE reparent = reparent_node_with(self, new_node, xmlReplaceNodeWrapper);
|
489
|
+
|
490
|
+
xmlNodePtr pivot;
|
491
|
+
Data_Get_Struct(self, xmlNode, pivot);
|
492
|
+
NOKOGIRI_ROOT_NODE(pivot);
|
493
|
+
|
494
|
+
return reparent;
|
488
495
|
}
|
489
496
|
|
490
497
|
/*
|
@@ -658,9 +665,28 @@ static VALUE namespaced_key_eh(VALUE self, VALUE attribute, VALUE namespace)
|
|
658
665
|
*/
|
659
666
|
static VALUE set(VALUE self, VALUE property, VALUE value)
|
660
667
|
{
|
661
|
-
xmlNodePtr node;
|
668
|
+
xmlNodePtr node, cur;
|
669
|
+
xmlAttrPtr prop;
|
662
670
|
Data_Get_Struct(self, xmlNode, node);
|
663
671
|
|
672
|
+
/* If a matching attribute node already exists, then xmlSetProp will destroy
|
673
|
+
* the existing node's children. However, if Nokogiri has a node object
|
674
|
+
* pointing to one of those children, we are left with a broken reference.
|
675
|
+
*
|
676
|
+
* We can avoid this by unlinking these nodes first.
|
677
|
+
*/
|
678
|
+
if (node->type != XML_ELEMENT_NODE)
|
679
|
+
return(Qnil);
|
680
|
+
prop = xmlHasProp(node, (xmlChar *)StringValuePtr(property));
|
681
|
+
if (prop && prop->children) {
|
682
|
+
for (cur = prop->children; cur; cur = cur->next) {
|
683
|
+
if (cur->_private) {
|
684
|
+
NOKOGIRI_ROOT_NODE(cur);
|
685
|
+
xmlUnlinkNode(cur);
|
686
|
+
}
|
687
|
+
}
|
688
|
+
}
|
689
|
+
|
664
690
|
xmlSetProp(node, (xmlChar *)StringValuePtr(property),
|
665
691
|
(xmlChar *)StringValuePtr(value));
|
666
692
|
|
@@ -1157,73 +1183,134 @@ static VALUE compare(VALUE self, VALUE _other)
|
|
1157
1183
|
}
|
1158
1184
|
|
1159
1185
|
|
1160
|
-
/*
|
1161
|
-
|
1186
|
+
/*
|
1187
|
+
* call-seq:
|
1188
|
+
* process_xincludes(options)
|
1189
|
+
*
|
1190
|
+
* Loads and substitutes all xinclude elements below the node. The
|
1191
|
+
* parser context will be initialized with +options+.
|
1192
|
+
*/
|
1193
|
+
static VALUE process_xincludes(VALUE self, VALUE options)
|
1162
1194
|
{
|
1195
|
+
int rcode ;
|
1163
1196
|
xmlNodePtr node;
|
1164
|
-
|
1165
|
-
xmlNodePtr child_iter;
|
1166
|
-
xmlNodeSetPtr set;
|
1167
|
-
xmlParserErrors error;
|
1168
|
-
VALUE doc, err;
|
1197
|
+
VALUE error_list = rb_ary_new();
|
1169
1198
|
|
1170
1199
|
Data_Get_Struct(self, xmlNode, node);
|
1171
1200
|
|
1172
|
-
|
1173
|
-
|
1201
|
+
xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);
|
1202
|
+
rcode = xmlXIncludeProcessTreeFlags(node, (int)NUM2INT(options));
|
1203
|
+
xmlSetStructuredErrorFunc(NULL, NULL);
|
1174
1204
|
|
1175
|
-
|
1205
|
+
if (rcode < 0) {
|
1206
|
+
xmlErrorPtr error;
|
1176
1207
|
|
1177
|
-
|
1178
|
-
|
1179
|
-
|
1208
|
+
error = xmlGetLastError();
|
1209
|
+
if(error)
|
1210
|
+
rb_exc_raise(Nokogiri_wrap_xml_syntax_error((VALUE)NULL, error));
|
1211
|
+
else
|
1212
|
+
rb_raise(rb_eRuntimeError, "Could not perform xinclude substitution");
|
1213
|
+
}
|
1214
|
+
|
1215
|
+
return self;
|
1216
|
+
}
|
1217
|
+
|
1218
|
+
|
1219
|
+
/* TODO: DOCUMENT ME */
|
1220
|
+
static VALUE in_context(VALUE self, VALUE _str, VALUE _options)
|
1221
|
+
{
|
1222
|
+
xmlNodePtr node, list, child_iter, tmp, node_children, doc_children;
|
1223
|
+
xmlNodeSetPtr set;
|
1224
|
+
xmlParserErrors error;
|
1225
|
+
VALUE doc, err;
|
1226
|
+
int doc_is_empty;
|
1227
|
+
|
1228
|
+
Data_Get_Struct(self, xmlNode, node);
|
1229
|
+
|
1230
|
+
doc = DOC_RUBY_OBJECT(node->doc);
|
1231
|
+
err = rb_iv_get(doc, "@errors");
|
1232
|
+
doc_is_empty = (node->doc->children == NULL) ? 1 : 0;
|
1233
|
+
node_children = node->children;
|
1234
|
+
doc_children = node->doc->children;
|
1235
|
+
|
1236
|
+
xmlSetStructuredErrorFunc((void *)err, Nokogiri_error_array_pusher);
|
1237
|
+
|
1238
|
+
/* Twiddle global variable because of a bug in libxml2.
|
1239
|
+
* http://git.gnome.org/browse/libxml2/commit/?id=e20fb5a72c83cbfc8e4a8aa3943c6be8febadab7
|
1240
|
+
*/
|
1180
1241
|
#ifndef HTML_PARSE_NOIMPLIED
|
1181
|
-
|
1242
|
+
htmlHandleOmittedElem(0);
|
1182
1243
|
#endif
|
1183
1244
|
|
1184
|
-
|
1185
|
-
|
1186
|
-
|
1187
|
-
|
1188
|
-
|
1189
|
-
|
1190
|
-
|
1191
|
-
|
1192
|
-
|
1193
|
-
|
1194
|
-
|
1195
|
-
|
1196
|
-
|
1197
|
-
|
1245
|
+
/* This function adds a fake node to the child of +node+. If the parser
|
1246
|
+
* does not exit cleanly with XML_ERR_OK, the list is freed. This can
|
1247
|
+
* leave the child pointers in a bad state if they were originally empty.
|
1248
|
+
*
|
1249
|
+
* http://git.gnome.org/browse/libxml2/tree/parser.c#n13177
|
1250
|
+
* */
|
1251
|
+
error = xmlParseInNodeContext(node, StringValuePtr(_str),
|
1252
|
+
(int)RSTRING_LEN(_str),
|
1253
|
+
(int)NUM2INT(_options), &list);
|
1254
|
+
|
1255
|
+
/* xmlParseInNodeContext should not mutate the original document or node,
|
1256
|
+
* so reassigning these pointers should be OK. The reason we're reassigning
|
1257
|
+
* is because if there were errors, it's possible for the child pointers
|
1258
|
+
* to be manipulated. */
|
1259
|
+
if (error != XML_ERR_OK) {
|
1260
|
+
node->doc->children = doc_children;
|
1261
|
+
node->children = node_children;
|
1262
|
+
}
|
1263
|
+
|
1264
|
+
/* make sure parent/child pointers are coherent so an unlink will work
|
1265
|
+
* properly (#331)
|
1266
|
+
*/
|
1267
|
+
child_iter = node->doc->children ;
|
1268
|
+
while (child_iter) {
|
1269
|
+
if (child_iter->parent != (xmlNodePtr)node->doc)
|
1270
|
+
child_iter->parent = (xmlNodePtr)node->doc;
|
1271
|
+
child_iter = child_iter->next;
|
1272
|
+
}
|
1198
1273
|
|
1199
1274
|
#ifndef HTML_PARSE_NOIMPLIED
|
1200
|
-
|
1275
|
+
htmlHandleOmittedElem(1);
|
1201
1276
|
#endif
|
1202
1277
|
|
1203
|
-
|
1278
|
+
xmlSetStructuredErrorFunc(NULL, NULL);
|
1204
1279
|
|
1205
|
-
|
1206
|
-
|
1207
|
-
|
1208
|
-
|
1209
|
-
|
1210
|
-
|
1211
|
-
|
1212
|
-
|
1213
|
-
|
1280
|
+
/* Workaround for a libxml2 bug where a parsing error may leave a broken
|
1281
|
+
* node reference in node->doc->children.
|
1282
|
+
* This workaround is limited to when a parse error occurs, the document
|
1283
|
+
* went from having no children to having children, and the context node is
|
1284
|
+
* part of a document fragment.
|
1285
|
+
* https://bugzilla.gnome.org/show_bug.cgi?id=668155
|
1286
|
+
*/
|
1287
|
+
if (error != XML_ERR_OK && doc_is_empty && node->doc->children != NULL) {
|
1288
|
+
tmp = node;
|
1289
|
+
while (tmp->parent)
|
1290
|
+
tmp = tmp->parent;
|
1291
|
+
|
1292
|
+
if (tmp->type == XML_DOCUMENT_FRAG_NODE)
|
1293
|
+
node->doc->children = NULL;
|
1294
|
+
}
|
1214
1295
|
|
1215
|
-
|
1216
|
-
|
1217
|
-
|
1296
|
+
/* FIXME: This probably needs to handle more constants... */
|
1297
|
+
switch (error) {
|
1298
|
+
case XML_ERR_INTERNAL_ERROR:
|
1299
|
+
case XML_ERR_NO_MEMORY:
|
1300
|
+
rb_raise(rb_eRuntimeError, "error parsing fragment (%d)", error);
|
1301
|
+
break;
|
1302
|
+
default:
|
1303
|
+
break;
|
1304
|
+
}
|
1218
1305
|
|
1219
|
-
|
1306
|
+
set = xmlXPathNodeSetCreate(NULL);
|
1220
1307
|
|
1221
|
-
|
1222
|
-
|
1223
|
-
|
1224
|
-
|
1308
|
+
while (list) {
|
1309
|
+
xmlXPathNodeSetAddUnique(set, list);
|
1310
|
+
list = list->next;
|
1311
|
+
}
|
1225
1312
|
|
1226
|
-
|
1313
|
+
return Nokogiri_wrap_xml_node_set(set, doc);
|
1227
1314
|
}
|
1228
1315
|
|
1229
1316
|
static VALUE sym_iv_doc = Qnil;
|
@@ -1234,7 +1321,7 @@ VALUE Nokogiri_wrap_xml_node(VALUE klass, xmlNodePtr node)
|
|
1234
1321
|
VALUE document = Qnil ;
|
1235
1322
|
VALUE node_cache = Qnil ;
|
1236
1323
|
VALUE rb_node = Qnil ;
|
1237
|
-
|
1324
|
+
nokogiriTuplePtr node_has_a_document;
|
1238
1325
|
void (*mark_method)(xmlNodePtr) = NULL ;
|
1239
1326
|
|
1240
1327
|
assert(node);
|
@@ -1242,7 +1329,13 @@ VALUE Nokogiri_wrap_xml_node(VALUE klass, xmlNodePtr node)
|
|
1242
1329
|
if(node->type == XML_DOCUMENT_NODE || node->type == XML_HTML_DOCUMENT_NODE)
|
1243
1330
|
return DOC_RUBY_OBJECT(node->doc);
|
1244
1331
|
|
1245
|
-
if
|
1332
|
+
/* It's OK if the node doesn't have a fully-realized document (as in XML::Reader). */
|
1333
|
+
/* see https://github.com/tenderlove/nokogiri/issues/95 */
|
1334
|
+
/* and https://github.com/tenderlove/nokogiri/issues/439 */
|
1335
|
+
node_has_a_document = DOC_RUBY_OBJECT_TEST(node->doc);
|
1336
|
+
|
1337
|
+
if(node->_private && node_has_a_document)
|
1338
|
+
return (VALUE)node->_private;
|
1246
1339
|
|
1247
1340
|
if (!RTEST(klass)) {
|
1248
1341
|
switch(node->type) {
|
@@ -1288,10 +1381,7 @@ VALUE Nokogiri_wrap_xml_node(VALUE klass, xmlNodePtr node)
|
|
1288
1381
|
}
|
1289
1382
|
}
|
1290
1383
|
|
1291
|
-
|
1292
|
-
/* see https://github.com/tenderlove/nokogiri/issues/95 */
|
1293
|
-
/* and https://github.com/tenderlove/nokogiri/issues/439 */
|
1294
|
-
node_has_a_document = (DOC_RUBY_OBJECT_TEST(node->doc) && DOC_RUBY_OBJECT(node->doc)) ? 1 : 0 ;
|
1384
|
+
mark_method = node_has_a_document ? mark : NULL ;
|
1295
1385
|
|
1296
1386
|
if (DOC_RUBY_OBJECT_TEST(node->doc)) { // maglev workaround , no gc mark
|
1297
1387
|
VALUE ref = DOC_RUBY_OBJECT(node->doc);
|
@@ -1363,7 +1453,6 @@ void init_xml_node()
|
|
1363
1453
|
rb_define_method(klass, "key?", key_eh, 1);
|
1364
1454
|
rb_define_method(klass, "namespaced_key?", namespaced_key_eh, 2);
|
1365
1455
|
rb_define_method(klass, "blank?", blank_eh, 0);
|
1366
|
-
rb_define_method(klass, "[]=", set, 2);
|
1367
1456
|
rb_define_method(klass, "attribute_nodes", attribute_nodes, 0);
|
1368
1457
|
rb_define_method(klass, "attribute", attr, 1);
|
1369
1458
|
rb_define_method(klass, "attribute_with_ns", attribute_with_ns, 2);
|
@@ -1380,6 +1469,7 @@ void init_xml_node()
|
|
1380
1469
|
rb_define_method(klass, "pointer_id", pointer_id, 0);
|
1381
1470
|
rb_define_method(klass, "line", line, 0);
|
1382
1471
|
|
1472
|
+
rb_define_private_method(klass, "process_xincludes", process_xincludes, 1);
|
1383
1473
|
rb_define_private_method(klass, "in_context", in_context, 2);
|
1384
1474
|
rb_define_private_method(klass, "add_child_node", add_child, 1);
|
1385
1475
|
rb_define_private_method(klass, "add_previous_sibling_node", add_previous_sibling, 1);
|
@@ -1389,9 +1479,12 @@ void init_xml_node()
|
|
1389
1479
|
rb_define_private_method(klass, "native_write_to", native_write_to, 4);
|
1390
1480
|
rb_define_private_method(klass, "native_content=", set_content, 1);
|
1391
1481
|
rb_define_private_method(klass, "get", get, 1);
|
1482
|
+
rb_define_private_method(klass, "set", set, 2);
|
1392
1483
|
rb_define_private_method(klass, "set_namespace", set_namespace, 1);
|
1393
1484
|
rb_define_private_method(klass, "compare", compare, 1);
|
1394
1485
|
|
1395
1486
|
decorate = rb_intern("decorate");
|
1396
1487
|
decorate_bang = rb_intern("decorate!");
|
1397
1488
|
}
|
1489
|
+
|
1490
|
+
/* vim: set noet sw=4 sts=4: */
|