nokogiri-maglev- 1.5.0.1 → 1.5.2
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG.ja.rdoc +56 -12
- data/CHANGELOG.rdoc +49 -0
- data/C_CODING_STYLE.rdoc +27 -0
- data/Manifest.txt +4 -0
- data/README.rdoc +11 -7
- data/Rakefile +42 -27
- data/bin/nokogiri +10 -2
- data/ext/nokogiri/extconf.rb +11 -3
- data/ext/nokogiri/html_document.c +16 -0
- data/ext/nokogiri/html_sax_parser_context.c +59 -37
- data/ext/nokogiri/html_sax_push_parser.c +87 -0
- data/ext/nokogiri/html_sax_push_parser.h +9 -0
- data/ext/nokogiri/nokogiri.c +7 -9
- data/ext/nokogiri/nokogiri.h +3 -0
- data/ext/nokogiri/xml_document.c +101 -3
- data/ext/nokogiri/xml_document.h +3 -3
- data/ext/nokogiri/xml_node.c +151 -58
- data/ext/nokogiri/xml_node_set.c +169 -120
- data/ext/nokogiri/xml_node_set.h +5 -0
- data/ext/nokogiri/xml_sax_parser_context.c +64 -41
- data/ext/nokogiri/xml_text.c +2 -0
- data/ext/nokogiri/xml_xpath_context.c +31 -25
- data/ext/nokogiri/xslt_stylesheet.c +62 -16
- data/ext/nokogiri/xslt_stylesheet.h +5 -0
- data/lib/nokogiri/css/parser.rb +165 -159
- data/lib/nokogiri/css/parser.y +6 -3
- data/lib/nokogiri/css/tokenizer.rb +1 -1
- data/lib/nokogiri/css/tokenizer.rex +1 -1
- data/lib/nokogiri/html.rb +1 -0
- data/lib/nokogiri/html/document.rb +82 -42
- data/lib/nokogiri/html/sax/push_parser.rb +16 -0
- data/lib/nokogiri/version.rb +1 -1
- data/lib/nokogiri/xml.rb +6 -0
- data/lib/nokogiri/xml/builder.rb +7 -1
- data/lib/nokogiri/xml/document.rb +32 -17
- data/lib/nokogiri/xml/document_fragment.rb +6 -1
- data/lib/nokogiri/xml/node.rb +40 -9
- data/lib/nokogiri/xslt.rb +5 -1
- data/tasks/cross_compile.rb +1 -0
- data/tasks/nokogiri.org.rb +6 -0
- data/tasks/test.rb +1 -0
- data/test/css/test_xpath_visitor.rb +6 -0
- data/test/helper.rb +1 -0
- data/test/html/test_document.rb +26 -0
- data/test/html/test_document_fragment.rb +1 -2
- data/test/test_memory_leak.rb +81 -1
- data/test/test_xslt_transforms.rb +152 -123
- data/test/xml/test_builder.rb +24 -2
- data/test/xml/test_c14n.rb +151 -0
- data/test/xml/test_document.rb +48 -0
- data/test/xml/test_namespace.rb +5 -0
- data/test/xml/test_node.rb +82 -1
- data/test/xml/test_node_attributes.rb +19 -0
- data/test/xml/test_node_inheritance.rb +32 -0
- data/test/xml/test_node_reparenting.rb +32 -0
- data/test/xml/test_node_set.rb +16 -8
- data/test/xml/test_reader_encoding.rb +16 -0
- data/test/xml/test_unparented_node.rb +32 -0
- data/test/xml/test_xinclude.rb +83 -0
- data/test/xml/test_xpath.rb +22 -0
- metadata +208 -241
@@ -13,31 +13,35 @@ static void deallocate(xmlParserCtxtPtr ctxt)
|
|
13
13
|
NOKOGIRI_DEBUG_END(handler);
|
14
14
|
}
|
15
15
|
|
16
|
-
static VALUE
|
16
|
+
static VALUE
|
17
|
+
parse_memory(VALUE klass, VALUE data, VALUE encoding)
|
17
18
|
{
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
19
|
+
htmlParserCtxtPtr ctxt;
|
20
|
+
|
21
|
+
if (NIL_P(data))
|
22
|
+
rb_raise(rb_eArgError, "data cannot be nil");
|
23
|
+
if (!(int)RSTRING_LEN(data))
|
24
|
+
rb_raise(rb_eRuntimeError, "data cannot be empty");
|
25
|
+
|
26
|
+
ctxt = htmlCreateMemoryParserCtxt(StringValuePtr(data),
|
27
|
+
(int)RSTRING_LEN(data));
|
28
|
+
if (ctxt->sax) {
|
29
|
+
xmlFree(ctxt->sax);
|
30
|
+
ctxt->sax = NULL;
|
31
|
+
}
|
28
32
|
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
33
|
+
if (RTEST(encoding)) {
|
34
|
+
xmlCharEncodingHandlerPtr enc = xmlFindCharEncodingHandler(StringValuePtr(encoding));
|
35
|
+
if (enc != NULL) {
|
36
|
+
xmlSwitchToEncoding(ctxt, enc);
|
37
|
+
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
|
38
|
+
rb_raise(rb_eRuntimeError, "Unsupported encoding %s",
|
39
|
+
StringValuePtr(encoding));
|
40
|
+
}
|
41
|
+
}
|
37
42
|
}
|
38
|
-
}
|
39
43
|
|
40
|
-
|
44
|
+
return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);
|
41
45
|
}
|
42
46
|
|
43
47
|
static VALUE parse_file(VALUE klass, VALUE filename, VALUE encoding)
|
@@ -49,30 +53,48 @@ static VALUE parse_file(VALUE klass, VALUE filename, VALUE encoding)
|
|
49
53
|
return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);
|
50
54
|
}
|
51
55
|
|
52
|
-
static VALUE
|
56
|
+
static VALUE
|
57
|
+
parse_doc(VALUE ctxt_val)
|
58
|
+
{
|
59
|
+
htmlParserCtxtPtr ctxt = (htmlParserCtxtPtr)ctxt_val;
|
60
|
+
htmlParseDocument(ctxt);
|
61
|
+
return Qnil;
|
62
|
+
}
|
63
|
+
|
64
|
+
static VALUE
|
65
|
+
parse_doc_finalize(VALUE ctxt_val)
|
53
66
|
{
|
54
|
-
|
55
|
-
|
67
|
+
htmlParserCtxtPtr ctxt = (htmlParserCtxtPtr)ctxt_val;
|
68
|
+
|
69
|
+
if (ctxt->myDoc)
|
70
|
+
xmlFreeDoc(ctxt->myDoc);
|
56
71
|
|
57
|
-
|
58
|
-
|
72
|
+
NOKOGIRI_SAX_TUPLE_DESTROY(ctxt->userData);
|
73
|
+
return Qnil;
|
74
|
+
}
|
75
|
+
|
76
|
+
static VALUE
|
77
|
+
parse_with(VALUE self, VALUE sax_handler)
|
78
|
+
{
|
79
|
+
htmlParserCtxtPtr ctxt;
|
80
|
+
htmlSAXHandlerPtr sax;
|
59
81
|
|
60
|
-
|
61
|
-
|
82
|
+
if (!RTEST(rb_obj_is_kind_of(sax_handler, cNokogiriXmlSaxParser)))
|
83
|
+
rb_raise(rb_eArgError, "argument must be a Nokogiri::XML::SAX::Parser");
|
62
84
|
|
63
|
-
|
64
|
-
|
65
|
-
xmlFree(ctxt->sax);
|
85
|
+
Data_Get_Struct(self, htmlParserCtxt, ctxt);
|
86
|
+
Data_Get_Struct(sax_handler, htmlSAXHandler, sax);
|
66
87
|
|
67
|
-
|
68
|
-
|
88
|
+
/* Free the sax handler since we'll assign our own */
|
89
|
+
if (ctxt->sax && ctxt->sax != (xmlSAXHandlerPtr)&xmlDefaultSAXHandler)
|
90
|
+
xmlFree(ctxt->sax);
|
69
91
|
|
70
|
-
|
92
|
+
ctxt->sax = sax;
|
93
|
+
ctxt->userData = (void *)NOKOGIRI_SAX_TUPLE_NEW(ctxt, sax_handler);
|
71
94
|
|
72
|
-
|
95
|
+
rb_ensure(parse_doc, (VALUE)ctxt, parse_doc_finalize, (VALUE)ctxt);
|
73
96
|
|
74
|
-
|
75
|
-
return self;
|
97
|
+
return self;
|
76
98
|
}
|
77
99
|
|
78
100
|
void init_html_sax_parser_context()
|
@@ -0,0 +1,87 @@
|
|
1
|
+
#include <html_sax_push_parser.h>
|
2
|
+
|
3
|
+
/*
|
4
|
+
* call-seq:
|
5
|
+
* native_write(chunk, last_chunk)
|
6
|
+
*
|
7
|
+
* Write +chunk+ to PushParser. +last_chunk+ triggers the end_document handler
|
8
|
+
*/
|
9
|
+
static VALUE native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
|
10
|
+
{
|
11
|
+
xmlParserCtxtPtr ctx;
|
12
|
+
const char * chunk = NULL;
|
13
|
+
int size = 0;
|
14
|
+
|
15
|
+
|
16
|
+
Data_Get_Struct(self, xmlParserCtxt, ctx);
|
17
|
+
|
18
|
+
if(Qnil != _chunk) {
|
19
|
+
chunk = StringValuePtr(_chunk);
|
20
|
+
size = (int)RSTRING_LEN(_chunk);
|
21
|
+
}
|
22
|
+
|
23
|
+
if(htmlParseChunk(ctx, chunk, size, Qtrue == _last_chunk ? 1 : 0)) {
|
24
|
+
if (!(ctx->options & XML_PARSE_RECOVER)) {
|
25
|
+
xmlErrorPtr e = xmlCtxtGetLastError(ctx);
|
26
|
+
Nokogiri_error_raise(NULL, e);
|
27
|
+
}
|
28
|
+
}
|
29
|
+
|
30
|
+
return self;
|
31
|
+
}
|
32
|
+
|
33
|
+
/*
|
34
|
+
* call-seq:
|
35
|
+
* initialize_native(xml_sax, filename, encoding)
|
36
|
+
*
|
37
|
+
* Initialize the push parser with +xml_sax+ using +filename+ and +encoding+
|
38
|
+
*/
|
39
|
+
static VALUE initialize_native(VALUE self, VALUE _xml_sax, VALUE _filename,
|
40
|
+
VALUE encoding)
|
41
|
+
{
|
42
|
+
htmlSAXHandlerPtr sax;
|
43
|
+
const char * filename = NULL;
|
44
|
+
htmlParserCtxtPtr ctx;
|
45
|
+
xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
|
46
|
+
|
47
|
+
Data_Get_Struct(_xml_sax, xmlSAXHandler, sax);
|
48
|
+
|
49
|
+
if(_filename != Qnil) filename = StringValuePtr(_filename);
|
50
|
+
|
51
|
+
if (!NIL_P(encoding)) {
|
52
|
+
enc = xmlParseCharEncoding(StringValuePtr(encoding));
|
53
|
+
if (enc == XML_CHAR_ENCODING_ERROR)
|
54
|
+
rb_raise(rb_eArgError, "Unsupported Encoding");
|
55
|
+
}
|
56
|
+
|
57
|
+
ctx = htmlCreatePushParserCtxt(
|
58
|
+
sax,
|
59
|
+
NULL,
|
60
|
+
NULL,
|
61
|
+
0,
|
62
|
+
filename,
|
63
|
+
enc
|
64
|
+
);
|
65
|
+
if(ctx == NULL)
|
66
|
+
rb_raise(rb_eRuntimeError, "Could not create a parser context");
|
67
|
+
|
68
|
+
ctx->userData = NOKOGIRI_SAX_TUPLE_NEW(ctx, self);
|
69
|
+
|
70
|
+
ctx->sax2 = 1;
|
71
|
+
rb_rdata_store(self, ctx); // DATA_PTR(self) = ctx;
|
72
|
+
return self;
|
73
|
+
}
|
74
|
+
|
75
|
+
VALUE cNokogiriHtmlSaxPushParser;
|
76
|
+
void init_html_sax_push_parser()
|
77
|
+
{
|
78
|
+
VALUE nokogiri = rb_define_module("Nokogiri");
|
79
|
+
VALUE html = rb_define_module_under(nokogiri, "HTML");
|
80
|
+
VALUE sax = rb_define_module_under(html, "SAX");
|
81
|
+
VALUE klass = rb_define_class_under(sax, "PushParser", cNokogiriXmlSaxPushParser);
|
82
|
+
|
83
|
+
cNokogiriHtmlSaxPushParser = klass;
|
84
|
+
|
85
|
+
rb_define_private_method(klass, "initialize_native", initialize_native, 3);
|
86
|
+
rb_define_private_method(klass, "native_write", native_write, 2);
|
87
|
+
}
|
data/ext/nokogiri/nokogiri.c
CHANGED
@@ -35,20 +35,17 @@ void vasprintf_free (void *p)
|
|
35
35
|
}
|
36
36
|
#endif
|
37
37
|
|
38
|
+
#ifdef HAVE_RUBY_UTIL_H
|
39
|
+
#include "ruby/util.h"
|
40
|
+
#else
|
38
41
|
#ifndef __MACRUBY__
|
39
|
-
|
40
|
-
|
41
|
-
{
|
42
|
-
size_t len = strlen(s);
|
43
|
-
char *result = ruby_xmalloc((ssize_t) (len + 1));
|
44
|
-
memcpy(result, s, len + 1);
|
45
|
-
return result;
|
46
|
-
}
|
42
|
+
#include "util.h"
|
43
|
+
#endif
|
47
44
|
#endif
|
48
45
|
|
49
46
|
void Init_nokogiri()
|
50
47
|
{
|
51
|
-
#
|
48
|
+
#if !(defined __MACRUBY__) && !(defined MAGLEV)
|
52
49
|
xmlMemSetup(
|
53
50
|
(xmlFreeFunc)ruby_xfree,
|
54
51
|
(xmlMallocFunc)ruby_xmalloc,
|
@@ -104,6 +101,7 @@ void Init_nokogiri()
|
|
104
101
|
init_xml_entity_decl();
|
105
102
|
init_xml_namespace();
|
106
103
|
init_html_sax_parser_context();
|
104
|
+
init_html_sax_push_parser();
|
107
105
|
init_xslt_stylesheet();
|
108
106
|
init_xml_syntax_error();
|
109
107
|
init_html_entity_lookup();
|
data/ext/nokogiri/nokogiri.h
CHANGED
@@ -27,7 +27,9 @@ int vasprintf (char **strp, const char *fmt, va_list ap);
|
|
27
27
|
#include <libxml/HTMLparser.h>
|
28
28
|
#include <libxml/HTMLtree.h>
|
29
29
|
#include <libxml/relaxng.h>
|
30
|
+
#include <libxml/xinclude.h>
|
30
31
|
#include <libxslt/extensions.h>
|
32
|
+
#include <libxml/c14n.h>
|
31
33
|
#include <ruby.h>
|
32
34
|
|
33
35
|
#ifdef HAVE_RUBY_ENCODING_H
|
@@ -102,6 +104,7 @@ int vasprintf (char **strp, const char *fmt, va_list ap);
|
|
102
104
|
#include <xml_sax_push_parser.h>
|
103
105
|
#include <xml_reader.h>
|
104
106
|
#include <html_sax_parser_context.h>
|
107
|
+
#include <html_sax_push_parser.h>
|
105
108
|
#include <xslt_stylesheet.h>
|
106
109
|
#include <xml_syntax_error.h>
|
107
110
|
#include <xml_schema.h>
|
data/ext/nokogiri/xml_document.c
CHANGED
@@ -48,12 +48,15 @@ static void recursively_remove_namespaces_from_node(xmlNodePtr node)
|
|
48
48
|
for (child = node->children ; child ; child = child->next)
|
49
49
|
recursively_remove_namespaces_from_node(child);
|
50
50
|
|
51
|
-
if (node->
|
51
|
+
if (((node->type == XML_ELEMENT_NODE) ||
|
52
|
+
(node->type == XML_XINCLUDE_START) ||
|
53
|
+
(node->type == XML_XINCLUDE_END)) &&
|
54
|
+
node->nsDef) {
|
52
55
|
xmlFreeNsList(node->nsDef);
|
53
56
|
node->nsDef = NULL;
|
54
57
|
}
|
55
58
|
|
56
|
-
if (node->properties != NULL) {
|
59
|
+
if (node->type == XML_ELEMENT_NODE && node->properties != NULL) {
|
57
60
|
property = node->properties ;
|
58
61
|
while (property != NULL) {
|
59
62
|
if (property->ns) property->ns = NULL ;
|
@@ -152,6 +155,9 @@ static VALUE set_encoding(VALUE self, VALUE encoding)
|
|
152
155
|
xmlDocPtr doc;
|
153
156
|
Data_Get_Struct(self, xmlDoc, doc);
|
154
157
|
|
158
|
+
if (doc->encoding)
|
159
|
+
free((char *) doc->encoding); // this may produce a gcc cast warning
|
160
|
+
|
155
161
|
doc->encoding = xmlStrdup((xmlChar *)StringValuePtr(encoding));
|
156
162
|
|
157
163
|
return encoding;
|
@@ -421,6 +427,97 @@ static VALUE create_entity(int argc, VALUE *argv, VALUE self)
|
|
421
427
|
return Nokogiri_wrap_xml_node(cNokogiriXmlEntityDecl, (xmlNodePtr)ptr);
|
422
428
|
}
|
423
429
|
|
430
|
+
static int block_caller(void * ctx, xmlNodePtr _node, xmlNodePtr _parent)
|
431
|
+
{
|
432
|
+
VALUE block;
|
433
|
+
VALUE node;
|
434
|
+
VALUE parent;
|
435
|
+
VALUE ret;
|
436
|
+
|
437
|
+
if(_node->type == XML_NAMESPACE_DECL){
|
438
|
+
node = Nokogiri_wrap_xml_namespace(_parent->doc, (xmlNsPtr) _node);
|
439
|
+
}
|
440
|
+
else{
|
441
|
+
node = Nokogiri_wrap_xml_node(Qnil, _node);
|
442
|
+
}
|
443
|
+
parent = _parent ? Nokogiri_wrap_xml_node(Qnil, _parent) : Qnil;
|
444
|
+
block = (VALUE)ctx;
|
445
|
+
|
446
|
+
ret = rb_funcall(block, rb_intern("call"), 2, node, parent);
|
447
|
+
|
448
|
+
if(Qfalse == ret || Qnil == ret) return 0;
|
449
|
+
|
450
|
+
return 1;
|
451
|
+
}
|
452
|
+
|
453
|
+
/* call-seq:
|
454
|
+
* doc.canonicalize(mode=XML_C14N_1_0,inclusive_namespaces=nil,with_comments=false)
|
455
|
+
* doc.canonicalize { |obj, parent| ... }
|
456
|
+
*
|
457
|
+
* Canonicalize a document and return the results. Takes an optional block
|
458
|
+
* that takes two parameters: the +obj+ and that node's +parent+.
|
459
|
+
* The +obj+ will be either a Nokogiri::XML::Node or a Nokogiri::XML::Namespace.
|
460
|
+
* The block must return a non-nil, non-false value if the +obj+ passed in
|
461
|
+
* should be included in the canonicalized document.
|
462
|
+
*/
|
463
|
+
static VALUE canonicalize(int argc, VALUE* argv, VALUE self)
|
464
|
+
{
|
465
|
+
VALUE mode;
|
466
|
+
VALUE incl_ns;
|
467
|
+
VALUE with_comments;
|
468
|
+
xmlChar **ns;
|
469
|
+
long ns_len, i;
|
470
|
+
|
471
|
+
xmlDocPtr doc;
|
472
|
+
xmlOutputBufferPtr buf;
|
473
|
+
xmlC14NIsVisibleCallback cb = NULL;
|
474
|
+
void * ctx = NULL;
|
475
|
+
|
476
|
+
VALUE rb_cStringIO;
|
477
|
+
VALUE io;
|
478
|
+
|
479
|
+
rb_scan_args(argc, argv, "03", &mode, &incl_ns, &with_comments);
|
480
|
+
|
481
|
+
Data_Get_Struct(self, xmlDoc, doc);
|
482
|
+
|
483
|
+
rb_cStringIO = rb_const_get_at(rb_cObject, rb_intern("StringIO"));
|
484
|
+
io = rb_class_new_instance(0, 0, rb_cStringIO);
|
485
|
+
buf = xmlAllocOutputBuffer(NULL);
|
486
|
+
|
487
|
+
buf->writecallback = (xmlOutputWriteCallback)io_write_callback;
|
488
|
+
buf->closecallback = (xmlOutputCloseCallback)io_close_callback;
|
489
|
+
buf->context = (void *)io;
|
490
|
+
|
491
|
+
if(rb_block_given_p()) {
|
492
|
+
cb = block_caller;
|
493
|
+
ctx = (void *)rb_block_proc();
|
494
|
+
}
|
495
|
+
|
496
|
+
if(NIL_P(incl_ns)){
|
497
|
+
ns = NULL;
|
498
|
+
}
|
499
|
+
else{
|
500
|
+
ns_len = RARRAY_LEN(incl_ns);
|
501
|
+
ns = calloc((size_t)ns_len+1, sizeof(xmlChar *));
|
502
|
+
for (i = 0 ; i < ns_len ; i++) {
|
503
|
+
VALUE entry = rb_ary_entry(incl_ns, i);
|
504
|
+
const char * ptr = StringValuePtr(entry);
|
505
|
+
ns[i] = (xmlChar*) ptr;
|
506
|
+
}
|
507
|
+
}
|
508
|
+
|
509
|
+
|
510
|
+
xmlC14NExecute(doc, cb, ctx,
|
511
|
+
(int) (NIL_P(mode) ? 0 : NUM2INT(mode)),
|
512
|
+
ns,
|
513
|
+
(int) (NIL_P(with_comments) ? 0 : 1),
|
514
|
+
buf);
|
515
|
+
|
516
|
+
xmlOutputBufferClose(buf);
|
517
|
+
|
518
|
+
return rb_funcall(io, rb_intern("string"), 0);
|
519
|
+
}
|
520
|
+
|
424
521
|
VALUE cNokogiriXmlDocument ;
|
425
522
|
void init_xml_document()
|
426
523
|
{
|
@@ -444,6 +541,7 @@ void init_xml_document()
|
|
444
541
|
rb_define_method(klass, "encoding", encoding, 0);
|
445
542
|
rb_define_method(klass, "encoding=", set_encoding, 1);
|
446
543
|
rb_define_method(klass, "version", version, 0);
|
544
|
+
rb_define_method(klass, "canonicalize", canonicalize, -1);
|
447
545
|
rb_define_method(klass, "dup", duplicate_node, -1);
|
448
546
|
rb_define_method(klass, "url", url, 0);
|
449
547
|
rb_define_method(klass, "create_entity", create_entity, -1);
|
@@ -467,7 +565,7 @@ VALUE Nokogiri_wrap_xml_document(VALUE klass, xmlDocPtr doc)
|
|
467
565
|
rb_iv_set(rb_doc, "@decorators", Qnil);
|
468
566
|
rb_iv_set(rb_doc, "@node_cache", cache);
|
469
567
|
|
470
|
-
tuple->doc =
|
568
|
+
tuple->doc = rb_doc;
|
471
569
|
tuple->unlinkedNodes = st_init_numtable_with_size(128);
|
472
570
|
tuple->node_cache = cache;
|
473
571
|
doc->_private = tuple ;
|
data/ext/nokogiri/xml_document.h
CHANGED
@@ -4,7 +4,7 @@
|
|
4
4
|
#include <nokogiri.h>
|
5
5
|
|
6
6
|
struct _nokogiriTuple {
|
7
|
-
|
7
|
+
VALUE doc;
|
8
8
|
st_table *unlinkedNodes;
|
9
9
|
VALUE node_cache;
|
10
10
|
};
|
@@ -15,9 +15,9 @@ void init_xml_document();
|
|
15
15
|
VALUE Nokogiri_wrap_xml_document(VALUE klass, xmlDocPtr doc);
|
16
16
|
|
17
17
|
#define DOC_RUBY_OBJECT_TEST(x) ((nokogiriTuplePtr)(x->_private))
|
18
|
-
#define DOC_RUBY_OBJECT(x) ((
|
18
|
+
#define DOC_RUBY_OBJECT(x) (((nokogiriTuplePtr)(x->_private))->doc)
|
19
19
|
#define DOC_UNLINKED_NODE_HASH(x) (((nokogiriTuplePtr)(x->_private))->unlinkedNodes)
|
20
|
-
#define DOC_NODE_CACHE(x) ((
|
20
|
+
#define DOC_NODE_CACHE(x) (((nokogiriTuplePtr)(x->_private))->node_cache)
|
21
21
|
|
22
22
|
extern VALUE cNokogiriXmlDocument ;
|
23
23
|
#endif
|
data/ext/nokogiri/xml_node.c
CHANGED
@@ -138,7 +138,8 @@ static VALUE reparent_node_with(VALUE pivot_obj, VALUE reparentee_obj, pivot_rep
|
|
138
138
|
}
|
139
139
|
}
|
140
140
|
|
141
|
-
if (
|
141
|
+
if (prf != xmlAddPrevSibling && prf != xmlAddNextSibling
|
142
|
+
&& reparentee->type == XML_TEXT_NODE && pivot->next && pivot->next->type == XML_TEXT_NODE) {
|
142
143
|
/*
|
143
144
|
* libxml merges text nodes in a right-to-left fashion, meaning that if
|
144
145
|
* there are two text nodes who would be adjacent, the right (or following,
|
@@ -484,7 +485,13 @@ static VALUE previous_element(VALUE self)
|
|
484
485
|
/* :nodoc: */
|
485
486
|
static VALUE replace(VALUE self, VALUE new_node)
|
486
487
|
{
|
487
|
-
|
488
|
+
VALUE reparent = reparent_node_with(self, new_node, xmlReplaceNodeWrapper);
|
489
|
+
|
490
|
+
xmlNodePtr pivot;
|
491
|
+
Data_Get_Struct(self, xmlNode, pivot);
|
492
|
+
NOKOGIRI_ROOT_NODE(pivot);
|
493
|
+
|
494
|
+
return reparent;
|
488
495
|
}
|
489
496
|
|
490
497
|
/*
|
@@ -658,9 +665,28 @@ static VALUE namespaced_key_eh(VALUE self, VALUE attribute, VALUE namespace)
|
|
658
665
|
*/
|
659
666
|
static VALUE set(VALUE self, VALUE property, VALUE value)
|
660
667
|
{
|
661
|
-
xmlNodePtr node;
|
668
|
+
xmlNodePtr node, cur;
|
669
|
+
xmlAttrPtr prop;
|
662
670
|
Data_Get_Struct(self, xmlNode, node);
|
663
671
|
|
672
|
+
/* If a matching attribute node already exists, then xmlSetProp will destroy
|
673
|
+
* the existing node's children. However, if Nokogiri has a node object
|
674
|
+
* pointing to one of those children, we are left with a broken reference.
|
675
|
+
*
|
676
|
+
* We can avoid this by unlinking these nodes first.
|
677
|
+
*/
|
678
|
+
if (node->type != XML_ELEMENT_NODE)
|
679
|
+
return(Qnil);
|
680
|
+
prop = xmlHasProp(node, (xmlChar *)StringValuePtr(property));
|
681
|
+
if (prop && prop->children) {
|
682
|
+
for (cur = prop->children; cur; cur = cur->next) {
|
683
|
+
if (cur->_private) {
|
684
|
+
NOKOGIRI_ROOT_NODE(cur);
|
685
|
+
xmlUnlinkNode(cur);
|
686
|
+
}
|
687
|
+
}
|
688
|
+
}
|
689
|
+
|
664
690
|
xmlSetProp(node, (xmlChar *)StringValuePtr(property),
|
665
691
|
(xmlChar *)StringValuePtr(value));
|
666
692
|
|
@@ -1157,73 +1183,134 @@ static VALUE compare(VALUE self, VALUE _other)
|
|
1157
1183
|
}
|
1158
1184
|
|
1159
1185
|
|
1160
|
-
/*
|
1161
|
-
|
1186
|
+
/*
|
1187
|
+
* call-seq:
|
1188
|
+
* process_xincludes(options)
|
1189
|
+
*
|
1190
|
+
* Loads and substitutes all xinclude elements below the node. The
|
1191
|
+
* parser context will be initialized with +options+.
|
1192
|
+
*/
|
1193
|
+
static VALUE process_xincludes(VALUE self, VALUE options)
|
1162
1194
|
{
|
1195
|
+
int rcode ;
|
1163
1196
|
xmlNodePtr node;
|
1164
|
-
|
1165
|
-
xmlNodePtr child_iter;
|
1166
|
-
xmlNodeSetPtr set;
|
1167
|
-
xmlParserErrors error;
|
1168
|
-
VALUE doc, err;
|
1197
|
+
VALUE error_list = rb_ary_new();
|
1169
1198
|
|
1170
1199
|
Data_Get_Struct(self, xmlNode, node);
|
1171
1200
|
|
1172
|
-
|
1173
|
-
|
1201
|
+
xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);
|
1202
|
+
rcode = xmlXIncludeProcessTreeFlags(node, (int)NUM2INT(options));
|
1203
|
+
xmlSetStructuredErrorFunc(NULL, NULL);
|
1174
1204
|
|
1175
|
-
|
1205
|
+
if (rcode < 0) {
|
1206
|
+
xmlErrorPtr error;
|
1176
1207
|
|
1177
|
-
|
1178
|
-
|
1179
|
-
|
1208
|
+
error = xmlGetLastError();
|
1209
|
+
if(error)
|
1210
|
+
rb_exc_raise(Nokogiri_wrap_xml_syntax_error((VALUE)NULL, error));
|
1211
|
+
else
|
1212
|
+
rb_raise(rb_eRuntimeError, "Could not perform xinclude substitution");
|
1213
|
+
}
|
1214
|
+
|
1215
|
+
return self;
|
1216
|
+
}
|
1217
|
+
|
1218
|
+
|
1219
|
+
/* TODO: DOCUMENT ME */
|
1220
|
+
static VALUE in_context(VALUE self, VALUE _str, VALUE _options)
|
1221
|
+
{
|
1222
|
+
xmlNodePtr node, list, child_iter, tmp, node_children, doc_children;
|
1223
|
+
xmlNodeSetPtr set;
|
1224
|
+
xmlParserErrors error;
|
1225
|
+
VALUE doc, err;
|
1226
|
+
int doc_is_empty;
|
1227
|
+
|
1228
|
+
Data_Get_Struct(self, xmlNode, node);
|
1229
|
+
|
1230
|
+
doc = DOC_RUBY_OBJECT(node->doc);
|
1231
|
+
err = rb_iv_get(doc, "@errors");
|
1232
|
+
doc_is_empty = (node->doc->children == NULL) ? 1 : 0;
|
1233
|
+
node_children = node->children;
|
1234
|
+
doc_children = node->doc->children;
|
1235
|
+
|
1236
|
+
xmlSetStructuredErrorFunc((void *)err, Nokogiri_error_array_pusher);
|
1237
|
+
|
1238
|
+
/* Twiddle global variable because of a bug in libxml2.
|
1239
|
+
* http://git.gnome.org/browse/libxml2/commit/?id=e20fb5a72c83cbfc8e4a8aa3943c6be8febadab7
|
1240
|
+
*/
|
1180
1241
|
#ifndef HTML_PARSE_NOIMPLIED
|
1181
|
-
|
1242
|
+
htmlHandleOmittedElem(0);
|
1182
1243
|
#endif
|
1183
1244
|
|
1184
|
-
|
1185
|
-
|
1186
|
-
|
1187
|
-
|
1188
|
-
|
1189
|
-
|
1190
|
-
|
1191
|
-
|
1192
|
-
|
1193
|
-
|
1194
|
-
|
1195
|
-
|
1196
|
-
|
1197
|
-
|
1245
|
+
/* This function adds a fake node to the child of +node+. If the parser
|
1246
|
+
* does not exit cleanly with XML_ERR_OK, the list is freed. This can
|
1247
|
+
* leave the child pointers in a bad state if they were originally empty.
|
1248
|
+
*
|
1249
|
+
* http://git.gnome.org/browse/libxml2/tree/parser.c#n13177
|
1250
|
+
* */
|
1251
|
+
error = xmlParseInNodeContext(node, StringValuePtr(_str),
|
1252
|
+
(int)RSTRING_LEN(_str),
|
1253
|
+
(int)NUM2INT(_options), &list);
|
1254
|
+
|
1255
|
+
/* xmlParseInNodeContext should not mutate the original document or node,
|
1256
|
+
* so reassigning these pointers should be OK. The reason we're reassigning
|
1257
|
+
* is because if there were errors, it's possible for the child pointers
|
1258
|
+
* to be manipulated. */
|
1259
|
+
if (error != XML_ERR_OK) {
|
1260
|
+
node->doc->children = doc_children;
|
1261
|
+
node->children = node_children;
|
1262
|
+
}
|
1263
|
+
|
1264
|
+
/* make sure parent/child pointers are coherent so an unlink will work
|
1265
|
+
* properly (#331)
|
1266
|
+
*/
|
1267
|
+
child_iter = node->doc->children ;
|
1268
|
+
while (child_iter) {
|
1269
|
+
if (child_iter->parent != (xmlNodePtr)node->doc)
|
1270
|
+
child_iter->parent = (xmlNodePtr)node->doc;
|
1271
|
+
child_iter = child_iter->next;
|
1272
|
+
}
|
1198
1273
|
|
1199
1274
|
#ifndef HTML_PARSE_NOIMPLIED
|
1200
|
-
|
1275
|
+
htmlHandleOmittedElem(1);
|
1201
1276
|
#endif
|
1202
1277
|
|
1203
|
-
|
1278
|
+
xmlSetStructuredErrorFunc(NULL, NULL);
|
1204
1279
|
|
1205
|
-
|
1206
|
-
|
1207
|
-
|
1208
|
-
|
1209
|
-
|
1210
|
-
|
1211
|
-
|
1212
|
-
|
1213
|
-
|
1280
|
+
/* Workaround for a libxml2 bug where a parsing error may leave a broken
|
1281
|
+
* node reference in node->doc->children.
|
1282
|
+
* This workaround is limited to when a parse error occurs, the document
|
1283
|
+
* went from having no children to having children, and the context node is
|
1284
|
+
* part of a document fragment.
|
1285
|
+
* https://bugzilla.gnome.org/show_bug.cgi?id=668155
|
1286
|
+
*/
|
1287
|
+
if (error != XML_ERR_OK && doc_is_empty && node->doc->children != NULL) {
|
1288
|
+
tmp = node;
|
1289
|
+
while (tmp->parent)
|
1290
|
+
tmp = tmp->parent;
|
1291
|
+
|
1292
|
+
if (tmp->type == XML_DOCUMENT_FRAG_NODE)
|
1293
|
+
node->doc->children = NULL;
|
1294
|
+
}
|
1214
1295
|
|
1215
|
-
|
1216
|
-
|
1217
|
-
|
1296
|
+
/* FIXME: This probably needs to handle more constants... */
|
1297
|
+
switch (error) {
|
1298
|
+
case XML_ERR_INTERNAL_ERROR:
|
1299
|
+
case XML_ERR_NO_MEMORY:
|
1300
|
+
rb_raise(rb_eRuntimeError, "error parsing fragment (%d)", error);
|
1301
|
+
break;
|
1302
|
+
default:
|
1303
|
+
break;
|
1304
|
+
}
|
1218
1305
|
|
1219
|
-
|
1306
|
+
set = xmlXPathNodeSetCreate(NULL);
|
1220
1307
|
|
1221
|
-
|
1222
|
-
|
1223
|
-
|
1224
|
-
|
1308
|
+
while (list) {
|
1309
|
+
xmlXPathNodeSetAddUnique(set, list);
|
1310
|
+
list = list->next;
|
1311
|
+
}
|
1225
1312
|
|
1226
|
-
|
1313
|
+
return Nokogiri_wrap_xml_node_set(set, doc);
|
1227
1314
|
}
|
1228
1315
|
|
1229
1316
|
static VALUE sym_iv_doc = Qnil;
|
@@ -1234,7 +1321,7 @@ VALUE Nokogiri_wrap_xml_node(VALUE klass, xmlNodePtr node)
|
|
1234
1321
|
VALUE document = Qnil ;
|
1235
1322
|
VALUE node_cache = Qnil ;
|
1236
1323
|
VALUE rb_node = Qnil ;
|
1237
|
-
|
1324
|
+
nokogiriTuplePtr node_has_a_document;
|
1238
1325
|
void (*mark_method)(xmlNodePtr) = NULL ;
|
1239
1326
|
|
1240
1327
|
assert(node);
|
@@ -1242,7 +1329,13 @@ VALUE Nokogiri_wrap_xml_node(VALUE klass, xmlNodePtr node)
|
|
1242
1329
|
if(node->type == XML_DOCUMENT_NODE || node->type == XML_HTML_DOCUMENT_NODE)
|
1243
1330
|
return DOC_RUBY_OBJECT(node->doc);
|
1244
1331
|
|
1245
|
-
if
|
1332
|
+
/* It's OK if the node doesn't have a fully-realized document (as in XML::Reader). */
|
1333
|
+
/* see https://github.com/tenderlove/nokogiri/issues/95 */
|
1334
|
+
/* and https://github.com/tenderlove/nokogiri/issues/439 */
|
1335
|
+
node_has_a_document = DOC_RUBY_OBJECT_TEST(node->doc);
|
1336
|
+
|
1337
|
+
if(node->_private && node_has_a_document)
|
1338
|
+
return (VALUE)node->_private;
|
1246
1339
|
|
1247
1340
|
if (!RTEST(klass)) {
|
1248
1341
|
switch(node->type) {
|
@@ -1288,10 +1381,7 @@ VALUE Nokogiri_wrap_xml_node(VALUE klass, xmlNodePtr node)
|
|
1288
1381
|
}
|
1289
1382
|
}
|
1290
1383
|
|
1291
|
-
|
1292
|
-
/* see https://github.com/tenderlove/nokogiri/issues/95 */
|
1293
|
-
/* and https://github.com/tenderlove/nokogiri/issues/439 */
|
1294
|
-
node_has_a_document = (DOC_RUBY_OBJECT_TEST(node->doc) && DOC_RUBY_OBJECT(node->doc)) ? 1 : 0 ;
|
1384
|
+
mark_method = node_has_a_document ? mark : NULL ;
|
1295
1385
|
|
1296
1386
|
if (DOC_RUBY_OBJECT_TEST(node->doc)) { // maglev workaround , no gc mark
|
1297
1387
|
VALUE ref = DOC_RUBY_OBJECT(node->doc);
|
@@ -1363,7 +1453,6 @@ void init_xml_node()
|
|
1363
1453
|
rb_define_method(klass, "key?", key_eh, 1);
|
1364
1454
|
rb_define_method(klass, "namespaced_key?", namespaced_key_eh, 2);
|
1365
1455
|
rb_define_method(klass, "blank?", blank_eh, 0);
|
1366
|
-
rb_define_method(klass, "[]=", set, 2);
|
1367
1456
|
rb_define_method(klass, "attribute_nodes", attribute_nodes, 0);
|
1368
1457
|
rb_define_method(klass, "attribute", attr, 1);
|
1369
1458
|
rb_define_method(klass, "attribute_with_ns", attribute_with_ns, 2);
|
@@ -1380,6 +1469,7 @@ void init_xml_node()
|
|
1380
1469
|
rb_define_method(klass, "pointer_id", pointer_id, 0);
|
1381
1470
|
rb_define_method(klass, "line", line, 0);
|
1382
1471
|
|
1472
|
+
rb_define_private_method(klass, "process_xincludes", process_xincludes, 1);
|
1383
1473
|
rb_define_private_method(klass, "in_context", in_context, 2);
|
1384
1474
|
rb_define_private_method(klass, "add_child_node", add_child, 1);
|
1385
1475
|
rb_define_private_method(klass, "add_previous_sibling_node", add_previous_sibling, 1);
|
@@ -1389,9 +1479,12 @@ void init_xml_node()
|
|
1389
1479
|
rb_define_private_method(klass, "native_write_to", native_write_to, 4);
|
1390
1480
|
rb_define_private_method(klass, "native_content=", set_content, 1);
|
1391
1481
|
rb_define_private_method(klass, "get", get, 1);
|
1482
|
+
rb_define_private_method(klass, "set", set, 2);
|
1392
1483
|
rb_define_private_method(klass, "set_namespace", set_namespace, 1);
|
1393
1484
|
rb_define_private_method(klass, "compare", compare, 1);
|
1394
1485
|
|
1395
1486
|
decorate = rb_intern("decorate");
|
1396
1487
|
decorate_bang = rb_intern("decorate!");
|
1397
1488
|
}
|
1489
|
+
|
1490
|
+
/* vim: set noet sw=4 sws=4 */
|