nokogiri 1.11.0.rc1 → 1.11.1
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +3 -0
- data/LICENSE-DEPENDENCIES.md +1015 -947
- data/README.md +164 -92
- data/ext/nokogiri/depend +476 -357
- data/ext/nokogiri/extconf.rb +467 -326
- data/ext/nokogiri/html_document.c +79 -78
- data/ext/nokogiri/html_sax_parser_context.c +4 -2
- data/ext/nokogiri/html_sax_push_parser.c +14 -8
- data/ext/nokogiri/nokogiri.c +37 -46
- data/ext/nokogiri/nokogiri.h +25 -17
- data/ext/nokogiri/test_global_handlers.c +41 -0
- data/ext/nokogiri/xml_document.c +8 -3
- data/ext/nokogiri/xml_io.c +8 -6
- data/ext/nokogiri/xml_node.c +1 -1
- data/ext/nokogiri/xml_node_set.c +1 -1
- data/ext/nokogiri/xml_reader.c +6 -17
- data/ext/nokogiri/xml_relax_ng.c +29 -11
- data/ext/nokogiri/xml_sax_parser.c +2 -7
- data/ext/nokogiri/xml_sax_parser_context.c +4 -2
- data/ext/nokogiri/xml_sax_push_parser.c +2 -0
- data/ext/nokogiri/xml_schema.c +84 -13
- data/ext/nokogiri/xml_syntax_error.c +23 -0
- data/ext/nokogiri/xml_syntax_error.h +15 -3
- data/ext/nokogiri/xml_xpath_context.c +80 -4
- data/ext/nokogiri/xslt_stylesheet.c +1 -4
- data/lib/nokogiri.rb +20 -3
- data/lib/nokogiri/css/parser.rb +62 -62
- data/lib/nokogiri/css/parser.y +2 -2
- data/lib/nokogiri/css/parser_extras.rb +38 -36
- data/lib/nokogiri/css/xpath_visitor.rb +70 -42
- data/lib/nokogiri/html/document.rb +12 -26
- data/lib/nokogiri/version.rb +2 -148
- data/lib/nokogiri/version/constant.rb +5 -0
- data/lib/nokogiri/version/info.rb +182 -0
- data/lib/nokogiri/xml/builder.rb +2 -2
- data/lib/nokogiri/xml/document.rb +17 -7
- data/lib/nokogiri/xml/document_fragment.rb +4 -6
- data/lib/nokogiri/xml/node.rb +562 -238
- data/lib/nokogiri/xml/parse_options.rb +6 -0
- data/lib/nokogiri/xml/relax_ng.rb +6 -2
- data/lib/nokogiri/xml/schema.rb +12 -4
- data/lib/nokogiri/xml/searchable.rb +24 -16
- data/patches/libxml2/0005-Fix-infinite-loop-in-xmlStringLenDecodeEntities.patch +32 -0
- data/patches/libxml2/0006-htmlParseComment-treat-as-if-it-closed-the-comment.patch +73 -0
- data/patches/libxml2/0007-use-new-htmlParseLookupCommentEnd-to-find-comment-en.patch +103 -0
- data/patches/libxml2/0008-use-glibc-strlen.patch +53 -0
- data/patches/libxml2/0009-avoid-isnan-isinf.patch +81 -0
- metadata +84 -114
@@ -0,0 +1,41 @@
|
|
1
|
+
#include <nokogiri.h>
|
2
|
+
#include "libxml/xmlerror.h"
|
3
|
+
|
4
|
+
static VALUE foreign_error_handler_block = Qnil;
|
5
|
+
|
6
|
+
static void foreign_error_handler(void* user_data, xmlErrorPtr c_error)
|
7
|
+
{
|
8
|
+
rb_funcall(foreign_error_handler_block, rb_intern("call"), 0);
|
9
|
+
}
|
10
|
+
|
11
|
+
/*
|
12
|
+
* call-seq:
|
13
|
+
* __foreign_error_handler { ... } -> nil
|
14
|
+
*
|
15
|
+
* Override libxml2's global error handlers to call the block. This method thus has very little
|
16
|
+
* value except to test that Nokogiri is properly setting error handlers elsewhere in the code. See
|
17
|
+
* test/helper.rb for how this is being used.
|
18
|
+
*/
|
19
|
+
static VALUE
|
20
|
+
rb_foreign_error_handler(VALUE klass)
|
21
|
+
{
|
22
|
+
rb_need_block();
|
23
|
+
foreign_error_handler_block = rb_block_proc();
|
24
|
+
xmlSetStructuredErrorFunc(NULL, foreign_error_handler);
|
25
|
+
return Qnil;
|
26
|
+
}
|
27
|
+
|
28
|
+
/*
|
29
|
+
* Document-module: Nokogiri::Test
|
30
|
+
*
|
31
|
+
* The Nokogiri::Test module should only be used for testing Nokogiri.
|
32
|
+
* Do NOT use this outside of the Nokogiri test suite.
|
33
|
+
*/
|
34
|
+
void
|
35
|
+
init_test_global_handlers()
|
36
|
+
{
|
37
|
+
VALUE mNokogiri = rb_define_module("Nokogiri");
|
38
|
+
VALUE mNokogiriTest = rb_define_module_under(mNokogiri, "Test");
|
39
|
+
|
40
|
+
rb_define_singleton_method(mNokogiriTest, "__foreign_error_handler", rb_foreign_error_handler, 0);
|
41
|
+
}
|
data/ext/nokogiri/xml_document.c
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
#include <xml_document.h>
|
2
2
|
|
3
|
-
static int
|
3
|
+
static int dealloc_node_i2(xmlNodePtr key, xmlNodePtr node, xmlDocPtr doc)
|
4
4
|
{
|
5
5
|
switch(node->type) {
|
6
6
|
case XML_ATTRIBUTE_NODE:
|
@@ -20,6 +20,11 @@ static int dealloc_node_i(xmlNodePtr key, xmlNodePtr node, xmlDocPtr doc)
|
|
20
20
|
return ST_CONTINUE;
|
21
21
|
}
|
22
22
|
|
23
|
+
/*
 * Adapter taking three st_data_t arguments: each one is cast back to its
 * libxml2 pointer type and the call is forwarded to dealloc_node_i2(),
 * whose result is returned unchanged.
 */
static int dealloc_node_i(st_data_t key, st_data_t node, st_data_t doc)
{
  xmlNodePtr c_key = (xmlNodePtr)key;
  xmlNodePtr c_node = (xmlNodePtr)node;
  xmlDocPtr c_doc = (xmlDocPtr)doc;

  return dealloc_node_i2(c_key, c_node, c_doc);
}
|
27
|
+
|
23
28
|
static void remove_private(xmlNodePtr node)
|
24
29
|
{
|
25
30
|
xmlNodePtr child;
|
@@ -501,7 +506,7 @@ static int block_caller(void * ctx, xmlNodePtr _node, xmlNodePtr _parent)
|
|
501
506
|
* The block must return a non-nil, non-false value if the +obj+ passed in
|
502
507
|
* should be included in the canonicalized document.
|
503
508
|
*/
|
504
|
-
static VALUE
|
509
|
+
static VALUE nokogiri_xml_document_canonicalize(int argc, VALUE* argv, VALUE self)
|
505
510
|
{
|
506
511
|
VALUE mode;
|
507
512
|
VALUE incl_ns;
|
@@ -582,7 +587,7 @@ void init_xml_document()
|
|
582
587
|
rb_define_method(klass, "encoding", encoding, 0);
|
583
588
|
rb_define_method(klass, "encoding=", set_encoding, 1);
|
584
589
|
rb_define_method(klass, "version", version, 0);
|
585
|
-
rb_define_method(klass, "canonicalize",
|
590
|
+
rb_define_method(klass, "canonicalize", nokogiri_xml_document_canonicalize, -1);
|
586
591
|
rb_define_method(klass, "dup", duplicate_document, -1);
|
587
592
|
rb_define_method(klass, "url", url, 0);
|
588
593
|
rb_define_method(klass, "create_entity", create_entity, -1);
|
data/ext/nokogiri/xml_io.c
CHANGED
@@ -2,12 +2,13 @@
|
|
2
2
|
|
3
3
|
static ID id_read, id_write;
|
4
4
|
|
5
|
-
VALUE read_check(VALUE
|
5
|
+
/*
 * val is really a pointer to a two-element VALUE array smuggled through a
 * single VALUE: element 0 is the receiver, element 1 the sole argument.
 * Sends id_read to the receiver and returns whatever it returns.
 */
VALUE read_check(VALUE val) {
  VALUE *pair = (VALUE *)val;
  VALUE receiver = pair[0];
  VALUE argument = pair[1];

  return rb_funcall(receiver, id_read, 1, argument);
}
|
8
9
|
|
9
|
-
VALUE read_failed(
|
10
|
-
|
10
|
+
/*
 * Always yields Qundef; both parameters are ignored.
 * NOTE(review): presumably the rescue-side partner of read_check -- confirm
 * at the call site.
 */
VALUE read_failed(VALUE arg, VALUE exc) {
  VALUE failure_marker = Qundef;

  return failure_marker;
}
|
12
13
|
|
13
14
|
int io_read_callback(void * ctx, char * buffer, int len) {
|
@@ -30,12 +31,13 @@ int io_read_callback(void * ctx, char * buffer, int len) {
|
|
30
31
|
return (int)safe_len;
|
31
32
|
}
|
32
33
|
|
33
|
-
VALUE write_check(VALUE
|
34
|
+
/*
 * val is really a pointer to a two-element VALUE array smuggled through a
 * single VALUE: element 0 is the receiver, element 1 the sole argument.
 * Sends id_write to the receiver and returns whatever it returns.
 */
VALUE write_check(VALUE val) {
  VALUE *pair = (VALUE *)val;
  VALUE receiver = pair[0];
  VALUE argument = pair[1];

  return rb_funcall(receiver, id_write, 1, argument);
}
|
36
38
|
|
37
|
-
VALUE write_failed(
|
38
|
-
|
39
|
+
/*
 * Always yields Qundef; both parameters are ignored.
 * NOTE(review): presumably the rescue-side partner of write_check -- confirm
 * at the call site.
 */
VALUE write_failed(VALUE arg, VALUE exc) {
  VALUE failure_marker = Qundef;

  return failure_marker;
}
|
40
42
|
|
41
43
|
int io_write_callback(void * ctx, char * buffer, int len) {
|
data/ext/nokogiri/xml_node.c
CHANGED
@@ -301,7 +301,7 @@ ok:
|
|
301
301
|
* issue #391, where new node's prefix may become the string "default"
|
302
302
|
* see libxml2 tree.c xmlNewReconciliedNs which implements this behavior.
|
303
303
|
*/
|
304
|
-
xmlFree(reparentee->ns->prefix);
|
304
|
+
xmlFree((xmlChar*)reparentee->ns->prefix);
|
305
305
|
reparentee->ns->prefix = NULL;
|
306
306
|
}
|
307
307
|
}
|
data/ext/nokogiri/xml_node_set.c
CHANGED
data/ext/nokogiri/xml_reader.c
CHANGED
@@ -28,35 +28,24 @@ static int has_attributes(xmlTextReaderPtr reader)
|
|
28
28
|
static void Nokogiri_xml_node_namespaces(xmlNodePtr node, VALUE attr_hash)
|
29
29
|
{
|
30
30
|
xmlNsPtr ns;
|
31
|
-
|
32
|
-
char *key ;
|
33
|
-
size_t keylen ;
|
31
|
+
VALUE key;
|
34
32
|
|
35
33
|
if (node->type != XML_ELEMENT_NODE) return ;
|
36
34
|
|
37
35
|
ns = node->nsDef;
|
38
36
|
while (ns != NULL) {
|
39
37
|
|
40
|
-
|
41
|
-
if (keylen > XMLNS_BUFFER_LEN) {
|
42
|
-
key = (char*)malloc(keylen) ;
|
43
|
-
} else {
|
44
|
-
key = buffer ;
|
45
|
-
}
|
46
|
-
|
38
|
+
key = rb_enc_str_new_cstr(XMLNS_PREFIX, rb_utf8_encoding());
|
47
39
|
if (ns->prefix) {
|
48
|
-
|
49
|
-
|
50
|
-
sprintf(key, "%s", XMLNS_PREFIX);
|
40
|
+
rb_str_cat_cstr(key, ":");
|
41
|
+
rb_str_cat_cstr(key, (const char*)ns->prefix);
|
51
42
|
}
|
52
43
|
|
44
|
+
key = rb_str_conv_enc(key, rb_utf8_encoding(), rb_default_internal_encoding());
|
53
45
|
rb_hash_aset(attr_hash,
|
54
|
-
|
46
|
+
key,
|
55
47
|
(ns->href ? NOKOGIRI_STR_NEW2(ns->href) : Qnil)
|
56
48
|
);
|
57
|
-
if (key != buffer) {
|
58
|
-
free(key);
|
59
|
-
}
|
60
49
|
ns = ns->next ;
|
61
50
|
}
|
62
51
|
}
|
data/ext/nokogiri/xml_relax_ng.c
CHANGED
@@ -53,16 +53,24 @@ static VALUE validate_document(VALUE self, VALUE document)
|
|
53
53
|
*
|
54
54
|
* Create a new RelaxNG from the contents of +string+
|
55
55
|
*/
|
56
|
-
static VALUE read_memory(VALUE
|
56
|
+
static VALUE read_memory(int argc, VALUE *argv, VALUE klass)
|
57
57
|
{
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
);
|
58
|
+
VALUE content;
|
59
|
+
VALUE parse_options;
|
60
|
+
xmlRelaxNGParserCtxtPtr ctx;
|
62
61
|
xmlRelaxNGPtr schema;
|
63
|
-
VALUE errors
|
62
|
+
VALUE errors;
|
64
63
|
VALUE rb_schema;
|
64
|
+
int scanned_args = 0;
|
65
|
+
|
66
|
+
scanned_args = rb_scan_args(argc, argv, "11", &content, &parse_options);
|
67
|
+
if (scanned_args == 1) {
|
68
|
+
parse_options = rb_const_get(rb_const_get(mNokogiriXml, rb_intern("ParseOptions")), rb_intern("DEFAULT_SCHEMA"));
|
69
|
+
}
|
65
70
|
|
71
|
+
ctx = xmlRelaxNGNewMemParserCtxt((const char *)StringValuePtr(content), (int)RSTRING_LEN(content));
|
72
|
+
|
73
|
+
errors = rb_ary_new();
|
66
74
|
xmlSetStructuredErrorFunc((void *)errors, Nokogiri_error_array_pusher);
|
67
75
|
|
68
76
|
#ifdef HAVE_XMLRELAXNGSETPARSERSTRUCTUREDERRORS
|
@@ -90,6 +98,7 @@ static VALUE read_memory(VALUE klass, VALUE content)
|
|
90
98
|
|
91
99
|
rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema);
|
92
100
|
rb_iv_set(rb_schema, "@errors", errors);
|
101
|
+
rb_iv_set(rb_schema, "@parse_options", parse_options);
|
93
102
|
|
94
103
|
return rb_schema;
|
95
104
|
}
|
@@ -100,18 +109,25 @@ static VALUE read_memory(VALUE klass, VALUE content)
|
|
100
109
|
*
|
101
110
|
* Create a new RelaxNG schema from the Nokogiri::XML::Document +doc+
|
102
111
|
*/
|
103
|
-
static VALUE from_document(VALUE
|
112
|
+
static VALUE from_document(int argc, VALUE *argv, VALUE klass)
|
104
113
|
{
|
114
|
+
VALUE document;
|
115
|
+
VALUE parse_options;
|
105
116
|
xmlDocPtr doc;
|
106
117
|
xmlRelaxNGParserCtxtPtr ctx;
|
107
118
|
xmlRelaxNGPtr schema;
|
108
119
|
VALUE errors;
|
109
120
|
VALUE rb_schema;
|
121
|
+
int scanned_args = 0;
|
122
|
+
|
123
|
+
scanned_args = rb_scan_args(argc, argv, "11", &document, &parse_options);
|
110
124
|
|
111
125
|
Data_Get_Struct(document, xmlDoc, doc);
|
126
|
+
doc = doc->doc; /* In case someone passes us a node. ugh. */
|
112
127
|
|
113
|
-
|
114
|
-
|
128
|
+
if (scanned_args == 1) {
|
129
|
+
parse_options = rb_const_get(rb_const_get(mNokogiriXml, rb_intern("ParseOptions")), rb_intern("DEFAULT_SCHEMA"));
|
130
|
+
}
|
115
131
|
|
116
132
|
ctx = xmlRelaxNGNewDocParserCtxt(doc);
|
117
133
|
|
@@ -129,6 +145,7 @@ static VALUE from_document(VALUE klass, VALUE document)
|
|
129
145
|
schema = xmlRelaxNGParse(ctx);
|
130
146
|
|
131
147
|
xmlSetStructuredErrorFunc(NULL, NULL);
|
148
|
+
xmlRelaxNGFreeParserCtxt(ctx);
|
132
149
|
|
133
150
|
if(NULL == schema) {
|
134
151
|
xmlErrorPtr error = xmlGetLastError();
|
@@ -142,6 +159,7 @@ static VALUE from_document(VALUE klass, VALUE document)
|
|
142
159
|
|
143
160
|
rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema);
|
144
161
|
rb_iv_set(rb_schema, "@errors", errors);
|
162
|
+
rb_iv_set(rb_schema, "@parse_options", parse_options);
|
145
163
|
|
146
164
|
return rb_schema;
|
147
165
|
}
|
@@ -155,7 +173,7 @@ void init_xml_relax_ng()
|
|
155
173
|
|
156
174
|
cNokogiriXmlRelaxNG = klass;
|
157
175
|
|
158
|
-
rb_define_singleton_method(klass, "read_memory", read_memory, 1);
|
159
|
-
rb_define_singleton_method(klass, "from_document", from_document, 1);
|
176
|
+
rb_define_singleton_method(klass, "read_memory", read_memory, -1);
|
177
|
+
rb_define_singleton_method(klass, "from_document", from_document, -1);
|
160
178
|
rb_define_private_method(klass, "validate_document", validate_document, 1);
|
161
179
|
}
|
@@ -1,8 +1,5 @@
|
|
1
1
|
#include <xml_sax_parser.h>
|
2
2
|
|
3
|
-
int vasprintf (char **strp, const char *fmt, va_list ap);
|
4
|
-
void vasprintf_free (void *p);
|
5
|
-
|
6
3
|
static ID id_start_document, id_end_document, id_start_element, id_end_element;
|
7
4
|
static ID id_start_element_namespace, id_end_element_namespace;
|
8
5
|
static ID id_comment, id_characters, id_xmldecl, id_error, id_warning;
|
@@ -206,7 +203,7 @@ static void warning_func(void * ctx, const char *msg, ...)
|
|
206
203
|
va_end(args);
|
207
204
|
|
208
205
|
ruby_message = NOKOGIRI_STR_NEW2(message);
|
209
|
-
|
206
|
+
free(message);
|
210
207
|
rb_funcall(doc, id_warning, 1, ruby_message);
|
211
208
|
}
|
212
209
|
|
@@ -223,7 +220,7 @@ static void error_func(void * ctx, const char *msg, ...)
|
|
223
220
|
va_end(args);
|
224
221
|
|
225
222
|
ruby_message = NOKOGIRI_STR_NEW2(message);
|
226
|
-
|
223
|
+
free(message);
|
227
224
|
rb_funcall(doc, id_error, 1, ruby_message);
|
228
225
|
}
|
229
226
|
|
@@ -262,8 +259,6 @@ static VALUE allocate(VALUE klass)
|
|
262
259
|
{
|
263
260
|
xmlSAXHandlerPtr handler = calloc((size_t)1, sizeof(xmlSAXHandler));
|
264
261
|
|
265
|
-
xmlSetStructuredErrorFunc(NULL, NULL);
|
266
|
-
|
267
262
|
handler->startDocument = start_document;
|
268
263
|
handler->endDocument = end_document;
|
269
264
|
handler->startElement = start_element;
|
@@ -4,13 +4,13 @@ VALUE cNokogiriXmlSaxParserContext ;
|
|
4
4
|
|
5
5
|
static void deallocate(xmlParserCtxtPtr ctxt)
|
6
6
|
{
|
7
|
-
NOKOGIRI_DEBUG_START(
|
7
|
+
NOKOGIRI_DEBUG_START(ctxt);
|
8
8
|
|
9
9
|
ctxt->sax = NULL;
|
10
10
|
|
11
11
|
xmlFreeParserCtxt(ctxt);
|
12
12
|
|
13
|
-
NOKOGIRI_DEBUG_END(
|
13
|
+
NOKOGIRI_DEBUG_END(ctxt);
|
14
14
|
}
|
15
15
|
|
16
16
|
/*
|
@@ -120,6 +120,8 @@ parse_with(VALUE self, VALUE sax_handler)
|
|
120
120
|
ctxt->sax = sax;
|
121
121
|
ctxt->userData = (void *)NOKOGIRI_SAX_TUPLE_NEW(ctxt, sax_handler);
|
122
122
|
|
123
|
+
xmlSetStructuredErrorFunc(NULL, NULL);
|
124
|
+
|
123
125
|
rb_ensure(parse_doc, (VALUE)ctxt, parse_doc_finalize, (VALUE)ctxt);
|
124
126
|
|
125
127
|
return Qnil;
|
@@ -35,6 +35,8 @@ static VALUE native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
|
|
35
35
|
size = (int)RSTRING_LEN(_chunk);
|
36
36
|
}
|
37
37
|
|
38
|
+
xmlSetStructuredErrorFunc(NULL, NULL);
|
39
|
+
|
38
40
|
if (xmlParseChunk(ctx, chunk, size, Qtrue == _last_chunk ? 1 : 0)) {
|
39
41
|
if (!(ctx->options & XML_PARSE_RECOVER)) {
|
40
42
|
xmlErrorPtr e = xmlCtxtGetLastError(ctx);
|
data/ext/nokogiri/xml_schema.c
CHANGED
@@ -93,15 +93,27 @@ static VALUE validate_file(VALUE self, VALUE rb_filename)
|
|
93
93
|
*
|
94
94
|
* Create a new Schema from the contents of +string+
|
95
95
|
*/
|
96
|
-
static VALUE read_memory(VALUE
|
96
|
+
static VALUE read_memory(int argc, VALUE *argv, VALUE klass)
|
97
97
|
{
|
98
|
+
VALUE content;
|
99
|
+
VALUE parse_options;
|
100
|
+
int parse_options_int;
|
101
|
+
xmlSchemaParserCtxtPtr ctx;
|
98
102
|
xmlSchemaPtr schema;
|
99
|
-
|
100
|
-
(const char *)StringValuePtr(content),
|
101
|
-
(int)RSTRING_LEN(content)
|
102
|
-
);
|
103
|
+
VALUE errors;
|
103
104
|
VALUE rb_schema;
|
104
|
-
|
105
|
+
int scanned_args = 0;
|
106
|
+
xmlExternalEntityLoader old_loader = 0;
|
107
|
+
|
108
|
+
scanned_args = rb_scan_args(argc, argv, "11", &content, &parse_options);
|
109
|
+
if (scanned_args == 1) {
|
110
|
+
parse_options = rb_const_get(rb_const_get(mNokogiriXml, rb_intern("ParseOptions")), rb_intern("DEFAULT_SCHEMA"));
|
111
|
+
}
|
112
|
+
parse_options_int = (int)NUM2INT(rb_funcall(parse_options, rb_intern("to_i"), 0));
|
113
|
+
|
114
|
+
ctx = xmlSchemaNewMemParserCtxt((const char *)StringValuePtr(content), (int)RSTRING_LEN(content));
|
115
|
+
|
116
|
+
errors = rb_ary_new();
|
105
117
|
xmlSetStructuredErrorFunc((void *)errors, Nokogiri_error_array_pusher);
|
106
118
|
|
107
119
|
#ifdef HAVE_XMLSCHEMASETPARSERSTRUCTUREDERRORS
|
@@ -109,10 +121,19 @@ static VALUE read_memory(VALUE klass, VALUE content)
|
|
109
121
|
ctx,
|
110
122
|
Nokogiri_error_array_pusher,
|
111
123
|
(void *)errors
|
112
|
-
|
124
|
+
);
|
113
125
|
#endif
|
114
126
|
|
115
|
-
|
127
|
+
if (parse_options_int & XML_PARSE_NONET) {
|
128
|
+
old_loader = xmlGetExternalEntityLoader();
|
129
|
+
xmlSetExternalEntityLoader(xmlNoNetExternalEntityLoader);
|
130
|
+
}
|
131
|
+
|
132
|
+
schema = xmlSchemaParse(ctx);
|
133
|
+
|
134
|
+
if (old_loader) {
|
135
|
+
xmlSetExternalEntityLoader(old_loader);
|
136
|
+
}
|
116
137
|
|
117
138
|
xmlSetStructuredErrorFunc(NULL, NULL);
|
118
139
|
xmlSchemaFreeParserCtxt(ctx);
|
@@ -129,28 +150,68 @@ static VALUE read_memory(VALUE klass, VALUE content)
|
|
129
150
|
|
130
151
|
rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema);
|
131
152
|
rb_iv_set(rb_schema, "@errors", errors);
|
153
|
+
rb_iv_set(rb_schema, "@parse_options", parse_options);
|
132
154
|
|
133
155
|
return rb_schema;
|
134
156
|
}
|
135
157
|
|
158
|
+
/* Schema creation will remove and deallocate "blank" nodes.
|
159
|
+
* If those blank nodes have been exposed to Ruby, they could get freed
|
160
|
+
* out from under the VALUE pointer. This function checks to see if any of
|
161
|
+
* those nodes have been exposed to Ruby, and if so we should raise an exception.
|
162
|
+
*/
|
163
|
+
static int has_blank_nodes_p(VALUE cache)
|
164
|
+
{
|
165
|
+
long i;
|
166
|
+
|
167
|
+
if (NIL_P(cache)) {
|
168
|
+
return 0;
|
169
|
+
}
|
170
|
+
|
171
|
+
for (i = 0; i < RARRAY_LEN(cache); i++) {
|
172
|
+
xmlNodePtr node;
|
173
|
+
VALUE element = rb_ary_entry(cache, i);
|
174
|
+
Data_Get_Struct(element, xmlNode, node);
|
175
|
+
if (xmlIsBlankNode(node)) {
|
176
|
+
return 1;
|
177
|
+
}
|
178
|
+
}
|
179
|
+
|
180
|
+
return 0;
|
181
|
+
}
|
182
|
+
|
136
183
|
/*
|
137
184
|
* call-seq:
|
138
185
|
* from_document(doc)
|
139
186
|
*
|
140
187
|
* Create a new Schema from the Nokogiri::XML::Document +doc+
|
141
188
|
*/
|
142
|
-
static VALUE from_document(VALUE
|
189
|
+
static VALUE from_document(int argc, VALUE *argv, VALUE klass)
|
143
190
|
{
|
191
|
+
VALUE document;
|
192
|
+
VALUE parse_options;
|
193
|
+
int parse_options_int;
|
144
194
|
xmlDocPtr doc;
|
145
195
|
xmlSchemaParserCtxtPtr ctx;
|
146
196
|
xmlSchemaPtr schema;
|
147
197
|
VALUE errors;
|
148
198
|
VALUE rb_schema;
|
199
|
+
int scanned_args = 0;
|
200
|
+
xmlExternalEntityLoader old_loader = 0;
|
201
|
+
|
202
|
+
scanned_args = rb_scan_args(argc, argv, "11", &document, &parse_options);
|
149
203
|
|
150
204
|
Data_Get_Struct(document, xmlDoc, doc);
|
205
|
+
doc = doc->doc; /* In case someone passes us a node. ugh. */
|
151
206
|
|
152
|
-
|
153
|
-
|
207
|
+
if (scanned_args == 1) {
|
208
|
+
parse_options = rb_const_get(rb_const_get(mNokogiriXml, rb_intern("ParseOptions")), rb_intern("DEFAULT_SCHEMA"));
|
209
|
+
}
|
210
|
+
parse_options_int = (int)NUM2INT(rb_funcall(parse_options, rb_intern("to_i"), 0));
|
211
|
+
|
212
|
+
if (has_blank_nodes_p(DOC_NODE_CACHE(doc))) {
|
213
|
+
rb_raise(rb_eArgError, "Creating a schema from a document that has blank nodes exposed to Ruby is dangerous");
|
214
|
+
}
|
154
215
|
|
155
216
|
ctx = xmlSchemaNewDocParserCtxt(doc);
|
156
217
|
|
@@ -165,8 +226,17 @@ static VALUE from_document(VALUE klass, VALUE document)
|
|
165
226
|
);
|
166
227
|
#endif
|
167
228
|
|
229
|
+
if (parse_options_int & XML_PARSE_NONET) {
|
230
|
+
old_loader = xmlGetExternalEntityLoader();
|
231
|
+
xmlSetExternalEntityLoader(xmlNoNetExternalEntityLoader);
|
232
|
+
}
|
233
|
+
|
168
234
|
schema = xmlSchemaParse(ctx);
|
169
235
|
|
236
|
+
if (old_loader) {
|
237
|
+
xmlSetExternalEntityLoader(old_loader);
|
238
|
+
}
|
239
|
+
|
170
240
|
xmlSetStructuredErrorFunc(NULL, NULL);
|
171
241
|
xmlSchemaFreeParserCtxt(ctx);
|
172
242
|
|
@@ -182,6 +252,7 @@ static VALUE from_document(VALUE klass, VALUE document)
|
|
182
252
|
|
183
253
|
rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema);
|
184
254
|
rb_iv_set(rb_schema, "@errors", errors);
|
255
|
+
rb_iv_set(rb_schema, "@parse_options", parse_options);
|
185
256
|
|
186
257
|
return rb_schema;
|
187
258
|
|
@@ -197,8 +268,8 @@ void init_xml_schema()
|
|
197
268
|
|
198
269
|
cNokogiriXmlSchema = klass;
|
199
270
|
|
200
|
-
rb_define_singleton_method(klass, "read_memory", read_memory, 1);
|
201
|
-
rb_define_singleton_method(klass, "from_document", from_document, 1);
|
271
|
+
rb_define_singleton_method(klass, "read_memory", read_memory, -1);
|
272
|
+
rb_define_singleton_method(klass, "from_document", from_document, -1);
|
202
273
|
|
203
274
|
rb_define_private_method(klass, "validate_document", validate_document, 1);
|
204
275
|
rb_define_private_method(klass, "validate_file", validate_file, 1);
|