nokogiri 1.16.0 → 1.18.1
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +14 -16
- data/LICENSE-DEPENDENCIES.md +6 -6
- data/README.md +8 -5
- data/dependencies.yml +9 -9
- data/ext/nokogiri/extconf.rb +188 -142
- data/ext/nokogiri/gumbo.c +69 -53
- data/ext/nokogiri/html4_document.c +10 -4
- data/ext/nokogiri/html4_element_description.c +18 -18
- data/ext/nokogiri/html4_sax_parser.c +40 -0
- data/ext/nokogiri/html4_sax_parser_context.c +48 -58
- data/ext/nokogiri/html4_sax_push_parser.c +25 -24
- data/ext/nokogiri/libxml2_polyfill.c +114 -0
- data/ext/nokogiri/nokogiri.c +9 -2
- data/ext/nokogiri/nokogiri.h +18 -33
- data/ext/nokogiri/xml_attr.c +1 -1
- data/ext/nokogiri/xml_cdata.c +2 -10
- data/ext/nokogiri/xml_comment.c +3 -8
- data/ext/nokogiri/xml_document.c +167 -156
- data/ext/nokogiri/xml_document_fragment.c +10 -25
- data/ext/nokogiri/xml_dtd.c +1 -1
- data/ext/nokogiri/xml_element_content.c +9 -9
- data/ext/nokogiri/xml_encoding_handler.c +4 -4
- data/ext/nokogiri/xml_namespace.c +6 -6
- data/ext/nokogiri/xml_node.c +141 -104
- data/ext/nokogiri/xml_node_set.c +46 -44
- data/ext/nokogiri/xml_reader.c +74 -57
- data/ext/nokogiri/xml_relax_ng.c +35 -56
- data/ext/nokogiri/xml_sax_parser.c +156 -88
- data/ext/nokogiri/xml_sax_parser_context.c +219 -131
- data/ext/nokogiri/xml_sax_push_parser.c +68 -49
- data/ext/nokogiri/xml_schema.c +50 -85
- data/ext/nokogiri/xml_syntax_error.c +19 -11
- data/ext/nokogiri/xml_text.c +2 -4
- data/ext/nokogiri/xml_xpath_context.c +103 -100
- data/ext/nokogiri/xslt_stylesheet.c +16 -11
- data/gumbo-parser/Makefile +3 -0
- data/gumbo-parser/src/ascii.c +2 -2
- data/gumbo-parser/src/error.c +76 -48
- data/gumbo-parser/src/error.h +5 -1
- data/gumbo-parser/src/nokogiri_gumbo.h +11 -2
- data/gumbo-parser/src/parser.c +66 -25
- data/gumbo-parser/src/tokenizer.c +6 -6
- data/lib/nokogiri/class_resolver.rb +1 -1
- data/lib/nokogiri/css/node.rb +6 -2
- data/lib/nokogiri/css/parser.rb +6 -4
- data/lib/nokogiri/css/parser.y +2 -2
- data/lib/nokogiri/css/parser_extras.rb +6 -66
- data/lib/nokogiri/css/selector_cache.rb +38 -0
- data/lib/nokogiri/css/tokenizer.rb +4 -4
- data/lib/nokogiri/css/tokenizer.rex +9 -8
- data/lib/nokogiri/css/xpath_visitor.rb +42 -6
- data/lib/nokogiri/css.rb +86 -20
- data/lib/nokogiri/decorators/slop.rb +3 -5
- data/lib/nokogiri/encoding_handler.rb +2 -2
- data/lib/nokogiri/html4/document.rb +44 -23
- data/lib/nokogiri/html4/document_fragment.rb +124 -12
- data/lib/nokogiri/html4/encoding_reader.rb +1 -1
- data/lib/nokogiri/html4/sax/parser.rb +23 -38
- data/lib/nokogiri/html4/sax/parser_context.rb +4 -9
- data/lib/nokogiri/html4.rb +9 -14
- data/lib/nokogiri/html5/builder.rb +40 -0
- data/lib/nokogiri/html5/document.rb +61 -30
- data/lib/nokogiri/html5/document_fragment.rb +130 -20
- data/lib/nokogiri/html5/node.rb +4 -4
- data/lib/nokogiri/html5.rb +114 -72
- data/lib/nokogiri/version/constant.rb +1 -1
- data/lib/nokogiri/xml/builder.rb +8 -1
- data/lib/nokogiri/xml/document.rb +70 -26
- data/lib/nokogiri/xml/document_fragment.rb +84 -13
- data/lib/nokogiri/xml/node.rb +82 -11
- data/lib/nokogiri/xml/node_set.rb +9 -7
- data/lib/nokogiri/xml/parse_options.rb +1 -1
- data/lib/nokogiri/xml/pp/node.rb +6 -1
- data/lib/nokogiri/xml/reader.rb +51 -17
- data/lib/nokogiri/xml/relax_ng.rb +57 -20
- data/lib/nokogiri/xml/sax/document.rb +174 -83
- data/lib/nokogiri/xml/sax/parser.rb +115 -41
- data/lib/nokogiri/xml/sax/parser_context.rb +116 -8
- data/lib/nokogiri/xml/sax/push_parser.rb +3 -0
- data/lib/nokogiri/xml/sax.rb +48 -0
- data/lib/nokogiri/xml/schema.rb +112 -45
- data/lib/nokogiri/xml/searchable.rb +38 -42
- data/lib/nokogiri/xml/syntax_error.rb +22 -0
- data/lib/nokogiri/xml/xpath_context.rb +14 -3
- data/lib/nokogiri/xml.rb +13 -24
- data/lib/nokogiri/xslt/stylesheet.rb +29 -7
- data/lib/nokogiri/xslt.rb +3 -9
- data/lib/xsd/xmlparser/nokogiri.rb +3 -4
- data/patches/libxml2/0019-xpath-Use-separate-static-hash-table-for-standard-fu.patch +244 -0
- data/ports/archives/libxml2-2.13.5.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.42.tar.xz +0 -0
- metadata +13 -14
- data/ext/nokogiri/libxml2_backwards_compat.c +0 -121
- data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +0 -25
- data/ports/archives/libxml2-2.12.3.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.39.tar.xz +0 -0
data/ext/nokogiri/xml_reader.c
CHANGED
@@ -5,12 +5,18 @@ VALUE cNokogiriXmlReader;
|
|
5
5
|
static void
|
6
6
|
xml_reader_deallocate(void *data)
|
7
7
|
{
|
8
|
+
// free the document separately because we _may_ have triggered preservation by calling
|
9
|
+
// xmlTextReaderCurrentDoc during a read_more.
|
8
10
|
xmlTextReaderPtr reader = data;
|
11
|
+
xmlDocPtr doc = xmlTextReaderCurrentDoc(reader);
|
9
12
|
xmlFreeTextReader(reader);
|
13
|
+
if (doc) {
|
14
|
+
xmlFreeDoc(doc);
|
15
|
+
}
|
10
16
|
}
|
11
17
|
|
12
|
-
static const rb_data_type_t
|
13
|
-
.wrap_struct_name = "
|
18
|
+
static const rb_data_type_t xml_text_reader_type = {
|
19
|
+
.wrap_struct_name = "xmlTextReader",
|
14
20
|
.function = {
|
15
21
|
.dfree = xml_reader_deallocate,
|
16
22
|
},
|
@@ -78,7 +84,7 @@ default_eh(VALUE self)
|
|
78
84
|
xmlTextReaderPtr reader;
|
79
85
|
int eh;
|
80
86
|
|
81
|
-
TypedData_Get_Struct(self, xmlTextReader, &
|
87
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
|
82
88
|
eh = xmlTextReaderIsDefault(reader);
|
83
89
|
if (eh == 0) { return Qfalse; }
|
84
90
|
if (eh == 1) { return Qtrue; }
|
@@ -98,7 +104,7 @@ value_eh(VALUE self)
|
|
98
104
|
xmlTextReaderPtr reader;
|
99
105
|
int eh;
|
100
106
|
|
101
|
-
TypedData_Get_Struct(self, xmlTextReader, &
|
107
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
|
102
108
|
eh = xmlTextReaderHasValue(reader);
|
103
109
|
if (eh == 0) { return Qfalse; }
|
104
110
|
if (eh == 1) { return Qtrue; }
|
@@ -118,7 +124,7 @@ attributes_eh(VALUE self)
|
|
118
124
|
xmlTextReaderPtr reader;
|
119
125
|
int eh;
|
120
126
|
|
121
|
-
TypedData_Get_Struct(self, xmlTextReader, &
|
127
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
|
122
128
|
eh = has_attributes(reader);
|
123
129
|
if (eh == 0) { return Qfalse; }
|
124
130
|
if (eh == 1) { return Qtrue; }
|
@@ -140,7 +146,7 @@ rb_xml_reader_namespaces(VALUE rb_reader)
|
|
140
146
|
xmlNodePtr c_node;
|
141
147
|
VALUE rb_errors;
|
142
148
|
|
143
|
-
TypedData_Get_Struct(rb_reader, xmlTextReader, &
|
149
|
+
TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_text_reader_type, c_reader);
|
144
150
|
|
145
151
|
if (! has_attributes(c_reader)) {
|
146
152
|
return rb_namespaces ;
|
@@ -148,7 +154,7 @@ rb_xml_reader_namespaces(VALUE rb_reader)
|
|
148
154
|
|
149
155
|
rb_errors = rb_funcall(rb_reader, rb_intern("errors"), 0);
|
150
156
|
|
151
|
-
xmlSetStructuredErrorFunc((void *)rb_errors,
|
157
|
+
xmlSetStructuredErrorFunc((void *)rb_errors, noko__error_array_pusher);
|
152
158
|
c_node = xmlTextReaderExpand(c_reader);
|
153
159
|
xmlSetStructuredErrorFunc(NULL, NULL);
|
154
160
|
|
@@ -182,7 +188,7 @@ rb_xml_reader_attribute_hash(VALUE rb_reader)
|
|
182
188
|
xmlAttrPtr c_property;
|
183
189
|
VALUE rb_errors;
|
184
190
|
|
185
|
-
TypedData_Get_Struct(rb_reader, xmlTextReader, &
|
191
|
+
TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_text_reader_type, c_reader);
|
186
192
|
|
187
193
|
if (!has_attributes(c_reader)) {
|
188
194
|
return rb_attributes;
|
@@ -190,7 +196,7 @@ rb_xml_reader_attribute_hash(VALUE rb_reader)
|
|
190
196
|
|
191
197
|
rb_errors = rb_funcall(rb_reader, rb_intern("errors"), 0);
|
192
198
|
|
193
|
-
xmlSetStructuredErrorFunc((void *)rb_errors,
|
199
|
+
xmlSetStructuredErrorFunc((void *)rb_errors, noko__error_array_pusher);
|
194
200
|
c_node = xmlTextReaderExpand(c_reader);
|
195
201
|
xmlSetStructuredErrorFunc(NULL, NULL);
|
196
202
|
|
@@ -235,7 +241,7 @@ attribute_at(VALUE self, VALUE index)
|
|
235
241
|
xmlChar *value;
|
236
242
|
VALUE rb_value;
|
237
243
|
|
238
|
-
TypedData_Get_Struct(self, xmlTextReader, &
|
244
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
|
239
245
|
|
240
246
|
if (NIL_P(index)) { return Qnil; }
|
241
247
|
index = rb_Integer(index);
|
@@ -264,7 +270,7 @@ reader_attribute(VALUE self, VALUE name)
|
|
264
270
|
xmlChar *value ;
|
265
271
|
VALUE rb_value;
|
266
272
|
|
267
|
-
TypedData_Get_Struct(self, xmlTextReader, &
|
273
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
|
268
274
|
|
269
275
|
if (NIL_P(name)) { return Qnil; }
|
270
276
|
name = StringValue(name) ;
|
@@ -289,7 +295,7 @@ attribute_count(VALUE self)
|
|
289
295
|
xmlTextReaderPtr reader;
|
290
296
|
int count;
|
291
297
|
|
292
|
-
TypedData_Get_Struct(self, xmlTextReader, &
|
298
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
|
293
299
|
count = xmlTextReaderAttributeCount(reader);
|
294
300
|
if (count == -1) { return Qnil; }
|
295
301
|
|
@@ -308,7 +314,7 @@ depth(VALUE self)
|
|
308
314
|
xmlTextReaderPtr reader;
|
309
315
|
int depth;
|
310
316
|
|
311
|
-
TypedData_Get_Struct(self, xmlTextReader, &
|
317
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
|
312
318
|
depth = xmlTextReaderDepth(reader);
|
313
319
|
if (depth == -1) { return Qnil; }
|
314
320
|
|
@@ -327,7 +333,7 @@ xml_version(VALUE self)
|
|
327
333
|
xmlTextReaderPtr reader;
|
328
334
|
const char *version;
|
329
335
|
|
330
|
-
TypedData_Get_Struct(self, xmlTextReader, &
|
336
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
|
331
337
|
version = (const char *)xmlTextReaderConstXmlVersion(reader);
|
332
338
|
if (version == NULL) { return Qnil; }
|
333
339
|
|
@@ -346,7 +352,7 @@ lang(VALUE self)
|
|
346
352
|
xmlTextReaderPtr reader;
|
347
353
|
const char *lang;
|
348
354
|
|
349
|
-
TypedData_Get_Struct(self, xmlTextReader, &
|
355
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
|
350
356
|
lang = (const char *)xmlTextReaderConstXmlLang(reader);
|
351
357
|
if (lang == NULL) { return Qnil; }
|
352
358
|
|
@@ -365,7 +371,7 @@ value(VALUE self)
|
|
365
371
|
xmlTextReaderPtr reader;
|
366
372
|
const char *value;
|
367
373
|
|
368
|
-
TypedData_Get_Struct(self, xmlTextReader, &
|
374
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
|
369
375
|
value = (const char *)xmlTextReaderConstValue(reader);
|
370
376
|
if (value == NULL) { return Qnil; }
|
371
377
|
|
@@ -384,7 +390,7 @@ prefix(VALUE self)
|
|
384
390
|
xmlTextReaderPtr reader;
|
385
391
|
const char *prefix;
|
386
392
|
|
387
|
-
TypedData_Get_Struct(self, xmlTextReader, &
|
393
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
|
388
394
|
prefix = (const char *)xmlTextReaderConstPrefix(reader);
|
389
395
|
if (prefix == NULL) { return Qnil; }
|
390
396
|
|
@@ -403,7 +409,7 @@ namespace_uri(VALUE self)
|
|
403
409
|
xmlTextReaderPtr reader;
|
404
410
|
const char *uri;
|
405
411
|
|
406
|
-
TypedData_Get_Struct(self, xmlTextReader, &
|
412
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
|
407
413
|
uri = (const char *)xmlTextReaderConstNamespaceUri(reader);
|
408
414
|
if (uri == NULL) { return Qnil; }
|
409
415
|
|
@@ -422,7 +428,7 @@ local_name(VALUE self)
|
|
422
428
|
xmlTextReaderPtr reader;
|
423
429
|
const char *name;
|
424
430
|
|
425
|
-
TypedData_Get_Struct(self, xmlTextReader, &
|
431
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
|
426
432
|
name = (const char *)xmlTextReaderConstLocalName(reader);
|
427
433
|
if (name == NULL) { return Qnil; }
|
428
434
|
|
@@ -441,7 +447,7 @@ name(VALUE self)
|
|
441
447
|
xmlTextReaderPtr reader;
|
442
448
|
const char *name;
|
443
449
|
|
444
|
-
TypedData_Get_Struct(self, xmlTextReader, &
|
450
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
|
445
451
|
name = (const char *)xmlTextReaderConstName(reader);
|
446
452
|
if (name == NULL) { return Qnil; }
|
447
453
|
|
@@ -461,7 +467,7 @@ rb_xml_reader_base_uri(VALUE rb_reader)
|
|
461
467
|
xmlTextReaderPtr c_reader;
|
462
468
|
xmlChar *c_base_uri;
|
463
469
|
|
464
|
-
TypedData_Get_Struct(rb_reader, xmlTextReader, &
|
470
|
+
TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_text_reader_type, c_reader);
|
465
471
|
|
466
472
|
c_base_uri = xmlTextReaderBaseUri(c_reader);
|
467
473
|
if (c_base_uri == NULL) {
|
@@ -484,7 +490,7 @@ static VALUE
|
|
484
490
|
state(VALUE self)
|
485
491
|
{
|
486
492
|
xmlTextReaderPtr reader;
|
487
|
-
TypedData_Get_Struct(self, xmlTextReader, &
|
493
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
|
488
494
|
return INT2NUM(xmlTextReaderReadState(reader));
|
489
495
|
}
|
490
496
|
|
@@ -498,7 +504,7 @@ static VALUE
|
|
498
504
|
node_type(VALUE self)
|
499
505
|
{
|
500
506
|
xmlTextReaderPtr reader;
|
501
|
-
TypedData_Get_Struct(self, xmlTextReader, &
|
507
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
|
502
508
|
return INT2NUM(xmlTextReaderNodeType(reader));
|
503
509
|
}
|
504
510
|
|
@@ -509,32 +515,41 @@ node_type(VALUE self)
|
|
509
515
|
* Move the Reader forward through the XML document.
|
510
516
|
*/
|
511
517
|
static VALUE
|
512
|
-
read_more(VALUE
|
518
|
+
read_more(VALUE rb_reader)
|
513
519
|
{
|
514
|
-
xmlTextReaderPtr
|
515
|
-
|
516
|
-
VALUE error_list;
|
517
|
-
int ret;
|
520
|
+
xmlTextReaderPtr c_reader;
|
521
|
+
libxmlStructuredErrorHandlerState handler_state;
|
518
522
|
|
519
|
-
TypedData_Get_Struct(
|
523
|
+
TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_text_reader_type, c_reader);
|
520
524
|
|
521
|
-
|
525
|
+
VALUE rb_errors = rb_funcall(rb_reader, rb_intern("errors"), 0);
|
526
|
+
noko__structured_error_func_save_and_set(&handler_state, (void *)rb_errors, noko__error_array_pusher);
|
522
527
|
|
523
|
-
|
524
|
-
ret = xmlTextReaderRead(reader);
|
525
|
-
xmlSetStructuredErrorFunc(NULL, NULL);
|
528
|
+
int status = xmlTextReaderRead(c_reader);
|
526
529
|
|
527
|
-
|
528
|
-
if (ret == 0) { return Qnil; }
|
530
|
+
noko__structured_error_func_restore(&handler_state);
|
529
531
|
|
530
|
-
|
531
|
-
if (
|
532
|
-
|
533
|
-
|
534
|
-
|
532
|
+
xmlDocPtr c_document = xmlTextReaderCurrentDoc(c_reader);
|
533
|
+
if (c_document && c_document->encoding == NULL) {
|
534
|
+
VALUE constructor_encoding = rb_iv_get(rb_reader, "@encoding");
|
535
|
+
if (RTEST(constructor_encoding)) {
|
536
|
+
c_document->encoding = xmlStrdup(BAD_CAST StringValueCStr(constructor_encoding));
|
537
|
+
} else {
|
538
|
+
rb_iv_set(rb_reader, "@encoding", NOKOGIRI_STR_NEW2("UTF-8"));
|
539
|
+
c_document->encoding = xmlStrdup(BAD_CAST "UTF-8");
|
540
|
+
}
|
535
541
|
}
|
536
542
|
|
537
|
-
return
|
543
|
+
if (status == 1) { return rb_reader; }
|
544
|
+
if (status == 0) { return Qnil; }
|
545
|
+
|
546
|
+
/* if we're here, there was an error */
|
547
|
+
VALUE exception = rb_funcall(cNokogiriXmlSyntaxError, rb_intern("aggregate"), 1, rb_errors);
|
548
|
+
if (RB_TEST(exception)) {
|
549
|
+
rb_exc_raise(exception);
|
550
|
+
} else {
|
551
|
+
rb_raise(rb_eRuntimeError, "Error pulling: %d", status);
|
552
|
+
}
|
538
553
|
}
|
539
554
|
|
540
555
|
/*
|
@@ -551,7 +566,7 @@ inner_xml(VALUE self)
|
|
551
566
|
xmlChar *value;
|
552
567
|
VALUE str;
|
553
568
|
|
554
|
-
TypedData_Get_Struct(self, xmlTextReader, &
|
569
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
|
555
570
|
|
556
571
|
value = xmlTextReaderReadInnerXml(reader);
|
557
572
|
|
@@ -578,7 +593,7 @@ outer_xml(VALUE self)
|
|
578
593
|
xmlChar *value;
|
579
594
|
VALUE str = Qnil;
|
580
595
|
|
581
|
-
TypedData_Get_Struct(self, xmlTextReader, &
|
596
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
|
582
597
|
|
583
598
|
value = xmlTextReaderReadOuterXml(reader);
|
584
599
|
|
@@ -593,11 +608,13 @@ outer_xml(VALUE self)
|
|
593
608
|
* call-seq:
|
594
609
|
* from_memory(string, url = nil, encoding = nil, options = 0)
|
595
610
|
*
|
596
|
-
* Create a new
|
611
|
+
* Create a new Reader to parse a String.
|
597
612
|
*/
|
598
613
|
static VALUE
|
599
614
|
from_memory(int argc, VALUE *argv, VALUE klass)
|
600
615
|
{
|
616
|
+
/* TODO: deprecate this method, since Reader.new can handle both memory and IO. It can then
|
617
|
+
* become private. */
|
601
618
|
VALUE rb_buffer, rb_url, encoding, rb_options;
|
602
619
|
xmlTextReaderPtr reader;
|
603
620
|
const char *c_url = NULL;
|
@@ -625,7 +642,7 @@ from_memory(int argc, VALUE *argv, VALUE klass)
|
|
625
642
|
rb_raise(rb_eRuntimeError, "couldn't create a parser");
|
626
643
|
}
|
627
644
|
|
628
|
-
rb_reader = TypedData_Wrap_Struct(klass, &
|
645
|
+
rb_reader = TypedData_Wrap_Struct(klass, &xml_text_reader_type, reader);
|
629
646
|
args[0] = rb_buffer;
|
630
647
|
args[1] = rb_url;
|
631
648
|
args[2] = encoding;
|
@@ -638,11 +655,13 @@ from_memory(int argc, VALUE *argv, VALUE klass)
|
|
638
655
|
* call-seq:
|
639
656
|
* from_io(io, url = nil, encoding = nil, options = 0)
|
640
657
|
*
|
641
|
-
* Create a new
|
658
|
+
* Create a new Reader to parse an IO stream.
|
642
659
|
*/
|
643
660
|
static VALUE
|
644
661
|
from_io(int argc, VALUE *argv, VALUE klass)
|
645
662
|
{
|
663
|
+
/* TODO: deprecate this method, since Reader.new can handle both memory and IO. It can then
|
664
|
+
* become private. */
|
646
665
|
VALUE rb_io, rb_url, encoding, rb_options;
|
647
666
|
xmlTextReaderPtr reader;
|
648
667
|
const char *c_url = NULL;
|
@@ -671,7 +690,7 @@ from_io(int argc, VALUE *argv, VALUE klass)
|
|
671
690
|
rb_raise(rb_eRuntimeError, "couldn't create a parser");
|
672
691
|
}
|
673
692
|
|
674
|
-
rb_reader = TypedData_Wrap_Struct(klass, &
|
693
|
+
rb_reader = TypedData_Wrap_Struct(klass, &xml_text_reader_type, reader);
|
675
694
|
args[0] = rb_io;
|
676
695
|
args[1] = rb_url;
|
677
696
|
args[2] = encoding;
|
@@ -691,7 +710,7 @@ empty_element_p(VALUE self)
|
|
691
710
|
{
|
692
711
|
xmlTextReaderPtr reader;
|
693
712
|
|
694
|
-
TypedData_Get_Struct(self, xmlTextReader, &
|
713
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
|
695
714
|
|
696
715
|
if (xmlTextReaderIsEmptyElement(reader)) {
|
697
716
|
return Qtrue;
|
@@ -707,25 +726,23 @@ rb_xml_reader_encoding(VALUE rb_reader)
|
|
707
726
|
const char *parser_encoding;
|
708
727
|
VALUE constructor_encoding;
|
709
728
|
|
729
|
+
TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_text_reader_type, c_reader);
|
730
|
+
parser_encoding = (const char *)xmlTextReaderConstEncoding(c_reader);
|
731
|
+
if (parser_encoding) {
|
732
|
+
return NOKOGIRI_STR_NEW2(parser_encoding);
|
733
|
+
}
|
734
|
+
|
710
735
|
constructor_encoding = rb_iv_get(rb_reader, "@encoding");
|
711
736
|
if (RTEST(constructor_encoding)) {
|
712
737
|
return constructor_encoding;
|
713
738
|
}
|
714
739
|
|
715
|
-
|
716
|
-
parser_encoding = (const char *)xmlTextReaderConstEncoding(c_reader);
|
717
|
-
if (parser_encoding == NULL) { return Qnil; }
|
718
|
-
return NOKOGIRI_STR_NEW2(parser_encoding);
|
740
|
+
return Qnil;
|
719
741
|
}
|
720
742
|
|
721
743
|
void
|
722
744
|
noko_init_xml_reader(void)
|
723
745
|
{
|
724
|
-
/*
|
725
|
-
* The Reader parser allows you to effectively pull parse an XML document.
|
726
|
-
* Once instantiated, call Nokogiri::XML::Reader#each to iterate over each
|
727
|
-
* node. Note that you may only iterate over the document once!
|
728
|
-
*/
|
729
746
|
cNokogiriXmlReader = rb_define_class_under(mNokogiriXml, "Reader", rb_cObject);
|
730
747
|
|
731
748
|
rb_undef_alloc_func(cNokogiriXmlReader);
|
data/ext/nokogiri/xml_relax_ng.c
CHANGED
@@ -3,28 +3,22 @@
|
|
3
3
|
VALUE cNokogiriXmlRelaxNG;
|
4
4
|
|
5
5
|
static void
|
6
|
-
|
6
|
+
_noko_xml_relax_ng_deallocate(void *data)
|
7
7
|
{
|
8
8
|
xmlRelaxNGPtr schema = data;
|
9
9
|
xmlRelaxNGFree(schema);
|
10
10
|
}
|
11
11
|
|
12
12
|
static const rb_data_type_t xml_relax_ng_type = {
|
13
|
-
.wrap_struct_name = "
|
13
|
+
.wrap_struct_name = "xmlRelaxNG",
|
14
14
|
.function = {
|
15
|
-
.dfree =
|
15
|
+
.dfree = _noko_xml_relax_ng_deallocate,
|
16
16
|
},
|
17
17
|
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
|
18
18
|
};
|
19
19
|
|
20
|
-
/*
|
21
|
-
* call-seq:
|
22
|
-
* validate_document(document)
|
23
|
-
*
|
24
|
-
* Validate a Nokogiri::XML::Document against this RelaxNG schema.
|
25
|
-
*/
|
26
20
|
static VALUE
|
27
|
-
|
21
|
+
noko_xml_relax_ng__validate_document(VALUE self, VALUE document)
|
28
22
|
{
|
29
23
|
xmlDocPtr doc;
|
30
24
|
xmlRelaxNGPtr schema;
|
@@ -43,13 +37,11 @@ validate_document(VALUE self, VALUE document)
|
|
43
37
|
rb_raise(rb_eRuntimeError, "Could not create a validation context");
|
44
38
|
}
|
45
39
|
|
46
|
-
#ifdef HAVE_XMLRELAXNGSETVALIDSTRUCTUREDERRORS
|
47
40
|
xmlRelaxNGSetValidStructuredErrors(
|
48
41
|
valid_ctxt,
|
49
|
-
|
42
|
+
noko__error_array_pusher,
|
50
43
|
(void *)errors
|
51
44
|
);
|
52
|
-
#endif
|
53
45
|
|
54
46
|
xmlRelaxNGValidateDoc(valid_ctxt, doc);
|
55
47
|
|
@@ -59,8 +51,8 @@ validate_document(VALUE self, VALUE document)
|
|
59
51
|
}
|
60
52
|
|
61
53
|
static VALUE
|
62
|
-
|
63
|
-
VALUE
|
54
|
+
_noko_xml_relax_ng_parse_schema(
|
55
|
+
VALUE rb_class,
|
64
56
|
xmlRelaxNGParserCtxtPtr c_parser_context,
|
65
57
|
VALUE rb_parse_options
|
66
58
|
)
|
@@ -68,6 +60,7 @@ xml_relax_ng_parse_schema(
|
|
68
60
|
VALUE rb_errors;
|
69
61
|
VALUE rb_schema;
|
70
62
|
xmlRelaxNGPtr c_schema;
|
63
|
+
libxmlStructuredErrorHandlerState handler_state;
|
71
64
|
|
72
65
|
if (NIL_P(rb_parse_options)) {
|
73
66
|
rb_parse_options = rb_const_get_at(
|
@@ -77,33 +70,30 @@ xml_relax_ng_parse_schema(
|
|
77
70
|
}
|
78
71
|
|
79
72
|
rb_errors = rb_ary_new();
|
80
|
-
xmlSetStructuredErrorFunc((void *)rb_errors, Nokogiri_error_array_pusher);
|
81
73
|
|
82
|
-
|
74
|
+
noko__structured_error_func_save_and_set(&handler_state, (void *)rb_errors, noko__error_array_pusher);
|
83
75
|
xmlRelaxNGSetParserStructuredErrors(
|
84
76
|
c_parser_context,
|
85
|
-
|
77
|
+
noko__error_array_pusher,
|
86
78
|
(void *)rb_errors
|
87
79
|
);
|
88
|
-
#endif
|
89
80
|
|
90
81
|
c_schema = xmlRelaxNGParse(c_parser_context);
|
91
82
|
|
92
|
-
xmlSetStructuredErrorFunc(NULL, NULL);
|
93
83
|
xmlRelaxNGFreeParserCtxt(c_parser_context);
|
84
|
+
noko__structured_error_func_restore(&handler_state);
|
94
85
|
|
95
86
|
if (NULL == c_schema) {
|
96
|
-
|
97
|
-
|
98
|
-
|
87
|
+
VALUE exception = rb_funcall(cNokogiriXmlSyntaxError, rb_intern("aggregate"), 1, rb_errors);
|
88
|
+
|
89
|
+
if (RB_TEST(exception)) {
|
90
|
+
rb_exc_raise(exception);
|
99
91
|
} else {
|
100
92
|
rb_raise(rb_eRuntimeError, "Could not parse document");
|
101
93
|
}
|
102
|
-
|
103
|
-
return Qnil;
|
104
94
|
}
|
105
95
|
|
106
|
-
rb_schema = TypedData_Wrap_Struct(
|
96
|
+
rb_schema = TypedData_Wrap_Struct(rb_class, &xml_relax_ng_type, c_schema);
|
107
97
|
rb_iv_set(rb_schema, "@errors", rb_errors);
|
108
98
|
rb_iv_set(rb_schema, "@parse_options", rb_parse_options);
|
109
99
|
|
@@ -111,37 +101,27 @@ xml_relax_ng_parse_schema(
|
|
111
101
|
}
|
112
102
|
|
113
103
|
/*
|
114
|
-
* call-seq:
|
115
|
-
*
|
104
|
+
* :call-seq:
|
105
|
+
* from_document(document) → Nokogiri::XML::RelaxNG
|
106
|
+
* from_document(document, parse_options) → Nokogiri::XML::RelaxNG
|
116
107
|
*
|
117
|
-
*
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
rb_scan_args(argc, argv, "11", &rb_content, &rb_parse_options);
|
127
|
-
|
128
|
-
c_parser_context = xmlRelaxNGNewMemParserCtxt(
|
129
|
-
(const char *)StringValuePtr(rb_content),
|
130
|
-
(int)RSTRING_LEN(rb_content)
|
131
|
-
);
|
132
|
-
|
133
|
-
return xml_relax_ng_parse_schema(klass, c_parser_context, rb_parse_options);
|
134
|
-
}
|
135
|
-
|
136
|
-
/*
|
137
|
-
* call-seq:
|
138
|
-
* from_document(doc)
|
108
|
+
* Parse a RELAX NG schema definition from a Document to create a new Nokogiri::XML::RelaxNG.
|
109
|
+
*
|
110
|
+
* [Parameters]
|
111
|
+
* - +document+ (XML::Document) A document containing the RELAX NG schema definition
|
112
|
+
* - +parse_options+ (Nokogiri::XML::ParseOptions)
|
113
|
+
* Defaults to ParseOptions::DEFAULT_SCHEMA ⚠ Unused
|
114
|
+
*
|
115
|
+
* [Returns] Nokogiri::XML::RelaxNG
|
139
116
|
*
|
140
|
-
*
|
117
|
+
* ⚠ +parse_options+ is currently unused by this method and is present only as a placeholder for
|
118
|
+
* future functionality.
|
141
119
|
*/
|
142
120
|
static VALUE
|
143
|
-
|
121
|
+
noko_xml_relax_ng_s_from_document(int argc, VALUE *argv, VALUE rb_class)
|
144
122
|
{
|
123
|
+
/* TODO: deprecate this method and put file-or-string logic into .new so that becomes the
|
124
|
+
* preferred entry point, and this can become a private method */
|
145
125
|
VALUE rb_document;
|
146
126
|
VALUE rb_parse_options;
|
147
127
|
xmlDocPtr c_document;
|
@@ -154,7 +134,7 @@ from_document(int argc, VALUE *argv, VALUE klass)
|
|
154
134
|
|
155
135
|
c_parser_context = xmlRelaxNGNewDocParserCtxt(c_document);
|
156
136
|
|
157
|
-
return
|
137
|
+
return _noko_xml_relax_ng_parse_schema(rb_class, c_parser_context, rb_parse_options);
|
158
138
|
}
|
159
139
|
|
160
140
|
void
|
@@ -163,8 +143,7 @@ noko_init_xml_relax_ng(void)
|
|
163
143
|
assert(cNokogiriXmlSchema);
|
164
144
|
cNokogiriXmlRelaxNG = rb_define_class_under(mNokogiriXml, "RelaxNG", cNokogiriXmlSchema);
|
165
145
|
|
166
|
-
rb_define_singleton_method(cNokogiriXmlRelaxNG, "
|
167
|
-
rb_define_singleton_method(cNokogiriXmlRelaxNG, "from_document", from_document, -1);
|
146
|
+
rb_define_singleton_method(cNokogiriXmlRelaxNG, "from_document", noko_xml_relax_ng_s_from_document, -1);
|
168
147
|
|
169
|
-
rb_define_private_method(cNokogiriXmlRelaxNG, "validate_document",
|
148
|
+
rb_define_private_method(cNokogiriXmlRelaxNG, "validate_document", noko_xml_relax_ng__validate_document, 1);
|
170
149
|
}
|