nokogiri 1.16.8 → 1.18.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +11 -21
- data/LICENSE-DEPENDENCIES.md +6 -6
- data/README.md +8 -5
- data/dependencies.yml +6 -6
- data/ext/nokogiri/extconf.rb +188 -142
- data/ext/nokogiri/gumbo.c +69 -53
- data/ext/nokogiri/html4_document.c +10 -4
- data/ext/nokogiri/html4_element_description.c +18 -18
- data/ext/nokogiri/html4_sax_parser.c +40 -0
- data/ext/nokogiri/html4_sax_parser_context.c +48 -58
- data/ext/nokogiri/html4_sax_push_parser.c +25 -24
- data/ext/nokogiri/libxml2_polyfill.c +114 -0
- data/ext/nokogiri/nokogiri.c +9 -2
- data/ext/nokogiri/nokogiri.h +18 -33
- data/ext/nokogiri/xml_attr.c +1 -1
- data/ext/nokogiri/xml_cdata.c +2 -10
- data/ext/nokogiri/xml_comment.c +3 -8
- data/ext/nokogiri/xml_document.c +163 -156
- data/ext/nokogiri/xml_document_fragment.c +10 -25
- data/ext/nokogiri/xml_dtd.c +1 -1
- data/ext/nokogiri/xml_element_content.c +9 -9
- data/ext/nokogiri/xml_encoding_handler.c +4 -4
- data/ext/nokogiri/xml_namespace.c +6 -6
- data/ext/nokogiri/xml_node.c +134 -103
- data/ext/nokogiri/xml_node_set.c +46 -44
- data/ext/nokogiri/xml_reader.c +54 -58
- data/ext/nokogiri/xml_relax_ng.c +35 -56
- data/ext/nokogiri/xml_sax_parser.c +156 -88
- data/ext/nokogiri/xml_sax_parser_context.c +219 -131
- data/ext/nokogiri/xml_sax_push_parser.c +68 -49
- data/ext/nokogiri/xml_schema.c +50 -85
- data/ext/nokogiri/xml_syntax_error.c +19 -11
- data/ext/nokogiri/xml_text.c +2 -4
- data/ext/nokogiri/xml_xpath_context.c +103 -100
- data/ext/nokogiri/xslt_stylesheet.c +8 -8
- data/gumbo-parser/src/ascii.c +2 -2
- data/gumbo-parser/src/error.c +76 -48
- data/gumbo-parser/src/error.h +5 -1
- data/gumbo-parser/src/nokogiri_gumbo.h +11 -2
- data/gumbo-parser/src/parser.c +63 -25
- data/gumbo-parser/src/tokenizer.c +6 -6
- data/lib/nokogiri/class_resolver.rb +1 -1
- data/lib/nokogiri/css/node.rb +6 -2
- data/lib/nokogiri/css/parser.rb +6 -4
- data/lib/nokogiri/css/parser.y +2 -2
- data/lib/nokogiri/css/parser_extras.rb +6 -66
- data/lib/nokogiri/css/selector_cache.rb +38 -0
- data/lib/nokogiri/css/tokenizer.rb +4 -4
- data/lib/nokogiri/css/tokenizer.rex +9 -8
- data/lib/nokogiri/css/xpath_visitor.rb +43 -6
- data/lib/nokogiri/css.rb +86 -20
- data/lib/nokogiri/decorators/slop.rb +3 -5
- data/lib/nokogiri/encoding_handler.rb +2 -2
- data/lib/nokogiri/html4/document.rb +44 -23
- data/lib/nokogiri/html4/document_fragment.rb +124 -12
- data/lib/nokogiri/html4/encoding_reader.rb +1 -1
- data/lib/nokogiri/html4/sax/parser.rb +23 -38
- data/lib/nokogiri/html4/sax/parser_context.rb +4 -9
- data/lib/nokogiri/html4.rb +9 -14
- data/lib/nokogiri/html5/builder.rb +40 -0
- data/lib/nokogiri/html5/document.rb +61 -30
- data/lib/nokogiri/html5/document_fragment.rb +130 -20
- data/lib/nokogiri/html5/node.rb +4 -4
- data/lib/nokogiri/html5.rb +114 -72
- data/lib/nokogiri/version/constant.rb +1 -1
- data/lib/nokogiri/xml/builder.rb +8 -1
- data/lib/nokogiri/xml/document.rb +70 -26
- data/lib/nokogiri/xml/document_fragment.rb +84 -13
- data/lib/nokogiri/xml/node.rb +82 -11
- data/lib/nokogiri/xml/node_set.rb +9 -7
- data/lib/nokogiri/xml/parse_options.rb +1 -1
- data/lib/nokogiri/xml/pp/node.rb +6 -1
- data/lib/nokogiri/xml/reader.rb +46 -13
- data/lib/nokogiri/xml/relax_ng.rb +57 -20
- data/lib/nokogiri/xml/sax/document.rb +174 -83
- data/lib/nokogiri/xml/sax/parser.rb +115 -41
- data/lib/nokogiri/xml/sax/parser_context.rb +116 -8
- data/lib/nokogiri/xml/sax/push_parser.rb +3 -0
- data/lib/nokogiri/xml/sax.rb +48 -0
- data/lib/nokogiri/xml/schema.rb +112 -45
- data/lib/nokogiri/xml/searchable.rb +38 -42
- data/lib/nokogiri/xml/syntax_error.rb +22 -0
- data/lib/nokogiri/xml/xpath_context.rb +14 -3
- data/lib/nokogiri/xml.rb +13 -24
- data/lib/nokogiri/xslt.rb +3 -9
- data/lib/xsd/xmlparser/nokogiri.rb +3 -4
- data/patches/libxml2/0019-xpath-Use-separate-static-hash-table-for-standard-fu.patch +244 -0
- data/ports/archives/libxml2-2.13.5.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.42.tar.xz +0 -0
- metadata +13 -12
- data/ext/nokogiri/libxml2_backwards_compat.c +0 -121
- data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +0 -25
- data/ports/archives/libxml2-2.12.9.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.39.tar.xz +0 -0
data/ext/nokogiri/xml_reader.c
CHANGED
@@ -15,8 +15,8 @@ xml_reader_deallocate(void *data)
|
|
15
15
|
}
|
16
16
|
}
|
17
17
|
|
18
|
-
static const rb_data_type_t
|
19
|
-
.wrap_struct_name = "
|
18
|
+
static const rb_data_type_t xml_text_reader_type = {
|
19
|
+
.wrap_struct_name = "xmlTextReader",
|
20
20
|
.function = {
|
21
21
|
.dfree = xml_reader_deallocate,
|
22
22
|
},
|
@@ -84,7 +84,7 @@ default_eh(VALUE self)
|
|
84
84
|
xmlTextReaderPtr reader;
|
85
85
|
int eh;
|
86
86
|
|
87
|
-
TypedData_Get_Struct(self, xmlTextReader, &
|
87
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
|
88
88
|
eh = xmlTextReaderIsDefault(reader);
|
89
89
|
if (eh == 0) { return Qfalse; }
|
90
90
|
if (eh == 1) { return Qtrue; }
|
@@ -104,7 +104,7 @@ value_eh(VALUE self)
|
|
104
104
|
xmlTextReaderPtr reader;
|
105
105
|
int eh;
|
106
106
|
|
107
|
-
TypedData_Get_Struct(self, xmlTextReader, &
|
107
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
|
108
108
|
eh = xmlTextReaderHasValue(reader);
|
109
109
|
if (eh == 0) { return Qfalse; }
|
110
110
|
if (eh == 1) { return Qtrue; }
|
@@ -124,7 +124,7 @@ attributes_eh(VALUE self)
|
|
124
124
|
xmlTextReaderPtr reader;
|
125
125
|
int eh;
|
126
126
|
|
127
|
-
TypedData_Get_Struct(self, xmlTextReader, &
|
127
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
|
128
128
|
eh = has_attributes(reader);
|
129
129
|
if (eh == 0) { return Qfalse; }
|
130
130
|
if (eh == 1) { return Qtrue; }
|
@@ -146,7 +146,7 @@ rb_xml_reader_namespaces(VALUE rb_reader)
|
|
146
146
|
xmlNodePtr c_node;
|
147
147
|
VALUE rb_errors;
|
148
148
|
|
149
|
-
TypedData_Get_Struct(rb_reader, xmlTextReader, &
|
149
|
+
TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_text_reader_type, c_reader);
|
150
150
|
|
151
151
|
if (! has_attributes(c_reader)) {
|
152
152
|
return rb_namespaces ;
|
@@ -154,7 +154,7 @@ rb_xml_reader_namespaces(VALUE rb_reader)
|
|
154
154
|
|
155
155
|
rb_errors = rb_funcall(rb_reader, rb_intern("errors"), 0);
|
156
156
|
|
157
|
-
xmlSetStructuredErrorFunc((void *)rb_errors,
|
157
|
+
xmlSetStructuredErrorFunc((void *)rb_errors, noko__error_array_pusher);
|
158
158
|
c_node = xmlTextReaderExpand(c_reader);
|
159
159
|
xmlSetStructuredErrorFunc(NULL, NULL);
|
160
160
|
|
@@ -188,7 +188,7 @@ rb_xml_reader_attribute_hash(VALUE rb_reader)
|
|
188
188
|
xmlAttrPtr c_property;
|
189
189
|
VALUE rb_errors;
|
190
190
|
|
191
|
-
TypedData_Get_Struct(rb_reader, xmlTextReader, &
|
191
|
+
TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_text_reader_type, c_reader);
|
192
192
|
|
193
193
|
if (!has_attributes(c_reader)) {
|
194
194
|
return rb_attributes;
|
@@ -196,7 +196,7 @@ rb_xml_reader_attribute_hash(VALUE rb_reader)
|
|
196
196
|
|
197
197
|
rb_errors = rb_funcall(rb_reader, rb_intern("errors"), 0);
|
198
198
|
|
199
|
-
xmlSetStructuredErrorFunc((void *)rb_errors,
|
199
|
+
xmlSetStructuredErrorFunc((void *)rb_errors, noko__error_array_pusher);
|
200
200
|
c_node = xmlTextReaderExpand(c_reader);
|
201
201
|
xmlSetStructuredErrorFunc(NULL, NULL);
|
202
202
|
|
@@ -241,7 +241,7 @@ attribute_at(VALUE self, VALUE index)
|
|
241
241
|
xmlChar *value;
|
242
242
|
VALUE rb_value;
|
243
243
|
|
244
|
-
TypedData_Get_Struct(self, xmlTextReader, &
|
244
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
|
245
245
|
|
246
246
|
if (NIL_P(index)) { return Qnil; }
|
247
247
|
index = rb_Integer(index);
|
@@ -270,7 +270,7 @@ reader_attribute(VALUE self, VALUE name)
|
|
270
270
|
xmlChar *value ;
|
271
271
|
VALUE rb_value;
|
272
272
|
|
273
|
-
TypedData_Get_Struct(self, xmlTextReader, &
|
273
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
|
274
274
|
|
275
275
|
if (NIL_P(name)) { return Qnil; }
|
276
276
|
name = StringValue(name) ;
|
@@ -295,7 +295,7 @@ attribute_count(VALUE self)
|
|
295
295
|
xmlTextReaderPtr reader;
|
296
296
|
int count;
|
297
297
|
|
298
|
-
TypedData_Get_Struct(self, xmlTextReader, &
|
298
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
|
299
299
|
count = xmlTextReaderAttributeCount(reader);
|
300
300
|
if (count == -1) { return Qnil; }
|
301
301
|
|
@@ -314,7 +314,7 @@ depth(VALUE self)
|
|
314
314
|
xmlTextReaderPtr reader;
|
315
315
|
int depth;
|
316
316
|
|
317
|
-
TypedData_Get_Struct(self, xmlTextReader, &
|
317
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
|
318
318
|
depth = xmlTextReaderDepth(reader);
|
319
319
|
if (depth == -1) { return Qnil; }
|
320
320
|
|
@@ -333,7 +333,7 @@ xml_version(VALUE self)
|
|
333
333
|
xmlTextReaderPtr reader;
|
334
334
|
const char *version;
|
335
335
|
|
336
|
-
TypedData_Get_Struct(self, xmlTextReader, &
|
336
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
|
337
337
|
version = (const char *)xmlTextReaderConstXmlVersion(reader);
|
338
338
|
if (version == NULL) { return Qnil; }
|
339
339
|
|
@@ -352,7 +352,7 @@ lang(VALUE self)
|
|
352
352
|
xmlTextReaderPtr reader;
|
353
353
|
const char *lang;
|
354
354
|
|
355
|
-
TypedData_Get_Struct(self, xmlTextReader, &
|
355
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
|
356
356
|
lang = (const char *)xmlTextReaderConstXmlLang(reader);
|
357
357
|
if (lang == NULL) { return Qnil; }
|
358
358
|
|
@@ -371,7 +371,7 @@ value(VALUE self)
|
|
371
371
|
xmlTextReaderPtr reader;
|
372
372
|
const char *value;
|
373
373
|
|
374
|
-
TypedData_Get_Struct(self, xmlTextReader, &
|
374
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
|
375
375
|
value = (const char *)xmlTextReaderConstValue(reader);
|
376
376
|
if (value == NULL) { return Qnil; }
|
377
377
|
|
@@ -390,7 +390,7 @@ prefix(VALUE self)
|
|
390
390
|
xmlTextReaderPtr reader;
|
391
391
|
const char *prefix;
|
392
392
|
|
393
|
-
TypedData_Get_Struct(self, xmlTextReader, &
|
393
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
|
394
394
|
prefix = (const char *)xmlTextReaderConstPrefix(reader);
|
395
395
|
if (prefix == NULL) { return Qnil; }
|
396
396
|
|
@@ -409,7 +409,7 @@ namespace_uri(VALUE self)
|
|
409
409
|
xmlTextReaderPtr reader;
|
410
410
|
const char *uri;
|
411
411
|
|
412
|
-
TypedData_Get_Struct(self, xmlTextReader, &
|
412
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
|
413
413
|
uri = (const char *)xmlTextReaderConstNamespaceUri(reader);
|
414
414
|
if (uri == NULL) { return Qnil; }
|
415
415
|
|
@@ -428,7 +428,7 @@ local_name(VALUE self)
|
|
428
428
|
xmlTextReaderPtr reader;
|
429
429
|
const char *name;
|
430
430
|
|
431
|
-
TypedData_Get_Struct(self, xmlTextReader, &
|
431
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
|
432
432
|
name = (const char *)xmlTextReaderConstLocalName(reader);
|
433
433
|
if (name == NULL) { return Qnil; }
|
434
434
|
|
@@ -447,7 +447,7 @@ name(VALUE self)
|
|
447
447
|
xmlTextReaderPtr reader;
|
448
448
|
const char *name;
|
449
449
|
|
450
|
-
TypedData_Get_Struct(self, xmlTextReader, &
|
450
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
|
451
451
|
name = (const char *)xmlTextReaderConstName(reader);
|
452
452
|
if (name == NULL) { return Qnil; }
|
453
453
|
|
@@ -467,7 +467,7 @@ rb_xml_reader_base_uri(VALUE rb_reader)
|
|
467
467
|
xmlTextReaderPtr c_reader;
|
468
468
|
xmlChar *c_base_uri;
|
469
469
|
|
470
|
-
TypedData_Get_Struct(rb_reader, xmlTextReader, &
|
470
|
+
TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_text_reader_type, c_reader);
|
471
471
|
|
472
472
|
c_base_uri = xmlTextReaderBaseUri(c_reader);
|
473
473
|
if (c_base_uri == NULL) {
|
@@ -490,7 +490,7 @@ static VALUE
|
|
490
490
|
state(VALUE self)
|
491
491
|
{
|
492
492
|
xmlTextReaderPtr reader;
|
493
|
-
TypedData_Get_Struct(self, xmlTextReader, &
|
493
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
|
494
494
|
return INT2NUM(xmlTextReaderReadState(reader));
|
495
495
|
}
|
496
496
|
|
@@ -504,7 +504,7 @@ static VALUE
|
|
504
504
|
node_type(VALUE self)
|
505
505
|
{
|
506
506
|
xmlTextReaderPtr reader;
|
507
|
-
TypedData_Get_Struct(self, xmlTextReader, &
|
507
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
|
508
508
|
return INT2NUM(xmlTextReaderNodeType(reader));
|
509
509
|
}
|
510
510
|
|
@@ -515,44 +515,41 @@ node_type(VALUE self)
|
|
515
515
|
* Move the Reader forward through the XML document.
|
516
516
|
*/
|
517
517
|
static VALUE
|
518
|
-
read_more(VALUE
|
518
|
+
read_more(VALUE rb_reader)
|
519
519
|
{
|
520
|
-
xmlTextReaderPtr
|
521
|
-
|
522
|
-
VALUE error_list;
|
523
|
-
int ret;
|
524
|
-
xmlDocPtr c_document;
|
520
|
+
xmlTextReaderPtr c_reader;
|
521
|
+
libxmlStructuredErrorHandlerState handler_state;
|
525
522
|
|
526
|
-
TypedData_Get_Struct(
|
523
|
+
TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_text_reader_type, c_reader);
|
527
524
|
|
528
|
-
|
525
|
+
VALUE rb_errors = rb_funcall(rb_reader, rb_intern("errors"), 0);
|
526
|
+
noko__structured_error_func_save_and_set(&handler_state, (void *)rb_errors, noko__error_array_pusher);
|
529
527
|
|
530
|
-
|
531
|
-
|
532
|
-
|
528
|
+
int status = xmlTextReaderRead(c_reader);
|
529
|
+
|
530
|
+
noko__structured_error_func_restore(&handler_state);
|
533
531
|
|
534
|
-
c_document = xmlTextReaderCurrentDoc(
|
532
|
+
xmlDocPtr c_document = xmlTextReaderCurrentDoc(c_reader);
|
535
533
|
if (c_document && c_document->encoding == NULL) {
|
536
|
-
VALUE constructor_encoding = rb_iv_get(
|
534
|
+
VALUE constructor_encoding = rb_iv_get(rb_reader, "@encoding");
|
537
535
|
if (RTEST(constructor_encoding)) {
|
538
536
|
c_document->encoding = xmlStrdup(BAD_CAST StringValueCStr(constructor_encoding));
|
539
537
|
} else {
|
540
|
-
rb_iv_set(
|
538
|
+
rb_iv_set(rb_reader, "@encoding", NOKOGIRI_STR_NEW2("UTF-8"));
|
541
539
|
c_document->encoding = xmlStrdup(BAD_CAST "UTF-8");
|
542
540
|
}
|
543
541
|
}
|
544
542
|
|
545
|
-
if (
|
546
|
-
if (
|
543
|
+
if (status == 1) { return rb_reader; }
|
544
|
+
if (status == 0) { return Qnil; }
|
547
545
|
|
548
|
-
error
|
549
|
-
|
550
|
-
|
546
|
+
/* if we're here, there was an error */
|
547
|
+
VALUE exception = rb_funcall(cNokogiriXmlSyntaxError, rb_intern("aggregate"), 1, rb_errors);
|
548
|
+
if (RB_TEST(exception)) {
|
549
|
+
rb_exc_raise(exception);
|
551
550
|
} else {
|
552
|
-
rb_raise(rb_eRuntimeError, "Error pulling: %d",
|
551
|
+
rb_raise(rb_eRuntimeError, "Error pulling: %d", status);
|
553
552
|
}
|
554
|
-
|
555
|
-
return Qnil;
|
556
553
|
}
|
557
554
|
|
558
555
|
/*
|
@@ -569,7 +566,7 @@ inner_xml(VALUE self)
|
|
569
566
|
xmlChar *value;
|
570
567
|
VALUE str;
|
571
568
|
|
572
|
-
TypedData_Get_Struct(self, xmlTextReader, &
|
569
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
|
573
570
|
|
574
571
|
value = xmlTextReaderReadInnerXml(reader);
|
575
572
|
|
@@ -596,7 +593,7 @@ outer_xml(VALUE self)
|
|
596
593
|
xmlChar *value;
|
597
594
|
VALUE str = Qnil;
|
598
595
|
|
599
|
-
TypedData_Get_Struct(self, xmlTextReader, &
|
596
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
|
600
597
|
|
601
598
|
value = xmlTextReaderReadOuterXml(reader);
|
602
599
|
|
@@ -611,11 +608,13 @@ outer_xml(VALUE self)
|
|
611
608
|
* call-seq:
|
612
609
|
* from_memory(string, url = nil, encoding = nil, options = 0)
|
613
610
|
*
|
614
|
-
* Create a new
|
611
|
+
* Create a new Reader to parse a String.
|
615
612
|
*/
|
616
613
|
static VALUE
|
617
614
|
from_memory(int argc, VALUE *argv, VALUE klass)
|
618
615
|
{
|
616
|
+
/* TODO: deprecate this method, since Reader.new can handle both memory and IO. It can then
|
617
|
+
* become private. */
|
619
618
|
VALUE rb_buffer, rb_url, encoding, rb_options;
|
620
619
|
xmlTextReaderPtr reader;
|
621
620
|
const char *c_url = NULL;
|
@@ -643,7 +642,7 @@ from_memory(int argc, VALUE *argv, VALUE klass)
|
|
643
642
|
rb_raise(rb_eRuntimeError, "couldn't create a parser");
|
644
643
|
}
|
645
644
|
|
646
|
-
rb_reader = TypedData_Wrap_Struct(klass, &
|
645
|
+
rb_reader = TypedData_Wrap_Struct(klass, &xml_text_reader_type, reader);
|
647
646
|
args[0] = rb_buffer;
|
648
647
|
args[1] = rb_url;
|
649
648
|
args[2] = encoding;
|
@@ -656,11 +655,13 @@ from_memory(int argc, VALUE *argv, VALUE klass)
|
|
656
655
|
* call-seq:
|
657
656
|
* from_io(io, url = nil, encoding = nil, options = 0)
|
658
657
|
*
|
659
|
-
* Create a new
|
658
|
+
* Create a new Reader to parse an IO stream.
|
660
659
|
*/
|
661
660
|
static VALUE
|
662
661
|
from_io(int argc, VALUE *argv, VALUE klass)
|
663
662
|
{
|
663
|
+
/* TODO: deprecate this method, since Reader.new can handle both memory and IO. It can then
|
664
|
+
* become private. */
|
664
665
|
VALUE rb_io, rb_url, encoding, rb_options;
|
665
666
|
xmlTextReaderPtr reader;
|
666
667
|
const char *c_url = NULL;
|
@@ -689,7 +690,7 @@ from_io(int argc, VALUE *argv, VALUE klass)
|
|
689
690
|
rb_raise(rb_eRuntimeError, "couldn't create a parser");
|
690
691
|
}
|
691
692
|
|
692
|
-
rb_reader = TypedData_Wrap_Struct(klass, &
|
693
|
+
rb_reader = TypedData_Wrap_Struct(klass, &xml_text_reader_type, reader);
|
693
694
|
args[0] = rb_io;
|
694
695
|
args[1] = rb_url;
|
695
696
|
args[2] = encoding;
|
@@ -709,7 +710,7 @@ empty_element_p(VALUE self)
|
|
709
710
|
{
|
710
711
|
xmlTextReaderPtr reader;
|
711
712
|
|
712
|
-
TypedData_Get_Struct(self, xmlTextReader, &
|
713
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
|
713
714
|
|
714
715
|
if (xmlTextReaderIsEmptyElement(reader)) {
|
715
716
|
return Qtrue;
|
@@ -725,7 +726,7 @@ rb_xml_reader_encoding(VALUE rb_reader)
|
|
725
726
|
const char *parser_encoding;
|
726
727
|
VALUE constructor_encoding;
|
727
728
|
|
728
|
-
TypedData_Get_Struct(rb_reader, xmlTextReader, &
|
729
|
+
TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_text_reader_type, c_reader);
|
729
730
|
parser_encoding = (const char *)xmlTextReaderConstEncoding(c_reader);
|
730
731
|
if (parser_encoding) {
|
731
732
|
return NOKOGIRI_STR_NEW2(parser_encoding);
|
@@ -742,11 +743,6 @@ rb_xml_reader_encoding(VALUE rb_reader)
|
|
742
743
|
void
|
743
744
|
noko_init_xml_reader(void)
|
744
745
|
{
|
745
|
-
/*
|
746
|
-
* The Reader parser allows you to effectively pull parse an XML document.
|
747
|
-
* Once instantiated, call Nokogiri::XML::Reader#each to iterate over each
|
748
|
-
* node. Note that you may only iterate over the document once!
|
749
|
-
*/
|
750
746
|
cNokogiriXmlReader = rb_define_class_under(mNokogiriXml, "Reader", rb_cObject);
|
751
747
|
|
752
748
|
rb_undef_alloc_func(cNokogiriXmlReader);
|
data/ext/nokogiri/xml_relax_ng.c
CHANGED
@@ -3,28 +3,22 @@
|
|
3
3
|
VALUE cNokogiriXmlRelaxNG;
|
4
4
|
|
5
5
|
static void
|
6
|
-
|
6
|
+
_noko_xml_relax_ng_deallocate(void *data)
|
7
7
|
{
|
8
8
|
xmlRelaxNGPtr schema = data;
|
9
9
|
xmlRelaxNGFree(schema);
|
10
10
|
}
|
11
11
|
|
12
12
|
static const rb_data_type_t xml_relax_ng_type = {
|
13
|
-
.wrap_struct_name = "
|
13
|
+
.wrap_struct_name = "xmlRelaxNG",
|
14
14
|
.function = {
|
15
|
-
.dfree =
|
15
|
+
.dfree = _noko_xml_relax_ng_deallocate,
|
16
16
|
},
|
17
17
|
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
|
18
18
|
};
|
19
19
|
|
20
|
-
/*
|
21
|
-
* call-seq:
|
22
|
-
* validate_document(document)
|
23
|
-
*
|
24
|
-
* Validate a Nokogiri::XML::Document against this RelaxNG schema.
|
25
|
-
*/
|
26
20
|
static VALUE
|
27
|
-
|
21
|
+
noko_xml_relax_ng__validate_document(VALUE self, VALUE document)
|
28
22
|
{
|
29
23
|
xmlDocPtr doc;
|
30
24
|
xmlRelaxNGPtr schema;
|
@@ -43,13 +37,11 @@ validate_document(VALUE self, VALUE document)
|
|
43
37
|
rb_raise(rb_eRuntimeError, "Could not create a validation context");
|
44
38
|
}
|
45
39
|
|
46
|
-
#ifdef HAVE_XMLRELAXNGSETVALIDSTRUCTUREDERRORS
|
47
40
|
xmlRelaxNGSetValidStructuredErrors(
|
48
41
|
valid_ctxt,
|
49
|
-
|
42
|
+
noko__error_array_pusher,
|
50
43
|
(void *)errors
|
51
44
|
);
|
52
|
-
#endif
|
53
45
|
|
54
46
|
xmlRelaxNGValidateDoc(valid_ctxt, doc);
|
55
47
|
|
@@ -59,8 +51,8 @@ validate_document(VALUE self, VALUE document)
|
|
59
51
|
}
|
60
52
|
|
61
53
|
static VALUE
|
62
|
-
|
63
|
-
VALUE
|
54
|
+
_noko_xml_relax_ng_parse_schema(
|
55
|
+
VALUE rb_class,
|
64
56
|
xmlRelaxNGParserCtxtPtr c_parser_context,
|
65
57
|
VALUE rb_parse_options
|
66
58
|
)
|
@@ -68,6 +60,7 @@ xml_relax_ng_parse_schema(
|
|
68
60
|
VALUE rb_errors;
|
69
61
|
VALUE rb_schema;
|
70
62
|
xmlRelaxNGPtr c_schema;
|
63
|
+
libxmlStructuredErrorHandlerState handler_state;
|
71
64
|
|
72
65
|
if (NIL_P(rb_parse_options)) {
|
73
66
|
rb_parse_options = rb_const_get_at(
|
@@ -77,33 +70,30 @@ xml_relax_ng_parse_schema(
|
|
77
70
|
}
|
78
71
|
|
79
72
|
rb_errors = rb_ary_new();
|
80
|
-
xmlSetStructuredErrorFunc((void *)rb_errors, Nokogiri_error_array_pusher);
|
81
73
|
|
82
|
-
|
74
|
+
noko__structured_error_func_save_and_set(&handler_state, (void *)rb_errors, noko__error_array_pusher);
|
83
75
|
xmlRelaxNGSetParserStructuredErrors(
|
84
76
|
c_parser_context,
|
85
|
-
|
77
|
+
noko__error_array_pusher,
|
86
78
|
(void *)rb_errors
|
87
79
|
);
|
88
|
-
#endif
|
89
80
|
|
90
81
|
c_schema = xmlRelaxNGParse(c_parser_context);
|
91
82
|
|
92
|
-
xmlSetStructuredErrorFunc(NULL, NULL);
|
93
83
|
xmlRelaxNGFreeParserCtxt(c_parser_context);
|
84
|
+
noko__structured_error_func_restore(&handler_state);
|
94
85
|
|
95
86
|
if (NULL == c_schema) {
|
96
|
-
|
97
|
-
|
98
|
-
|
87
|
+
VALUE exception = rb_funcall(cNokogiriXmlSyntaxError, rb_intern("aggregate"), 1, rb_errors);
|
88
|
+
|
89
|
+
if (RB_TEST(exception)) {
|
90
|
+
rb_exc_raise(exception);
|
99
91
|
} else {
|
100
92
|
rb_raise(rb_eRuntimeError, "Could not parse document");
|
101
93
|
}
|
102
|
-
|
103
|
-
return Qnil;
|
104
94
|
}
|
105
95
|
|
106
|
-
rb_schema = TypedData_Wrap_Struct(
|
96
|
+
rb_schema = TypedData_Wrap_Struct(rb_class, &xml_relax_ng_type, c_schema);
|
107
97
|
rb_iv_set(rb_schema, "@errors", rb_errors);
|
108
98
|
rb_iv_set(rb_schema, "@parse_options", rb_parse_options);
|
109
99
|
|
@@ -111,37 +101,27 @@ xml_relax_ng_parse_schema(
|
|
111
101
|
}
|
112
102
|
|
113
103
|
/*
|
114
|
-
* call-seq:
|
115
|
-
*
|
104
|
+
* :call-seq:
|
105
|
+
* from_document(document) → Nokogiri::XML::RelaxNG
|
106
|
+
* from_document(document, parse_options) → Nokogiri::XML::RelaxNG
|
116
107
|
*
|
117
|
-
*
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
rb_scan_args(argc, argv, "11", &rb_content, &rb_parse_options);
|
127
|
-
|
128
|
-
c_parser_context = xmlRelaxNGNewMemParserCtxt(
|
129
|
-
(const char *)StringValuePtr(rb_content),
|
130
|
-
(int)RSTRING_LEN(rb_content)
|
131
|
-
);
|
132
|
-
|
133
|
-
return xml_relax_ng_parse_schema(klass, c_parser_context, rb_parse_options);
|
134
|
-
}
|
135
|
-
|
136
|
-
/*
|
137
|
-
* call-seq:
|
138
|
-
* from_document(doc)
|
108
|
+
* Parse a RELAX NG schema definition from a Document to create a new Nokogiri::XML::RelaxNG.
|
109
|
+
*
|
110
|
+
* [Parameters]
|
111
|
+
* - +document+ (XML::Document) A document containing the RELAX NG schema definition
|
112
|
+
* - +parse_options+ (Nokogiri::XML::ParseOptions)
|
113
|
+
* Defaults to ParseOptions::DEFAULT_SCHEMA ⚠ Unused
|
114
|
+
*
|
115
|
+
* [Returns] Nokogiri::XML::RelaxNG
|
139
116
|
*
|
140
|
-
*
|
117
|
+
* ⚠ +parse_options+ is currently unused by this method and is present only as a placeholder for
|
118
|
+
* future functionality.
|
141
119
|
*/
|
142
120
|
static VALUE
|
143
|
-
|
121
|
+
noko_xml_relax_ng_s_from_document(int argc, VALUE *argv, VALUE rb_class)
|
144
122
|
{
|
123
|
+
/* TODO: deprecate this method and put file-or-string logic into .new so that becomes the
|
124
|
+
* preferred entry point, and this can become a private method */
|
145
125
|
VALUE rb_document;
|
146
126
|
VALUE rb_parse_options;
|
147
127
|
xmlDocPtr c_document;
|
@@ -154,7 +134,7 @@ from_document(int argc, VALUE *argv, VALUE klass)
|
|
154
134
|
|
155
135
|
c_parser_context = xmlRelaxNGNewDocParserCtxt(c_document);
|
156
136
|
|
157
|
-
return
|
137
|
+
return _noko_xml_relax_ng_parse_schema(rb_class, c_parser_context, rb_parse_options);
|
158
138
|
}
|
159
139
|
|
160
140
|
void
|
@@ -163,8 +143,7 @@ noko_init_xml_relax_ng(void)
|
|
163
143
|
assert(cNokogiriXmlSchema);
|
164
144
|
cNokogiriXmlRelaxNG = rb_define_class_under(mNokogiriXml, "RelaxNG", cNokogiriXmlSchema);
|
165
145
|
|
166
|
-
rb_define_singleton_method(cNokogiriXmlRelaxNG, "
|
167
|
-
rb_define_singleton_method(cNokogiriXmlRelaxNG, "from_document", from_document, -1);
|
146
|
+
rb_define_singleton_method(cNokogiriXmlRelaxNG, "from_document", noko_xml_relax_ng_s_from_document, -1);
|
168
147
|
|
169
|
-
rb_define_private_method(cNokogiriXmlRelaxNG, "validate_document",
|
148
|
+
rb_define_private_method(cNokogiriXmlRelaxNG, "validate_document", noko_xml_relax_ng__validate_document, 1);
|
170
149
|
}
|