nokogiri 1.14.2 → 1.16.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +19 -15
- data/README.md +4 -1
- data/dependencies.yml +9 -8
- data/ext/nokogiri/extconf.rb +69 -26
- data/ext/nokogiri/html4_document.c +1 -2
- data/ext/nokogiri/html4_element_description.c +19 -14
- data/ext/nokogiri/html4_sax_parser_context.c +10 -16
- data/ext/nokogiri/html4_sax_push_parser.c +3 -3
- data/ext/nokogiri/nokogiri.c +46 -24
- data/ext/nokogiri/nokogiri.h +23 -5
- data/ext/nokogiri/test_global_handlers.c +1 -1
- data/ext/nokogiri/xml_attr.c +1 -1
- data/ext/nokogiri/xml_cdata.c +30 -17
- data/ext/nokogiri/xml_comment.c +1 -1
- data/ext/nokogiri/xml_document.c +113 -25
- data/ext/nokogiri/xml_document_fragment.c +1 -1
- data/ext/nokogiri/xml_dtd.c +1 -1
- data/ext/nokogiri/xml_element_content.c +32 -29
- data/ext/nokogiri/xml_element_decl.c +5 -5
- data/ext/nokogiri/xml_encoding_handler.c +12 -4
- data/ext/nokogiri/xml_entity_reference.c +1 -1
- data/ext/nokogiri/xml_namespace.c +11 -16
- data/ext/nokogiri/xml_node.c +13 -16
- data/ext/nokogiri/xml_node_set.c +125 -105
- data/ext/nokogiri/xml_processing_instruction.c +1 -1
- data/ext/nokogiri/xml_reader.c +61 -74
- data/ext/nokogiri/xml_relax_ng.c +66 -79
- data/ext/nokogiri/xml_sax_parser.c +24 -5
- data/ext/nokogiri/xml_sax_parser_context.c +50 -25
- data/ext/nokogiri/xml_sax_push_parser.c +30 -9
- data/ext/nokogiri/xml_schema.c +94 -115
- data/ext/nokogiri/xml_syntax_error.c +3 -3
- data/ext/nokogiri/xml_text.c +26 -13
- data/ext/nokogiri/xml_xpath_context.c +153 -83
- data/ext/nokogiri/xslt_stylesheet.c +111 -53
- data/gumbo-parser/Makefile +18 -0
- data/gumbo-parser/src/error.c +8 -4
- data/gumbo-parser/src/foreign_attrs.c +13 -14
- data/gumbo-parser/src/foreign_attrs.gperf +1 -1
- data/gumbo-parser/src/parser.c +21 -5
- data/gumbo-parser/src/tokenizer.c +1 -0
- data/lib/nokogiri/css/parser_extras.rb +1 -1
- data/lib/nokogiri/css/xpath_visitor.rb +3 -23
- data/lib/nokogiri/extension.rb +1 -1
- data/lib/nokogiri/html4/document.rb +1 -1
- data/lib/nokogiri/html4/document_fragment.rb +1 -1
- data/lib/nokogiri/html4/element_description_defaults.rb +1821 -353
- data/lib/nokogiri/html4/encoding_reader.rb +1 -1
- data/lib/nokogiri/html5/document_fragment.rb +1 -1
- data/lib/nokogiri/html5/node.rb +5 -0
- data/lib/nokogiri/html5.rb +0 -63
- data/lib/nokogiri/jruby/nokogiri_jars.rb +9 -9
- data/lib/nokogiri/version/constant.rb +1 -1
- data/lib/nokogiri/version/info.rb +6 -5
- data/lib/nokogiri/xml/attr.rb +2 -2
- data/lib/nokogiri/xml/attribute_decl.rb +4 -2
- data/lib/nokogiri/xml/document.rb +4 -5
- data/lib/nokogiri/xml/document_fragment.rb +3 -3
- data/lib/nokogiri/xml/element_content.rb +10 -2
- data/lib/nokogiri/xml/element_decl.rb +4 -2
- data/lib/nokogiri/xml/entity_decl.rb +4 -2
- data/lib/nokogiri/xml/namespace.rb +1 -2
- data/lib/nokogiri/xml/node/save_options.rb +8 -0
- data/lib/nokogiri/xml/node.rb +53 -37
- data/lib/nokogiri/xml/node_set.rb +3 -3
- data/lib/nokogiri/xml/pp/node.rb +23 -12
- data/lib/nokogiri/xml/reader.rb +10 -9
- data/lib/nokogiri/xml/sax/document.rb +1 -1
- data/lib/nokogiri/xml/searchable.rb +21 -13
- data/lib/nokogiri/xml/syntax_error.rb +1 -1
- data/lib/nokogiri/xml.rb +1 -1
- data/lib/nokogiri/xslt/stylesheet.rb +29 -7
- data/lib/nokogiri/xslt.rb +74 -4
- data/lib/nokogiri.rb +13 -5
- data/lib/xsd/xmlparser/nokogiri.rb +1 -1
- data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
- data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
- data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
- data/ports/archives/libxml2-2.12.8.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.39.tar.xz +0 -0
- metadata +16 -12
- data/patches/libxslt/0001-update-automake-files-for-arm64.patch +0 -3037
- data/ports/archives/libxml2-2.10.3.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.37.tar.xz +0 -0
data/ext/nokogiri/xml_reader.c
CHANGED
@@ -3,11 +3,26 @@
|
|
3
3
|
VALUE cNokogiriXmlReader;
|
4
4
|
|
5
5
|
static void
|
6
|
-
|
6
|
+
xml_reader_deallocate(void *data)
|
7
7
|
{
|
8
|
+
// free the document separately because we _may_ have triggered preservation by calling
|
9
|
+
// xmlTextReaderCurrentDoc during a read_more.
|
10
|
+
xmlTextReaderPtr reader = data;
|
11
|
+
xmlDocPtr doc = xmlTextReaderCurrentDoc(reader);
|
8
12
|
xmlFreeTextReader(reader);
|
13
|
+
if (doc) {
|
14
|
+
xmlFreeDoc(doc);
|
15
|
+
}
|
9
16
|
}
|
10
17
|
|
18
|
+
static const rb_data_type_t xml_reader_type = {
|
19
|
+
.wrap_struct_name = "Nokogiri::XML::Reader",
|
20
|
+
.function = {
|
21
|
+
.dfree = xml_reader_deallocate,
|
22
|
+
},
|
23
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
|
24
|
+
};
|
25
|
+
|
11
26
|
static int
|
12
27
|
has_attributes(xmlTextReaderPtr reader)
|
13
28
|
{
|
@@ -69,7 +84,7 @@ default_eh(VALUE self)
|
|
69
84
|
xmlTextReaderPtr reader;
|
70
85
|
int eh;
|
71
86
|
|
72
|
-
|
87
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
73
88
|
eh = xmlTextReaderIsDefault(reader);
|
74
89
|
if (eh == 0) { return Qfalse; }
|
75
90
|
if (eh == 1) { return Qtrue; }
|
@@ -89,7 +104,7 @@ value_eh(VALUE self)
|
|
89
104
|
xmlTextReaderPtr reader;
|
90
105
|
int eh;
|
91
106
|
|
92
|
-
|
107
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
93
108
|
eh = xmlTextReaderHasValue(reader);
|
94
109
|
if (eh == 0) { return Qfalse; }
|
95
110
|
if (eh == 1) { return Qtrue; }
|
@@ -109,7 +124,7 @@ attributes_eh(VALUE self)
|
|
109
124
|
xmlTextReaderPtr reader;
|
110
125
|
int eh;
|
111
126
|
|
112
|
-
|
127
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
113
128
|
eh = has_attributes(reader);
|
114
129
|
if (eh == 0) { return Qfalse; }
|
115
130
|
if (eh == 1) { return Qtrue; }
|
@@ -131,7 +146,7 @@ rb_xml_reader_namespaces(VALUE rb_reader)
|
|
131
146
|
xmlNodePtr c_node;
|
132
147
|
VALUE rb_errors;
|
133
148
|
|
134
|
-
|
149
|
+
TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_reader_type, c_reader);
|
135
150
|
|
136
151
|
if (! has_attributes(c_reader)) {
|
137
152
|
return rb_namespaces ;
|
@@ -157,48 +172,6 @@ rb_xml_reader_namespaces(VALUE rb_reader)
|
|
157
172
|
return rb_namespaces ;
|
158
173
|
}
|
159
174
|
|
160
|
-
/*
|
161
|
-
:call-seq: attribute_nodes() → Array<Nokogiri::XML::Attr>
|
162
|
-
|
163
|
-
Get the attributes of the current node as an Array of XML:Attr
|
164
|
-
|
165
|
-
⚠ This method is deprecated and unsafe to use. It will be removed in a future version of Nokogiri.
|
166
|
-
|
167
|
-
See related: #attribute_hash, #attributes
|
168
|
-
*/
|
169
|
-
static VALUE
|
170
|
-
rb_xml_reader_attribute_nodes(VALUE rb_reader)
|
171
|
-
{
|
172
|
-
xmlTextReaderPtr c_reader;
|
173
|
-
xmlNodePtr c_node;
|
174
|
-
VALUE attr_nodes;
|
175
|
-
int j;
|
176
|
-
|
177
|
-
// TODO: deprecated, remove in Nokogiri v1.15, see https://github.com/sparklemotion/nokogiri/issues/2598
|
178
|
-
// After removal, we can also remove all the "node_has_a_document" special handling from xml_node.c
|
179
|
-
NOKO_WARN_DEPRECATION("Reader#attribute_nodes is deprecated and will be removed in a future version of Nokogiri. Please use Reader#attribute_hash instead.");
|
180
|
-
|
181
|
-
Data_Get_Struct(rb_reader, xmlTextReader, c_reader);
|
182
|
-
|
183
|
-
if (! has_attributes(c_reader)) {
|
184
|
-
return rb_ary_new() ;
|
185
|
-
}
|
186
|
-
|
187
|
-
c_node = xmlTextReaderExpand(c_reader);
|
188
|
-
if (c_node == NULL) {
|
189
|
-
return Qnil;
|
190
|
-
}
|
191
|
-
|
192
|
-
attr_nodes = noko_xml_node_attrs(c_node);
|
193
|
-
|
194
|
-
/* ensure that the Reader won't be GCed as long as a node is referenced */
|
195
|
-
for (j = 0 ; j < RARRAY_LEN(attr_nodes) ; j++) {
|
196
|
-
rb_iv_set(rb_ary_entry(attr_nodes, j), "@reader", rb_reader);
|
197
|
-
}
|
198
|
-
|
199
|
-
return attr_nodes;
|
200
|
-
}
|
201
|
-
|
202
175
|
/*
|
203
176
|
:call-seq: attribute_hash() → Hash<String ⇒ String>
|
204
177
|
|
@@ -215,7 +188,7 @@ rb_xml_reader_attribute_hash(VALUE rb_reader)
|
|
215
188
|
xmlAttrPtr c_property;
|
216
189
|
VALUE rb_errors;
|
217
190
|
|
218
|
-
|
191
|
+
TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_reader_type, c_reader);
|
219
192
|
|
220
193
|
if (!has_attributes(c_reader)) {
|
221
194
|
return rb_attributes;
|
@@ -268,7 +241,7 @@ attribute_at(VALUE self, VALUE index)
|
|
268
241
|
xmlChar *value;
|
269
242
|
VALUE rb_value;
|
270
243
|
|
271
|
-
|
244
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
272
245
|
|
273
246
|
if (NIL_P(index)) { return Qnil; }
|
274
247
|
index = rb_Integer(index);
|
@@ -297,7 +270,7 @@ reader_attribute(VALUE self, VALUE name)
|
|
297
270
|
xmlChar *value ;
|
298
271
|
VALUE rb_value;
|
299
272
|
|
300
|
-
|
273
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
301
274
|
|
302
275
|
if (NIL_P(name)) { return Qnil; }
|
303
276
|
name = StringValue(name) ;
|
@@ -322,7 +295,7 @@ attribute_count(VALUE self)
|
|
322
295
|
xmlTextReaderPtr reader;
|
323
296
|
int count;
|
324
297
|
|
325
|
-
|
298
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
326
299
|
count = xmlTextReaderAttributeCount(reader);
|
327
300
|
if (count == -1) { return Qnil; }
|
328
301
|
|
@@ -341,7 +314,7 @@ depth(VALUE self)
|
|
341
314
|
xmlTextReaderPtr reader;
|
342
315
|
int depth;
|
343
316
|
|
344
|
-
|
317
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
345
318
|
depth = xmlTextReaderDepth(reader);
|
346
319
|
if (depth == -1) { return Qnil; }
|
347
320
|
|
@@ -360,7 +333,7 @@ xml_version(VALUE self)
|
|
360
333
|
xmlTextReaderPtr reader;
|
361
334
|
const char *version;
|
362
335
|
|
363
|
-
|
336
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
364
337
|
version = (const char *)xmlTextReaderConstXmlVersion(reader);
|
365
338
|
if (version == NULL) { return Qnil; }
|
366
339
|
|
@@ -379,7 +352,7 @@ lang(VALUE self)
|
|
379
352
|
xmlTextReaderPtr reader;
|
380
353
|
const char *lang;
|
381
354
|
|
382
|
-
|
355
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
383
356
|
lang = (const char *)xmlTextReaderConstXmlLang(reader);
|
384
357
|
if (lang == NULL) { return Qnil; }
|
385
358
|
|
@@ -398,7 +371,7 @@ value(VALUE self)
|
|
398
371
|
xmlTextReaderPtr reader;
|
399
372
|
const char *value;
|
400
373
|
|
401
|
-
|
374
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
402
375
|
value = (const char *)xmlTextReaderConstValue(reader);
|
403
376
|
if (value == NULL) { return Qnil; }
|
404
377
|
|
@@ -417,7 +390,7 @@ prefix(VALUE self)
|
|
417
390
|
xmlTextReaderPtr reader;
|
418
391
|
const char *prefix;
|
419
392
|
|
420
|
-
|
393
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
421
394
|
prefix = (const char *)xmlTextReaderConstPrefix(reader);
|
422
395
|
if (prefix == NULL) { return Qnil; }
|
423
396
|
|
@@ -436,7 +409,7 @@ namespace_uri(VALUE self)
|
|
436
409
|
xmlTextReaderPtr reader;
|
437
410
|
const char *uri;
|
438
411
|
|
439
|
-
|
412
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
440
413
|
uri = (const char *)xmlTextReaderConstNamespaceUri(reader);
|
441
414
|
if (uri == NULL) { return Qnil; }
|
442
415
|
|
@@ -455,7 +428,7 @@ local_name(VALUE self)
|
|
455
428
|
xmlTextReaderPtr reader;
|
456
429
|
const char *name;
|
457
430
|
|
458
|
-
|
431
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
459
432
|
name = (const char *)xmlTextReaderConstLocalName(reader);
|
460
433
|
if (name == NULL) { return Qnil; }
|
461
434
|
|
@@ -474,7 +447,7 @@ name(VALUE self)
|
|
474
447
|
xmlTextReaderPtr reader;
|
475
448
|
const char *name;
|
476
449
|
|
477
|
-
|
450
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
478
451
|
name = (const char *)xmlTextReaderConstName(reader);
|
479
452
|
if (name == NULL) { return Qnil; }
|
480
453
|
|
@@ -494,7 +467,7 @@ rb_xml_reader_base_uri(VALUE rb_reader)
|
|
494
467
|
xmlTextReaderPtr c_reader;
|
495
468
|
xmlChar *c_base_uri;
|
496
469
|
|
497
|
-
|
470
|
+
TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_reader_type, c_reader);
|
498
471
|
|
499
472
|
c_base_uri = xmlTextReaderBaseUri(c_reader);
|
500
473
|
if (c_base_uri == NULL) {
|
@@ -517,7 +490,7 @@ static VALUE
|
|
517
490
|
state(VALUE self)
|
518
491
|
{
|
519
492
|
xmlTextReaderPtr reader;
|
520
|
-
|
493
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
521
494
|
return INT2NUM(xmlTextReaderReadState(reader));
|
522
495
|
}
|
523
496
|
|
@@ -531,7 +504,7 @@ static VALUE
|
|
531
504
|
node_type(VALUE self)
|
532
505
|
{
|
533
506
|
xmlTextReaderPtr reader;
|
534
|
-
|
507
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
535
508
|
return INT2NUM(xmlTextReaderNodeType(reader));
|
536
509
|
}
|
537
510
|
|
@@ -545,11 +518,12 @@ static VALUE
|
|
545
518
|
read_more(VALUE self)
|
546
519
|
{
|
547
520
|
xmlTextReaderPtr reader;
|
548
|
-
|
521
|
+
xmlErrorConstPtr error;
|
549
522
|
VALUE error_list;
|
550
523
|
int ret;
|
524
|
+
xmlDocPtr c_document;
|
551
525
|
|
552
|
-
|
526
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
553
527
|
|
554
528
|
error_list = rb_funcall(self, rb_intern("errors"), 0);
|
555
529
|
|
@@ -557,6 +531,17 @@ read_more(VALUE self)
|
|
557
531
|
ret = xmlTextReaderRead(reader);
|
558
532
|
xmlSetStructuredErrorFunc(NULL, NULL);
|
559
533
|
|
534
|
+
c_document = xmlTextReaderCurrentDoc(reader);
|
535
|
+
if (c_document && c_document->encoding == NULL) {
|
536
|
+
VALUE constructor_encoding = rb_iv_get(self, "@encoding");
|
537
|
+
if (RTEST(constructor_encoding)) {
|
538
|
+
c_document->encoding = xmlStrdup(BAD_CAST StringValueCStr(constructor_encoding));
|
539
|
+
} else {
|
540
|
+
rb_iv_set(self, "@encoding", NOKOGIRI_STR_NEW2("UTF-8"));
|
541
|
+
c_document->encoding = xmlStrdup(BAD_CAST "UTF-8");
|
542
|
+
}
|
543
|
+
}
|
544
|
+
|
560
545
|
if (ret == 1) { return self; }
|
561
546
|
if (ret == 0) { return Qnil; }
|
562
547
|
|
@@ -584,7 +569,7 @@ inner_xml(VALUE self)
|
|
584
569
|
xmlChar *value;
|
585
570
|
VALUE str;
|
586
571
|
|
587
|
-
|
572
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
588
573
|
|
589
574
|
value = xmlTextReaderReadInnerXml(reader);
|
590
575
|
|
@@ -611,7 +596,7 @@ outer_xml(VALUE self)
|
|
611
596
|
xmlChar *value;
|
612
597
|
VALUE str = Qnil;
|
613
598
|
|
614
|
-
|
599
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
615
600
|
|
616
601
|
value = xmlTextReaderReadOuterXml(reader);
|
617
602
|
|
@@ -658,7 +643,7 @@ from_memory(int argc, VALUE *argv, VALUE klass)
|
|
658
643
|
rb_raise(rb_eRuntimeError, "couldn't create a parser");
|
659
644
|
}
|
660
645
|
|
661
|
-
rb_reader =
|
646
|
+
rb_reader = TypedData_Wrap_Struct(klass, &xml_reader_type, reader);
|
662
647
|
args[0] = rb_buffer;
|
663
648
|
args[1] = rb_url;
|
664
649
|
args[2] = encoding;
|
@@ -704,7 +689,7 @@ from_io(int argc, VALUE *argv, VALUE klass)
|
|
704
689
|
rb_raise(rb_eRuntimeError, "couldn't create a parser");
|
705
690
|
}
|
706
691
|
|
707
|
-
rb_reader =
|
692
|
+
rb_reader = TypedData_Wrap_Struct(klass, &xml_reader_type, reader);
|
708
693
|
args[0] = rb_io;
|
709
694
|
args[1] = rb_url;
|
710
695
|
args[2] = encoding;
|
@@ -724,7 +709,7 @@ empty_element_p(VALUE self)
|
|
724
709
|
{
|
725
710
|
xmlTextReaderPtr reader;
|
726
711
|
|
727
|
-
|
712
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
728
713
|
|
729
714
|
if (xmlTextReaderIsEmptyElement(reader)) {
|
730
715
|
return Qtrue;
|
@@ -740,15 +725,18 @@ rb_xml_reader_encoding(VALUE rb_reader)
|
|
740
725
|
const char *parser_encoding;
|
741
726
|
VALUE constructor_encoding;
|
742
727
|
|
728
|
+
TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_reader_type, c_reader);
|
729
|
+
parser_encoding = (const char *)xmlTextReaderConstEncoding(c_reader);
|
730
|
+
if (parser_encoding) {
|
731
|
+
return NOKOGIRI_STR_NEW2(parser_encoding);
|
732
|
+
}
|
733
|
+
|
743
734
|
constructor_encoding = rb_iv_get(rb_reader, "@encoding");
|
744
735
|
if (RTEST(constructor_encoding)) {
|
745
736
|
return constructor_encoding;
|
746
737
|
}
|
747
738
|
|
748
|
-
|
749
|
-
parser_encoding = (const char *)xmlTextReaderConstEncoding(c_reader);
|
750
|
-
if (parser_encoding == NULL) { return Qnil; }
|
751
|
-
return NOKOGIRI_STR_NEW2(parser_encoding);
|
739
|
+
return Qnil;
|
752
740
|
}
|
753
741
|
|
754
742
|
void
|
@@ -769,7 +757,6 @@ noko_init_xml_reader(void)
|
|
769
757
|
rb_define_method(cNokogiriXmlReader, "attribute", reader_attribute, 1);
|
770
758
|
rb_define_method(cNokogiriXmlReader, "attribute_at", attribute_at, 1);
|
771
759
|
rb_define_method(cNokogiriXmlReader, "attribute_count", attribute_count, 0);
|
772
|
-
rb_define_method(cNokogiriXmlReader, "attribute_nodes", rb_xml_reader_attribute_nodes, 0);
|
773
760
|
rb_define_method(cNokogiriXmlReader, "attribute_hash", rb_xml_reader_attribute_hash, 0);
|
774
761
|
rb_define_method(cNokogiriXmlReader, "attributes?", attributes_eh, 0);
|
775
762
|
rb_define_method(cNokogiriXmlReader, "base_uri", rb_xml_reader_base_uri, 0);
|
data/ext/nokogiri/xml_relax_ng.c
CHANGED
@@ -3,11 +3,20 @@
|
|
3
3
|
VALUE cNokogiriXmlRelaxNG;
|
4
4
|
|
5
5
|
static void
|
6
|
-
|
6
|
+
xml_relax_ng_deallocate(void *data)
|
7
7
|
{
|
8
|
+
xmlRelaxNGPtr schema = data;
|
8
9
|
xmlRelaxNGFree(schema);
|
9
10
|
}
|
10
11
|
|
12
|
+
static const rb_data_type_t xml_relax_ng_type = {
|
13
|
+
.wrap_struct_name = "Nokogiri::XML::RelaxNG",
|
14
|
+
.function = {
|
15
|
+
.dfree = xml_relax_ng_deallocate,
|
16
|
+
},
|
17
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
|
18
|
+
};
|
19
|
+
|
11
20
|
/*
|
12
21
|
* call-seq:
|
13
22
|
* validate_document(document)
|
@@ -22,8 +31,8 @@ validate_document(VALUE self, VALUE document)
|
|
22
31
|
VALUE errors;
|
23
32
|
xmlRelaxNGValidCtxtPtr valid_ctxt;
|
24
33
|
|
25
|
-
|
26
|
-
|
34
|
+
TypedData_Get_Struct(self, xmlRelaxNG, &xml_relax_ng_type, schema);
|
35
|
+
doc = noko_xml_document_unwrap(document);
|
27
36
|
|
28
37
|
errors = rb_ary_new();
|
29
38
|
|
@@ -49,48 +58,42 @@ validate_document(VALUE self, VALUE document)
|
|
49
58
|
return errors;
|
50
59
|
}
|
51
60
|
|
52
|
-
/*
|
53
|
-
* call-seq:
|
54
|
-
* read_memory(string)
|
55
|
-
*
|
56
|
-
* Create a new RelaxNG from the contents of +string+
|
57
|
-
*/
|
58
61
|
static VALUE
|
59
|
-
|
62
|
+
xml_relax_ng_parse_schema(
|
63
|
+
VALUE klass,
|
64
|
+
xmlRelaxNGParserCtxtPtr c_parser_context,
|
65
|
+
VALUE rb_parse_options
|
66
|
+
)
|
60
67
|
{
|
61
|
-
VALUE
|
62
|
-
VALUE parse_options;
|
63
|
-
xmlRelaxNGParserCtxtPtr ctx;
|
64
|
-
xmlRelaxNGPtr schema;
|
65
|
-
VALUE errors;
|
68
|
+
VALUE rb_errors;
|
66
69
|
VALUE rb_schema;
|
67
|
-
|
70
|
+
xmlRelaxNGPtr c_schema;
|
68
71
|
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
+
if (NIL_P(rb_parse_options)) {
|
73
|
+
rb_parse_options = rb_const_get_at(
|
74
|
+
rb_const_get_at(mNokogiriXml, rb_intern("ParseOptions")),
|
75
|
+
rb_intern("DEFAULT_SCHEMA")
|
76
|
+
);
|
72
77
|
}
|
73
78
|
|
74
|
-
|
75
|
-
|
76
|
-
errors = rb_ary_new();
|
77
|
-
xmlSetStructuredErrorFunc((void *)errors, Nokogiri_error_array_pusher);
|
79
|
+
rb_errors = rb_ary_new();
|
80
|
+
xmlSetStructuredErrorFunc((void *)rb_errors, Nokogiri_error_array_pusher);
|
78
81
|
|
79
82
|
#ifdef HAVE_XMLRELAXNGSETPARSERSTRUCTUREDERRORS
|
80
83
|
xmlRelaxNGSetParserStructuredErrors(
|
81
|
-
|
84
|
+
c_parser_context,
|
82
85
|
Nokogiri_error_array_pusher,
|
83
|
-
(void *)
|
86
|
+
(void *)rb_errors
|
84
87
|
);
|
85
88
|
#endif
|
86
89
|
|
87
|
-
|
90
|
+
c_schema = xmlRelaxNGParse(c_parser_context);
|
88
91
|
|
89
92
|
xmlSetStructuredErrorFunc(NULL, NULL);
|
90
|
-
xmlRelaxNGFreeParserCtxt(
|
93
|
+
xmlRelaxNGFreeParserCtxt(c_parser_context);
|
91
94
|
|
92
|
-
if (NULL ==
|
93
|
-
|
95
|
+
if (NULL == c_schema) {
|
96
|
+
xmlErrorConstPtr error = xmlGetLastError();
|
94
97
|
if (error) {
|
95
98
|
Nokogiri_error_raise(NULL, error);
|
96
99
|
} else {
|
@@ -100,74 +103,58 @@ read_memory(int argc, VALUE *argv, VALUE klass)
|
|
100
103
|
return Qnil;
|
101
104
|
}
|
102
105
|
|
103
|
-
rb_schema =
|
104
|
-
rb_iv_set(rb_schema, "@errors",
|
105
|
-
rb_iv_set(rb_schema, "@parse_options",
|
106
|
+
rb_schema = TypedData_Wrap_Struct(klass, &xml_relax_ng_type, c_schema);
|
107
|
+
rb_iv_set(rb_schema, "@errors", rb_errors);
|
108
|
+
rb_iv_set(rb_schema, "@parse_options", rb_parse_options);
|
106
109
|
|
107
110
|
return rb_schema;
|
108
111
|
}
|
109
112
|
|
110
113
|
/*
|
111
114
|
* call-seq:
|
112
|
-
*
|
115
|
+
* read_memory(string)
|
113
116
|
*
|
114
|
-
* Create a new RelaxNG
|
117
|
+
* Create a new RelaxNG from the contents of +string+
|
115
118
|
*/
|
116
119
|
static VALUE
|
117
|
-
|
120
|
+
read_memory(int argc, VALUE *argv, VALUE klass)
|
118
121
|
{
|
119
|
-
VALUE
|
120
|
-
VALUE
|
121
|
-
|
122
|
-
xmlRelaxNGParserCtxtPtr ctx;
|
123
|
-
xmlRelaxNGPtr schema;
|
124
|
-
VALUE errors;
|
125
|
-
VALUE rb_schema;
|
126
|
-
int scanned_args = 0;
|
127
|
-
|
128
|
-
scanned_args = rb_scan_args(argc, argv, "11", &document, &parse_options);
|
129
|
-
|
130
|
-
Data_Get_Struct(document, xmlDoc, doc);
|
131
|
-
doc = doc->doc; /* In case someone passes us a node. ugh. */
|
132
|
-
|
133
|
-
if (scanned_args == 1) {
|
134
|
-
parse_options = rb_const_get_at(rb_const_get_at(mNokogiriXml, rb_intern("ParseOptions")), rb_intern("DEFAULT_SCHEMA"));
|
135
|
-
}
|
122
|
+
VALUE rb_content;
|
123
|
+
VALUE rb_parse_options;
|
124
|
+
xmlRelaxNGParserCtxtPtr c_parser_context;
|
136
125
|
|
137
|
-
|
126
|
+
rb_scan_args(argc, argv, "11", &rb_content, &rb_parse_options);
|
138
127
|
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
xmlRelaxNGSetParserStructuredErrors(
|
144
|
-
ctx,
|
145
|
-
Nokogiri_error_array_pusher,
|
146
|
-
(void *)errors
|
147
|
-
);
|
148
|
-
#endif
|
128
|
+
c_parser_context = xmlRelaxNGNewMemParserCtxt(
|
129
|
+
(const char *)StringValuePtr(rb_content),
|
130
|
+
(int)RSTRING_LEN(rb_content)
|
131
|
+
);
|
149
132
|
|
150
|
-
|
133
|
+
return xml_relax_ng_parse_schema(klass, c_parser_context, rb_parse_options);
|
134
|
+
}
|
151
135
|
|
152
|
-
|
153
|
-
|
136
|
+
/*
|
137
|
+
* call-seq:
|
138
|
+
* from_document(doc)
|
139
|
+
*
|
140
|
+
* Create a new RelaxNG schema from the Nokogiri::XML::Document +doc+
|
141
|
+
*/
|
142
|
+
static VALUE
|
143
|
+
from_document(int argc, VALUE *argv, VALUE klass)
|
144
|
+
{
|
145
|
+
VALUE rb_document;
|
146
|
+
VALUE rb_parse_options;
|
147
|
+
xmlDocPtr c_document;
|
148
|
+
xmlRelaxNGParserCtxtPtr c_parser_context;
|
154
149
|
|
155
|
-
|
156
|
-
xmlErrorPtr error = xmlGetLastError();
|
157
|
-
if (error) {
|
158
|
-
Nokogiri_error_raise(NULL, error);
|
159
|
-
} else {
|
160
|
-
rb_raise(rb_eRuntimeError, "Could not parse document");
|
161
|
-
}
|
150
|
+
rb_scan_args(argc, argv, "11", &rb_document, &rb_parse_options);
|
162
151
|
|
163
|
-
|
164
|
-
|
152
|
+
c_document = noko_xml_document_unwrap(rb_document);
|
153
|
+
c_document = c_document->doc; /* In case someone passes us a node. ugh. */
|
165
154
|
|
166
|
-
|
167
|
-
rb_iv_set(rb_schema, "@errors", errors);
|
168
|
-
rb_iv_set(rb_schema, "@parse_options", parse_options);
|
155
|
+
c_parser_context = xmlRelaxNGNewDocParserCtxt(c_document);
|
169
156
|
|
170
|
-
return
|
157
|
+
return xml_relax_ng_parse_schema(klass, c_parser_context, rb_parse_options);
|
171
158
|
}
|
172
159
|
|
173
160
|
void
|
@@ -265,16 +265,27 @@ processing_instruction(void *ctx, const xmlChar *name, const xmlChar *content)
|
|
265
265
|
);
|
266
266
|
}
|
267
267
|
|
268
|
-
static
|
269
|
-
|
268
|
+
static size_t
|
269
|
+
memsize(const void *data)
|
270
270
|
{
|
271
|
-
|
271
|
+
return sizeof(xmlSAXHandler);
|
272
272
|
}
|
273
273
|
|
274
|
+
/* Used by Nokogiri::XML::SAX::Parser and Nokogiri::HTML::SAX::Parser */
|
275
|
+
static const rb_data_type_t noko_sax_handler_type = {
|
276
|
+
.wrap_struct_name = "Nokogiri::SAXHandler",
|
277
|
+
.function = {
|
278
|
+
.dfree = RUBY_TYPED_DEFAULT_FREE,
|
279
|
+
.dsize = memsize
|
280
|
+
},
|
281
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
|
282
|
+
};
|
283
|
+
|
274
284
|
static VALUE
|
275
285
|
allocate(VALUE klass)
|
276
286
|
{
|
277
|
-
xmlSAXHandlerPtr handler
|
287
|
+
xmlSAXHandlerPtr handler;
|
288
|
+
VALUE self = TypedData_Make_Struct(klass, xmlSAXHandler, &noko_sax_handler_type, handler);
|
278
289
|
|
279
290
|
handler->startDocument = start_document;
|
280
291
|
handler->endDocument = end_document;
|
@@ -290,7 +301,15 @@ allocate(VALUE klass)
|
|
290
301
|
handler->processingInstruction = processing_instruction;
|
291
302
|
handler->initialized = XML_SAX2_MAGIC;
|
292
303
|
|
293
|
-
return
|
304
|
+
return self;
|
305
|
+
}
|
306
|
+
|
307
|
+
xmlSAXHandlerPtr
|
308
|
+
noko_sax_handler_unwrap(VALUE rb_sax_handler)
|
309
|
+
{
|
310
|
+
xmlSAXHandlerPtr c_sax_handler;
|
311
|
+
TypedData_Get_Struct(rb_sax_handler, xmlSAXHandler, &noko_sax_handler_type, c_sax_handler);
|
312
|
+
return c_sax_handler;
|
294
313
|
}
|
295
314
|
|
296
315
|
void
|