nokogiri 1.14.2 → 1.16.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +19 -15
- data/README.md +4 -1
- data/dependencies.yml +9 -8
- data/ext/nokogiri/extconf.rb +69 -26
- data/ext/nokogiri/html4_document.c +1 -2
- data/ext/nokogiri/html4_element_description.c +19 -14
- data/ext/nokogiri/html4_sax_parser_context.c +10 -16
- data/ext/nokogiri/html4_sax_push_parser.c +3 -3
- data/ext/nokogiri/nokogiri.c +46 -24
- data/ext/nokogiri/nokogiri.h +23 -5
- data/ext/nokogiri/test_global_handlers.c +1 -1
- data/ext/nokogiri/xml_attr.c +1 -1
- data/ext/nokogiri/xml_cdata.c +30 -17
- data/ext/nokogiri/xml_comment.c +1 -1
- data/ext/nokogiri/xml_document.c +113 -25
- data/ext/nokogiri/xml_document_fragment.c +1 -1
- data/ext/nokogiri/xml_dtd.c +1 -1
- data/ext/nokogiri/xml_element_content.c +32 -29
- data/ext/nokogiri/xml_element_decl.c +5 -5
- data/ext/nokogiri/xml_encoding_handler.c +12 -4
- data/ext/nokogiri/xml_entity_reference.c +1 -1
- data/ext/nokogiri/xml_namespace.c +11 -16
- data/ext/nokogiri/xml_node.c +13 -16
- data/ext/nokogiri/xml_node_set.c +125 -105
- data/ext/nokogiri/xml_processing_instruction.c +1 -1
- data/ext/nokogiri/xml_reader.c +61 -74
- data/ext/nokogiri/xml_relax_ng.c +66 -79
- data/ext/nokogiri/xml_sax_parser.c +24 -5
- data/ext/nokogiri/xml_sax_parser_context.c +50 -25
- data/ext/nokogiri/xml_sax_push_parser.c +30 -9
- data/ext/nokogiri/xml_schema.c +94 -115
- data/ext/nokogiri/xml_syntax_error.c +3 -3
- data/ext/nokogiri/xml_text.c +26 -13
- data/ext/nokogiri/xml_xpath_context.c +153 -83
- data/ext/nokogiri/xslt_stylesheet.c +111 -53
- data/gumbo-parser/Makefile +18 -0
- data/gumbo-parser/src/error.c +8 -4
- data/gumbo-parser/src/foreign_attrs.c +13 -14
- data/gumbo-parser/src/foreign_attrs.gperf +1 -1
- data/gumbo-parser/src/parser.c +21 -5
- data/gumbo-parser/src/tokenizer.c +1 -0
- data/lib/nokogiri/css/parser_extras.rb +1 -1
- data/lib/nokogiri/css/xpath_visitor.rb +3 -23
- data/lib/nokogiri/extension.rb +1 -1
- data/lib/nokogiri/html4/document.rb +1 -1
- data/lib/nokogiri/html4/document_fragment.rb +1 -1
- data/lib/nokogiri/html4/element_description_defaults.rb +1821 -353
- data/lib/nokogiri/html4/encoding_reader.rb +1 -1
- data/lib/nokogiri/html5/document_fragment.rb +1 -1
- data/lib/nokogiri/html5/node.rb +5 -0
- data/lib/nokogiri/html5.rb +0 -63
- data/lib/nokogiri/jruby/nokogiri_jars.rb +9 -9
- data/lib/nokogiri/version/constant.rb +1 -1
- data/lib/nokogiri/version/info.rb +6 -5
- data/lib/nokogiri/xml/attr.rb +2 -2
- data/lib/nokogiri/xml/attribute_decl.rb +4 -2
- data/lib/nokogiri/xml/document.rb +4 -5
- data/lib/nokogiri/xml/document_fragment.rb +3 -3
- data/lib/nokogiri/xml/element_content.rb +10 -2
- data/lib/nokogiri/xml/element_decl.rb +4 -2
- data/lib/nokogiri/xml/entity_decl.rb +4 -2
- data/lib/nokogiri/xml/namespace.rb +1 -2
- data/lib/nokogiri/xml/node/save_options.rb +8 -0
- data/lib/nokogiri/xml/node.rb +53 -37
- data/lib/nokogiri/xml/node_set.rb +3 -3
- data/lib/nokogiri/xml/pp/node.rb +23 -12
- data/lib/nokogiri/xml/reader.rb +10 -9
- data/lib/nokogiri/xml/sax/document.rb +1 -1
- data/lib/nokogiri/xml/searchable.rb +21 -13
- data/lib/nokogiri/xml/syntax_error.rb +1 -1
- data/lib/nokogiri/xml.rb +1 -1
- data/lib/nokogiri/xslt/stylesheet.rb +29 -7
- data/lib/nokogiri/xslt.rb +74 -4
- data/lib/nokogiri.rb +13 -5
- data/lib/xsd/xmlparser/nokogiri.rb +1 -1
- data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
- data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
- data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
- data/ports/archives/libxml2-2.12.8.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.39.tar.xz +0 -0
- metadata +16 -12
- data/patches/libxslt/0001-update-automake-files-for-arm64.patch +0 -3037
- data/ports/archives/libxml2-2.10.3.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.37.tar.xz +0 -0
data/ext/nokogiri/xml_reader.c
CHANGED
@@ -3,11 +3,26 @@
|
|
3
3
|
VALUE cNokogiriXmlReader;
|
4
4
|
|
5
5
|
static void
|
6
|
-
|
6
|
+
xml_reader_deallocate(void *data)
|
7
7
|
{
|
8
|
+
// free the document separately because we _may_ have triggered preservation by calling
|
9
|
+
// xmlTextReaderCurrentDoc during a read_more.
|
10
|
+
xmlTextReaderPtr reader = data;
|
11
|
+
xmlDocPtr doc = xmlTextReaderCurrentDoc(reader);
|
8
12
|
xmlFreeTextReader(reader);
|
13
|
+
if (doc) {
|
14
|
+
xmlFreeDoc(doc);
|
15
|
+
}
|
9
16
|
}
|
10
17
|
|
18
|
+
static const rb_data_type_t xml_reader_type = {
|
19
|
+
.wrap_struct_name = "Nokogiri::XML::Reader",
|
20
|
+
.function = {
|
21
|
+
.dfree = xml_reader_deallocate,
|
22
|
+
},
|
23
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
|
24
|
+
};
|
25
|
+
|
11
26
|
static int
|
12
27
|
has_attributes(xmlTextReaderPtr reader)
|
13
28
|
{
|
@@ -69,7 +84,7 @@ default_eh(VALUE self)
|
|
69
84
|
xmlTextReaderPtr reader;
|
70
85
|
int eh;
|
71
86
|
|
72
|
-
|
87
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
73
88
|
eh = xmlTextReaderIsDefault(reader);
|
74
89
|
if (eh == 0) { return Qfalse; }
|
75
90
|
if (eh == 1) { return Qtrue; }
|
@@ -89,7 +104,7 @@ value_eh(VALUE self)
|
|
89
104
|
xmlTextReaderPtr reader;
|
90
105
|
int eh;
|
91
106
|
|
92
|
-
|
107
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
93
108
|
eh = xmlTextReaderHasValue(reader);
|
94
109
|
if (eh == 0) { return Qfalse; }
|
95
110
|
if (eh == 1) { return Qtrue; }
|
@@ -109,7 +124,7 @@ attributes_eh(VALUE self)
|
|
109
124
|
xmlTextReaderPtr reader;
|
110
125
|
int eh;
|
111
126
|
|
112
|
-
|
127
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
113
128
|
eh = has_attributes(reader);
|
114
129
|
if (eh == 0) { return Qfalse; }
|
115
130
|
if (eh == 1) { return Qtrue; }
|
@@ -131,7 +146,7 @@ rb_xml_reader_namespaces(VALUE rb_reader)
|
|
131
146
|
xmlNodePtr c_node;
|
132
147
|
VALUE rb_errors;
|
133
148
|
|
134
|
-
|
149
|
+
TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_reader_type, c_reader);
|
135
150
|
|
136
151
|
if (! has_attributes(c_reader)) {
|
137
152
|
return rb_namespaces ;
|
@@ -157,48 +172,6 @@ rb_xml_reader_namespaces(VALUE rb_reader)
|
|
157
172
|
return rb_namespaces ;
|
158
173
|
}
|
159
174
|
|
160
|
-
/*
|
161
|
-
:call-seq: attribute_nodes() → Array<Nokogiri::XML::Attr>
|
162
|
-
|
163
|
-
Get the attributes of the current node as an Array of XML:Attr
|
164
|
-
|
165
|
-
⚠ This method is deprecated and unsafe to use. It will be removed in a future version of Nokogiri.
|
166
|
-
|
167
|
-
See related: #attribute_hash, #attributes
|
168
|
-
*/
|
169
|
-
static VALUE
|
170
|
-
rb_xml_reader_attribute_nodes(VALUE rb_reader)
|
171
|
-
{
|
172
|
-
xmlTextReaderPtr c_reader;
|
173
|
-
xmlNodePtr c_node;
|
174
|
-
VALUE attr_nodes;
|
175
|
-
int j;
|
176
|
-
|
177
|
-
// TODO: deprecated, remove in Nokogiri v1.15, see https://github.com/sparklemotion/nokogiri/issues/2598
|
178
|
-
// After removal, we can also remove all the "node_has_a_document" special handling from xml_node.c
|
179
|
-
NOKO_WARN_DEPRECATION("Reader#attribute_nodes is deprecated and will be removed in a future version of Nokogiri. Please use Reader#attribute_hash instead.");
|
180
|
-
|
181
|
-
Data_Get_Struct(rb_reader, xmlTextReader, c_reader);
|
182
|
-
|
183
|
-
if (! has_attributes(c_reader)) {
|
184
|
-
return rb_ary_new() ;
|
185
|
-
}
|
186
|
-
|
187
|
-
c_node = xmlTextReaderExpand(c_reader);
|
188
|
-
if (c_node == NULL) {
|
189
|
-
return Qnil;
|
190
|
-
}
|
191
|
-
|
192
|
-
attr_nodes = noko_xml_node_attrs(c_node);
|
193
|
-
|
194
|
-
/* ensure that the Reader won't be GCed as long as a node is referenced */
|
195
|
-
for (j = 0 ; j < RARRAY_LEN(attr_nodes) ; j++) {
|
196
|
-
rb_iv_set(rb_ary_entry(attr_nodes, j), "@reader", rb_reader);
|
197
|
-
}
|
198
|
-
|
199
|
-
return attr_nodes;
|
200
|
-
}
|
201
|
-
|
202
175
|
/*
|
203
176
|
:call-seq: attribute_hash() → Hash<String ⇒ String>
|
204
177
|
|
@@ -215,7 +188,7 @@ rb_xml_reader_attribute_hash(VALUE rb_reader)
|
|
215
188
|
xmlAttrPtr c_property;
|
216
189
|
VALUE rb_errors;
|
217
190
|
|
218
|
-
|
191
|
+
TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_reader_type, c_reader);
|
219
192
|
|
220
193
|
if (!has_attributes(c_reader)) {
|
221
194
|
return rb_attributes;
|
@@ -268,7 +241,7 @@ attribute_at(VALUE self, VALUE index)
|
|
268
241
|
xmlChar *value;
|
269
242
|
VALUE rb_value;
|
270
243
|
|
271
|
-
|
244
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
272
245
|
|
273
246
|
if (NIL_P(index)) { return Qnil; }
|
274
247
|
index = rb_Integer(index);
|
@@ -297,7 +270,7 @@ reader_attribute(VALUE self, VALUE name)
|
|
297
270
|
xmlChar *value ;
|
298
271
|
VALUE rb_value;
|
299
272
|
|
300
|
-
|
273
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
301
274
|
|
302
275
|
if (NIL_P(name)) { return Qnil; }
|
303
276
|
name = StringValue(name) ;
|
@@ -322,7 +295,7 @@ attribute_count(VALUE self)
|
|
322
295
|
xmlTextReaderPtr reader;
|
323
296
|
int count;
|
324
297
|
|
325
|
-
|
298
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
326
299
|
count = xmlTextReaderAttributeCount(reader);
|
327
300
|
if (count == -1) { return Qnil; }
|
328
301
|
|
@@ -341,7 +314,7 @@ depth(VALUE self)
|
|
341
314
|
xmlTextReaderPtr reader;
|
342
315
|
int depth;
|
343
316
|
|
344
|
-
|
317
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
345
318
|
depth = xmlTextReaderDepth(reader);
|
346
319
|
if (depth == -1) { return Qnil; }
|
347
320
|
|
@@ -360,7 +333,7 @@ xml_version(VALUE self)
|
|
360
333
|
xmlTextReaderPtr reader;
|
361
334
|
const char *version;
|
362
335
|
|
363
|
-
|
336
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
364
337
|
version = (const char *)xmlTextReaderConstXmlVersion(reader);
|
365
338
|
if (version == NULL) { return Qnil; }
|
366
339
|
|
@@ -379,7 +352,7 @@ lang(VALUE self)
|
|
379
352
|
xmlTextReaderPtr reader;
|
380
353
|
const char *lang;
|
381
354
|
|
382
|
-
|
355
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
383
356
|
lang = (const char *)xmlTextReaderConstXmlLang(reader);
|
384
357
|
if (lang == NULL) { return Qnil; }
|
385
358
|
|
@@ -398,7 +371,7 @@ value(VALUE self)
|
|
398
371
|
xmlTextReaderPtr reader;
|
399
372
|
const char *value;
|
400
373
|
|
401
|
-
|
374
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
402
375
|
value = (const char *)xmlTextReaderConstValue(reader);
|
403
376
|
if (value == NULL) { return Qnil; }
|
404
377
|
|
@@ -417,7 +390,7 @@ prefix(VALUE self)
|
|
417
390
|
xmlTextReaderPtr reader;
|
418
391
|
const char *prefix;
|
419
392
|
|
420
|
-
|
393
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
421
394
|
prefix = (const char *)xmlTextReaderConstPrefix(reader);
|
422
395
|
if (prefix == NULL) { return Qnil; }
|
423
396
|
|
@@ -436,7 +409,7 @@ namespace_uri(VALUE self)
|
|
436
409
|
xmlTextReaderPtr reader;
|
437
410
|
const char *uri;
|
438
411
|
|
439
|
-
|
412
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
440
413
|
uri = (const char *)xmlTextReaderConstNamespaceUri(reader);
|
441
414
|
if (uri == NULL) { return Qnil; }
|
442
415
|
|
@@ -455,7 +428,7 @@ local_name(VALUE self)
|
|
455
428
|
xmlTextReaderPtr reader;
|
456
429
|
const char *name;
|
457
430
|
|
458
|
-
|
431
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
459
432
|
name = (const char *)xmlTextReaderConstLocalName(reader);
|
460
433
|
if (name == NULL) { return Qnil; }
|
461
434
|
|
@@ -474,7 +447,7 @@ name(VALUE self)
|
|
474
447
|
xmlTextReaderPtr reader;
|
475
448
|
const char *name;
|
476
449
|
|
477
|
-
|
450
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
478
451
|
name = (const char *)xmlTextReaderConstName(reader);
|
479
452
|
if (name == NULL) { return Qnil; }
|
480
453
|
|
@@ -494,7 +467,7 @@ rb_xml_reader_base_uri(VALUE rb_reader)
|
|
494
467
|
xmlTextReaderPtr c_reader;
|
495
468
|
xmlChar *c_base_uri;
|
496
469
|
|
497
|
-
|
470
|
+
TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_reader_type, c_reader);
|
498
471
|
|
499
472
|
c_base_uri = xmlTextReaderBaseUri(c_reader);
|
500
473
|
if (c_base_uri == NULL) {
|
@@ -517,7 +490,7 @@ static VALUE
|
|
517
490
|
state(VALUE self)
|
518
491
|
{
|
519
492
|
xmlTextReaderPtr reader;
|
520
|
-
|
493
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
521
494
|
return INT2NUM(xmlTextReaderReadState(reader));
|
522
495
|
}
|
523
496
|
|
@@ -531,7 +504,7 @@ static VALUE
|
|
531
504
|
node_type(VALUE self)
|
532
505
|
{
|
533
506
|
xmlTextReaderPtr reader;
|
534
|
-
|
507
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
535
508
|
return INT2NUM(xmlTextReaderNodeType(reader));
|
536
509
|
}
|
537
510
|
|
@@ -545,11 +518,12 @@ static VALUE
|
|
545
518
|
read_more(VALUE self)
|
546
519
|
{
|
547
520
|
xmlTextReaderPtr reader;
|
548
|
-
|
521
|
+
xmlErrorConstPtr error;
|
549
522
|
VALUE error_list;
|
550
523
|
int ret;
|
524
|
+
xmlDocPtr c_document;
|
551
525
|
|
552
|
-
|
526
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
553
527
|
|
554
528
|
error_list = rb_funcall(self, rb_intern("errors"), 0);
|
555
529
|
|
@@ -557,6 +531,17 @@ read_more(VALUE self)
|
|
557
531
|
ret = xmlTextReaderRead(reader);
|
558
532
|
xmlSetStructuredErrorFunc(NULL, NULL);
|
559
533
|
|
534
|
+
c_document = xmlTextReaderCurrentDoc(reader);
|
535
|
+
if (c_document && c_document->encoding == NULL) {
|
536
|
+
VALUE constructor_encoding = rb_iv_get(self, "@encoding");
|
537
|
+
if (RTEST(constructor_encoding)) {
|
538
|
+
c_document->encoding = xmlStrdup(BAD_CAST StringValueCStr(constructor_encoding));
|
539
|
+
} else {
|
540
|
+
rb_iv_set(self, "@encoding", NOKOGIRI_STR_NEW2("UTF-8"));
|
541
|
+
c_document->encoding = xmlStrdup(BAD_CAST "UTF-8");
|
542
|
+
}
|
543
|
+
}
|
544
|
+
|
560
545
|
if (ret == 1) { return self; }
|
561
546
|
if (ret == 0) { return Qnil; }
|
562
547
|
|
@@ -584,7 +569,7 @@ inner_xml(VALUE self)
|
|
584
569
|
xmlChar *value;
|
585
570
|
VALUE str;
|
586
571
|
|
587
|
-
|
572
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
588
573
|
|
589
574
|
value = xmlTextReaderReadInnerXml(reader);
|
590
575
|
|
@@ -611,7 +596,7 @@ outer_xml(VALUE self)
|
|
611
596
|
xmlChar *value;
|
612
597
|
VALUE str = Qnil;
|
613
598
|
|
614
|
-
|
599
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
615
600
|
|
616
601
|
value = xmlTextReaderReadOuterXml(reader);
|
617
602
|
|
@@ -658,7 +643,7 @@ from_memory(int argc, VALUE *argv, VALUE klass)
|
|
658
643
|
rb_raise(rb_eRuntimeError, "couldn't create a parser");
|
659
644
|
}
|
660
645
|
|
661
|
-
rb_reader =
|
646
|
+
rb_reader = TypedData_Wrap_Struct(klass, &xml_reader_type, reader);
|
662
647
|
args[0] = rb_buffer;
|
663
648
|
args[1] = rb_url;
|
664
649
|
args[2] = encoding;
|
@@ -704,7 +689,7 @@ from_io(int argc, VALUE *argv, VALUE klass)
|
|
704
689
|
rb_raise(rb_eRuntimeError, "couldn't create a parser");
|
705
690
|
}
|
706
691
|
|
707
|
-
rb_reader =
|
692
|
+
rb_reader = TypedData_Wrap_Struct(klass, &xml_reader_type, reader);
|
708
693
|
args[0] = rb_io;
|
709
694
|
args[1] = rb_url;
|
710
695
|
args[2] = encoding;
|
@@ -724,7 +709,7 @@ empty_element_p(VALUE self)
|
|
724
709
|
{
|
725
710
|
xmlTextReaderPtr reader;
|
726
711
|
|
727
|
-
|
712
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
728
713
|
|
729
714
|
if (xmlTextReaderIsEmptyElement(reader)) {
|
730
715
|
return Qtrue;
|
@@ -740,15 +725,18 @@ rb_xml_reader_encoding(VALUE rb_reader)
|
|
740
725
|
const char *parser_encoding;
|
741
726
|
VALUE constructor_encoding;
|
742
727
|
|
728
|
+
TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_reader_type, c_reader);
|
729
|
+
parser_encoding = (const char *)xmlTextReaderConstEncoding(c_reader);
|
730
|
+
if (parser_encoding) {
|
731
|
+
return NOKOGIRI_STR_NEW2(parser_encoding);
|
732
|
+
}
|
733
|
+
|
743
734
|
constructor_encoding = rb_iv_get(rb_reader, "@encoding");
|
744
735
|
if (RTEST(constructor_encoding)) {
|
745
736
|
return constructor_encoding;
|
746
737
|
}
|
747
738
|
|
748
|
-
|
749
|
-
parser_encoding = (const char *)xmlTextReaderConstEncoding(c_reader);
|
750
|
-
if (parser_encoding == NULL) { return Qnil; }
|
751
|
-
return NOKOGIRI_STR_NEW2(parser_encoding);
|
739
|
+
return Qnil;
|
752
740
|
}
|
753
741
|
|
754
742
|
void
|
@@ -769,7 +757,6 @@ noko_init_xml_reader(void)
|
|
769
757
|
rb_define_method(cNokogiriXmlReader, "attribute", reader_attribute, 1);
|
770
758
|
rb_define_method(cNokogiriXmlReader, "attribute_at", attribute_at, 1);
|
771
759
|
rb_define_method(cNokogiriXmlReader, "attribute_count", attribute_count, 0);
|
772
|
-
rb_define_method(cNokogiriXmlReader, "attribute_nodes", rb_xml_reader_attribute_nodes, 0);
|
773
760
|
rb_define_method(cNokogiriXmlReader, "attribute_hash", rb_xml_reader_attribute_hash, 0);
|
774
761
|
rb_define_method(cNokogiriXmlReader, "attributes?", attributes_eh, 0);
|
775
762
|
rb_define_method(cNokogiriXmlReader, "base_uri", rb_xml_reader_base_uri, 0);
|
data/ext/nokogiri/xml_relax_ng.c
CHANGED
@@ -3,11 +3,20 @@
|
|
3
3
|
VALUE cNokogiriXmlRelaxNG;
|
4
4
|
|
5
5
|
static void
|
6
|
-
|
6
|
+
xml_relax_ng_deallocate(void *data)
|
7
7
|
{
|
8
|
+
xmlRelaxNGPtr schema = data;
|
8
9
|
xmlRelaxNGFree(schema);
|
9
10
|
}
|
10
11
|
|
12
|
+
static const rb_data_type_t xml_relax_ng_type = {
|
13
|
+
.wrap_struct_name = "Nokogiri::XML::RelaxNG",
|
14
|
+
.function = {
|
15
|
+
.dfree = xml_relax_ng_deallocate,
|
16
|
+
},
|
17
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
|
18
|
+
};
|
19
|
+
|
11
20
|
/*
|
12
21
|
* call-seq:
|
13
22
|
* validate_document(document)
|
@@ -22,8 +31,8 @@ validate_document(VALUE self, VALUE document)
|
|
22
31
|
VALUE errors;
|
23
32
|
xmlRelaxNGValidCtxtPtr valid_ctxt;
|
24
33
|
|
25
|
-
|
26
|
-
|
34
|
+
TypedData_Get_Struct(self, xmlRelaxNG, &xml_relax_ng_type, schema);
|
35
|
+
doc = noko_xml_document_unwrap(document);
|
27
36
|
|
28
37
|
errors = rb_ary_new();
|
29
38
|
|
@@ -49,48 +58,42 @@ validate_document(VALUE self, VALUE document)
|
|
49
58
|
return errors;
|
50
59
|
}
|
51
60
|
|
52
|
-
/*
|
53
|
-
* call-seq:
|
54
|
-
* read_memory(string)
|
55
|
-
*
|
56
|
-
* Create a new RelaxNG from the contents of +string+
|
57
|
-
*/
|
58
61
|
static VALUE
|
59
|
-
|
62
|
+
xml_relax_ng_parse_schema(
|
63
|
+
VALUE klass,
|
64
|
+
xmlRelaxNGParserCtxtPtr c_parser_context,
|
65
|
+
VALUE rb_parse_options
|
66
|
+
)
|
60
67
|
{
|
61
|
-
VALUE
|
62
|
-
VALUE parse_options;
|
63
|
-
xmlRelaxNGParserCtxtPtr ctx;
|
64
|
-
xmlRelaxNGPtr schema;
|
65
|
-
VALUE errors;
|
68
|
+
VALUE rb_errors;
|
66
69
|
VALUE rb_schema;
|
67
|
-
|
70
|
+
xmlRelaxNGPtr c_schema;
|
68
71
|
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
+
if (NIL_P(rb_parse_options)) {
|
73
|
+
rb_parse_options = rb_const_get_at(
|
74
|
+
rb_const_get_at(mNokogiriXml, rb_intern("ParseOptions")),
|
75
|
+
rb_intern("DEFAULT_SCHEMA")
|
76
|
+
);
|
72
77
|
}
|
73
78
|
|
74
|
-
|
75
|
-
|
76
|
-
errors = rb_ary_new();
|
77
|
-
xmlSetStructuredErrorFunc((void *)errors, Nokogiri_error_array_pusher);
|
79
|
+
rb_errors = rb_ary_new();
|
80
|
+
xmlSetStructuredErrorFunc((void *)rb_errors, Nokogiri_error_array_pusher);
|
78
81
|
|
79
82
|
#ifdef HAVE_XMLRELAXNGSETPARSERSTRUCTUREDERRORS
|
80
83
|
xmlRelaxNGSetParserStructuredErrors(
|
81
|
-
|
84
|
+
c_parser_context,
|
82
85
|
Nokogiri_error_array_pusher,
|
83
|
-
(void *)
|
86
|
+
(void *)rb_errors
|
84
87
|
);
|
85
88
|
#endif
|
86
89
|
|
87
|
-
|
90
|
+
c_schema = xmlRelaxNGParse(c_parser_context);
|
88
91
|
|
89
92
|
xmlSetStructuredErrorFunc(NULL, NULL);
|
90
|
-
xmlRelaxNGFreeParserCtxt(
|
93
|
+
xmlRelaxNGFreeParserCtxt(c_parser_context);
|
91
94
|
|
92
|
-
if (NULL ==
|
93
|
-
|
95
|
+
if (NULL == c_schema) {
|
96
|
+
xmlErrorConstPtr error = xmlGetLastError();
|
94
97
|
if (error) {
|
95
98
|
Nokogiri_error_raise(NULL, error);
|
96
99
|
} else {
|
@@ -100,74 +103,58 @@ read_memory(int argc, VALUE *argv, VALUE klass)
|
|
100
103
|
return Qnil;
|
101
104
|
}
|
102
105
|
|
103
|
-
rb_schema =
|
104
|
-
rb_iv_set(rb_schema, "@errors",
|
105
|
-
rb_iv_set(rb_schema, "@parse_options",
|
106
|
+
rb_schema = TypedData_Wrap_Struct(klass, &xml_relax_ng_type, c_schema);
|
107
|
+
rb_iv_set(rb_schema, "@errors", rb_errors);
|
108
|
+
rb_iv_set(rb_schema, "@parse_options", rb_parse_options);
|
106
109
|
|
107
110
|
return rb_schema;
|
108
111
|
}
|
109
112
|
|
110
113
|
/*
|
111
114
|
* call-seq:
|
112
|
-
*
|
115
|
+
* read_memory(string)
|
113
116
|
*
|
114
|
-
* Create a new RelaxNG
|
117
|
+
* Create a new RelaxNG from the contents of +string+
|
115
118
|
*/
|
116
119
|
static VALUE
|
117
|
-
|
120
|
+
read_memory(int argc, VALUE *argv, VALUE klass)
|
118
121
|
{
|
119
|
-
VALUE
|
120
|
-
VALUE
|
121
|
-
|
122
|
-
xmlRelaxNGParserCtxtPtr ctx;
|
123
|
-
xmlRelaxNGPtr schema;
|
124
|
-
VALUE errors;
|
125
|
-
VALUE rb_schema;
|
126
|
-
int scanned_args = 0;
|
127
|
-
|
128
|
-
scanned_args = rb_scan_args(argc, argv, "11", &document, &parse_options);
|
129
|
-
|
130
|
-
Data_Get_Struct(document, xmlDoc, doc);
|
131
|
-
doc = doc->doc; /* In case someone passes us a node. ugh. */
|
132
|
-
|
133
|
-
if (scanned_args == 1) {
|
134
|
-
parse_options = rb_const_get_at(rb_const_get_at(mNokogiriXml, rb_intern("ParseOptions")), rb_intern("DEFAULT_SCHEMA"));
|
135
|
-
}
|
122
|
+
VALUE rb_content;
|
123
|
+
VALUE rb_parse_options;
|
124
|
+
xmlRelaxNGParserCtxtPtr c_parser_context;
|
136
125
|
|
137
|
-
|
126
|
+
rb_scan_args(argc, argv, "11", &rb_content, &rb_parse_options);
|
138
127
|
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
xmlRelaxNGSetParserStructuredErrors(
|
144
|
-
ctx,
|
145
|
-
Nokogiri_error_array_pusher,
|
146
|
-
(void *)errors
|
147
|
-
);
|
148
|
-
#endif
|
128
|
+
c_parser_context = xmlRelaxNGNewMemParserCtxt(
|
129
|
+
(const char *)StringValuePtr(rb_content),
|
130
|
+
(int)RSTRING_LEN(rb_content)
|
131
|
+
);
|
149
132
|
|
150
|
-
|
133
|
+
return xml_relax_ng_parse_schema(klass, c_parser_context, rb_parse_options);
|
134
|
+
}
|
151
135
|
|
152
|
-
|
153
|
-
|
136
|
+
/*
|
137
|
+
* call-seq:
|
138
|
+
* from_document(doc)
|
139
|
+
*
|
140
|
+
* Create a new RelaxNG schema from the Nokogiri::XML::Document +doc+
|
141
|
+
*/
|
142
|
+
static VALUE
|
143
|
+
from_document(int argc, VALUE *argv, VALUE klass)
|
144
|
+
{
|
145
|
+
VALUE rb_document;
|
146
|
+
VALUE rb_parse_options;
|
147
|
+
xmlDocPtr c_document;
|
148
|
+
xmlRelaxNGParserCtxtPtr c_parser_context;
|
154
149
|
|
155
|
-
|
156
|
-
xmlErrorPtr error = xmlGetLastError();
|
157
|
-
if (error) {
|
158
|
-
Nokogiri_error_raise(NULL, error);
|
159
|
-
} else {
|
160
|
-
rb_raise(rb_eRuntimeError, "Could not parse document");
|
161
|
-
}
|
150
|
+
rb_scan_args(argc, argv, "11", &rb_document, &rb_parse_options);
|
162
151
|
|
163
|
-
|
164
|
-
|
152
|
+
c_document = noko_xml_document_unwrap(rb_document);
|
153
|
+
c_document = c_document->doc; /* In case someone passes us a node. ugh. */
|
165
154
|
|
166
|
-
|
167
|
-
rb_iv_set(rb_schema, "@errors", errors);
|
168
|
-
rb_iv_set(rb_schema, "@parse_options", parse_options);
|
155
|
+
c_parser_context = xmlRelaxNGNewDocParserCtxt(c_document);
|
169
156
|
|
170
|
-
return
|
157
|
+
return xml_relax_ng_parse_schema(klass, c_parser_context, rb_parse_options);
|
171
158
|
}
|
172
159
|
|
173
160
|
void
|
@@ -265,16 +265,27 @@ processing_instruction(void *ctx, const xmlChar *name, const xmlChar *content)
|
|
265
265
|
);
|
266
266
|
}
|
267
267
|
|
268
|
-
static
|
269
|
-
|
268
|
+
static size_t
|
269
|
+
memsize(const void *data)
|
270
270
|
{
|
271
|
-
|
271
|
+
return sizeof(xmlSAXHandler);
|
272
272
|
}
|
273
273
|
|
274
|
+
/* Used by Nokogiri::XML::SAX::Parser and Nokogiri::HTML::SAX::Parser */
|
275
|
+
static const rb_data_type_t noko_sax_handler_type = {
|
276
|
+
.wrap_struct_name = "Nokogiri::SAXHandler",
|
277
|
+
.function = {
|
278
|
+
.dfree = RUBY_TYPED_DEFAULT_FREE,
|
279
|
+
.dsize = memsize
|
280
|
+
},
|
281
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
|
282
|
+
};
|
283
|
+
|
274
284
|
static VALUE
|
275
285
|
allocate(VALUE klass)
|
276
286
|
{
|
277
|
-
xmlSAXHandlerPtr handler
|
287
|
+
xmlSAXHandlerPtr handler;
|
288
|
+
VALUE self = TypedData_Make_Struct(klass, xmlSAXHandler, &noko_sax_handler_type, handler);
|
278
289
|
|
279
290
|
handler->startDocument = start_document;
|
280
291
|
handler->endDocument = end_document;
|
@@ -290,7 +301,15 @@ allocate(VALUE klass)
|
|
290
301
|
handler->processingInstruction = processing_instruction;
|
291
302
|
handler->initialized = XML_SAX2_MAGIC;
|
292
303
|
|
293
|
-
return
|
304
|
+
return self;
|
305
|
+
}
|
306
|
+
|
307
|
+
xmlSAXHandlerPtr
|
308
|
+
noko_sax_handler_unwrap(VALUE rb_sax_handler)
|
309
|
+
{
|
310
|
+
xmlSAXHandlerPtr c_sax_handler;
|
311
|
+
TypedData_Get_Struct(rb_sax_handler, xmlSAXHandler, &noko_sax_handler_type, c_sax_handler);
|
312
|
+
return c_sax_handler;
|
294
313
|
}
|
295
314
|
|
296
315
|
void
|