nokogiri 1.14.5 → 1.15.0
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +9 -8
- data/dependencies.yml +6 -6
- data/ext/nokogiri/extconf.rb +66 -22
- data/ext/nokogiri/html4_document.c +1 -2
- data/ext/nokogiri/html4_element_description.c +19 -14
- data/ext/nokogiri/html4_sax_parser_context.c +10 -16
- data/ext/nokogiri/html4_sax_push_parser.c +2 -2
- data/ext/nokogiri/nokogiri.c +46 -24
- data/ext/nokogiri/nokogiri.h +13 -2
- data/ext/nokogiri/xml_attr.c +1 -1
- data/ext/nokogiri/xml_cdata.c +10 -2
- data/ext/nokogiri/xml_comment.c +1 -1
- data/ext/nokogiri/xml_document.c +102 -22
- data/ext/nokogiri/xml_document_fragment.c +1 -1
- data/ext/nokogiri/xml_dtd.c +1 -1
- data/ext/nokogiri/xml_element_content.c +32 -29
- data/ext/nokogiri/xml_element_decl.c +5 -5
- data/ext/nokogiri/xml_encoding_handler.c +12 -4
- data/ext/nokogiri/xml_entity_reference.c +1 -1
- data/ext/nokogiri/xml_namespace.c +11 -12
- data/ext/nokogiri/xml_node.c +7 -7
- data/ext/nokogiri/xml_node_set.c +125 -105
- data/ext/nokogiri/xml_processing_instruction.c +1 -1
- data/ext/nokogiri/xml_reader.c +37 -28
- data/ext/nokogiri/xml_relax_ng.c +65 -78
- data/ext/nokogiri/xml_sax_parser.c +24 -5
- data/ext/nokogiri/xml_sax_parser_context.c +46 -25
- data/ext/nokogiri/xml_sax_push_parser.c +29 -8
- data/ext/nokogiri/xml_schema.c +90 -116
- data/ext/nokogiri/xml_text.c +10 -2
- data/ext/nokogiri/xml_xpath_context.c +156 -83
- data/ext/nokogiri/xslt_stylesheet.c +103 -50
- data/gumbo-parser/src/error.c +8 -4
- data/gumbo-parser/src/foreign_attrs.c +13 -14
- data/gumbo-parser/src/foreign_attrs.gperf +1 -1
- data/gumbo-parser/src/parser.c +13 -0
- data/lib/nokogiri/css/xpath_visitor.rb +2 -2
- data/lib/nokogiri/extension.rb +1 -1
- data/lib/nokogiri/html4/document_fragment.rb +1 -1
- data/lib/nokogiri/html4/element_description_defaults.rb +1821 -353
- data/lib/nokogiri/html5/document_fragment.rb +1 -1
- data/lib/nokogiri/html5/node.rb +5 -0
- data/lib/nokogiri/html5.rb +5 -2
- data/lib/nokogiri/jruby/nokogiri_jars.rb +3 -3
- data/lib/nokogiri/version/constant.rb +1 -1
- data/lib/nokogiri/xml/attribute_decl.rb +4 -2
- data/lib/nokogiri/xml/document_fragment.rb +1 -1
- data/lib/nokogiri/xml/element_content.rb +10 -2
- data/lib/nokogiri/xml/element_decl.rb +4 -2
- data/lib/nokogiri/xml/entity_decl.rb +4 -2
- data/lib/nokogiri/xml/node/save_options.rb +8 -0
- data/lib/nokogiri/xml/node.rb +22 -13
- data/lib/nokogiri/xml/pp/node.rb +23 -12
- data/lib/nokogiri/xml/sax/document.rb +1 -1
- data/lib/nokogiri/xml/searchable.rb +18 -10
- data/lib/nokogiri/xslt.rb +73 -3
- data/lib/nokogiri.rb +12 -4
- data/lib/xsd/xmlparser/nokogiri.rb +1 -1
- data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
- data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
- data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
- data/ports/archives/libxml2-2.11.3.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.38.tar.xz +0 -0
- metadata +9 -7
- data/patches/libxslt/0001-update-automake-files-for-arm64.patch +0 -3037
- data/ports/archives/libxml2-2.10.4.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.37.tar.xz +0 -0
data/ext/nokogiri/xml_reader.c
CHANGED
@@ -3,11 +3,20 @@
|
|
3
3
|
VALUE cNokogiriXmlReader;
|
4
4
|
|
5
5
|
static void
|
6
|
-
|
6
|
+
xml_reader_deallocate(void *data)
|
7
7
|
{
|
8
|
+
xmlTextReaderPtr reader = data;
|
8
9
|
xmlFreeTextReader(reader);
|
9
10
|
}
|
10
11
|
|
12
|
+
static const rb_data_type_t xml_reader_type = {
|
13
|
+
.wrap_struct_name = "Nokogiri::XML::Reader",
|
14
|
+
.function = {
|
15
|
+
.dfree = xml_reader_deallocate,
|
16
|
+
},
|
17
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
|
18
|
+
};
|
19
|
+
|
11
20
|
static int
|
12
21
|
has_attributes(xmlTextReaderPtr reader)
|
13
22
|
{
|
@@ -69,7 +78,7 @@ default_eh(VALUE self)
|
|
69
78
|
xmlTextReaderPtr reader;
|
70
79
|
int eh;
|
71
80
|
|
72
|
-
|
81
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
73
82
|
eh = xmlTextReaderIsDefault(reader);
|
74
83
|
if (eh == 0) { return Qfalse; }
|
75
84
|
if (eh == 1) { return Qtrue; }
|
@@ -89,7 +98,7 @@ value_eh(VALUE self)
|
|
89
98
|
xmlTextReaderPtr reader;
|
90
99
|
int eh;
|
91
100
|
|
92
|
-
|
101
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
93
102
|
eh = xmlTextReaderHasValue(reader);
|
94
103
|
if (eh == 0) { return Qfalse; }
|
95
104
|
if (eh == 1) { return Qtrue; }
|
@@ -109,7 +118,7 @@ attributes_eh(VALUE self)
|
|
109
118
|
xmlTextReaderPtr reader;
|
110
119
|
int eh;
|
111
120
|
|
112
|
-
|
121
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
113
122
|
eh = has_attributes(reader);
|
114
123
|
if (eh == 0) { return Qfalse; }
|
115
124
|
if (eh == 1) { return Qtrue; }
|
@@ -131,7 +140,7 @@ rb_xml_reader_namespaces(VALUE rb_reader)
|
|
131
140
|
xmlNodePtr c_node;
|
132
141
|
VALUE rb_errors;
|
133
142
|
|
134
|
-
|
143
|
+
TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_reader_type, c_reader);
|
135
144
|
|
136
145
|
if (! has_attributes(c_reader)) {
|
137
146
|
return rb_namespaces ;
|
@@ -178,7 +187,7 @@ rb_xml_reader_attribute_nodes(VALUE rb_reader)
|
|
178
187
|
// After removal, we can also remove all the "node_has_a_document" special handling from xml_node.c
|
179
188
|
NOKO_WARN_DEPRECATION("Reader#attribute_nodes is deprecated and will be removed in a future version of Nokogiri. Please use Reader#attribute_hash instead.");
|
180
189
|
|
181
|
-
|
190
|
+
TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_reader_type, c_reader);
|
182
191
|
|
183
192
|
if (! has_attributes(c_reader)) {
|
184
193
|
return rb_ary_new() ;
|
@@ -215,7 +224,7 @@ rb_xml_reader_attribute_hash(VALUE rb_reader)
|
|
215
224
|
xmlAttrPtr c_property;
|
216
225
|
VALUE rb_errors;
|
217
226
|
|
218
|
-
|
227
|
+
TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_reader_type, c_reader);
|
219
228
|
|
220
229
|
if (!has_attributes(c_reader)) {
|
221
230
|
return rb_attributes;
|
@@ -268,7 +277,7 @@ attribute_at(VALUE self, VALUE index)
|
|
268
277
|
xmlChar *value;
|
269
278
|
VALUE rb_value;
|
270
279
|
|
271
|
-
|
280
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
272
281
|
|
273
282
|
if (NIL_P(index)) { return Qnil; }
|
274
283
|
index = rb_Integer(index);
|
@@ -297,7 +306,7 @@ reader_attribute(VALUE self, VALUE name)
|
|
297
306
|
xmlChar *value ;
|
298
307
|
VALUE rb_value;
|
299
308
|
|
300
|
-
|
309
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
301
310
|
|
302
311
|
if (NIL_P(name)) { return Qnil; }
|
303
312
|
name = StringValue(name) ;
|
@@ -322,7 +331,7 @@ attribute_count(VALUE self)
|
|
322
331
|
xmlTextReaderPtr reader;
|
323
332
|
int count;
|
324
333
|
|
325
|
-
|
334
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
326
335
|
count = xmlTextReaderAttributeCount(reader);
|
327
336
|
if (count == -1) { return Qnil; }
|
328
337
|
|
@@ -341,7 +350,7 @@ depth(VALUE self)
|
|
341
350
|
xmlTextReaderPtr reader;
|
342
351
|
int depth;
|
343
352
|
|
344
|
-
|
353
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
345
354
|
depth = xmlTextReaderDepth(reader);
|
346
355
|
if (depth == -1) { return Qnil; }
|
347
356
|
|
@@ -360,7 +369,7 @@ xml_version(VALUE self)
|
|
360
369
|
xmlTextReaderPtr reader;
|
361
370
|
const char *version;
|
362
371
|
|
363
|
-
|
372
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
364
373
|
version = (const char *)xmlTextReaderConstXmlVersion(reader);
|
365
374
|
if (version == NULL) { return Qnil; }
|
366
375
|
|
@@ -379,7 +388,7 @@ lang(VALUE self)
|
|
379
388
|
xmlTextReaderPtr reader;
|
380
389
|
const char *lang;
|
381
390
|
|
382
|
-
|
391
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
383
392
|
lang = (const char *)xmlTextReaderConstXmlLang(reader);
|
384
393
|
if (lang == NULL) { return Qnil; }
|
385
394
|
|
@@ -398,7 +407,7 @@ value(VALUE self)
|
|
398
407
|
xmlTextReaderPtr reader;
|
399
408
|
const char *value;
|
400
409
|
|
401
|
-
|
410
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
402
411
|
value = (const char *)xmlTextReaderConstValue(reader);
|
403
412
|
if (value == NULL) { return Qnil; }
|
404
413
|
|
@@ -417,7 +426,7 @@ prefix(VALUE self)
|
|
417
426
|
xmlTextReaderPtr reader;
|
418
427
|
const char *prefix;
|
419
428
|
|
420
|
-
|
429
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
421
430
|
prefix = (const char *)xmlTextReaderConstPrefix(reader);
|
422
431
|
if (prefix == NULL) { return Qnil; }
|
423
432
|
|
@@ -436,7 +445,7 @@ namespace_uri(VALUE self)
|
|
436
445
|
xmlTextReaderPtr reader;
|
437
446
|
const char *uri;
|
438
447
|
|
439
|
-
|
448
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
440
449
|
uri = (const char *)xmlTextReaderConstNamespaceUri(reader);
|
441
450
|
if (uri == NULL) { return Qnil; }
|
442
451
|
|
@@ -455,7 +464,7 @@ local_name(VALUE self)
|
|
455
464
|
xmlTextReaderPtr reader;
|
456
465
|
const char *name;
|
457
466
|
|
458
|
-
|
467
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
459
468
|
name = (const char *)xmlTextReaderConstLocalName(reader);
|
460
469
|
if (name == NULL) { return Qnil; }
|
461
470
|
|
@@ -474,7 +483,7 @@ name(VALUE self)
|
|
474
483
|
xmlTextReaderPtr reader;
|
475
484
|
const char *name;
|
476
485
|
|
477
|
-
|
486
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
478
487
|
name = (const char *)xmlTextReaderConstName(reader);
|
479
488
|
if (name == NULL) { return Qnil; }
|
480
489
|
|
@@ -494,7 +503,7 @@ rb_xml_reader_base_uri(VALUE rb_reader)
|
|
494
503
|
xmlTextReaderPtr c_reader;
|
495
504
|
xmlChar *c_base_uri;
|
496
505
|
|
497
|
-
|
506
|
+
TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_reader_type, c_reader);
|
498
507
|
|
499
508
|
c_base_uri = xmlTextReaderBaseUri(c_reader);
|
500
509
|
if (c_base_uri == NULL) {
|
@@ -517,7 +526,7 @@ static VALUE
|
|
517
526
|
state(VALUE self)
|
518
527
|
{
|
519
528
|
xmlTextReaderPtr reader;
|
520
|
-
|
529
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
521
530
|
return INT2NUM(xmlTextReaderReadState(reader));
|
522
531
|
}
|
523
532
|
|
@@ -531,7 +540,7 @@ static VALUE
|
|
531
540
|
node_type(VALUE self)
|
532
541
|
{
|
533
542
|
xmlTextReaderPtr reader;
|
534
|
-
|
543
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
535
544
|
return INT2NUM(xmlTextReaderNodeType(reader));
|
536
545
|
}
|
537
546
|
|
@@ -549,7 +558,7 @@ read_more(VALUE self)
|
|
549
558
|
VALUE error_list;
|
550
559
|
int ret;
|
551
560
|
|
552
|
-
|
561
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
553
562
|
|
554
563
|
error_list = rb_funcall(self, rb_intern("errors"), 0);
|
555
564
|
|
@@ -584,7 +593,7 @@ inner_xml(VALUE self)
|
|
584
593
|
xmlChar *value;
|
585
594
|
VALUE str;
|
586
595
|
|
587
|
-
|
596
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
588
597
|
|
589
598
|
value = xmlTextReaderReadInnerXml(reader);
|
590
599
|
|
@@ -611,7 +620,7 @@ outer_xml(VALUE self)
|
|
611
620
|
xmlChar *value;
|
612
621
|
VALUE str = Qnil;
|
613
622
|
|
614
|
-
|
623
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
615
624
|
|
616
625
|
value = xmlTextReaderReadOuterXml(reader);
|
617
626
|
|
@@ -658,7 +667,7 @@ from_memory(int argc, VALUE *argv, VALUE klass)
|
|
658
667
|
rb_raise(rb_eRuntimeError, "couldn't create a parser");
|
659
668
|
}
|
660
669
|
|
661
|
-
rb_reader =
|
670
|
+
rb_reader = TypedData_Wrap_Struct(klass, &xml_reader_type, reader);
|
662
671
|
args[0] = rb_buffer;
|
663
672
|
args[1] = rb_url;
|
664
673
|
args[2] = encoding;
|
@@ -704,7 +713,7 @@ from_io(int argc, VALUE *argv, VALUE klass)
|
|
704
713
|
rb_raise(rb_eRuntimeError, "couldn't create a parser");
|
705
714
|
}
|
706
715
|
|
707
|
-
rb_reader =
|
716
|
+
rb_reader = TypedData_Wrap_Struct(klass, &xml_reader_type, reader);
|
708
717
|
args[0] = rb_io;
|
709
718
|
args[1] = rb_url;
|
710
719
|
args[2] = encoding;
|
@@ -724,7 +733,7 @@ empty_element_p(VALUE self)
|
|
724
733
|
{
|
725
734
|
xmlTextReaderPtr reader;
|
726
735
|
|
727
|
-
|
736
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
728
737
|
|
729
738
|
if (xmlTextReaderIsEmptyElement(reader)) {
|
730
739
|
return Qtrue;
|
@@ -745,7 +754,7 @@ rb_xml_reader_encoding(VALUE rb_reader)
|
|
745
754
|
return constructor_encoding;
|
746
755
|
}
|
747
756
|
|
748
|
-
|
757
|
+
TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_reader_type, c_reader);
|
749
758
|
parser_encoding = (const char *)xmlTextReaderConstEncoding(c_reader);
|
750
759
|
if (parser_encoding == NULL) { return Qnil; }
|
751
760
|
return NOKOGIRI_STR_NEW2(parser_encoding);
|
data/ext/nokogiri/xml_relax_ng.c
CHANGED
@@ -3,11 +3,20 @@
|
|
3
3
|
VALUE cNokogiriXmlRelaxNG;
|
4
4
|
|
5
5
|
static void
|
6
|
-
|
6
|
+
xml_relax_ng_deallocate(void *data)
|
7
7
|
{
|
8
|
+
xmlRelaxNGPtr schema = data;
|
8
9
|
xmlRelaxNGFree(schema);
|
9
10
|
}
|
10
11
|
|
12
|
+
static const rb_data_type_t xml_relax_ng_type = {
|
13
|
+
.wrap_struct_name = "Nokogiri::XML::RelaxNG",
|
14
|
+
.function = {
|
15
|
+
.dfree = xml_relax_ng_deallocate,
|
16
|
+
},
|
17
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
|
18
|
+
};
|
19
|
+
|
11
20
|
/*
|
12
21
|
* call-seq:
|
13
22
|
* validate_document(document)
|
@@ -22,8 +31,8 @@ validate_document(VALUE self, VALUE document)
|
|
22
31
|
VALUE errors;
|
23
32
|
xmlRelaxNGValidCtxtPtr valid_ctxt;
|
24
33
|
|
25
|
-
|
26
|
-
|
34
|
+
TypedData_Get_Struct(self, xmlRelaxNG, &xml_relax_ng_type, schema);
|
35
|
+
doc = noko_xml_document_unwrap(document);
|
27
36
|
|
28
37
|
errors = rb_ary_new();
|
29
38
|
|
@@ -49,47 +58,41 @@ validate_document(VALUE self, VALUE document)
|
|
49
58
|
return errors;
|
50
59
|
}
|
51
60
|
|
52
|
-
/*
|
53
|
-
* call-seq:
|
54
|
-
* read_memory(string)
|
55
|
-
*
|
56
|
-
* Create a new RelaxNG from the contents of +string+
|
57
|
-
*/
|
58
61
|
static VALUE
|
59
|
-
|
62
|
+
xml_relax_ng_parse_schema(
|
63
|
+
VALUE klass,
|
64
|
+
xmlRelaxNGParserCtxtPtr c_parser_context,
|
65
|
+
VALUE rb_parse_options
|
66
|
+
)
|
60
67
|
{
|
61
|
-
VALUE
|
62
|
-
VALUE parse_options;
|
63
|
-
xmlRelaxNGParserCtxtPtr ctx;
|
64
|
-
xmlRelaxNGPtr schema;
|
65
|
-
VALUE errors;
|
68
|
+
VALUE rb_errors;
|
66
69
|
VALUE rb_schema;
|
67
|
-
|
70
|
+
xmlRelaxNGPtr c_schema;
|
68
71
|
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
+
if (NIL_P(rb_parse_options)) {
|
73
|
+
rb_parse_options = rb_const_get_at(
|
74
|
+
rb_const_get_at(mNokogiriXml, rb_intern("ParseOptions")),
|
75
|
+
rb_intern("DEFAULT_SCHEMA")
|
76
|
+
);
|
72
77
|
}
|
73
78
|
|
74
|
-
|
75
|
-
|
76
|
-
errors = rb_ary_new();
|
77
|
-
xmlSetStructuredErrorFunc((void *)errors, Nokogiri_error_array_pusher);
|
79
|
+
rb_errors = rb_ary_new();
|
80
|
+
xmlSetStructuredErrorFunc((void *)rb_errors, Nokogiri_error_array_pusher);
|
78
81
|
|
79
82
|
#ifdef HAVE_XMLRELAXNGSETPARSERSTRUCTUREDERRORS
|
80
83
|
xmlRelaxNGSetParserStructuredErrors(
|
81
|
-
|
84
|
+
c_parser_context,
|
82
85
|
Nokogiri_error_array_pusher,
|
83
|
-
(void *)
|
86
|
+
(void *)rb_errors
|
84
87
|
);
|
85
88
|
#endif
|
86
89
|
|
87
|
-
|
90
|
+
c_schema = xmlRelaxNGParse(c_parser_context);
|
88
91
|
|
89
92
|
xmlSetStructuredErrorFunc(NULL, NULL);
|
90
|
-
xmlRelaxNGFreeParserCtxt(
|
93
|
+
xmlRelaxNGFreeParserCtxt(c_parser_context);
|
91
94
|
|
92
|
-
if (NULL ==
|
95
|
+
if (NULL == c_schema) {
|
93
96
|
xmlErrorPtr error = xmlGetLastError();
|
94
97
|
if (error) {
|
95
98
|
Nokogiri_error_raise(NULL, error);
|
@@ -100,74 +103,58 @@ read_memory(int argc, VALUE *argv, VALUE klass)
|
|
100
103
|
return Qnil;
|
101
104
|
}
|
102
105
|
|
103
|
-
rb_schema =
|
104
|
-
rb_iv_set(rb_schema, "@errors",
|
105
|
-
rb_iv_set(rb_schema, "@parse_options",
|
106
|
+
rb_schema = TypedData_Wrap_Struct(klass, &xml_relax_ng_type, c_schema);
|
107
|
+
rb_iv_set(rb_schema, "@errors", rb_errors);
|
108
|
+
rb_iv_set(rb_schema, "@parse_options", rb_parse_options);
|
106
109
|
|
107
110
|
return rb_schema;
|
108
111
|
}
|
109
112
|
|
110
113
|
/*
|
111
114
|
* call-seq:
|
112
|
-
*
|
115
|
+
* read_memory(string)
|
113
116
|
*
|
114
|
-
* Create a new RelaxNG
|
117
|
+
* Create a new RelaxNG from the contents of +string+
|
115
118
|
*/
|
116
119
|
static VALUE
|
117
|
-
|
120
|
+
read_memory(int argc, VALUE *argv, VALUE klass)
|
118
121
|
{
|
119
|
-
VALUE
|
120
|
-
VALUE
|
121
|
-
|
122
|
-
xmlRelaxNGParserCtxtPtr ctx;
|
123
|
-
xmlRelaxNGPtr schema;
|
124
|
-
VALUE errors;
|
125
|
-
VALUE rb_schema;
|
126
|
-
int scanned_args = 0;
|
122
|
+
VALUE rb_content;
|
123
|
+
VALUE rb_parse_options;
|
124
|
+
xmlRelaxNGParserCtxtPtr c_parser_context;
|
127
125
|
|
128
|
-
|
126
|
+
rb_scan_args(argc, argv, "11", &rb_content, &rb_parse_options);
|
129
127
|
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
parse_options = rb_const_get_at(rb_const_get_at(mNokogiriXml, rb_intern("ParseOptions")), rb_intern("DEFAULT_SCHEMA"));
|
135
|
-
}
|
136
|
-
|
137
|
-
ctx = xmlRelaxNGNewDocParserCtxt(doc);
|
138
|
-
|
139
|
-
errors = rb_ary_new();
|
140
|
-
xmlSetStructuredErrorFunc((void *)errors, Nokogiri_error_array_pusher);
|
128
|
+
c_parser_context = xmlRelaxNGNewMemParserCtxt(
|
129
|
+
(const char *)StringValuePtr(rb_content),
|
130
|
+
(int)RSTRING_LEN(rb_content)
|
131
|
+
);
|
141
132
|
|
142
|
-
|
143
|
-
|
144
|
-
ctx,
|
145
|
-
Nokogiri_error_array_pusher,
|
146
|
-
(void *)errors
|
147
|
-
);
|
148
|
-
#endif
|
149
|
-
|
150
|
-
schema = xmlRelaxNGParse(ctx);
|
133
|
+
return xml_relax_ng_parse_schema(klass, c_parser_context, rb_parse_options);
|
134
|
+
}
|
151
135
|
|
152
|
-
|
153
|
-
|
136
|
+
/*
|
137
|
+
* call-seq:
|
138
|
+
* from_document(doc)
|
139
|
+
*
|
140
|
+
* Create a new RelaxNG schema from the Nokogiri::XML::Document +doc+
|
141
|
+
*/
|
142
|
+
static VALUE
|
143
|
+
from_document(int argc, VALUE *argv, VALUE klass)
|
144
|
+
{
|
145
|
+
VALUE rb_document;
|
146
|
+
VALUE rb_parse_options;
|
147
|
+
xmlDocPtr c_document;
|
148
|
+
xmlRelaxNGParserCtxtPtr c_parser_context;
|
154
149
|
|
155
|
-
|
156
|
-
xmlErrorPtr error = xmlGetLastError();
|
157
|
-
if (error) {
|
158
|
-
Nokogiri_error_raise(NULL, error);
|
159
|
-
} else {
|
160
|
-
rb_raise(rb_eRuntimeError, "Could not parse document");
|
161
|
-
}
|
150
|
+
rb_scan_args(argc, argv, "11", &rb_document, &rb_parse_options);
|
162
151
|
|
163
|
-
|
164
|
-
|
152
|
+
c_document = noko_xml_document_unwrap(rb_document);
|
153
|
+
c_document = c_document->doc; /* In case someone passes us a node. ugh. */
|
165
154
|
|
166
|
-
|
167
|
-
rb_iv_set(rb_schema, "@errors", errors);
|
168
|
-
rb_iv_set(rb_schema, "@parse_options", parse_options);
|
155
|
+
c_parser_context = xmlRelaxNGNewDocParserCtxt(c_document);
|
169
156
|
|
170
|
-
return
|
157
|
+
return xml_relax_ng_parse_schema(klass, c_parser_context, rb_parse_options);
|
171
158
|
}
|
172
159
|
|
173
160
|
void
|
@@ -265,16 +265,27 @@ processing_instruction(void *ctx, const xmlChar *name, const xmlChar *content)
|
|
265
265
|
);
|
266
266
|
}
|
267
267
|
|
268
|
-
static
|
269
|
-
|
268
|
+
static size_t
|
269
|
+
memsize(const void *data)
|
270
270
|
{
|
271
|
-
|
271
|
+
return sizeof(xmlSAXHandler);
|
272
272
|
}
|
273
273
|
|
274
|
+
/* Used by Nokogiri::XML::SAX::Parser and Nokogiri::HTML::SAX::Parser */
|
275
|
+
static const rb_data_type_t noko_sax_handler_type = {
|
276
|
+
.wrap_struct_name = "Nokogiri::SAXHandler",
|
277
|
+
.function = {
|
278
|
+
.dfree = RUBY_TYPED_DEFAULT_FREE,
|
279
|
+
.dsize = memsize
|
280
|
+
},
|
281
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
|
282
|
+
};
|
283
|
+
|
274
284
|
static VALUE
|
275
285
|
allocate(VALUE klass)
|
276
286
|
{
|
277
|
-
xmlSAXHandlerPtr handler
|
287
|
+
xmlSAXHandlerPtr handler;
|
288
|
+
VALUE self = TypedData_Make_Struct(klass, xmlSAXHandler, &noko_sax_handler_type, handler);
|
278
289
|
|
279
290
|
handler->startDocument = start_document;
|
280
291
|
handler->endDocument = end_document;
|
@@ -290,7 +301,15 @@ allocate(VALUE klass)
|
|
290
301
|
handler->processingInstruction = processing_instruction;
|
291
302
|
handler->initialized = XML_SAX2_MAGIC;
|
292
303
|
|
293
|
-
return
|
304
|
+
return self;
|
305
|
+
}
|
306
|
+
|
307
|
+
xmlSAXHandlerPtr
|
308
|
+
noko_sax_handler_unwrap(VALUE rb_sax_handler)
|
309
|
+
{
|
310
|
+
xmlSAXHandlerPtr c_sax_handler;
|
311
|
+
TypedData_Get_Struct(rb_sax_handler, xmlSAXHandler, &noko_sax_handler_type, c_sax_handler);
|
312
|
+
return c_sax_handler;
|
294
313
|
}
|
295
314
|
|
296
315
|
void
|
@@ -5,12 +5,40 @@ VALUE cNokogiriXmlSaxParserContext ;
|
|
5
5
|
static ID id_read;
|
6
6
|
|
7
7
|
static void
|
8
|
-
|
8
|
+
xml_sax_parser_context_free(void *data)
|
9
9
|
{
|
10
|
+
xmlParserCtxtPtr ctxt = data;
|
10
11
|
ctxt->sax = NULL;
|
11
12
|
xmlFreeParserCtxt(ctxt);
|
12
13
|
}
|
13
14
|
|
15
|
+
/*
|
16
|
+
* note that htmlParserCtxtPtr == xmlParserCtxtPtr and xmlFreeParserCtxt() == htmlFreeParserCtxt()
|
17
|
+
* so we use this type for both XML::SAX::ParserContext and HTML::SAX::ParserContext
|
18
|
+
*/
|
19
|
+
static const rb_data_type_t xml_sax_parser_context_type = {
|
20
|
+
.wrap_struct_name = "Nokogiri::XML::SAX::ParserContext",
|
21
|
+
.function = {
|
22
|
+
.dfree = xml_sax_parser_context_free,
|
23
|
+
},
|
24
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
|
25
|
+
};
|
26
|
+
|
27
|
+
xmlParserCtxtPtr
|
28
|
+
noko_xml_sax_parser_context_unwrap(VALUE rb_context)
|
29
|
+
{
|
30
|
+
xmlParserCtxtPtr c_context;
|
31
|
+
TypedData_Get_Struct(rb_context, xmlParserCtxt, &xml_sax_parser_context_type, c_context);
|
32
|
+
return c_context;
|
33
|
+
}
|
34
|
+
|
35
|
+
VALUE
|
36
|
+
noko_xml_sax_parser_context_wrap(VALUE klass, xmlParserCtxtPtr c_context)
|
37
|
+
{
|
38
|
+
return TypedData_Wrap_Struct(klass, &xml_sax_parser_context_type, c_context);
|
39
|
+
}
|
40
|
+
|
41
|
+
|
14
42
|
/*
|
15
43
|
* call-seq:
|
16
44
|
* parse_io(io, encoding)
|
@@ -36,7 +64,7 @@ parse_io(VALUE klass, VALUE io, VALUE encoding)
|
|
36
64
|
ctxt->sax = NULL;
|
37
65
|
}
|
38
66
|
|
39
|
-
return
|
67
|
+
return noko_xml_sax_parser_context_wrap(klass, ctxt);
|
40
68
|
}
|
41
69
|
|
42
70
|
/*
|
@@ -49,7 +77,13 @@ static VALUE
|
|
49
77
|
parse_file(VALUE klass, VALUE filename)
|
50
78
|
{
|
51
79
|
xmlParserCtxtPtr ctxt = xmlCreateFileParserCtxt(StringValueCStr(filename));
|
52
|
-
|
80
|
+
|
81
|
+
if (ctxt->sax) {
|
82
|
+
xmlFree(ctxt->sax);
|
83
|
+
ctxt->sax = NULL;
|
84
|
+
}
|
85
|
+
|
86
|
+
return noko_xml_sax_parser_context_wrap(klass, ctxt);
|
53
87
|
}
|
54
88
|
|
55
89
|
/*
|
@@ -76,7 +110,7 @@ parse_memory(VALUE klass, VALUE data)
|
|
76
110
|
ctxt->sax = NULL;
|
77
111
|
}
|
78
112
|
|
79
|
-
return
|
113
|
+
return noko_xml_sax_parser_context_wrap(klass, ctxt);
|
80
114
|
}
|
81
115
|
|
82
116
|
static VALUE
|
@@ -116,13 +150,8 @@ parse_with(VALUE self, VALUE sax_handler)
|
|
116
150
|
rb_raise(rb_eArgError, "argument must be a Nokogiri::XML::SAX::Parser");
|
117
151
|
}
|
118
152
|
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
/* Free the sax handler since we'll assign our own */
|
123
|
-
if (ctxt->sax && ctxt->sax != (xmlSAXHandlerPtr)&xmlDefaultSAXHandler) {
|
124
|
-
xmlFree(ctxt->sax);
|
125
|
-
}
|
153
|
+
ctxt = noko_xml_sax_parser_context_unwrap(self);
|
154
|
+
sax = noko_sax_handler_unwrap(sax_handler);
|
126
155
|
|
127
156
|
ctxt->sax = sax;
|
128
157
|
ctxt->userData = (void *)NOKOGIRI_SAX_TUPLE_NEW(ctxt, sax_handler);
|
@@ -144,8 +173,7 @@ parse_with(VALUE self, VALUE sax_handler)
|
|
144
173
|
static VALUE
|
145
174
|
set_replace_entities(VALUE self, VALUE value)
|
146
175
|
{
|
147
|
-
xmlParserCtxtPtr ctxt;
|
148
|
-
Data_Get_Struct(self, xmlParserCtxt, ctxt);
|
176
|
+
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);
|
149
177
|
|
150
178
|
if (Qfalse == value) {
|
151
179
|
ctxt->replaceEntities = 0;
|
@@ -166,8 +194,7 @@ set_replace_entities(VALUE self, VALUE value)
|
|
166
194
|
static VALUE
|
167
195
|
get_replace_entities(VALUE self)
|
168
196
|
{
|
169
|
-
xmlParserCtxtPtr ctxt;
|
170
|
-
Data_Get_Struct(self, xmlParserCtxt, ctxt);
|
197
|
+
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);
|
171
198
|
|
172
199
|
if (0 == ctxt->replaceEntities) {
|
173
200
|
return Qfalse;
|
@@ -184,10 +211,8 @@ get_replace_entities(VALUE self)
|
|
184
211
|
static VALUE
|
185
212
|
line(VALUE self)
|
186
213
|
{
|
187
|
-
xmlParserCtxtPtr ctxt;
|
188
214
|
xmlParserInputPtr io;
|
189
|
-
|
190
|
-
Data_Get_Struct(self, xmlParserCtxt, ctxt);
|
215
|
+
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);
|
191
216
|
|
192
217
|
io = ctxt->input;
|
193
218
|
if (io) {
|
@@ -205,11 +230,9 @@ line(VALUE self)
|
|
205
230
|
static VALUE
|
206
231
|
column(VALUE self)
|
207
232
|
{
|
208
|
-
xmlParserCtxtPtr ctxt;
|
233
|
+
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);
|
209
234
|
xmlParserInputPtr io;
|
210
235
|
|
211
|
-
Data_Get_Struct(self, xmlParserCtxt, ctxt);
|
212
|
-
|
213
236
|
io = ctxt->input;
|
214
237
|
if (io) {
|
215
238
|
return INT2NUM(io->col);
|
@@ -228,8 +251,7 @@ column(VALUE self)
|
|
228
251
|
static VALUE
|
229
252
|
set_recovery(VALUE self, VALUE value)
|
230
253
|
{
|
231
|
-
xmlParserCtxtPtr ctxt;
|
232
|
-
Data_Get_Struct(self, xmlParserCtxt, ctxt);
|
254
|
+
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);
|
233
255
|
|
234
256
|
if (value == Qfalse) {
|
235
257
|
ctxt->recovery = 0;
|
@@ -250,8 +272,7 @@ set_recovery(VALUE self, VALUE value)
|
|
250
272
|
static VALUE
|
251
273
|
get_recovery(VALUE self)
|
252
274
|
{
|
253
|
-
xmlParserCtxtPtr ctxt;
|
254
|
-
Data_Get_Struct(self, xmlParserCtxt, ctxt);
|
275
|
+
xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);
|
255
276
|
|
256
277
|
if (ctxt->recovery == 0) {
|
257
278
|
return Qfalse;
|