nokogiri 1.13.8 → 1.15.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +40 -0
- data/LICENSE-DEPENDENCIES.md +830 -509
- data/LICENSE.md +1 -1
- data/README.md +18 -11
- data/dependencies.yml +33 -15
- data/ext/nokogiri/extconf.rb +164 -46
- data/ext/nokogiri/gumbo.c +20 -10
- data/ext/nokogiri/html4_document.c +3 -4
- data/ext/nokogiri/html4_element_description.c +20 -15
- data/ext/nokogiri/html4_entity_lookup.c +2 -2
- data/ext/nokogiri/html4_sax_parser_context.c +11 -22
- data/ext/nokogiri/html4_sax_push_parser.c +3 -3
- data/ext/nokogiri/nokogiri.c +84 -75
- data/ext/nokogiri/nokogiri.h +31 -16
- data/ext/nokogiri/test_global_handlers.c +1 -1
- data/ext/nokogiri/xml_attr.c +2 -2
- data/ext/nokogiri/xml_attribute_decl.c +2 -2
- data/ext/nokogiri/xml_cdata.c +32 -18
- data/ext/nokogiri/xml_comment.c +2 -2
- data/ext/nokogiri/xml_document.c +127 -34
- data/ext/nokogiri/xml_document_fragment.c +2 -2
- data/ext/nokogiri/xml_dtd.c +2 -2
- data/ext/nokogiri/xml_element_content.c +34 -31
- data/ext/nokogiri/xml_element_decl.c +7 -7
- data/ext/nokogiri/xml_encoding_handler.c +15 -7
- data/ext/nokogiri/xml_entity_decl.c +1 -1
- data/ext/nokogiri/xml_entity_reference.c +2 -2
- data/ext/nokogiri/xml_namespace.c +79 -14
- data/ext/nokogiri/xml_node.c +300 -34
- data/ext/nokogiri/xml_node_set.c +125 -107
- data/ext/nokogiri/xml_processing_instruction.c +2 -2
- data/ext/nokogiri/xml_reader.c +81 -48
- data/ext/nokogiri/xml_relax_ng.c +66 -81
- data/ext/nokogiri/xml_sax_parser.c +45 -20
- data/ext/nokogiri/xml_sax_parser_context.c +46 -30
- data/ext/nokogiri/xml_sax_push_parser.c +30 -11
- data/ext/nokogiri/xml_schema.c +95 -117
- data/ext/nokogiri/xml_syntax_error.c +1 -1
- data/ext/nokogiri/xml_text.c +28 -14
- data/ext/nokogiri/xml_xpath_context.c +216 -136
- data/ext/nokogiri/xslt_stylesheet.c +118 -64
- data/gumbo-parser/Makefile +10 -0
- data/gumbo-parser/src/attribute.h +1 -1
- data/gumbo-parser/src/error.c +10 -6
- data/gumbo-parser/src/error.h +1 -1
- data/gumbo-parser/src/foreign_attrs.c +15 -16
- data/gumbo-parser/src/foreign_attrs.gperf +1 -1
- data/gumbo-parser/src/{gumbo.h → nokogiri_gumbo.h} +1 -0
- data/gumbo-parser/src/parser.c +21 -5
- data/gumbo-parser/src/replacement.h +1 -1
- data/gumbo-parser/src/string_buffer.h +1 -1
- data/gumbo-parser/src/string_piece.c +1 -1
- data/gumbo-parser/src/svg_attrs.c +2 -2
- data/gumbo-parser/src/svg_tags.c +2 -2
- data/gumbo-parser/src/tag.c +2 -1
- data/gumbo-parser/src/tag_lookup.c +7 -7
- data/gumbo-parser/src/tag_lookup.gperf +1 -0
- data/gumbo-parser/src/tag_lookup.h +1 -1
- data/gumbo-parser/src/token_buffer.h +1 -1
- data/gumbo-parser/src/tokenizer.c +1 -1
- data/gumbo-parser/src/tokenizer.h +1 -1
- data/gumbo-parser/src/utf8.c +1 -1
- data/gumbo-parser/src/utf8.h +1 -1
- data/gumbo-parser/src/util.c +1 -3
- data/gumbo-parser/src/util.h +4 -0
- data/gumbo-parser/src/vector.h +1 -1
- data/lib/nokogiri/css/node.rb +2 -2
- data/lib/nokogiri/css/xpath_visitor.rb +7 -5
- data/lib/nokogiri/css.rb +6 -0
- data/lib/nokogiri/decorators/slop.rb +1 -1
- data/lib/nokogiri/encoding_handler.rb +57 -0
- data/lib/nokogiri/extension.rb +4 -3
- data/lib/nokogiri/html4/document.rb +2 -121
- data/lib/nokogiri/html4/document_fragment.rb +1 -1
- data/lib/nokogiri/html4/element_description_defaults.rb +1827 -365
- data/lib/nokogiri/html4/encoding_reader.rb +121 -0
- data/lib/nokogiri/html4.rb +1 -0
- data/lib/nokogiri/html5/document.rb +113 -36
- data/lib/nokogiri/html5/document_fragment.rb +10 -3
- data/lib/nokogiri/html5/node.rb +8 -5
- data/lib/nokogiri/html5.rb +130 -216
- data/lib/nokogiri/jruby/dependencies.rb +1 -19
- data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
- data/lib/nokogiri/version/constant.rb +1 -1
- data/lib/nokogiri/version/info.rb +11 -10
- data/lib/nokogiri/xml/attr.rb +49 -0
- data/lib/nokogiri/xml/attribute_decl.rb +4 -2
- data/lib/nokogiri/xml/builder.rb +1 -1
- data/lib/nokogiri/xml/document.rb +102 -55
- data/lib/nokogiri/xml/document_fragment.rb +50 -7
- data/lib/nokogiri/xml/element_content.rb +10 -2
- data/lib/nokogiri/xml/element_decl.rb +4 -2
- data/lib/nokogiri/xml/entity_decl.rb +4 -2
- data/lib/nokogiri/xml/namespace.rb +42 -0
- data/lib/nokogiri/xml/node/save_options.rb +14 -4
- data/lib/nokogiri/xml/node.rb +212 -48
- data/lib/nokogiri/xml/node_set.rb +88 -9
- data/lib/nokogiri/xml/parse_options.rb +129 -50
- data/lib/nokogiri/xml/pp/node.rb +28 -15
- data/lib/nokogiri/xml/processing_instruction.rb +2 -1
- data/lib/nokogiri/xml/sax/document.rb +1 -1
- data/lib/nokogiri/xml/sax/parser.rb +2 -3
- data/lib/nokogiri/xml/searchable.rb +18 -10
- data/lib/nokogiri/xslt.rb +74 -4
- data/lib/nokogiri.rb +15 -15
- data/lib/xsd/xmlparser/nokogiri.rb +4 -2
- data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
- data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
- data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
- data/ports/archives/libxml2-2.11.7.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.39.tar.xz +0 -0
- metadata +19 -242
- data/patches/libxml2/0004-use-glibc-strlen.patch +0 -53
- data/patches/libxml2/0005-avoid-isnan-isinf.patch +0 -81
- data/patches/libxml2/0006-update-automake-files-for-arm64.patch +0 -3040
- data/patches/libxml2/0008-htmlParseComment-handle-abruptly-closed-comments.patch +0 -61
- data/patches/libxslt/0001-update-automake-files-for-arm64.patch +0 -3037
- data/ports/archives/libxml2-2.9.14.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.35.tar.xz +0 -0
data/ext/nokogiri/xml_reader.c
CHANGED
@@ -3,13 +3,20 @@
|
|
3
3
|
VALUE cNokogiriXmlReader;
|
4
4
|
|
5
5
|
static void
|
6
|
-
|
6
|
+
xml_reader_deallocate(void *data)
|
7
7
|
{
|
8
|
-
|
8
|
+
xmlTextReaderPtr reader = data;
|
9
9
|
xmlFreeTextReader(reader);
|
10
|
-
NOKOGIRI_DEBUG_END(reader);
|
11
10
|
}
|
12
11
|
|
12
|
+
static const rb_data_type_t xml_reader_type = {
|
13
|
+
.wrap_struct_name = "Nokogiri::XML::Reader",
|
14
|
+
.function = {
|
15
|
+
.dfree = xml_reader_deallocate,
|
16
|
+
},
|
17
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
|
18
|
+
};
|
19
|
+
|
13
20
|
static int
|
14
21
|
has_attributes(xmlTextReaderPtr reader)
|
15
22
|
{
|
@@ -71,7 +78,7 @@ default_eh(VALUE self)
|
|
71
78
|
xmlTextReaderPtr reader;
|
72
79
|
int eh;
|
73
80
|
|
74
|
-
|
81
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
75
82
|
eh = xmlTextReaderIsDefault(reader);
|
76
83
|
if (eh == 0) { return Qfalse; }
|
77
84
|
if (eh == 1) { return Qtrue; }
|
@@ -91,7 +98,7 @@ value_eh(VALUE self)
|
|
91
98
|
xmlTextReaderPtr reader;
|
92
99
|
int eh;
|
93
100
|
|
94
|
-
|
101
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
95
102
|
eh = xmlTextReaderHasValue(reader);
|
96
103
|
if (eh == 0) { return Qfalse; }
|
97
104
|
if (eh == 1) { return Qtrue; }
|
@@ -111,7 +118,7 @@ attributes_eh(VALUE self)
|
|
111
118
|
xmlTextReaderPtr reader;
|
112
119
|
int eh;
|
113
120
|
|
114
|
-
|
121
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
115
122
|
eh = has_attributes(reader);
|
116
123
|
if (eh == 0) { return Qfalse; }
|
117
124
|
if (eh == 1) { return Qtrue; }
|
@@ -126,26 +133,37 @@ attributes_eh(VALUE self)
|
|
126
133
|
* Get a hash of namespaces for this Node
|
127
134
|
*/
|
128
135
|
static VALUE
|
129
|
-
|
136
|
+
rb_xml_reader_namespaces(VALUE rb_reader)
|
130
137
|
{
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
Data_Get_Struct(self, xmlTextReader, reader);
|
138
|
+
VALUE rb_namespaces = rb_hash_new() ;
|
139
|
+
xmlTextReaderPtr c_reader;
|
140
|
+
xmlNodePtr c_node;
|
141
|
+
VALUE rb_errors;
|
136
142
|
|
137
|
-
|
143
|
+
TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_reader_type, c_reader);
|
138
144
|
|
139
|
-
if (! has_attributes(
|
140
|
-
return
|
145
|
+
if (! has_attributes(c_reader)) {
|
146
|
+
return rb_namespaces ;
|
141
147
|
}
|
142
148
|
|
143
|
-
|
144
|
-
|
149
|
+
rb_errors = rb_funcall(rb_reader, rb_intern("errors"), 0);
|
150
|
+
|
151
|
+
xmlSetStructuredErrorFunc((void *)rb_errors, Nokogiri_error_array_pusher);
|
152
|
+
c_node = xmlTextReaderExpand(c_reader);
|
153
|
+
xmlSetStructuredErrorFunc(NULL, NULL);
|
154
|
+
|
155
|
+
if (c_node == NULL) {
|
156
|
+
if (RARRAY_LEN(rb_errors) > 0) {
|
157
|
+
VALUE rb_error = rb_ary_entry(rb_errors, 0);
|
158
|
+
VALUE exception_message = rb_funcall(rb_error, rb_intern("to_s"), 0);
|
159
|
+
rb_exc_raise(rb_class_new_instance(1, &exception_message, cNokogiriXmlSyntaxError));
|
160
|
+
}
|
161
|
+
return Qnil;
|
162
|
+
}
|
145
163
|
|
146
|
-
Nokogiri_xml_node_namespaces(
|
164
|
+
Nokogiri_xml_node_namespaces(c_node, rb_namespaces);
|
147
165
|
|
148
|
-
return
|
166
|
+
return rb_namespaces ;
|
149
167
|
}
|
150
168
|
|
151
169
|
/*
|
@@ -169,7 +187,7 @@ rb_xml_reader_attribute_nodes(VALUE rb_reader)
|
|
169
187
|
// After removal, we can also remove all the "node_has_a_document" special handling from xml_node.c
|
170
188
|
NOKO_WARN_DEPRECATION("Reader#attribute_nodes is deprecated and will be removed in a future version of Nokogiri. Please use Reader#attribute_hash instead.");
|
171
189
|
|
172
|
-
|
190
|
+
TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_reader_type, c_reader);
|
173
191
|
|
174
192
|
if (! has_attributes(c_reader)) {
|
175
193
|
return rb_ary_new() ;
|
@@ -204,14 +222,29 @@ rb_xml_reader_attribute_hash(VALUE rb_reader)
|
|
204
222
|
xmlTextReaderPtr c_reader;
|
205
223
|
xmlNodePtr c_node;
|
206
224
|
xmlAttrPtr c_property;
|
225
|
+
VALUE rb_errors;
|
207
226
|
|
208
|
-
|
227
|
+
TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_reader_type, c_reader);
|
209
228
|
|
210
229
|
if (!has_attributes(c_reader)) {
|
211
230
|
return rb_attributes;
|
212
231
|
}
|
213
232
|
|
233
|
+
rb_errors = rb_funcall(rb_reader, rb_intern("errors"), 0);
|
234
|
+
|
235
|
+
xmlSetStructuredErrorFunc((void *)rb_errors, Nokogiri_error_array_pusher);
|
214
236
|
c_node = xmlTextReaderExpand(c_reader);
|
237
|
+
xmlSetStructuredErrorFunc(NULL, NULL);
|
238
|
+
|
239
|
+
if (c_node == NULL) {
|
240
|
+
if (RARRAY_LEN(rb_errors) > 0) {
|
241
|
+
VALUE rb_error = rb_ary_entry(rb_errors, 0);
|
242
|
+
VALUE exception_message = rb_funcall(rb_error, rb_intern("to_s"), 0);
|
243
|
+
rb_exc_raise(rb_class_new_instance(1, &exception_message, cNokogiriXmlSyntaxError));
|
244
|
+
}
|
245
|
+
return Qnil;
|
246
|
+
}
|
247
|
+
|
215
248
|
c_property = c_node->properties;
|
216
249
|
while (c_property != NULL) {
|
217
250
|
VALUE rb_name = NOKOGIRI_STR_NEW2(c_property->name);
|
@@ -244,7 +277,7 @@ attribute_at(VALUE self, VALUE index)
|
|
244
277
|
xmlChar *value;
|
245
278
|
VALUE rb_value;
|
246
279
|
|
247
|
-
|
280
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
248
281
|
|
249
282
|
if (NIL_P(index)) { return Qnil; }
|
250
283
|
index = rb_Integer(index);
|
@@ -273,7 +306,7 @@ reader_attribute(VALUE self, VALUE name)
|
|
273
306
|
xmlChar *value ;
|
274
307
|
VALUE rb_value;
|
275
308
|
|
276
|
-
|
309
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
277
310
|
|
278
311
|
if (NIL_P(name)) { return Qnil; }
|
279
312
|
name = StringValue(name) ;
|
@@ -298,11 +331,11 @@ attribute_count(VALUE self)
|
|
298
331
|
xmlTextReaderPtr reader;
|
299
332
|
int count;
|
300
333
|
|
301
|
-
|
334
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
302
335
|
count = xmlTextReaderAttributeCount(reader);
|
303
336
|
if (count == -1) { return Qnil; }
|
304
337
|
|
305
|
-
return INT2NUM(
|
338
|
+
return INT2NUM(count);
|
306
339
|
}
|
307
340
|
|
308
341
|
/*
|
@@ -317,11 +350,11 @@ depth(VALUE self)
|
|
317
350
|
xmlTextReaderPtr reader;
|
318
351
|
int depth;
|
319
352
|
|
320
|
-
|
353
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
321
354
|
depth = xmlTextReaderDepth(reader);
|
322
355
|
if (depth == -1) { return Qnil; }
|
323
356
|
|
324
|
-
return INT2NUM(
|
357
|
+
return INT2NUM(depth);
|
325
358
|
}
|
326
359
|
|
327
360
|
/*
|
@@ -336,7 +369,7 @@ xml_version(VALUE self)
|
|
336
369
|
xmlTextReaderPtr reader;
|
337
370
|
const char *version;
|
338
371
|
|
339
|
-
|
372
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
340
373
|
version = (const char *)xmlTextReaderConstXmlVersion(reader);
|
341
374
|
if (version == NULL) { return Qnil; }
|
342
375
|
|
@@ -355,7 +388,7 @@ lang(VALUE self)
|
|
355
388
|
xmlTextReaderPtr reader;
|
356
389
|
const char *lang;
|
357
390
|
|
358
|
-
|
391
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
359
392
|
lang = (const char *)xmlTextReaderConstXmlLang(reader);
|
360
393
|
if (lang == NULL) { return Qnil; }
|
361
394
|
|
@@ -374,7 +407,7 @@ value(VALUE self)
|
|
374
407
|
xmlTextReaderPtr reader;
|
375
408
|
const char *value;
|
376
409
|
|
377
|
-
|
410
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
378
411
|
value = (const char *)xmlTextReaderConstValue(reader);
|
379
412
|
if (value == NULL) { return Qnil; }
|
380
413
|
|
@@ -393,7 +426,7 @@ prefix(VALUE self)
|
|
393
426
|
xmlTextReaderPtr reader;
|
394
427
|
const char *prefix;
|
395
428
|
|
396
|
-
|
429
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
397
430
|
prefix = (const char *)xmlTextReaderConstPrefix(reader);
|
398
431
|
if (prefix == NULL) { return Qnil; }
|
399
432
|
|
@@ -412,7 +445,7 @@ namespace_uri(VALUE self)
|
|
412
445
|
xmlTextReaderPtr reader;
|
413
446
|
const char *uri;
|
414
447
|
|
415
|
-
|
448
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
416
449
|
uri = (const char *)xmlTextReaderConstNamespaceUri(reader);
|
417
450
|
if (uri == NULL) { return Qnil; }
|
418
451
|
|
@@ -431,7 +464,7 @@ local_name(VALUE self)
|
|
431
464
|
xmlTextReaderPtr reader;
|
432
465
|
const char *name;
|
433
466
|
|
434
|
-
|
467
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
435
468
|
name = (const char *)xmlTextReaderConstLocalName(reader);
|
436
469
|
if (name == NULL) { return Qnil; }
|
437
470
|
|
@@ -450,7 +483,7 @@ name(VALUE self)
|
|
450
483
|
xmlTextReaderPtr reader;
|
451
484
|
const char *name;
|
452
485
|
|
453
|
-
|
486
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
454
487
|
name = (const char *)xmlTextReaderConstName(reader);
|
455
488
|
if (name == NULL) { return Qnil; }
|
456
489
|
|
@@ -470,7 +503,7 @@ rb_xml_reader_base_uri(VALUE rb_reader)
|
|
470
503
|
xmlTextReaderPtr c_reader;
|
471
504
|
xmlChar *c_base_uri;
|
472
505
|
|
473
|
-
|
506
|
+
TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_reader_type, c_reader);
|
474
507
|
|
475
508
|
c_base_uri = xmlTextReaderBaseUri(c_reader);
|
476
509
|
if (c_base_uri == NULL) {
|
@@ -493,8 +526,8 @@ static VALUE
|
|
493
526
|
state(VALUE self)
|
494
527
|
{
|
495
528
|
xmlTextReaderPtr reader;
|
496
|
-
|
497
|
-
return INT2NUM(
|
529
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
530
|
+
return INT2NUM(xmlTextReaderReadState(reader));
|
498
531
|
}
|
499
532
|
|
500
533
|
/*
|
@@ -507,8 +540,8 @@ static VALUE
|
|
507
540
|
node_type(VALUE self)
|
508
541
|
{
|
509
542
|
xmlTextReaderPtr reader;
|
510
|
-
|
511
|
-
return INT2NUM(
|
543
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
544
|
+
return INT2NUM(xmlTextReaderNodeType(reader));
|
512
545
|
}
|
513
546
|
|
514
547
|
/*
|
@@ -525,7 +558,7 @@ read_more(VALUE self)
|
|
525
558
|
VALUE error_list;
|
526
559
|
int ret;
|
527
560
|
|
528
|
-
|
561
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
529
562
|
|
530
563
|
error_list = rb_funcall(self, rb_intern("errors"), 0);
|
531
564
|
|
@@ -560,7 +593,7 @@ inner_xml(VALUE self)
|
|
560
593
|
xmlChar *value;
|
561
594
|
VALUE str;
|
562
595
|
|
563
|
-
|
596
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
564
597
|
|
565
598
|
value = xmlTextReaderReadInnerXml(reader);
|
566
599
|
|
@@ -587,7 +620,7 @@ outer_xml(VALUE self)
|
|
587
620
|
xmlChar *value;
|
588
621
|
VALUE str = Qnil;
|
589
622
|
|
590
|
-
|
623
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
591
624
|
|
592
625
|
value = xmlTextReaderReadOuterXml(reader);
|
593
626
|
|
@@ -634,7 +667,7 @@ from_memory(int argc, VALUE *argv, VALUE klass)
|
|
634
667
|
rb_raise(rb_eRuntimeError, "couldn't create a parser");
|
635
668
|
}
|
636
669
|
|
637
|
-
rb_reader =
|
670
|
+
rb_reader = TypedData_Wrap_Struct(klass, &xml_reader_type, reader);
|
638
671
|
args[0] = rb_buffer;
|
639
672
|
args[1] = rb_url;
|
640
673
|
args[2] = encoding;
|
@@ -680,7 +713,7 @@ from_io(int argc, VALUE *argv, VALUE klass)
|
|
680
713
|
rb_raise(rb_eRuntimeError, "couldn't create a parser");
|
681
714
|
}
|
682
715
|
|
683
|
-
rb_reader =
|
716
|
+
rb_reader = TypedData_Wrap_Struct(klass, &xml_reader_type, reader);
|
684
717
|
args[0] = rb_io;
|
685
718
|
args[1] = rb_url;
|
686
719
|
args[2] = encoding;
|
@@ -700,7 +733,7 @@ empty_element_p(VALUE self)
|
|
700
733
|
{
|
701
734
|
xmlTextReaderPtr reader;
|
702
735
|
|
703
|
-
|
736
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
|
704
737
|
|
705
738
|
if (xmlTextReaderIsEmptyElement(reader)) {
|
706
739
|
return Qtrue;
|
@@ -721,14 +754,14 @@ rb_xml_reader_encoding(VALUE rb_reader)
|
|
721
754
|
return constructor_encoding;
|
722
755
|
}
|
723
756
|
|
724
|
-
|
757
|
+
TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_reader_type, c_reader);
|
725
758
|
parser_encoding = (const char *)xmlTextReaderConstEncoding(c_reader);
|
726
759
|
if (parser_encoding == NULL) { return Qnil; }
|
727
760
|
return NOKOGIRI_STR_NEW2(parser_encoding);
|
728
761
|
}
|
729
762
|
|
730
763
|
void
|
731
|
-
noko_init_xml_reader()
|
764
|
+
noko_init_xml_reader(void)
|
732
765
|
{
|
733
766
|
/*
|
734
767
|
* The Reader parser allows you to effectively pull parse an XML document.
|
@@ -758,7 +791,7 @@ noko_init_xml_reader()
|
|
758
791
|
rb_define_method(cNokogiriXmlReader, "local_name", local_name, 0);
|
759
792
|
rb_define_method(cNokogiriXmlReader, "name", name, 0);
|
760
793
|
rb_define_method(cNokogiriXmlReader, "namespace_uri", namespace_uri, 0);
|
761
|
-
rb_define_method(cNokogiriXmlReader, "namespaces",
|
794
|
+
rb_define_method(cNokogiriXmlReader, "namespaces", rb_xml_reader_namespaces, 0);
|
762
795
|
rb_define_method(cNokogiriXmlReader, "node_type", node_type, 0);
|
763
796
|
rb_define_method(cNokogiriXmlReader, "outer_xml", outer_xml, 0);
|
764
797
|
rb_define_method(cNokogiriXmlReader, "prefix", prefix, 0);
|
data/ext/nokogiri/xml_relax_ng.c
CHANGED
@@ -3,13 +3,20 @@
|
|
3
3
|
VALUE cNokogiriXmlRelaxNG;
|
4
4
|
|
5
5
|
static void
|
6
|
-
|
6
|
+
xml_relax_ng_deallocate(void *data)
|
7
7
|
{
|
8
|
-
|
8
|
+
xmlRelaxNGPtr schema = data;
|
9
9
|
xmlRelaxNGFree(schema);
|
10
|
-
NOKOGIRI_DEBUG_END(schema);
|
11
10
|
}
|
12
11
|
|
12
|
+
static const rb_data_type_t xml_relax_ng_type = {
|
13
|
+
.wrap_struct_name = "Nokogiri::XML::RelaxNG",
|
14
|
+
.function = {
|
15
|
+
.dfree = xml_relax_ng_deallocate,
|
16
|
+
},
|
17
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
|
18
|
+
};
|
19
|
+
|
13
20
|
/*
|
14
21
|
* call-seq:
|
15
22
|
* validate_document(document)
|
@@ -24,8 +31,8 @@ validate_document(VALUE self, VALUE document)
|
|
24
31
|
VALUE errors;
|
25
32
|
xmlRelaxNGValidCtxtPtr valid_ctxt;
|
26
33
|
|
27
|
-
|
28
|
-
|
34
|
+
TypedData_Get_Struct(self, xmlRelaxNG, &xml_relax_ng_type, schema);
|
35
|
+
doc = noko_xml_document_unwrap(document);
|
29
36
|
|
30
37
|
errors = rb_ary_new();
|
31
38
|
|
@@ -51,47 +58,41 @@ validate_document(VALUE self, VALUE document)
|
|
51
58
|
return errors;
|
52
59
|
}
|
53
60
|
|
54
|
-
/*
|
55
|
-
* call-seq:
|
56
|
-
* read_memory(string)
|
57
|
-
*
|
58
|
-
* Create a new RelaxNG from the contents of +string+
|
59
|
-
*/
|
60
61
|
static VALUE
|
61
|
-
|
62
|
+
xml_relax_ng_parse_schema(
|
63
|
+
VALUE klass,
|
64
|
+
xmlRelaxNGParserCtxtPtr c_parser_context,
|
65
|
+
VALUE rb_parse_options
|
66
|
+
)
|
62
67
|
{
|
63
|
-
VALUE
|
64
|
-
VALUE parse_options;
|
65
|
-
xmlRelaxNGParserCtxtPtr ctx;
|
66
|
-
xmlRelaxNGPtr schema;
|
67
|
-
VALUE errors;
|
68
|
+
VALUE rb_errors;
|
68
69
|
VALUE rb_schema;
|
69
|
-
|
70
|
+
xmlRelaxNGPtr c_schema;
|
70
71
|
|
71
|
-
|
72
|
-
|
73
|
-
|
72
|
+
if (NIL_P(rb_parse_options)) {
|
73
|
+
rb_parse_options = rb_const_get_at(
|
74
|
+
rb_const_get_at(mNokogiriXml, rb_intern("ParseOptions")),
|
75
|
+
rb_intern("DEFAULT_SCHEMA")
|
76
|
+
);
|
74
77
|
}
|
75
78
|
|
76
|
-
|
77
|
-
|
78
|
-
errors = rb_ary_new();
|
79
|
-
xmlSetStructuredErrorFunc((void *)errors, Nokogiri_error_array_pusher);
|
79
|
+
rb_errors = rb_ary_new();
|
80
|
+
xmlSetStructuredErrorFunc((void *)rb_errors, Nokogiri_error_array_pusher);
|
80
81
|
|
81
82
|
#ifdef HAVE_XMLRELAXNGSETPARSERSTRUCTUREDERRORS
|
82
83
|
xmlRelaxNGSetParserStructuredErrors(
|
83
|
-
|
84
|
+
c_parser_context,
|
84
85
|
Nokogiri_error_array_pusher,
|
85
|
-
(void *)
|
86
|
+
(void *)rb_errors
|
86
87
|
);
|
87
88
|
#endif
|
88
89
|
|
89
|
-
|
90
|
+
c_schema = xmlRelaxNGParse(c_parser_context);
|
90
91
|
|
91
92
|
xmlSetStructuredErrorFunc(NULL, NULL);
|
92
|
-
xmlRelaxNGFreeParserCtxt(
|
93
|
+
xmlRelaxNGFreeParserCtxt(c_parser_context);
|
93
94
|
|
94
|
-
if (NULL ==
|
95
|
+
if (NULL == c_schema) {
|
95
96
|
xmlErrorPtr error = xmlGetLastError();
|
96
97
|
if (error) {
|
97
98
|
Nokogiri_error_raise(NULL, error);
|
@@ -102,78 +103,62 @@ read_memory(int argc, VALUE *argv, VALUE klass)
|
|
102
103
|
return Qnil;
|
103
104
|
}
|
104
105
|
|
105
|
-
rb_schema =
|
106
|
-
rb_iv_set(rb_schema, "@errors",
|
107
|
-
rb_iv_set(rb_schema, "@parse_options",
|
106
|
+
rb_schema = TypedData_Wrap_Struct(klass, &xml_relax_ng_type, c_schema);
|
107
|
+
rb_iv_set(rb_schema, "@errors", rb_errors);
|
108
|
+
rb_iv_set(rb_schema, "@parse_options", rb_parse_options);
|
108
109
|
|
109
110
|
return rb_schema;
|
110
111
|
}
|
111
112
|
|
112
113
|
/*
|
113
114
|
* call-seq:
|
114
|
-
*
|
115
|
+
* read_memory(string)
|
115
116
|
*
|
116
|
-
* Create a new RelaxNG
|
117
|
+
* Create a new RelaxNG from the contents of +string+
|
117
118
|
*/
|
118
119
|
static VALUE
|
119
|
-
|
120
|
+
read_memory(int argc, VALUE *argv, VALUE klass)
|
120
121
|
{
|
121
|
-
VALUE
|
122
|
-
VALUE
|
123
|
-
|
124
|
-
xmlRelaxNGParserCtxtPtr ctx;
|
125
|
-
xmlRelaxNGPtr schema;
|
126
|
-
VALUE errors;
|
127
|
-
VALUE rb_schema;
|
128
|
-
int scanned_args = 0;
|
122
|
+
VALUE rb_content;
|
123
|
+
VALUE rb_parse_options;
|
124
|
+
xmlRelaxNGParserCtxtPtr c_parser_context;
|
129
125
|
|
130
|
-
|
126
|
+
rb_scan_args(argc, argv, "11", &rb_content, &rb_parse_options);
|
131
127
|
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
parse_options = rb_const_get_at(rb_const_get_at(mNokogiriXml, rb_intern("ParseOptions")), rb_intern("DEFAULT_SCHEMA"));
|
137
|
-
}
|
138
|
-
|
139
|
-
ctx = xmlRelaxNGNewDocParserCtxt(doc);
|
140
|
-
|
141
|
-
errors = rb_ary_new();
|
142
|
-
xmlSetStructuredErrorFunc((void *)errors, Nokogiri_error_array_pusher);
|
128
|
+
c_parser_context = xmlRelaxNGNewMemParserCtxt(
|
129
|
+
(const char *)StringValuePtr(rb_content),
|
130
|
+
(int)RSTRING_LEN(rb_content)
|
131
|
+
);
|
143
132
|
|
144
|
-
|
145
|
-
|
146
|
-
ctx,
|
147
|
-
Nokogiri_error_array_pusher,
|
148
|
-
(void *)errors
|
149
|
-
);
|
150
|
-
#endif
|
151
|
-
|
152
|
-
schema = xmlRelaxNGParse(ctx);
|
133
|
+
return xml_relax_ng_parse_schema(klass, c_parser_context, rb_parse_options);
|
134
|
+
}
|
153
135
|
|
154
|
-
|
155
|
-
|
136
|
+
/*
|
137
|
+
* call-seq:
|
138
|
+
* from_document(doc)
|
139
|
+
*
|
140
|
+
* Create a new RelaxNG schema from the Nokogiri::XML::Document +doc+
|
141
|
+
*/
|
142
|
+
static VALUE
|
143
|
+
from_document(int argc, VALUE *argv, VALUE klass)
|
144
|
+
{
|
145
|
+
VALUE rb_document;
|
146
|
+
VALUE rb_parse_options;
|
147
|
+
xmlDocPtr c_document;
|
148
|
+
xmlRelaxNGParserCtxtPtr c_parser_context;
|
156
149
|
|
157
|
-
|
158
|
-
xmlErrorPtr error = xmlGetLastError();
|
159
|
-
if (error) {
|
160
|
-
Nokogiri_error_raise(NULL, error);
|
161
|
-
} else {
|
162
|
-
rb_raise(rb_eRuntimeError, "Could not parse document");
|
163
|
-
}
|
150
|
+
rb_scan_args(argc, argv, "11", &rb_document, &rb_parse_options);
|
164
151
|
|
165
|
-
|
166
|
-
|
152
|
+
c_document = noko_xml_document_unwrap(rb_document);
|
153
|
+
c_document = c_document->doc; /* In case someone passes us a node. ugh. */
|
167
154
|
|
168
|
-
|
169
|
-
rb_iv_set(rb_schema, "@errors", errors);
|
170
|
-
rb_iv_set(rb_schema, "@parse_options", parse_options);
|
155
|
+
c_parser_context = xmlRelaxNGNewDocParserCtxt(c_document);
|
171
156
|
|
172
|
-
return
|
157
|
+
return xml_relax_ng_parse_schema(klass, c_parser_context, rb_parse_options);
|
173
158
|
}
|
174
159
|
|
175
160
|
void
|
176
|
-
noko_init_xml_relax_ng()
|
161
|
+
noko_init_xml_relax_ng(void)
|
177
162
|
{
|
178
163
|
assert(cNokogiriXmlSchema);
|
179
164
|
cNokogiriXmlRelaxNG = rb_define_class_under(mNokogiriXml, "RelaxNG", cNokogiriXmlSchema);
|
@@ -195,40 +195,48 @@ comment_func(void *ctx, const xmlChar *value)
|
|
195
195
|
rb_funcall(doc, id_comment, 1, str);
|
196
196
|
}
|
197
197
|
|
198
|
+
PRINTFLIKE_DECL(2, 3)
|
198
199
|
static void
|
199
200
|
warning_func(void *ctx, const char *msg, ...)
|
200
201
|
{
|
201
202
|
VALUE self = NOKOGIRI_SAX_SELF(ctx);
|
202
203
|
VALUE doc = rb_iv_get(self, "@document");
|
203
|
-
|
204
|
-
VALUE ruby_message;
|
204
|
+
VALUE rb_message;
|
205
205
|
|
206
|
+
#ifdef TRUFFLERUBY_NOKOGIRI_SYSTEM_LIBRARIES
|
207
|
+
/* It is not currently possible to pass var args from native
|
208
|
+
functions to sulong, so we work around the issue here. */
|
209
|
+
rb_message = rb_sprintf("warning_func: %s", msg);
|
210
|
+
#else
|
206
211
|
va_list args;
|
207
212
|
va_start(args, msg);
|
208
|
-
|
213
|
+
rb_message = rb_vsprintf(msg, args);
|
209
214
|
va_end(args);
|
215
|
+
#endif
|
210
216
|
|
211
|
-
|
212
|
-
free(message);
|
213
|
-
rb_funcall(doc, id_warning, 1, ruby_message);
|
217
|
+
rb_funcall(doc, id_warning, 1, rb_message);
|
214
218
|
}
|
215
219
|
|
220
|
+
PRINTFLIKE_DECL(2, 3)
|
216
221
|
static void
|
217
222
|
error_func(void *ctx, const char *msg, ...)
|
218
223
|
{
|
219
224
|
VALUE self = NOKOGIRI_SAX_SELF(ctx);
|
220
225
|
VALUE doc = rb_iv_get(self, "@document");
|
221
|
-
|
222
|
-
VALUE ruby_message;
|
226
|
+
VALUE rb_message;
|
223
227
|
|
228
|
+
#ifdef TRUFFLERUBY_NOKOGIRI_SYSTEM_LIBRARIES
|
229
|
+
/* It is not currently possible to pass var args from native
|
230
|
+
functions to sulong, so we work around the issue here. */
|
231
|
+
rb_message = rb_sprintf("error_func: %s", msg);
|
232
|
+
#else
|
224
233
|
va_list args;
|
225
234
|
va_start(args, msg);
|
226
|
-
|
235
|
+
rb_message = rb_vsprintf(msg, args);
|
227
236
|
va_end(args);
|
237
|
+
#endif
|
228
238
|
|
229
|
-
|
230
|
-
free(message);
|
231
|
-
rb_funcall(doc, id_error, 1, ruby_message);
|
239
|
+
rb_funcall(doc, id_error, 1, rb_message);
|
232
240
|
}
|
233
241
|
|
234
242
|
static void
|
@@ -257,18 +265,27 @@ processing_instruction(void *ctx, const xmlChar *name, const xmlChar *content)
|
|
257
265
|
);
|
258
266
|
}
|
259
267
|
|
260
|
-
static
|
261
|
-
|
268
|
+
static size_t
|
269
|
+
memsize(const void *data)
|
262
270
|
{
|
263
|
-
|
264
|
-
free(handler);
|
265
|
-
NOKOGIRI_DEBUG_END(handler);
|
271
|
+
return sizeof(xmlSAXHandler);
|
266
272
|
}
|
267
273
|
|
274
|
+
/* Used by Nokogiri::XML::SAX::Parser and Nokogiri::HTML::SAX::Parser */
|
275
|
+
static const rb_data_type_t noko_sax_handler_type = {
|
276
|
+
.wrap_struct_name = "Nokogiri::SAXHandler",
|
277
|
+
.function = {
|
278
|
+
.dfree = RUBY_TYPED_DEFAULT_FREE,
|
279
|
+
.dsize = memsize
|
280
|
+
},
|
281
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
|
282
|
+
};
|
283
|
+
|
268
284
|
static VALUE
|
269
285
|
allocate(VALUE klass)
|
270
286
|
{
|
271
|
-
xmlSAXHandlerPtr handler
|
287
|
+
xmlSAXHandlerPtr handler;
|
288
|
+
VALUE self = TypedData_Make_Struct(klass, xmlSAXHandler, &noko_sax_handler_type, handler);
|
272
289
|
|
273
290
|
handler->startDocument = start_document;
|
274
291
|
handler->endDocument = end_document;
|
@@ -284,11 +301,19 @@ allocate(VALUE klass)
|
|
284
301
|
handler->processingInstruction = processing_instruction;
|
285
302
|
handler->initialized = XML_SAX2_MAGIC;
|
286
303
|
|
287
|
-
return
|
304
|
+
return self;
|
305
|
+
}
|
306
|
+
|
307
|
+
xmlSAXHandlerPtr
|
308
|
+
noko_sax_handler_unwrap(VALUE rb_sax_handler)
|
309
|
+
{
|
310
|
+
xmlSAXHandlerPtr c_sax_handler;
|
311
|
+
TypedData_Get_Struct(rb_sax_handler, xmlSAXHandler, &noko_sax_handler_type, c_sax_handler);
|
312
|
+
return c_sax_handler;
|
288
313
|
}
|
289
314
|
|
290
315
|
void
|
291
|
-
noko_init_xml_sax_parser()
|
316
|
+
noko_init_xml_sax_parser(void)
|
292
317
|
{
|
293
318
|
cNokogiriXmlSaxParser = rb_define_class_under(mNokogiriXmlSax, "Parser", rb_cObject);
|
294
319
|
|