nokogiri 1.15.4 → 1.17.2
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +12 -19
- data/README.md +8 -1
- data/dependencies.yml +9 -8
- data/ext/nokogiri/extconf.rb +194 -141
- data/ext/nokogiri/gumbo.c +69 -53
- data/ext/nokogiri/html4_document.c +10 -4
- data/ext/nokogiri/html4_element_description.c +18 -18
- data/ext/nokogiri/html4_sax_parser.c +40 -0
- data/ext/nokogiri/html4_sax_parser_context.c +48 -58
- data/ext/nokogiri/html4_sax_push_parser.c +26 -25
- data/ext/nokogiri/libxml2_polyfill.c +114 -0
- data/ext/nokogiri/nokogiri.c +9 -2
- data/ext/nokogiri/nokogiri.h +25 -33
- data/ext/nokogiri/test_global_handlers.c +1 -1
- data/ext/nokogiri/xml_attr.c +1 -1
- data/ext/nokogiri/xml_cdata.c +3 -12
- data/ext/nokogiri/xml_comment.c +3 -8
- data/ext/nokogiri/xml_document.c +167 -156
- data/ext/nokogiri/xml_document_fragment.c +10 -25
- data/ext/nokogiri/xml_dtd.c +1 -1
- data/ext/nokogiri/xml_element_content.c +9 -9
- data/ext/nokogiri/xml_encoding_handler.c +4 -4
- data/ext/nokogiri/xml_namespace.c +6 -10
- data/ext/nokogiri/xml_node.c +142 -108
- data/ext/nokogiri/xml_node_set.c +46 -44
- data/ext/nokogiri/xml_reader.c +74 -100
- data/ext/nokogiri/xml_relax_ng.c +35 -56
- data/ext/nokogiri/xml_sax_parser.c +156 -88
- data/ext/nokogiri/xml_sax_parser_context.c +214 -128
- data/ext/nokogiri/xml_sax_push_parser.c +69 -50
- data/ext/nokogiri/xml_schema.c +51 -87
- data/ext/nokogiri/xml_syntax_error.c +19 -11
- data/ext/nokogiri/xml_text.c +3 -6
- data/ext/nokogiri/xml_xpath_context.c +4 -7
- data/ext/nokogiri/xslt_stylesheet.c +16 -11
- data/gumbo-parser/Makefile +18 -0
- data/gumbo-parser/src/error.c +76 -48
- data/gumbo-parser/src/error.h +5 -1
- data/gumbo-parser/src/nokogiri_gumbo.h +11 -2
- data/gumbo-parser/src/parser.c +64 -23
- data/gumbo-parser/src/tokenizer.c +7 -6
- data/lib/nokogiri/class_resolver.rb +1 -1
- data/lib/nokogiri/css/node.rb +6 -2
- data/lib/nokogiri/css/parser.rb +6 -4
- data/lib/nokogiri/css/parser.y +2 -2
- data/lib/nokogiri/css/parser_extras.rb +6 -66
- data/lib/nokogiri/css/selector_cache.rb +38 -0
- data/lib/nokogiri/css/tokenizer.rb +4 -4
- data/lib/nokogiri/css/tokenizer.rex +9 -8
- data/lib/nokogiri/css/xpath_visitor.rb +43 -27
- data/lib/nokogiri/css.rb +86 -20
- data/lib/nokogiri/decorators/slop.rb +3 -5
- data/lib/nokogiri/encoding_handler.rb +2 -2
- data/lib/nokogiri/html4/document.rb +45 -24
- data/lib/nokogiri/html4/document_fragment.rb +124 -12
- data/lib/nokogiri/html4/encoding_reader.rb +2 -2
- data/lib/nokogiri/html4/sax/parser.rb +23 -38
- data/lib/nokogiri/html4/sax/parser_context.rb +4 -9
- data/lib/nokogiri/html4.rb +9 -14
- data/lib/nokogiri/html5/builder.rb +40 -0
- data/lib/nokogiri/html5/document.rb +61 -30
- data/lib/nokogiri/html5/document_fragment.rb +130 -20
- data/lib/nokogiri/html5/node.rb +4 -4
- data/lib/nokogiri/html5.rb +114 -138
- data/lib/nokogiri/version/constant.rb +1 -1
- data/lib/nokogiri/version/info.rb +6 -5
- data/lib/nokogiri/xml/attr.rb +2 -2
- data/lib/nokogiri/xml/builder.rb +8 -1
- data/lib/nokogiri/xml/document.rb +74 -31
- data/lib/nokogiri/xml/document_fragment.rb +86 -15
- data/lib/nokogiri/xml/namespace.rb +1 -2
- data/lib/nokogiri/xml/node.rb +113 -35
- data/lib/nokogiri/xml/node_set.rb +12 -10
- data/lib/nokogiri/xml/parse_options.rb +1 -1
- data/lib/nokogiri/xml/pp/node.rb +6 -1
- data/lib/nokogiri/xml/reader.rb +51 -17
- data/lib/nokogiri/xml/relax_ng.rb +57 -20
- data/lib/nokogiri/xml/sax/document.rb +174 -83
- data/lib/nokogiri/xml/sax/parser.rb +115 -41
- data/lib/nokogiri/xml/sax/parser_context.rb +116 -8
- data/lib/nokogiri/xml/sax/push_parser.rb +3 -0
- data/lib/nokogiri/xml/sax.rb +48 -0
- data/lib/nokogiri/xml/schema.rb +112 -45
- data/lib/nokogiri/xml/searchable.rb +9 -11
- data/lib/nokogiri/xml/syntax_error.rb +23 -1
- data/lib/nokogiri/xml.rb +14 -25
- data/lib/nokogiri/xslt/stylesheet.rb +29 -7
- data/lib/nokogiri/xslt.rb +4 -10
- data/lib/nokogiri.rb +1 -1
- data/lib/xsd/xmlparser/nokogiri.rb +3 -4
- data/ports/archives/libxml2-2.13.5.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.42.tar.xz +0 -0
- metadata +15 -14
- data/ext/nokogiri/libxml2_backwards_compat.c +0 -121
- data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +0 -25
- data/ports/archives/libxml2-2.11.5.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.38.tar.xz +0 -0
data/ext/nokogiri/xml_reader.c
CHANGED
@@ -5,12 +5,18 @@ VALUE cNokogiriXmlReader;
|
|
5
5
|
static void
|
6
6
|
xml_reader_deallocate(void *data)
|
7
7
|
{
|
8
|
+
// free the document separately because we _may_ have triggered preservation by calling
|
9
|
+
// xmlTextReaderCurrentDoc during a read_more.
|
8
10
|
xmlTextReaderPtr reader = data;
|
11
|
+
xmlDocPtr doc = xmlTextReaderCurrentDoc(reader);
|
9
12
|
xmlFreeTextReader(reader);
|
13
|
+
if (doc) {
|
14
|
+
xmlFreeDoc(doc);
|
15
|
+
}
|
10
16
|
}
|
11
17
|
|
12
|
-
static const rb_data_type_t
|
13
|
-
.wrap_struct_name = "
|
18
|
+
static const rb_data_type_t xml_text_reader_type = {
|
19
|
+
.wrap_struct_name = "xmlTextReader",
|
14
20
|
.function = {
|
15
21
|
.dfree = xml_reader_deallocate,
|
16
22
|
},
|
@@ -78,7 +84,7 @@ default_eh(VALUE self)
|
|
78
84
|
xmlTextReaderPtr reader;
|
79
85
|
int eh;
|
80
86
|
|
81
|
-
TypedData_Get_Struct(self, xmlTextReader, &
|
87
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
|
82
88
|
eh = xmlTextReaderIsDefault(reader);
|
83
89
|
if (eh == 0) { return Qfalse; }
|
84
90
|
if (eh == 1) { return Qtrue; }
|
@@ -98,7 +104,7 @@ value_eh(VALUE self)
|
|
98
104
|
xmlTextReaderPtr reader;
|
99
105
|
int eh;
|
100
106
|
|
101
|
-
TypedData_Get_Struct(self, xmlTextReader, &
|
107
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
|
102
108
|
eh = xmlTextReaderHasValue(reader);
|
103
109
|
if (eh == 0) { return Qfalse; }
|
104
110
|
if (eh == 1) { return Qtrue; }
|
@@ -118,7 +124,7 @@ attributes_eh(VALUE self)
|
|
118
124
|
xmlTextReaderPtr reader;
|
119
125
|
int eh;
|
120
126
|
|
121
|
-
TypedData_Get_Struct(self, xmlTextReader, &
|
127
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
|
122
128
|
eh = has_attributes(reader);
|
123
129
|
if (eh == 0) { return Qfalse; }
|
124
130
|
if (eh == 1) { return Qtrue; }
|
@@ -140,7 +146,7 @@ rb_xml_reader_namespaces(VALUE rb_reader)
|
|
140
146
|
xmlNodePtr c_node;
|
141
147
|
VALUE rb_errors;
|
142
148
|
|
143
|
-
TypedData_Get_Struct(rb_reader, xmlTextReader, &
|
149
|
+
TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_text_reader_type, c_reader);
|
144
150
|
|
145
151
|
if (! has_attributes(c_reader)) {
|
146
152
|
return rb_namespaces ;
|
@@ -148,7 +154,7 @@ rb_xml_reader_namespaces(VALUE rb_reader)
|
|
148
154
|
|
149
155
|
rb_errors = rb_funcall(rb_reader, rb_intern("errors"), 0);
|
150
156
|
|
151
|
-
xmlSetStructuredErrorFunc((void *)rb_errors,
|
157
|
+
xmlSetStructuredErrorFunc((void *)rb_errors, noko__error_array_pusher);
|
152
158
|
c_node = xmlTextReaderExpand(c_reader);
|
153
159
|
xmlSetStructuredErrorFunc(NULL, NULL);
|
154
160
|
|
@@ -166,48 +172,6 @@ rb_xml_reader_namespaces(VALUE rb_reader)
|
|
166
172
|
return rb_namespaces ;
|
167
173
|
}
|
168
174
|
|
169
|
-
/*
|
170
|
-
:call-seq: attribute_nodes() → Array<Nokogiri::XML::Attr>
|
171
|
-
|
172
|
-
Get the attributes of the current node as an Array of XML:Attr
|
173
|
-
|
174
|
-
⚠ This method is deprecated and unsafe to use. It will be removed in a future version of Nokogiri.
|
175
|
-
|
176
|
-
See related: #attribute_hash, #attributes
|
177
|
-
*/
|
178
|
-
static VALUE
|
179
|
-
rb_xml_reader_attribute_nodes(VALUE rb_reader)
|
180
|
-
{
|
181
|
-
xmlTextReaderPtr c_reader;
|
182
|
-
xmlNodePtr c_node;
|
183
|
-
VALUE attr_nodes;
|
184
|
-
int j;
|
185
|
-
|
186
|
-
// TODO: deprecated, remove in Nokogiri v1.15, see https://github.com/sparklemotion/nokogiri/issues/2598
|
187
|
-
// After removal, we can also remove all the "node_has_a_document" special handling from xml_node.c
|
188
|
-
NOKO_WARN_DEPRECATION("Reader#attribute_nodes is deprecated and will be removed in a future version of Nokogiri. Please use Reader#attribute_hash instead.");
|
189
|
-
|
190
|
-
TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_reader_type, c_reader);
|
191
|
-
|
192
|
-
if (! has_attributes(c_reader)) {
|
193
|
-
return rb_ary_new() ;
|
194
|
-
}
|
195
|
-
|
196
|
-
c_node = xmlTextReaderExpand(c_reader);
|
197
|
-
if (c_node == NULL) {
|
198
|
-
return Qnil;
|
199
|
-
}
|
200
|
-
|
201
|
-
attr_nodes = noko_xml_node_attrs(c_node);
|
202
|
-
|
203
|
-
/* ensure that the Reader won't be GCed as long as a node is referenced */
|
204
|
-
for (j = 0 ; j < RARRAY_LEN(attr_nodes) ; j++) {
|
205
|
-
rb_iv_set(rb_ary_entry(attr_nodes, j), "@reader", rb_reader);
|
206
|
-
}
|
207
|
-
|
208
|
-
return attr_nodes;
|
209
|
-
}
|
210
|
-
|
211
175
|
/*
|
212
176
|
:call-seq: attribute_hash() → Hash<String ⇒ String>
|
213
177
|
|
@@ -224,7 +188,7 @@ rb_xml_reader_attribute_hash(VALUE rb_reader)
|
|
224
188
|
xmlAttrPtr c_property;
|
225
189
|
VALUE rb_errors;
|
226
190
|
|
227
|
-
TypedData_Get_Struct(rb_reader, xmlTextReader, &
|
191
|
+
TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_text_reader_type, c_reader);
|
228
192
|
|
229
193
|
if (!has_attributes(c_reader)) {
|
230
194
|
return rb_attributes;
|
@@ -232,7 +196,7 @@ rb_xml_reader_attribute_hash(VALUE rb_reader)
|
|
232
196
|
|
233
197
|
rb_errors = rb_funcall(rb_reader, rb_intern("errors"), 0);
|
234
198
|
|
235
|
-
xmlSetStructuredErrorFunc((void *)rb_errors,
|
199
|
+
xmlSetStructuredErrorFunc((void *)rb_errors, noko__error_array_pusher);
|
236
200
|
c_node = xmlTextReaderExpand(c_reader);
|
237
201
|
xmlSetStructuredErrorFunc(NULL, NULL);
|
238
202
|
|
@@ -277,7 +241,7 @@ attribute_at(VALUE self, VALUE index)
|
|
277
241
|
xmlChar *value;
|
278
242
|
VALUE rb_value;
|
279
243
|
|
280
|
-
TypedData_Get_Struct(self, xmlTextReader, &
|
244
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
|
281
245
|
|
282
246
|
if (NIL_P(index)) { return Qnil; }
|
283
247
|
index = rb_Integer(index);
|
@@ -306,7 +270,7 @@ reader_attribute(VALUE self, VALUE name)
|
|
306
270
|
xmlChar *value ;
|
307
271
|
VALUE rb_value;
|
308
272
|
|
309
|
-
TypedData_Get_Struct(self, xmlTextReader, &
|
273
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
|
310
274
|
|
311
275
|
if (NIL_P(name)) { return Qnil; }
|
312
276
|
name = StringValue(name) ;
|
@@ -331,7 +295,7 @@ attribute_count(VALUE self)
|
|
331
295
|
xmlTextReaderPtr reader;
|
332
296
|
int count;
|
333
297
|
|
334
|
-
TypedData_Get_Struct(self, xmlTextReader, &
|
298
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
|
335
299
|
count = xmlTextReaderAttributeCount(reader);
|
336
300
|
if (count == -1) { return Qnil; }
|
337
301
|
|
@@ -350,7 +314,7 @@ depth(VALUE self)
|
|
350
314
|
xmlTextReaderPtr reader;
|
351
315
|
int depth;
|
352
316
|
|
353
|
-
TypedData_Get_Struct(self, xmlTextReader, &
|
317
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
|
354
318
|
depth = xmlTextReaderDepth(reader);
|
355
319
|
if (depth == -1) { return Qnil; }
|
356
320
|
|
@@ -369,7 +333,7 @@ xml_version(VALUE self)
|
|
369
333
|
xmlTextReaderPtr reader;
|
370
334
|
const char *version;
|
371
335
|
|
372
|
-
TypedData_Get_Struct(self, xmlTextReader, &
|
336
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
|
373
337
|
version = (const char *)xmlTextReaderConstXmlVersion(reader);
|
374
338
|
if (version == NULL) { return Qnil; }
|
375
339
|
|
@@ -388,7 +352,7 @@ lang(VALUE self)
|
|
388
352
|
xmlTextReaderPtr reader;
|
389
353
|
const char *lang;
|
390
354
|
|
391
|
-
TypedData_Get_Struct(self, xmlTextReader, &
|
355
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
|
392
356
|
lang = (const char *)xmlTextReaderConstXmlLang(reader);
|
393
357
|
if (lang == NULL) { return Qnil; }
|
394
358
|
|
@@ -407,7 +371,7 @@ value(VALUE self)
|
|
407
371
|
xmlTextReaderPtr reader;
|
408
372
|
const char *value;
|
409
373
|
|
410
|
-
TypedData_Get_Struct(self, xmlTextReader, &
|
374
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
|
411
375
|
value = (const char *)xmlTextReaderConstValue(reader);
|
412
376
|
if (value == NULL) { return Qnil; }
|
413
377
|
|
@@ -426,7 +390,7 @@ prefix(VALUE self)
|
|
426
390
|
xmlTextReaderPtr reader;
|
427
391
|
const char *prefix;
|
428
392
|
|
429
|
-
TypedData_Get_Struct(self, xmlTextReader, &
|
393
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
|
430
394
|
prefix = (const char *)xmlTextReaderConstPrefix(reader);
|
431
395
|
if (prefix == NULL) { return Qnil; }
|
432
396
|
|
@@ -445,7 +409,7 @@ namespace_uri(VALUE self)
|
|
445
409
|
xmlTextReaderPtr reader;
|
446
410
|
const char *uri;
|
447
411
|
|
448
|
-
TypedData_Get_Struct(self, xmlTextReader, &
|
412
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
|
449
413
|
uri = (const char *)xmlTextReaderConstNamespaceUri(reader);
|
450
414
|
if (uri == NULL) { return Qnil; }
|
451
415
|
|
@@ -464,7 +428,7 @@ local_name(VALUE self)
|
|
464
428
|
xmlTextReaderPtr reader;
|
465
429
|
const char *name;
|
466
430
|
|
467
|
-
TypedData_Get_Struct(self, xmlTextReader, &
|
431
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
|
468
432
|
name = (const char *)xmlTextReaderConstLocalName(reader);
|
469
433
|
if (name == NULL) { return Qnil; }
|
470
434
|
|
@@ -483,7 +447,7 @@ name(VALUE self)
|
|
483
447
|
xmlTextReaderPtr reader;
|
484
448
|
const char *name;
|
485
449
|
|
486
|
-
TypedData_Get_Struct(self, xmlTextReader, &
|
450
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
|
487
451
|
name = (const char *)xmlTextReaderConstName(reader);
|
488
452
|
if (name == NULL) { return Qnil; }
|
489
453
|
|
@@ -503,7 +467,7 @@ rb_xml_reader_base_uri(VALUE rb_reader)
|
|
503
467
|
xmlTextReaderPtr c_reader;
|
504
468
|
xmlChar *c_base_uri;
|
505
469
|
|
506
|
-
TypedData_Get_Struct(rb_reader, xmlTextReader, &
|
470
|
+
TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_text_reader_type, c_reader);
|
507
471
|
|
508
472
|
c_base_uri = xmlTextReaderBaseUri(c_reader);
|
509
473
|
if (c_base_uri == NULL) {
|
@@ -526,7 +490,7 @@ static VALUE
|
|
526
490
|
state(VALUE self)
|
527
491
|
{
|
528
492
|
xmlTextReaderPtr reader;
|
529
|
-
TypedData_Get_Struct(self, xmlTextReader, &
|
493
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
|
530
494
|
return INT2NUM(xmlTextReaderReadState(reader));
|
531
495
|
}
|
532
496
|
|
@@ -540,7 +504,7 @@ static VALUE
|
|
540
504
|
node_type(VALUE self)
|
541
505
|
{
|
542
506
|
xmlTextReaderPtr reader;
|
543
|
-
TypedData_Get_Struct(self, xmlTextReader, &
|
507
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
|
544
508
|
return INT2NUM(xmlTextReaderNodeType(reader));
|
545
509
|
}
|
546
510
|
|
@@ -551,32 +515,41 @@ node_type(VALUE self)
|
|
551
515
|
* Move the Reader forward through the XML document.
|
552
516
|
*/
|
553
517
|
static VALUE
|
554
|
-
read_more(VALUE
|
518
|
+
read_more(VALUE rb_reader)
|
555
519
|
{
|
556
|
-
xmlTextReaderPtr
|
557
|
-
|
558
|
-
VALUE error_list;
|
559
|
-
int ret;
|
520
|
+
xmlTextReaderPtr c_reader;
|
521
|
+
libxmlStructuredErrorHandlerState handler_state;
|
560
522
|
|
561
|
-
TypedData_Get_Struct(
|
523
|
+
TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_text_reader_type, c_reader);
|
562
524
|
|
563
|
-
|
525
|
+
VALUE rb_errors = rb_funcall(rb_reader, rb_intern("errors"), 0);
|
526
|
+
noko__structured_error_func_save_and_set(&handler_state, (void *)rb_errors, noko__error_array_pusher);
|
564
527
|
|
565
|
-
|
566
|
-
ret = xmlTextReaderRead(reader);
|
567
|
-
xmlSetStructuredErrorFunc(NULL, NULL);
|
528
|
+
int status = xmlTextReaderRead(c_reader);
|
568
529
|
|
569
|
-
|
570
|
-
if (ret == 0) { return Qnil; }
|
530
|
+
noko__structured_error_func_restore(&handler_state);
|
571
531
|
|
572
|
-
|
573
|
-
if (
|
574
|
-
|
575
|
-
|
576
|
-
|
532
|
+
xmlDocPtr c_document = xmlTextReaderCurrentDoc(c_reader);
|
533
|
+
if (c_document && c_document->encoding == NULL) {
|
534
|
+
VALUE constructor_encoding = rb_iv_get(rb_reader, "@encoding");
|
535
|
+
if (RTEST(constructor_encoding)) {
|
536
|
+
c_document->encoding = xmlStrdup(BAD_CAST StringValueCStr(constructor_encoding));
|
537
|
+
} else {
|
538
|
+
rb_iv_set(rb_reader, "@encoding", NOKOGIRI_STR_NEW2("UTF-8"));
|
539
|
+
c_document->encoding = xmlStrdup(BAD_CAST "UTF-8");
|
540
|
+
}
|
577
541
|
}
|
578
542
|
|
579
|
-
return
|
543
|
+
if (status == 1) { return rb_reader; }
|
544
|
+
if (status == 0) { return Qnil; }
|
545
|
+
|
546
|
+
/* if we're here, there was an error */
|
547
|
+
VALUE exception = rb_funcall(cNokogiriXmlSyntaxError, rb_intern("aggregate"), 1, rb_errors);
|
548
|
+
if (RB_TEST(exception)) {
|
549
|
+
rb_exc_raise(exception);
|
550
|
+
} else {
|
551
|
+
rb_raise(rb_eRuntimeError, "Error pulling: %d", status);
|
552
|
+
}
|
580
553
|
}
|
581
554
|
|
582
555
|
/*
|
@@ -593,7 +566,7 @@ inner_xml(VALUE self)
|
|
593
566
|
xmlChar *value;
|
594
567
|
VALUE str;
|
595
568
|
|
596
|
-
TypedData_Get_Struct(self, xmlTextReader, &
|
569
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
|
597
570
|
|
598
571
|
value = xmlTextReaderReadInnerXml(reader);
|
599
572
|
|
@@ -620,7 +593,7 @@ outer_xml(VALUE self)
|
|
620
593
|
xmlChar *value;
|
621
594
|
VALUE str = Qnil;
|
622
595
|
|
623
|
-
TypedData_Get_Struct(self, xmlTextReader, &
|
596
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
|
624
597
|
|
625
598
|
value = xmlTextReaderReadOuterXml(reader);
|
626
599
|
|
@@ -635,11 +608,13 @@ outer_xml(VALUE self)
|
|
635
608
|
* call-seq:
|
636
609
|
* from_memory(string, url = nil, encoding = nil, options = 0)
|
637
610
|
*
|
638
|
-
* Create a new
|
611
|
+
* Create a new Reader to parse a String.
|
639
612
|
*/
|
640
613
|
static VALUE
|
641
614
|
from_memory(int argc, VALUE *argv, VALUE klass)
|
642
615
|
{
|
616
|
+
/* TODO: deprecate this method, since Reader.new can handle both memory and IO. It can then
|
617
|
+
* become private. */
|
643
618
|
VALUE rb_buffer, rb_url, encoding, rb_options;
|
644
619
|
xmlTextReaderPtr reader;
|
645
620
|
const char *c_url = NULL;
|
@@ -667,7 +642,7 @@ from_memory(int argc, VALUE *argv, VALUE klass)
|
|
667
642
|
rb_raise(rb_eRuntimeError, "couldn't create a parser");
|
668
643
|
}
|
669
644
|
|
670
|
-
rb_reader = TypedData_Wrap_Struct(klass, &
|
645
|
+
rb_reader = TypedData_Wrap_Struct(klass, &xml_text_reader_type, reader);
|
671
646
|
args[0] = rb_buffer;
|
672
647
|
args[1] = rb_url;
|
673
648
|
args[2] = encoding;
|
@@ -680,11 +655,13 @@ from_memory(int argc, VALUE *argv, VALUE klass)
|
|
680
655
|
* call-seq:
|
681
656
|
* from_io(io, url = nil, encoding = nil, options = 0)
|
682
657
|
*
|
683
|
-
* Create a new
|
658
|
+
* Create a new Reader to parse an IO stream.
|
684
659
|
*/
|
685
660
|
static VALUE
|
686
661
|
from_io(int argc, VALUE *argv, VALUE klass)
|
687
662
|
{
|
663
|
+
/* TODO: deprecate this method, since Reader.new can handle both memory and IO. It can then
|
664
|
+
* become private. */
|
688
665
|
VALUE rb_io, rb_url, encoding, rb_options;
|
689
666
|
xmlTextReaderPtr reader;
|
690
667
|
const char *c_url = NULL;
|
@@ -713,7 +690,7 @@ from_io(int argc, VALUE *argv, VALUE klass)
|
|
713
690
|
rb_raise(rb_eRuntimeError, "couldn't create a parser");
|
714
691
|
}
|
715
692
|
|
716
|
-
rb_reader = TypedData_Wrap_Struct(klass, &
|
693
|
+
rb_reader = TypedData_Wrap_Struct(klass, &xml_text_reader_type, reader);
|
717
694
|
args[0] = rb_io;
|
718
695
|
args[1] = rb_url;
|
719
696
|
args[2] = encoding;
|
@@ -733,7 +710,7 @@ empty_element_p(VALUE self)
|
|
733
710
|
{
|
734
711
|
xmlTextReaderPtr reader;
|
735
712
|
|
736
|
-
TypedData_Get_Struct(self, xmlTextReader, &
|
713
|
+
TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
|
737
714
|
|
738
715
|
if (xmlTextReaderIsEmptyElement(reader)) {
|
739
716
|
return Qtrue;
|
@@ -749,25 +726,23 @@ rb_xml_reader_encoding(VALUE rb_reader)
|
|
749
726
|
const char *parser_encoding;
|
750
727
|
VALUE constructor_encoding;
|
751
728
|
|
729
|
+
TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_text_reader_type, c_reader);
|
730
|
+
parser_encoding = (const char *)xmlTextReaderConstEncoding(c_reader);
|
731
|
+
if (parser_encoding) {
|
732
|
+
return NOKOGIRI_STR_NEW2(parser_encoding);
|
733
|
+
}
|
734
|
+
|
752
735
|
constructor_encoding = rb_iv_get(rb_reader, "@encoding");
|
753
736
|
if (RTEST(constructor_encoding)) {
|
754
737
|
return constructor_encoding;
|
755
738
|
}
|
756
739
|
|
757
|
-
|
758
|
-
parser_encoding = (const char *)xmlTextReaderConstEncoding(c_reader);
|
759
|
-
if (parser_encoding == NULL) { return Qnil; }
|
760
|
-
return NOKOGIRI_STR_NEW2(parser_encoding);
|
740
|
+
return Qnil;
|
761
741
|
}
|
762
742
|
|
763
743
|
void
|
764
744
|
noko_init_xml_reader(void)
|
765
745
|
{
|
766
|
-
/*
|
767
|
-
* The Reader parser allows you to effectively pull parse an XML document.
|
768
|
-
* Once instantiated, call Nokogiri::XML::Reader#each to iterate over each
|
769
|
-
* node. Note that you may only iterate over the document once!
|
770
|
-
*/
|
771
746
|
cNokogiriXmlReader = rb_define_class_under(mNokogiriXml, "Reader", rb_cObject);
|
772
747
|
|
773
748
|
rb_undef_alloc_func(cNokogiriXmlReader);
|
@@ -778,7 +753,6 @@ noko_init_xml_reader(void)
|
|
778
753
|
rb_define_method(cNokogiriXmlReader, "attribute", reader_attribute, 1);
|
779
754
|
rb_define_method(cNokogiriXmlReader, "attribute_at", attribute_at, 1);
|
780
755
|
rb_define_method(cNokogiriXmlReader, "attribute_count", attribute_count, 0);
|
781
|
-
rb_define_method(cNokogiriXmlReader, "attribute_nodes", rb_xml_reader_attribute_nodes, 0);
|
782
756
|
rb_define_method(cNokogiriXmlReader, "attribute_hash", rb_xml_reader_attribute_hash, 0);
|
783
757
|
rb_define_method(cNokogiriXmlReader, "attributes?", attributes_eh, 0);
|
784
758
|
rb_define_method(cNokogiriXmlReader, "base_uri", rb_xml_reader_base_uri, 0);
|
data/ext/nokogiri/xml_relax_ng.c
CHANGED
@@ -3,28 +3,22 @@
|
|
3
3
|
VALUE cNokogiriXmlRelaxNG;
|
4
4
|
|
5
5
|
static void
|
6
|
-
|
6
|
+
_noko_xml_relax_ng_deallocate(void *data)
|
7
7
|
{
|
8
8
|
xmlRelaxNGPtr schema = data;
|
9
9
|
xmlRelaxNGFree(schema);
|
10
10
|
}
|
11
11
|
|
12
12
|
static const rb_data_type_t xml_relax_ng_type = {
|
13
|
-
.wrap_struct_name = "
|
13
|
+
.wrap_struct_name = "xmlRelaxNG",
|
14
14
|
.function = {
|
15
|
-
.dfree =
|
15
|
+
.dfree = _noko_xml_relax_ng_deallocate,
|
16
16
|
},
|
17
17
|
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
|
18
18
|
};
|
19
19
|
|
20
|
-
/*
|
21
|
-
* call-seq:
|
22
|
-
* validate_document(document)
|
23
|
-
*
|
24
|
-
* Validate a Nokogiri::XML::Document against this RelaxNG schema.
|
25
|
-
*/
|
26
20
|
static VALUE
|
27
|
-
|
21
|
+
noko_xml_relax_ng__validate_document(VALUE self, VALUE document)
|
28
22
|
{
|
29
23
|
xmlDocPtr doc;
|
30
24
|
xmlRelaxNGPtr schema;
|
@@ -43,13 +37,11 @@ validate_document(VALUE self, VALUE document)
|
|
43
37
|
rb_raise(rb_eRuntimeError, "Could not create a validation context");
|
44
38
|
}
|
45
39
|
|
46
|
-
#ifdef HAVE_XMLRELAXNGSETVALIDSTRUCTUREDERRORS
|
47
40
|
xmlRelaxNGSetValidStructuredErrors(
|
48
41
|
valid_ctxt,
|
49
|
-
|
42
|
+
noko__error_array_pusher,
|
50
43
|
(void *)errors
|
51
44
|
);
|
52
|
-
#endif
|
53
45
|
|
54
46
|
xmlRelaxNGValidateDoc(valid_ctxt, doc);
|
55
47
|
|
@@ -59,8 +51,8 @@ validate_document(VALUE self, VALUE document)
|
|
59
51
|
}
|
60
52
|
|
61
53
|
static VALUE
|
62
|
-
|
63
|
-
VALUE
|
54
|
+
_noko_xml_relax_ng_parse_schema(
|
55
|
+
VALUE rb_class,
|
64
56
|
xmlRelaxNGParserCtxtPtr c_parser_context,
|
65
57
|
VALUE rb_parse_options
|
66
58
|
)
|
@@ -68,6 +60,7 @@ xml_relax_ng_parse_schema(
|
|
68
60
|
VALUE rb_errors;
|
69
61
|
VALUE rb_schema;
|
70
62
|
xmlRelaxNGPtr c_schema;
|
63
|
+
libxmlStructuredErrorHandlerState handler_state;
|
71
64
|
|
72
65
|
if (NIL_P(rb_parse_options)) {
|
73
66
|
rb_parse_options = rb_const_get_at(
|
@@ -77,33 +70,30 @@ xml_relax_ng_parse_schema(
|
|
77
70
|
}
|
78
71
|
|
79
72
|
rb_errors = rb_ary_new();
|
80
|
-
xmlSetStructuredErrorFunc((void *)rb_errors, Nokogiri_error_array_pusher);
|
81
73
|
|
82
|
-
|
74
|
+
noko__structured_error_func_save_and_set(&handler_state, (void *)rb_errors, noko__error_array_pusher);
|
83
75
|
xmlRelaxNGSetParserStructuredErrors(
|
84
76
|
c_parser_context,
|
85
|
-
|
77
|
+
noko__error_array_pusher,
|
86
78
|
(void *)rb_errors
|
87
79
|
);
|
88
|
-
#endif
|
89
80
|
|
90
81
|
c_schema = xmlRelaxNGParse(c_parser_context);
|
91
82
|
|
92
|
-
xmlSetStructuredErrorFunc(NULL, NULL);
|
93
83
|
xmlRelaxNGFreeParserCtxt(c_parser_context);
|
84
|
+
noko__structured_error_func_restore(&handler_state);
|
94
85
|
|
95
86
|
if (NULL == c_schema) {
|
96
|
-
|
97
|
-
|
98
|
-
|
87
|
+
VALUE exception = rb_funcall(cNokogiriXmlSyntaxError, rb_intern("aggregate"), 1, rb_errors);
|
88
|
+
|
89
|
+
if (RB_TEST(exception)) {
|
90
|
+
rb_exc_raise(exception);
|
99
91
|
} else {
|
100
92
|
rb_raise(rb_eRuntimeError, "Could not parse document");
|
101
93
|
}
|
102
|
-
|
103
|
-
return Qnil;
|
104
94
|
}
|
105
95
|
|
106
|
-
rb_schema = TypedData_Wrap_Struct(
|
96
|
+
rb_schema = TypedData_Wrap_Struct(rb_class, &xml_relax_ng_type, c_schema);
|
107
97
|
rb_iv_set(rb_schema, "@errors", rb_errors);
|
108
98
|
rb_iv_set(rb_schema, "@parse_options", rb_parse_options);
|
109
99
|
|
@@ -111,37 +101,27 @@ xml_relax_ng_parse_schema(
|
|
111
101
|
}
|
112
102
|
|
113
103
|
/*
|
114
|
-
* call-seq:
|
115
|
-
*
|
104
|
+
* :call-seq:
|
105
|
+
* from_document(document) → Nokogiri::XML::RelaxNG
|
106
|
+
* from_document(document, parse_options) → Nokogiri::XML::RelaxNG
|
116
107
|
*
|
117
|
-
*
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
rb_scan_args(argc, argv, "11", &rb_content, &rb_parse_options);
|
127
|
-
|
128
|
-
c_parser_context = xmlRelaxNGNewMemParserCtxt(
|
129
|
-
(const char *)StringValuePtr(rb_content),
|
130
|
-
(int)RSTRING_LEN(rb_content)
|
131
|
-
);
|
132
|
-
|
133
|
-
return xml_relax_ng_parse_schema(klass, c_parser_context, rb_parse_options);
|
134
|
-
}
|
135
|
-
|
136
|
-
/*
|
137
|
-
* call-seq:
|
138
|
-
* from_document(doc)
|
108
|
+
* Parse a RELAX NG schema definition from a Document to create a new Nokogiri::XML::RelaxNG.
|
109
|
+
*
|
110
|
+
* [Parameters]
|
111
|
+
* - +document+ (XML::Document) A document containing the RELAX NG schema definition
|
112
|
+
* - +parse_options+ (Nokogiri::XML::ParseOptions)
|
113
|
+
* Defaults to ParseOptions::DEFAULT_SCHEMA ⚠ Unused
|
114
|
+
*
|
115
|
+
* [Returns] Nokogiri::XML::RelaxNG
|
139
116
|
*
|
140
|
-
*
|
117
|
+
* ⚠ +parse_options+ is currently unused by this method and is present only as a placeholder for
|
118
|
+
* future functionality.
|
141
119
|
*/
|
142
120
|
static VALUE
|
143
|
-
|
121
|
+
noko_xml_relax_ng_s_from_document(int argc, VALUE *argv, VALUE rb_class)
|
144
122
|
{
|
123
|
+
/* TODO: deprecate this method and put file-or-string logic into .new so that becomes the
|
124
|
+
* preferred entry point, and this can become a private method */
|
145
125
|
VALUE rb_document;
|
146
126
|
VALUE rb_parse_options;
|
147
127
|
xmlDocPtr c_document;
|
@@ -154,7 +134,7 @@ from_document(int argc, VALUE *argv, VALUE klass)
|
|
154
134
|
|
155
135
|
c_parser_context = xmlRelaxNGNewDocParserCtxt(c_document);
|
156
136
|
|
157
|
-
return
|
137
|
+
return _noko_xml_relax_ng_parse_schema(rb_class, c_parser_context, rb_parse_options);
|
158
138
|
}
|
159
139
|
|
160
140
|
void
|
@@ -163,8 +143,7 @@ noko_init_xml_relax_ng(void)
|
|
163
143
|
assert(cNokogiriXmlSchema);
|
164
144
|
cNokogiriXmlRelaxNG = rb_define_class_under(mNokogiriXml, "RelaxNG", cNokogiriXmlSchema);
|
165
145
|
|
166
|
-
rb_define_singleton_method(cNokogiriXmlRelaxNG, "
|
167
|
-
rb_define_singleton_method(cNokogiriXmlRelaxNG, "from_document", from_document, -1);
|
146
|
+
rb_define_singleton_method(cNokogiriXmlRelaxNG, "from_document", noko_xml_relax_ng_s_from_document, -1);
|
168
147
|
|
169
|
-
rb_define_private_method(cNokogiriXmlRelaxNG, "validate_document",
|
148
|
+
rb_define_private_method(cNokogiriXmlRelaxNG, "validate_document", noko_xml_relax_ng__validate_document, 1);
|
170
149
|
}
|