nokogiri 1.16.0 → 1.18.1

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (97) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +14 -16
  3. data/LICENSE-DEPENDENCIES.md +6 -6
  4. data/README.md +8 -5
  5. data/dependencies.yml +9 -9
  6. data/ext/nokogiri/extconf.rb +188 -142
  7. data/ext/nokogiri/gumbo.c +69 -53
  8. data/ext/nokogiri/html4_document.c +10 -4
  9. data/ext/nokogiri/html4_element_description.c +18 -18
  10. data/ext/nokogiri/html4_sax_parser.c +40 -0
  11. data/ext/nokogiri/html4_sax_parser_context.c +48 -58
  12. data/ext/nokogiri/html4_sax_push_parser.c +25 -24
  13. data/ext/nokogiri/libxml2_polyfill.c +114 -0
  14. data/ext/nokogiri/nokogiri.c +9 -2
  15. data/ext/nokogiri/nokogiri.h +18 -33
  16. data/ext/nokogiri/xml_attr.c +1 -1
  17. data/ext/nokogiri/xml_cdata.c +2 -10
  18. data/ext/nokogiri/xml_comment.c +3 -8
  19. data/ext/nokogiri/xml_document.c +167 -156
  20. data/ext/nokogiri/xml_document_fragment.c +10 -25
  21. data/ext/nokogiri/xml_dtd.c +1 -1
  22. data/ext/nokogiri/xml_element_content.c +9 -9
  23. data/ext/nokogiri/xml_encoding_handler.c +4 -4
  24. data/ext/nokogiri/xml_namespace.c +6 -6
  25. data/ext/nokogiri/xml_node.c +141 -104
  26. data/ext/nokogiri/xml_node_set.c +46 -44
  27. data/ext/nokogiri/xml_reader.c +74 -57
  28. data/ext/nokogiri/xml_relax_ng.c +35 -56
  29. data/ext/nokogiri/xml_sax_parser.c +156 -88
  30. data/ext/nokogiri/xml_sax_parser_context.c +219 -131
  31. data/ext/nokogiri/xml_sax_push_parser.c +68 -49
  32. data/ext/nokogiri/xml_schema.c +50 -85
  33. data/ext/nokogiri/xml_syntax_error.c +19 -11
  34. data/ext/nokogiri/xml_text.c +2 -4
  35. data/ext/nokogiri/xml_xpath_context.c +103 -100
  36. data/ext/nokogiri/xslt_stylesheet.c +16 -11
  37. data/gumbo-parser/Makefile +3 -0
  38. data/gumbo-parser/src/ascii.c +2 -2
  39. data/gumbo-parser/src/error.c +76 -48
  40. data/gumbo-parser/src/error.h +5 -1
  41. data/gumbo-parser/src/nokogiri_gumbo.h +11 -2
  42. data/gumbo-parser/src/parser.c +66 -25
  43. data/gumbo-parser/src/tokenizer.c +6 -6
  44. data/lib/nokogiri/class_resolver.rb +1 -1
  45. data/lib/nokogiri/css/node.rb +6 -2
  46. data/lib/nokogiri/css/parser.rb +6 -4
  47. data/lib/nokogiri/css/parser.y +2 -2
  48. data/lib/nokogiri/css/parser_extras.rb +6 -66
  49. data/lib/nokogiri/css/selector_cache.rb +38 -0
  50. data/lib/nokogiri/css/tokenizer.rb +4 -4
  51. data/lib/nokogiri/css/tokenizer.rex +9 -8
  52. data/lib/nokogiri/css/xpath_visitor.rb +42 -6
  53. data/lib/nokogiri/css.rb +86 -20
  54. data/lib/nokogiri/decorators/slop.rb +3 -5
  55. data/lib/nokogiri/encoding_handler.rb +2 -2
  56. data/lib/nokogiri/html4/document.rb +44 -23
  57. data/lib/nokogiri/html4/document_fragment.rb +124 -12
  58. data/lib/nokogiri/html4/encoding_reader.rb +1 -1
  59. data/lib/nokogiri/html4/sax/parser.rb +23 -38
  60. data/lib/nokogiri/html4/sax/parser_context.rb +4 -9
  61. data/lib/nokogiri/html4.rb +9 -14
  62. data/lib/nokogiri/html5/builder.rb +40 -0
  63. data/lib/nokogiri/html5/document.rb +61 -30
  64. data/lib/nokogiri/html5/document_fragment.rb +130 -20
  65. data/lib/nokogiri/html5/node.rb +4 -4
  66. data/lib/nokogiri/html5.rb +114 -72
  67. data/lib/nokogiri/version/constant.rb +1 -1
  68. data/lib/nokogiri/xml/builder.rb +8 -1
  69. data/lib/nokogiri/xml/document.rb +70 -26
  70. data/lib/nokogiri/xml/document_fragment.rb +84 -13
  71. data/lib/nokogiri/xml/node.rb +82 -11
  72. data/lib/nokogiri/xml/node_set.rb +9 -7
  73. data/lib/nokogiri/xml/parse_options.rb +1 -1
  74. data/lib/nokogiri/xml/pp/node.rb +6 -1
  75. data/lib/nokogiri/xml/reader.rb +51 -17
  76. data/lib/nokogiri/xml/relax_ng.rb +57 -20
  77. data/lib/nokogiri/xml/sax/document.rb +174 -83
  78. data/lib/nokogiri/xml/sax/parser.rb +115 -41
  79. data/lib/nokogiri/xml/sax/parser_context.rb +116 -8
  80. data/lib/nokogiri/xml/sax/push_parser.rb +3 -0
  81. data/lib/nokogiri/xml/sax.rb +48 -0
  82. data/lib/nokogiri/xml/schema.rb +112 -45
  83. data/lib/nokogiri/xml/searchable.rb +38 -42
  84. data/lib/nokogiri/xml/syntax_error.rb +22 -0
  85. data/lib/nokogiri/xml/xpath_context.rb +14 -3
  86. data/lib/nokogiri/xml.rb +13 -24
  87. data/lib/nokogiri/xslt/stylesheet.rb +29 -7
  88. data/lib/nokogiri/xslt.rb +3 -9
  89. data/lib/xsd/xmlparser/nokogiri.rb +3 -4
  90. data/patches/libxml2/0019-xpath-Use-separate-static-hash-table-for-standard-fu.patch +244 -0
  91. data/ports/archives/libxml2-2.13.5.tar.xz +0 -0
  92. data/ports/archives/libxslt-1.1.42.tar.xz +0 -0
  93. metadata +13 -14
  94. data/ext/nokogiri/libxml2_backwards_compat.c +0 -121
  95. data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +0 -25
  96. data/ports/archives/libxml2-2.12.3.tar.xz +0 -0
  97. data/ports/archives/libxslt-1.1.39.tar.xz +0 -0
@@ -5,12 +5,18 @@ VALUE cNokogiriXmlReader;
5
5
  static void
6
6
  xml_reader_deallocate(void *data)
7
7
  {
8
+ // free the document separately because we _may_ have triggered preservation by calling
9
+ // xmlTextReaderCurrentDoc during a read_more.
8
10
  xmlTextReaderPtr reader = data;
11
+ xmlDocPtr doc = xmlTextReaderCurrentDoc(reader);
9
12
  xmlFreeTextReader(reader);
13
+ if (doc) {
14
+ xmlFreeDoc(doc);
15
+ }
10
16
  }
11
17
 
12
- static const rb_data_type_t xml_reader_type = {
13
- .wrap_struct_name = "Nokogiri::XML::Reader",
18
+ static const rb_data_type_t xml_text_reader_type = {
19
+ .wrap_struct_name = "xmlTextReader",
14
20
  .function = {
15
21
  .dfree = xml_reader_deallocate,
16
22
  },
@@ -78,7 +84,7 @@ default_eh(VALUE self)
78
84
  xmlTextReaderPtr reader;
79
85
  int eh;
80
86
 
81
- TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
87
+ TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
82
88
  eh = xmlTextReaderIsDefault(reader);
83
89
  if (eh == 0) { return Qfalse; }
84
90
  if (eh == 1) { return Qtrue; }
@@ -98,7 +104,7 @@ value_eh(VALUE self)
98
104
  xmlTextReaderPtr reader;
99
105
  int eh;
100
106
 
101
- TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
107
+ TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
102
108
  eh = xmlTextReaderHasValue(reader);
103
109
  if (eh == 0) { return Qfalse; }
104
110
  if (eh == 1) { return Qtrue; }
@@ -118,7 +124,7 @@ attributes_eh(VALUE self)
118
124
  xmlTextReaderPtr reader;
119
125
  int eh;
120
126
 
121
- TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
127
+ TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
122
128
  eh = has_attributes(reader);
123
129
  if (eh == 0) { return Qfalse; }
124
130
  if (eh == 1) { return Qtrue; }
@@ -140,7 +146,7 @@ rb_xml_reader_namespaces(VALUE rb_reader)
140
146
  xmlNodePtr c_node;
141
147
  VALUE rb_errors;
142
148
 
143
- TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_reader_type, c_reader);
149
+ TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_text_reader_type, c_reader);
144
150
 
145
151
  if (! has_attributes(c_reader)) {
146
152
  return rb_namespaces ;
@@ -148,7 +154,7 @@ rb_xml_reader_namespaces(VALUE rb_reader)
148
154
 
149
155
  rb_errors = rb_funcall(rb_reader, rb_intern("errors"), 0);
150
156
 
151
- xmlSetStructuredErrorFunc((void *)rb_errors, Nokogiri_error_array_pusher);
157
+ xmlSetStructuredErrorFunc((void *)rb_errors, noko__error_array_pusher);
152
158
  c_node = xmlTextReaderExpand(c_reader);
153
159
  xmlSetStructuredErrorFunc(NULL, NULL);
154
160
 
@@ -182,7 +188,7 @@ rb_xml_reader_attribute_hash(VALUE rb_reader)
182
188
  xmlAttrPtr c_property;
183
189
  VALUE rb_errors;
184
190
 
185
- TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_reader_type, c_reader);
191
+ TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_text_reader_type, c_reader);
186
192
 
187
193
  if (!has_attributes(c_reader)) {
188
194
  return rb_attributes;
@@ -190,7 +196,7 @@ rb_xml_reader_attribute_hash(VALUE rb_reader)
190
196
 
191
197
  rb_errors = rb_funcall(rb_reader, rb_intern("errors"), 0);
192
198
 
193
- xmlSetStructuredErrorFunc((void *)rb_errors, Nokogiri_error_array_pusher);
199
+ xmlSetStructuredErrorFunc((void *)rb_errors, noko__error_array_pusher);
194
200
  c_node = xmlTextReaderExpand(c_reader);
195
201
  xmlSetStructuredErrorFunc(NULL, NULL);
196
202
 
@@ -235,7 +241,7 @@ attribute_at(VALUE self, VALUE index)
235
241
  xmlChar *value;
236
242
  VALUE rb_value;
237
243
 
238
- TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
244
+ TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
239
245
 
240
246
  if (NIL_P(index)) { return Qnil; }
241
247
  index = rb_Integer(index);
@@ -264,7 +270,7 @@ reader_attribute(VALUE self, VALUE name)
264
270
  xmlChar *value ;
265
271
  VALUE rb_value;
266
272
 
267
- TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
273
+ TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
268
274
 
269
275
  if (NIL_P(name)) { return Qnil; }
270
276
  name = StringValue(name) ;
@@ -289,7 +295,7 @@ attribute_count(VALUE self)
289
295
  xmlTextReaderPtr reader;
290
296
  int count;
291
297
 
292
- TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
298
+ TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
293
299
  count = xmlTextReaderAttributeCount(reader);
294
300
  if (count == -1) { return Qnil; }
295
301
 
@@ -308,7 +314,7 @@ depth(VALUE self)
308
314
  xmlTextReaderPtr reader;
309
315
  int depth;
310
316
 
311
- TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
317
+ TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
312
318
  depth = xmlTextReaderDepth(reader);
313
319
  if (depth == -1) { return Qnil; }
314
320
 
@@ -327,7 +333,7 @@ xml_version(VALUE self)
327
333
  xmlTextReaderPtr reader;
328
334
  const char *version;
329
335
 
330
- TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
336
+ TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
331
337
  version = (const char *)xmlTextReaderConstXmlVersion(reader);
332
338
  if (version == NULL) { return Qnil; }
333
339
 
@@ -346,7 +352,7 @@ lang(VALUE self)
346
352
  xmlTextReaderPtr reader;
347
353
  const char *lang;
348
354
 
349
- TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
355
+ TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
350
356
  lang = (const char *)xmlTextReaderConstXmlLang(reader);
351
357
  if (lang == NULL) { return Qnil; }
352
358
 
@@ -365,7 +371,7 @@ value(VALUE self)
365
371
  xmlTextReaderPtr reader;
366
372
  const char *value;
367
373
 
368
- TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
374
+ TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
369
375
  value = (const char *)xmlTextReaderConstValue(reader);
370
376
  if (value == NULL) { return Qnil; }
371
377
 
@@ -384,7 +390,7 @@ prefix(VALUE self)
384
390
  xmlTextReaderPtr reader;
385
391
  const char *prefix;
386
392
 
387
- TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
393
+ TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
388
394
  prefix = (const char *)xmlTextReaderConstPrefix(reader);
389
395
  if (prefix == NULL) { return Qnil; }
390
396
 
@@ -403,7 +409,7 @@ namespace_uri(VALUE self)
403
409
  xmlTextReaderPtr reader;
404
410
  const char *uri;
405
411
 
406
- TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
412
+ TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
407
413
  uri = (const char *)xmlTextReaderConstNamespaceUri(reader);
408
414
  if (uri == NULL) { return Qnil; }
409
415
 
@@ -422,7 +428,7 @@ local_name(VALUE self)
422
428
  xmlTextReaderPtr reader;
423
429
  const char *name;
424
430
 
425
- TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
431
+ TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
426
432
  name = (const char *)xmlTextReaderConstLocalName(reader);
427
433
  if (name == NULL) { return Qnil; }
428
434
 
@@ -441,7 +447,7 @@ name(VALUE self)
441
447
  xmlTextReaderPtr reader;
442
448
  const char *name;
443
449
 
444
- TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
450
+ TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
445
451
  name = (const char *)xmlTextReaderConstName(reader);
446
452
  if (name == NULL) { return Qnil; }
447
453
 
@@ -461,7 +467,7 @@ rb_xml_reader_base_uri(VALUE rb_reader)
461
467
  xmlTextReaderPtr c_reader;
462
468
  xmlChar *c_base_uri;
463
469
 
464
- TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_reader_type, c_reader);
470
+ TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_text_reader_type, c_reader);
465
471
 
466
472
  c_base_uri = xmlTextReaderBaseUri(c_reader);
467
473
  if (c_base_uri == NULL) {
@@ -484,7 +490,7 @@ static VALUE
484
490
  state(VALUE self)
485
491
  {
486
492
  xmlTextReaderPtr reader;
487
- TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
493
+ TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
488
494
  return INT2NUM(xmlTextReaderReadState(reader));
489
495
  }
490
496
 
@@ -498,7 +504,7 @@ static VALUE
498
504
  node_type(VALUE self)
499
505
  {
500
506
  xmlTextReaderPtr reader;
501
- TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
507
+ TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
502
508
  return INT2NUM(xmlTextReaderNodeType(reader));
503
509
  }
504
510
 
@@ -509,32 +515,41 @@ node_type(VALUE self)
509
515
  * Move the Reader forward through the XML document.
510
516
  */
511
517
  static VALUE
512
- read_more(VALUE self)
518
+ read_more(VALUE rb_reader)
513
519
  {
514
- xmlTextReaderPtr reader;
515
- xmlErrorConstPtr error;
516
- VALUE error_list;
517
- int ret;
520
+ xmlTextReaderPtr c_reader;
521
+ libxmlStructuredErrorHandlerState handler_state;
518
522
 
519
- TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
523
+ TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_text_reader_type, c_reader);
520
524
 
521
- error_list = rb_funcall(self, rb_intern("errors"), 0);
525
+ VALUE rb_errors = rb_funcall(rb_reader, rb_intern("errors"), 0);
526
+ noko__structured_error_func_save_and_set(&handler_state, (void *)rb_errors, noko__error_array_pusher);
522
527
 
523
- xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);
524
- ret = xmlTextReaderRead(reader);
525
- xmlSetStructuredErrorFunc(NULL, NULL);
528
+ int status = xmlTextReaderRead(c_reader);
526
529
 
527
- if (ret == 1) { return self; }
528
- if (ret == 0) { return Qnil; }
530
+ noko__structured_error_func_restore(&handler_state);
529
531
 
530
- error = xmlGetLastError();
531
- if (error) {
532
- rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
533
- } else {
534
- rb_raise(rb_eRuntimeError, "Error pulling: %d", ret);
532
+ xmlDocPtr c_document = xmlTextReaderCurrentDoc(c_reader);
533
+ if (c_document && c_document->encoding == NULL) {
534
+ VALUE constructor_encoding = rb_iv_get(rb_reader, "@encoding");
535
+ if (RTEST(constructor_encoding)) {
536
+ c_document->encoding = xmlStrdup(BAD_CAST StringValueCStr(constructor_encoding));
537
+ } else {
538
+ rb_iv_set(rb_reader, "@encoding", NOKOGIRI_STR_NEW2("UTF-8"));
539
+ c_document->encoding = xmlStrdup(BAD_CAST "UTF-8");
540
+ }
535
541
  }
536
542
 
537
- return Qnil;
543
+ if (status == 1) { return rb_reader; }
544
+ if (status == 0) { return Qnil; }
545
+
546
+ /* if we're here, there was an error */
547
+ VALUE exception = rb_funcall(cNokogiriXmlSyntaxError, rb_intern("aggregate"), 1, rb_errors);
548
+ if (RB_TEST(exception)) {
549
+ rb_exc_raise(exception);
550
+ } else {
551
+ rb_raise(rb_eRuntimeError, "Error pulling: %d", status);
552
+ }
538
553
  }
539
554
 
540
555
  /*
@@ -551,7 +566,7 @@ inner_xml(VALUE self)
551
566
  xmlChar *value;
552
567
  VALUE str;
553
568
 
554
- TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
569
+ TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
555
570
 
556
571
  value = xmlTextReaderReadInnerXml(reader);
557
572
 
@@ -578,7 +593,7 @@ outer_xml(VALUE self)
578
593
  xmlChar *value;
579
594
  VALUE str = Qnil;
580
595
 
581
- TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
596
+ TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
582
597
 
583
598
  value = xmlTextReaderReadOuterXml(reader);
584
599
 
@@ -593,11 +608,13 @@ outer_xml(VALUE self)
593
608
  * call-seq:
594
609
  * from_memory(string, url = nil, encoding = nil, options = 0)
595
610
  *
596
- * Create a new reader that parses +string+
611
+ * Create a new Reader to parse a String.
597
612
  */
598
613
  static VALUE
599
614
  from_memory(int argc, VALUE *argv, VALUE klass)
600
615
  {
616
+ /* TODO: deprecate this method, since Reader.new can handle both memory and IO. It can then
617
+ * become private. */
601
618
  VALUE rb_buffer, rb_url, encoding, rb_options;
602
619
  xmlTextReaderPtr reader;
603
620
  const char *c_url = NULL;
@@ -625,7 +642,7 @@ from_memory(int argc, VALUE *argv, VALUE klass)
625
642
  rb_raise(rb_eRuntimeError, "couldn't create a parser");
626
643
  }
627
644
 
628
- rb_reader = TypedData_Wrap_Struct(klass, &xml_reader_type, reader);
645
+ rb_reader = TypedData_Wrap_Struct(klass, &xml_text_reader_type, reader);
629
646
  args[0] = rb_buffer;
630
647
  args[1] = rb_url;
631
648
  args[2] = encoding;
@@ -638,11 +655,13 @@ from_memory(int argc, VALUE *argv, VALUE klass)
638
655
  * call-seq:
639
656
  * from_io(io, url = nil, encoding = nil, options = 0)
640
657
  *
641
- * Create a new reader that parses +io+
658
+ * Create a new Reader to parse an IO stream.
642
659
  */
643
660
  static VALUE
644
661
  from_io(int argc, VALUE *argv, VALUE klass)
645
662
  {
663
+ /* TODO: deprecate this method, since Reader.new can handle both memory and IO. It can then
664
+ * become private. */
646
665
  VALUE rb_io, rb_url, encoding, rb_options;
647
666
  xmlTextReaderPtr reader;
648
667
  const char *c_url = NULL;
@@ -671,7 +690,7 @@ from_io(int argc, VALUE *argv, VALUE klass)
671
690
  rb_raise(rb_eRuntimeError, "couldn't create a parser");
672
691
  }
673
692
 
674
- rb_reader = TypedData_Wrap_Struct(klass, &xml_reader_type, reader);
693
+ rb_reader = TypedData_Wrap_Struct(klass, &xml_text_reader_type, reader);
675
694
  args[0] = rb_io;
676
695
  args[1] = rb_url;
677
696
  args[2] = encoding;
@@ -691,7 +710,7 @@ empty_element_p(VALUE self)
691
710
  {
692
711
  xmlTextReaderPtr reader;
693
712
 
694
- TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
713
+ TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
695
714
 
696
715
  if (xmlTextReaderIsEmptyElement(reader)) {
697
716
  return Qtrue;
@@ -707,25 +726,23 @@ rb_xml_reader_encoding(VALUE rb_reader)
707
726
  const char *parser_encoding;
708
727
  VALUE constructor_encoding;
709
728
 
729
+ TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_text_reader_type, c_reader);
730
+ parser_encoding = (const char *)xmlTextReaderConstEncoding(c_reader);
731
+ if (parser_encoding) {
732
+ return NOKOGIRI_STR_NEW2(parser_encoding);
733
+ }
734
+
710
735
  constructor_encoding = rb_iv_get(rb_reader, "@encoding");
711
736
  if (RTEST(constructor_encoding)) {
712
737
  return constructor_encoding;
713
738
  }
714
739
 
715
- TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_reader_type, c_reader);
716
- parser_encoding = (const char *)xmlTextReaderConstEncoding(c_reader);
717
- if (parser_encoding == NULL) { return Qnil; }
718
- return NOKOGIRI_STR_NEW2(parser_encoding);
740
+ return Qnil;
719
741
  }
720
742
 
721
743
  void
722
744
  noko_init_xml_reader(void)
723
745
  {
724
- /*
725
- * The Reader parser allows you to effectively pull parse an XML document.
726
- * Once instantiated, call Nokogiri::XML::Reader#each to iterate over each
727
- * node. Note that you may only iterate over the document once!
728
- */
729
746
  cNokogiriXmlReader = rb_define_class_under(mNokogiriXml, "Reader", rb_cObject);
730
747
 
731
748
  rb_undef_alloc_func(cNokogiriXmlReader);
@@ -3,28 +3,22 @@
3
3
  VALUE cNokogiriXmlRelaxNG;
4
4
 
5
5
  static void
6
- xml_relax_ng_deallocate(void *data)
6
+ _noko_xml_relax_ng_deallocate(void *data)
7
7
  {
8
8
  xmlRelaxNGPtr schema = data;
9
9
  xmlRelaxNGFree(schema);
10
10
  }
11
11
 
12
12
  static const rb_data_type_t xml_relax_ng_type = {
13
- .wrap_struct_name = "Nokogiri::XML::RelaxNG",
13
+ .wrap_struct_name = "xmlRelaxNG",
14
14
  .function = {
15
- .dfree = xml_relax_ng_deallocate,
15
+ .dfree = _noko_xml_relax_ng_deallocate,
16
16
  },
17
17
  .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
18
18
  };
19
19
 
20
- /*
21
- * call-seq:
22
- * validate_document(document)
23
- *
24
- * Validate a Nokogiri::XML::Document against this RelaxNG schema.
25
- */
26
20
  static VALUE
27
- validate_document(VALUE self, VALUE document)
21
+ noko_xml_relax_ng__validate_document(VALUE self, VALUE document)
28
22
  {
29
23
  xmlDocPtr doc;
30
24
  xmlRelaxNGPtr schema;
@@ -43,13 +37,11 @@ validate_document(VALUE self, VALUE document)
43
37
  rb_raise(rb_eRuntimeError, "Could not create a validation context");
44
38
  }
45
39
 
46
- #ifdef HAVE_XMLRELAXNGSETVALIDSTRUCTUREDERRORS
47
40
  xmlRelaxNGSetValidStructuredErrors(
48
41
  valid_ctxt,
49
- Nokogiri_error_array_pusher,
42
+ noko__error_array_pusher,
50
43
  (void *)errors
51
44
  );
52
- #endif
53
45
 
54
46
  xmlRelaxNGValidateDoc(valid_ctxt, doc);
55
47
 
@@ -59,8 +51,8 @@ validate_document(VALUE self, VALUE document)
59
51
  }
60
52
 
61
53
  static VALUE
62
- xml_relax_ng_parse_schema(
63
- VALUE klass,
54
+ _noko_xml_relax_ng_parse_schema(
55
+ VALUE rb_class,
64
56
  xmlRelaxNGParserCtxtPtr c_parser_context,
65
57
  VALUE rb_parse_options
66
58
  )
@@ -68,6 +60,7 @@ xml_relax_ng_parse_schema(
68
60
  VALUE rb_errors;
69
61
  VALUE rb_schema;
70
62
  xmlRelaxNGPtr c_schema;
63
+ libxmlStructuredErrorHandlerState handler_state;
71
64
 
72
65
  if (NIL_P(rb_parse_options)) {
73
66
  rb_parse_options = rb_const_get_at(
@@ -77,33 +70,30 @@ xml_relax_ng_parse_schema(
77
70
  }
78
71
 
79
72
  rb_errors = rb_ary_new();
80
- xmlSetStructuredErrorFunc((void *)rb_errors, Nokogiri_error_array_pusher);
81
73
 
82
- #ifdef HAVE_XMLRELAXNGSETPARSERSTRUCTUREDERRORS
74
+ noko__structured_error_func_save_and_set(&handler_state, (void *)rb_errors, noko__error_array_pusher);
83
75
  xmlRelaxNGSetParserStructuredErrors(
84
76
  c_parser_context,
85
- Nokogiri_error_array_pusher,
77
+ noko__error_array_pusher,
86
78
  (void *)rb_errors
87
79
  );
88
- #endif
89
80
 
90
81
  c_schema = xmlRelaxNGParse(c_parser_context);
91
82
 
92
- xmlSetStructuredErrorFunc(NULL, NULL);
93
83
  xmlRelaxNGFreeParserCtxt(c_parser_context);
84
+ noko__structured_error_func_restore(&handler_state);
94
85
 
95
86
  if (NULL == c_schema) {
96
- xmlErrorConstPtr error = xmlGetLastError();
97
- if (error) {
98
- Nokogiri_error_raise(NULL, error);
87
+ VALUE exception = rb_funcall(cNokogiriXmlSyntaxError, rb_intern("aggregate"), 1, rb_errors);
88
+
89
+ if (RB_TEST(exception)) {
90
+ rb_exc_raise(exception);
99
91
  } else {
100
92
  rb_raise(rb_eRuntimeError, "Could not parse document");
101
93
  }
102
-
103
- return Qnil;
104
94
  }
105
95
 
106
- rb_schema = TypedData_Wrap_Struct(klass, &xml_relax_ng_type, c_schema);
96
+ rb_schema = TypedData_Wrap_Struct(rb_class, &xml_relax_ng_type, c_schema);
107
97
  rb_iv_set(rb_schema, "@errors", rb_errors);
108
98
  rb_iv_set(rb_schema, "@parse_options", rb_parse_options);
109
99
 
@@ -111,37 +101,27 @@ xml_relax_ng_parse_schema(
111
101
  }
112
102
 
113
103
  /*
114
- * call-seq:
115
- * read_memory(string)
104
+ * :call-seq:
105
+ * from_document(document) → Nokogiri::XML::RelaxNG
106
+ * from_document(document, parse_options) → Nokogiri::XML::RelaxNG
116
107
  *
117
- * Create a new RelaxNG from the contents of +string+
118
- */
119
- static VALUE
120
- read_memory(int argc, VALUE *argv, VALUE klass)
121
- {
122
- VALUE rb_content;
123
- VALUE rb_parse_options;
124
- xmlRelaxNGParserCtxtPtr c_parser_context;
125
-
126
- rb_scan_args(argc, argv, "11", &rb_content, &rb_parse_options);
127
-
128
- c_parser_context = xmlRelaxNGNewMemParserCtxt(
129
- (const char *)StringValuePtr(rb_content),
130
- (int)RSTRING_LEN(rb_content)
131
- );
132
-
133
- return xml_relax_ng_parse_schema(klass, c_parser_context, rb_parse_options);
134
- }
135
-
136
- /*
137
- * call-seq:
138
- * from_document(doc)
108
+ * Parse a RELAX NG schema definition from a Document to create a new Nokogiri::XML::RelaxNG.
109
+ *
110
+ * [Parameters]
111
+ * - +document+ (XML::Document) A document containing the RELAX NG schema definition
112
+ * - +parse_options+ (Nokogiri::XML::ParseOptions)
113
+ * Defaults to ParseOptions::DEFAULT_SCHEMA ⚠ Unused
114
+ *
115
+ * [Returns] Nokogiri::XML::RelaxNG
139
116
  *
140
- * Create a new RelaxNG schema from the Nokogiri::XML::Document +doc+
117
+ * +parse_options+ is currently unused by this method and is present only as a placeholder for
118
+ * future functionality.
141
119
  */
142
120
  static VALUE
143
- from_document(int argc, VALUE *argv, VALUE klass)
121
+ noko_xml_relax_ng_s_from_document(int argc, VALUE *argv, VALUE rb_class)
144
122
  {
123
+ /* TODO: deprecate this method and put file-or-string logic into .new so that becomes the
124
+ * preferred entry point, and this can become a private method */
145
125
  VALUE rb_document;
146
126
  VALUE rb_parse_options;
147
127
  xmlDocPtr c_document;
@@ -154,7 +134,7 @@ from_document(int argc, VALUE *argv, VALUE klass)
154
134
 
155
135
  c_parser_context = xmlRelaxNGNewDocParserCtxt(c_document);
156
136
 
157
- return xml_relax_ng_parse_schema(klass, c_parser_context, rb_parse_options);
137
+ return _noko_xml_relax_ng_parse_schema(rb_class, c_parser_context, rb_parse_options);
158
138
  }
159
139
 
160
140
  void
@@ -163,8 +143,7 @@ noko_init_xml_relax_ng(void)
163
143
  assert(cNokogiriXmlSchema);
164
144
  cNokogiriXmlRelaxNG = rb_define_class_under(mNokogiriXml, "RelaxNG", cNokogiriXmlSchema);
165
145
 
166
- rb_define_singleton_method(cNokogiriXmlRelaxNG, "read_memory", read_memory, -1);
167
- rb_define_singleton_method(cNokogiriXmlRelaxNG, "from_document", from_document, -1);
146
+ rb_define_singleton_method(cNokogiriXmlRelaxNG, "from_document", noko_xml_relax_ng_s_from_document, -1);
168
147
 
169
- rb_define_private_method(cNokogiriXmlRelaxNG, "validate_document", validate_document, 1);
148
+ rb_define_private_method(cNokogiriXmlRelaxNG, "validate_document", noko_xml_relax_ng__validate_document, 1);
170
149
  }