nokogiri 1.14.2 → 1.16.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (85) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +19 -15
  3. data/README.md +4 -1
  4. data/dependencies.yml +9 -8
  5. data/ext/nokogiri/extconf.rb +69 -26
  6. data/ext/nokogiri/html4_document.c +1 -2
  7. data/ext/nokogiri/html4_element_description.c +19 -14
  8. data/ext/nokogiri/html4_sax_parser_context.c +10 -16
  9. data/ext/nokogiri/html4_sax_push_parser.c +3 -3
  10. data/ext/nokogiri/nokogiri.c +46 -24
  11. data/ext/nokogiri/nokogiri.h +23 -5
  12. data/ext/nokogiri/test_global_handlers.c +1 -1
  13. data/ext/nokogiri/xml_attr.c +1 -1
  14. data/ext/nokogiri/xml_cdata.c +30 -17
  15. data/ext/nokogiri/xml_comment.c +1 -1
  16. data/ext/nokogiri/xml_document.c +113 -25
  17. data/ext/nokogiri/xml_document_fragment.c +1 -1
  18. data/ext/nokogiri/xml_dtd.c +1 -1
  19. data/ext/nokogiri/xml_element_content.c +32 -29
  20. data/ext/nokogiri/xml_element_decl.c +5 -5
  21. data/ext/nokogiri/xml_encoding_handler.c +12 -4
  22. data/ext/nokogiri/xml_entity_reference.c +1 -1
  23. data/ext/nokogiri/xml_namespace.c +11 -16
  24. data/ext/nokogiri/xml_node.c +13 -16
  25. data/ext/nokogiri/xml_node_set.c +125 -105
  26. data/ext/nokogiri/xml_processing_instruction.c +1 -1
  27. data/ext/nokogiri/xml_reader.c +61 -74
  28. data/ext/nokogiri/xml_relax_ng.c +66 -79
  29. data/ext/nokogiri/xml_sax_parser.c +24 -5
  30. data/ext/nokogiri/xml_sax_parser_context.c +50 -25
  31. data/ext/nokogiri/xml_sax_push_parser.c +30 -9
  32. data/ext/nokogiri/xml_schema.c +94 -115
  33. data/ext/nokogiri/xml_syntax_error.c +3 -3
  34. data/ext/nokogiri/xml_text.c +26 -13
  35. data/ext/nokogiri/xml_xpath_context.c +153 -83
  36. data/ext/nokogiri/xslt_stylesheet.c +111 -53
  37. data/gumbo-parser/Makefile +18 -0
  38. data/gumbo-parser/src/error.c +8 -4
  39. data/gumbo-parser/src/foreign_attrs.c +13 -14
  40. data/gumbo-parser/src/foreign_attrs.gperf +1 -1
  41. data/gumbo-parser/src/parser.c +21 -5
  42. data/gumbo-parser/src/tokenizer.c +1 -0
  43. data/lib/nokogiri/css/parser_extras.rb +1 -1
  44. data/lib/nokogiri/css/xpath_visitor.rb +3 -23
  45. data/lib/nokogiri/extension.rb +1 -1
  46. data/lib/nokogiri/html4/document.rb +1 -1
  47. data/lib/nokogiri/html4/document_fragment.rb +1 -1
  48. data/lib/nokogiri/html4/element_description_defaults.rb +1821 -353
  49. data/lib/nokogiri/html4/encoding_reader.rb +1 -1
  50. data/lib/nokogiri/html5/document_fragment.rb +1 -1
  51. data/lib/nokogiri/html5/node.rb +5 -0
  52. data/lib/nokogiri/html5.rb +0 -63
  53. data/lib/nokogiri/jruby/nokogiri_jars.rb +9 -9
  54. data/lib/nokogiri/version/constant.rb +1 -1
  55. data/lib/nokogiri/version/info.rb +6 -5
  56. data/lib/nokogiri/xml/attr.rb +2 -2
  57. data/lib/nokogiri/xml/attribute_decl.rb +4 -2
  58. data/lib/nokogiri/xml/document.rb +4 -5
  59. data/lib/nokogiri/xml/document_fragment.rb +3 -3
  60. data/lib/nokogiri/xml/element_content.rb +10 -2
  61. data/lib/nokogiri/xml/element_decl.rb +4 -2
  62. data/lib/nokogiri/xml/entity_decl.rb +4 -2
  63. data/lib/nokogiri/xml/namespace.rb +1 -2
  64. data/lib/nokogiri/xml/node/save_options.rb +8 -0
  65. data/lib/nokogiri/xml/node.rb +53 -37
  66. data/lib/nokogiri/xml/node_set.rb +3 -3
  67. data/lib/nokogiri/xml/pp/node.rb +23 -12
  68. data/lib/nokogiri/xml/reader.rb +10 -9
  69. data/lib/nokogiri/xml/sax/document.rb +1 -1
  70. data/lib/nokogiri/xml/searchable.rb +21 -13
  71. data/lib/nokogiri/xml/syntax_error.rb +1 -1
  72. data/lib/nokogiri/xml.rb +1 -1
  73. data/lib/nokogiri/xslt/stylesheet.rb +29 -7
  74. data/lib/nokogiri/xslt.rb +74 -4
  75. data/lib/nokogiri.rb +13 -5
  76. data/lib/xsd/xmlparser/nokogiri.rb +1 -1
  77. data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
  78. data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
  79. data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
  80. data/ports/archives/libxml2-2.12.8.tar.xz +0 -0
  81. data/ports/archives/libxslt-1.1.39.tar.xz +0 -0
  82. metadata +16 -12
  83. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +0 -3037
  84. data/ports/archives/libxml2-2.10.3.tar.xz +0 -0
  85. data/ports/archives/libxslt-1.1.37.tar.xz +0 -0
@@ -3,11 +3,26 @@
3
3
  VALUE cNokogiriXmlReader;
4
4
 
5
5
  static void
6
- dealloc(xmlTextReaderPtr reader)
6
+ xml_reader_deallocate(void *data)
7
7
  {
8
+ // free the document separately because we _may_ have triggered preservation by calling
9
+ // xmlTextReaderCurrentDoc during a read_more.
10
+ xmlTextReaderPtr reader = data;
11
+ xmlDocPtr doc = xmlTextReaderCurrentDoc(reader);
8
12
  xmlFreeTextReader(reader);
13
+ if (doc) {
14
+ xmlFreeDoc(doc);
15
+ }
9
16
  }
10
17
 
18
+ static const rb_data_type_t xml_reader_type = {
19
+ .wrap_struct_name = "Nokogiri::XML::Reader",
20
+ .function = {
21
+ .dfree = xml_reader_deallocate,
22
+ },
23
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
24
+ };
25
+
11
26
  static int
12
27
  has_attributes(xmlTextReaderPtr reader)
13
28
  {
@@ -69,7 +84,7 @@ default_eh(VALUE self)
69
84
  xmlTextReaderPtr reader;
70
85
  int eh;
71
86
 
72
- Data_Get_Struct(self, xmlTextReader, reader);
87
+ TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
73
88
  eh = xmlTextReaderIsDefault(reader);
74
89
  if (eh == 0) { return Qfalse; }
75
90
  if (eh == 1) { return Qtrue; }
@@ -89,7 +104,7 @@ value_eh(VALUE self)
89
104
  xmlTextReaderPtr reader;
90
105
  int eh;
91
106
 
92
- Data_Get_Struct(self, xmlTextReader, reader);
107
+ TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
93
108
  eh = xmlTextReaderHasValue(reader);
94
109
  if (eh == 0) { return Qfalse; }
95
110
  if (eh == 1) { return Qtrue; }
@@ -109,7 +124,7 @@ attributes_eh(VALUE self)
109
124
  xmlTextReaderPtr reader;
110
125
  int eh;
111
126
 
112
- Data_Get_Struct(self, xmlTextReader, reader);
127
+ TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
113
128
  eh = has_attributes(reader);
114
129
  if (eh == 0) { return Qfalse; }
115
130
  if (eh == 1) { return Qtrue; }
@@ -131,7 +146,7 @@ rb_xml_reader_namespaces(VALUE rb_reader)
131
146
  xmlNodePtr c_node;
132
147
  VALUE rb_errors;
133
148
 
134
- Data_Get_Struct(rb_reader, xmlTextReader, c_reader);
149
+ TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_reader_type, c_reader);
135
150
 
136
151
  if (! has_attributes(c_reader)) {
137
152
  return rb_namespaces ;
@@ -157,48 +172,6 @@ rb_xml_reader_namespaces(VALUE rb_reader)
157
172
  return rb_namespaces ;
158
173
  }
159
174
 
160
- /*
161
- :call-seq: attribute_nodes() → Array<Nokogiri::XML::Attr>
162
-
163
- Get the attributes of the current node as an Array of XML:Attr
164
-
165
- ⚠ This method is deprecated and unsafe to use. It will be removed in a future version of Nokogiri.
166
-
167
- See related: #attribute_hash, #attributes
168
- */
169
- static VALUE
170
- rb_xml_reader_attribute_nodes(VALUE rb_reader)
171
- {
172
- xmlTextReaderPtr c_reader;
173
- xmlNodePtr c_node;
174
- VALUE attr_nodes;
175
- int j;
176
-
177
- // TODO: deprecated, remove in Nokogiri v1.15, see https://github.com/sparklemotion/nokogiri/issues/2598
178
- // After removal, we can also remove all the "node_has_a_document" special handling from xml_node.c
179
- NOKO_WARN_DEPRECATION("Reader#attribute_nodes is deprecated and will be removed in a future version of Nokogiri. Please use Reader#attribute_hash instead.");
180
-
181
- Data_Get_Struct(rb_reader, xmlTextReader, c_reader);
182
-
183
- if (! has_attributes(c_reader)) {
184
- return rb_ary_new() ;
185
- }
186
-
187
- c_node = xmlTextReaderExpand(c_reader);
188
- if (c_node == NULL) {
189
- return Qnil;
190
- }
191
-
192
- attr_nodes = noko_xml_node_attrs(c_node);
193
-
194
- /* ensure that the Reader won't be GCed as long as a node is referenced */
195
- for (j = 0 ; j < RARRAY_LEN(attr_nodes) ; j++) {
196
- rb_iv_set(rb_ary_entry(attr_nodes, j), "@reader", rb_reader);
197
- }
198
-
199
- return attr_nodes;
200
- }
201
-
202
175
  /*
203
176
  :call-seq: attribute_hash() → Hash<String ⇒ String>
204
177
 
@@ -215,7 +188,7 @@ rb_xml_reader_attribute_hash(VALUE rb_reader)
215
188
  xmlAttrPtr c_property;
216
189
  VALUE rb_errors;
217
190
 
218
- Data_Get_Struct(rb_reader, xmlTextReader, c_reader);
191
+ TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_reader_type, c_reader);
219
192
 
220
193
  if (!has_attributes(c_reader)) {
221
194
  return rb_attributes;
@@ -268,7 +241,7 @@ attribute_at(VALUE self, VALUE index)
268
241
  xmlChar *value;
269
242
  VALUE rb_value;
270
243
 
271
- Data_Get_Struct(self, xmlTextReader, reader);
244
+ TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
272
245
 
273
246
  if (NIL_P(index)) { return Qnil; }
274
247
  index = rb_Integer(index);
@@ -297,7 +270,7 @@ reader_attribute(VALUE self, VALUE name)
297
270
  xmlChar *value ;
298
271
  VALUE rb_value;
299
272
 
300
- Data_Get_Struct(self, xmlTextReader, reader);
273
+ TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
301
274
 
302
275
  if (NIL_P(name)) { return Qnil; }
303
276
  name = StringValue(name) ;
@@ -322,7 +295,7 @@ attribute_count(VALUE self)
322
295
  xmlTextReaderPtr reader;
323
296
  int count;
324
297
 
325
- Data_Get_Struct(self, xmlTextReader, reader);
298
+ TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
326
299
  count = xmlTextReaderAttributeCount(reader);
327
300
  if (count == -1) { return Qnil; }
328
301
 
@@ -341,7 +314,7 @@ depth(VALUE self)
341
314
  xmlTextReaderPtr reader;
342
315
  int depth;
343
316
 
344
- Data_Get_Struct(self, xmlTextReader, reader);
317
+ TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
345
318
  depth = xmlTextReaderDepth(reader);
346
319
  if (depth == -1) { return Qnil; }
347
320
 
@@ -360,7 +333,7 @@ xml_version(VALUE self)
360
333
  xmlTextReaderPtr reader;
361
334
  const char *version;
362
335
 
363
- Data_Get_Struct(self, xmlTextReader, reader);
336
+ TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
364
337
  version = (const char *)xmlTextReaderConstXmlVersion(reader);
365
338
  if (version == NULL) { return Qnil; }
366
339
 
@@ -379,7 +352,7 @@ lang(VALUE self)
379
352
  xmlTextReaderPtr reader;
380
353
  const char *lang;
381
354
 
382
- Data_Get_Struct(self, xmlTextReader, reader);
355
+ TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
383
356
  lang = (const char *)xmlTextReaderConstXmlLang(reader);
384
357
  if (lang == NULL) { return Qnil; }
385
358
 
@@ -398,7 +371,7 @@ value(VALUE self)
398
371
  xmlTextReaderPtr reader;
399
372
  const char *value;
400
373
 
401
- Data_Get_Struct(self, xmlTextReader, reader);
374
+ TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
402
375
  value = (const char *)xmlTextReaderConstValue(reader);
403
376
  if (value == NULL) { return Qnil; }
404
377
 
@@ -417,7 +390,7 @@ prefix(VALUE self)
417
390
  xmlTextReaderPtr reader;
418
391
  const char *prefix;
419
392
 
420
- Data_Get_Struct(self, xmlTextReader, reader);
393
+ TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
421
394
  prefix = (const char *)xmlTextReaderConstPrefix(reader);
422
395
  if (prefix == NULL) { return Qnil; }
423
396
 
@@ -436,7 +409,7 @@ namespace_uri(VALUE self)
436
409
  xmlTextReaderPtr reader;
437
410
  const char *uri;
438
411
 
439
- Data_Get_Struct(self, xmlTextReader, reader);
412
+ TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
440
413
  uri = (const char *)xmlTextReaderConstNamespaceUri(reader);
441
414
  if (uri == NULL) { return Qnil; }
442
415
 
@@ -455,7 +428,7 @@ local_name(VALUE self)
455
428
  xmlTextReaderPtr reader;
456
429
  const char *name;
457
430
 
458
- Data_Get_Struct(self, xmlTextReader, reader);
431
+ TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
459
432
  name = (const char *)xmlTextReaderConstLocalName(reader);
460
433
  if (name == NULL) { return Qnil; }
461
434
 
@@ -474,7 +447,7 @@ name(VALUE self)
474
447
  xmlTextReaderPtr reader;
475
448
  const char *name;
476
449
 
477
- Data_Get_Struct(self, xmlTextReader, reader);
450
+ TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
478
451
  name = (const char *)xmlTextReaderConstName(reader);
479
452
  if (name == NULL) { return Qnil; }
480
453
 
@@ -494,7 +467,7 @@ rb_xml_reader_base_uri(VALUE rb_reader)
494
467
  xmlTextReaderPtr c_reader;
495
468
  xmlChar *c_base_uri;
496
469
 
497
- Data_Get_Struct(rb_reader, xmlTextReader, c_reader);
470
+ TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_reader_type, c_reader);
498
471
 
499
472
  c_base_uri = xmlTextReaderBaseUri(c_reader);
500
473
  if (c_base_uri == NULL) {
@@ -517,7 +490,7 @@ static VALUE
517
490
  state(VALUE self)
518
491
  {
519
492
  xmlTextReaderPtr reader;
520
- Data_Get_Struct(self, xmlTextReader, reader);
493
+ TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
521
494
  return INT2NUM(xmlTextReaderReadState(reader));
522
495
  }
523
496
 
@@ -531,7 +504,7 @@ static VALUE
531
504
  node_type(VALUE self)
532
505
  {
533
506
  xmlTextReaderPtr reader;
534
- Data_Get_Struct(self, xmlTextReader, reader);
507
+ TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
535
508
  return INT2NUM(xmlTextReaderNodeType(reader));
536
509
  }
537
510
 
@@ -545,11 +518,12 @@ static VALUE
545
518
  read_more(VALUE self)
546
519
  {
547
520
  xmlTextReaderPtr reader;
548
- xmlErrorPtr error;
521
+ xmlErrorConstPtr error;
549
522
  VALUE error_list;
550
523
  int ret;
524
+ xmlDocPtr c_document;
551
525
 
552
- Data_Get_Struct(self, xmlTextReader, reader);
526
+ TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
553
527
 
554
528
  error_list = rb_funcall(self, rb_intern("errors"), 0);
555
529
 
@@ -557,6 +531,17 @@ read_more(VALUE self)
557
531
  ret = xmlTextReaderRead(reader);
558
532
  xmlSetStructuredErrorFunc(NULL, NULL);
559
533
 
534
+ c_document = xmlTextReaderCurrentDoc(reader);
535
+ if (c_document && c_document->encoding == NULL) {
536
+ VALUE constructor_encoding = rb_iv_get(self, "@encoding");
537
+ if (RTEST(constructor_encoding)) {
538
+ c_document->encoding = xmlStrdup(BAD_CAST StringValueCStr(constructor_encoding));
539
+ } else {
540
+ rb_iv_set(self, "@encoding", NOKOGIRI_STR_NEW2("UTF-8"));
541
+ c_document->encoding = xmlStrdup(BAD_CAST "UTF-8");
542
+ }
543
+ }
544
+
560
545
  if (ret == 1) { return self; }
561
546
  if (ret == 0) { return Qnil; }
562
547
 
@@ -584,7 +569,7 @@ inner_xml(VALUE self)
584
569
  xmlChar *value;
585
570
  VALUE str;
586
571
 
587
- Data_Get_Struct(self, xmlTextReader, reader);
572
+ TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
588
573
 
589
574
  value = xmlTextReaderReadInnerXml(reader);
590
575
 
@@ -611,7 +596,7 @@ outer_xml(VALUE self)
611
596
  xmlChar *value;
612
597
  VALUE str = Qnil;
613
598
 
614
- Data_Get_Struct(self, xmlTextReader, reader);
599
+ TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
615
600
 
616
601
  value = xmlTextReaderReadOuterXml(reader);
617
602
 
@@ -658,7 +643,7 @@ from_memory(int argc, VALUE *argv, VALUE klass)
658
643
  rb_raise(rb_eRuntimeError, "couldn't create a parser");
659
644
  }
660
645
 
661
- rb_reader = Data_Wrap_Struct(klass, NULL, dealloc, reader);
646
+ rb_reader = TypedData_Wrap_Struct(klass, &xml_reader_type, reader);
662
647
  args[0] = rb_buffer;
663
648
  args[1] = rb_url;
664
649
  args[2] = encoding;
@@ -704,7 +689,7 @@ from_io(int argc, VALUE *argv, VALUE klass)
704
689
  rb_raise(rb_eRuntimeError, "couldn't create a parser");
705
690
  }
706
691
 
707
- rb_reader = Data_Wrap_Struct(klass, NULL, dealloc, reader);
692
+ rb_reader = TypedData_Wrap_Struct(klass, &xml_reader_type, reader);
708
693
  args[0] = rb_io;
709
694
  args[1] = rb_url;
710
695
  args[2] = encoding;
@@ -724,7 +709,7 @@ empty_element_p(VALUE self)
724
709
  {
725
710
  xmlTextReaderPtr reader;
726
711
 
727
- Data_Get_Struct(self, xmlTextReader, reader);
712
+ TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
728
713
 
729
714
  if (xmlTextReaderIsEmptyElement(reader)) {
730
715
  return Qtrue;
@@ -740,15 +725,18 @@ rb_xml_reader_encoding(VALUE rb_reader)
740
725
  const char *parser_encoding;
741
726
  VALUE constructor_encoding;
742
727
 
728
+ TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_reader_type, c_reader);
729
+ parser_encoding = (const char *)xmlTextReaderConstEncoding(c_reader);
730
+ if (parser_encoding) {
731
+ return NOKOGIRI_STR_NEW2(parser_encoding);
732
+ }
733
+
743
734
  constructor_encoding = rb_iv_get(rb_reader, "@encoding");
744
735
  if (RTEST(constructor_encoding)) {
745
736
  return constructor_encoding;
746
737
  }
747
738
 
748
- Data_Get_Struct(rb_reader, xmlTextReader, c_reader);
749
- parser_encoding = (const char *)xmlTextReaderConstEncoding(c_reader);
750
- if (parser_encoding == NULL) { return Qnil; }
751
- return NOKOGIRI_STR_NEW2(parser_encoding);
739
+ return Qnil;
752
740
  }
753
741
 
754
742
  void
@@ -769,7 +757,6 @@ noko_init_xml_reader(void)
769
757
  rb_define_method(cNokogiriXmlReader, "attribute", reader_attribute, 1);
770
758
  rb_define_method(cNokogiriXmlReader, "attribute_at", attribute_at, 1);
771
759
  rb_define_method(cNokogiriXmlReader, "attribute_count", attribute_count, 0);
772
- rb_define_method(cNokogiriXmlReader, "attribute_nodes", rb_xml_reader_attribute_nodes, 0);
773
760
  rb_define_method(cNokogiriXmlReader, "attribute_hash", rb_xml_reader_attribute_hash, 0);
774
761
  rb_define_method(cNokogiriXmlReader, "attributes?", attributes_eh, 0);
775
762
  rb_define_method(cNokogiriXmlReader, "base_uri", rb_xml_reader_base_uri, 0);
@@ -3,11 +3,20 @@
3
3
  VALUE cNokogiriXmlRelaxNG;
4
4
 
5
5
  static void
6
- dealloc(xmlRelaxNGPtr schema)
6
+ xml_relax_ng_deallocate(void *data)
7
7
  {
8
+ xmlRelaxNGPtr schema = data;
8
9
  xmlRelaxNGFree(schema);
9
10
  }
10
11
 
12
+ static const rb_data_type_t xml_relax_ng_type = {
13
+ .wrap_struct_name = "Nokogiri::XML::RelaxNG",
14
+ .function = {
15
+ .dfree = xml_relax_ng_deallocate,
16
+ },
17
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
18
+ };
19
+
11
20
  /*
12
21
  * call-seq:
13
22
  * validate_document(document)
@@ -22,8 +31,8 @@ validate_document(VALUE self, VALUE document)
22
31
  VALUE errors;
23
32
  xmlRelaxNGValidCtxtPtr valid_ctxt;
24
33
 
25
- Data_Get_Struct(self, xmlRelaxNG, schema);
26
- Data_Get_Struct(document, xmlDoc, doc);
34
+ TypedData_Get_Struct(self, xmlRelaxNG, &xml_relax_ng_type, schema);
35
+ doc = noko_xml_document_unwrap(document);
27
36
 
28
37
  errors = rb_ary_new();
29
38
 
@@ -49,48 +58,42 @@ validate_document(VALUE self, VALUE document)
49
58
  return errors;
50
59
  }
51
60
 
52
- /*
53
- * call-seq:
54
- * read_memory(string)
55
- *
56
- * Create a new RelaxNG from the contents of +string+
57
- */
58
61
  static VALUE
59
- read_memory(int argc, VALUE *argv, VALUE klass)
62
+ xml_relax_ng_parse_schema(
63
+ VALUE klass,
64
+ xmlRelaxNGParserCtxtPtr c_parser_context,
65
+ VALUE rb_parse_options
66
+ )
60
67
  {
61
- VALUE content;
62
- VALUE parse_options;
63
- xmlRelaxNGParserCtxtPtr ctx;
64
- xmlRelaxNGPtr schema;
65
- VALUE errors;
68
+ VALUE rb_errors;
66
69
  VALUE rb_schema;
67
- int scanned_args = 0;
70
+ xmlRelaxNGPtr c_schema;
68
71
 
69
- scanned_args = rb_scan_args(argc, argv, "11", &content, &parse_options);
70
- if (scanned_args == 1) {
71
- parse_options = rb_const_get_at(rb_const_get_at(mNokogiriXml, rb_intern("ParseOptions")), rb_intern("DEFAULT_SCHEMA"));
72
+ if (NIL_P(rb_parse_options)) {
73
+ rb_parse_options = rb_const_get_at(
74
+ rb_const_get_at(mNokogiriXml, rb_intern("ParseOptions")),
75
+ rb_intern("DEFAULT_SCHEMA")
76
+ );
72
77
  }
73
78
 
74
- ctx = xmlRelaxNGNewMemParserCtxt((const char *)StringValuePtr(content), (int)RSTRING_LEN(content));
75
-
76
- errors = rb_ary_new();
77
- xmlSetStructuredErrorFunc((void *)errors, Nokogiri_error_array_pusher);
79
+ rb_errors = rb_ary_new();
80
+ xmlSetStructuredErrorFunc((void *)rb_errors, Nokogiri_error_array_pusher);
78
81
 
79
82
  #ifdef HAVE_XMLRELAXNGSETPARSERSTRUCTUREDERRORS
80
83
  xmlRelaxNGSetParserStructuredErrors(
81
- ctx,
84
+ c_parser_context,
82
85
  Nokogiri_error_array_pusher,
83
- (void *)errors
86
+ (void *)rb_errors
84
87
  );
85
88
  #endif
86
89
 
87
- schema = xmlRelaxNGParse(ctx);
90
+ c_schema = xmlRelaxNGParse(c_parser_context);
88
91
 
89
92
  xmlSetStructuredErrorFunc(NULL, NULL);
90
- xmlRelaxNGFreeParserCtxt(ctx);
93
+ xmlRelaxNGFreeParserCtxt(c_parser_context);
91
94
 
92
- if (NULL == schema) {
93
- xmlErrorPtr error = xmlGetLastError();
95
+ if (NULL == c_schema) {
96
+ xmlErrorConstPtr error = xmlGetLastError();
94
97
  if (error) {
95
98
  Nokogiri_error_raise(NULL, error);
96
99
  } else {
@@ -100,74 +103,58 @@ read_memory(int argc, VALUE *argv, VALUE klass)
100
103
  return Qnil;
101
104
  }
102
105
 
103
- rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema);
104
- rb_iv_set(rb_schema, "@errors", errors);
105
- rb_iv_set(rb_schema, "@parse_options", parse_options);
106
+ rb_schema = TypedData_Wrap_Struct(klass, &xml_relax_ng_type, c_schema);
107
+ rb_iv_set(rb_schema, "@errors", rb_errors);
108
+ rb_iv_set(rb_schema, "@parse_options", rb_parse_options);
106
109
 
107
110
  return rb_schema;
108
111
  }
109
112
 
110
113
  /*
111
114
  * call-seq:
112
- * from_document(doc)
115
+ * read_memory(string)
113
116
  *
114
- * Create a new RelaxNG schema from the Nokogiri::XML::Document +doc+
117
+ * Create a new RelaxNG from the contents of +string+
115
118
  */
116
119
  static VALUE
117
- from_document(int argc, VALUE *argv, VALUE klass)
120
+ read_memory(int argc, VALUE *argv, VALUE klass)
118
121
  {
119
- VALUE document;
120
- VALUE parse_options;
121
- xmlDocPtr doc;
122
- xmlRelaxNGParserCtxtPtr ctx;
123
- xmlRelaxNGPtr schema;
124
- VALUE errors;
125
- VALUE rb_schema;
126
- int scanned_args = 0;
127
-
128
- scanned_args = rb_scan_args(argc, argv, "11", &document, &parse_options);
129
-
130
- Data_Get_Struct(document, xmlDoc, doc);
131
- doc = doc->doc; /* In case someone passes us a node. ugh. */
132
-
133
- if (scanned_args == 1) {
134
- parse_options = rb_const_get_at(rb_const_get_at(mNokogiriXml, rb_intern("ParseOptions")), rb_intern("DEFAULT_SCHEMA"));
135
- }
122
+ VALUE rb_content;
123
+ VALUE rb_parse_options;
124
+ xmlRelaxNGParserCtxtPtr c_parser_context;
136
125
 
137
- ctx = xmlRelaxNGNewDocParserCtxt(doc);
126
+ rb_scan_args(argc, argv, "11", &rb_content, &rb_parse_options);
138
127
 
139
- errors = rb_ary_new();
140
- xmlSetStructuredErrorFunc((void *)errors, Nokogiri_error_array_pusher);
141
-
142
- #ifdef HAVE_XMLRELAXNGSETPARSERSTRUCTUREDERRORS
143
- xmlRelaxNGSetParserStructuredErrors(
144
- ctx,
145
- Nokogiri_error_array_pusher,
146
- (void *)errors
147
- );
148
- #endif
128
+ c_parser_context = xmlRelaxNGNewMemParserCtxt(
129
+ (const char *)StringValuePtr(rb_content),
130
+ (int)RSTRING_LEN(rb_content)
131
+ );
149
132
 
150
- schema = xmlRelaxNGParse(ctx);
133
+ return xml_relax_ng_parse_schema(klass, c_parser_context, rb_parse_options);
134
+ }
151
135
 
152
- xmlSetStructuredErrorFunc(NULL, NULL);
153
- xmlRelaxNGFreeParserCtxt(ctx);
136
+ /*
137
+ * call-seq:
138
+ * from_document(doc)
139
+ *
140
+ * Create a new RelaxNG schema from the Nokogiri::XML::Document +doc+
141
+ */
142
+ static VALUE
143
+ from_document(int argc, VALUE *argv, VALUE klass)
144
+ {
145
+ VALUE rb_document;
146
+ VALUE rb_parse_options;
147
+ xmlDocPtr c_document;
148
+ xmlRelaxNGParserCtxtPtr c_parser_context;
154
149
 
155
- if (NULL == schema) {
156
- xmlErrorPtr error = xmlGetLastError();
157
- if (error) {
158
- Nokogiri_error_raise(NULL, error);
159
- } else {
160
- rb_raise(rb_eRuntimeError, "Could not parse document");
161
- }
150
+ rb_scan_args(argc, argv, "11", &rb_document, &rb_parse_options);
162
151
 
163
- return Qnil;
164
- }
152
+ c_document = noko_xml_document_unwrap(rb_document);
153
+ c_document = c_document->doc; /* In case someone passes us a node. ugh. */
165
154
 
166
- rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema);
167
- rb_iv_set(rb_schema, "@errors", errors);
168
- rb_iv_set(rb_schema, "@parse_options", parse_options);
155
+ c_parser_context = xmlRelaxNGNewDocParserCtxt(c_document);
169
156
 
170
- return rb_schema;
157
+ return xml_relax_ng_parse_schema(klass, c_parser_context, rb_parse_options);
171
158
  }
172
159
 
173
160
  void
@@ -265,16 +265,27 @@ processing_instruction(void *ctx, const xmlChar *name, const xmlChar *content)
265
265
  );
266
266
  }
267
267
 
268
- static void
269
- deallocate(xmlSAXHandlerPtr handler)
268
+ static size_t
269
+ memsize(const void *data)
270
270
  {
271
- ruby_xfree(handler);
271
+ return sizeof(xmlSAXHandler);
272
272
  }
273
273
 
274
+ /* Used by Nokogiri::XML::SAX::Parser and Nokogiri::HTML::SAX::Parser */
275
+ static const rb_data_type_t noko_sax_handler_type = {
276
+ .wrap_struct_name = "Nokogiri::SAXHandler",
277
+ .function = {
278
+ .dfree = RUBY_TYPED_DEFAULT_FREE,
279
+ .dsize = memsize
280
+ },
281
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
282
+ };
283
+
274
284
  static VALUE
275
285
  allocate(VALUE klass)
276
286
  {
277
- xmlSAXHandlerPtr handler = ruby_xcalloc((size_t)1, sizeof(xmlSAXHandler));
287
+ xmlSAXHandlerPtr handler;
288
+ VALUE self = TypedData_Make_Struct(klass, xmlSAXHandler, &noko_sax_handler_type, handler);
278
289
 
279
290
  handler->startDocument = start_document;
280
291
  handler->endDocument = end_document;
@@ -290,7 +301,15 @@ allocate(VALUE klass)
290
301
  handler->processingInstruction = processing_instruction;
291
302
  handler->initialized = XML_SAX2_MAGIC;
292
303
 
293
- return Data_Wrap_Struct(klass, NULL, deallocate, handler);
304
+ return self;
305
+ }
306
+
307
+ xmlSAXHandlerPtr
308
+ noko_sax_handler_unwrap(VALUE rb_sax_handler)
309
+ {
310
+ xmlSAXHandlerPtr c_sax_handler;
311
+ TypedData_Get_Struct(rb_sax_handler, xmlSAXHandler, &noko_sax_handler_type, c_sax_handler);
312
+ return c_sax_handler;
294
313
  }
295
314
 
296
315
  void