nokogiri 1.13.8 → 1.15.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (120)
  1. checksums.yaml +4 -4
  2. data/Gemfile +40 -0
  3. data/LICENSE-DEPENDENCIES.md +830 -509
  4. data/LICENSE.md +1 -1
  5. data/README.md +18 -11
  6. data/dependencies.yml +33 -15
  7. data/ext/nokogiri/extconf.rb +164 -46
  8. data/ext/nokogiri/gumbo.c +20 -10
  9. data/ext/nokogiri/html4_document.c +3 -4
  10. data/ext/nokogiri/html4_element_description.c +20 -15
  11. data/ext/nokogiri/html4_entity_lookup.c +2 -2
  12. data/ext/nokogiri/html4_sax_parser_context.c +11 -22
  13. data/ext/nokogiri/html4_sax_push_parser.c +3 -3
  14. data/ext/nokogiri/nokogiri.c +84 -75
  15. data/ext/nokogiri/nokogiri.h +31 -16
  16. data/ext/nokogiri/test_global_handlers.c +1 -1
  17. data/ext/nokogiri/xml_attr.c +2 -2
  18. data/ext/nokogiri/xml_attribute_decl.c +2 -2
  19. data/ext/nokogiri/xml_cdata.c +32 -18
  20. data/ext/nokogiri/xml_comment.c +2 -2
  21. data/ext/nokogiri/xml_document.c +127 -34
  22. data/ext/nokogiri/xml_document_fragment.c +2 -2
  23. data/ext/nokogiri/xml_dtd.c +2 -2
  24. data/ext/nokogiri/xml_element_content.c +34 -31
  25. data/ext/nokogiri/xml_element_decl.c +7 -7
  26. data/ext/nokogiri/xml_encoding_handler.c +15 -7
  27. data/ext/nokogiri/xml_entity_decl.c +1 -1
  28. data/ext/nokogiri/xml_entity_reference.c +2 -2
  29. data/ext/nokogiri/xml_namespace.c +79 -14
  30. data/ext/nokogiri/xml_node.c +300 -34
  31. data/ext/nokogiri/xml_node_set.c +125 -107
  32. data/ext/nokogiri/xml_processing_instruction.c +2 -2
  33. data/ext/nokogiri/xml_reader.c +81 -48
  34. data/ext/nokogiri/xml_relax_ng.c +66 -81
  35. data/ext/nokogiri/xml_sax_parser.c +45 -20
  36. data/ext/nokogiri/xml_sax_parser_context.c +46 -30
  37. data/ext/nokogiri/xml_sax_push_parser.c +30 -11
  38. data/ext/nokogiri/xml_schema.c +95 -117
  39. data/ext/nokogiri/xml_syntax_error.c +1 -1
  40. data/ext/nokogiri/xml_text.c +28 -14
  41. data/ext/nokogiri/xml_xpath_context.c +216 -136
  42. data/ext/nokogiri/xslt_stylesheet.c +118 -64
  43. data/gumbo-parser/Makefile +10 -0
  44. data/gumbo-parser/src/attribute.h +1 -1
  45. data/gumbo-parser/src/error.c +10 -6
  46. data/gumbo-parser/src/error.h +1 -1
  47. data/gumbo-parser/src/foreign_attrs.c +15 -16
  48. data/gumbo-parser/src/foreign_attrs.gperf +1 -1
  49. data/gumbo-parser/src/{gumbo.h → nokogiri_gumbo.h} +1 -0
  50. data/gumbo-parser/src/parser.c +21 -5
  51. data/gumbo-parser/src/replacement.h +1 -1
  52. data/gumbo-parser/src/string_buffer.h +1 -1
  53. data/gumbo-parser/src/string_piece.c +1 -1
  54. data/gumbo-parser/src/svg_attrs.c +2 -2
  55. data/gumbo-parser/src/svg_tags.c +2 -2
  56. data/gumbo-parser/src/tag.c +2 -1
  57. data/gumbo-parser/src/tag_lookup.c +7 -7
  58. data/gumbo-parser/src/tag_lookup.gperf +1 -0
  59. data/gumbo-parser/src/tag_lookup.h +1 -1
  60. data/gumbo-parser/src/token_buffer.h +1 -1
  61. data/gumbo-parser/src/tokenizer.c +1 -1
  62. data/gumbo-parser/src/tokenizer.h +1 -1
  63. data/gumbo-parser/src/utf8.c +1 -1
  64. data/gumbo-parser/src/utf8.h +1 -1
  65. data/gumbo-parser/src/util.c +1 -3
  66. data/gumbo-parser/src/util.h +4 -0
  67. data/gumbo-parser/src/vector.h +1 -1
  68. data/lib/nokogiri/css/node.rb +2 -2
  69. data/lib/nokogiri/css/xpath_visitor.rb +7 -5
  70. data/lib/nokogiri/css.rb +6 -0
  71. data/lib/nokogiri/decorators/slop.rb +1 -1
  72. data/lib/nokogiri/encoding_handler.rb +57 -0
  73. data/lib/nokogiri/extension.rb +4 -3
  74. data/lib/nokogiri/html4/document.rb +2 -121
  75. data/lib/nokogiri/html4/document_fragment.rb +1 -1
  76. data/lib/nokogiri/html4/element_description_defaults.rb +1827 -365
  77. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  78. data/lib/nokogiri/html4.rb +1 -0
  79. data/lib/nokogiri/html5/document.rb +113 -36
  80. data/lib/nokogiri/html5/document_fragment.rb +10 -3
  81. data/lib/nokogiri/html5/node.rb +8 -5
  82. data/lib/nokogiri/html5.rb +130 -216
  83. data/lib/nokogiri/jruby/dependencies.rb +1 -19
  84. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  85. data/lib/nokogiri/version/constant.rb +1 -1
  86. data/lib/nokogiri/version/info.rb +11 -10
  87. data/lib/nokogiri/xml/attr.rb +49 -0
  88. data/lib/nokogiri/xml/attribute_decl.rb +4 -2
  89. data/lib/nokogiri/xml/builder.rb +1 -1
  90. data/lib/nokogiri/xml/document.rb +102 -55
  91. data/lib/nokogiri/xml/document_fragment.rb +50 -7
  92. data/lib/nokogiri/xml/element_content.rb +10 -2
  93. data/lib/nokogiri/xml/element_decl.rb +4 -2
  94. data/lib/nokogiri/xml/entity_decl.rb +4 -2
  95. data/lib/nokogiri/xml/namespace.rb +42 -0
  96. data/lib/nokogiri/xml/node/save_options.rb +14 -4
  97. data/lib/nokogiri/xml/node.rb +212 -48
  98. data/lib/nokogiri/xml/node_set.rb +88 -9
  99. data/lib/nokogiri/xml/parse_options.rb +129 -50
  100. data/lib/nokogiri/xml/pp/node.rb +28 -15
  101. data/lib/nokogiri/xml/processing_instruction.rb +2 -1
  102. data/lib/nokogiri/xml/sax/document.rb +1 -1
  103. data/lib/nokogiri/xml/sax/parser.rb +2 -3
  104. data/lib/nokogiri/xml/searchable.rb +18 -10
  105. data/lib/nokogiri/xslt.rb +74 -4
  106. data/lib/nokogiri.rb +15 -15
  107. data/lib/xsd/xmlparser/nokogiri.rb +4 -2
  108. data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
  109. data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
  110. data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
  111. data/ports/archives/libxml2-2.11.7.tar.xz +0 -0
  112. data/ports/archives/libxslt-1.1.39.tar.xz +0 -0
  113. metadata +19 -242
  114. data/patches/libxml2/0004-use-glibc-strlen.patch +0 -53
  115. data/patches/libxml2/0005-avoid-isnan-isinf.patch +0 -81
  116. data/patches/libxml2/0006-update-automake-files-for-arm64.patch +0 -3040
  117. data/patches/libxml2/0008-htmlParseComment-handle-abruptly-closed-comments.patch +0 -61
  118. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +0 -3037
  119. data/ports/archives/libxml2-2.9.14.tar.xz +0 -0
  120. data/ports/archives/libxslt-1.1.35.tar.xz +0 -0
@@ -3,13 +3,20 @@
3
3
  VALUE cNokogiriXmlReader;
4
4
 
5
5
  static void
6
- dealloc(xmlTextReaderPtr reader)
6
+ xml_reader_deallocate(void *data)
7
7
  {
8
- NOKOGIRI_DEBUG_START(reader);
8
+ xmlTextReaderPtr reader = data;
9
9
  xmlFreeTextReader(reader);
10
- NOKOGIRI_DEBUG_END(reader);
11
10
  }
12
11
 
12
+ static const rb_data_type_t xml_reader_type = {
13
+ .wrap_struct_name = "Nokogiri::XML::Reader",
14
+ .function = {
15
+ .dfree = xml_reader_deallocate,
16
+ },
17
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
18
+ };
19
+
13
20
  static int
14
21
  has_attributes(xmlTextReaderPtr reader)
15
22
  {
@@ -71,7 +78,7 @@ default_eh(VALUE self)
71
78
  xmlTextReaderPtr reader;
72
79
  int eh;
73
80
 
74
- Data_Get_Struct(self, xmlTextReader, reader);
81
+ TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
75
82
  eh = xmlTextReaderIsDefault(reader);
76
83
  if (eh == 0) { return Qfalse; }
77
84
  if (eh == 1) { return Qtrue; }
@@ -91,7 +98,7 @@ value_eh(VALUE self)
91
98
  xmlTextReaderPtr reader;
92
99
  int eh;
93
100
 
94
- Data_Get_Struct(self, xmlTextReader, reader);
101
+ TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
95
102
  eh = xmlTextReaderHasValue(reader);
96
103
  if (eh == 0) { return Qfalse; }
97
104
  if (eh == 1) { return Qtrue; }
@@ -111,7 +118,7 @@ attributes_eh(VALUE self)
111
118
  xmlTextReaderPtr reader;
112
119
  int eh;
113
120
 
114
- Data_Get_Struct(self, xmlTextReader, reader);
121
+ TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
115
122
  eh = has_attributes(reader);
116
123
  if (eh == 0) { return Qfalse; }
117
124
  if (eh == 1) { return Qtrue; }
@@ -126,26 +133,37 @@ attributes_eh(VALUE self)
126
133
  * Get a hash of namespaces for this Node
127
134
  */
128
135
  static VALUE
129
- namespaces(VALUE self)
136
+ rb_xml_reader_namespaces(VALUE rb_reader)
130
137
  {
131
- xmlTextReaderPtr reader;
132
- xmlNodePtr ptr;
133
- VALUE attr ;
134
-
135
- Data_Get_Struct(self, xmlTextReader, reader);
138
+ VALUE rb_namespaces = rb_hash_new() ;
139
+ xmlTextReaderPtr c_reader;
140
+ xmlNodePtr c_node;
141
+ VALUE rb_errors;
136
142
 
137
- attr = rb_hash_new() ;
143
+ TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_reader_type, c_reader);
138
144
 
139
- if (! has_attributes(reader)) {
140
- return attr ;
145
+ if (! has_attributes(c_reader)) {
146
+ return rb_namespaces ;
141
147
  }
142
148
 
143
- ptr = xmlTextReaderExpand(reader);
144
- if (ptr == NULL) { return Qnil; }
149
+ rb_errors = rb_funcall(rb_reader, rb_intern("errors"), 0);
150
+
151
+ xmlSetStructuredErrorFunc((void *)rb_errors, Nokogiri_error_array_pusher);
152
+ c_node = xmlTextReaderExpand(c_reader);
153
+ xmlSetStructuredErrorFunc(NULL, NULL);
154
+
155
+ if (c_node == NULL) {
156
+ if (RARRAY_LEN(rb_errors) > 0) {
157
+ VALUE rb_error = rb_ary_entry(rb_errors, 0);
158
+ VALUE exception_message = rb_funcall(rb_error, rb_intern("to_s"), 0);
159
+ rb_exc_raise(rb_class_new_instance(1, &exception_message, cNokogiriXmlSyntaxError));
160
+ }
161
+ return Qnil;
162
+ }
145
163
 
146
- Nokogiri_xml_node_namespaces(ptr, attr);
164
+ Nokogiri_xml_node_namespaces(c_node, rb_namespaces);
147
165
 
148
- return attr ;
166
+ return rb_namespaces ;
149
167
  }
150
168
 
151
169
  /*
@@ -169,7 +187,7 @@ rb_xml_reader_attribute_nodes(VALUE rb_reader)
169
187
  // After removal, we can also remove all the "node_has_a_document" special handling from xml_node.c
170
188
  NOKO_WARN_DEPRECATION("Reader#attribute_nodes is deprecated and will be removed in a future version of Nokogiri. Please use Reader#attribute_hash instead.");
171
189
 
172
- Data_Get_Struct(rb_reader, xmlTextReader, c_reader);
190
+ TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_reader_type, c_reader);
173
191
 
174
192
  if (! has_attributes(c_reader)) {
175
193
  return rb_ary_new() ;
@@ -204,14 +222,29 @@ rb_xml_reader_attribute_hash(VALUE rb_reader)
204
222
  xmlTextReaderPtr c_reader;
205
223
  xmlNodePtr c_node;
206
224
  xmlAttrPtr c_property;
225
+ VALUE rb_errors;
207
226
 
208
- Data_Get_Struct(rb_reader, xmlTextReader, c_reader);
227
+ TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_reader_type, c_reader);
209
228
 
210
229
  if (!has_attributes(c_reader)) {
211
230
  return rb_attributes;
212
231
  }
213
232
 
233
+ rb_errors = rb_funcall(rb_reader, rb_intern("errors"), 0);
234
+
235
+ xmlSetStructuredErrorFunc((void *)rb_errors, Nokogiri_error_array_pusher);
214
236
  c_node = xmlTextReaderExpand(c_reader);
237
+ xmlSetStructuredErrorFunc(NULL, NULL);
238
+
239
+ if (c_node == NULL) {
240
+ if (RARRAY_LEN(rb_errors) > 0) {
241
+ VALUE rb_error = rb_ary_entry(rb_errors, 0);
242
+ VALUE exception_message = rb_funcall(rb_error, rb_intern("to_s"), 0);
243
+ rb_exc_raise(rb_class_new_instance(1, &exception_message, cNokogiriXmlSyntaxError));
244
+ }
245
+ return Qnil;
246
+ }
247
+
215
248
  c_property = c_node->properties;
216
249
  while (c_property != NULL) {
217
250
  VALUE rb_name = NOKOGIRI_STR_NEW2(c_property->name);
@@ -244,7 +277,7 @@ attribute_at(VALUE self, VALUE index)
244
277
  xmlChar *value;
245
278
  VALUE rb_value;
246
279
 
247
- Data_Get_Struct(self, xmlTextReader, reader);
280
+ TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
248
281
 
249
282
  if (NIL_P(index)) { return Qnil; }
250
283
  index = rb_Integer(index);
@@ -273,7 +306,7 @@ reader_attribute(VALUE self, VALUE name)
273
306
  xmlChar *value ;
274
307
  VALUE rb_value;
275
308
 
276
- Data_Get_Struct(self, xmlTextReader, reader);
309
+ TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
277
310
 
278
311
  if (NIL_P(name)) { return Qnil; }
279
312
  name = StringValue(name) ;
@@ -298,11 +331,11 @@ attribute_count(VALUE self)
298
331
  xmlTextReaderPtr reader;
299
332
  int count;
300
333
 
301
- Data_Get_Struct(self, xmlTextReader, reader);
334
+ TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
302
335
  count = xmlTextReaderAttributeCount(reader);
303
336
  if (count == -1) { return Qnil; }
304
337
 
305
- return INT2NUM((long)count);
338
+ return INT2NUM(count);
306
339
  }
307
340
 
308
341
  /*
@@ -317,11 +350,11 @@ depth(VALUE self)
317
350
  xmlTextReaderPtr reader;
318
351
  int depth;
319
352
 
320
- Data_Get_Struct(self, xmlTextReader, reader);
353
+ TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
321
354
  depth = xmlTextReaderDepth(reader);
322
355
  if (depth == -1) { return Qnil; }
323
356
 
324
- return INT2NUM((long)depth);
357
+ return INT2NUM(depth);
325
358
  }
326
359
 
327
360
  /*
@@ -336,7 +369,7 @@ xml_version(VALUE self)
336
369
  xmlTextReaderPtr reader;
337
370
  const char *version;
338
371
 
339
- Data_Get_Struct(self, xmlTextReader, reader);
372
+ TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
340
373
  version = (const char *)xmlTextReaderConstXmlVersion(reader);
341
374
  if (version == NULL) { return Qnil; }
342
375
 
@@ -355,7 +388,7 @@ lang(VALUE self)
355
388
  xmlTextReaderPtr reader;
356
389
  const char *lang;
357
390
 
358
- Data_Get_Struct(self, xmlTextReader, reader);
391
+ TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
359
392
  lang = (const char *)xmlTextReaderConstXmlLang(reader);
360
393
  if (lang == NULL) { return Qnil; }
361
394
 
@@ -374,7 +407,7 @@ value(VALUE self)
374
407
  xmlTextReaderPtr reader;
375
408
  const char *value;
376
409
 
377
- Data_Get_Struct(self, xmlTextReader, reader);
410
+ TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
378
411
  value = (const char *)xmlTextReaderConstValue(reader);
379
412
  if (value == NULL) { return Qnil; }
380
413
 
@@ -393,7 +426,7 @@ prefix(VALUE self)
393
426
  xmlTextReaderPtr reader;
394
427
  const char *prefix;
395
428
 
396
- Data_Get_Struct(self, xmlTextReader, reader);
429
+ TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
397
430
  prefix = (const char *)xmlTextReaderConstPrefix(reader);
398
431
  if (prefix == NULL) { return Qnil; }
399
432
 
@@ -412,7 +445,7 @@ namespace_uri(VALUE self)
412
445
  xmlTextReaderPtr reader;
413
446
  const char *uri;
414
447
 
415
- Data_Get_Struct(self, xmlTextReader, reader);
448
+ TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
416
449
  uri = (const char *)xmlTextReaderConstNamespaceUri(reader);
417
450
  if (uri == NULL) { return Qnil; }
418
451
 
@@ -431,7 +464,7 @@ local_name(VALUE self)
431
464
  xmlTextReaderPtr reader;
432
465
  const char *name;
433
466
 
434
- Data_Get_Struct(self, xmlTextReader, reader);
467
+ TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
435
468
  name = (const char *)xmlTextReaderConstLocalName(reader);
436
469
  if (name == NULL) { return Qnil; }
437
470
 
@@ -450,7 +483,7 @@ name(VALUE self)
450
483
  xmlTextReaderPtr reader;
451
484
  const char *name;
452
485
 
453
- Data_Get_Struct(self, xmlTextReader, reader);
486
+ TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
454
487
  name = (const char *)xmlTextReaderConstName(reader);
455
488
  if (name == NULL) { return Qnil; }
456
489
 
@@ -470,7 +503,7 @@ rb_xml_reader_base_uri(VALUE rb_reader)
470
503
  xmlTextReaderPtr c_reader;
471
504
  xmlChar *c_base_uri;
472
505
 
473
- Data_Get_Struct(rb_reader, xmlTextReader, c_reader);
506
+ TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_reader_type, c_reader);
474
507
 
475
508
  c_base_uri = xmlTextReaderBaseUri(c_reader);
476
509
  if (c_base_uri == NULL) {
@@ -493,8 +526,8 @@ static VALUE
493
526
  state(VALUE self)
494
527
  {
495
528
  xmlTextReaderPtr reader;
496
- Data_Get_Struct(self, xmlTextReader, reader);
497
- return INT2NUM((long)xmlTextReaderReadState(reader));
529
+ TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
530
+ return INT2NUM(xmlTextReaderReadState(reader));
498
531
  }
499
532
 
500
533
  /*
@@ -507,8 +540,8 @@ static VALUE
507
540
  node_type(VALUE self)
508
541
  {
509
542
  xmlTextReaderPtr reader;
510
- Data_Get_Struct(self, xmlTextReader, reader);
511
- return INT2NUM((long)xmlTextReaderNodeType(reader));
543
+ TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
544
+ return INT2NUM(xmlTextReaderNodeType(reader));
512
545
  }
513
546
 
514
547
  /*
@@ -525,7 +558,7 @@ read_more(VALUE self)
525
558
  VALUE error_list;
526
559
  int ret;
527
560
 
528
- Data_Get_Struct(self, xmlTextReader, reader);
561
+ TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
529
562
 
530
563
  error_list = rb_funcall(self, rb_intern("errors"), 0);
531
564
 
@@ -560,7 +593,7 @@ inner_xml(VALUE self)
560
593
  xmlChar *value;
561
594
  VALUE str;
562
595
 
563
- Data_Get_Struct(self, xmlTextReader, reader);
596
+ TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
564
597
 
565
598
  value = xmlTextReaderReadInnerXml(reader);
566
599
 
@@ -587,7 +620,7 @@ outer_xml(VALUE self)
587
620
  xmlChar *value;
588
621
  VALUE str = Qnil;
589
622
 
590
- Data_Get_Struct(self, xmlTextReader, reader);
623
+ TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
591
624
 
592
625
  value = xmlTextReaderReadOuterXml(reader);
593
626
 
@@ -634,7 +667,7 @@ from_memory(int argc, VALUE *argv, VALUE klass)
634
667
  rb_raise(rb_eRuntimeError, "couldn't create a parser");
635
668
  }
636
669
 
637
- rb_reader = Data_Wrap_Struct(klass, NULL, dealloc, reader);
670
+ rb_reader = TypedData_Wrap_Struct(klass, &xml_reader_type, reader);
638
671
  args[0] = rb_buffer;
639
672
  args[1] = rb_url;
640
673
  args[2] = encoding;
@@ -680,7 +713,7 @@ from_io(int argc, VALUE *argv, VALUE klass)
680
713
  rb_raise(rb_eRuntimeError, "couldn't create a parser");
681
714
  }
682
715
 
683
- rb_reader = Data_Wrap_Struct(klass, NULL, dealloc, reader);
716
+ rb_reader = TypedData_Wrap_Struct(klass, &xml_reader_type, reader);
684
717
  args[0] = rb_io;
685
718
  args[1] = rb_url;
686
719
  args[2] = encoding;
@@ -700,7 +733,7 @@ empty_element_p(VALUE self)
700
733
  {
701
734
  xmlTextReaderPtr reader;
702
735
 
703
- Data_Get_Struct(self, xmlTextReader, reader);
736
+ TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
704
737
 
705
738
  if (xmlTextReaderIsEmptyElement(reader)) {
706
739
  return Qtrue;
@@ -721,14 +754,14 @@ rb_xml_reader_encoding(VALUE rb_reader)
721
754
  return constructor_encoding;
722
755
  }
723
756
 
724
- Data_Get_Struct(rb_reader, xmlTextReader, c_reader);
757
+ TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_reader_type, c_reader);
725
758
  parser_encoding = (const char *)xmlTextReaderConstEncoding(c_reader);
726
759
  if (parser_encoding == NULL) { return Qnil; }
727
760
  return NOKOGIRI_STR_NEW2(parser_encoding);
728
761
  }
729
762
 
730
763
  void
731
- noko_init_xml_reader()
764
+ noko_init_xml_reader(void)
732
765
  {
733
766
  /*
734
767
  * The Reader parser allows you to effectively pull parse an XML document.
@@ -758,7 +791,7 @@ noko_init_xml_reader()
758
791
  rb_define_method(cNokogiriXmlReader, "local_name", local_name, 0);
759
792
  rb_define_method(cNokogiriXmlReader, "name", name, 0);
760
793
  rb_define_method(cNokogiriXmlReader, "namespace_uri", namespace_uri, 0);
761
- rb_define_method(cNokogiriXmlReader, "namespaces", namespaces, 0);
794
+ rb_define_method(cNokogiriXmlReader, "namespaces", rb_xml_reader_namespaces, 0);
762
795
  rb_define_method(cNokogiriXmlReader, "node_type", node_type, 0);
763
796
  rb_define_method(cNokogiriXmlReader, "outer_xml", outer_xml, 0);
764
797
  rb_define_method(cNokogiriXmlReader, "prefix", prefix, 0);
@@ -3,13 +3,20 @@
3
3
  VALUE cNokogiriXmlRelaxNG;
4
4
 
5
5
  static void
6
- dealloc(xmlRelaxNGPtr schema)
6
+ xml_relax_ng_deallocate(void *data)
7
7
  {
8
- NOKOGIRI_DEBUG_START(schema);
8
+ xmlRelaxNGPtr schema = data;
9
9
  xmlRelaxNGFree(schema);
10
- NOKOGIRI_DEBUG_END(schema);
11
10
  }
12
11
 
12
+ static const rb_data_type_t xml_relax_ng_type = {
13
+ .wrap_struct_name = "Nokogiri::XML::RelaxNG",
14
+ .function = {
15
+ .dfree = xml_relax_ng_deallocate,
16
+ },
17
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
18
+ };
19
+
13
20
  /*
14
21
  * call-seq:
15
22
  * validate_document(document)
@@ -24,8 +31,8 @@ validate_document(VALUE self, VALUE document)
24
31
  VALUE errors;
25
32
  xmlRelaxNGValidCtxtPtr valid_ctxt;
26
33
 
27
- Data_Get_Struct(self, xmlRelaxNG, schema);
28
- Data_Get_Struct(document, xmlDoc, doc);
34
+ TypedData_Get_Struct(self, xmlRelaxNG, &xml_relax_ng_type, schema);
35
+ doc = noko_xml_document_unwrap(document);
29
36
 
30
37
  errors = rb_ary_new();
31
38
 
@@ -51,47 +58,41 @@ validate_document(VALUE self, VALUE document)
51
58
  return errors;
52
59
  }
53
60
 
54
- /*
55
- * call-seq:
56
- * read_memory(string)
57
- *
58
- * Create a new RelaxNG from the contents of +string+
59
- */
60
61
  static VALUE
61
- read_memory(int argc, VALUE *argv, VALUE klass)
62
+ xml_relax_ng_parse_schema(
63
+ VALUE klass,
64
+ xmlRelaxNGParserCtxtPtr c_parser_context,
65
+ VALUE rb_parse_options
66
+ )
62
67
  {
63
- VALUE content;
64
- VALUE parse_options;
65
- xmlRelaxNGParserCtxtPtr ctx;
66
- xmlRelaxNGPtr schema;
67
- VALUE errors;
68
+ VALUE rb_errors;
68
69
  VALUE rb_schema;
69
- int scanned_args = 0;
70
+ xmlRelaxNGPtr c_schema;
70
71
 
71
- scanned_args = rb_scan_args(argc, argv, "11", &content, &parse_options);
72
- if (scanned_args == 1) {
73
- parse_options = rb_const_get_at(rb_const_get_at(mNokogiriXml, rb_intern("ParseOptions")), rb_intern("DEFAULT_SCHEMA"));
72
+ if (NIL_P(rb_parse_options)) {
73
+ rb_parse_options = rb_const_get_at(
74
+ rb_const_get_at(mNokogiriXml, rb_intern("ParseOptions")),
75
+ rb_intern("DEFAULT_SCHEMA")
76
+ );
74
77
  }
75
78
 
76
- ctx = xmlRelaxNGNewMemParserCtxt((const char *)StringValuePtr(content), (int)RSTRING_LEN(content));
77
-
78
- errors = rb_ary_new();
79
- xmlSetStructuredErrorFunc((void *)errors, Nokogiri_error_array_pusher);
79
+ rb_errors = rb_ary_new();
80
+ xmlSetStructuredErrorFunc((void *)rb_errors, Nokogiri_error_array_pusher);
80
81
 
81
82
  #ifdef HAVE_XMLRELAXNGSETPARSERSTRUCTUREDERRORS
82
83
  xmlRelaxNGSetParserStructuredErrors(
83
- ctx,
84
+ c_parser_context,
84
85
  Nokogiri_error_array_pusher,
85
- (void *)errors
86
+ (void *)rb_errors
86
87
  );
87
88
  #endif
88
89
 
89
- schema = xmlRelaxNGParse(ctx);
90
+ c_schema = xmlRelaxNGParse(c_parser_context);
90
91
 
91
92
  xmlSetStructuredErrorFunc(NULL, NULL);
92
- xmlRelaxNGFreeParserCtxt(ctx);
93
+ xmlRelaxNGFreeParserCtxt(c_parser_context);
93
94
 
94
- if (NULL == schema) {
95
+ if (NULL == c_schema) {
95
96
  xmlErrorPtr error = xmlGetLastError();
96
97
  if (error) {
97
98
  Nokogiri_error_raise(NULL, error);
@@ -102,78 +103,62 @@ read_memory(int argc, VALUE *argv, VALUE klass)
102
103
  return Qnil;
103
104
  }
104
105
 
105
- rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema);
106
- rb_iv_set(rb_schema, "@errors", errors);
107
- rb_iv_set(rb_schema, "@parse_options", parse_options);
106
+ rb_schema = TypedData_Wrap_Struct(klass, &xml_relax_ng_type, c_schema);
107
+ rb_iv_set(rb_schema, "@errors", rb_errors);
108
+ rb_iv_set(rb_schema, "@parse_options", rb_parse_options);
108
109
 
109
110
  return rb_schema;
110
111
  }
111
112
 
112
113
  /*
113
114
  * call-seq:
114
- * from_document(doc)
115
+ * read_memory(string)
115
116
  *
116
- * Create a new RelaxNG schema from the Nokogiri::XML::Document +doc+
117
+ * Create a new RelaxNG from the contents of +string+
117
118
  */
118
119
  static VALUE
119
- from_document(int argc, VALUE *argv, VALUE klass)
120
+ read_memory(int argc, VALUE *argv, VALUE klass)
120
121
  {
121
- VALUE document;
122
- VALUE parse_options;
123
- xmlDocPtr doc;
124
- xmlRelaxNGParserCtxtPtr ctx;
125
- xmlRelaxNGPtr schema;
126
- VALUE errors;
127
- VALUE rb_schema;
128
- int scanned_args = 0;
122
+ VALUE rb_content;
123
+ VALUE rb_parse_options;
124
+ xmlRelaxNGParserCtxtPtr c_parser_context;
129
125
 
130
- scanned_args = rb_scan_args(argc, argv, "11", &document, &parse_options);
126
+ rb_scan_args(argc, argv, "11", &rb_content, &rb_parse_options);
131
127
 
132
- Data_Get_Struct(document, xmlDoc, doc);
133
- doc = doc->doc; /* In case someone passes us a node. ugh. */
134
-
135
- if (scanned_args == 1) {
136
- parse_options = rb_const_get_at(rb_const_get_at(mNokogiriXml, rb_intern("ParseOptions")), rb_intern("DEFAULT_SCHEMA"));
137
- }
138
-
139
- ctx = xmlRelaxNGNewDocParserCtxt(doc);
140
-
141
- errors = rb_ary_new();
142
- xmlSetStructuredErrorFunc((void *)errors, Nokogiri_error_array_pusher);
128
+ c_parser_context = xmlRelaxNGNewMemParserCtxt(
129
+ (const char *)StringValuePtr(rb_content),
130
+ (int)RSTRING_LEN(rb_content)
131
+ );
143
132
 
144
- #ifdef HAVE_XMLRELAXNGSETPARSERSTRUCTUREDERRORS
145
- xmlRelaxNGSetParserStructuredErrors(
146
- ctx,
147
- Nokogiri_error_array_pusher,
148
- (void *)errors
149
- );
150
- #endif
151
-
152
- schema = xmlRelaxNGParse(ctx);
133
+ return xml_relax_ng_parse_schema(klass, c_parser_context, rb_parse_options);
134
+ }
153
135
 
154
- xmlSetStructuredErrorFunc(NULL, NULL);
155
- xmlRelaxNGFreeParserCtxt(ctx);
136
+ /*
137
+ * call-seq:
138
+ * from_document(doc)
139
+ *
140
+ * Create a new RelaxNG schema from the Nokogiri::XML::Document +doc+
141
+ */
142
+ static VALUE
143
+ from_document(int argc, VALUE *argv, VALUE klass)
144
+ {
145
+ VALUE rb_document;
146
+ VALUE rb_parse_options;
147
+ xmlDocPtr c_document;
148
+ xmlRelaxNGParserCtxtPtr c_parser_context;
156
149
 
157
- if (NULL == schema) {
158
- xmlErrorPtr error = xmlGetLastError();
159
- if (error) {
160
- Nokogiri_error_raise(NULL, error);
161
- } else {
162
- rb_raise(rb_eRuntimeError, "Could not parse document");
163
- }
150
+ rb_scan_args(argc, argv, "11", &rb_document, &rb_parse_options);
164
151
 
165
- return Qnil;
166
- }
152
+ c_document = noko_xml_document_unwrap(rb_document);
153
+ c_document = c_document->doc; /* In case someone passes us a node. ugh. */
167
154
 
168
- rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema);
169
- rb_iv_set(rb_schema, "@errors", errors);
170
- rb_iv_set(rb_schema, "@parse_options", parse_options);
155
+ c_parser_context = xmlRelaxNGNewDocParserCtxt(c_document);
171
156
 
172
- return rb_schema;
157
+ return xml_relax_ng_parse_schema(klass, c_parser_context, rb_parse_options);
173
158
  }
174
159
 
175
160
  void
176
- noko_init_xml_relax_ng()
161
+ noko_init_xml_relax_ng(void)
177
162
  {
178
163
  assert(cNokogiriXmlSchema);
179
164
  cNokogiriXmlRelaxNG = rb_define_class_under(mNokogiriXml, "RelaxNG", cNokogiriXmlSchema);
@@ -195,40 +195,48 @@ comment_func(void *ctx, const xmlChar *value)
195
195
  rb_funcall(doc, id_comment, 1, str);
196
196
  }
197
197
 
198
+ PRINTFLIKE_DECL(2, 3)
198
199
  static void
199
200
  warning_func(void *ctx, const char *msg, ...)
200
201
  {
201
202
  VALUE self = NOKOGIRI_SAX_SELF(ctx);
202
203
  VALUE doc = rb_iv_get(self, "@document");
203
- char *message;
204
- VALUE ruby_message;
204
+ VALUE rb_message;
205
205
 
206
+ #ifdef TRUFFLERUBY_NOKOGIRI_SYSTEM_LIBRARIES
207
+ /* It is not currently possible to pass var args from native
208
+ functions to sulong, so we work around the issue here. */
209
+ rb_message = rb_sprintf("warning_func: %s", msg);
210
+ #else
206
211
  va_list args;
207
212
  va_start(args, msg);
208
- vasprintf(&message, msg, args);
213
+ rb_message = rb_vsprintf(msg, args);
209
214
  va_end(args);
215
+ #endif
210
216
 
211
- ruby_message = NOKOGIRI_STR_NEW2(message);
212
- free(message);
213
- rb_funcall(doc, id_warning, 1, ruby_message);
217
+ rb_funcall(doc, id_warning, 1, rb_message);
214
218
  }
215
219
 
220
+ PRINTFLIKE_DECL(2, 3)
216
221
  static void
217
222
  error_func(void *ctx, const char *msg, ...)
218
223
  {
219
224
  VALUE self = NOKOGIRI_SAX_SELF(ctx);
220
225
  VALUE doc = rb_iv_get(self, "@document");
221
- char *message;
222
- VALUE ruby_message;
226
+ VALUE rb_message;
223
227
 
228
+ #ifdef TRUFFLERUBY_NOKOGIRI_SYSTEM_LIBRARIES
229
+ /* It is not currently possible to pass var args from native
230
+ functions to sulong, so we work around the issue here. */
231
+ rb_message = rb_sprintf("error_func: %s", msg);
232
+ #else
224
233
  va_list args;
225
234
  va_start(args, msg);
226
- vasprintf(&message, msg, args);
235
+ rb_message = rb_vsprintf(msg, args);
227
236
  va_end(args);
237
+ #endif
228
238
 
229
- ruby_message = NOKOGIRI_STR_NEW2(message);
230
- free(message);
231
- rb_funcall(doc, id_error, 1, ruby_message);
239
+ rb_funcall(doc, id_error, 1, rb_message);
232
240
  }
233
241
 
234
242
  static void
@@ -257,18 +265,27 @@ processing_instruction(void *ctx, const xmlChar *name, const xmlChar *content)
257
265
  );
258
266
  }
259
267
 
260
- static void
261
- deallocate(xmlSAXHandlerPtr handler)
268
+ static size_t
269
+ memsize(const void *data)
262
270
  {
263
- NOKOGIRI_DEBUG_START(handler);
264
- free(handler);
265
- NOKOGIRI_DEBUG_END(handler);
271
+ return sizeof(xmlSAXHandler);
266
272
  }
267
273
 
274
+ /* Used by Nokogiri::XML::SAX::Parser and Nokogiri::HTML::SAX::Parser */
275
+ static const rb_data_type_t noko_sax_handler_type = {
276
+ .wrap_struct_name = "Nokogiri::SAXHandler",
277
+ .function = {
278
+ .dfree = RUBY_TYPED_DEFAULT_FREE,
279
+ .dsize = memsize
280
+ },
281
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
282
+ };
283
+
268
284
  static VALUE
269
285
  allocate(VALUE klass)
270
286
  {
271
- xmlSAXHandlerPtr handler = calloc((size_t)1, sizeof(xmlSAXHandler));
287
+ xmlSAXHandlerPtr handler;
288
+ VALUE self = TypedData_Make_Struct(klass, xmlSAXHandler, &noko_sax_handler_type, handler);
272
289
 
273
290
  handler->startDocument = start_document;
274
291
  handler->endDocument = end_document;
@@ -284,11 +301,19 @@ allocate(VALUE klass)
284
301
  handler->processingInstruction = processing_instruction;
285
302
  handler->initialized = XML_SAX2_MAGIC;
286
303
 
287
- return Data_Wrap_Struct(klass, NULL, deallocate, handler);
304
+ return self;
305
+ }
306
+
307
+ xmlSAXHandlerPtr
308
+ noko_sax_handler_unwrap(VALUE rb_sax_handler)
309
+ {
310
+ xmlSAXHandlerPtr c_sax_handler;
311
+ TypedData_Get_Struct(rb_sax_handler, xmlSAXHandler, &noko_sax_handler_type, c_sax_handler);
312
+ return c_sax_handler;
288
313
  }
289
314
 
290
315
  void
291
- noko_init_xml_sax_parser()
316
+ noko_init_xml_sax_parser(void)
292
317
  {
293
318
  cNokogiriXmlSaxParser = rb_define_class_under(mNokogiriXmlSax, "Parser", rb_cObject);
294
319