nokogiri 1.15.4 → 1.17.2

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (98)
  1. checksums.yaml +4 -4
  2. data/Gemfile +12 -19
  3. data/README.md +8 -1
  4. data/dependencies.yml +9 -8
  5. data/ext/nokogiri/extconf.rb +194 -141
  6. data/ext/nokogiri/gumbo.c +69 -53
  7. data/ext/nokogiri/html4_document.c +10 -4
  8. data/ext/nokogiri/html4_element_description.c +18 -18
  9. data/ext/nokogiri/html4_sax_parser.c +40 -0
  10. data/ext/nokogiri/html4_sax_parser_context.c +48 -58
  11. data/ext/nokogiri/html4_sax_push_parser.c +26 -25
  12. data/ext/nokogiri/libxml2_polyfill.c +114 -0
  13. data/ext/nokogiri/nokogiri.c +9 -2
  14. data/ext/nokogiri/nokogiri.h +25 -33
  15. data/ext/nokogiri/test_global_handlers.c +1 -1
  16. data/ext/nokogiri/xml_attr.c +1 -1
  17. data/ext/nokogiri/xml_cdata.c +3 -12
  18. data/ext/nokogiri/xml_comment.c +3 -8
  19. data/ext/nokogiri/xml_document.c +167 -156
  20. data/ext/nokogiri/xml_document_fragment.c +10 -25
  21. data/ext/nokogiri/xml_dtd.c +1 -1
  22. data/ext/nokogiri/xml_element_content.c +9 -9
  23. data/ext/nokogiri/xml_encoding_handler.c +4 -4
  24. data/ext/nokogiri/xml_namespace.c +6 -10
  25. data/ext/nokogiri/xml_node.c +142 -108
  26. data/ext/nokogiri/xml_node_set.c +46 -44
  27. data/ext/nokogiri/xml_reader.c +74 -100
  28. data/ext/nokogiri/xml_relax_ng.c +35 -56
  29. data/ext/nokogiri/xml_sax_parser.c +156 -88
  30. data/ext/nokogiri/xml_sax_parser_context.c +214 -128
  31. data/ext/nokogiri/xml_sax_push_parser.c +69 -50
  32. data/ext/nokogiri/xml_schema.c +51 -87
  33. data/ext/nokogiri/xml_syntax_error.c +19 -11
  34. data/ext/nokogiri/xml_text.c +3 -6
  35. data/ext/nokogiri/xml_xpath_context.c +4 -7
  36. data/ext/nokogiri/xslt_stylesheet.c +16 -11
  37. data/gumbo-parser/Makefile +18 -0
  38. data/gumbo-parser/src/error.c +76 -48
  39. data/gumbo-parser/src/error.h +5 -1
  40. data/gumbo-parser/src/nokogiri_gumbo.h +11 -2
  41. data/gumbo-parser/src/parser.c +64 -23
  42. data/gumbo-parser/src/tokenizer.c +7 -6
  43. data/lib/nokogiri/class_resolver.rb +1 -1
  44. data/lib/nokogiri/css/node.rb +6 -2
  45. data/lib/nokogiri/css/parser.rb +6 -4
  46. data/lib/nokogiri/css/parser.y +2 -2
  47. data/lib/nokogiri/css/parser_extras.rb +6 -66
  48. data/lib/nokogiri/css/selector_cache.rb +38 -0
  49. data/lib/nokogiri/css/tokenizer.rb +4 -4
  50. data/lib/nokogiri/css/tokenizer.rex +9 -8
  51. data/lib/nokogiri/css/xpath_visitor.rb +43 -27
  52. data/lib/nokogiri/css.rb +86 -20
  53. data/lib/nokogiri/decorators/slop.rb +3 -5
  54. data/lib/nokogiri/encoding_handler.rb +2 -2
  55. data/lib/nokogiri/html4/document.rb +45 -24
  56. data/lib/nokogiri/html4/document_fragment.rb +124 -12
  57. data/lib/nokogiri/html4/encoding_reader.rb +2 -2
  58. data/lib/nokogiri/html4/sax/parser.rb +23 -38
  59. data/lib/nokogiri/html4/sax/parser_context.rb +4 -9
  60. data/lib/nokogiri/html4.rb +9 -14
  61. data/lib/nokogiri/html5/builder.rb +40 -0
  62. data/lib/nokogiri/html5/document.rb +61 -30
  63. data/lib/nokogiri/html5/document_fragment.rb +130 -20
  64. data/lib/nokogiri/html5/node.rb +4 -4
  65. data/lib/nokogiri/html5.rb +114 -138
  66. data/lib/nokogiri/version/constant.rb +1 -1
  67. data/lib/nokogiri/version/info.rb +6 -5
  68. data/lib/nokogiri/xml/attr.rb +2 -2
  69. data/lib/nokogiri/xml/builder.rb +8 -1
  70. data/lib/nokogiri/xml/document.rb +74 -31
  71. data/lib/nokogiri/xml/document_fragment.rb +86 -15
  72. data/lib/nokogiri/xml/namespace.rb +1 -2
  73. data/lib/nokogiri/xml/node.rb +113 -35
  74. data/lib/nokogiri/xml/node_set.rb +12 -10
  75. data/lib/nokogiri/xml/parse_options.rb +1 -1
  76. data/lib/nokogiri/xml/pp/node.rb +6 -1
  77. data/lib/nokogiri/xml/reader.rb +51 -17
  78. data/lib/nokogiri/xml/relax_ng.rb +57 -20
  79. data/lib/nokogiri/xml/sax/document.rb +174 -83
  80. data/lib/nokogiri/xml/sax/parser.rb +115 -41
  81. data/lib/nokogiri/xml/sax/parser_context.rb +116 -8
  82. data/lib/nokogiri/xml/sax/push_parser.rb +3 -0
  83. data/lib/nokogiri/xml/sax.rb +48 -0
  84. data/lib/nokogiri/xml/schema.rb +112 -45
  85. data/lib/nokogiri/xml/searchable.rb +9 -11
  86. data/lib/nokogiri/xml/syntax_error.rb +23 -1
  87. data/lib/nokogiri/xml.rb +14 -25
  88. data/lib/nokogiri/xslt/stylesheet.rb +29 -7
  89. data/lib/nokogiri/xslt.rb +4 -10
  90. data/lib/nokogiri.rb +1 -1
  91. data/lib/xsd/xmlparser/nokogiri.rb +3 -4
  92. data/ports/archives/libxml2-2.13.5.tar.xz +0 -0
  93. data/ports/archives/libxslt-1.1.42.tar.xz +0 -0
  94. metadata +15 -14
  95. data/ext/nokogiri/libxml2_backwards_compat.c +0 -121
  96. data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +0 -25
  97. data/ports/archives/libxml2-2.11.5.tar.xz +0 -0
  98. data/ports/archives/libxslt-1.1.38.tar.xz +0 -0
@@ -5,12 +5,18 @@ VALUE cNokogiriXmlReader;
5
5
  static void
6
6
  xml_reader_deallocate(void *data)
7
7
  {
8
+ // free the document separately because we _may_ have triggered preservation by calling
9
+ // xmlTextReaderCurrentDoc during a read_more.
8
10
  xmlTextReaderPtr reader = data;
11
+ xmlDocPtr doc = xmlTextReaderCurrentDoc(reader);
9
12
  xmlFreeTextReader(reader);
13
+ if (doc) {
14
+ xmlFreeDoc(doc);
15
+ }
10
16
  }
11
17
 
12
- static const rb_data_type_t xml_reader_type = {
13
- .wrap_struct_name = "Nokogiri::XML::Reader",
18
+ static const rb_data_type_t xml_text_reader_type = {
19
+ .wrap_struct_name = "xmlTextReader",
14
20
  .function = {
15
21
  .dfree = xml_reader_deallocate,
16
22
  },
@@ -78,7 +84,7 @@ default_eh(VALUE self)
78
84
  xmlTextReaderPtr reader;
79
85
  int eh;
80
86
 
81
- TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
87
+ TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
82
88
  eh = xmlTextReaderIsDefault(reader);
83
89
  if (eh == 0) { return Qfalse; }
84
90
  if (eh == 1) { return Qtrue; }
@@ -98,7 +104,7 @@ value_eh(VALUE self)
98
104
  xmlTextReaderPtr reader;
99
105
  int eh;
100
106
 
101
- TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
107
+ TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
102
108
  eh = xmlTextReaderHasValue(reader);
103
109
  if (eh == 0) { return Qfalse; }
104
110
  if (eh == 1) { return Qtrue; }
@@ -118,7 +124,7 @@ attributes_eh(VALUE self)
118
124
  xmlTextReaderPtr reader;
119
125
  int eh;
120
126
 
121
- TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
127
+ TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
122
128
  eh = has_attributes(reader);
123
129
  if (eh == 0) { return Qfalse; }
124
130
  if (eh == 1) { return Qtrue; }
@@ -140,7 +146,7 @@ rb_xml_reader_namespaces(VALUE rb_reader)
140
146
  xmlNodePtr c_node;
141
147
  VALUE rb_errors;
142
148
 
143
- TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_reader_type, c_reader);
149
+ TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_text_reader_type, c_reader);
144
150
 
145
151
  if (! has_attributes(c_reader)) {
146
152
  return rb_namespaces ;
@@ -148,7 +154,7 @@ rb_xml_reader_namespaces(VALUE rb_reader)
148
154
 
149
155
  rb_errors = rb_funcall(rb_reader, rb_intern("errors"), 0);
150
156
 
151
- xmlSetStructuredErrorFunc((void *)rb_errors, Nokogiri_error_array_pusher);
157
+ xmlSetStructuredErrorFunc((void *)rb_errors, noko__error_array_pusher);
152
158
  c_node = xmlTextReaderExpand(c_reader);
153
159
  xmlSetStructuredErrorFunc(NULL, NULL);
154
160
 
@@ -166,48 +172,6 @@ rb_xml_reader_namespaces(VALUE rb_reader)
166
172
  return rb_namespaces ;
167
173
  }
168
174
 
169
- /*
170
- :call-seq: attribute_nodes() → Array<Nokogiri::XML::Attr>
171
-
172
- Get the attributes of the current node as an Array of XML:Attr
173
-
174
- ⚠ This method is deprecated and unsafe to use. It will be removed in a future version of Nokogiri.
175
-
176
- See related: #attribute_hash, #attributes
177
- */
178
- static VALUE
179
- rb_xml_reader_attribute_nodes(VALUE rb_reader)
180
- {
181
- xmlTextReaderPtr c_reader;
182
- xmlNodePtr c_node;
183
- VALUE attr_nodes;
184
- int j;
185
-
186
- // TODO: deprecated, remove in Nokogiri v1.15, see https://github.com/sparklemotion/nokogiri/issues/2598
187
- // After removal, we can also remove all the "node_has_a_document" special handling from xml_node.c
188
- NOKO_WARN_DEPRECATION("Reader#attribute_nodes is deprecated and will be removed in a future version of Nokogiri. Please use Reader#attribute_hash instead.");
189
-
190
- TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_reader_type, c_reader);
191
-
192
- if (! has_attributes(c_reader)) {
193
- return rb_ary_new() ;
194
- }
195
-
196
- c_node = xmlTextReaderExpand(c_reader);
197
- if (c_node == NULL) {
198
- return Qnil;
199
- }
200
-
201
- attr_nodes = noko_xml_node_attrs(c_node);
202
-
203
- /* ensure that the Reader won't be GCed as long as a node is referenced */
204
- for (j = 0 ; j < RARRAY_LEN(attr_nodes) ; j++) {
205
- rb_iv_set(rb_ary_entry(attr_nodes, j), "@reader", rb_reader);
206
- }
207
-
208
- return attr_nodes;
209
- }
210
-
211
175
  /*
212
176
  :call-seq: attribute_hash() → Hash<String ⇒ String>
213
177
 
@@ -224,7 +188,7 @@ rb_xml_reader_attribute_hash(VALUE rb_reader)
224
188
  xmlAttrPtr c_property;
225
189
  VALUE rb_errors;
226
190
 
227
- TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_reader_type, c_reader);
191
+ TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_text_reader_type, c_reader);
228
192
 
229
193
  if (!has_attributes(c_reader)) {
230
194
  return rb_attributes;
@@ -232,7 +196,7 @@ rb_xml_reader_attribute_hash(VALUE rb_reader)
232
196
 
233
197
  rb_errors = rb_funcall(rb_reader, rb_intern("errors"), 0);
234
198
 
235
- xmlSetStructuredErrorFunc((void *)rb_errors, Nokogiri_error_array_pusher);
199
+ xmlSetStructuredErrorFunc((void *)rb_errors, noko__error_array_pusher);
236
200
  c_node = xmlTextReaderExpand(c_reader);
237
201
  xmlSetStructuredErrorFunc(NULL, NULL);
238
202
 
@@ -277,7 +241,7 @@ attribute_at(VALUE self, VALUE index)
277
241
  xmlChar *value;
278
242
  VALUE rb_value;
279
243
 
280
- TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
244
+ TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
281
245
 
282
246
  if (NIL_P(index)) { return Qnil; }
283
247
  index = rb_Integer(index);
@@ -306,7 +270,7 @@ reader_attribute(VALUE self, VALUE name)
306
270
  xmlChar *value ;
307
271
  VALUE rb_value;
308
272
 
309
- TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
273
+ TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
310
274
 
311
275
  if (NIL_P(name)) { return Qnil; }
312
276
  name = StringValue(name) ;
@@ -331,7 +295,7 @@ attribute_count(VALUE self)
331
295
  xmlTextReaderPtr reader;
332
296
  int count;
333
297
 
334
- TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
298
+ TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
335
299
  count = xmlTextReaderAttributeCount(reader);
336
300
  if (count == -1) { return Qnil; }
337
301
 
@@ -350,7 +314,7 @@ depth(VALUE self)
350
314
  xmlTextReaderPtr reader;
351
315
  int depth;
352
316
 
353
- TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
317
+ TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
354
318
  depth = xmlTextReaderDepth(reader);
355
319
  if (depth == -1) { return Qnil; }
356
320
 
@@ -369,7 +333,7 @@ xml_version(VALUE self)
369
333
  xmlTextReaderPtr reader;
370
334
  const char *version;
371
335
 
372
- TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
336
+ TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
373
337
  version = (const char *)xmlTextReaderConstXmlVersion(reader);
374
338
  if (version == NULL) { return Qnil; }
375
339
 
@@ -388,7 +352,7 @@ lang(VALUE self)
388
352
  xmlTextReaderPtr reader;
389
353
  const char *lang;
390
354
 
391
- TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
355
+ TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
392
356
  lang = (const char *)xmlTextReaderConstXmlLang(reader);
393
357
  if (lang == NULL) { return Qnil; }
394
358
 
@@ -407,7 +371,7 @@ value(VALUE self)
407
371
  xmlTextReaderPtr reader;
408
372
  const char *value;
409
373
 
410
- TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
374
+ TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
411
375
  value = (const char *)xmlTextReaderConstValue(reader);
412
376
  if (value == NULL) { return Qnil; }
413
377
 
@@ -426,7 +390,7 @@ prefix(VALUE self)
426
390
  xmlTextReaderPtr reader;
427
391
  const char *prefix;
428
392
 
429
- TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
393
+ TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
430
394
  prefix = (const char *)xmlTextReaderConstPrefix(reader);
431
395
  if (prefix == NULL) { return Qnil; }
432
396
 
@@ -445,7 +409,7 @@ namespace_uri(VALUE self)
445
409
  xmlTextReaderPtr reader;
446
410
  const char *uri;
447
411
 
448
- TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
412
+ TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
449
413
  uri = (const char *)xmlTextReaderConstNamespaceUri(reader);
450
414
  if (uri == NULL) { return Qnil; }
451
415
 
@@ -464,7 +428,7 @@ local_name(VALUE self)
464
428
  xmlTextReaderPtr reader;
465
429
  const char *name;
466
430
 
467
- TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
431
+ TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
468
432
  name = (const char *)xmlTextReaderConstLocalName(reader);
469
433
  if (name == NULL) { return Qnil; }
470
434
 
@@ -483,7 +447,7 @@ name(VALUE self)
483
447
  xmlTextReaderPtr reader;
484
448
  const char *name;
485
449
 
486
- TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
450
+ TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
487
451
  name = (const char *)xmlTextReaderConstName(reader);
488
452
  if (name == NULL) { return Qnil; }
489
453
 
@@ -503,7 +467,7 @@ rb_xml_reader_base_uri(VALUE rb_reader)
503
467
  xmlTextReaderPtr c_reader;
504
468
  xmlChar *c_base_uri;
505
469
 
506
- TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_reader_type, c_reader);
470
+ TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_text_reader_type, c_reader);
507
471
 
508
472
  c_base_uri = xmlTextReaderBaseUri(c_reader);
509
473
  if (c_base_uri == NULL) {
@@ -526,7 +490,7 @@ static VALUE
526
490
  state(VALUE self)
527
491
  {
528
492
  xmlTextReaderPtr reader;
529
- TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
493
+ TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
530
494
  return INT2NUM(xmlTextReaderReadState(reader));
531
495
  }
532
496
 
@@ -540,7 +504,7 @@ static VALUE
540
504
  node_type(VALUE self)
541
505
  {
542
506
  xmlTextReaderPtr reader;
543
- TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
507
+ TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
544
508
  return INT2NUM(xmlTextReaderNodeType(reader));
545
509
  }
546
510
 
@@ -551,32 +515,41 @@ node_type(VALUE self)
551
515
  * Move the Reader forward through the XML document.
552
516
  */
553
517
  static VALUE
554
- read_more(VALUE self)
518
+ read_more(VALUE rb_reader)
555
519
  {
556
- xmlTextReaderPtr reader;
557
- xmlErrorPtr error;
558
- VALUE error_list;
559
- int ret;
520
+ xmlTextReaderPtr c_reader;
521
+ libxmlStructuredErrorHandlerState handler_state;
560
522
 
561
- TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
523
+ TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_text_reader_type, c_reader);
562
524
 
563
- error_list = rb_funcall(self, rb_intern("errors"), 0);
525
+ VALUE rb_errors = rb_funcall(rb_reader, rb_intern("errors"), 0);
526
+ noko__structured_error_func_save_and_set(&handler_state, (void *)rb_errors, noko__error_array_pusher);
564
527
 
565
- xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);
566
- ret = xmlTextReaderRead(reader);
567
- xmlSetStructuredErrorFunc(NULL, NULL);
528
+ int status = xmlTextReaderRead(c_reader);
568
529
 
569
- if (ret == 1) { return self; }
570
- if (ret == 0) { return Qnil; }
530
+ noko__structured_error_func_restore(&handler_state);
571
531
 
572
- error = xmlGetLastError();
573
- if (error) {
574
- rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
575
- } else {
576
- rb_raise(rb_eRuntimeError, "Error pulling: %d", ret);
532
+ xmlDocPtr c_document = xmlTextReaderCurrentDoc(c_reader);
533
+ if (c_document && c_document->encoding == NULL) {
534
+ VALUE constructor_encoding = rb_iv_get(rb_reader, "@encoding");
535
+ if (RTEST(constructor_encoding)) {
536
+ c_document->encoding = xmlStrdup(BAD_CAST StringValueCStr(constructor_encoding));
537
+ } else {
538
+ rb_iv_set(rb_reader, "@encoding", NOKOGIRI_STR_NEW2("UTF-8"));
539
+ c_document->encoding = xmlStrdup(BAD_CAST "UTF-8");
540
+ }
577
541
  }
578
542
 
579
- return Qnil;
543
+ if (status == 1) { return rb_reader; }
544
+ if (status == 0) { return Qnil; }
545
+
546
+ /* if we're here, there was an error */
547
+ VALUE exception = rb_funcall(cNokogiriXmlSyntaxError, rb_intern("aggregate"), 1, rb_errors);
548
+ if (RB_TEST(exception)) {
549
+ rb_exc_raise(exception);
550
+ } else {
551
+ rb_raise(rb_eRuntimeError, "Error pulling: %d", status);
552
+ }
580
553
  }
581
554
 
582
555
  /*
@@ -593,7 +566,7 @@ inner_xml(VALUE self)
593
566
  xmlChar *value;
594
567
  VALUE str;
595
568
 
596
- TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
569
+ TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
597
570
 
598
571
  value = xmlTextReaderReadInnerXml(reader);
599
572
 
@@ -620,7 +593,7 @@ outer_xml(VALUE self)
620
593
  xmlChar *value;
621
594
  VALUE str = Qnil;
622
595
 
623
- TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
596
+ TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
624
597
 
625
598
  value = xmlTextReaderReadOuterXml(reader);
626
599
 
@@ -635,11 +608,13 @@ outer_xml(VALUE self)
635
608
  * call-seq:
636
609
  * from_memory(string, url = nil, encoding = nil, options = 0)
637
610
  *
638
- * Create a new reader that parses +string+
611
+ * Create a new Reader to parse a String.
639
612
  */
640
613
  static VALUE
641
614
  from_memory(int argc, VALUE *argv, VALUE klass)
642
615
  {
616
+ /* TODO: deprecate this method, since Reader.new can handle both memory and IO. It can then
617
+ * become private. */
643
618
  VALUE rb_buffer, rb_url, encoding, rb_options;
644
619
  xmlTextReaderPtr reader;
645
620
  const char *c_url = NULL;
@@ -667,7 +642,7 @@ from_memory(int argc, VALUE *argv, VALUE klass)
667
642
  rb_raise(rb_eRuntimeError, "couldn't create a parser");
668
643
  }
669
644
 
670
- rb_reader = TypedData_Wrap_Struct(klass, &xml_reader_type, reader);
645
+ rb_reader = TypedData_Wrap_Struct(klass, &xml_text_reader_type, reader);
671
646
  args[0] = rb_buffer;
672
647
  args[1] = rb_url;
673
648
  args[2] = encoding;
@@ -680,11 +655,13 @@ from_memory(int argc, VALUE *argv, VALUE klass)
680
655
  * call-seq:
681
656
  * from_io(io, url = nil, encoding = nil, options = 0)
682
657
  *
683
- * Create a new reader that parses +io+
658
+ * Create a new Reader to parse an IO stream.
684
659
  */
685
660
  static VALUE
686
661
  from_io(int argc, VALUE *argv, VALUE klass)
687
662
  {
663
+ /* TODO: deprecate this method, since Reader.new can handle both memory and IO. It can then
664
+ * become private. */
688
665
  VALUE rb_io, rb_url, encoding, rb_options;
689
666
  xmlTextReaderPtr reader;
690
667
  const char *c_url = NULL;
@@ -713,7 +690,7 @@ from_io(int argc, VALUE *argv, VALUE klass)
713
690
  rb_raise(rb_eRuntimeError, "couldn't create a parser");
714
691
  }
715
692
 
716
- rb_reader = TypedData_Wrap_Struct(klass, &xml_reader_type, reader);
693
+ rb_reader = TypedData_Wrap_Struct(klass, &xml_text_reader_type, reader);
717
694
  args[0] = rb_io;
718
695
  args[1] = rb_url;
719
696
  args[2] = encoding;
@@ -733,7 +710,7 @@ empty_element_p(VALUE self)
733
710
  {
734
711
  xmlTextReaderPtr reader;
735
712
 
736
- TypedData_Get_Struct(self, xmlTextReader, &xml_reader_type, reader);
713
+ TypedData_Get_Struct(self, xmlTextReader, &xml_text_reader_type, reader);
737
714
 
738
715
  if (xmlTextReaderIsEmptyElement(reader)) {
739
716
  return Qtrue;
@@ -749,25 +726,23 @@ rb_xml_reader_encoding(VALUE rb_reader)
749
726
  const char *parser_encoding;
750
727
  VALUE constructor_encoding;
751
728
 
729
+ TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_text_reader_type, c_reader);
730
+ parser_encoding = (const char *)xmlTextReaderConstEncoding(c_reader);
731
+ if (parser_encoding) {
732
+ return NOKOGIRI_STR_NEW2(parser_encoding);
733
+ }
734
+
752
735
  constructor_encoding = rb_iv_get(rb_reader, "@encoding");
753
736
  if (RTEST(constructor_encoding)) {
754
737
  return constructor_encoding;
755
738
  }
756
739
 
757
- TypedData_Get_Struct(rb_reader, xmlTextReader, &xml_reader_type, c_reader);
758
- parser_encoding = (const char *)xmlTextReaderConstEncoding(c_reader);
759
- if (parser_encoding == NULL) { return Qnil; }
760
- return NOKOGIRI_STR_NEW2(parser_encoding);
740
+ return Qnil;
761
741
  }
762
742
 
763
743
  void
764
744
  noko_init_xml_reader(void)
765
745
  {
766
- /*
767
- * The Reader parser allows you to effectively pull parse an XML document.
768
- * Once instantiated, call Nokogiri::XML::Reader#each to iterate over each
769
- * node. Note that you may only iterate over the document once!
770
- */
771
746
  cNokogiriXmlReader = rb_define_class_under(mNokogiriXml, "Reader", rb_cObject);
772
747
 
773
748
  rb_undef_alloc_func(cNokogiriXmlReader);
@@ -778,7 +753,6 @@ noko_init_xml_reader(void)
778
753
  rb_define_method(cNokogiriXmlReader, "attribute", reader_attribute, 1);
779
754
  rb_define_method(cNokogiriXmlReader, "attribute_at", attribute_at, 1);
780
755
  rb_define_method(cNokogiriXmlReader, "attribute_count", attribute_count, 0);
781
- rb_define_method(cNokogiriXmlReader, "attribute_nodes", rb_xml_reader_attribute_nodes, 0);
782
756
  rb_define_method(cNokogiriXmlReader, "attribute_hash", rb_xml_reader_attribute_hash, 0);
783
757
  rb_define_method(cNokogiriXmlReader, "attributes?", attributes_eh, 0);
784
758
  rb_define_method(cNokogiriXmlReader, "base_uri", rb_xml_reader_base_uri, 0);
@@ -3,28 +3,22 @@
3
3
  VALUE cNokogiriXmlRelaxNG;
4
4
 
5
5
  static void
6
- xml_relax_ng_deallocate(void *data)
6
+ _noko_xml_relax_ng_deallocate(void *data)
7
7
  {
8
8
  xmlRelaxNGPtr schema = data;
9
9
  xmlRelaxNGFree(schema);
10
10
  }
11
11
 
12
12
  static const rb_data_type_t xml_relax_ng_type = {
13
- .wrap_struct_name = "Nokogiri::XML::RelaxNG",
13
+ .wrap_struct_name = "xmlRelaxNG",
14
14
  .function = {
15
- .dfree = xml_relax_ng_deallocate,
15
+ .dfree = _noko_xml_relax_ng_deallocate,
16
16
  },
17
17
  .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
18
18
  };
19
19
 
20
- /*
21
- * call-seq:
22
- * validate_document(document)
23
- *
24
- * Validate a Nokogiri::XML::Document against this RelaxNG schema.
25
- */
26
20
  static VALUE
27
- validate_document(VALUE self, VALUE document)
21
+ noko_xml_relax_ng__validate_document(VALUE self, VALUE document)
28
22
  {
29
23
  xmlDocPtr doc;
30
24
  xmlRelaxNGPtr schema;
@@ -43,13 +37,11 @@ validate_document(VALUE self, VALUE document)
43
37
  rb_raise(rb_eRuntimeError, "Could not create a validation context");
44
38
  }
45
39
 
46
- #ifdef HAVE_XMLRELAXNGSETVALIDSTRUCTUREDERRORS
47
40
  xmlRelaxNGSetValidStructuredErrors(
48
41
  valid_ctxt,
49
- Nokogiri_error_array_pusher,
42
+ noko__error_array_pusher,
50
43
  (void *)errors
51
44
  );
52
- #endif
53
45
 
54
46
  xmlRelaxNGValidateDoc(valid_ctxt, doc);
55
47
 
@@ -59,8 +51,8 @@ validate_document(VALUE self, VALUE document)
59
51
  }
60
52
 
61
53
  static VALUE
62
- xml_relax_ng_parse_schema(
63
- VALUE klass,
54
+ _noko_xml_relax_ng_parse_schema(
55
+ VALUE rb_class,
64
56
  xmlRelaxNGParserCtxtPtr c_parser_context,
65
57
  VALUE rb_parse_options
66
58
  )
@@ -68,6 +60,7 @@ xml_relax_ng_parse_schema(
68
60
  VALUE rb_errors;
69
61
  VALUE rb_schema;
70
62
  xmlRelaxNGPtr c_schema;
63
+ libxmlStructuredErrorHandlerState handler_state;
71
64
 
72
65
  if (NIL_P(rb_parse_options)) {
73
66
  rb_parse_options = rb_const_get_at(
@@ -77,33 +70,30 @@ xml_relax_ng_parse_schema(
77
70
  }
78
71
 
79
72
  rb_errors = rb_ary_new();
80
- xmlSetStructuredErrorFunc((void *)rb_errors, Nokogiri_error_array_pusher);
81
73
 
82
- #ifdef HAVE_XMLRELAXNGSETPARSERSTRUCTUREDERRORS
74
+ noko__structured_error_func_save_and_set(&handler_state, (void *)rb_errors, noko__error_array_pusher);
83
75
  xmlRelaxNGSetParserStructuredErrors(
84
76
  c_parser_context,
85
- Nokogiri_error_array_pusher,
77
+ noko__error_array_pusher,
86
78
  (void *)rb_errors
87
79
  );
88
- #endif
89
80
 
90
81
  c_schema = xmlRelaxNGParse(c_parser_context);
91
82
 
92
- xmlSetStructuredErrorFunc(NULL, NULL);
93
83
  xmlRelaxNGFreeParserCtxt(c_parser_context);
84
+ noko__structured_error_func_restore(&handler_state);
94
85
 
95
86
  if (NULL == c_schema) {
96
- xmlErrorPtr error = xmlGetLastError();
97
- if (error) {
98
- Nokogiri_error_raise(NULL, error);
87
+ VALUE exception = rb_funcall(cNokogiriXmlSyntaxError, rb_intern("aggregate"), 1, rb_errors);
88
+
89
+ if (RB_TEST(exception)) {
90
+ rb_exc_raise(exception);
99
91
  } else {
100
92
  rb_raise(rb_eRuntimeError, "Could not parse document");
101
93
  }
102
-
103
- return Qnil;
104
94
  }
105
95
 
106
- rb_schema = TypedData_Wrap_Struct(klass, &xml_relax_ng_type, c_schema);
96
+ rb_schema = TypedData_Wrap_Struct(rb_class, &xml_relax_ng_type, c_schema);
107
97
  rb_iv_set(rb_schema, "@errors", rb_errors);
108
98
  rb_iv_set(rb_schema, "@parse_options", rb_parse_options);
109
99
 
@@ -111,37 +101,27 @@ xml_relax_ng_parse_schema(
111
101
  }
112
102
 
113
103
  /*
114
- * call-seq:
115
- * read_memory(string)
104
+ * :call-seq:
105
+ * from_document(document) → Nokogiri::XML::RelaxNG
106
+ * from_document(document, parse_options) → Nokogiri::XML::RelaxNG
116
107
  *
117
- * Create a new RelaxNG from the contents of +string+
118
- */
119
- static VALUE
120
- read_memory(int argc, VALUE *argv, VALUE klass)
121
- {
122
- VALUE rb_content;
123
- VALUE rb_parse_options;
124
- xmlRelaxNGParserCtxtPtr c_parser_context;
125
-
126
- rb_scan_args(argc, argv, "11", &rb_content, &rb_parse_options);
127
-
128
- c_parser_context = xmlRelaxNGNewMemParserCtxt(
129
- (const char *)StringValuePtr(rb_content),
130
- (int)RSTRING_LEN(rb_content)
131
- );
132
-
133
- return xml_relax_ng_parse_schema(klass, c_parser_context, rb_parse_options);
134
- }
135
-
136
- /*
137
- * call-seq:
138
- * from_document(doc)
108
+ * Parse a RELAX NG schema definition from a Document to create a new Nokogiri::XML::RelaxNG.
109
+ *
110
+ * [Parameters]
111
+ * - +document+ (XML::Document) A document containing the RELAX NG schema definition
112
+ * - +parse_options+ (Nokogiri::XML::ParseOptions)
113
+ * Defaults to ParseOptions::DEFAULT_SCHEMA ⚠ Unused
114
+ *
115
+ * [Returns] Nokogiri::XML::RelaxNG
139
116
  *
140
- * Create a new RelaxNG schema from the Nokogiri::XML::Document +doc+
117
+ * +parse_options+ is currently unused by this method and is present only as a placeholder for
118
+ * future functionality.
141
119
  */
142
120
  static VALUE
143
- from_document(int argc, VALUE *argv, VALUE klass)
121
+ noko_xml_relax_ng_s_from_document(int argc, VALUE *argv, VALUE rb_class)
144
122
  {
123
+ /* TODO: deprecate this method and put file-or-string logic into .new so that becomes the
124
+ * preferred entry point, and this can become a private method */
145
125
  VALUE rb_document;
146
126
  VALUE rb_parse_options;
147
127
  xmlDocPtr c_document;
@@ -154,7 +134,7 @@ from_document(int argc, VALUE *argv, VALUE klass)
154
134
 
155
135
  c_parser_context = xmlRelaxNGNewDocParserCtxt(c_document);
156
136
 
157
- return xml_relax_ng_parse_schema(klass, c_parser_context, rb_parse_options);
137
+ return _noko_xml_relax_ng_parse_schema(rb_class, c_parser_context, rb_parse_options);
158
138
  }
159
139
 
160
140
  void
@@ -163,8 +143,7 @@ noko_init_xml_relax_ng(void)
163
143
  assert(cNokogiriXmlSchema);
164
144
  cNokogiriXmlRelaxNG = rb_define_class_under(mNokogiriXml, "RelaxNG", cNokogiriXmlSchema);
165
145
 
166
- rb_define_singleton_method(cNokogiriXmlRelaxNG, "read_memory", read_memory, -1);
167
- rb_define_singleton_method(cNokogiriXmlRelaxNG, "from_document", from_document, -1);
146
+ rb_define_singleton_method(cNokogiriXmlRelaxNG, "from_document", noko_xml_relax_ng_s_from_document, -1);
168
147
 
169
- rb_define_private_method(cNokogiriXmlRelaxNG, "validate_document", validate_document, 1);
148
+ rb_define_private_method(cNokogiriXmlRelaxNG, "validate_document", noko_xml_relax_ng__validate_document, 1);
170
149
  }