nokogiri 1.13.8 → 1.15.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (120)
  1. checksums.yaml +4 -4
  2. data/Gemfile +40 -0
  3. data/LICENSE-DEPENDENCIES.md +830 -509
  4. data/LICENSE.md +1 -1
  5. data/README.md +18 -11
  6. data/dependencies.yml +33 -15
  7. data/ext/nokogiri/extconf.rb +164 -46
  8. data/ext/nokogiri/gumbo.c +20 -10
  9. data/ext/nokogiri/html4_document.c +3 -4
  10. data/ext/nokogiri/html4_element_description.c +20 -15
  11. data/ext/nokogiri/html4_entity_lookup.c +2 -2
  12. data/ext/nokogiri/html4_sax_parser_context.c +11 -22
  13. data/ext/nokogiri/html4_sax_push_parser.c +3 -3
  14. data/ext/nokogiri/nokogiri.c +84 -75
  15. data/ext/nokogiri/nokogiri.h +31 -16
  16. data/ext/nokogiri/test_global_handlers.c +1 -1
  17. data/ext/nokogiri/xml_attr.c +2 -2
  18. data/ext/nokogiri/xml_attribute_decl.c +2 -2
  19. data/ext/nokogiri/xml_cdata.c +32 -18
  20. data/ext/nokogiri/xml_comment.c +2 -2
  21. data/ext/nokogiri/xml_document.c +127 -34
  22. data/ext/nokogiri/xml_document_fragment.c +2 -2
  23. data/ext/nokogiri/xml_dtd.c +2 -2
  24. data/ext/nokogiri/xml_element_content.c +34 -31
  25. data/ext/nokogiri/xml_element_decl.c +7 -7
  26. data/ext/nokogiri/xml_encoding_handler.c +15 -7
  27. data/ext/nokogiri/xml_entity_decl.c +1 -1
  28. data/ext/nokogiri/xml_entity_reference.c +2 -2
  29. data/ext/nokogiri/xml_namespace.c +79 -14
  30. data/ext/nokogiri/xml_node.c +300 -34
  31. data/ext/nokogiri/xml_node_set.c +125 -107
  32. data/ext/nokogiri/xml_processing_instruction.c +2 -2
  33. data/ext/nokogiri/xml_reader.c +81 -48
  34. data/ext/nokogiri/xml_relax_ng.c +66 -81
  35. data/ext/nokogiri/xml_sax_parser.c +45 -20
  36. data/ext/nokogiri/xml_sax_parser_context.c +46 -30
  37. data/ext/nokogiri/xml_sax_push_parser.c +30 -11
  38. data/ext/nokogiri/xml_schema.c +95 -117
  39. data/ext/nokogiri/xml_syntax_error.c +1 -1
  40. data/ext/nokogiri/xml_text.c +28 -14
  41. data/ext/nokogiri/xml_xpath_context.c +216 -136
  42. data/ext/nokogiri/xslt_stylesheet.c +118 -64
  43. data/gumbo-parser/Makefile +10 -0
  44. data/gumbo-parser/src/attribute.h +1 -1
  45. data/gumbo-parser/src/error.c +10 -6
  46. data/gumbo-parser/src/error.h +1 -1
  47. data/gumbo-parser/src/foreign_attrs.c +15 -16
  48. data/gumbo-parser/src/foreign_attrs.gperf +1 -1
  49. data/gumbo-parser/src/{gumbo.h → nokogiri_gumbo.h} +1 -0
  50. data/gumbo-parser/src/parser.c +21 -5
  51. data/gumbo-parser/src/replacement.h +1 -1
  52. data/gumbo-parser/src/string_buffer.h +1 -1
  53. data/gumbo-parser/src/string_piece.c +1 -1
  54. data/gumbo-parser/src/svg_attrs.c +2 -2
  55. data/gumbo-parser/src/svg_tags.c +2 -2
  56. data/gumbo-parser/src/tag.c +2 -1
  57. data/gumbo-parser/src/tag_lookup.c +7 -7
  58. data/gumbo-parser/src/tag_lookup.gperf +1 -0
  59. data/gumbo-parser/src/tag_lookup.h +1 -1
  60. data/gumbo-parser/src/token_buffer.h +1 -1
  61. data/gumbo-parser/src/tokenizer.c +1 -1
  62. data/gumbo-parser/src/tokenizer.h +1 -1
  63. data/gumbo-parser/src/utf8.c +1 -1
  64. data/gumbo-parser/src/utf8.h +1 -1
  65. data/gumbo-parser/src/util.c +1 -3
  66. data/gumbo-parser/src/util.h +4 -0
  67. data/gumbo-parser/src/vector.h +1 -1
  68. data/lib/nokogiri/css/node.rb +2 -2
  69. data/lib/nokogiri/css/xpath_visitor.rb +7 -5
  70. data/lib/nokogiri/css.rb +6 -0
  71. data/lib/nokogiri/decorators/slop.rb +1 -1
  72. data/lib/nokogiri/encoding_handler.rb +57 -0
  73. data/lib/nokogiri/extension.rb +4 -3
  74. data/lib/nokogiri/html4/document.rb +2 -121
  75. data/lib/nokogiri/html4/document_fragment.rb +1 -1
  76. data/lib/nokogiri/html4/element_description_defaults.rb +1827 -365
  77. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  78. data/lib/nokogiri/html4.rb +1 -0
  79. data/lib/nokogiri/html5/document.rb +113 -36
  80. data/lib/nokogiri/html5/document_fragment.rb +10 -3
  81. data/lib/nokogiri/html5/node.rb +8 -5
  82. data/lib/nokogiri/html5.rb +130 -216
  83. data/lib/nokogiri/jruby/dependencies.rb +1 -19
  84. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  85. data/lib/nokogiri/version/constant.rb +1 -1
  86. data/lib/nokogiri/version/info.rb +11 -10
  87. data/lib/nokogiri/xml/attr.rb +49 -0
  88. data/lib/nokogiri/xml/attribute_decl.rb +4 -2
  89. data/lib/nokogiri/xml/builder.rb +1 -1
  90. data/lib/nokogiri/xml/document.rb +102 -55
  91. data/lib/nokogiri/xml/document_fragment.rb +50 -7
  92. data/lib/nokogiri/xml/element_content.rb +10 -2
  93. data/lib/nokogiri/xml/element_decl.rb +4 -2
  94. data/lib/nokogiri/xml/entity_decl.rb +4 -2
  95. data/lib/nokogiri/xml/namespace.rb +42 -0
  96. data/lib/nokogiri/xml/node/save_options.rb +14 -4
  97. data/lib/nokogiri/xml/node.rb +212 -48
  98. data/lib/nokogiri/xml/node_set.rb +88 -9
  99. data/lib/nokogiri/xml/parse_options.rb +129 -50
  100. data/lib/nokogiri/xml/pp/node.rb +28 -15
  101. data/lib/nokogiri/xml/processing_instruction.rb +2 -1
  102. data/lib/nokogiri/xml/sax/document.rb +1 -1
  103. data/lib/nokogiri/xml/sax/parser.rb +2 -3
  104. data/lib/nokogiri/xml/searchable.rb +18 -10
  105. data/lib/nokogiri/xslt.rb +74 -4
  106. data/lib/nokogiri.rb +15 -15
  107. data/lib/xsd/xmlparser/nokogiri.rb +4 -2
  108. data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
  109. data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
  110. data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
  111. data/ports/archives/libxml2-2.11.7.tar.xz +0 -0
  112. data/ports/archives/libxslt-1.1.39.tar.xz +0 -0
  113. metadata +19 -242
  114. data/patches/libxml2/0004-use-glibc-strlen.patch +0 -53
  115. data/patches/libxml2/0005-avoid-isnan-isinf.patch +0 -81
  116. data/patches/libxml2/0006-update-automake-files-for-arm64.patch +0 -3040
  117. data/patches/libxml2/0008-htmlParseComment-handle-abruptly-closed-comments.patch +0 -61
  118. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +0 -3037
  119. data/ports/archives/libxml2-2.9.14.tar.xz +0 -0
  120. data/ports/archives/libxslt-1.1.35.tar.xz +0 -0
@@ -5,17 +5,40 @@ VALUE cNokogiriXmlSaxParserContext ;
5
5
  static ID id_read;
6
6
 
7
7
  static void
8
- deallocate(xmlParserCtxtPtr ctxt)
8
+ xml_sax_parser_context_free(void *data)
9
9
  {
10
- NOKOGIRI_DEBUG_START(ctxt);
11
-
10
+ xmlParserCtxtPtr ctxt = data;
12
11
  ctxt->sax = NULL;
13
-
14
12
  xmlFreeParserCtxt(ctxt);
13
+ }
14
+
15
+ /*
16
+ * note that htmlParserCtxtPtr == xmlParserCtxtPtr and xmlFreeParserCtxt() == htmlFreeParserCtxt()
17
+ * so we use this type for both XML::SAX::ParserContext and HTML::SAX::ParserContext
18
+ */
19
+ static const rb_data_type_t xml_sax_parser_context_type = {
20
+ .wrap_struct_name = "Nokogiri::XML::SAX::ParserContext",
21
+ .function = {
22
+ .dfree = xml_sax_parser_context_free,
23
+ },
24
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
25
+ };
26
+
27
+ xmlParserCtxtPtr
28
+ noko_xml_sax_parser_context_unwrap(VALUE rb_context)
29
+ {
30
+ xmlParserCtxtPtr c_context;
31
+ TypedData_Get_Struct(rb_context, xmlParserCtxt, &xml_sax_parser_context_type, c_context);
32
+ return c_context;
33
+ }
15
34
 
16
- NOKOGIRI_DEBUG_END(ctxt);
35
+ VALUE
36
+ noko_xml_sax_parser_context_wrap(VALUE klass, xmlParserCtxtPtr c_context)
37
+ {
38
+ return TypedData_Wrap_Struct(klass, &xml_sax_parser_context_type, c_context);
17
39
  }
18
40
 
41
+
19
42
  /*
20
43
  * call-seq:
21
44
  * parse_io(io, encoding)
@@ -41,7 +64,7 @@ parse_io(VALUE klass, VALUE io, VALUE encoding)
41
64
  ctxt->sax = NULL;
42
65
  }
43
66
 
44
- return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);
67
+ return noko_xml_sax_parser_context_wrap(klass, ctxt);
45
68
  }
46
69
 
47
70
  /*
@@ -54,7 +77,13 @@ static VALUE
54
77
  parse_file(VALUE klass, VALUE filename)
55
78
  {
56
79
  xmlParserCtxtPtr ctxt = xmlCreateFileParserCtxt(StringValueCStr(filename));
57
- return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);
80
+
81
+ if (ctxt->sax) {
82
+ xmlFree(ctxt->sax);
83
+ ctxt->sax = NULL;
84
+ }
85
+
86
+ return noko_xml_sax_parser_context_wrap(klass, ctxt);
58
87
  }
59
88
 
60
89
  /*
@@ -81,7 +110,7 @@ parse_memory(VALUE klass, VALUE data)
81
110
  ctxt->sax = NULL;
82
111
  }
83
112
 
84
- return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);
113
+ return noko_xml_sax_parser_context_wrap(klass, ctxt);
85
114
  }
86
115
 
87
116
  static VALUE
@@ -121,13 +150,8 @@ parse_with(VALUE self, VALUE sax_handler)
121
150
  rb_raise(rb_eArgError, "argument must be a Nokogiri::XML::SAX::Parser");
122
151
  }
123
152
 
124
- Data_Get_Struct(self, xmlParserCtxt, ctxt);
125
- Data_Get_Struct(sax_handler, xmlSAXHandler, sax);
126
-
127
- /* Free the sax handler since we'll assign our own */
128
- if (ctxt->sax && ctxt->sax != (xmlSAXHandlerPtr)&xmlDefaultSAXHandler) {
129
- xmlFree(ctxt->sax);
130
- }
153
+ ctxt = noko_xml_sax_parser_context_unwrap(self);
154
+ sax = noko_sax_handler_unwrap(sax_handler);
131
155
 
132
156
  ctxt->sax = sax;
133
157
  ctxt->userData = (void *)NOKOGIRI_SAX_TUPLE_NEW(ctxt, sax_handler);
@@ -149,8 +173,7 @@ parse_with(VALUE self, VALUE sax_handler)
149
173
  static VALUE
150
174
  set_replace_entities(VALUE self, VALUE value)
151
175
  {
152
- xmlParserCtxtPtr ctxt;
153
- Data_Get_Struct(self, xmlParserCtxt, ctxt);
176
+ xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);
154
177
 
155
178
  if (Qfalse == value) {
156
179
  ctxt->replaceEntities = 0;
@@ -171,8 +194,7 @@ set_replace_entities(VALUE self, VALUE value)
171
194
  static VALUE
172
195
  get_replace_entities(VALUE self)
173
196
  {
174
- xmlParserCtxtPtr ctxt;
175
- Data_Get_Struct(self, xmlParserCtxt, ctxt);
197
+ xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);
176
198
 
177
199
  if (0 == ctxt->replaceEntities) {
178
200
  return Qfalse;
@@ -189,10 +211,8 @@ get_replace_entities(VALUE self)
189
211
  static VALUE
190
212
  line(VALUE self)
191
213
  {
192
- xmlParserCtxtPtr ctxt;
193
214
  xmlParserInputPtr io;
194
-
195
- Data_Get_Struct(self, xmlParserCtxt, ctxt);
215
+ xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);
196
216
 
197
217
  io = ctxt->input;
198
218
  if (io) {
@@ -210,11 +230,9 @@ line(VALUE self)
210
230
  static VALUE
211
231
  column(VALUE self)
212
232
  {
213
- xmlParserCtxtPtr ctxt;
233
+ xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);
214
234
  xmlParserInputPtr io;
215
235
 
216
- Data_Get_Struct(self, xmlParserCtxt, ctxt);
217
-
218
236
  io = ctxt->input;
219
237
  if (io) {
220
238
  return INT2NUM(io->col);
@@ -233,8 +251,7 @@ column(VALUE self)
233
251
  static VALUE
234
252
  set_recovery(VALUE self, VALUE value)
235
253
  {
236
- xmlParserCtxtPtr ctxt;
237
- Data_Get_Struct(self, xmlParserCtxt, ctxt);
254
+ xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);
238
255
 
239
256
  if (value == Qfalse) {
240
257
  ctxt->recovery = 0;
@@ -255,8 +272,7 @@ set_recovery(VALUE self, VALUE value)
255
272
  static VALUE
256
273
  get_recovery(VALUE self)
257
274
  {
258
- xmlParserCtxtPtr ctxt;
259
- Data_Get_Struct(self, xmlParserCtxt, ctxt);
275
+ xmlParserCtxtPtr ctxt = noko_xml_sax_parser_context_unwrap(self);
260
276
 
261
277
  if (ctxt->recovery == 0) {
262
278
  return Qfalse;
@@ -266,7 +282,7 @@ get_recovery(VALUE self)
266
282
  }
267
283
 
268
284
  void
269
- noko_init_xml_sax_parser_context()
285
+ noko_init_xml_sax_parser_context(void)
270
286
  {
271
287
  cNokogiriXmlSaxParserContext = rb_define_class_under(mNokogiriXmlSax, "ParserContext", rb_cObject);
272
288
 
@@ -3,20 +3,35 @@
3
3
  VALUE cNokogiriXmlSaxPushParser ;
4
4
 
5
5
  static void
6
- deallocate(xmlParserCtxtPtr ctx)
6
+ xml_sax_push_parser_free(void *data)
7
7
  {
8
- NOKOGIRI_DEBUG_START(ctx);
8
+ xmlParserCtxtPtr ctx = data;
9
9
  if (ctx != NULL) {
10
10
  NOKOGIRI_SAX_TUPLE_DESTROY(ctx->userData);
11
11
  xmlFreeParserCtxt(ctx);
12
12
  }
13
- NOKOGIRI_DEBUG_END(ctx);
14
13
  }
15
14
 
15
+ static const rb_data_type_t xml_sax_push_parser_type = {
16
+ .wrap_struct_name = "Nokogiri::XML::SAX::PushParser",
17
+ .function = {
18
+ .dfree = xml_sax_push_parser_free,
19
+ },
20
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
21
+ };
22
+
16
23
  static VALUE
17
24
  allocate(VALUE klass)
18
25
  {
19
- return Data_Wrap_Struct(klass, NULL, deallocate, NULL);
26
+ return TypedData_Wrap_Struct(klass, &xml_sax_push_parser_type, NULL);
27
+ }
28
+
29
+ xmlParserCtxtPtr
30
+ noko_xml_sax_push_parser_unwrap(VALUE rb_parser)
31
+ {
32
+ xmlParserCtxtPtr c_parser;
33
+ TypedData_Get_Struct(rb_parser, xmlParserCtxt, &xml_sax_push_parser_type, c_parser);
34
+ return c_parser;
20
35
  }
21
36
 
22
37
  /*
@@ -33,7 +48,7 @@ native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
33
48
  int size = 0;
34
49
 
35
50
 
36
- Data_Get_Struct(self, xmlParserCtxt, ctx);
51
+ ctx = noko_xml_sax_push_parser_unwrap(self);
37
52
 
38
53
  if (Qnil != _chunk) {
39
54
  chunk = StringValuePtr(_chunk);
@@ -65,7 +80,7 @@ initialize_native(VALUE self, VALUE _xml_sax, VALUE _filename)
65
80
  const char *filename = NULL;
66
81
  xmlParserCtxtPtr ctx;
67
82
 
68
- Data_Get_Struct(_xml_sax, xmlSAXHandler, sax);
83
+ sax = noko_sax_handler_unwrap(_xml_sax);
69
84
 
70
85
  if (_filename != Qnil) { filename = StringValueCStr(_filename); }
71
86
 
@@ -91,7 +106,8 @@ static VALUE
91
106
  get_options(VALUE self)
92
107
  {
93
108
  xmlParserCtxtPtr ctx;
94
- Data_Get_Struct(self, xmlParserCtxt, ctx);
109
+
110
+ ctx = noko_xml_sax_push_parser_unwrap(self);
95
111
 
96
112
  return INT2NUM(ctx->options);
97
113
  }
@@ -100,7 +116,8 @@ static VALUE
100
116
  set_options(VALUE self, VALUE options)
101
117
  {
102
118
  xmlParserCtxtPtr ctx;
103
- Data_Get_Struct(self, xmlParserCtxt, ctx);
119
+
120
+ ctx = noko_xml_sax_push_parser_unwrap(self);
104
121
 
105
122
  if (xmlCtxtUseOptions(ctx, (int)NUM2INT(options)) != 0) {
106
123
  rb_raise(rb_eRuntimeError, "Cannot set XML parser context options");
@@ -120,7 +137,8 @@ static VALUE
120
137
  get_replace_entities(VALUE self)
121
138
  {
122
139
  xmlParserCtxtPtr ctx;
123
- Data_Get_Struct(self, xmlParserCtxt, ctx);
140
+
141
+ ctx = noko_xml_sax_push_parser_unwrap(self);
124
142
 
125
143
  if (0 == ctx->replaceEntities) {
126
144
  return Qfalse;
@@ -140,7 +158,8 @@ static VALUE
140
158
  set_replace_entities(VALUE self, VALUE value)
141
159
  {
142
160
  xmlParserCtxtPtr ctx;
143
- Data_Get_Struct(self, xmlParserCtxt, ctx);
161
+
162
+ ctx = noko_xml_sax_push_parser_unwrap(self);
144
163
 
145
164
  if (Qfalse == value) {
146
165
  ctx->replaceEntities = 0;
@@ -152,7 +171,7 @@ set_replace_entities(VALUE self, VALUE value)
152
171
  }
153
172
 
154
173
  void
155
- noko_init_xml_sax_push_parser()
174
+ noko_init_xml_sax_push_parser(void)
156
175
  {
157
176
  cNokogiriXmlSaxPushParser = rb_define_class_under(mNokogiriXmlSax, "PushParser", rb_cObject);
158
177
 
@@ -3,13 +3,20 @@
3
3
  VALUE cNokogiriXmlSchema;
4
4
 
5
5
  static void
6
- dealloc(xmlSchemaPtr schema)
6
+ xml_schema_deallocate(void *data)
7
7
  {
8
- NOKOGIRI_DEBUG_START(schema);
8
+ xmlSchemaPtr schema = data;
9
9
  xmlSchemaFree(schema);
10
- NOKOGIRI_DEBUG_END(schema);
11
10
  }
12
11
 
12
+ static const rb_data_type_t xml_schema_type = {
13
+ .wrap_struct_name = "Nokogiri::XML::Schema",
14
+ .function = {
15
+ .dfree = xml_schema_deallocate,
16
+ },
17
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
18
+ };
19
+
13
20
  /*
14
21
  * call-seq:
15
22
  * validate_document(document)
@@ -24,8 +31,8 @@ validate_document(VALUE self, VALUE document)
24
31
  xmlSchemaValidCtxtPtr valid_ctxt;
25
32
  VALUE errors;
26
33
 
27
- Data_Get_Struct(self, xmlSchema, schema);
28
- Noko_Node_Get_Struct(document, xmlDoc, doc);
34
+ TypedData_Get_Struct(self, xmlSchema, &xml_schema_type, schema);
35
+ doc = noko_xml_document_unwrap(document);
29
36
 
30
37
  errors = rb_ary_new();
31
38
 
@@ -65,7 +72,7 @@ validate_file(VALUE self, VALUE rb_filename)
65
72
  const char *filename ;
66
73
  VALUE errors;
67
74
 
68
- Data_Get_Struct(self, xmlSchema, schema);
75
+ TypedData_Get_Struct(self, xmlSchema, &xml_schema_type, schema);
69
76
  filename = (const char *)StringValueCStr(rb_filename) ;
70
77
 
71
78
  errors = rb_ary_new();
@@ -92,59 +99,53 @@ validate_file(VALUE self, VALUE rb_filename)
92
99
  return errors;
93
100
  }
94
101
 
95
- /*
96
- * call-seq:
97
- * read_memory(string)
98
- *
99
- * Create a new Schema from the contents of +string+
100
- */
101
102
  static VALUE
102
- read_memory(int argc, VALUE *argv, VALUE klass)
103
+ xml_schema_parse_schema(
104
+ VALUE klass,
105
+ xmlSchemaParserCtxtPtr c_parser_context,
106
+ VALUE rb_parse_options
107
+ )
103
108
  {
104
- VALUE content;
105
- VALUE parse_options;
109
+ VALUE rb_errors;
106
110
  int parse_options_int;
107
- xmlSchemaParserCtxtPtr ctx;
108
- xmlSchemaPtr schema;
109
- VALUE errors;
110
- VALUE rb_schema;
111
- int scanned_args = 0;
111
+ xmlSchemaPtr c_schema;
112
112
  xmlExternalEntityLoader old_loader = 0;
113
+ VALUE rb_schema;
113
114
 
114
- scanned_args = rb_scan_args(argc, argv, "11", &content, &parse_options);
115
- if (scanned_args == 1) {
116
- parse_options = rb_const_get_at(rb_const_get_at(mNokogiriXml, rb_intern("ParseOptions")), rb_intern("DEFAULT_SCHEMA"));
115
+ if (NIL_P(rb_parse_options)) {
116
+ rb_parse_options = rb_const_get_at(
117
+ rb_const_get_at(mNokogiriXml, rb_intern("ParseOptions")),
118
+ rb_intern("DEFAULT_SCHEMA")
119
+ );
117
120
  }
118
- parse_options_int = (int)NUM2INT(rb_funcall(parse_options, rb_intern("to_i"), 0));
119
121
 
120
- ctx = xmlSchemaNewMemParserCtxt((const char *)StringValuePtr(content), (int)RSTRING_LEN(content));
121
-
122
- errors = rb_ary_new();
123
- xmlSetStructuredErrorFunc((void *)errors, Nokogiri_error_array_pusher);
122
+ rb_errors = rb_ary_new();
123
+ xmlSetStructuredErrorFunc((void *)rb_errors, Nokogiri_error_array_pusher);
124
124
 
125
125
  #ifdef HAVE_XMLSCHEMASETPARSERSTRUCTUREDERRORS
126
126
  xmlSchemaSetParserStructuredErrors(
127
- ctx,
127
+ c_parser_context,
128
128
  Nokogiri_error_array_pusher,
129
- (void *)errors
129
+ (void *)rb_errors
130
130
  );
131
131
  #endif
132
132
 
133
+ parse_options_int = (int)NUM2INT(rb_funcall(rb_parse_options, rb_intern("to_i"), 0));
133
134
  if (parse_options_int & XML_PARSE_NONET) {
134
135
  old_loader = xmlGetExternalEntityLoader();
135
136
  xmlSetExternalEntityLoader(xmlNoNetExternalEntityLoader);
136
137
  }
137
138
 
138
- schema = xmlSchemaParse(ctx);
139
+ c_schema = xmlSchemaParse(c_parser_context);
139
140
 
140
141
  if (old_loader) {
141
142
  xmlSetExternalEntityLoader(old_loader);
142
143
  }
143
144
 
144
145
  xmlSetStructuredErrorFunc(NULL, NULL);
145
- xmlSchemaFreeParserCtxt(ctx);
146
+ xmlSchemaFreeParserCtxt(c_parser_context);
146
147
 
147
- if (NULL == schema) {
148
+ if (NULL == c_schema) {
148
149
  xmlErrorPtr error = xmlGetLastError();
149
150
  if (error) {
150
151
  Nokogiri_error_raise(NULL, error);
@@ -155,129 +156,106 @@ read_memory(int argc, VALUE *argv, VALUE klass)
155
156
  return Qnil;
156
157
  }
157
158
 
158
- rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema);
159
- rb_iv_set(rb_schema, "@errors", errors);
160
- rb_iv_set(rb_schema, "@parse_options", parse_options);
159
+ rb_schema = TypedData_Wrap_Struct(klass, &xml_schema_type, c_schema);
160
+ rb_iv_set(rb_schema, "@errors", rb_errors);
161
+ rb_iv_set(rb_schema, "@parse_options", rb_parse_options);
161
162
 
162
163
  return rb_schema;
163
164
  }
164
165
 
165
- /* Schema creation will remove and deallocate "blank" nodes.
166
- * If those blank nodes have been exposed to Ruby, they could get freed
167
- * out from under the VALUE pointer. This function checks to see if any of
168
- * those nodes have been exposed to Ruby, and if so we should raise an exception.
166
+ /*
167
+ * call-seq:
168
+ * read_memory(string) → Nokogiri::XML::Schema
169
+ *
170
+ * Create a new schema parsed from the contents of +string+
171
+ *
172
+ * [Parameters]
173
+ * - +string+: String containing XML to be parsed as a schema
174
+ *
175
+ * [Returns] Nokogiri::XML::Schema
169
176
  */
170
- static int
171
- has_blank_nodes_p(VALUE cache)
177
+ static VALUE
178
+ read_memory(int argc, VALUE *argv, VALUE klass)
172
179
  {
173
- long i;
180
+ VALUE rb_content;
181
+ VALUE rb_parse_options;
182
+ xmlSchemaParserCtxtPtr c_parser_context;
174
183
 
175
- if (NIL_P(cache)) {
176
- return 0;
177
- }
184
+ rb_scan_args(argc, argv, "11", &rb_content, &rb_parse_options);
178
185
 
179
- for (i = 0; i < RARRAY_LEN(cache); i++) {
180
- xmlNodePtr node;
181
- VALUE element = rb_ary_entry(cache, i);
182
- Noko_Node_Get_Struct(element, xmlNode, node);
183
- if (xmlIsBlankNode(node)) {
184
- return 1;
185
- }
186
- }
186
+ c_parser_context = xmlSchemaNewMemParserCtxt(
187
+ (const char *)StringValuePtr(rb_content),
188
+ (int)RSTRING_LEN(rb_content)
189
+ );
187
190
 
188
- return 0;
191
+ return xml_schema_parse_schema(klass, c_parser_context, rb_parse_options);
189
192
  }
190
193
 
191
194
  /*
192
195
  * call-seq:
193
- * from_document(doc)
196
+ * from_document(document) → Nokogiri::XML::Schema
194
197
  *
195
- * Create a new Schema from the Nokogiri::XML::Document +doc+
198
+ * Create a new schema parsed from the +document+.
199
+ *
200
+ * [Parameters]
201
+ * - +document+: Nokogiri::XML::Document to be parsed
202
+ *
203
+ * [Returns] Nokogiri::XML::Schema
196
204
  */
197
205
  static VALUE
198
- from_document(int argc, VALUE *argv, VALUE klass)
206
+ rb_xml_schema_s_from_document(int argc, VALUE *argv, VALUE klass)
199
207
  {
200
- VALUE document;
201
- VALUE parse_options;
202
- int parse_options_int;
203
- xmlDocPtr doc;
204
- xmlSchemaParserCtxtPtr ctx;
205
- xmlSchemaPtr schema;
206
- VALUE errors;
208
+ VALUE rb_document;
209
+ VALUE rb_parse_options;
207
210
  VALUE rb_schema;
208
- int scanned_args = 0;
209
- xmlExternalEntityLoader old_loader = 0;
211
+ xmlDocPtr c_document;
212
+ xmlSchemaParserCtxtPtr c_parser_context;
213
+ int defensive_copy_p = 0;
210
214
 
211
- scanned_args = rb_scan_args(argc, argv, "11", &document, &parse_options);
215
+ rb_scan_args(argc, argv, "11", &rb_document, &rb_parse_options);
212
216
 
213
- Noko_Node_Get_Struct(document, xmlDoc, doc);
214
- doc = doc->doc; /* In case someone passes us a node. ugh. */
215
-
216
- if (scanned_args == 1) {
217
- parse_options = rb_const_get_at(rb_const_get_at(mNokogiriXml, rb_intern("ParseOptions")), rb_intern("DEFAULT_SCHEMA"));
218
- }
219
- parse_options_int = (int)NUM2INT(rb_funcall(parse_options, rb_intern("to_i"), 0));
220
-
221
- if (has_blank_nodes_p(DOC_NODE_CACHE(doc))) {
222
- rb_raise(rb_eArgError, "Creating a schema from a document that has blank nodes exposed to Ruby is dangerous");
217
+ if (!rb_obj_is_kind_of(rb_document, cNokogiriXmlNode)) {
218
+ rb_raise(rb_eTypeError,
219
+ "expected parameter to be a Nokogiri::XML::Document, received %"PRIsVALUE,
220
+ rb_obj_class(rb_document));
223
221
  }
224
222
 
225
- ctx = xmlSchemaNewDocParserCtxt(doc);
226
-
227
- errors = rb_ary_new();
228
- xmlSetStructuredErrorFunc((void *)errors, Nokogiri_error_array_pusher);
229
-
230
- #ifdef HAVE_XMLSCHEMASETPARSERSTRUCTUREDERRORS
231
- xmlSchemaSetParserStructuredErrors(
232
- ctx,
233
- Nokogiri_error_array_pusher,
234
- (void *)errors
235
- );
236
- #endif
237
-
238
- if (parse_options_int & XML_PARSE_NONET) {
239
- old_loader = xmlGetExternalEntityLoader();
240
- xmlSetExternalEntityLoader(xmlNoNetExternalEntityLoader);
223
+ if (!rb_obj_is_kind_of(rb_document, cNokogiriXmlDocument)) {
224
+ xmlNodePtr deprecated_node_type_arg;
225
+ // TODO: deprecate allowing Node
226
+ NOKO_WARN_DEPRECATION("Passing a Node as the first parameter to Schema.from_document is deprecated. Please pass a Document instead. This will become an error in a future release of Nokogiri.");
227
+ Noko_Node_Get_Struct(rb_document, xmlNode, deprecated_node_type_arg);
228
+ c_document = deprecated_node_type_arg->doc;
229
+ } else {
230
+ c_document = noko_xml_document_unwrap(rb_document);
241
231
  }
242
232
 
243
- schema = xmlSchemaParse(ctx);
244
-
245
- if (old_loader) {
246
- xmlSetExternalEntityLoader(old_loader);
233
+ if (noko_xml_document_has_wrapped_blank_nodes_p(c_document)) {
234
+ // see https://github.com/sparklemotion/nokogiri/pull/2001
235
+ c_document = xmlCopyDoc(c_document, 1);
236
+ defensive_copy_p = 1;
247
237
  }
248
238
 
249
- xmlSetStructuredErrorFunc(NULL, NULL);
250
- xmlSchemaFreeParserCtxt(ctx);
239
+ c_parser_context = xmlSchemaNewDocParserCtxt(c_document);
240
+ rb_schema = xml_schema_parse_schema(klass, c_parser_context, rb_parse_options);
251
241
 
252
- if (NULL == schema) {
253
- xmlErrorPtr error = xmlGetLastError();
254
- if (error) {
255
- Nokogiri_error_raise(NULL, error);
256
- } else {
257
- rb_raise(rb_eRuntimeError, "Could not parse document");
258
- }
259
-
260
- return Qnil;
242
+ if (defensive_copy_p) {
243
+ xmlFreeDoc(c_document);
244
+ c_document = NULL;
261
245
  }
262
246
 
263
- rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema);
264
- rb_iv_set(rb_schema, "@errors", errors);
265
- rb_iv_set(rb_schema, "@parse_options", parse_options);
266
-
267
247
  return rb_schema;
268
-
269
- return Qnil;
270
248
  }
271
249
 
272
250
  void
273
- noko_init_xml_schema()
251
+ noko_init_xml_schema(void)
274
252
  {
275
253
  cNokogiriXmlSchema = rb_define_class_under(mNokogiriXml, "Schema", rb_cObject);
276
254
 
277
255
  rb_undef_alloc_func(cNokogiriXmlSchema);
278
256
 
279
257
  rb_define_singleton_method(cNokogiriXmlSchema, "read_memory", read_memory, -1);
280
- rb_define_singleton_method(cNokogiriXmlSchema, "from_document", from_document, -1);
258
+ rb_define_singleton_method(cNokogiriXmlSchema, "from_document", rb_xml_schema_s_from_document, -1);
281
259
 
282
260
  rb_define_private_method(cNokogiriXmlSchema, "validate_document", validate_document, 1);
283
261
  rb_define_private_method(cNokogiriXmlSchema, "validate_file", validate_file, 1);
@@ -75,7 +75,7 @@ Nokogiri_wrap_xml_syntax_error(xmlErrorPtr error)
75
75
  }
76
76
 
77
77
  void
78
- noko_init_xml_syntax_error()
78
+ noko_init_xml_syntax_error(void)
79
79
  {
80
80
  assert(cNokogiriSyntaxError);
81
81
  /*
@@ -9,25 +9,39 @@ VALUE cNokogiriXmlText ;
9
9
  * Create a new Text element on the +document+ with +content+
10
10
  */
11
11
  static VALUE
12
- new (int argc, VALUE *argv, VALUE klass)
12
+ rb_xml_text_s_new(int argc, VALUE *argv, VALUE klass)
13
13
  {
14
- xmlDocPtr doc;
15
- xmlNodePtr node;
16
- VALUE string;
17
- VALUE document;
18
- VALUE rest;
14
+ xmlDocPtr c_document;
15
+ xmlNodePtr c_node;
16
+ VALUE rb_string;
17
+ VALUE rb_document;
18
+ VALUE rb_rest;
19
19
  VALUE rb_node;
20
20
 
21
- rb_scan_args(argc, argv, "2*", &string, &document, &rest);
21
+ rb_scan_args(argc, argv, "2*", &rb_string, &rb_document, &rb_rest);
22
22
 
23
- Noko_Node_Get_Struct(document, xmlDoc, doc);
23
+ if (!rb_obj_is_kind_of(rb_document, cNokogiriXmlNode)) {
24
+ rb_raise(rb_eTypeError,
25
+ "expected second parameter to be a Nokogiri::XML::Document, received %"PRIsVALUE,
26
+ rb_obj_class(rb_document));
27
+ }
24
28
 
25
- node = xmlNewText((xmlChar *)StringValueCStr(string));
26
- node->doc = doc->doc;
29
+ if (!rb_obj_is_kind_of(rb_document, cNokogiriXmlDocument)) {
30
+ xmlNodePtr deprecated_node_type_arg;
31
+ // TODO: deprecate allowing Node
32
+ NOKO_WARN_DEPRECATION("Passing a Node as the second parameter to Text.new is deprecated. Please pass a Document instead. This will become an error in a future release of Nokogiri.");
33
+ Noko_Node_Get_Struct(rb_document, xmlNode, deprecated_node_type_arg);
34
+ c_document = deprecated_node_type_arg->doc;
35
+ } else {
36
+ c_document = noko_xml_document_unwrap(rb_document);
37
+ }
27
38
 
28
- noko_xml_document_pin_node(node);
39
+ c_node = xmlNewText((xmlChar *)StringValueCStr(rb_string));
40
+ c_node->doc = c_document;
29
41
 
30
- rb_node = noko_xml_node_wrap(klass, node) ;
42
+ noko_xml_document_pin_node(c_node);
43
+
44
+ rb_node = noko_xml_node_wrap(klass, c_node) ;
31
45
  rb_obj_call_init(rb_node, argc, argv);
32
46
 
33
47
  if (rb_block_given_p()) { rb_yield(rb_node); }
@@ -36,7 +50,7 @@ new (int argc, VALUE *argv, VALUE klass)
36
50
  }
37
51
 
38
52
  void
39
- noko_init_xml_text()
53
+ noko_init_xml_text(void)
40
54
  {
41
55
  assert(cNokogiriXmlCharacterData);
42
56
  /*
@@ -44,5 +58,5 @@ noko_init_xml_text()
44
58
  */
45
59
  cNokogiriXmlText = rb_define_class_under(mNokogiriXml, "Text", cNokogiriXmlCharacterData);
46
60
 
47
- rb_define_singleton_method(cNokogiriXmlText, "new", new, -1);
61
+ rb_define_singleton_method(cNokogiriXmlText, "new", rb_xml_text_s_new, -1);
48
62
  }