nokogiri 1.14.5 → 1.15.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (68)
  1. checksums.yaml +4 -4
  2. data/Gemfile +9 -8
  3. data/dependencies.yml +6 -6
  4. data/ext/nokogiri/extconf.rb +66 -22
  5. data/ext/nokogiri/html4_document.c +1 -2
  6. data/ext/nokogiri/html4_element_description.c +19 -14
  7. data/ext/nokogiri/html4_sax_parser_context.c +10 -16
  8. data/ext/nokogiri/html4_sax_push_parser.c +2 -2
  9. data/ext/nokogiri/nokogiri.c +46 -24
  10. data/ext/nokogiri/nokogiri.h +13 -2
  11. data/ext/nokogiri/xml_attr.c +1 -1
  12. data/ext/nokogiri/xml_cdata.c +10 -2
  13. data/ext/nokogiri/xml_comment.c +1 -1
  14. data/ext/nokogiri/xml_document.c +102 -22
  15. data/ext/nokogiri/xml_document_fragment.c +1 -1
  16. data/ext/nokogiri/xml_dtd.c +1 -1
  17. data/ext/nokogiri/xml_element_content.c +32 -29
  18. data/ext/nokogiri/xml_element_decl.c +5 -5
  19. data/ext/nokogiri/xml_encoding_handler.c +12 -4
  20. data/ext/nokogiri/xml_entity_reference.c +1 -1
  21. data/ext/nokogiri/xml_namespace.c +11 -12
  22. data/ext/nokogiri/xml_node.c +7 -7
  23. data/ext/nokogiri/xml_node_set.c +125 -105
  24. data/ext/nokogiri/xml_processing_instruction.c +1 -1
  25. data/ext/nokogiri/xml_reader.c +37 -28
  26. data/ext/nokogiri/xml_relax_ng.c +65 -78
  27. data/ext/nokogiri/xml_sax_parser.c +24 -5
  28. data/ext/nokogiri/xml_sax_parser_context.c +46 -25
  29. data/ext/nokogiri/xml_sax_push_parser.c +29 -8
  30. data/ext/nokogiri/xml_schema.c +90 -116
  31. data/ext/nokogiri/xml_text.c +10 -2
  32. data/ext/nokogiri/xml_xpath_context.c +156 -83
  33. data/ext/nokogiri/xslt_stylesheet.c +103 -50
  34. data/gumbo-parser/src/error.c +8 -4
  35. data/gumbo-parser/src/foreign_attrs.c +13 -14
  36. data/gumbo-parser/src/foreign_attrs.gperf +1 -1
  37. data/gumbo-parser/src/parser.c +13 -0
  38. data/lib/nokogiri/css/xpath_visitor.rb +2 -2
  39. data/lib/nokogiri/extension.rb +1 -1
  40. data/lib/nokogiri/html4/document_fragment.rb +1 -1
  41. data/lib/nokogiri/html4/element_description_defaults.rb +1821 -353
  42. data/lib/nokogiri/html5/document_fragment.rb +1 -1
  43. data/lib/nokogiri/html5/node.rb +5 -0
  44. data/lib/nokogiri/html5.rb +5 -2
  45. data/lib/nokogiri/jruby/nokogiri_jars.rb +3 -3
  46. data/lib/nokogiri/version/constant.rb +1 -1
  47. data/lib/nokogiri/xml/attribute_decl.rb +4 -2
  48. data/lib/nokogiri/xml/document_fragment.rb +1 -1
  49. data/lib/nokogiri/xml/element_content.rb +10 -2
  50. data/lib/nokogiri/xml/element_decl.rb +4 -2
  51. data/lib/nokogiri/xml/entity_decl.rb +4 -2
  52. data/lib/nokogiri/xml/node/save_options.rb +8 -0
  53. data/lib/nokogiri/xml/node.rb +22 -13
  54. data/lib/nokogiri/xml/pp/node.rb +23 -12
  55. data/lib/nokogiri/xml/sax/document.rb +1 -1
  56. data/lib/nokogiri/xml/searchable.rb +18 -10
  57. data/lib/nokogiri/xslt.rb +73 -3
  58. data/lib/nokogiri.rb +12 -4
  59. data/lib/xsd/xmlparser/nokogiri.rb +1 -1
  60. data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
  61. data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
  62. data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
  63. data/ports/archives/libxml2-2.11.3.tar.xz +0 -0
  64. data/ports/archives/libxslt-1.1.38.tar.xz +0 -0
  65. metadata +9 -7
  66. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +0 -3037
  67. data/ports/archives/libxml2-2.10.4.tar.xz +0 -0
  68. data/ports/archives/libxslt-1.1.37.tar.xz +0 -0
@@ -3,18 +3,35 @@
3
3
  VALUE cNokogiriXmlSaxPushParser ;
4
4
 
5
5
  static void
6
- deallocate(xmlParserCtxtPtr ctx)
6
+ xml_sax_push_parser_free(void *data)
7
7
  {
8
+ xmlParserCtxtPtr ctx = data;
8
9
  if (ctx != NULL) {
9
10
  NOKOGIRI_SAX_TUPLE_DESTROY(ctx->userData);
10
11
  xmlFreeParserCtxt(ctx);
11
12
  }
12
13
  }
13
14
 
15
+ static const rb_data_type_t xml_sax_push_parser_type = {
16
+ .wrap_struct_name = "Nokogiri::XML::SAX::PushParser",
17
+ .function = {
18
+ .dfree = xml_sax_push_parser_free,
19
+ },
20
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
21
+ };
22
+
14
23
  static VALUE
15
24
  allocate(VALUE klass)
16
25
  {
17
- return Data_Wrap_Struct(klass, NULL, deallocate, NULL);
26
+ return TypedData_Wrap_Struct(klass, &xml_sax_push_parser_type, NULL);
27
+ }
28
+
29
+ xmlParserCtxtPtr
30
+ noko_xml_sax_push_parser_unwrap(VALUE rb_parser)
31
+ {
32
+ xmlParserCtxtPtr c_parser;
33
+ TypedData_Get_Struct(rb_parser, xmlParserCtxt, &xml_sax_push_parser_type, c_parser);
34
+ return c_parser;
18
35
  }
19
36
 
20
37
  /*
@@ -31,7 +48,7 @@ native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
31
48
  int size = 0;
32
49
 
33
50
 
34
- Data_Get_Struct(self, xmlParserCtxt, ctx);
51
+ ctx = noko_xml_sax_push_parser_unwrap(self);
35
52
 
36
53
  if (Qnil != _chunk) {
37
54
  chunk = StringValuePtr(_chunk);
@@ -63,7 +80,7 @@ initialize_native(VALUE self, VALUE _xml_sax, VALUE _filename)
63
80
  const char *filename = NULL;
64
81
  xmlParserCtxtPtr ctx;
65
82
 
66
- Data_Get_Struct(_xml_sax, xmlSAXHandler, sax);
83
+ sax = noko_sax_handler_unwrap(_xml_sax);
67
84
 
68
85
  if (_filename != Qnil) { filename = StringValueCStr(_filename); }
69
86
 
@@ -89,7 +106,8 @@ static VALUE
89
106
  get_options(VALUE self)
90
107
  {
91
108
  xmlParserCtxtPtr ctx;
92
- Data_Get_Struct(self, xmlParserCtxt, ctx);
109
+
110
+ ctx = noko_xml_sax_push_parser_unwrap(self);
93
111
 
94
112
  return INT2NUM(ctx->options);
95
113
  }
@@ -98,7 +116,8 @@ static VALUE
98
116
  set_options(VALUE self, VALUE options)
99
117
  {
100
118
  xmlParserCtxtPtr ctx;
101
- Data_Get_Struct(self, xmlParserCtxt, ctx);
119
+
120
+ ctx = noko_xml_sax_push_parser_unwrap(self);
102
121
 
103
122
  if (xmlCtxtUseOptions(ctx, (int)NUM2INT(options)) != 0) {
104
123
  rb_raise(rb_eRuntimeError, "Cannot set XML parser context options");
@@ -118,7 +137,8 @@ static VALUE
118
137
  get_replace_entities(VALUE self)
119
138
  {
120
139
  xmlParserCtxtPtr ctx;
121
- Data_Get_Struct(self, xmlParserCtxt, ctx);
140
+
141
+ ctx = noko_xml_sax_push_parser_unwrap(self);
122
142
 
123
143
  if (0 == ctx->replaceEntities) {
124
144
  return Qfalse;
@@ -138,7 +158,8 @@ static VALUE
138
158
  set_replace_entities(VALUE self, VALUE value)
139
159
  {
140
160
  xmlParserCtxtPtr ctx;
141
- Data_Get_Struct(self, xmlParserCtxt, ctx);
161
+
162
+ ctx = noko_xml_sax_push_parser_unwrap(self);
142
163
 
143
164
  if (Qfalse == value) {
144
165
  ctx->replaceEntities = 0;
@@ -3,11 +3,20 @@
3
3
  VALUE cNokogiriXmlSchema;
4
4
 
5
5
  static void
6
- dealloc(xmlSchemaPtr schema)
6
+ xml_schema_deallocate(void *data)
7
7
  {
8
+ xmlSchemaPtr schema = data;
8
9
  xmlSchemaFree(schema);
9
10
  }
10
11
 
12
+ static const rb_data_type_t xml_schema_type = {
13
+ .wrap_struct_name = "Nokogiri::XML::Schema",
14
+ .function = {
15
+ .dfree = xml_schema_deallocate,
16
+ },
17
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
18
+ };
19
+
11
20
  /*
12
21
  * call-seq:
13
22
  * validate_document(document)
@@ -22,8 +31,8 @@ validate_document(VALUE self, VALUE document)
22
31
  xmlSchemaValidCtxtPtr valid_ctxt;
23
32
  VALUE errors;
24
33
 
25
- Data_Get_Struct(self, xmlSchema, schema);
26
- Noko_Node_Get_Struct(document, xmlDoc, doc);
34
+ TypedData_Get_Struct(self, xmlSchema, &xml_schema_type, schema);
35
+ doc = noko_xml_document_unwrap(document);
27
36
 
28
37
  errors = rb_ary_new();
29
38
 
@@ -63,7 +72,7 @@ validate_file(VALUE self, VALUE rb_filename)
63
72
  const char *filename ;
64
73
  VALUE errors;
65
74
 
66
- Data_Get_Struct(self, xmlSchema, schema);
75
+ TypedData_Get_Struct(self, xmlSchema, &xml_schema_type, schema);
67
76
  filename = (const char *)StringValueCStr(rb_filename) ;
68
77
 
69
78
  errors = rb_ary_new();
@@ -90,59 +99,53 @@ validate_file(VALUE self, VALUE rb_filename)
90
99
  return errors;
91
100
  }
92
101
 
93
- /*
94
- * call-seq:
95
- * read_memory(string)
96
- *
97
- * Create a new Schema from the contents of +string+
98
- */
99
102
  static VALUE
100
- read_memory(int argc, VALUE *argv, VALUE klass)
103
+ xml_schema_parse_schema(
104
+ VALUE klass,
105
+ xmlSchemaParserCtxtPtr c_parser_context,
106
+ VALUE rb_parse_options
107
+ )
101
108
  {
102
- VALUE content;
103
- VALUE parse_options;
109
+ VALUE rb_errors;
104
110
  int parse_options_int;
105
- xmlSchemaParserCtxtPtr ctx;
106
- xmlSchemaPtr schema;
107
- VALUE errors;
108
- VALUE rb_schema;
109
- int scanned_args = 0;
111
+ xmlSchemaPtr c_schema;
110
112
  xmlExternalEntityLoader old_loader = 0;
113
+ VALUE rb_schema;
111
114
 
112
- scanned_args = rb_scan_args(argc, argv, "11", &content, &parse_options);
113
- if (scanned_args == 1) {
114
- parse_options = rb_const_get_at(rb_const_get_at(mNokogiriXml, rb_intern("ParseOptions")), rb_intern("DEFAULT_SCHEMA"));
115
+ if (NIL_P(rb_parse_options)) {
116
+ rb_parse_options = rb_const_get_at(
117
+ rb_const_get_at(mNokogiriXml, rb_intern("ParseOptions")),
118
+ rb_intern("DEFAULT_SCHEMA")
119
+ );
115
120
  }
116
- parse_options_int = (int)NUM2INT(rb_funcall(parse_options, rb_intern("to_i"), 0));
117
121
 
118
- ctx = xmlSchemaNewMemParserCtxt((const char *)StringValuePtr(content), (int)RSTRING_LEN(content));
119
-
120
- errors = rb_ary_new();
121
- xmlSetStructuredErrorFunc((void *)errors, Nokogiri_error_array_pusher);
122
+ rb_errors = rb_ary_new();
123
+ xmlSetStructuredErrorFunc((void *)rb_errors, Nokogiri_error_array_pusher);
122
124
 
123
125
  #ifdef HAVE_XMLSCHEMASETPARSERSTRUCTUREDERRORS
124
126
  xmlSchemaSetParserStructuredErrors(
125
- ctx,
127
+ c_parser_context,
126
128
  Nokogiri_error_array_pusher,
127
- (void *)errors
129
+ (void *)rb_errors
128
130
  );
129
131
  #endif
130
132
 
133
+ parse_options_int = (int)NUM2INT(rb_funcall(rb_parse_options, rb_intern("to_i"), 0));
131
134
  if (parse_options_int & XML_PARSE_NONET) {
132
135
  old_loader = xmlGetExternalEntityLoader();
133
136
  xmlSetExternalEntityLoader(xmlNoNetExternalEntityLoader);
134
137
  }
135
138
 
136
- schema = xmlSchemaParse(ctx);
139
+ c_schema = xmlSchemaParse(c_parser_context);
137
140
 
138
141
  if (old_loader) {
139
142
  xmlSetExternalEntityLoader(old_loader);
140
143
  }
141
144
 
142
145
  xmlSetStructuredErrorFunc(NULL, NULL);
143
- xmlSchemaFreeParserCtxt(ctx);
146
+ xmlSchemaFreeParserCtxt(c_parser_context);
144
147
 
145
- if (NULL == schema) {
148
+ if (NULL == c_schema) {
146
149
  xmlErrorPtr error = xmlGetLastError();
147
150
  if (error) {
148
151
  Nokogiri_error_raise(NULL, error);
@@ -153,118 +156,89 @@ read_memory(int argc, VALUE *argv, VALUE klass)
153
156
  return Qnil;
154
157
  }
155
158
 
156
- rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema);
157
- rb_iv_set(rb_schema, "@errors", errors);
158
- rb_iv_set(rb_schema, "@parse_options", parse_options);
159
+ rb_schema = TypedData_Wrap_Struct(klass, &xml_schema_type, c_schema);
160
+ rb_iv_set(rb_schema, "@errors", rb_errors);
161
+ rb_iv_set(rb_schema, "@parse_options", rb_parse_options);
159
162
 
160
163
  return rb_schema;
161
164
  }
162
165
 
163
- /* Schema creation will remove and deallocate "blank" nodes.
164
- * If those blank nodes have been exposed to Ruby, they could get freed
165
- * out from under the VALUE pointer. This function checks to see if any of
166
- * those nodes have been exposed to Ruby, and if so we should raise an exception.
166
+ /*
167
+ * call-seq:
168
+ * read_memory(string) Nokogiri::XML::Schema
169
+ *
170
+ * Create a new schema parsed from the contents of +string+
171
+ *
172
+ * [Parameters]
173
+ * - +string+: String containing XML to be parsed as a schema
174
+ *
175
+ * [Returns] Nokogiri::XML::Schema
167
176
  */
168
- static int
169
- has_blank_nodes_p(VALUE cache)
177
+ static VALUE
178
+ read_memory(int argc, VALUE *argv, VALUE klass)
170
179
  {
171
- long i;
180
+ VALUE rb_content;
181
+ VALUE rb_parse_options;
182
+ xmlSchemaParserCtxtPtr c_parser_context;
172
183
 
173
- if (NIL_P(cache)) {
174
- return 0;
175
- }
184
+ rb_scan_args(argc, argv, "11", &rb_content, &rb_parse_options);
176
185
 
177
- for (i = 0; i < RARRAY_LEN(cache); i++) {
178
- xmlNodePtr node;
179
- VALUE element = rb_ary_entry(cache, i);
180
- Noko_Node_Get_Struct(element, xmlNode, node);
181
- if (xmlIsBlankNode(node)) {
182
- return 1;
183
- }
184
- }
186
+ c_parser_context = xmlSchemaNewMemParserCtxt(
187
+ (const char *)StringValuePtr(rb_content),
188
+ (int)RSTRING_LEN(rb_content)
189
+ );
185
190
 
186
- return 0;
191
+ return xml_schema_parse_schema(klass, c_parser_context, rb_parse_options);
187
192
  }
188
193
 
189
194
  /*
190
195
  * call-seq:
191
- * from_document(doc)
196
+ * from_document(document) → Nokogiri::XML::Schema
197
+ *
198
+ * Create a new schema parsed from the +document+.
192
199
  *
193
- * Create a new Schema from the Nokogiri::XML::Document +doc+
200
+ * [Parameters]
201
+ * - +document+: Nokogiri::XML::Document to be parsed
202
+ *
203
+ * [Returns] Nokogiri::XML::Schema
194
204
  */
195
205
  static VALUE
196
206
  from_document(int argc, VALUE *argv, VALUE klass)
197
207
  {
198
- VALUE document;
199
- VALUE parse_options;
200
- int parse_options_int;
201
- xmlDocPtr doc;
202
- xmlSchemaParserCtxtPtr ctx;
203
- xmlSchemaPtr schema;
204
- VALUE errors;
208
+ VALUE rb_document;
209
+ VALUE rb_parse_options;
205
210
  VALUE rb_schema;
206
- int scanned_args = 0;
207
- xmlExternalEntityLoader old_loader = 0;
208
-
209
- scanned_args = rb_scan_args(argc, argv, "11", &document, &parse_options);
210
-
211
- Noko_Node_Get_Struct(document, xmlDoc, doc);
212
- doc = doc->doc; /* In case someone passes us a node. ugh. */
213
-
214
- if (scanned_args == 1) {
215
- parse_options = rb_const_get_at(rb_const_get_at(mNokogiriXml, rb_intern("ParseOptions")), rb_intern("DEFAULT_SCHEMA"));
216
- }
217
- parse_options_int = (int)NUM2INT(rb_funcall(parse_options, rb_intern("to_i"), 0));
218
-
219
- if (has_blank_nodes_p(DOC_NODE_CACHE(doc))) {
220
- rb_raise(rb_eArgError, "Creating a schema from a document that has blank nodes exposed to Ruby is dangerous");
221
- }
222
-
223
- ctx = xmlSchemaNewDocParserCtxt(doc);
224
-
225
- errors = rb_ary_new();
226
- xmlSetStructuredErrorFunc((void *)errors, Nokogiri_error_array_pusher);
227
-
228
- #ifdef HAVE_XMLSCHEMASETPARSERSTRUCTUREDERRORS
229
- xmlSchemaSetParserStructuredErrors(
230
- ctx,
231
- Nokogiri_error_array_pusher,
232
- (void *)errors
233
- );
234
- #endif
235
-
236
- if (parse_options_int & XML_PARSE_NONET) {
237
- old_loader = xmlGetExternalEntityLoader();
238
- xmlSetExternalEntityLoader(xmlNoNetExternalEntityLoader);
211
+ xmlDocPtr c_document;
212
+ xmlSchemaParserCtxtPtr c_parser_context;
213
+ int defensive_copy_p = 0;
214
+
215
+ rb_scan_args(argc, argv, "11", &rb_document, &rb_parse_options);
216
+
217
+ if (rb_obj_is_kind_of(rb_document, cNokogiriXmlDocument)) {
218
+ c_document = noko_xml_document_unwrap(rb_document);
219
+ } else {
220
+ xmlNodePtr deprecated_node_type_arg;
221
+ // TODO: deprecate allowing Node
222
+ NOKO_WARN_DEPRECATION("Passing a Node as the first parameter to Schema.from_document is deprecated. Please pass a Document instead. This will become an error in a future release of Nokogiri.");
223
+ Noko_Node_Get_Struct(rb_document, xmlNode, deprecated_node_type_arg);
224
+ c_document = deprecated_node_type_arg->doc;
239
225
  }
240
226
 
241
- schema = xmlSchemaParse(ctx);
242
-
243
- if (old_loader) {
244
- xmlSetExternalEntityLoader(old_loader);
227
+ if (noko_xml_document_has_wrapped_blank_nodes_p(c_document)) {
228
+ // see https://github.com/sparklemotion/nokogiri/pull/2001
229
+ c_document = xmlCopyDoc(c_document, 1);
230
+ defensive_copy_p = 1;
245
231
  }
246
232
 
247
- xmlSetStructuredErrorFunc(NULL, NULL);
248
- xmlSchemaFreeParserCtxt(ctx);
249
-
250
- if (NULL == schema) {
251
- xmlErrorPtr error = xmlGetLastError();
252
- if (error) {
253
- Nokogiri_error_raise(NULL, error);
254
- } else {
255
- rb_raise(rb_eRuntimeError, "Could not parse document");
256
- }
233
+ c_parser_context = xmlSchemaNewDocParserCtxt(c_document);
234
+ rb_schema = xml_schema_parse_schema(klass, c_parser_context, rb_parse_options);
257
235
 
258
- return Qnil;
236
+ if (defensive_copy_p) {
237
+ xmlFreeDoc(c_document);
238
+ c_document = NULL;
259
239
  }
260
240
 
261
- rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema);
262
- rb_iv_set(rb_schema, "@errors", errors);
263
- rb_iv_set(rb_schema, "@parse_options", parse_options);
264
-
265
241
  return rb_schema;
266
-
267
- return Qnil;
268
242
  }
269
243
 
270
244
  void
@@ -20,10 +20,18 @@ new (int argc, VALUE *argv, VALUE klass)
20
20
 
21
21
  rb_scan_args(argc, argv, "2*", &string, &document, &rest);
22
22
 
23
- Noko_Node_Get_Struct(document, xmlDoc, doc);
23
+ if (rb_obj_is_kind_of(document, cNokogiriXmlDocument)) {
24
+ doc = noko_xml_document_unwrap(document);
25
+ } else {
26
+ xmlNodePtr deprecated_node_type_arg;
27
+ // TODO: deprecate allowing Node
28
+ NOKO_WARN_DEPRECATION("Passing a Node as the second parameter to Text.new is deprecated. Please pass a Document instead. This will become an error in a future release of Nokogiri.");
29
+ Noko_Node_Get_Struct(document, xmlNode, deprecated_node_type_arg);
30
+ doc = deprecated_node_type_arg->doc;
31
+ }
24
32
 
25
33
  node = xmlNewText((xmlChar *)StringValueCStr(string));
26
- node->doc = doc->doc;
34
+ node->doc = doc;
27
35
 
28
36
  noko_xml_document_pin_node(node);
29
37