nokogiri 1.14.5 → 1.15.0
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +9 -8
- data/dependencies.yml +6 -6
- data/ext/nokogiri/extconf.rb +66 -22
- data/ext/nokogiri/html4_document.c +1 -2
- data/ext/nokogiri/html4_element_description.c +19 -14
- data/ext/nokogiri/html4_sax_parser_context.c +10 -16
- data/ext/nokogiri/html4_sax_push_parser.c +2 -2
- data/ext/nokogiri/nokogiri.c +46 -24
- data/ext/nokogiri/nokogiri.h +13 -2
- data/ext/nokogiri/xml_attr.c +1 -1
- data/ext/nokogiri/xml_cdata.c +10 -2
- data/ext/nokogiri/xml_comment.c +1 -1
- data/ext/nokogiri/xml_document.c +102 -22
- data/ext/nokogiri/xml_document_fragment.c +1 -1
- data/ext/nokogiri/xml_dtd.c +1 -1
- data/ext/nokogiri/xml_element_content.c +32 -29
- data/ext/nokogiri/xml_element_decl.c +5 -5
- data/ext/nokogiri/xml_encoding_handler.c +12 -4
- data/ext/nokogiri/xml_entity_reference.c +1 -1
- data/ext/nokogiri/xml_namespace.c +11 -12
- data/ext/nokogiri/xml_node.c +7 -7
- data/ext/nokogiri/xml_node_set.c +125 -105
- data/ext/nokogiri/xml_processing_instruction.c +1 -1
- data/ext/nokogiri/xml_reader.c +37 -28
- data/ext/nokogiri/xml_relax_ng.c +65 -78
- data/ext/nokogiri/xml_sax_parser.c +24 -5
- data/ext/nokogiri/xml_sax_parser_context.c +46 -25
- data/ext/nokogiri/xml_sax_push_parser.c +29 -8
- data/ext/nokogiri/xml_schema.c +90 -116
- data/ext/nokogiri/xml_text.c +10 -2
- data/ext/nokogiri/xml_xpath_context.c +156 -83
- data/ext/nokogiri/xslt_stylesheet.c +103 -50
- data/gumbo-parser/src/error.c +8 -4
- data/gumbo-parser/src/foreign_attrs.c +13 -14
- data/gumbo-parser/src/foreign_attrs.gperf +1 -1
- data/gumbo-parser/src/parser.c +13 -0
- data/lib/nokogiri/css/xpath_visitor.rb +2 -2
- data/lib/nokogiri/extension.rb +1 -1
- data/lib/nokogiri/html4/document_fragment.rb +1 -1
- data/lib/nokogiri/html4/element_description_defaults.rb +1821 -353
- data/lib/nokogiri/html5/document_fragment.rb +1 -1
- data/lib/nokogiri/html5/node.rb +5 -0
- data/lib/nokogiri/html5.rb +5 -2
- data/lib/nokogiri/jruby/nokogiri_jars.rb +3 -3
- data/lib/nokogiri/version/constant.rb +1 -1
- data/lib/nokogiri/xml/attribute_decl.rb +4 -2
- data/lib/nokogiri/xml/document_fragment.rb +1 -1
- data/lib/nokogiri/xml/element_content.rb +10 -2
- data/lib/nokogiri/xml/element_decl.rb +4 -2
- data/lib/nokogiri/xml/entity_decl.rb +4 -2
- data/lib/nokogiri/xml/node/save_options.rb +8 -0
- data/lib/nokogiri/xml/node.rb +22 -13
- data/lib/nokogiri/xml/pp/node.rb +23 -12
- data/lib/nokogiri/xml/sax/document.rb +1 -1
- data/lib/nokogiri/xml/searchable.rb +18 -10
- data/lib/nokogiri/xslt.rb +73 -3
- data/lib/nokogiri.rb +12 -4
- data/lib/xsd/xmlparser/nokogiri.rb +1 -1
- data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
- data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
- data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
- data/ports/archives/libxml2-2.11.3.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.38.tar.xz +0 -0
- metadata +9 -7
- data/patches/libxslt/0001-update-automake-files-for-arm64.patch +0 -3037
- data/ports/archives/libxml2-2.10.4.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.37.tar.xz +0 -0
@@ -3,18 +3,35 @@
|
|
3
3
|
VALUE cNokogiriXmlSaxPushParser ;
|
4
4
|
|
5
5
|
static void
|
6
|
-
|
6
|
+
xml_sax_push_parser_free(void *data)
|
7
7
|
{
|
8
|
+
xmlParserCtxtPtr ctx = data;
|
8
9
|
if (ctx != NULL) {
|
9
10
|
NOKOGIRI_SAX_TUPLE_DESTROY(ctx->userData);
|
10
11
|
xmlFreeParserCtxt(ctx);
|
11
12
|
}
|
12
13
|
}
|
13
14
|
|
15
|
+
static const rb_data_type_t xml_sax_push_parser_type = {
|
16
|
+
.wrap_struct_name = "Nokogiri::XML::SAX::PushParser",
|
17
|
+
.function = {
|
18
|
+
.dfree = xml_sax_push_parser_free,
|
19
|
+
},
|
20
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
|
21
|
+
};
|
22
|
+
|
14
23
|
static VALUE
|
15
24
|
allocate(VALUE klass)
|
16
25
|
{
|
17
|
-
return
|
26
|
+
return TypedData_Wrap_Struct(klass, &xml_sax_push_parser_type, NULL);
|
27
|
+
}
|
28
|
+
|
29
|
+
xmlParserCtxtPtr
|
30
|
+
noko_xml_sax_push_parser_unwrap(VALUE rb_parser)
|
31
|
+
{
|
32
|
+
xmlParserCtxtPtr c_parser;
|
33
|
+
TypedData_Get_Struct(rb_parser, xmlParserCtxt, &xml_sax_push_parser_type, c_parser);
|
34
|
+
return c_parser;
|
18
35
|
}
|
19
36
|
|
20
37
|
/*
|
@@ -31,7 +48,7 @@ native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
|
|
31
48
|
int size = 0;
|
32
49
|
|
33
50
|
|
34
|
-
|
51
|
+
ctx = noko_xml_sax_push_parser_unwrap(self);
|
35
52
|
|
36
53
|
if (Qnil != _chunk) {
|
37
54
|
chunk = StringValuePtr(_chunk);
|
@@ -63,7 +80,7 @@ initialize_native(VALUE self, VALUE _xml_sax, VALUE _filename)
|
|
63
80
|
const char *filename = NULL;
|
64
81
|
xmlParserCtxtPtr ctx;
|
65
82
|
|
66
|
-
|
83
|
+
sax = noko_sax_handler_unwrap(_xml_sax);
|
67
84
|
|
68
85
|
if (_filename != Qnil) { filename = StringValueCStr(_filename); }
|
69
86
|
|
@@ -89,7 +106,8 @@ static VALUE
|
|
89
106
|
get_options(VALUE self)
|
90
107
|
{
|
91
108
|
xmlParserCtxtPtr ctx;
|
92
|
-
|
109
|
+
|
110
|
+
ctx = noko_xml_sax_push_parser_unwrap(self);
|
93
111
|
|
94
112
|
return INT2NUM(ctx->options);
|
95
113
|
}
|
@@ -98,7 +116,8 @@ static VALUE
|
|
98
116
|
set_options(VALUE self, VALUE options)
|
99
117
|
{
|
100
118
|
xmlParserCtxtPtr ctx;
|
101
|
-
|
119
|
+
|
120
|
+
ctx = noko_xml_sax_push_parser_unwrap(self);
|
102
121
|
|
103
122
|
if (xmlCtxtUseOptions(ctx, (int)NUM2INT(options)) != 0) {
|
104
123
|
rb_raise(rb_eRuntimeError, "Cannot set XML parser context options");
|
@@ -118,7 +137,8 @@ static VALUE
|
|
118
137
|
get_replace_entities(VALUE self)
|
119
138
|
{
|
120
139
|
xmlParserCtxtPtr ctx;
|
121
|
-
|
140
|
+
|
141
|
+
ctx = noko_xml_sax_push_parser_unwrap(self);
|
122
142
|
|
123
143
|
if (0 == ctx->replaceEntities) {
|
124
144
|
return Qfalse;
|
@@ -138,7 +158,8 @@ static VALUE
|
|
138
158
|
set_replace_entities(VALUE self, VALUE value)
|
139
159
|
{
|
140
160
|
xmlParserCtxtPtr ctx;
|
141
|
-
|
161
|
+
|
162
|
+
ctx = noko_xml_sax_push_parser_unwrap(self);
|
142
163
|
|
143
164
|
if (Qfalse == value) {
|
144
165
|
ctx->replaceEntities = 0;
|
data/ext/nokogiri/xml_schema.c
CHANGED
@@ -3,11 +3,20 @@
|
|
3
3
|
VALUE cNokogiriXmlSchema;
|
4
4
|
|
5
5
|
static void
|
6
|
-
|
6
|
+
xml_schema_deallocate(void *data)
|
7
7
|
{
|
8
|
+
xmlSchemaPtr schema = data;
|
8
9
|
xmlSchemaFree(schema);
|
9
10
|
}
|
10
11
|
|
12
|
+
static const rb_data_type_t xml_schema_type = {
|
13
|
+
.wrap_struct_name = "Nokogiri::XML::Schema",
|
14
|
+
.function = {
|
15
|
+
.dfree = xml_schema_deallocate,
|
16
|
+
},
|
17
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
|
18
|
+
};
|
19
|
+
|
11
20
|
/*
|
12
21
|
* call-seq:
|
13
22
|
* validate_document(document)
|
@@ -22,8 +31,8 @@ validate_document(VALUE self, VALUE document)
|
|
22
31
|
xmlSchemaValidCtxtPtr valid_ctxt;
|
23
32
|
VALUE errors;
|
24
33
|
|
25
|
-
|
26
|
-
|
34
|
+
TypedData_Get_Struct(self, xmlSchema, &xml_schema_type, schema);
|
35
|
+
doc = noko_xml_document_unwrap(document);
|
27
36
|
|
28
37
|
errors = rb_ary_new();
|
29
38
|
|
@@ -63,7 +72,7 @@ validate_file(VALUE self, VALUE rb_filename)
|
|
63
72
|
const char *filename ;
|
64
73
|
VALUE errors;
|
65
74
|
|
66
|
-
|
75
|
+
TypedData_Get_Struct(self, xmlSchema, &xml_schema_type, schema);
|
67
76
|
filename = (const char *)StringValueCStr(rb_filename) ;
|
68
77
|
|
69
78
|
errors = rb_ary_new();
|
@@ -90,59 +99,53 @@ validate_file(VALUE self, VALUE rb_filename)
|
|
90
99
|
return errors;
|
91
100
|
}
|
92
101
|
|
93
|
-
/*
|
94
|
-
* call-seq:
|
95
|
-
* read_memory(string)
|
96
|
-
*
|
97
|
-
* Create a new Schema from the contents of +string+
|
98
|
-
*/
|
99
102
|
static VALUE
|
100
|
-
|
103
|
+
xml_schema_parse_schema(
|
104
|
+
VALUE klass,
|
105
|
+
xmlSchemaParserCtxtPtr c_parser_context,
|
106
|
+
VALUE rb_parse_options
|
107
|
+
)
|
101
108
|
{
|
102
|
-
VALUE
|
103
|
-
VALUE parse_options;
|
109
|
+
VALUE rb_errors;
|
104
110
|
int parse_options_int;
|
105
|
-
|
106
|
-
xmlSchemaPtr schema;
|
107
|
-
VALUE errors;
|
108
|
-
VALUE rb_schema;
|
109
|
-
int scanned_args = 0;
|
111
|
+
xmlSchemaPtr c_schema;
|
110
112
|
xmlExternalEntityLoader old_loader = 0;
|
113
|
+
VALUE rb_schema;
|
111
114
|
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
+
if (NIL_P(rb_parse_options)) {
|
116
|
+
rb_parse_options = rb_const_get_at(
|
117
|
+
rb_const_get_at(mNokogiriXml, rb_intern("ParseOptions")),
|
118
|
+
rb_intern("DEFAULT_SCHEMA")
|
119
|
+
);
|
115
120
|
}
|
116
|
-
parse_options_int = (int)NUM2INT(rb_funcall(parse_options, rb_intern("to_i"), 0));
|
117
121
|
|
118
|
-
|
119
|
-
|
120
|
-
errors = rb_ary_new();
|
121
|
-
xmlSetStructuredErrorFunc((void *)errors, Nokogiri_error_array_pusher);
|
122
|
+
rb_errors = rb_ary_new();
|
123
|
+
xmlSetStructuredErrorFunc((void *)rb_errors, Nokogiri_error_array_pusher);
|
122
124
|
|
123
125
|
#ifdef HAVE_XMLSCHEMASETPARSERSTRUCTUREDERRORS
|
124
126
|
xmlSchemaSetParserStructuredErrors(
|
125
|
-
|
127
|
+
c_parser_context,
|
126
128
|
Nokogiri_error_array_pusher,
|
127
|
-
(void *)
|
129
|
+
(void *)rb_errors
|
128
130
|
);
|
129
131
|
#endif
|
130
132
|
|
133
|
+
parse_options_int = (int)NUM2INT(rb_funcall(rb_parse_options, rb_intern("to_i"), 0));
|
131
134
|
if (parse_options_int & XML_PARSE_NONET) {
|
132
135
|
old_loader = xmlGetExternalEntityLoader();
|
133
136
|
xmlSetExternalEntityLoader(xmlNoNetExternalEntityLoader);
|
134
137
|
}
|
135
138
|
|
136
|
-
|
139
|
+
c_schema = xmlSchemaParse(c_parser_context);
|
137
140
|
|
138
141
|
if (old_loader) {
|
139
142
|
xmlSetExternalEntityLoader(old_loader);
|
140
143
|
}
|
141
144
|
|
142
145
|
xmlSetStructuredErrorFunc(NULL, NULL);
|
143
|
-
xmlSchemaFreeParserCtxt(
|
146
|
+
xmlSchemaFreeParserCtxt(c_parser_context);
|
144
147
|
|
145
|
-
if (NULL ==
|
148
|
+
if (NULL == c_schema) {
|
146
149
|
xmlErrorPtr error = xmlGetLastError();
|
147
150
|
if (error) {
|
148
151
|
Nokogiri_error_raise(NULL, error);
|
@@ -153,118 +156,89 @@ read_memory(int argc, VALUE *argv, VALUE klass)
|
|
153
156
|
return Qnil;
|
154
157
|
}
|
155
158
|
|
156
|
-
rb_schema =
|
157
|
-
rb_iv_set(rb_schema, "@errors",
|
158
|
-
rb_iv_set(rb_schema, "@parse_options",
|
159
|
+
rb_schema = TypedData_Wrap_Struct(klass, &xml_schema_type, c_schema);
|
160
|
+
rb_iv_set(rb_schema, "@errors", rb_errors);
|
161
|
+
rb_iv_set(rb_schema, "@parse_options", rb_parse_options);
|
159
162
|
|
160
163
|
return rb_schema;
|
161
164
|
}
|
162
165
|
|
163
|
-
/*
|
164
|
-
*
|
165
|
-
*
|
166
|
-
*
|
166
|
+
/*
|
167
|
+
* call-seq:
|
168
|
+
* read_memory(string) → Nokogiri::XML::Schema
|
169
|
+
*
|
170
|
+
* Create a new schema parsed from the contents of +string+
|
171
|
+
*
|
172
|
+
* [Parameters]
|
173
|
+
* - +string+: String containing XML to be parsed as a schema
|
174
|
+
*
|
175
|
+
* [Returns] Nokogiri::XML::Schema
|
167
176
|
*/
|
168
|
-
static
|
169
|
-
|
177
|
+
static VALUE
|
178
|
+
read_memory(int argc, VALUE *argv, VALUE klass)
|
170
179
|
{
|
171
|
-
|
180
|
+
VALUE rb_content;
|
181
|
+
VALUE rb_parse_options;
|
182
|
+
xmlSchemaParserCtxtPtr c_parser_context;
|
172
183
|
|
173
|
-
|
174
|
-
return 0;
|
175
|
-
}
|
184
|
+
rb_scan_args(argc, argv, "11", &rb_content, &rb_parse_options);
|
176
185
|
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
if (xmlIsBlankNode(node)) {
|
182
|
-
return 1;
|
183
|
-
}
|
184
|
-
}
|
186
|
+
c_parser_context = xmlSchemaNewMemParserCtxt(
|
187
|
+
(const char *)StringValuePtr(rb_content),
|
188
|
+
(int)RSTRING_LEN(rb_content)
|
189
|
+
);
|
185
190
|
|
186
|
-
return
|
191
|
+
return xml_schema_parse_schema(klass, c_parser_context, rb_parse_options);
|
187
192
|
}
|
188
193
|
|
189
194
|
/*
|
190
195
|
* call-seq:
|
191
|
-
*
|
196
|
+
* from_document(document) → Nokogiri::XML::Schema
|
197
|
+
*
|
198
|
+
* Create a new schema parsed from the +document+.
|
192
199
|
*
|
193
|
-
*
|
200
|
+
* [Parameters]
|
201
|
+
* - +document+: Nokogiri::XML::Document to be parsed
|
202
|
+
*
|
203
|
+
* [Returns] Nokogiri::XML::Schema
|
194
204
|
*/
|
195
205
|
static VALUE
|
196
206
|
from_document(int argc, VALUE *argv, VALUE klass)
|
197
207
|
{
|
198
|
-
VALUE
|
199
|
-
VALUE
|
200
|
-
int parse_options_int;
|
201
|
-
xmlDocPtr doc;
|
202
|
-
xmlSchemaParserCtxtPtr ctx;
|
203
|
-
xmlSchemaPtr schema;
|
204
|
-
VALUE errors;
|
208
|
+
VALUE rb_document;
|
209
|
+
VALUE rb_parse_options;
|
205
210
|
VALUE rb_schema;
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
rb_raise(rb_eArgError, "Creating a schema from a document that has blank nodes exposed to Ruby is dangerous");
|
221
|
-
}
|
222
|
-
|
223
|
-
ctx = xmlSchemaNewDocParserCtxt(doc);
|
224
|
-
|
225
|
-
errors = rb_ary_new();
|
226
|
-
xmlSetStructuredErrorFunc((void *)errors, Nokogiri_error_array_pusher);
|
227
|
-
|
228
|
-
#ifdef HAVE_XMLSCHEMASETPARSERSTRUCTUREDERRORS
|
229
|
-
xmlSchemaSetParserStructuredErrors(
|
230
|
-
ctx,
|
231
|
-
Nokogiri_error_array_pusher,
|
232
|
-
(void *)errors
|
233
|
-
);
|
234
|
-
#endif
|
235
|
-
|
236
|
-
if (parse_options_int & XML_PARSE_NONET) {
|
237
|
-
old_loader = xmlGetExternalEntityLoader();
|
238
|
-
xmlSetExternalEntityLoader(xmlNoNetExternalEntityLoader);
|
211
|
+
xmlDocPtr c_document;
|
212
|
+
xmlSchemaParserCtxtPtr c_parser_context;
|
213
|
+
int defensive_copy_p = 0;
|
214
|
+
|
215
|
+
rb_scan_args(argc, argv, "11", &rb_document, &rb_parse_options);
|
216
|
+
|
217
|
+
if (rb_obj_is_kind_of(rb_document, cNokogiriXmlDocument)) {
|
218
|
+
c_document = noko_xml_document_unwrap(rb_document);
|
219
|
+
} else {
|
220
|
+
xmlNodePtr deprecated_node_type_arg;
|
221
|
+
// TODO: deprecate allowing Node
|
222
|
+
NOKO_WARN_DEPRECATION("Passing a Node as the first parameter to Schema.from_document is deprecated. Please pass a Document instead. This will become an error in a future release of Nokogiri.");
|
223
|
+
Noko_Node_Get_Struct(rb_document, xmlNode, deprecated_node_type_arg);
|
224
|
+
c_document = deprecated_node_type_arg->doc;
|
239
225
|
}
|
240
226
|
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
227
|
+
if (noko_xml_document_has_wrapped_blank_nodes_p(c_document)) {
|
228
|
+
// see https://github.com/sparklemotion/nokogiri/pull/2001
|
229
|
+
c_document = xmlCopyDoc(c_document, 1);
|
230
|
+
defensive_copy_p = 1;
|
245
231
|
}
|
246
232
|
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
if (NULL == schema) {
|
251
|
-
xmlErrorPtr error = xmlGetLastError();
|
252
|
-
if (error) {
|
253
|
-
Nokogiri_error_raise(NULL, error);
|
254
|
-
} else {
|
255
|
-
rb_raise(rb_eRuntimeError, "Could not parse document");
|
256
|
-
}
|
233
|
+
c_parser_context = xmlSchemaNewDocParserCtxt(c_document);
|
234
|
+
rb_schema = xml_schema_parse_schema(klass, c_parser_context, rb_parse_options);
|
257
235
|
|
258
|
-
|
236
|
+
if (defensive_copy_p) {
|
237
|
+
xmlFreeDoc(c_document);
|
238
|
+
c_document = NULL;
|
259
239
|
}
|
260
240
|
|
261
|
-
rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema);
|
262
|
-
rb_iv_set(rb_schema, "@errors", errors);
|
263
|
-
rb_iv_set(rb_schema, "@parse_options", parse_options);
|
264
|
-
|
265
241
|
return rb_schema;
|
266
|
-
|
267
|
-
return Qnil;
|
268
242
|
}
|
269
243
|
|
270
244
|
void
|
data/ext/nokogiri/xml_text.c
CHANGED
@@ -20,10 +20,18 @@ new (int argc, VALUE *argv, VALUE klass)
|
|
20
20
|
|
21
21
|
rb_scan_args(argc, argv, "2*", &string, &document, &rest);
|
22
22
|
|
23
|
-
|
23
|
+
if (rb_obj_is_kind_of(document, cNokogiriXmlDocument)) {
|
24
|
+
doc = noko_xml_document_unwrap(document);
|
25
|
+
} else {
|
26
|
+
xmlNodePtr deprecated_node_type_arg;
|
27
|
+
// TODO: deprecate allowing Node
|
28
|
+
NOKO_WARN_DEPRECATION("Passing a Node as the second parameter to Text.new is deprecated. Please pass a Document instead. This will become an error in a future release of Nokogiri.");
|
29
|
+
Noko_Node_Get_Struct(document, xmlNode, deprecated_node_type_arg);
|
30
|
+
doc = deprecated_node_type_arg->doc;
|
31
|
+
}
|
24
32
|
|
25
33
|
node = xmlNewText((xmlChar *)StringValueCStr(string));
|
26
|
-
node->doc = doc
|
34
|
+
node->doc = doc;
|
27
35
|
|
28
36
|
noko_xml_document_pin_node(node);
|
29
37
|
|