nokogiri 1.15.3 → 1.18.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +12 -17
- data/LICENSE-DEPENDENCIES.md +6 -6
- data/README.md +11 -5
- data/dependencies.yml +9 -8
- data/ext/nokogiri/extconf.rb +191 -154
- data/ext/nokogiri/gumbo.c +69 -53
- data/ext/nokogiri/html4_document.c +10 -4
- data/ext/nokogiri/html4_element_description.c +18 -18
- data/ext/nokogiri/html4_sax_parser.c +40 -0
- data/ext/nokogiri/html4_sax_parser_context.c +48 -58
- data/ext/nokogiri/html4_sax_push_parser.c +26 -25
- data/ext/nokogiri/libxml2_polyfill.c +114 -0
- data/ext/nokogiri/nokogiri.c +9 -2
- data/ext/nokogiri/nokogiri.h +25 -33
- data/ext/nokogiri/test_global_handlers.c +1 -1
- data/ext/nokogiri/xml_attr.c +1 -1
- data/ext/nokogiri/xml_cdata.c +3 -12
- data/ext/nokogiri/xml_comment.c +3 -8
- data/ext/nokogiri/xml_document.c +173 -158
- data/ext/nokogiri/xml_document_fragment.c +10 -25
- data/ext/nokogiri/xml_dtd.c +1 -1
- data/ext/nokogiri/xml_element_content.c +9 -9
- data/ext/nokogiri/xml_encoding_handler.c +4 -4
- data/ext/nokogiri/xml_namespace.c +6 -10
- data/ext/nokogiri/xml_node.c +142 -108
- data/ext/nokogiri/xml_node_set.c +46 -44
- data/ext/nokogiri/xml_reader.c +74 -100
- data/ext/nokogiri/xml_relax_ng.c +35 -56
- data/ext/nokogiri/xml_sax_parser.c +156 -88
- data/ext/nokogiri/xml_sax_parser_context.c +220 -128
- data/ext/nokogiri/xml_sax_push_parser.c +69 -50
- data/ext/nokogiri/xml_schema.c +51 -87
- data/ext/nokogiri/xml_syntax_error.c +19 -11
- data/ext/nokogiri/xml_text.c +3 -6
- data/ext/nokogiri/xml_xpath_context.c +104 -104
- data/ext/nokogiri/xslt_stylesheet.c +16 -11
- data/gumbo-parser/Makefile +18 -0
- data/gumbo-parser/src/ascii.c +2 -2
- data/gumbo-parser/src/error.c +76 -48
- data/gumbo-parser/src/error.h +5 -1
- data/gumbo-parser/src/nokogiri_gumbo.h +11 -2
- data/gumbo-parser/src/parser.c +66 -25
- data/gumbo-parser/src/tokenizer.c +7 -6
- data/lib/nokogiri/class_resolver.rb +1 -1
- data/lib/nokogiri/css/node.rb +6 -2
- data/lib/nokogiri/css/parser.rb +6 -4
- data/lib/nokogiri/css/parser.y +2 -2
- data/lib/nokogiri/css/parser_extras.rb +6 -66
- data/lib/nokogiri/css/selector_cache.rb +38 -0
- data/lib/nokogiri/css/tokenizer.rb +4 -4
- data/lib/nokogiri/css/tokenizer.rex +9 -8
- data/lib/nokogiri/css/xpath_visitor.rb +44 -27
- data/lib/nokogiri/css.rb +86 -20
- data/lib/nokogiri/decorators/slop.rb +3 -5
- data/lib/nokogiri/encoding_handler.rb +2 -2
- data/lib/nokogiri/html4/document.rb +45 -24
- data/lib/nokogiri/html4/document_fragment.rb +124 -12
- data/lib/nokogiri/html4/encoding_reader.rb +2 -2
- data/lib/nokogiri/html4/sax/parser.rb +23 -38
- data/lib/nokogiri/html4/sax/parser_context.rb +4 -9
- data/lib/nokogiri/html4.rb +9 -14
- data/lib/nokogiri/html5/builder.rb +40 -0
- data/lib/nokogiri/html5/document.rb +61 -30
- data/lib/nokogiri/html5/document_fragment.rb +130 -20
- data/lib/nokogiri/html5/node.rb +4 -4
- data/lib/nokogiri/html5.rb +114 -138
- data/lib/nokogiri/version/constant.rb +1 -1
- data/lib/nokogiri/version/info.rb +6 -5
- data/lib/nokogiri/xml/attr.rb +2 -2
- data/lib/nokogiri/xml/builder.rb +8 -1
- data/lib/nokogiri/xml/document.rb +74 -31
- data/lib/nokogiri/xml/document_fragment.rb +86 -15
- data/lib/nokogiri/xml/namespace.rb +1 -2
- data/lib/nokogiri/xml/node.rb +113 -35
- data/lib/nokogiri/xml/node_set.rb +12 -10
- data/lib/nokogiri/xml/parse_options.rb +1 -1
- data/lib/nokogiri/xml/pp/node.rb +6 -1
- data/lib/nokogiri/xml/reader.rb +51 -17
- data/lib/nokogiri/xml/relax_ng.rb +57 -20
- data/lib/nokogiri/xml/sax/document.rb +174 -83
- data/lib/nokogiri/xml/sax/parser.rb +115 -41
- data/lib/nokogiri/xml/sax/parser_context.rb +116 -8
- data/lib/nokogiri/xml/sax/push_parser.rb +3 -0
- data/lib/nokogiri/xml/sax.rb +48 -0
- data/lib/nokogiri/xml/schema.rb +112 -45
- data/lib/nokogiri/xml/searchable.rb +39 -43
- data/lib/nokogiri/xml/syntax_error.rb +23 -1
- data/lib/nokogiri/xml/xpath_context.rb +14 -3
- data/lib/nokogiri/xml.rb +14 -25
- data/lib/nokogiri/xslt/stylesheet.rb +29 -7
- data/lib/nokogiri/xslt.rb +4 -10
- data/lib/nokogiri.rb +1 -1
- data/lib/xsd/xmlparser/nokogiri.rb +3 -4
- data/patches/libxml2/0019-xpath-Use-separate-static-hash-table-for-standard-fu.patch +244 -0
- data/ports/archives/libxml2-2.13.7.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.43.tar.xz +0 -0
- metadata +13 -14
- data/ext/nokogiri/libxml2_backwards_compat.c +0 -121
- data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +0 -25
- data/ports/archives/libxml2-2.11.4.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.38.tar.xz +0 -0
@@ -12,23 +12,23 @@ static const xmlChar *NOKOGIRI_BUILTIN_PREFIX = (const xmlChar *)"nokogiri-built
|
|
12
12
|
static const xmlChar *NOKOGIRI_BUILTIN_URI = (const xmlChar *)"https://www.nokogiri.org/default_ns/ruby/builtins";
|
13
13
|
|
14
14
|
static void
|
15
|
-
|
15
|
+
_noko_xml_xpath_context_dfree(void *data)
|
16
16
|
{
|
17
17
|
xmlXPathContextPtr c_context = data;
|
18
18
|
xmlXPathFreeContext(c_context);
|
19
19
|
}
|
20
20
|
|
21
|
-
static const rb_data_type_t
|
22
|
-
.wrap_struct_name = "
|
21
|
+
static const rb_data_type_t _noko_xml_xpath_context_type = {
|
22
|
+
.wrap_struct_name = "xmlXPathContext",
|
23
23
|
.function = {
|
24
|
-
.dfree =
|
24
|
+
.dfree = _noko_xml_xpath_context_dfree,
|
25
25
|
},
|
26
26
|
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
|
27
27
|
};
|
28
28
|
|
29
29
|
/* find a CSS class in an HTML element's `class` attribute */
|
30
30
|
static const xmlChar *
|
31
|
-
|
31
|
+
_noko_xml_xpath_context__css_class(const xmlChar *str, const xmlChar *val)
|
32
32
|
{
|
33
33
|
int val_len;
|
34
34
|
|
@@ -62,9 +62,9 @@ builtin_css_class(const xmlChar *str, const xmlChar *val)
|
|
62
62
|
return (NULL);
|
63
63
|
}
|
64
64
|
|
65
|
-
/* xmlXPathFunction to wrap
|
65
|
+
/* xmlXPathFunction to wrap _noko_xml_xpath_context__css_class() */
|
66
66
|
static void
|
67
|
-
|
67
|
+
noko_xml_xpath_context_xpath_func_css_class(xmlXPathParserContextPtr ctxt, int nargs)
|
68
68
|
{
|
69
69
|
xmlXPathObjectPtr hay, needle;
|
70
70
|
|
@@ -85,7 +85,7 @@ xpath_builtin_css_class(xmlXPathParserContextPtr ctxt, int nargs)
|
|
85
85
|
XP_ERROR(XPATH_INVALID_TYPE);
|
86
86
|
}
|
87
87
|
|
88
|
-
if (
|
88
|
+
if (_noko_xml_xpath_context__css_class(hay->stringval, needle->stringval)) {
|
89
89
|
valuePush(ctxt, xmlXPathNewBoolean(1));
|
90
90
|
} else {
|
91
91
|
valuePush(ctxt, xmlXPathNewBoolean(0));
|
@@ -99,7 +99,7 @@ xpath_builtin_css_class(xmlXPathParserContextPtr ctxt, int nargs)
|
|
99
99
|
/* xmlXPathFunction to select nodes whose local name matches, for HTML5 CSS queries that should
|
100
100
|
* ignore namespaces */
|
101
101
|
static void
|
102
|
-
|
102
|
+
noko_xml_xpath_context_xpath_func_local_name_is(xmlXPathParserContextPtr ctxt, int nargs)
|
103
103
|
{
|
104
104
|
xmlXPathObjectPtr element_name;
|
105
105
|
|
@@ -124,25 +124,26 @@ xpath_builtin_local_name_is(xmlXPathParserContextPtr ctxt, int nargs)
|
|
124
124
|
* register_ns(prefix, uri) → Nokogiri::XML::XPathContext
|
125
125
|
*
|
126
126
|
* Register the namespace with +prefix+ and +uri+ for use in future queries.
|
127
|
+
* Passing a uri of +nil+ will unregister the namespace.
|
127
128
|
*
|
128
129
|
* [Returns] +self+
|
129
130
|
*/
|
130
131
|
static VALUE
|
131
|
-
|
132
|
+
noko_xml_xpath_context_register_ns(VALUE rb_context, VALUE prefix, VALUE uri)
|
132
133
|
{
|
133
134
|
xmlXPathContextPtr c_context;
|
135
|
+
const xmlChar *ns_uri;
|
134
136
|
|
135
|
-
TypedData_Get_Struct(
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
137
|
+
TypedData_Get_Struct(rb_context, xmlXPathContext, &_noko_xml_xpath_context_type, c_context);
|
138
|
+
|
139
|
+
if (NIL_P(uri)) {
|
140
|
+
ns_uri = NULL;
|
141
|
+
} else {
|
142
|
+
ns_uri = (const xmlChar *)StringValueCStr(uri);
|
143
|
+
}
|
144
|
+
|
145
|
+
xmlXPathRegisterNs(c_context, (const xmlChar *)StringValueCStr(prefix), ns_uri);
|
141
146
|
|
142
|
-
xmlXPathRegisterNs(c_context,
|
143
|
-
(const xmlChar *)StringValueCStr(prefix),
|
144
|
-
(const xmlChar *)StringValueCStr(uri)
|
145
|
-
);
|
146
147
|
return rb_context;
|
147
148
|
}
|
148
149
|
|
@@ -151,29 +152,25 @@ rb_xml_xpath_context_register_ns(VALUE rb_context, VALUE prefix, VALUE uri)
|
|
151
152
|
* register_variable(name, value) → Nokogiri::XML::XPathContext
|
152
153
|
*
|
153
154
|
* Register the variable +name+ with +value+ for use in future queries.
|
155
|
+
* Passing a value of +nil+ will unregister the variable.
|
154
156
|
*
|
155
157
|
* [Returns] +self+
|
156
158
|
*/
|
157
159
|
static VALUE
|
158
|
-
|
160
|
+
noko_xml_xpath_context_register_variable(VALUE rb_context, VALUE name, VALUE value)
|
159
161
|
{
|
160
162
|
xmlXPathContextPtr c_context;
|
161
163
|
xmlXPathObjectPtr xmlValue;
|
162
164
|
|
163
|
-
TypedData_Get_Struct(
|
164
|
-
rb_context,
|
165
|
-
xmlXPathContext,
|
166
|
-
&xml_xpath_context_type,
|
167
|
-
c_context
|
168
|
-
);
|
165
|
+
TypedData_Get_Struct(rb_context, xmlXPathContext, &_noko_xml_xpath_context_type, c_context);
|
169
166
|
|
170
|
-
|
167
|
+
if (NIL_P(value)) {
|
168
|
+
xmlValue = NULL;
|
169
|
+
} else {
|
170
|
+
xmlValue = xmlXPathNewCString(StringValueCStr(value));
|
171
|
+
}
|
171
172
|
|
172
|
-
xmlXPathRegisterVariable(
|
173
|
-
c_context,
|
174
|
-
(const xmlChar *)StringValueCStr(name),
|
175
|
-
xmlValue
|
176
|
-
);
|
173
|
+
xmlXPathRegisterVariable(c_context, (const xmlChar *)StringValueCStr(name), xmlValue);
|
177
174
|
|
178
175
|
return rb_context;
|
179
176
|
}
|
@@ -184,7 +181,7 @@ rb_xml_xpath_context_register_variable(VALUE rb_context, VALUE name, VALUE value
|
|
184
181
|
* returns Qundef if no conversion was possible.
|
185
182
|
*/
|
186
183
|
static VALUE
|
187
|
-
|
184
|
+
_noko_xml_xpath_context__xpath2ruby(xmlXPathObjectPtr c_xpath_object, xmlXPathContextPtr c_context)
|
188
185
|
{
|
189
186
|
VALUE rb_retval;
|
190
187
|
|
@@ -238,7 +235,7 @@ Nokogiri_marshal_xpath_funcall_and_return_values(
|
|
238
235
|
|
239
236
|
for (int j = argc - 1 ; j >= 0 ; --j) {
|
240
237
|
c_xpath_object = valuePop(ctxt);
|
241
|
-
argv[j] =
|
238
|
+
argv[j] = _noko_xml_xpath_context__xpath2ruby(c_xpath_object, ctxt->context);
|
242
239
|
if (argv[j] == Qundef) {
|
243
240
|
argv[j] = NOKOGIRI_STR_NEW2(xmlXPathCastToString(c_xpath_object));
|
244
241
|
}
|
@@ -294,7 +291,7 @@ Nokogiri_marshal_xpath_funcall_and_return_values(
|
|
294
291
|
}
|
295
292
|
|
296
293
|
static void
|
297
|
-
|
294
|
+
_noko_xml_xpath_context__handler_invoker(xmlXPathParserContextPtr ctxt, int argc)
|
298
295
|
{
|
299
296
|
VALUE rb_xpath_handler = Qnil;
|
300
297
|
const char *method_name = NULL ;
|
@@ -316,18 +313,15 @@ method_caller(xmlXPathParserContextPtr ctxt, int argc)
|
|
316
313
|
}
|
317
314
|
|
318
315
|
static xmlXPathFunction
|
319
|
-
|
316
|
+
_noko_xml_xpath_context_handler_lookup(void *data, const xmlChar *c_name, const xmlChar *c_ns_uri)
|
320
317
|
{
|
321
318
|
VALUE rb_handler = (VALUE)data;
|
322
319
|
if (rb_respond_to(rb_handler, rb_intern((const char *)c_name))) {
|
323
320
|
if (c_ns_uri == NULL) {
|
324
|
-
NOKO_WARN_DEPRECATION(
|
325
|
-
|
326
|
-
" Please update your query to reference this function as 'nokogiri:%s'."
|
327
|
-
" Invoking custom handler functions without a namespace is deprecated and support will be removed in a future release of Nokogiri.",
|
328
|
-
c_name, c_name);
|
321
|
+
NOKO_WARN_DEPRECATION("A custom XPath or CSS handler function named '%s' is being invoked without a namespace. Please update your query to reference this function as 'nokogiri:%s'. Invoking custom handler functions without a namespace is deprecated and will become an error in Nokogiri v1.17.0.",
|
322
|
+
c_name, c_name); // TODO deprecated in v1.15.0, remove in v1.19.0
|
329
323
|
}
|
330
|
-
return
|
324
|
+
return _noko_xml_xpath_context__handler_invoker;
|
331
325
|
}
|
332
326
|
|
333
327
|
return NULL;
|
@@ -335,7 +329,7 @@ handler_lookup(void *data, const xmlChar *c_name, const xmlChar *c_ns_uri)
|
|
335
329
|
|
336
330
|
PRINTFLIKE_DECL(2, 3)
|
337
331
|
static void
|
338
|
-
|
332
|
+
_noko_xml_xpath_context__generic_exception_pusher(void *data, const char *msg, ...)
|
339
333
|
{
|
340
334
|
VALUE rb_errors = (VALUE)data;
|
341
335
|
VALUE rb_message;
|
@@ -346,7 +340,7 @@ generic_exception_pusher(void *data, const char *msg, ...)
|
|
346
340
|
#ifdef TRUFFLERUBY_NOKOGIRI_SYSTEM_LIBRARIES
|
347
341
|
/* It is not currently possible to pass var args from native
|
348
342
|
functions to sulong, so we work around the issue here. */
|
349
|
-
rb_message = rb_sprintf("
|
343
|
+
rb_message = rb_sprintf("_noko_xml_xpath_context__generic_exception_pusher: %s", msg);
|
350
344
|
#else
|
351
345
|
va_list args;
|
352
346
|
va_start(args, msg);
|
@@ -368,58 +362,55 @@ generic_exception_pusher(void *data, const char *msg, ...)
|
|
368
362
|
* a +Float+, or a boolean.
|
369
363
|
*/
|
370
364
|
static VALUE
|
371
|
-
|
365
|
+
noko_xml_xpath_context_evaluate(int argc, VALUE *argv, VALUE rb_context)
|
372
366
|
{
|
373
|
-
VALUE search_path, xpath_handler;
|
374
|
-
VALUE retval = Qnil;
|
375
367
|
xmlXPathContextPtr c_context;
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
xmlXPathContext,
|
383
|
-
&xml_xpath_context_type,
|
384
|
-
c_context
|
385
|
-
);
|
368
|
+
VALUE rb_expression = Qnil;
|
369
|
+
VALUE rb_function_lookup_handler = Qnil;
|
370
|
+
xmlChar *c_expression_str = NULL;
|
371
|
+
VALUE rb_errors = rb_ary_new();
|
372
|
+
xmlXPathObjectPtr c_xpath_object;
|
373
|
+
VALUE rb_xpath_object = Qnil;
|
386
374
|
|
387
|
-
|
388
|
-
|
389
|
-
|
375
|
+
TypedData_Get_Struct(rb_context, xmlXPathContext, &_noko_xml_xpath_context_type, c_context);
|
376
|
+
|
377
|
+
rb_scan_args(argc, argv, "11", &rb_expression, &rb_function_lookup_handler);
|
390
378
|
|
391
|
-
|
379
|
+
c_expression_str = (xmlChar *)StringValueCStr(rb_expression);
|
392
380
|
|
393
|
-
if (Qnil !=
|
381
|
+
if (Qnil != rb_function_lookup_handler) {
|
394
382
|
/* FIXME: not sure if this is the correct place to shove private data. */
|
395
|
-
c_context->userData = (void *)
|
383
|
+
c_context->userData = (void *)rb_function_lookup_handler;
|
396
384
|
xmlXPathRegisterFuncLookup(
|
397
385
|
c_context,
|
398
|
-
|
399
|
-
(void *)
|
386
|
+
_noko_xml_xpath_context_handler_lookup,
|
387
|
+
(void *)rb_function_lookup_handler
|
400
388
|
);
|
401
389
|
}
|
402
390
|
|
403
|
-
|
404
|
-
|
391
|
+
/* TODO: use xmlXPathSetErrorHandler (as of 2.13.0) */
|
392
|
+
xmlSetStructuredErrorFunc((void *)rb_errors, noko__error_array_pusher);
|
393
|
+
xmlSetGenericErrorFunc((void *)rb_errors, _noko_xml_xpath_context__generic_exception_pusher);
|
405
394
|
|
406
|
-
|
395
|
+
c_xpath_object = xmlXPathEvalExpression(c_expression_str, c_context);
|
407
396
|
|
408
397
|
xmlSetStructuredErrorFunc(NULL, NULL);
|
409
398
|
xmlSetGenericErrorFunc(NULL, NULL);
|
410
399
|
|
411
|
-
|
412
|
-
|
400
|
+
xmlXPathRegisterFuncLookup(c_context, NULL, NULL);
|
401
|
+
|
402
|
+
if (c_xpath_object == NULL) {
|
403
|
+
rb_exc_raise(rb_ary_entry(rb_errors, 0));
|
413
404
|
}
|
414
405
|
|
415
|
-
|
416
|
-
if (
|
417
|
-
|
406
|
+
rb_xpath_object = _noko_xml_xpath_context__xpath2ruby(c_xpath_object, c_context);
|
407
|
+
if (rb_xpath_object == Qundef) {
|
408
|
+
rb_xpath_object = noko_xml_node_set_wrap(NULL, DOC_RUBY_OBJECT(c_context->doc));
|
418
409
|
}
|
419
410
|
|
420
|
-
xmlXPathFreeNodeSetList(
|
411
|
+
xmlXPathFreeNodeSetList(c_xpath_object);
|
421
412
|
|
422
|
-
return
|
413
|
+
return rb_xpath_object;
|
423
414
|
}
|
424
415
|
|
425
416
|
/*
|
@@ -429,45 +420,53 @@ rb_xml_xpath_context_evaluate(int argc, VALUE *argv, VALUE rb_context)
|
|
429
420
|
* Create a new XPathContext with +node+ as the context node.
|
430
421
|
*/
|
431
422
|
static VALUE
|
432
|
-
|
423
|
+
noko_xml_xpath_context_new(VALUE klass, VALUE rb_node)
|
433
424
|
{
|
434
|
-
xmlNodePtr
|
425
|
+
xmlNodePtr c_node;
|
435
426
|
xmlXPathContextPtr c_context;
|
436
427
|
VALUE rb_context;
|
437
428
|
|
438
|
-
Noko_Node_Get_Struct(rb_node, xmlNode,
|
429
|
+
Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
|
439
430
|
|
440
431
|
#if LIBXML_VERSION < 21000
|
441
|
-
/* deprecated in 40483d0 */
|
442
|
-
xmlXPathInit();
|
432
|
+
xmlXPathInit(); /* deprecated in 40483d0 */
|
443
433
|
#endif
|
444
434
|
|
445
|
-
c_context = xmlXPathNewContext(
|
446
|
-
c_context->node =
|
435
|
+
c_context = xmlXPathNewContext(c_node->doc);
|
436
|
+
c_context->node = c_node;
|
447
437
|
|
448
438
|
xmlXPathRegisterNs(c_context, NOKOGIRI_PREFIX, NOKOGIRI_URI);
|
449
439
|
xmlXPathRegisterNs(c_context, NOKOGIRI_BUILTIN_PREFIX, NOKOGIRI_BUILTIN_URI);
|
450
|
-
xmlXPathRegisterFuncNS(
|
451
|
-
c_context,
|
452
|
-
(const xmlChar *)"css-class",
|
453
|
-
NOKOGIRI_BUILTIN_URI,
|
454
|
-
xpath_builtin_css_class
|
455
|
-
);
|
456
|
-
xmlXPathRegisterFuncNS(
|
457
|
-
c_context,
|
458
|
-
(const xmlChar *)"local-name-is",
|
459
|
-
NOKOGIRI_BUILTIN_URI,
|
460
|
-
xpath_builtin_local_name_is
|
461
|
-
);
|
462
440
|
|
463
|
-
|
464
|
-
|
465
|
-
|
466
|
-
|
467
|
-
|
441
|
+
xmlXPathRegisterFuncNS(c_context,
|
442
|
+
(const xmlChar *)"css-class", NOKOGIRI_BUILTIN_URI,
|
443
|
+
noko_xml_xpath_context_xpath_func_css_class);
|
444
|
+
xmlXPathRegisterFuncNS(c_context,
|
445
|
+
(const xmlChar *)"local-name-is", NOKOGIRI_BUILTIN_URI,
|
446
|
+
noko_xml_xpath_context_xpath_func_local_name_is);
|
447
|
+
|
448
|
+
rb_context = TypedData_Wrap_Struct(klass, &_noko_xml_xpath_context_type, c_context);
|
449
|
+
|
468
450
|
return rb_context;
|
469
451
|
}
|
470
452
|
|
453
|
+
|
454
|
+
/* :nodoc: */
|
455
|
+
static VALUE
|
456
|
+
noko_xml_xpath_context_set_node(VALUE rb_context, VALUE rb_node)
|
457
|
+
{
|
458
|
+
xmlNodePtr c_node;
|
459
|
+
xmlXPathContextPtr c_context;
|
460
|
+
|
461
|
+
TypedData_Get_Struct(rb_context, xmlXPathContext, &_noko_xml_xpath_context_type, c_context);
|
462
|
+
Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
|
463
|
+
|
464
|
+
c_context->doc = c_node->doc;
|
465
|
+
c_context->node = c_node;
|
466
|
+
|
467
|
+
return rb_node;
|
468
|
+
}
|
469
|
+
|
471
470
|
void
|
472
471
|
noko_init_xml_xpath_context(void)
|
473
472
|
{
|
@@ -478,9 +477,10 @@ noko_init_xml_xpath_context(void)
|
|
478
477
|
|
479
478
|
rb_undef_alloc_func(cNokogiriXmlXpathContext);
|
480
479
|
|
481
|
-
rb_define_singleton_method(cNokogiriXmlXpathContext, "new",
|
480
|
+
rb_define_singleton_method(cNokogiriXmlXpathContext, "new", noko_xml_xpath_context_new, 1);
|
482
481
|
|
483
|
-
rb_define_method(cNokogiriXmlXpathContext, "evaluate",
|
484
|
-
rb_define_method(cNokogiriXmlXpathContext, "register_variable",
|
485
|
-
rb_define_method(cNokogiriXmlXpathContext, "register_ns",
|
482
|
+
rb_define_method(cNokogiriXmlXpathContext, "evaluate", noko_xml_xpath_context_evaluate, -1);
|
483
|
+
rb_define_method(cNokogiriXmlXpathContext, "register_variable", noko_xml_xpath_context_register_variable, 2);
|
484
|
+
rb_define_method(cNokogiriXmlXpathContext, "register_ns", noko_xml_xpath_context_register_ns, 2);
|
485
|
+
rb_define_method(cNokogiriXmlXpathContext, "node=", noko_xml_xpath_context_set_node, 1);
|
486
486
|
}
|
@@ -1,6 +1,6 @@
|
|
1
1
|
#include <nokogiri.h>
|
2
2
|
|
3
|
-
VALUE cNokogiriXsltStylesheet
|
3
|
+
VALUE cNokogiriXsltStylesheet;
|
4
4
|
|
5
5
|
static void
|
6
6
|
mark(void *data)
|
@@ -18,8 +18,8 @@ dealloc(void *data)
|
|
18
18
|
ruby_xfree(wrapper);
|
19
19
|
}
|
20
20
|
|
21
|
-
static const rb_data_type_t
|
22
|
-
.wrap_struct_name = "
|
21
|
+
static const rb_data_type_t nokogiri_xslt_stylesheet_tuple_type = {
|
22
|
+
.wrap_struct_name = "nokogiriXsltStylesheetTuple",
|
23
23
|
.function = {
|
24
24
|
.dmark = mark,
|
25
25
|
.dfree = dealloc,
|
@@ -56,7 +56,7 @@ Nokogiri_wrap_xslt_stylesheet(xsltStylesheetPtr ss)
|
|
56
56
|
self = TypedData_Make_Struct(
|
57
57
|
cNokogiriXsltStylesheet,
|
58
58
|
nokogiriXsltStylesheetTuple,
|
59
|
-
&
|
59
|
+
&nokogiri_xslt_stylesheet_tuple_type,
|
60
60
|
wrapper
|
61
61
|
);
|
62
62
|
|
@@ -71,7 +71,12 @@ Nokogiri_wrap_xslt_stylesheet(xsltStylesheetPtr ss)
|
|
71
71
|
* call-seq:
|
72
72
|
* parse_stylesheet_doc(document)
|
73
73
|
*
|
74
|
-
* Parse
|
74
|
+
* Parse an XSLT::Stylesheet from +document+.
|
75
|
+
*
|
76
|
+
* [Parameters]
|
77
|
+
* - +document+ (Nokogiri::XML::Document) the document to be parsed.
|
78
|
+
*
|
79
|
+
* [Returns] Nokogiri::XSLT::Stylesheet
|
75
80
|
*/
|
76
81
|
static VALUE
|
77
82
|
parse_stylesheet_doc(VALUE klass, VALUE xmldocobj)
|
@@ -104,7 +109,7 @@ parse_stylesheet_doc(VALUE klass, VALUE xmldocobj)
|
|
104
109
|
* call-seq:
|
105
110
|
* serialize(document)
|
106
111
|
*
|
107
|
-
* Serialize +document+ to an xml string.
|
112
|
+
* Serialize +document+ to an xml string, as specified by the +method+ parameter in the Stylesheet.
|
108
113
|
*/
|
109
114
|
static VALUE
|
110
115
|
rb_xslt_stylesheet_serialize(VALUE self, VALUE xmlobj)
|
@@ -119,7 +124,7 @@ rb_xslt_stylesheet_serialize(VALUE self, VALUE xmlobj)
|
|
119
124
|
TypedData_Get_Struct(
|
120
125
|
self,
|
121
126
|
nokogiriXsltStylesheetTuple,
|
122
|
-
&
|
127
|
+
&nokogiri_xslt_stylesheet_tuple_type,
|
123
128
|
wrapper
|
124
129
|
);
|
125
130
|
xsltSaveResultToString(&doc_ptr, &doc_len, xml, wrapper->ss);
|
@@ -133,7 +138,7 @@ rb_xslt_stylesheet_serialize(VALUE self, VALUE xmlobj)
|
|
133
138
|
* transform(document)
|
134
139
|
* transform(document, params = {})
|
135
140
|
*
|
136
|
-
*
|
141
|
+
* Transform an XML::Document as defined by an XSLT::Stylesheet.
|
137
142
|
*
|
138
143
|
* [Parameters]
|
139
144
|
* - +document+ (Nokogiri::XML::Document) the document to be transformed.
|
@@ -268,7 +273,7 @@ rb_xslt_stylesheet_transform(int argc, VALUE *argv, VALUE self)
|
|
268
273
|
Check_Type(rb_param, T_ARRAY);
|
269
274
|
|
270
275
|
c_document = noko_xml_document_unwrap(rb_document);
|
271
|
-
TypedData_Get_Struct(self, nokogiriXsltStylesheetTuple, &
|
276
|
+
TypedData_Get_Struct(self, nokogiriXsltStylesheetTuple, &nokogiri_xslt_stylesheet_tuple_type, wrapper);
|
272
277
|
|
273
278
|
param_len = RARRAY_LEN(rb_param);
|
274
279
|
params = ruby_xcalloc((size_t)param_len + 1, sizeof(char *));
|
@@ -357,7 +362,7 @@ initFunc(xsltTransformContextPtr ctxt, const xmlChar *uri)
|
|
357
362
|
TypedData_Get_Struct(
|
358
363
|
(VALUE)ctxt->style->_private,
|
359
364
|
nokogiriXsltStylesheetTuple,
|
360
|
-
&
|
365
|
+
&nokogiri_xslt_stylesheet_tuple_type,
|
361
366
|
wrapper
|
362
367
|
);
|
363
368
|
inst = rb_class_new_instance(0, NULL, obj);
|
@@ -375,7 +380,7 @@ shutdownFunc(xsltTransformContextPtr ctxt,
|
|
375
380
|
TypedData_Get_Struct(
|
376
381
|
(VALUE)ctxt->style->_private,
|
377
382
|
nokogiriXsltStylesheetTuple,
|
378
|
-
&
|
383
|
+
&nokogiri_xslt_stylesheet_tuple_type,
|
379
384
|
wrapper
|
380
385
|
);
|
381
386
|
|
data/gumbo-parser/Makefile
CHANGED
@@ -13,6 +13,23 @@ LDFLAGS := -pthread
|
|
13
13
|
|
14
14
|
all: check
|
15
15
|
|
16
|
+
oss-fuzz:
|
17
|
+
./fuzzer/build-ossfuzz.sh
|
18
|
+
|
19
|
+
fuzzers: fuzzer-normal fuzzer-asan fuzzer-ubsan fuzzer-msan
|
20
|
+
|
21
|
+
fuzzer-normal:
|
22
|
+
./fuzzer/build.sh
|
23
|
+
|
24
|
+
fuzzer-asan:
|
25
|
+
SANITIZER=asan ./fuzzer/build.sh
|
26
|
+
|
27
|
+
fuzzer-ubsan:
|
28
|
+
SANITIZER=ubsan ./fuzzer/build.sh
|
29
|
+
|
30
|
+
fuzzer-msan:
|
31
|
+
SANITIZER=msan ./fuzzer/build.sh
|
32
|
+
|
16
33
|
# don't try to regenerate ragel or gperf files in CI, that should be a development-only action and
|
17
34
|
# the generated files should be committed to SCM
|
18
35
|
ifneq ($(CI),true)
|
@@ -81,6 +98,7 @@ coverage:
|
|
81
98
|
|
82
99
|
clean:
|
83
100
|
$(RM) -r build
|
101
|
+
$(RM) -r fuzzer/build fuzzer/src-* fuzzer/gumbo_corpus
|
84
102
|
|
85
103
|
build/src/flags: | build/src
|
86
104
|
@echo 'old_CC := $(CC)' > $@
|
data/gumbo-parser/src/ascii.c
CHANGED
@@ -44,8 +44,8 @@ const unsigned char _gumbo_ascii_table[0x80] = {
|
|
44
44
|
};
|
45
45
|
|
46
46
|
// Table generation code.
|
47
|
-
// clang -DGUMBO_GEN_TABLE
|
48
|
-
#
|
47
|
+
// clang -DGUMBO_GEN_TABLE ascii.c && ./a.out && rm a.out
|
48
|
+
#ifdef GUMBO_GEN_TABLE
|
49
49
|
#include <stdio.h>
|
50
50
|
|
51
51
|
int main() {
|
data/gumbo-parser/src/error.c
CHANGED
@@ -46,33 +46,40 @@ static int PRINTF(2) print_message (
|
|
46
46
|
args
|
47
47
|
);
|
48
48
|
va_end(args);
|
49
|
-
|
49
|
+
|
50
|
+
#if (defined(_MSC_VER) && (_MSC_VER < 1900)) || defined(_RUBY_MSVCRT)
|
50
51
|
if (bytes_written == -1) {
|
51
52
|
// vsnprintf returns -1 on older MSVC++ if there's not enough capacity,
|
52
53
|
// instead of returning the number of bytes that would've been written had
|
53
|
-
// there been enough. In this case, we
|
54
|
-
//
|
55
|
-
//
|
56
|
-
|
54
|
+
// there been enough. In this case, we can call vsnprintf() again but
|
55
|
+
// with a count of 0 to get the number of bytes written, not including
|
56
|
+
// the null terminator.
|
57
|
+
// https://learn.microsoft.com/en-us/cpp/c-runtime-library/reference/vsnprintf-vsnprintf-vsnprintf-l-vsnwprintf-vsnwprintf-l?view=msvc-140#behavior-summary
|
58
|
+
|
57
59
|
va_start(args, format);
|
58
|
-
|
59
|
-
|
60
|
-
|
60
|
+
bytes_written = vsnprintf (
|
61
|
+
NULL,
|
62
|
+
0,
|
61
63
|
format,
|
62
64
|
args
|
63
65
|
);
|
64
66
|
va_end(args);
|
65
|
-
return result == -1 ? 0 : result;
|
66
67
|
}
|
67
|
-
#
|
68
|
+
#endif
|
69
|
+
|
68
70
|
// -1 in standard C99 indicates an encoding error. Return 0 and do nothing.
|
69
71
|
if (bytes_written == -1) {
|
70
72
|
return 0;
|
71
73
|
}
|
72
|
-
#endif
|
73
74
|
|
74
75
|
if (bytes_written >= remaining_capacity) {
|
75
|
-
|
76
|
+
// At least double the size of the buffer.
|
77
|
+
size_t new_capacity = output->capacity * 2;
|
78
|
+
if (new_capacity < output->length + bytes_written + 1) {
|
79
|
+
// The +1 is for the null terminator.
|
80
|
+
new_capacity = output->length + bytes_written + 1;
|
81
|
+
}
|
82
|
+
gumbo_string_buffer_reserve(new_capacity, output);
|
76
83
|
remaining_capacity = output->capacity - output->length;
|
77
84
|
va_start(args, format);
|
78
85
|
bytes_written = vsnprintf (
|
@@ -96,8 +103,14 @@ static void print_tag_stack (
|
|
96
103
|
if (i) {
|
97
104
|
print_message(output, ", ");
|
98
105
|
}
|
99
|
-
|
100
|
-
|
106
|
+
uintptr_t tag = (uintptr_t) error->tag_stack.data[i];
|
107
|
+
const char* tag_name;
|
108
|
+
if (tag > GUMBO_TAG_UNKNOWN) {
|
109
|
+
tag_name = error->tag_stack.data[i];
|
110
|
+
} else {
|
111
|
+
tag_name = gumbo_normalized_tagname((GumboTag)tag);
|
112
|
+
}
|
113
|
+
print_message(output, "%s", tag_name);
|
101
114
|
}
|
102
115
|
gumbo_string_buffer_append_codepoint('.', output);
|
103
116
|
}
|
@@ -326,41 +339,45 @@ static void handle_parser_error (
|
|
326
339
|
}
|
327
340
|
|
328
341
|
switch (error->input_type) {
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
print_tag_stack(error, output);
|
352
|
-
}
|
353
|
-
return;
|
354
|
-
case GUMBO_TOKEN_START_TAG:
|
355
|
-
print_message(output, "Start tag '%s' isn't allowed here.",
|
356
|
-
gumbo_normalized_tagname(error->input_tag));
|
357
|
-
print_tag_stack(error, output);
|
358
|
-
return;
|
359
|
-
case GUMBO_TOKEN_END_TAG:
|
360
|
-
print_message(output, "Eng tag '%s' isn't allowed here.",
|
361
|
-
gumbo_normalized_tagname(error->input_tag));
|
342
|
+
case GUMBO_TOKEN_DOCTYPE:
|
343
|
+
print_message(output, "This is not a legal doctype");
|
344
|
+
return;
|
345
|
+
case GUMBO_TOKEN_COMMENT:
|
346
|
+
// Should never happen; comments are always legal.
|
347
|
+
assert(0);
|
348
|
+
// But just in case...
|
349
|
+
print_message(output, "Comments aren't legal here");
|
350
|
+
return;
|
351
|
+
case GUMBO_TOKEN_CDATA:
|
352
|
+
case GUMBO_TOKEN_WHITESPACE:
|
353
|
+
case GUMBO_TOKEN_CHARACTER:
|
354
|
+
print_message(output, "Character tokens aren't legal here");
|
355
|
+
return;
|
356
|
+
case GUMBO_TOKEN_NULL:
|
357
|
+
print_message(output, "Null bytes are not allowed in HTML5");
|
358
|
+
return;
|
359
|
+
case GUMBO_TOKEN_EOF:
|
360
|
+
if (error->parser_state == GUMBO_INSERTION_MODE_INITIAL) {
|
361
|
+
print_message(output, "You must provide a doctype");
|
362
|
+
} else {
|
363
|
+
print_message(output, "Premature end of file.");
|
362
364
|
print_tag_stack(error, output);
|
363
|
-
|
365
|
+
}
|
366
|
+
return;
|
367
|
+
case GUMBO_TOKEN_START_TAG:
|
368
|
+
case GUMBO_TOKEN_END_TAG:
|
369
|
+
{
|
370
|
+
const char* tag_name;
|
371
|
+
const char* which = error->input_type == GUMBO_TOKEN_START_TAG ? "Start" : "End";
|
372
|
+
if (error->input_name) {
|
373
|
+
tag_name = error->input_name;
|
374
|
+
} else {
|
375
|
+
tag_name = gumbo_normalized_tagname(error->input_tag);
|
376
|
+
}
|
377
|
+
print_message(output, "%s tag '%s' isn't allowed here.", which, tag_name);
|
378
|
+
print_tag_stack(error, output);
|
379
|
+
return;
|
380
|
+
}
|
364
381
|
}
|
365
382
|
}
|
366
383
|
|
@@ -613,6 +630,17 @@ void gumbo_print_caret_diagnostic (
|
|
613
630
|
|
614
631
|
void gumbo_error_destroy(GumboError* error) {
|
615
632
|
if (error->type == GUMBO_ERR_PARSER) {
|
633
|
+
// Free the tag name.
|
634
|
+
if (error->v.parser.input_name) {
|
635
|
+
gumbo_free(error->v.parser.input_name);
|
636
|
+
}
|
637
|
+
|
638
|
+
for (unsigned int i = 0; i < error->v.parser.tag_stack.length; ++i) {
|
639
|
+
intptr_t tag = (intptr_t) error->v.parser.tag_stack.data[i];
|
640
|
+
if (tag > GUMBO_TAG_UNKNOWN) {
|
641
|
+
gumbo_free(error->v.parser.tag_stack.data[i]);
|
642
|
+
}
|
643
|
+
}
|
616
644
|
gumbo_vector_destroy(&error->v.parser.tag_stack);
|
617
645
|
}
|
618
646
|
gumbo_free(error);
|