nokogiri 1.16.8 → 1.18.1
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +14 -22
- data/LICENSE-DEPENDENCIES.md +6 -6
- data/README.md +8 -5
- data/dependencies.yml +6 -6
- data/ext/nokogiri/extconf.rb +188 -142
- data/ext/nokogiri/gumbo.c +69 -53
- data/ext/nokogiri/html4_document.c +10 -4
- data/ext/nokogiri/html4_element_description.c +18 -18
- data/ext/nokogiri/html4_sax_parser.c +40 -0
- data/ext/nokogiri/html4_sax_parser_context.c +48 -58
- data/ext/nokogiri/html4_sax_push_parser.c +25 -24
- data/ext/nokogiri/libxml2_polyfill.c +114 -0
- data/ext/nokogiri/nokogiri.c +9 -2
- data/ext/nokogiri/nokogiri.h +18 -33
- data/ext/nokogiri/xml_attr.c +1 -1
- data/ext/nokogiri/xml_cdata.c +2 -10
- data/ext/nokogiri/xml_comment.c +3 -8
- data/ext/nokogiri/xml_document.c +163 -156
- data/ext/nokogiri/xml_document_fragment.c +10 -25
- data/ext/nokogiri/xml_dtd.c +1 -1
- data/ext/nokogiri/xml_element_content.c +9 -9
- data/ext/nokogiri/xml_encoding_handler.c +4 -4
- data/ext/nokogiri/xml_namespace.c +6 -6
- data/ext/nokogiri/xml_node.c +134 -103
- data/ext/nokogiri/xml_node_set.c +46 -44
- data/ext/nokogiri/xml_reader.c +54 -58
- data/ext/nokogiri/xml_relax_ng.c +35 -56
- data/ext/nokogiri/xml_sax_parser.c +156 -88
- data/ext/nokogiri/xml_sax_parser_context.c +219 -131
- data/ext/nokogiri/xml_sax_push_parser.c +68 -49
- data/ext/nokogiri/xml_schema.c +50 -85
- data/ext/nokogiri/xml_syntax_error.c +19 -11
- data/ext/nokogiri/xml_text.c +2 -4
- data/ext/nokogiri/xml_xpath_context.c +103 -100
- data/ext/nokogiri/xslt_stylesheet.c +8 -8
- data/gumbo-parser/src/ascii.c +2 -2
- data/gumbo-parser/src/error.c +76 -48
- data/gumbo-parser/src/error.h +5 -1
- data/gumbo-parser/src/nokogiri_gumbo.h +11 -2
- data/gumbo-parser/src/parser.c +63 -25
- data/gumbo-parser/src/tokenizer.c +6 -6
- data/lib/nokogiri/class_resolver.rb +1 -1
- data/lib/nokogiri/css/node.rb +6 -2
- data/lib/nokogiri/css/parser.rb +6 -4
- data/lib/nokogiri/css/parser.y +2 -2
- data/lib/nokogiri/css/parser_extras.rb +6 -66
- data/lib/nokogiri/css/selector_cache.rb +38 -0
- data/lib/nokogiri/css/tokenizer.rb +4 -4
- data/lib/nokogiri/css/tokenizer.rex +9 -8
- data/lib/nokogiri/css/xpath_visitor.rb +42 -6
- data/lib/nokogiri/css.rb +86 -20
- data/lib/nokogiri/decorators/slop.rb +3 -5
- data/lib/nokogiri/encoding_handler.rb +2 -2
- data/lib/nokogiri/html4/document.rb +44 -23
- data/lib/nokogiri/html4/document_fragment.rb +124 -12
- data/lib/nokogiri/html4/encoding_reader.rb +1 -1
- data/lib/nokogiri/html4/sax/parser.rb +23 -38
- data/lib/nokogiri/html4/sax/parser_context.rb +4 -9
- data/lib/nokogiri/html4.rb +9 -14
- data/lib/nokogiri/html5/builder.rb +40 -0
- data/lib/nokogiri/html5/document.rb +61 -30
- data/lib/nokogiri/html5/document_fragment.rb +130 -20
- data/lib/nokogiri/html5/node.rb +4 -4
- data/lib/nokogiri/html5.rb +114 -72
- data/lib/nokogiri/version/constant.rb +1 -1
- data/lib/nokogiri/xml/builder.rb +8 -1
- data/lib/nokogiri/xml/document.rb +70 -26
- data/lib/nokogiri/xml/document_fragment.rb +84 -13
- data/lib/nokogiri/xml/node.rb +82 -11
- data/lib/nokogiri/xml/node_set.rb +9 -7
- data/lib/nokogiri/xml/parse_options.rb +1 -1
- data/lib/nokogiri/xml/pp/node.rb +6 -1
- data/lib/nokogiri/xml/reader.rb +46 -13
- data/lib/nokogiri/xml/relax_ng.rb +57 -20
- data/lib/nokogiri/xml/sax/document.rb +174 -83
- data/lib/nokogiri/xml/sax/parser.rb +115 -41
- data/lib/nokogiri/xml/sax/parser_context.rb +116 -8
- data/lib/nokogiri/xml/sax/push_parser.rb +3 -0
- data/lib/nokogiri/xml/sax.rb +48 -0
- data/lib/nokogiri/xml/schema.rb +112 -45
- data/lib/nokogiri/xml/searchable.rb +38 -42
- data/lib/nokogiri/xml/syntax_error.rb +22 -0
- data/lib/nokogiri/xml/xpath_context.rb +14 -3
- data/lib/nokogiri/xml.rb +13 -24
- data/lib/nokogiri/xslt.rb +3 -9
- data/lib/xsd/xmlparser/nokogiri.rb +3 -4
- data/patches/libxml2/0019-xpath-Use-separate-static-hash-table-for-standard-fu.patch +244 -0
- data/ports/archives/libxml2-2.13.5.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.42.tar.xz +0 -0
- metadata +13 -12
- data/ext/nokogiri/libxml2_backwards_compat.c +0 -121
- data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +0 -25
- data/ports/archives/libxml2-2.12.9.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.39.tar.xz +0 -0
@@ -12,23 +12,23 @@ static const xmlChar *NOKOGIRI_BUILTIN_PREFIX = (const xmlChar *)"nokogiri-built
|
|
12
12
|
static const xmlChar *NOKOGIRI_BUILTIN_URI = (const xmlChar *)"https://www.nokogiri.org/default_ns/ruby/builtins";
|
13
13
|
|
14
14
|
static void
|
15
|
-
|
15
|
+
_noko_xml_xpath_context_dfree(void *data)
|
16
16
|
{
|
17
17
|
xmlXPathContextPtr c_context = data;
|
18
18
|
xmlXPathFreeContext(c_context);
|
19
19
|
}
|
20
20
|
|
21
|
-
static const rb_data_type_t
|
22
|
-
.wrap_struct_name = "
|
21
|
+
static const rb_data_type_t _noko_xml_xpath_context_type = {
|
22
|
+
.wrap_struct_name = "xmlXPathContext",
|
23
23
|
.function = {
|
24
|
-
.dfree =
|
24
|
+
.dfree = _noko_xml_xpath_context_dfree,
|
25
25
|
},
|
26
26
|
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
|
27
27
|
};
|
28
28
|
|
29
29
|
/* find a CSS class in an HTML element's `class` attribute */
|
30
30
|
static const xmlChar *
|
31
|
-
|
31
|
+
_noko_xml_xpath_context__css_class(const xmlChar *str, const xmlChar *val)
|
32
32
|
{
|
33
33
|
int val_len;
|
34
34
|
|
@@ -62,9 +62,9 @@ builtin_css_class(const xmlChar *str, const xmlChar *val)
|
|
62
62
|
return (NULL);
|
63
63
|
}
|
64
64
|
|
65
|
-
/* xmlXPathFunction to wrap
|
65
|
+
/* xmlXPathFunction to wrap _noko_xml_xpath_context__css_class() */
|
66
66
|
static void
|
67
|
-
|
67
|
+
noko_xml_xpath_context_xpath_func_css_class(xmlXPathParserContextPtr ctxt, int nargs)
|
68
68
|
{
|
69
69
|
xmlXPathObjectPtr hay, needle;
|
70
70
|
|
@@ -85,7 +85,7 @@ xpath_builtin_css_class(xmlXPathParserContextPtr ctxt, int nargs)
|
|
85
85
|
XP_ERROR(XPATH_INVALID_TYPE);
|
86
86
|
}
|
87
87
|
|
88
|
-
if (
|
88
|
+
if (_noko_xml_xpath_context__css_class(hay->stringval, needle->stringval)) {
|
89
89
|
valuePush(ctxt, xmlXPathNewBoolean(1));
|
90
90
|
} else {
|
91
91
|
valuePush(ctxt, xmlXPathNewBoolean(0));
|
@@ -99,7 +99,7 @@ xpath_builtin_css_class(xmlXPathParserContextPtr ctxt, int nargs)
|
|
99
99
|
/* xmlXPathFunction to select nodes whose local name matches, for HTML5 CSS queries that should
|
100
100
|
* ignore namespaces */
|
101
101
|
static void
|
102
|
-
|
102
|
+
noko_xml_xpath_context_xpath_func_local_name_is(xmlXPathParserContextPtr ctxt, int nargs)
|
103
103
|
{
|
104
104
|
xmlXPathObjectPtr element_name;
|
105
105
|
|
@@ -124,25 +124,26 @@ xpath_builtin_local_name_is(xmlXPathParserContextPtr ctxt, int nargs)
|
|
124
124
|
* register_ns(prefix, uri) → Nokogiri::XML::XPathContext
|
125
125
|
*
|
126
126
|
* Register the namespace with +prefix+ and +uri+ for use in future queries.
|
127
|
+
* Passing a uri of +nil+ will unregister the namespace.
|
127
128
|
*
|
128
129
|
* [Returns] +self+
|
129
130
|
*/
|
130
131
|
static VALUE
|
131
|
-
|
132
|
+
noko_xml_xpath_context_register_ns(VALUE rb_context, VALUE prefix, VALUE uri)
|
132
133
|
{
|
133
134
|
xmlXPathContextPtr c_context;
|
135
|
+
const xmlChar *ns_uri;
|
134
136
|
|
135
|
-
TypedData_Get_Struct(
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
137
|
+
TypedData_Get_Struct(rb_context, xmlXPathContext, &_noko_xml_xpath_context_type, c_context);
|
138
|
+
|
139
|
+
if (NIL_P(uri)) {
|
140
|
+
ns_uri = NULL;
|
141
|
+
} else {
|
142
|
+
ns_uri = (const xmlChar *)StringValueCStr(uri);
|
143
|
+
}
|
144
|
+
|
145
|
+
xmlXPathRegisterNs(c_context, (const xmlChar *)StringValueCStr(prefix), ns_uri);
|
141
146
|
|
142
|
-
xmlXPathRegisterNs(c_context,
|
143
|
-
(const xmlChar *)StringValueCStr(prefix),
|
144
|
-
(const xmlChar *)StringValueCStr(uri)
|
145
|
-
);
|
146
147
|
return rb_context;
|
147
148
|
}
|
148
149
|
|
@@ -151,29 +152,25 @@ rb_xml_xpath_context_register_ns(VALUE rb_context, VALUE prefix, VALUE uri)
|
|
151
152
|
* register_variable(name, value) → Nokogiri::XML::XPathContext
|
152
153
|
*
|
153
154
|
* Register the variable +name+ with +value+ for use in future queries.
|
155
|
+
* Passing a value of +nil+ will unregister the variable.
|
154
156
|
*
|
155
157
|
* [Returns] +self+
|
156
158
|
*/
|
157
159
|
static VALUE
|
158
|
-
|
160
|
+
noko_xml_xpath_context_register_variable(VALUE rb_context, VALUE name, VALUE value)
|
159
161
|
{
|
160
162
|
xmlXPathContextPtr c_context;
|
161
163
|
xmlXPathObjectPtr xmlValue;
|
162
164
|
|
163
|
-
TypedData_Get_Struct(
|
164
|
-
rb_context,
|
165
|
-
xmlXPathContext,
|
166
|
-
&xml_xpath_context_type,
|
167
|
-
c_context
|
168
|
-
);
|
165
|
+
TypedData_Get_Struct(rb_context, xmlXPathContext, &_noko_xml_xpath_context_type, c_context);
|
169
166
|
|
170
|
-
|
167
|
+
if (NIL_P(value)) {
|
168
|
+
xmlValue = NULL;
|
169
|
+
} else {
|
170
|
+
xmlValue = xmlXPathNewCString(StringValueCStr(value));
|
171
|
+
}
|
171
172
|
|
172
|
-
xmlXPathRegisterVariable(
|
173
|
-
c_context,
|
174
|
-
(const xmlChar *)StringValueCStr(name),
|
175
|
-
xmlValue
|
176
|
-
);
|
173
|
+
xmlXPathRegisterVariable(c_context, (const xmlChar *)StringValueCStr(name), xmlValue);
|
177
174
|
|
178
175
|
return rb_context;
|
179
176
|
}
|
@@ -184,7 +181,7 @@ rb_xml_xpath_context_register_variable(VALUE rb_context, VALUE name, VALUE value
|
|
184
181
|
* returns Qundef if no conversion was possible.
|
185
182
|
*/
|
186
183
|
static VALUE
|
187
|
-
|
184
|
+
_noko_xml_xpath_context__xpath2ruby(xmlXPathObjectPtr c_xpath_object, xmlXPathContextPtr c_context)
|
188
185
|
{
|
189
186
|
VALUE rb_retval;
|
190
187
|
|
@@ -238,7 +235,7 @@ Nokogiri_marshal_xpath_funcall_and_return_values(
|
|
238
235
|
|
239
236
|
for (int j = argc - 1 ; j >= 0 ; --j) {
|
240
237
|
c_xpath_object = valuePop(ctxt);
|
241
|
-
argv[j] =
|
238
|
+
argv[j] = _noko_xml_xpath_context__xpath2ruby(c_xpath_object, ctxt->context);
|
242
239
|
if (argv[j] == Qundef) {
|
243
240
|
argv[j] = NOKOGIRI_STR_NEW2(xmlXPathCastToString(c_xpath_object));
|
244
241
|
}
|
@@ -294,7 +291,7 @@ Nokogiri_marshal_xpath_funcall_and_return_values(
|
|
294
291
|
}
|
295
292
|
|
296
293
|
static void
|
297
|
-
|
294
|
+
_noko_xml_xpath_context__handler_invoker(xmlXPathParserContextPtr ctxt, int argc)
|
298
295
|
{
|
299
296
|
VALUE rb_xpath_handler = Qnil;
|
300
297
|
const char *method_name = NULL ;
|
@@ -316,15 +313,15 @@ method_caller(xmlXPathParserContextPtr ctxt, int argc)
|
|
316
313
|
}
|
317
314
|
|
318
315
|
static xmlXPathFunction
|
319
|
-
|
316
|
+
_noko_xml_xpath_context_handler_lookup(void *data, const xmlChar *c_name, const xmlChar *c_ns_uri)
|
320
317
|
{
|
321
318
|
VALUE rb_handler = (VALUE)data;
|
322
319
|
if (rb_respond_to(rb_handler, rb_intern((const char *)c_name))) {
|
323
320
|
if (c_ns_uri == NULL) {
|
324
321
|
NOKO_WARN_DEPRECATION("A custom XPath or CSS handler function named '%s' is being invoked without a namespace. Please update your query to reference this function as 'nokogiri:%s'. Invoking custom handler functions without a namespace is deprecated and will become an error in Nokogiri v1.17.0.",
|
325
|
-
c_name, c_name); // deprecated in v1.15.0, remove in v1.
|
322
|
+
c_name, c_name); // TODO deprecated in v1.15.0, remove in v1.19.0
|
326
323
|
}
|
327
|
-
return
|
324
|
+
return _noko_xml_xpath_context__handler_invoker;
|
328
325
|
}
|
329
326
|
|
330
327
|
return NULL;
|
@@ -332,7 +329,7 @@ handler_lookup(void *data, const xmlChar *c_name, const xmlChar *c_ns_uri)
|
|
332
329
|
|
333
330
|
PRINTFLIKE_DECL(2, 3)
|
334
331
|
static void
|
335
|
-
|
332
|
+
_noko_xml_xpath_context__generic_exception_pusher(void *data, const char *msg, ...)
|
336
333
|
{
|
337
334
|
VALUE rb_errors = (VALUE)data;
|
338
335
|
VALUE rb_message;
|
@@ -343,7 +340,7 @@ generic_exception_pusher(void *data, const char *msg, ...)
|
|
343
340
|
#ifdef TRUFFLERUBY_NOKOGIRI_SYSTEM_LIBRARIES
|
344
341
|
/* It is not currently possible to pass var args from native
|
345
342
|
functions to sulong, so we work around the issue here. */
|
346
|
-
rb_message = rb_sprintf("
|
343
|
+
rb_message = rb_sprintf("_noko_xml_xpath_context__generic_exception_pusher: %s", msg);
|
347
344
|
#else
|
348
345
|
va_list args;
|
349
346
|
va_start(args, msg);
|
@@ -365,58 +362,55 @@ generic_exception_pusher(void *data, const char *msg, ...)
|
|
365
362
|
* a +Float+, or a boolean.
|
366
363
|
*/
|
367
364
|
static VALUE
|
368
|
-
|
365
|
+
noko_xml_xpath_context_evaluate(int argc, VALUE *argv, VALUE rb_context)
|
369
366
|
{
|
370
|
-
VALUE search_path, xpath_handler;
|
371
|
-
VALUE retval = Qnil;
|
372
367
|
xmlXPathContextPtr c_context;
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
xmlXPathContext,
|
380
|
-
&xml_xpath_context_type,
|
381
|
-
c_context
|
382
|
-
);
|
368
|
+
VALUE rb_expression = Qnil;
|
369
|
+
VALUE rb_function_lookup_handler = Qnil;
|
370
|
+
xmlChar *c_expression_str = NULL;
|
371
|
+
VALUE rb_errors = rb_ary_new();
|
372
|
+
xmlXPathObjectPtr c_xpath_object;
|
373
|
+
VALUE rb_xpath_object = Qnil;
|
383
374
|
|
384
|
-
|
385
|
-
|
386
|
-
|
375
|
+
TypedData_Get_Struct(rb_context, xmlXPathContext, &_noko_xml_xpath_context_type, c_context);
|
376
|
+
|
377
|
+
rb_scan_args(argc, argv, "11", &rb_expression, &rb_function_lookup_handler);
|
387
378
|
|
388
|
-
|
379
|
+
c_expression_str = (xmlChar *)StringValueCStr(rb_expression);
|
389
380
|
|
390
|
-
if (Qnil !=
|
381
|
+
if (Qnil != rb_function_lookup_handler) {
|
391
382
|
/* FIXME: not sure if this is the correct place to shove private data. */
|
392
|
-
c_context->userData = (void *)
|
383
|
+
c_context->userData = (void *)rb_function_lookup_handler;
|
393
384
|
xmlXPathRegisterFuncLookup(
|
394
385
|
c_context,
|
395
|
-
|
396
|
-
(void *)
|
386
|
+
_noko_xml_xpath_context_handler_lookup,
|
387
|
+
(void *)rb_function_lookup_handler
|
397
388
|
);
|
398
389
|
}
|
399
390
|
|
400
|
-
|
401
|
-
|
391
|
+
/* TODO: use xmlXPathSetErrorHandler (as of 2.13.0) */
|
392
|
+
xmlSetStructuredErrorFunc((void *)rb_errors, noko__error_array_pusher);
|
393
|
+
xmlSetGenericErrorFunc((void *)rb_errors, _noko_xml_xpath_context__generic_exception_pusher);
|
402
394
|
|
403
|
-
|
395
|
+
c_xpath_object = xmlXPathEvalExpression(c_expression_str, c_context);
|
404
396
|
|
405
397
|
xmlSetStructuredErrorFunc(NULL, NULL);
|
406
398
|
xmlSetGenericErrorFunc(NULL, NULL);
|
407
399
|
|
408
|
-
|
409
|
-
|
400
|
+
xmlXPathRegisterFuncLookup(c_context, NULL, NULL);
|
401
|
+
|
402
|
+
if (c_xpath_object == NULL) {
|
403
|
+
rb_exc_raise(rb_ary_entry(rb_errors, 0));
|
410
404
|
}
|
411
405
|
|
412
|
-
|
413
|
-
if (
|
414
|
-
|
406
|
+
rb_xpath_object = _noko_xml_xpath_context__xpath2ruby(c_xpath_object, c_context);
|
407
|
+
if (rb_xpath_object == Qundef) {
|
408
|
+
rb_xpath_object = noko_xml_node_set_wrap(NULL, DOC_RUBY_OBJECT(c_context->doc));
|
415
409
|
}
|
416
410
|
|
417
|
-
xmlXPathFreeNodeSetList(
|
411
|
+
xmlXPathFreeNodeSetList(c_xpath_object);
|
418
412
|
|
419
|
-
return
|
413
|
+
return rb_xpath_object;
|
420
414
|
}
|
421
415
|
|
422
416
|
/*
|
@@ -426,45 +420,53 @@ rb_xml_xpath_context_evaluate(int argc, VALUE *argv, VALUE rb_context)
|
|
426
420
|
* Create a new XPathContext with +node+ as the context node.
|
427
421
|
*/
|
428
422
|
static VALUE
|
429
|
-
|
423
|
+
noko_xml_xpath_context_new(VALUE klass, VALUE rb_node)
|
430
424
|
{
|
431
|
-
xmlNodePtr
|
425
|
+
xmlNodePtr c_node;
|
432
426
|
xmlXPathContextPtr c_context;
|
433
427
|
VALUE rb_context;
|
434
428
|
|
435
|
-
Noko_Node_Get_Struct(rb_node, xmlNode,
|
429
|
+
Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
|
436
430
|
|
437
431
|
#if LIBXML_VERSION < 21000
|
438
|
-
/* deprecated in 40483d0 */
|
439
|
-
xmlXPathInit();
|
432
|
+
xmlXPathInit(); /* deprecated in 40483d0 */
|
440
433
|
#endif
|
441
434
|
|
442
|
-
c_context = xmlXPathNewContext(
|
443
|
-
c_context->node =
|
435
|
+
c_context = xmlXPathNewContext(c_node->doc);
|
436
|
+
c_context->node = c_node;
|
444
437
|
|
445
438
|
xmlXPathRegisterNs(c_context, NOKOGIRI_PREFIX, NOKOGIRI_URI);
|
446
439
|
xmlXPathRegisterNs(c_context, NOKOGIRI_BUILTIN_PREFIX, NOKOGIRI_BUILTIN_URI);
|
447
|
-
xmlXPathRegisterFuncNS(
|
448
|
-
c_context,
|
449
|
-
(const xmlChar *)"css-class",
|
450
|
-
NOKOGIRI_BUILTIN_URI,
|
451
|
-
xpath_builtin_css_class
|
452
|
-
);
|
453
|
-
xmlXPathRegisterFuncNS(
|
454
|
-
c_context,
|
455
|
-
(const xmlChar *)"local-name-is",
|
456
|
-
NOKOGIRI_BUILTIN_URI,
|
457
|
-
xpath_builtin_local_name_is
|
458
|
-
);
|
459
440
|
|
460
|
-
|
461
|
-
|
462
|
-
|
463
|
-
|
464
|
-
|
441
|
+
xmlXPathRegisterFuncNS(c_context,
|
442
|
+
(const xmlChar *)"css-class", NOKOGIRI_BUILTIN_URI,
|
443
|
+
noko_xml_xpath_context_xpath_func_css_class);
|
444
|
+
xmlXPathRegisterFuncNS(c_context,
|
445
|
+
(const xmlChar *)"local-name-is", NOKOGIRI_BUILTIN_URI,
|
446
|
+
noko_xml_xpath_context_xpath_func_local_name_is);
|
447
|
+
|
448
|
+
rb_context = TypedData_Wrap_Struct(klass, &_noko_xml_xpath_context_type, c_context);
|
449
|
+
|
465
450
|
return rb_context;
|
466
451
|
}
|
467
452
|
|
453
|
+
|
454
|
+
/* :nodoc: */
|
455
|
+
static VALUE
|
456
|
+
noko_xml_xpath_context_set_node(VALUE rb_context, VALUE rb_node)
|
457
|
+
{
|
458
|
+
xmlNodePtr c_node;
|
459
|
+
xmlXPathContextPtr c_context;
|
460
|
+
|
461
|
+
TypedData_Get_Struct(rb_context, xmlXPathContext, &_noko_xml_xpath_context_type, c_context);
|
462
|
+
Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
|
463
|
+
|
464
|
+
c_context->doc = c_node->doc;
|
465
|
+
c_context->node = c_node;
|
466
|
+
|
467
|
+
return rb_node;
|
468
|
+
}
|
469
|
+
|
468
470
|
void
|
469
471
|
noko_init_xml_xpath_context(void)
|
470
472
|
{
|
@@ -475,9 +477,10 @@ noko_init_xml_xpath_context(void)
|
|
475
477
|
|
476
478
|
rb_undef_alloc_func(cNokogiriXmlXpathContext);
|
477
479
|
|
478
|
-
rb_define_singleton_method(cNokogiriXmlXpathContext, "new",
|
480
|
+
rb_define_singleton_method(cNokogiriXmlXpathContext, "new", noko_xml_xpath_context_new, 1);
|
479
481
|
|
480
|
-
rb_define_method(cNokogiriXmlXpathContext, "evaluate",
|
481
|
-
rb_define_method(cNokogiriXmlXpathContext, "register_variable",
|
482
|
-
rb_define_method(cNokogiriXmlXpathContext, "register_ns",
|
482
|
+
rb_define_method(cNokogiriXmlXpathContext, "evaluate", noko_xml_xpath_context_evaluate, -1);
|
483
|
+
rb_define_method(cNokogiriXmlXpathContext, "register_variable", noko_xml_xpath_context_register_variable, 2);
|
484
|
+
rb_define_method(cNokogiriXmlXpathContext, "register_ns", noko_xml_xpath_context_register_ns, 2);
|
485
|
+
rb_define_method(cNokogiriXmlXpathContext, "node=", noko_xml_xpath_context_set_node, 1);
|
483
486
|
}
|
@@ -1,6 +1,6 @@
|
|
1
1
|
#include <nokogiri.h>
|
2
2
|
|
3
|
-
VALUE cNokogiriXsltStylesheet
|
3
|
+
VALUE cNokogiriXsltStylesheet;
|
4
4
|
|
5
5
|
static void
|
6
6
|
mark(void *data)
|
@@ -18,8 +18,8 @@ dealloc(void *data)
|
|
18
18
|
ruby_xfree(wrapper);
|
19
19
|
}
|
20
20
|
|
21
|
-
static const rb_data_type_t
|
22
|
-
.wrap_struct_name = "
|
21
|
+
static const rb_data_type_t nokogiri_xslt_stylesheet_tuple_type = {
|
22
|
+
.wrap_struct_name = "nokogiriXsltStylesheetTuple",
|
23
23
|
.function = {
|
24
24
|
.dmark = mark,
|
25
25
|
.dfree = dealloc,
|
@@ -56,7 +56,7 @@ Nokogiri_wrap_xslt_stylesheet(xsltStylesheetPtr ss)
|
|
56
56
|
self = TypedData_Make_Struct(
|
57
57
|
cNokogiriXsltStylesheet,
|
58
58
|
nokogiriXsltStylesheetTuple,
|
59
|
-
&
|
59
|
+
&nokogiri_xslt_stylesheet_tuple_type,
|
60
60
|
wrapper
|
61
61
|
);
|
62
62
|
|
@@ -124,7 +124,7 @@ rb_xslt_stylesheet_serialize(VALUE self, VALUE xmlobj)
|
|
124
124
|
TypedData_Get_Struct(
|
125
125
|
self,
|
126
126
|
nokogiriXsltStylesheetTuple,
|
127
|
-
&
|
127
|
+
&nokogiri_xslt_stylesheet_tuple_type,
|
128
128
|
wrapper
|
129
129
|
);
|
130
130
|
xsltSaveResultToString(&doc_ptr, &doc_len, xml, wrapper->ss);
|
@@ -273,7 +273,7 @@ rb_xslt_stylesheet_transform(int argc, VALUE *argv, VALUE self)
|
|
273
273
|
Check_Type(rb_param, T_ARRAY);
|
274
274
|
|
275
275
|
c_document = noko_xml_document_unwrap(rb_document);
|
276
|
-
TypedData_Get_Struct(self, nokogiriXsltStylesheetTuple, &
|
276
|
+
TypedData_Get_Struct(self, nokogiriXsltStylesheetTuple, &nokogiri_xslt_stylesheet_tuple_type, wrapper);
|
277
277
|
|
278
278
|
param_len = RARRAY_LEN(rb_param);
|
279
279
|
params = ruby_xcalloc((size_t)param_len + 1, sizeof(char *));
|
@@ -362,7 +362,7 @@ initFunc(xsltTransformContextPtr ctxt, const xmlChar *uri)
|
|
362
362
|
TypedData_Get_Struct(
|
363
363
|
(VALUE)ctxt->style->_private,
|
364
364
|
nokogiriXsltStylesheetTuple,
|
365
|
-
&
|
365
|
+
&nokogiri_xslt_stylesheet_tuple_type,
|
366
366
|
wrapper
|
367
367
|
);
|
368
368
|
inst = rb_class_new_instance(0, NULL, obj);
|
@@ -380,7 +380,7 @@ shutdownFunc(xsltTransformContextPtr ctxt,
|
|
380
380
|
TypedData_Get_Struct(
|
381
381
|
(VALUE)ctxt->style->_private,
|
382
382
|
nokogiriXsltStylesheetTuple,
|
383
|
-
&
|
383
|
+
&nokogiri_xslt_stylesheet_tuple_type,
|
384
384
|
wrapper
|
385
385
|
);
|
386
386
|
|
data/gumbo-parser/src/ascii.c
CHANGED
@@ -44,8 +44,8 @@ const unsigned char _gumbo_ascii_table[0x80] = {
|
|
44
44
|
};
|
45
45
|
|
46
46
|
// Table generation code.
|
47
|
-
// clang -DGUMBO_GEN_TABLE
|
48
|
-
#
|
47
|
+
// clang -DGUMBO_GEN_TABLE ascii.c && ./a.out && rm a.out
|
48
|
+
#ifdef GUMBO_GEN_TABLE
|
49
49
|
#include <stdio.h>
|
50
50
|
|
51
51
|
int main() {
|
data/gumbo-parser/src/error.c
CHANGED
@@ -46,33 +46,40 @@ static int PRINTF(2) print_message (
|
|
46
46
|
args
|
47
47
|
);
|
48
48
|
va_end(args);
|
49
|
-
|
49
|
+
|
50
|
+
#if (defined(_MSC_VER) && (_MSC_VER < 1900)) || defined(_RUBY_MSVCRT)
|
50
51
|
if (bytes_written == -1) {
|
51
52
|
// vsnprintf returns -1 on older MSVC++ if there's not enough capacity,
|
52
53
|
// instead of returning the number of bytes that would've been written had
|
53
|
-
// there been enough. In this case, we
|
54
|
-
//
|
55
|
-
//
|
56
|
-
|
54
|
+
// there been enough. In this case, we can call vsnprintf() again but
|
55
|
+
// with a count of 0 to get the number of bytes written, not including
|
56
|
+
// the null terminator.
|
57
|
+
// https://learn.microsoft.com/en-us/cpp/c-runtime-library/reference/vsnprintf-vsnprintf-vsnprintf-l-vsnwprintf-vsnwprintf-l?view=msvc-140#behavior-summary
|
58
|
+
|
57
59
|
va_start(args, format);
|
58
|
-
|
59
|
-
|
60
|
-
|
60
|
+
bytes_written = vsnprintf (
|
61
|
+
NULL,
|
62
|
+
0,
|
61
63
|
format,
|
62
64
|
args
|
63
65
|
);
|
64
66
|
va_end(args);
|
65
|
-
return result == -1 ? 0 : result;
|
66
67
|
}
|
67
|
-
#
|
68
|
+
#endif
|
69
|
+
|
68
70
|
// -1 in standard C99 indicates an encoding error. Return 0 and do nothing.
|
69
71
|
if (bytes_written == -1) {
|
70
72
|
return 0;
|
71
73
|
}
|
72
|
-
#endif
|
73
74
|
|
74
75
|
if (bytes_written >= remaining_capacity) {
|
75
|
-
|
76
|
+
// At least double the size of the buffer.
|
77
|
+
size_t new_capacity = output->capacity * 2;
|
78
|
+
if (new_capacity < output->length + bytes_written + 1) {
|
79
|
+
// The +1 is for the null terminator.
|
80
|
+
new_capacity = output->length + bytes_written + 1;
|
81
|
+
}
|
82
|
+
gumbo_string_buffer_reserve(new_capacity, output);
|
76
83
|
remaining_capacity = output->capacity - output->length;
|
77
84
|
va_start(args, format);
|
78
85
|
bytes_written = vsnprintf (
|
@@ -96,8 +103,14 @@ static void print_tag_stack (
|
|
96
103
|
if (i) {
|
97
104
|
print_message(output, ", ");
|
98
105
|
}
|
99
|
-
|
100
|
-
|
106
|
+
uintptr_t tag = (uintptr_t) error->tag_stack.data[i];
|
107
|
+
const char* tag_name;
|
108
|
+
if (tag > GUMBO_TAG_UNKNOWN) {
|
109
|
+
tag_name = error->tag_stack.data[i];
|
110
|
+
} else {
|
111
|
+
tag_name = gumbo_normalized_tagname((GumboTag)tag);
|
112
|
+
}
|
113
|
+
print_message(output, "%s", tag_name);
|
101
114
|
}
|
102
115
|
gumbo_string_buffer_append_codepoint('.', output);
|
103
116
|
}
|
@@ -326,41 +339,45 @@ static void handle_parser_error (
|
|
326
339
|
}
|
327
340
|
|
328
341
|
switch (error->input_type) {
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
print_tag_stack(error, output);
|
352
|
-
}
|
353
|
-
return;
|
354
|
-
case GUMBO_TOKEN_START_TAG:
|
355
|
-
print_message(output, "Start tag '%s' isn't allowed here.",
|
356
|
-
gumbo_normalized_tagname(error->input_tag));
|
357
|
-
print_tag_stack(error, output);
|
358
|
-
return;
|
359
|
-
case GUMBO_TOKEN_END_TAG:
|
360
|
-
print_message(output, "End tag '%s' isn't allowed here.",
|
361
|
-
gumbo_normalized_tagname(error->input_tag));
|
342
|
+
case GUMBO_TOKEN_DOCTYPE:
|
343
|
+
print_message(output, "This is not a legal doctype");
|
344
|
+
return;
|
345
|
+
case GUMBO_TOKEN_COMMENT:
|
346
|
+
// Should never happen; comments are always legal.
|
347
|
+
assert(0);
|
348
|
+
// But just in case...
|
349
|
+
print_message(output, "Comments aren't legal here");
|
350
|
+
return;
|
351
|
+
case GUMBO_TOKEN_CDATA:
|
352
|
+
case GUMBO_TOKEN_WHITESPACE:
|
353
|
+
case GUMBO_TOKEN_CHARACTER:
|
354
|
+
print_message(output, "Character tokens aren't legal here");
|
355
|
+
return;
|
356
|
+
case GUMBO_TOKEN_NULL:
|
357
|
+
print_message(output, "Null bytes are not allowed in HTML5");
|
358
|
+
return;
|
359
|
+
case GUMBO_TOKEN_EOF:
|
360
|
+
if (error->parser_state == GUMBO_INSERTION_MODE_INITIAL) {
|
361
|
+
print_message(output, "You must provide a doctype");
|
362
|
+
} else {
|
363
|
+
print_message(output, "Premature end of file.");
|
362
364
|
print_tag_stack(error, output);
|
363
|
-
|
365
|
+
}
|
366
|
+
return;
|
367
|
+
case GUMBO_TOKEN_START_TAG:
|
368
|
+
case GUMBO_TOKEN_END_TAG:
|
369
|
+
{
|
370
|
+
const char* tag_name;
|
371
|
+
const char* which = error->input_type == GUMBO_TOKEN_START_TAG ? "Start" : "End";
|
372
|
+
if (error->input_name) {
|
373
|
+
tag_name = error->input_name;
|
374
|
+
} else {
|
375
|
+
tag_name = gumbo_normalized_tagname(error->input_tag);
|
376
|
+
}
|
377
|
+
print_message(output, "%s tag '%s' isn't allowed here.", which, tag_name);
|
378
|
+
print_tag_stack(error, output);
|
379
|
+
return;
|
380
|
+
}
|
364
381
|
}
|
365
382
|
}
|
366
383
|
|
@@ -613,6 +630,17 @@ void gumbo_print_caret_diagnostic (
|
|
613
630
|
|
614
631
|
void gumbo_error_destroy(GumboError* error) {
|
615
632
|
if (error->type == GUMBO_ERR_PARSER) {
|
633
|
+
// Free the tag name.
|
634
|
+
if (error->v.parser.input_name) {
|
635
|
+
gumbo_free(error->v.parser.input_name);
|
636
|
+
}
|
637
|
+
|
638
|
+
for (unsigned int i = 0; i < error->v.parser.tag_stack.length; ++i) {
|
639
|
+
intptr_t tag = (intptr_t) error->v.parser.tag_stack.data[i];
|
640
|
+
if (tag > GUMBO_TAG_UNKNOWN) {
|
641
|
+
gumbo_free(error->v.parser.tag_stack.data[i]);
|
642
|
+
}
|
643
|
+
}
|
616
644
|
gumbo_vector_destroy(&error->v.parser.tag_stack);
|
617
645
|
}
|
618
646
|
gumbo_free(error);
|
data/gumbo-parser/src/error.h
CHANGED
@@ -95,12 +95,16 @@ typedef struct GumboInternalParserError {
|
|
95
95
|
// The HTML tag of the input token. TAG_UNKNOWN if this was not a tag token.
|
96
96
|
GumboTag input_tag;
|
97
97
|
|
98
|
+
// The HTML tag of the input token if it was nonstandard tag token. NULL otherwise.
|
99
|
+
char *input_name;
|
100
|
+
|
98
101
|
// The insertion mode that the parser was in at the time.
|
99
102
|
GumboInsertionMode parser_state;
|
100
103
|
|
101
104
|
// The tag stack at the point of the error. Note that this is an GumboVector
|
102
105
|
// of GumboTag's *stored by value* - cast the void* to an GumboTag directly to
|
103
|
-
// get at the tag.
|
106
|
+
// get at the tag. For nonstandard tags, this is a pointer to an owned char *
|
107
|
+
// containing the tag name.
|
104
108
|
GumboVector /* GumboTag */ tag_stack;
|
105
109
|
} GumboParserError;
|
106
110
|
|
@@ -780,6 +780,15 @@ typedef struct GumboInternalOptions {
|
|
780
780
|
* Default: `false`.
|
781
781
|
*/
|
782
782
|
bool fragment_context_has_form_ancestor;
|
783
|
+
|
784
|
+
/**
|
785
|
+
* Parse `noscript` elements as if scripting was enabled. This causes the
|
786
|
+
* contents of the `noscript` element to be parsed as raw text, rather
|
787
|
+
* than as HTML elements.
|
788
|
+
*
|
789
|
+
* Default: `false`.
|
790
|
+
*/
|
791
|
+
bool parse_noscript_content_as_text;
|
783
792
|
} GumboOptions;
|
784
793
|
|
785
794
|
/** Default options struct; use this with gumbo_parse_with_options. */
|
@@ -791,7 +800,7 @@ extern const GumboOptions kGumboDefaultOptions;
|
|
791
800
|
*/
|
792
801
|
typedef enum {
|
793
802
|
/**
|
794
|
-
* Indicates that parsing completed
|
803
|
+
* Indicates that parsing completed successfully. The resulting tree
|
795
804
|
* will be a complete document.
|
796
805
|
*/
|
797
806
|
GUMBO_STATUS_OK,
|
@@ -841,7 +850,7 @@ typedef struct GumboInternalOutput {
|
|
841
850
|
GumboVector /* GumboError */ errors;
|
842
851
|
|
843
852
|
/**
|
844
|
-
* True if the parser
|
853
|
+
* True if the parser encountered an error.
|
845
854
|
*
|
846
855
|
* This can be true and `errors` an empty `GumboVector` if the `max_errors`
|
847
856
|
* option was set to 0.
|