nokogiri 1.16.8 → 1.18.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +11 -21
- data/LICENSE-DEPENDENCIES.md +6 -6
- data/README.md +8 -5
- data/dependencies.yml +6 -6
- data/ext/nokogiri/extconf.rb +188 -142
- data/ext/nokogiri/gumbo.c +69 -53
- data/ext/nokogiri/html4_document.c +10 -4
- data/ext/nokogiri/html4_element_description.c +18 -18
- data/ext/nokogiri/html4_sax_parser.c +40 -0
- data/ext/nokogiri/html4_sax_parser_context.c +48 -58
- data/ext/nokogiri/html4_sax_push_parser.c +25 -24
- data/ext/nokogiri/libxml2_polyfill.c +114 -0
- data/ext/nokogiri/nokogiri.c +9 -2
- data/ext/nokogiri/nokogiri.h +18 -33
- data/ext/nokogiri/xml_attr.c +1 -1
- data/ext/nokogiri/xml_cdata.c +2 -10
- data/ext/nokogiri/xml_comment.c +3 -8
- data/ext/nokogiri/xml_document.c +163 -156
- data/ext/nokogiri/xml_document_fragment.c +10 -25
- data/ext/nokogiri/xml_dtd.c +1 -1
- data/ext/nokogiri/xml_element_content.c +9 -9
- data/ext/nokogiri/xml_encoding_handler.c +4 -4
- data/ext/nokogiri/xml_namespace.c +6 -6
- data/ext/nokogiri/xml_node.c +134 -103
- data/ext/nokogiri/xml_node_set.c +46 -44
- data/ext/nokogiri/xml_reader.c +54 -58
- data/ext/nokogiri/xml_relax_ng.c +35 -56
- data/ext/nokogiri/xml_sax_parser.c +156 -88
- data/ext/nokogiri/xml_sax_parser_context.c +219 -131
- data/ext/nokogiri/xml_sax_push_parser.c +68 -49
- data/ext/nokogiri/xml_schema.c +50 -85
- data/ext/nokogiri/xml_syntax_error.c +19 -11
- data/ext/nokogiri/xml_text.c +2 -4
- data/ext/nokogiri/xml_xpath_context.c +103 -100
- data/ext/nokogiri/xslt_stylesheet.c +8 -8
- data/gumbo-parser/src/ascii.c +2 -2
- data/gumbo-parser/src/error.c +76 -48
- data/gumbo-parser/src/error.h +5 -1
- data/gumbo-parser/src/nokogiri_gumbo.h +11 -2
- data/gumbo-parser/src/parser.c +63 -25
- data/gumbo-parser/src/tokenizer.c +6 -6
- data/lib/nokogiri/class_resolver.rb +1 -1
- data/lib/nokogiri/css/node.rb +6 -2
- data/lib/nokogiri/css/parser.rb +6 -4
- data/lib/nokogiri/css/parser.y +2 -2
- data/lib/nokogiri/css/parser_extras.rb +6 -66
- data/lib/nokogiri/css/selector_cache.rb +38 -0
- data/lib/nokogiri/css/tokenizer.rb +4 -4
- data/lib/nokogiri/css/tokenizer.rex +9 -8
- data/lib/nokogiri/css/xpath_visitor.rb +43 -6
- data/lib/nokogiri/css.rb +86 -20
- data/lib/nokogiri/decorators/slop.rb +3 -5
- data/lib/nokogiri/encoding_handler.rb +2 -2
- data/lib/nokogiri/html4/document.rb +44 -23
- data/lib/nokogiri/html4/document_fragment.rb +124 -12
- data/lib/nokogiri/html4/encoding_reader.rb +1 -1
- data/lib/nokogiri/html4/sax/parser.rb +23 -38
- data/lib/nokogiri/html4/sax/parser_context.rb +4 -9
- data/lib/nokogiri/html4.rb +9 -14
- data/lib/nokogiri/html5/builder.rb +40 -0
- data/lib/nokogiri/html5/document.rb +61 -30
- data/lib/nokogiri/html5/document_fragment.rb +130 -20
- data/lib/nokogiri/html5/node.rb +4 -4
- data/lib/nokogiri/html5.rb +114 -72
- data/lib/nokogiri/version/constant.rb +1 -1
- data/lib/nokogiri/xml/builder.rb +8 -1
- data/lib/nokogiri/xml/document.rb +70 -26
- data/lib/nokogiri/xml/document_fragment.rb +84 -13
- data/lib/nokogiri/xml/node.rb +82 -11
- data/lib/nokogiri/xml/node_set.rb +9 -7
- data/lib/nokogiri/xml/parse_options.rb +1 -1
- data/lib/nokogiri/xml/pp/node.rb +6 -1
- data/lib/nokogiri/xml/reader.rb +46 -13
- data/lib/nokogiri/xml/relax_ng.rb +57 -20
- data/lib/nokogiri/xml/sax/document.rb +174 -83
- data/lib/nokogiri/xml/sax/parser.rb +115 -41
- data/lib/nokogiri/xml/sax/parser_context.rb +116 -8
- data/lib/nokogiri/xml/sax/push_parser.rb +3 -0
- data/lib/nokogiri/xml/sax.rb +48 -0
- data/lib/nokogiri/xml/schema.rb +112 -45
- data/lib/nokogiri/xml/searchable.rb +38 -42
- data/lib/nokogiri/xml/syntax_error.rb +22 -0
- data/lib/nokogiri/xml/xpath_context.rb +14 -3
- data/lib/nokogiri/xml.rb +13 -24
- data/lib/nokogiri/xslt.rb +3 -9
- data/lib/xsd/xmlparser/nokogiri.rb +3 -4
- data/patches/libxml2/0019-xpath-Use-separate-static-hash-table-for-standard-fu.patch +244 -0
- data/ports/archives/libxml2-2.13.5.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.42.tar.xz +0 -0
- metadata +13 -12
- data/ext/nokogiri/libxml2_backwards_compat.c +0 -121
- data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +0 -25
- data/ports/archives/libxml2-2.12.9.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.39.tar.xz +0 -0
@@ -12,23 +12,23 @@ static const xmlChar *NOKOGIRI_BUILTIN_PREFIX = (const xmlChar *)"nokogiri-built
|
|
12
12
|
static const xmlChar *NOKOGIRI_BUILTIN_URI = (const xmlChar *)"https://www.nokogiri.org/default_ns/ruby/builtins";
|
13
13
|
|
14
14
|
static void
|
15
|
-
|
15
|
+
_noko_xml_xpath_context_dfree(void *data)
|
16
16
|
{
|
17
17
|
xmlXPathContextPtr c_context = data;
|
18
18
|
xmlXPathFreeContext(c_context);
|
19
19
|
}
|
20
20
|
|
21
|
-
static const rb_data_type_t
|
22
|
-
.wrap_struct_name = "
|
21
|
+
static const rb_data_type_t _noko_xml_xpath_context_type = {
|
22
|
+
.wrap_struct_name = "xmlXPathContext",
|
23
23
|
.function = {
|
24
|
-
.dfree =
|
24
|
+
.dfree = _noko_xml_xpath_context_dfree,
|
25
25
|
},
|
26
26
|
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
|
27
27
|
};
|
28
28
|
|
29
29
|
/* find a CSS class in an HTML element's `class` attribute */
|
30
30
|
static const xmlChar *
|
31
|
-
|
31
|
+
_noko_xml_xpath_context__css_class(const xmlChar *str, const xmlChar *val)
|
32
32
|
{
|
33
33
|
int val_len;
|
34
34
|
|
@@ -62,9 +62,9 @@ builtin_css_class(const xmlChar *str, const xmlChar *val)
|
|
62
62
|
return (NULL);
|
63
63
|
}
|
64
64
|
|
65
|
-
/* xmlXPathFunction to wrap
|
65
|
+
/* xmlXPathFunction to wrap _noko_xml_xpath_context__css_class() */
|
66
66
|
static void
|
67
|
-
|
67
|
+
noko_xml_xpath_context_xpath_func_css_class(xmlXPathParserContextPtr ctxt, int nargs)
|
68
68
|
{
|
69
69
|
xmlXPathObjectPtr hay, needle;
|
70
70
|
|
@@ -85,7 +85,7 @@ xpath_builtin_css_class(xmlXPathParserContextPtr ctxt, int nargs)
|
|
85
85
|
XP_ERROR(XPATH_INVALID_TYPE);
|
86
86
|
}
|
87
87
|
|
88
|
-
if (
|
88
|
+
if (_noko_xml_xpath_context__css_class(hay->stringval, needle->stringval)) {
|
89
89
|
valuePush(ctxt, xmlXPathNewBoolean(1));
|
90
90
|
} else {
|
91
91
|
valuePush(ctxt, xmlXPathNewBoolean(0));
|
@@ -99,7 +99,7 @@ xpath_builtin_css_class(xmlXPathParserContextPtr ctxt, int nargs)
|
|
99
99
|
/* xmlXPathFunction to select nodes whose local name matches, for HTML5 CSS queries that should
|
100
100
|
* ignore namespaces */
|
101
101
|
static void
|
102
|
-
|
102
|
+
noko_xml_xpath_context_xpath_func_local_name_is(xmlXPathParserContextPtr ctxt, int nargs)
|
103
103
|
{
|
104
104
|
xmlXPathObjectPtr element_name;
|
105
105
|
|
@@ -124,25 +124,26 @@ xpath_builtin_local_name_is(xmlXPathParserContextPtr ctxt, int nargs)
|
|
124
124
|
* register_ns(prefix, uri) → Nokogiri::XML::XPathContext
|
125
125
|
*
|
126
126
|
* Register the namespace with +prefix+ and +uri+ for use in future queries.
|
127
|
+
* Passing a uri of +nil+ will unregister the namespace.
|
127
128
|
*
|
128
129
|
* [Returns] +self+
|
129
130
|
*/
|
130
131
|
static VALUE
|
131
|
-
|
132
|
+
noko_xml_xpath_context_register_ns(VALUE rb_context, VALUE prefix, VALUE uri)
|
132
133
|
{
|
133
134
|
xmlXPathContextPtr c_context;
|
135
|
+
const xmlChar *ns_uri;
|
134
136
|
|
135
|
-
TypedData_Get_Struct(
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
137
|
+
TypedData_Get_Struct(rb_context, xmlXPathContext, &_noko_xml_xpath_context_type, c_context);
|
138
|
+
|
139
|
+
if (NIL_P(uri)) {
|
140
|
+
ns_uri = NULL;
|
141
|
+
} else {
|
142
|
+
ns_uri = (const xmlChar *)StringValueCStr(uri);
|
143
|
+
}
|
144
|
+
|
145
|
+
xmlXPathRegisterNs(c_context, (const xmlChar *)StringValueCStr(prefix), ns_uri);
|
141
146
|
|
142
|
-
xmlXPathRegisterNs(c_context,
|
143
|
-
(const xmlChar *)StringValueCStr(prefix),
|
144
|
-
(const xmlChar *)StringValueCStr(uri)
|
145
|
-
);
|
146
147
|
return rb_context;
|
147
148
|
}
|
148
149
|
|
@@ -151,29 +152,25 @@ rb_xml_xpath_context_register_ns(VALUE rb_context, VALUE prefix, VALUE uri)
|
|
151
152
|
* register_variable(name, value) → Nokogiri::XML::XPathContext
|
152
153
|
*
|
153
154
|
* Register the variable +name+ with +value+ for use in future queries.
|
155
|
+
* Passing a value of +nil+ will unregister the variable.
|
154
156
|
*
|
155
157
|
* [Returns] +self+
|
156
158
|
*/
|
157
159
|
static VALUE
|
158
|
-
|
160
|
+
noko_xml_xpath_context_register_variable(VALUE rb_context, VALUE name, VALUE value)
|
159
161
|
{
|
160
162
|
xmlXPathContextPtr c_context;
|
161
163
|
xmlXPathObjectPtr xmlValue;
|
162
164
|
|
163
|
-
TypedData_Get_Struct(
|
164
|
-
rb_context,
|
165
|
-
xmlXPathContext,
|
166
|
-
&xml_xpath_context_type,
|
167
|
-
c_context
|
168
|
-
);
|
165
|
+
TypedData_Get_Struct(rb_context, xmlXPathContext, &_noko_xml_xpath_context_type, c_context);
|
169
166
|
|
170
|
-
|
167
|
+
if (NIL_P(value)) {
|
168
|
+
xmlValue = NULL;
|
169
|
+
} else {
|
170
|
+
xmlValue = xmlXPathNewCString(StringValueCStr(value));
|
171
|
+
}
|
171
172
|
|
172
|
-
xmlXPathRegisterVariable(
|
173
|
-
c_context,
|
174
|
-
(const xmlChar *)StringValueCStr(name),
|
175
|
-
xmlValue
|
176
|
-
);
|
173
|
+
xmlXPathRegisterVariable(c_context, (const xmlChar *)StringValueCStr(name), xmlValue);
|
177
174
|
|
178
175
|
return rb_context;
|
179
176
|
}
|
@@ -184,7 +181,7 @@ rb_xml_xpath_context_register_variable(VALUE rb_context, VALUE name, VALUE value
|
|
184
181
|
* returns Qundef if no conversion was possible.
|
185
182
|
*/
|
186
183
|
static VALUE
|
187
|
-
|
184
|
+
_noko_xml_xpath_context__xpath2ruby(xmlXPathObjectPtr c_xpath_object, xmlXPathContextPtr c_context)
|
188
185
|
{
|
189
186
|
VALUE rb_retval;
|
190
187
|
|
@@ -238,7 +235,7 @@ Nokogiri_marshal_xpath_funcall_and_return_values(
|
|
238
235
|
|
239
236
|
for (int j = argc - 1 ; j >= 0 ; --j) {
|
240
237
|
c_xpath_object = valuePop(ctxt);
|
241
|
-
argv[j] =
|
238
|
+
argv[j] = _noko_xml_xpath_context__xpath2ruby(c_xpath_object, ctxt->context);
|
242
239
|
if (argv[j] == Qundef) {
|
243
240
|
argv[j] = NOKOGIRI_STR_NEW2(xmlXPathCastToString(c_xpath_object));
|
244
241
|
}
|
@@ -294,7 +291,7 @@ Nokogiri_marshal_xpath_funcall_and_return_values(
|
|
294
291
|
}
|
295
292
|
|
296
293
|
static void
|
297
|
-
|
294
|
+
_noko_xml_xpath_context__handler_invoker(xmlXPathParserContextPtr ctxt, int argc)
|
298
295
|
{
|
299
296
|
VALUE rb_xpath_handler = Qnil;
|
300
297
|
const char *method_name = NULL ;
|
@@ -316,15 +313,15 @@ method_caller(xmlXPathParserContextPtr ctxt, int argc)
|
|
316
313
|
}
|
317
314
|
|
318
315
|
static xmlXPathFunction
|
319
|
-
|
316
|
+
_noko_xml_xpath_context_handler_lookup(void *data, const xmlChar *c_name, const xmlChar *c_ns_uri)
|
320
317
|
{
|
321
318
|
VALUE rb_handler = (VALUE)data;
|
322
319
|
if (rb_respond_to(rb_handler, rb_intern((const char *)c_name))) {
|
323
320
|
if (c_ns_uri == NULL) {
|
324
321
|
NOKO_WARN_DEPRECATION("A custom XPath or CSS handler function named '%s' is being invoked without a namespace. Please update your query to reference this function as 'nokogiri:%s'. Invoking custom handler functions without a namespace is deprecated and will become an error in Nokogiri v1.17.0.",
|
325
|
-
c_name, c_name); // deprecated in v1.15.0, remove in v1.
|
322
|
+
c_name, c_name); // TODO deprecated in v1.15.0, remove in v1.19.0
|
326
323
|
}
|
327
|
-
return
|
324
|
+
return _noko_xml_xpath_context__handler_invoker;
|
328
325
|
}
|
329
326
|
|
330
327
|
return NULL;
|
@@ -332,7 +329,7 @@ handler_lookup(void *data, const xmlChar *c_name, const xmlChar *c_ns_uri)
|
|
332
329
|
|
333
330
|
PRINTFLIKE_DECL(2, 3)
|
334
331
|
static void
|
335
|
-
|
332
|
+
_noko_xml_xpath_context__generic_exception_pusher(void *data, const char *msg, ...)
|
336
333
|
{
|
337
334
|
VALUE rb_errors = (VALUE)data;
|
338
335
|
VALUE rb_message;
|
@@ -343,7 +340,7 @@ generic_exception_pusher(void *data, const char *msg, ...)
|
|
343
340
|
#ifdef TRUFFLERUBY_NOKOGIRI_SYSTEM_LIBRARIES
|
344
341
|
/* It is not currently possible to pass var args from native
|
345
342
|
functions to sulong, so we work around the issue here. */
|
346
|
-
rb_message = rb_sprintf("
|
343
|
+
rb_message = rb_sprintf("_noko_xml_xpath_context__generic_exception_pusher: %s", msg);
|
347
344
|
#else
|
348
345
|
va_list args;
|
349
346
|
va_start(args, msg);
|
@@ -365,58 +362,55 @@ generic_exception_pusher(void *data, const char *msg, ...)
|
|
365
362
|
* a +Float+, or a boolean.
|
366
363
|
*/
|
367
364
|
static VALUE
|
368
|
-
|
365
|
+
noko_xml_xpath_context_evaluate(int argc, VALUE *argv, VALUE rb_context)
|
369
366
|
{
|
370
|
-
VALUE search_path, xpath_handler;
|
371
|
-
VALUE retval = Qnil;
|
372
367
|
xmlXPathContextPtr c_context;
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
xmlXPathContext,
|
380
|
-
&xml_xpath_context_type,
|
381
|
-
c_context
|
382
|
-
);
|
368
|
+
VALUE rb_expression = Qnil;
|
369
|
+
VALUE rb_function_lookup_handler = Qnil;
|
370
|
+
xmlChar *c_expression_str = NULL;
|
371
|
+
VALUE rb_errors = rb_ary_new();
|
372
|
+
xmlXPathObjectPtr c_xpath_object;
|
373
|
+
VALUE rb_xpath_object = Qnil;
|
383
374
|
|
384
|
-
|
385
|
-
|
386
|
-
|
375
|
+
TypedData_Get_Struct(rb_context, xmlXPathContext, &_noko_xml_xpath_context_type, c_context);
|
376
|
+
|
377
|
+
rb_scan_args(argc, argv, "11", &rb_expression, &rb_function_lookup_handler);
|
387
378
|
|
388
|
-
|
379
|
+
c_expression_str = (xmlChar *)StringValueCStr(rb_expression);
|
389
380
|
|
390
|
-
if (Qnil !=
|
381
|
+
if (Qnil != rb_function_lookup_handler) {
|
391
382
|
/* FIXME: not sure if this is the correct place to shove private data. */
|
392
|
-
c_context->userData = (void *)
|
383
|
+
c_context->userData = (void *)rb_function_lookup_handler;
|
393
384
|
xmlXPathRegisterFuncLookup(
|
394
385
|
c_context,
|
395
|
-
|
396
|
-
(void *)
|
386
|
+
_noko_xml_xpath_context_handler_lookup,
|
387
|
+
(void *)rb_function_lookup_handler
|
397
388
|
);
|
398
389
|
}
|
399
390
|
|
400
|
-
|
401
|
-
|
391
|
+
/* TODO: use xmlXPathSetErrorHandler (as of 2.13.0) */
|
392
|
+
xmlSetStructuredErrorFunc((void *)rb_errors, noko__error_array_pusher);
|
393
|
+
xmlSetGenericErrorFunc((void *)rb_errors, _noko_xml_xpath_context__generic_exception_pusher);
|
402
394
|
|
403
|
-
|
395
|
+
c_xpath_object = xmlXPathEvalExpression(c_expression_str, c_context);
|
404
396
|
|
405
397
|
xmlSetStructuredErrorFunc(NULL, NULL);
|
406
398
|
xmlSetGenericErrorFunc(NULL, NULL);
|
407
399
|
|
408
|
-
|
409
|
-
|
400
|
+
xmlXPathRegisterFuncLookup(c_context, NULL, NULL);
|
401
|
+
|
402
|
+
if (c_xpath_object == NULL) {
|
403
|
+
rb_exc_raise(rb_ary_entry(rb_errors, 0));
|
410
404
|
}
|
411
405
|
|
412
|
-
|
413
|
-
if (
|
414
|
-
|
406
|
+
rb_xpath_object = _noko_xml_xpath_context__xpath2ruby(c_xpath_object, c_context);
|
407
|
+
if (rb_xpath_object == Qundef) {
|
408
|
+
rb_xpath_object = noko_xml_node_set_wrap(NULL, DOC_RUBY_OBJECT(c_context->doc));
|
415
409
|
}
|
416
410
|
|
417
|
-
xmlXPathFreeNodeSetList(
|
411
|
+
xmlXPathFreeNodeSetList(c_xpath_object);
|
418
412
|
|
419
|
-
return
|
413
|
+
return rb_xpath_object;
|
420
414
|
}
|
421
415
|
|
422
416
|
/*
|
@@ -426,45 +420,53 @@ rb_xml_xpath_context_evaluate(int argc, VALUE *argv, VALUE rb_context)
|
|
426
420
|
* Create a new XPathContext with +node+ as the context node.
|
427
421
|
*/
|
428
422
|
static VALUE
|
429
|
-
|
423
|
+
noko_xml_xpath_context_new(VALUE klass, VALUE rb_node)
|
430
424
|
{
|
431
|
-
xmlNodePtr
|
425
|
+
xmlNodePtr c_node;
|
432
426
|
xmlXPathContextPtr c_context;
|
433
427
|
VALUE rb_context;
|
434
428
|
|
435
|
-
Noko_Node_Get_Struct(rb_node, xmlNode,
|
429
|
+
Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
|
436
430
|
|
437
431
|
#if LIBXML_VERSION < 21000
|
438
|
-
/* deprecated in 40483d0 */
|
439
|
-
xmlXPathInit();
|
432
|
+
xmlXPathInit(); /* deprecated in 40483d0 */
|
440
433
|
#endif
|
441
434
|
|
442
|
-
c_context = xmlXPathNewContext(
|
443
|
-
c_context->node =
|
435
|
+
c_context = xmlXPathNewContext(c_node->doc);
|
436
|
+
c_context->node = c_node;
|
444
437
|
|
445
438
|
xmlXPathRegisterNs(c_context, NOKOGIRI_PREFIX, NOKOGIRI_URI);
|
446
439
|
xmlXPathRegisterNs(c_context, NOKOGIRI_BUILTIN_PREFIX, NOKOGIRI_BUILTIN_URI);
|
447
|
-
xmlXPathRegisterFuncNS(
|
448
|
-
c_context,
|
449
|
-
(const xmlChar *)"css-class",
|
450
|
-
NOKOGIRI_BUILTIN_URI,
|
451
|
-
xpath_builtin_css_class
|
452
|
-
);
|
453
|
-
xmlXPathRegisterFuncNS(
|
454
|
-
c_context,
|
455
|
-
(const xmlChar *)"local-name-is",
|
456
|
-
NOKOGIRI_BUILTIN_URI,
|
457
|
-
xpath_builtin_local_name_is
|
458
|
-
);
|
459
440
|
|
460
|
-
|
461
|
-
|
462
|
-
|
463
|
-
|
464
|
-
|
441
|
+
xmlXPathRegisterFuncNS(c_context,
|
442
|
+
(const xmlChar *)"css-class", NOKOGIRI_BUILTIN_URI,
|
443
|
+
noko_xml_xpath_context_xpath_func_css_class);
|
444
|
+
xmlXPathRegisterFuncNS(c_context,
|
445
|
+
(const xmlChar *)"local-name-is", NOKOGIRI_BUILTIN_URI,
|
446
|
+
noko_xml_xpath_context_xpath_func_local_name_is);
|
447
|
+
|
448
|
+
rb_context = TypedData_Wrap_Struct(klass, &_noko_xml_xpath_context_type, c_context);
|
449
|
+
|
465
450
|
return rb_context;
|
466
451
|
}
|
467
452
|
|
453
|
+
|
454
|
+
/* :nodoc: */
|
455
|
+
static VALUE
|
456
|
+
noko_xml_xpath_context_set_node(VALUE rb_context, VALUE rb_node)
|
457
|
+
{
|
458
|
+
xmlNodePtr c_node;
|
459
|
+
xmlXPathContextPtr c_context;
|
460
|
+
|
461
|
+
TypedData_Get_Struct(rb_context, xmlXPathContext, &_noko_xml_xpath_context_type, c_context);
|
462
|
+
Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
|
463
|
+
|
464
|
+
c_context->doc = c_node->doc;
|
465
|
+
c_context->node = c_node;
|
466
|
+
|
467
|
+
return rb_node;
|
468
|
+
}
|
469
|
+
|
468
470
|
void
|
469
471
|
noko_init_xml_xpath_context(void)
|
470
472
|
{
|
@@ -475,9 +477,10 @@ noko_init_xml_xpath_context(void)
|
|
475
477
|
|
476
478
|
rb_undef_alloc_func(cNokogiriXmlXpathContext);
|
477
479
|
|
478
|
-
rb_define_singleton_method(cNokogiriXmlXpathContext, "new",
|
480
|
+
rb_define_singleton_method(cNokogiriXmlXpathContext, "new", noko_xml_xpath_context_new, 1);
|
479
481
|
|
480
|
-
rb_define_method(cNokogiriXmlXpathContext, "evaluate",
|
481
|
-
rb_define_method(cNokogiriXmlXpathContext, "register_variable",
|
482
|
-
rb_define_method(cNokogiriXmlXpathContext, "register_ns",
|
482
|
+
rb_define_method(cNokogiriXmlXpathContext, "evaluate", noko_xml_xpath_context_evaluate, -1);
|
483
|
+
rb_define_method(cNokogiriXmlXpathContext, "register_variable", noko_xml_xpath_context_register_variable, 2);
|
484
|
+
rb_define_method(cNokogiriXmlXpathContext, "register_ns", noko_xml_xpath_context_register_ns, 2);
|
485
|
+
rb_define_method(cNokogiriXmlXpathContext, "node=", noko_xml_xpath_context_set_node, 1);
|
483
486
|
}
|
@@ -1,6 +1,6 @@
|
|
1
1
|
#include <nokogiri.h>
|
2
2
|
|
3
|
-
VALUE cNokogiriXsltStylesheet
|
3
|
+
VALUE cNokogiriXsltStylesheet;
|
4
4
|
|
5
5
|
static void
|
6
6
|
mark(void *data)
|
@@ -18,8 +18,8 @@ dealloc(void *data)
|
|
18
18
|
ruby_xfree(wrapper);
|
19
19
|
}
|
20
20
|
|
21
|
-
static const rb_data_type_t
|
22
|
-
.wrap_struct_name = "
|
21
|
+
static const rb_data_type_t nokogiri_xslt_stylesheet_tuple_type = {
|
22
|
+
.wrap_struct_name = "nokogiriXsltStylesheetTuple",
|
23
23
|
.function = {
|
24
24
|
.dmark = mark,
|
25
25
|
.dfree = dealloc,
|
@@ -56,7 +56,7 @@ Nokogiri_wrap_xslt_stylesheet(xsltStylesheetPtr ss)
|
|
56
56
|
self = TypedData_Make_Struct(
|
57
57
|
cNokogiriXsltStylesheet,
|
58
58
|
nokogiriXsltStylesheetTuple,
|
59
|
-
&
|
59
|
+
&nokogiri_xslt_stylesheet_tuple_type,
|
60
60
|
wrapper
|
61
61
|
);
|
62
62
|
|
@@ -124,7 +124,7 @@ rb_xslt_stylesheet_serialize(VALUE self, VALUE xmlobj)
|
|
124
124
|
TypedData_Get_Struct(
|
125
125
|
self,
|
126
126
|
nokogiriXsltStylesheetTuple,
|
127
|
-
&
|
127
|
+
&nokogiri_xslt_stylesheet_tuple_type,
|
128
128
|
wrapper
|
129
129
|
);
|
130
130
|
xsltSaveResultToString(&doc_ptr, &doc_len, xml, wrapper->ss);
|
@@ -273,7 +273,7 @@ rb_xslt_stylesheet_transform(int argc, VALUE *argv, VALUE self)
|
|
273
273
|
Check_Type(rb_param, T_ARRAY);
|
274
274
|
|
275
275
|
c_document = noko_xml_document_unwrap(rb_document);
|
276
|
-
TypedData_Get_Struct(self, nokogiriXsltStylesheetTuple, &
|
276
|
+
TypedData_Get_Struct(self, nokogiriXsltStylesheetTuple, &nokogiri_xslt_stylesheet_tuple_type, wrapper);
|
277
277
|
|
278
278
|
param_len = RARRAY_LEN(rb_param);
|
279
279
|
params = ruby_xcalloc((size_t)param_len + 1, sizeof(char *));
|
@@ -362,7 +362,7 @@ initFunc(xsltTransformContextPtr ctxt, const xmlChar *uri)
|
|
362
362
|
TypedData_Get_Struct(
|
363
363
|
(VALUE)ctxt->style->_private,
|
364
364
|
nokogiriXsltStylesheetTuple,
|
365
|
-
&
|
365
|
+
&nokogiri_xslt_stylesheet_tuple_type,
|
366
366
|
wrapper
|
367
367
|
);
|
368
368
|
inst = rb_class_new_instance(0, NULL, obj);
|
@@ -380,7 +380,7 @@ shutdownFunc(xsltTransformContextPtr ctxt,
|
|
380
380
|
TypedData_Get_Struct(
|
381
381
|
(VALUE)ctxt->style->_private,
|
382
382
|
nokogiriXsltStylesheetTuple,
|
383
|
-
&
|
383
|
+
&nokogiri_xslt_stylesheet_tuple_type,
|
384
384
|
wrapper
|
385
385
|
);
|
386
386
|
|
data/gumbo-parser/src/ascii.c
CHANGED
@@ -44,8 +44,8 @@ const unsigned char _gumbo_ascii_table[0x80] = {
|
|
44
44
|
};
|
45
45
|
|
46
46
|
// Table generation code.
|
47
|
-
// clang -DGUMBO_GEN_TABLE
|
48
|
-
#
|
47
|
+
// clang -DGUMBO_GEN_TABLE ascii.c && ./a.out && rm a.out
|
48
|
+
#ifdef GUMBO_GEN_TABLE
|
49
49
|
#include <stdio.h>
|
50
50
|
|
51
51
|
int main() {
|
data/gumbo-parser/src/error.c
CHANGED
@@ -46,33 +46,40 @@ static int PRINTF(2) print_message (
|
|
46
46
|
args
|
47
47
|
);
|
48
48
|
va_end(args);
|
49
|
-
|
49
|
+
|
50
|
+
#if (defined(_MSC_VER) && (_MSC_VER < 1900)) || defined(_RUBY_MSVCRT)
|
50
51
|
if (bytes_written == -1) {
|
51
52
|
// vsnprintf returns -1 on older MSVC++ if there's not enough capacity,
|
52
53
|
// instead of returning the number of bytes that would've been written had
|
53
|
-
// there been enough. In this case, we
|
54
|
-
//
|
55
|
-
//
|
56
|
-
|
54
|
+
// there been enough. In this case, we can call vsnprintf() again but
|
55
|
+
// with a count of 0 to get the number of bytes that would be written, not including
|
56
|
+
// the null terminator.
|
57
|
+
// https://learn.microsoft.com/en-us/cpp/c-runtime-library/reference/vsnprintf-vsnprintf-vsnprintf-l-vsnwprintf-vsnwprintf-l?view=msvc-140#behavior-summary
|
58
|
+
|
57
59
|
va_start(args, format);
|
58
|
-
|
59
|
-
|
60
|
-
|
60
|
+
bytes_written = vsnprintf (
|
61
|
+
NULL,
|
62
|
+
0,
|
61
63
|
format,
|
62
64
|
args
|
63
65
|
);
|
64
66
|
va_end(args);
|
65
|
-
return result == -1 ? 0 : result;
|
66
67
|
}
|
67
|
-
#
|
68
|
+
#endif
|
69
|
+
|
68
70
|
// -1 in standard C99 indicates an encoding error. Return 0 and do nothing.
|
69
71
|
if (bytes_written == -1) {
|
70
72
|
return 0;
|
71
73
|
}
|
72
|
-
#endif
|
73
74
|
|
74
75
|
if (bytes_written >= remaining_capacity) {
|
75
|
-
|
76
|
+
// At least double the size of the buffer.
|
77
|
+
size_t new_capacity = output->capacity * 2;
|
78
|
+
if (new_capacity < output->length + bytes_written + 1) {
|
79
|
+
// The +1 is for the null terminator.
|
80
|
+
new_capacity = output->length + bytes_written + 1;
|
81
|
+
}
|
82
|
+
gumbo_string_buffer_reserve(new_capacity, output);
|
76
83
|
remaining_capacity = output->capacity - output->length;
|
77
84
|
va_start(args, format);
|
78
85
|
bytes_written = vsnprintf (
|
@@ -96,8 +103,14 @@ static void print_tag_stack (
|
|
96
103
|
if (i) {
|
97
104
|
print_message(output, ", ");
|
98
105
|
}
|
99
|
-
|
100
|
-
|
106
|
+
uintptr_t tag = (uintptr_t) error->tag_stack.data[i];
|
107
|
+
const char* tag_name;
|
108
|
+
if (tag > GUMBO_TAG_UNKNOWN) {
|
109
|
+
tag_name = error->tag_stack.data[i];
|
110
|
+
} else {
|
111
|
+
tag_name = gumbo_normalized_tagname((GumboTag)tag);
|
112
|
+
}
|
113
|
+
print_message(output, "%s", tag_name);
|
101
114
|
}
|
102
115
|
gumbo_string_buffer_append_codepoint('.', output);
|
103
116
|
}
|
@@ -326,41 +339,45 @@ static void handle_parser_error (
|
|
326
339
|
}
|
327
340
|
|
328
341
|
switch (error->input_type) {
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
print_tag_stack(error, output);
|
352
|
-
}
|
353
|
-
return;
|
354
|
-
case GUMBO_TOKEN_START_TAG:
|
355
|
-
print_message(output, "Start tag '%s' isn't allowed here.",
|
356
|
-
gumbo_normalized_tagname(error->input_tag));
|
357
|
-
print_tag_stack(error, output);
|
358
|
-
return;
|
359
|
-
case GUMBO_TOKEN_END_TAG:
|
360
|
-
print_message(output, "End tag '%s' isn't allowed here.",
|
361
|
-
gumbo_normalized_tagname(error->input_tag));
|
342
|
+
case GUMBO_TOKEN_DOCTYPE:
|
343
|
+
print_message(output, "This is not a legal doctype");
|
344
|
+
return;
|
345
|
+
case GUMBO_TOKEN_COMMENT:
|
346
|
+
// Should never happen; comments are always legal.
|
347
|
+
assert(0);
|
348
|
+
// But just in case...
|
349
|
+
print_message(output, "Comments aren't legal here");
|
350
|
+
return;
|
351
|
+
case GUMBO_TOKEN_CDATA:
|
352
|
+
case GUMBO_TOKEN_WHITESPACE:
|
353
|
+
case GUMBO_TOKEN_CHARACTER:
|
354
|
+
print_message(output, "Character tokens aren't legal here");
|
355
|
+
return;
|
356
|
+
case GUMBO_TOKEN_NULL:
|
357
|
+
print_message(output, "Null bytes are not allowed in HTML5");
|
358
|
+
return;
|
359
|
+
case GUMBO_TOKEN_EOF:
|
360
|
+
if (error->parser_state == GUMBO_INSERTION_MODE_INITIAL) {
|
361
|
+
print_message(output, "You must provide a doctype");
|
362
|
+
} else {
|
363
|
+
print_message(output, "Premature end of file.");
|
362
364
|
print_tag_stack(error, output);
|
363
|
-
|
365
|
+
}
|
366
|
+
return;
|
367
|
+
case GUMBO_TOKEN_START_TAG:
|
368
|
+
case GUMBO_TOKEN_END_TAG:
|
369
|
+
{
|
370
|
+
const char* tag_name;
|
371
|
+
const char* which = error->input_type == GUMBO_TOKEN_START_TAG ? "Start" : "End";
|
372
|
+
if (error->input_name) {
|
373
|
+
tag_name = error->input_name;
|
374
|
+
} else {
|
375
|
+
tag_name = gumbo_normalized_tagname(error->input_tag);
|
376
|
+
}
|
377
|
+
print_message(output, "%s tag '%s' isn't allowed here.", which, tag_name);
|
378
|
+
print_tag_stack(error, output);
|
379
|
+
return;
|
380
|
+
}
|
364
381
|
}
|
365
382
|
}
|
366
383
|
|
@@ -613,6 +630,17 @@ void gumbo_print_caret_diagnostic (
|
|
613
630
|
|
614
631
|
void gumbo_error_destroy(GumboError* error) {
|
615
632
|
if (error->type == GUMBO_ERR_PARSER) {
|
633
|
+
// Free the tag name.
|
634
|
+
if (error->v.parser.input_name) {
|
635
|
+
gumbo_free(error->v.parser.input_name);
|
636
|
+
}
|
637
|
+
|
638
|
+
for (unsigned int i = 0; i < error->v.parser.tag_stack.length; ++i) {
|
639
|
+
intptr_t tag = (intptr_t) error->v.parser.tag_stack.data[i];
|
640
|
+
if (tag > GUMBO_TAG_UNKNOWN) {
|
641
|
+
gumbo_free(error->v.parser.tag_stack.data[i]);
|
642
|
+
}
|
643
|
+
}
|
616
644
|
gumbo_vector_destroy(&error->v.parser.tag_stack);
|
617
645
|
}
|
618
646
|
gumbo_free(error);
|
data/gumbo-parser/src/error.h
CHANGED
@@ -95,12 +95,16 @@ typedef struct GumboInternalParserError {
|
|
95
95
|
// The HTML tag of the input token. TAG_UNKNOWN if this was not a tag token.
|
96
96
|
GumboTag input_tag;
|
97
97
|
|
98
|
+
// The tag name of the input token if it was a nonstandard tag token. NULL otherwise.
|
99
|
+
char *input_name;
|
100
|
+
|
98
101
|
// The insertion mode that the parser was in at the time.
|
99
102
|
GumboInsertionMode parser_state;
|
100
103
|
|
101
104
|
// The tag stack at the point of the error. Note that this is a GumboVector
|
102
105
|
// of GumboTags *stored by value* - cast the void* to a GumboTag directly to
|
103
|
-
// get at the tag.
|
106
|
+
// get at the tag. For nonstandard tags, this is a pointer to an owned char *
|
107
|
+
// containing the tag name.
|
104
108
|
GumboVector /* GumboTag */ tag_stack;
|
105
109
|
} GumboParserError;
|
106
110
|
|
@@ -780,6 +780,15 @@ typedef struct GumboInternalOptions {
|
|
780
780
|
* Default: `false`.
|
781
781
|
*/
|
782
782
|
bool fragment_context_has_form_ancestor;
|
783
|
+
|
784
|
+
/**
|
785
|
+
* Parse `noscript` elements as if scripting was enabled. This causes the
|
786
|
+
* contents of the `noscript` element to be parsed as raw text, rather
|
787
|
+
* than as HTML elements.
|
788
|
+
*
|
789
|
+
* Default: `false`.
|
790
|
+
*/
|
791
|
+
bool parse_noscript_content_as_text;
|
783
792
|
} GumboOptions;
|
784
793
|
|
785
794
|
/** Default options struct; use this with gumbo_parse_with_options. */
|
@@ -791,7 +800,7 @@ extern const GumboOptions kGumboDefaultOptions;
|
|
791
800
|
*/
|
792
801
|
typedef enum {
|
793
802
|
/**
|
794
|
-
* Indicates that parsing completed
|
803
|
+
* Indicates that parsing completed successfully. The resulting tree
|
795
804
|
* will be a complete document.
|
796
805
|
*/
|
797
806
|
GUMBO_STATUS_OK,
|
@@ -841,7 +850,7 @@ typedef struct GumboInternalOutput {
|
|
841
850
|
GumboVector /* GumboError */ errors;
|
842
851
|
|
843
852
|
/**
|
844
|
-
* True if the parser
|
853
|
+
* True if the parser encountered an error.
|
845
854
|
*
|
846
855
|
* This can be true and `errors` an empty `GumboVector` if the `max_errors`
|
847
856
|
* option was set to 0.
|