nokogiri 1.15.5 → 1.18.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102)
  1. checksums.yaml +4 -4
  2. data/Gemfile +12 -17
  3. data/LICENSE-DEPENDENCIES.md +6 -6
  4. data/README.md +11 -5
  5. data/dependencies.yml +9 -9
  6. data/ext/nokogiri/extconf.rb +191 -154
  7. data/ext/nokogiri/gumbo.c +69 -53
  8. data/ext/nokogiri/html4_document.c +10 -4
  9. data/ext/nokogiri/html4_element_description.c +18 -18
  10. data/ext/nokogiri/html4_sax_parser.c +40 -0
  11. data/ext/nokogiri/html4_sax_parser_context.c +48 -58
  12. data/ext/nokogiri/html4_sax_push_parser.c +26 -25
  13. data/ext/nokogiri/libxml2_polyfill.c +114 -0
  14. data/ext/nokogiri/nokogiri.c +9 -2
  15. data/ext/nokogiri/nokogiri.h +25 -33
  16. data/ext/nokogiri/test_global_handlers.c +1 -1
  17. data/ext/nokogiri/xml_attr.c +1 -1
  18. data/ext/nokogiri/xml_cdata.c +3 -12
  19. data/ext/nokogiri/xml_comment.c +3 -8
  20. data/ext/nokogiri/xml_document.c +167 -156
  21. data/ext/nokogiri/xml_document_fragment.c +10 -25
  22. data/ext/nokogiri/xml_dtd.c +1 -1
  23. data/ext/nokogiri/xml_element_content.c +9 -9
  24. data/ext/nokogiri/xml_encoding_handler.c +4 -4
  25. data/ext/nokogiri/xml_namespace.c +6 -10
  26. data/ext/nokogiri/xml_node.c +142 -108
  27. data/ext/nokogiri/xml_node_set.c +46 -44
  28. data/ext/nokogiri/xml_reader.c +74 -100
  29. data/ext/nokogiri/xml_relax_ng.c +35 -56
  30. data/ext/nokogiri/xml_sax_parser.c +156 -88
  31. data/ext/nokogiri/xml_sax_parser_context.c +220 -128
  32. data/ext/nokogiri/xml_sax_push_parser.c +69 -50
  33. data/ext/nokogiri/xml_schema.c +51 -87
  34. data/ext/nokogiri/xml_syntax_error.c +19 -11
  35. data/ext/nokogiri/xml_text.c +3 -6
  36. data/ext/nokogiri/xml_xpath_context.c +104 -104
  37. data/ext/nokogiri/xslt_stylesheet.c +16 -11
  38. data/gumbo-parser/Makefile +18 -0
  39. data/gumbo-parser/src/ascii.c +2 -2
  40. data/gumbo-parser/src/error.c +76 -48
  41. data/gumbo-parser/src/error.h +5 -1
  42. data/gumbo-parser/src/nokogiri_gumbo.h +11 -2
  43. data/gumbo-parser/src/parser.c +66 -25
  44. data/gumbo-parser/src/tokenizer.c +7 -6
  45. data/lib/nokogiri/class_resolver.rb +1 -1
  46. data/lib/nokogiri/css/node.rb +6 -2
  47. data/lib/nokogiri/css/parser.rb +6 -4
  48. data/lib/nokogiri/css/parser.y +2 -2
  49. data/lib/nokogiri/css/parser_extras.rb +6 -66
  50. data/lib/nokogiri/css/selector_cache.rb +38 -0
  51. data/lib/nokogiri/css/tokenizer.rb +4 -4
  52. data/lib/nokogiri/css/tokenizer.rex +9 -8
  53. data/lib/nokogiri/css/xpath_visitor.rb +44 -27
  54. data/lib/nokogiri/css.rb +86 -20
  55. data/lib/nokogiri/decorators/slop.rb +3 -5
  56. data/lib/nokogiri/encoding_handler.rb +2 -2
  57. data/lib/nokogiri/html4/document.rb +45 -24
  58. data/lib/nokogiri/html4/document_fragment.rb +124 -12
  59. data/lib/nokogiri/html4/encoding_reader.rb +2 -2
  60. data/lib/nokogiri/html4/sax/parser.rb +23 -38
  61. data/lib/nokogiri/html4/sax/parser_context.rb +4 -9
  62. data/lib/nokogiri/html4.rb +9 -14
  63. data/lib/nokogiri/html5/builder.rb +40 -0
  64. data/lib/nokogiri/html5/document.rb +61 -30
  65. data/lib/nokogiri/html5/document_fragment.rb +130 -20
  66. data/lib/nokogiri/html5/node.rb +4 -4
  67. data/lib/nokogiri/html5.rb +114 -138
  68. data/lib/nokogiri/version/constant.rb +1 -1
  69. data/lib/nokogiri/version/info.rb +6 -5
  70. data/lib/nokogiri/xml/attr.rb +2 -2
  71. data/lib/nokogiri/xml/builder.rb +8 -1
  72. data/lib/nokogiri/xml/document.rb +73 -29
  73. data/lib/nokogiri/xml/document_fragment.rb +86 -15
  74. data/lib/nokogiri/xml/namespace.rb +1 -2
  75. data/lib/nokogiri/xml/node.rb +113 -35
  76. data/lib/nokogiri/xml/node_set.rb +12 -10
  77. data/lib/nokogiri/xml/parse_options.rb +1 -1
  78. data/lib/nokogiri/xml/pp/node.rb +6 -1
  79. data/lib/nokogiri/xml/reader.rb +51 -17
  80. data/lib/nokogiri/xml/relax_ng.rb +57 -20
  81. data/lib/nokogiri/xml/sax/document.rb +174 -83
  82. data/lib/nokogiri/xml/sax/parser.rb +115 -41
  83. data/lib/nokogiri/xml/sax/parser_context.rb +116 -8
  84. data/lib/nokogiri/xml/sax/push_parser.rb +3 -0
  85. data/lib/nokogiri/xml/sax.rb +48 -0
  86. data/lib/nokogiri/xml/schema.rb +112 -45
  87. data/lib/nokogiri/xml/searchable.rb +39 -43
  88. data/lib/nokogiri/xml/syntax_error.rb +23 -1
  89. data/lib/nokogiri/xml/xpath_context.rb +14 -3
  90. data/lib/nokogiri/xml.rb +14 -25
  91. data/lib/nokogiri/xslt/stylesheet.rb +29 -7
  92. data/lib/nokogiri/xslt.rb +4 -10
  93. data/lib/nokogiri.rb +1 -1
  94. data/lib/xsd/xmlparser/nokogiri.rb +3 -4
  95. data/patches/libxml2/0019-xpath-Use-separate-static-hash-table-for-standard-fu.patch +244 -0
  96. data/ports/archives/libxml2-2.13.7.tar.xz +0 -0
  97. data/ports/archives/libxslt-1.1.43.tar.xz +0 -0
  98. metadata +13 -14
  99. data/ext/nokogiri/libxml2_backwards_compat.c +0 -121
  100. data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +0 -25
  101. data/ports/archives/libxml2-2.11.6.tar.xz +0 -0
  102. data/ports/archives/libxslt-1.1.39.tar.xz +0 -0
@@ -12,23 +12,23 @@ static const xmlChar *NOKOGIRI_BUILTIN_PREFIX = (const xmlChar *)"nokogiri-built
12
12
  static const xmlChar *NOKOGIRI_BUILTIN_URI = (const xmlChar *)"https://www.nokogiri.org/default_ns/ruby/builtins";
13
13
 
14
14
  static void
15
- xml_xpath_context_deallocate(void *data)
15
+ _noko_xml_xpath_context_dfree(void *data)
16
16
  {
17
17
  xmlXPathContextPtr c_context = data;
18
18
  xmlXPathFreeContext(c_context);
19
19
  }
20
20
 
21
- static const rb_data_type_t xml_xpath_context_type = {
22
- .wrap_struct_name = "Nokogiri::XML::XPathContext",
21
+ static const rb_data_type_t _noko_xml_xpath_context_type = {
22
+ .wrap_struct_name = "xmlXPathContext",
23
23
  .function = {
24
- .dfree = xml_xpath_context_deallocate,
24
+ .dfree = _noko_xml_xpath_context_dfree,
25
25
  },
26
26
  .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
27
27
  };
28
28
 
29
29
  /* find a CSS class in an HTML element's `class` attribute */
30
30
  static const xmlChar *
31
- builtin_css_class(const xmlChar *str, const xmlChar *val)
31
+ _noko_xml_xpath_context__css_class(const xmlChar *str, const xmlChar *val)
32
32
  {
33
33
  int val_len;
34
34
 
@@ -62,9 +62,9 @@ builtin_css_class(const xmlChar *str, const xmlChar *val)
62
62
  return (NULL);
63
63
  }
64
64
 
65
- /* xmlXPathFunction to wrap builtin_css_class() */
65
+ /* xmlXPathFunction to wrap _noko_xml_xpath_context__css_class() */
66
66
  static void
67
- xpath_builtin_css_class(xmlXPathParserContextPtr ctxt, int nargs)
67
+ noko_xml_xpath_context_xpath_func_css_class(xmlXPathParserContextPtr ctxt, int nargs)
68
68
  {
69
69
  xmlXPathObjectPtr hay, needle;
70
70
 
@@ -85,7 +85,7 @@ xpath_builtin_css_class(xmlXPathParserContextPtr ctxt, int nargs)
85
85
  XP_ERROR(XPATH_INVALID_TYPE);
86
86
  }
87
87
 
88
- if (builtin_css_class(hay->stringval, needle->stringval)) {
88
+ if (_noko_xml_xpath_context__css_class(hay->stringval, needle->stringval)) {
89
89
  valuePush(ctxt, xmlXPathNewBoolean(1));
90
90
  } else {
91
91
  valuePush(ctxt, xmlXPathNewBoolean(0));
@@ -99,7 +99,7 @@ xpath_builtin_css_class(xmlXPathParserContextPtr ctxt, int nargs)
99
99
  /* xmlXPathFunction to select nodes whose local name matches, for HTML5 CSS queries that should
100
100
  * ignore namespaces */
101
101
  static void
102
- xpath_builtin_local_name_is(xmlXPathParserContextPtr ctxt, int nargs)
102
+ noko_xml_xpath_context_xpath_func_local_name_is(xmlXPathParserContextPtr ctxt, int nargs)
103
103
  {
104
104
  xmlXPathObjectPtr element_name;
105
105
 
@@ -124,25 +124,26 @@ xpath_builtin_local_name_is(xmlXPathParserContextPtr ctxt, int nargs)
124
124
  * register_ns(prefix, uri) → Nokogiri::XML::XPathContext
125
125
  *
126
126
  * Register the namespace with +prefix+ and +uri+ for use in future queries.
127
+ * Passing a uri of +nil+ will unregister the namespace.
127
128
  *
128
129
  * [Returns] +self+
129
130
  */
130
131
  static VALUE
131
- rb_xml_xpath_context_register_ns(VALUE rb_context, VALUE prefix, VALUE uri)
132
+ noko_xml_xpath_context_register_ns(VALUE rb_context, VALUE prefix, VALUE uri)
132
133
  {
133
134
  xmlXPathContextPtr c_context;
135
+ const xmlChar *ns_uri;
134
136
 
135
- TypedData_Get_Struct(
136
- rb_context,
137
- xmlXPathContext,
138
- &xml_xpath_context_type,
139
- c_context
140
- );
137
+ TypedData_Get_Struct(rb_context, xmlXPathContext, &_noko_xml_xpath_context_type, c_context);
138
+
139
+ if (NIL_P(uri)) {
140
+ ns_uri = NULL;
141
+ } else {
142
+ ns_uri = (const xmlChar *)StringValueCStr(uri);
143
+ }
144
+
145
+ xmlXPathRegisterNs(c_context, (const xmlChar *)StringValueCStr(prefix), ns_uri);
141
146
 
142
- xmlXPathRegisterNs(c_context,
143
- (const xmlChar *)StringValueCStr(prefix),
144
- (const xmlChar *)StringValueCStr(uri)
145
- );
146
147
  return rb_context;
147
148
  }
148
149
 
@@ -151,29 +152,25 @@ rb_xml_xpath_context_register_ns(VALUE rb_context, VALUE prefix, VALUE uri)
151
152
  * register_variable(name, value) → Nokogiri::XML::XPathContext
152
153
  *
153
154
  * Register the variable +name+ with +value+ for use in future queries.
155
+ * Passing a value of +nil+ will unregister the variable.
154
156
  *
155
157
  * [Returns] +self+
156
158
  */
157
159
  static VALUE
158
- rb_xml_xpath_context_register_variable(VALUE rb_context, VALUE name, VALUE value)
160
+ noko_xml_xpath_context_register_variable(VALUE rb_context, VALUE name, VALUE value)
159
161
  {
160
162
  xmlXPathContextPtr c_context;
161
163
  xmlXPathObjectPtr xmlValue;
162
164
 
163
- TypedData_Get_Struct(
164
- rb_context,
165
- xmlXPathContext,
166
- &xml_xpath_context_type,
167
- c_context
168
- );
165
+ TypedData_Get_Struct(rb_context, xmlXPathContext, &_noko_xml_xpath_context_type, c_context);
169
166
 
170
- xmlValue = xmlXPathNewCString(StringValueCStr(value));
167
+ if (NIL_P(value)) {
168
+ xmlValue = NULL;
169
+ } else {
170
+ xmlValue = xmlXPathNewCString(StringValueCStr(value));
171
+ }
171
172
 
172
- xmlXPathRegisterVariable(
173
- c_context,
174
- (const xmlChar *)StringValueCStr(name),
175
- xmlValue
176
- );
173
+ xmlXPathRegisterVariable(c_context, (const xmlChar *)StringValueCStr(name), xmlValue);
177
174
 
178
175
  return rb_context;
179
176
  }
@@ -184,7 +181,7 @@ rb_xml_xpath_context_register_variable(VALUE rb_context, VALUE name, VALUE value
184
181
  * returns Qundef if no conversion was possible.
185
182
  */
186
183
  static VALUE
187
- xpath2ruby(xmlXPathObjectPtr c_xpath_object, xmlXPathContextPtr c_context)
184
+ _noko_xml_xpath_context__xpath2ruby(xmlXPathObjectPtr c_xpath_object, xmlXPathContextPtr c_context)
188
185
  {
189
186
  VALUE rb_retval;
190
187
 
@@ -238,7 +235,7 @@ Nokogiri_marshal_xpath_funcall_and_return_values(
238
235
 
239
236
  for (int j = argc - 1 ; j >= 0 ; --j) {
240
237
  c_xpath_object = valuePop(ctxt);
241
- argv[j] = xpath2ruby(c_xpath_object, ctxt->context);
238
+ argv[j] = _noko_xml_xpath_context__xpath2ruby(c_xpath_object, ctxt->context);
242
239
  if (argv[j] == Qundef) {
243
240
  argv[j] = NOKOGIRI_STR_NEW2(xmlXPathCastToString(c_xpath_object));
244
241
  }
@@ -294,7 +291,7 @@ Nokogiri_marshal_xpath_funcall_and_return_values(
294
291
  }
295
292
 
296
293
  static void
297
- method_caller(xmlXPathParserContextPtr ctxt, int argc)
294
+ _noko_xml_xpath_context__handler_invoker(xmlXPathParserContextPtr ctxt, int argc)
298
295
  {
299
296
  VALUE rb_xpath_handler = Qnil;
300
297
  const char *method_name = NULL ;
@@ -316,18 +313,15 @@ method_caller(xmlXPathParserContextPtr ctxt, int argc)
316
313
  }
317
314
 
318
315
  static xmlXPathFunction
319
- handler_lookup(void *data, const xmlChar *c_name, const xmlChar *c_ns_uri)
316
+ _noko_xml_xpath_context_handler_lookup(void *data, const xmlChar *c_name, const xmlChar *c_ns_uri)
320
317
  {
321
318
  VALUE rb_handler = (VALUE)data;
322
319
  if (rb_respond_to(rb_handler, rb_intern((const char *)c_name))) {
323
320
  if (c_ns_uri == NULL) {
324
- NOKO_WARN_DEPRECATION(
325
- "A custom XPath or CSS handler function named '%s' is being invoked without a namespace."
326
- " Please update your query to reference this function as 'nokogiri:%s'."
327
- " Invoking custom handler functions without a namespace is deprecated and support will be removed in a future release of Nokogiri.",
328
- c_name, c_name);
321
+ NOKO_WARN_DEPRECATION("A custom XPath or CSS handler function named '%s' is being invoked without a namespace. Please update your query to reference this function as 'nokogiri:%s'. Invoking custom handler functions without a namespace is deprecated and will become an error in Nokogiri v1.17.0.",
322
+ c_name, c_name); // TODO deprecated in v1.15.0, remove in v1.19.0
329
323
  }
330
- return method_caller;
324
+ return _noko_xml_xpath_context__handler_invoker;
331
325
  }
332
326
 
333
327
  return NULL;
@@ -335,7 +329,7 @@ handler_lookup(void *data, const xmlChar *c_name, const xmlChar *c_ns_uri)
335
329
 
336
330
  PRINTFLIKE_DECL(2, 3)
337
331
  static void
338
- generic_exception_pusher(void *data, const char *msg, ...)
332
+ _noko_xml_xpath_context__generic_exception_pusher(void *data, const char *msg, ...)
339
333
  {
340
334
  VALUE rb_errors = (VALUE)data;
341
335
  VALUE rb_message;
@@ -346,7 +340,7 @@ generic_exception_pusher(void *data, const char *msg, ...)
346
340
  #ifdef TRUFFLERUBY_NOKOGIRI_SYSTEM_LIBRARIES
347
341
  /* It is not currently possible to pass var args from native
348
342
  functions to sulong, so we work around the issue here. */
349
- rb_message = rb_sprintf("generic_exception_pusher: %s", msg);
343
+ rb_message = rb_sprintf("_noko_xml_xpath_context__generic_exception_pusher: %s", msg);
350
344
  #else
351
345
  va_list args;
352
346
  va_start(args, msg);
@@ -368,58 +362,55 @@ generic_exception_pusher(void *data, const char *msg, ...)
368
362
  * a +Float+, or a boolean.
369
363
  */
370
364
  static VALUE
371
- rb_xml_xpath_context_evaluate(int argc, VALUE *argv, VALUE rb_context)
365
+ noko_xml_xpath_context_evaluate(int argc, VALUE *argv, VALUE rb_context)
372
366
  {
373
- VALUE search_path, xpath_handler;
374
- VALUE retval = Qnil;
375
367
  xmlXPathContextPtr c_context;
376
- xmlXPathObjectPtr xpath;
377
- xmlChar *query;
378
- VALUE errors = rb_ary_new();
379
-
380
- TypedData_Get_Struct(
381
- rb_context,
382
- xmlXPathContext,
383
- &xml_xpath_context_type,
384
- c_context
385
- );
368
+ VALUE rb_expression = Qnil;
369
+ VALUE rb_function_lookup_handler = Qnil;
370
+ xmlChar *c_expression_str = NULL;
371
+ VALUE rb_errors = rb_ary_new();
372
+ xmlXPathObjectPtr c_xpath_object;
373
+ VALUE rb_xpath_object = Qnil;
386
374
 
387
- if (rb_scan_args(argc, argv, "11", &search_path, &xpath_handler) == 1) {
388
- xpath_handler = Qnil;
389
- }
375
+ TypedData_Get_Struct(rb_context, xmlXPathContext, &_noko_xml_xpath_context_type, c_context);
376
+
377
+ rb_scan_args(argc, argv, "11", &rb_expression, &rb_function_lookup_handler);
390
378
 
391
- query = (xmlChar *)StringValueCStr(search_path);
379
+ c_expression_str = (xmlChar *)StringValueCStr(rb_expression);
392
380
 
393
- if (Qnil != xpath_handler) {
381
+ if (Qnil != rb_function_lookup_handler) {
394
382
  /* FIXME: not sure if this is the correct place to shove private data. */
395
- c_context->userData = (void *)xpath_handler;
383
+ c_context->userData = (void *)rb_function_lookup_handler;
396
384
  xmlXPathRegisterFuncLookup(
397
385
  c_context,
398
- handler_lookup,
399
- (void *)xpath_handler
386
+ _noko_xml_xpath_context_handler_lookup,
387
+ (void *)rb_function_lookup_handler
400
388
  );
401
389
  }
402
390
 
403
- xmlSetStructuredErrorFunc((void *)errors, Nokogiri_error_array_pusher);
404
- xmlSetGenericErrorFunc((void *)errors, generic_exception_pusher);
391
+ /* TODO: use xmlXPathSetErrorHandler (as of 2.13.0) */
392
+ xmlSetStructuredErrorFunc((void *)rb_errors, noko__error_array_pusher);
393
+ xmlSetGenericErrorFunc((void *)rb_errors, _noko_xml_xpath_context__generic_exception_pusher);
405
394
 
406
- xpath = xmlXPathEvalExpression(query, c_context);
395
+ c_xpath_object = xmlXPathEvalExpression(c_expression_str, c_context);
407
396
 
408
397
  xmlSetStructuredErrorFunc(NULL, NULL);
409
398
  xmlSetGenericErrorFunc(NULL, NULL);
410
399
 
411
- if (xpath == NULL) {
412
- rb_exc_raise(rb_ary_entry(errors, 0));
400
+ xmlXPathRegisterFuncLookup(c_context, NULL, NULL);
401
+
402
+ if (c_xpath_object == NULL) {
403
+ rb_exc_raise(rb_ary_entry(rb_errors, 0));
413
404
  }
414
405
 
415
- retval = xpath2ruby(xpath, c_context);
416
- if (retval == Qundef) {
417
- retval = noko_xml_node_set_wrap(NULL, DOC_RUBY_OBJECT(c_context->doc));
406
+ rb_xpath_object = _noko_xml_xpath_context__xpath2ruby(c_xpath_object, c_context);
407
+ if (rb_xpath_object == Qundef) {
408
+ rb_xpath_object = noko_xml_node_set_wrap(NULL, DOC_RUBY_OBJECT(c_context->doc));
418
409
  }
419
410
 
420
- xmlXPathFreeNodeSetList(xpath);
411
+ xmlXPathFreeNodeSetList(c_xpath_object);
421
412
 
422
- return retval;
413
+ return rb_xpath_object;
423
414
  }
424
415
 
425
416
  /*
@@ -429,45 +420,53 @@ rb_xml_xpath_context_evaluate(int argc, VALUE *argv, VALUE rb_context)
429
420
  * Create a new XPathContext with +node+ as the context node.
430
421
  */
431
422
  static VALUE
432
- rb_xml_xpath_context_new(VALUE klass, VALUE rb_node)
423
+ noko_xml_xpath_context_new(VALUE klass, VALUE rb_node)
433
424
  {
434
- xmlNodePtr node;
425
+ xmlNodePtr c_node;
435
426
  xmlXPathContextPtr c_context;
436
427
  VALUE rb_context;
437
428
 
438
- Noko_Node_Get_Struct(rb_node, xmlNode, node);
429
+ Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
439
430
 
440
431
  #if LIBXML_VERSION < 21000
441
- /* deprecated in 40483d0 */
442
- xmlXPathInit();
432
+ xmlXPathInit(); /* deprecated in 40483d0 */
443
433
  #endif
444
434
 
445
- c_context = xmlXPathNewContext(node->doc);
446
- c_context->node = node;
435
+ c_context = xmlXPathNewContext(c_node->doc);
436
+ c_context->node = c_node;
447
437
 
448
438
  xmlXPathRegisterNs(c_context, NOKOGIRI_PREFIX, NOKOGIRI_URI);
449
439
  xmlXPathRegisterNs(c_context, NOKOGIRI_BUILTIN_PREFIX, NOKOGIRI_BUILTIN_URI);
450
- xmlXPathRegisterFuncNS(
451
- c_context,
452
- (const xmlChar *)"css-class",
453
- NOKOGIRI_BUILTIN_URI,
454
- xpath_builtin_css_class
455
- );
456
- xmlXPathRegisterFuncNS(
457
- c_context,
458
- (const xmlChar *)"local-name-is",
459
- NOKOGIRI_BUILTIN_URI,
460
- xpath_builtin_local_name_is
461
- );
462
440
 
463
- rb_context = TypedData_Wrap_Struct(
464
- klass,
465
- &xml_xpath_context_type,
466
- c_context
467
- );
441
+ xmlXPathRegisterFuncNS(c_context,
442
+ (const xmlChar *)"css-class", NOKOGIRI_BUILTIN_URI,
443
+ noko_xml_xpath_context_xpath_func_css_class);
444
+ xmlXPathRegisterFuncNS(c_context,
445
+ (const xmlChar *)"local-name-is", NOKOGIRI_BUILTIN_URI,
446
+ noko_xml_xpath_context_xpath_func_local_name_is);
447
+
448
+ rb_context = TypedData_Wrap_Struct(klass, &_noko_xml_xpath_context_type, c_context);
449
+
468
450
  return rb_context;
469
451
  }
470
452
 
453
+
454
+ /* :nodoc: */
455
+ static VALUE
456
+ noko_xml_xpath_context_set_node(VALUE rb_context, VALUE rb_node)
457
+ {
458
+ xmlNodePtr c_node;
459
+ xmlXPathContextPtr c_context;
460
+
461
+ TypedData_Get_Struct(rb_context, xmlXPathContext, &_noko_xml_xpath_context_type, c_context);
462
+ Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
463
+
464
+ c_context->doc = c_node->doc;
465
+ c_context->node = c_node;
466
+
467
+ return rb_node;
468
+ }
469
+
471
470
  void
472
471
  noko_init_xml_xpath_context(void)
473
472
  {
@@ -478,9 +477,10 @@ noko_init_xml_xpath_context(void)
478
477
 
479
478
  rb_undef_alloc_func(cNokogiriXmlXpathContext);
480
479
 
481
- rb_define_singleton_method(cNokogiriXmlXpathContext, "new", rb_xml_xpath_context_new, 1);
480
+ rb_define_singleton_method(cNokogiriXmlXpathContext, "new", noko_xml_xpath_context_new, 1);
482
481
 
483
- rb_define_method(cNokogiriXmlXpathContext, "evaluate", rb_xml_xpath_context_evaluate, -1);
484
- rb_define_method(cNokogiriXmlXpathContext, "register_variable", rb_xml_xpath_context_register_variable, 2);
485
- rb_define_method(cNokogiriXmlXpathContext, "register_ns", rb_xml_xpath_context_register_ns, 2);
482
+ rb_define_method(cNokogiriXmlXpathContext, "evaluate", noko_xml_xpath_context_evaluate, -1);
483
+ rb_define_method(cNokogiriXmlXpathContext, "register_variable", noko_xml_xpath_context_register_variable, 2);
484
+ rb_define_method(cNokogiriXmlXpathContext, "register_ns", noko_xml_xpath_context_register_ns, 2);
485
+ rb_define_method(cNokogiriXmlXpathContext, "node=", noko_xml_xpath_context_set_node, 1);
486
486
  }
@@ -1,6 +1,6 @@
1
1
  #include <nokogiri.h>
2
2
 
3
- VALUE cNokogiriXsltStylesheet ;
3
+ VALUE cNokogiriXsltStylesheet;
4
4
 
5
5
  static void
6
6
  mark(void *data)
@@ -18,8 +18,8 @@ dealloc(void *data)
18
18
  ruby_xfree(wrapper);
19
19
  }
20
20
 
21
- static const rb_data_type_t xslt_stylesheet_type = {
22
- .wrap_struct_name = "Nokogiri::XSLT::Stylesheet",
21
+ static const rb_data_type_t nokogiri_xslt_stylesheet_tuple_type = {
22
+ .wrap_struct_name = "nokogiriXsltStylesheetTuple",
23
23
  .function = {
24
24
  .dmark = mark,
25
25
  .dfree = dealloc,
@@ -56,7 +56,7 @@ Nokogiri_wrap_xslt_stylesheet(xsltStylesheetPtr ss)
56
56
  self = TypedData_Make_Struct(
57
57
  cNokogiriXsltStylesheet,
58
58
  nokogiriXsltStylesheetTuple,
59
- &xslt_stylesheet_type,
59
+ &nokogiri_xslt_stylesheet_tuple_type,
60
60
  wrapper
61
61
  );
62
62
 
@@ -71,7 +71,12 @@ Nokogiri_wrap_xslt_stylesheet(xsltStylesheetPtr ss)
71
71
  * call-seq:
72
72
  * parse_stylesheet_doc(document)
73
73
  *
74
- * Parse a stylesheet from +document+.
74
+ * Parse an XSLT::Stylesheet from +document+.
75
+ *
76
+ * [Parameters]
77
+ * - +document+ (Nokogiri::XML::Document) the document to be parsed.
78
+ *
79
+ * [Returns] Nokogiri::XSLT::Stylesheet
75
80
  */
76
81
  static VALUE
77
82
  parse_stylesheet_doc(VALUE klass, VALUE xmldocobj)
@@ -104,7 +109,7 @@ parse_stylesheet_doc(VALUE klass, VALUE xmldocobj)
104
109
  * call-seq:
105
110
  * serialize(document)
106
111
  *
107
- * Serialize +document+ to an xml string.
112
+ * Serialize +document+ to an xml string, as specified by the +method+ parameter in the Stylesheet.
108
113
  */
109
114
  static VALUE
110
115
  rb_xslt_stylesheet_serialize(VALUE self, VALUE xmlobj)
@@ -119,7 +124,7 @@ rb_xslt_stylesheet_serialize(VALUE self, VALUE xmlobj)
119
124
  TypedData_Get_Struct(
120
125
  self,
121
126
  nokogiriXsltStylesheetTuple,
122
- &xslt_stylesheet_type,
127
+ &nokogiri_xslt_stylesheet_tuple_type,
123
128
  wrapper
124
129
  );
125
130
  xsltSaveResultToString(&doc_ptr, &doc_len, xml, wrapper->ss);
@@ -133,7 +138,7 @@ rb_xslt_stylesheet_serialize(VALUE self, VALUE xmlobj)
133
138
  * transform(document)
134
139
  * transform(document, params = {})
135
140
  *
136
- * Apply an XSLT stylesheet to an XML::Document.
141
+ * Transform an XML::Document as defined by an XSLT::Stylesheet.
137
142
  *
138
143
  * [Parameters]
139
144
  * - +document+ (Nokogiri::XML::Document) the document to be transformed.
@@ -268,7 +273,7 @@ rb_xslt_stylesheet_transform(int argc, VALUE *argv, VALUE self)
268
273
  Check_Type(rb_param, T_ARRAY);
269
274
 
270
275
  c_document = noko_xml_document_unwrap(rb_document);
271
- TypedData_Get_Struct(self, nokogiriXsltStylesheetTuple, &xslt_stylesheet_type, wrapper);
276
+ TypedData_Get_Struct(self, nokogiriXsltStylesheetTuple, &nokogiri_xslt_stylesheet_tuple_type, wrapper);
272
277
 
273
278
  param_len = RARRAY_LEN(rb_param);
274
279
  params = ruby_xcalloc((size_t)param_len + 1, sizeof(char *));
@@ -357,7 +362,7 @@ initFunc(xsltTransformContextPtr ctxt, const xmlChar *uri)
357
362
  TypedData_Get_Struct(
358
363
  (VALUE)ctxt->style->_private,
359
364
  nokogiriXsltStylesheetTuple,
360
- &xslt_stylesheet_type,
365
+ &nokogiri_xslt_stylesheet_tuple_type,
361
366
  wrapper
362
367
  );
363
368
  inst = rb_class_new_instance(0, NULL, obj);
@@ -375,7 +380,7 @@ shutdownFunc(xsltTransformContextPtr ctxt,
375
380
  TypedData_Get_Struct(
376
381
  (VALUE)ctxt->style->_private,
377
382
  nokogiriXsltStylesheetTuple,
378
- &xslt_stylesheet_type,
383
+ &nokogiri_xslt_stylesheet_tuple_type,
379
384
  wrapper
380
385
  );
381
386
 
@@ -13,6 +13,23 @@ LDFLAGS := -pthread
13
13
 
14
14
  all: check
15
15
 
16
+ oss-fuzz:
17
+ ./fuzzer/build-ossfuzz.sh
18
+
19
+ fuzzers: fuzzer-normal fuzzer-asan fuzzer-ubsan fuzzer-msan
20
+
21
+ fuzzer-normal:
22
+ ./fuzzer/build.sh
23
+
24
+ fuzzer-asan:
25
+ SANITIZER=asan ./fuzzer/build.sh
26
+
27
+ fuzzer-ubsan:
28
+ SANITIZER=ubsan ./fuzzer/build.sh
29
+
30
+ fuzzer-msan:
31
+ SANITIZER=msan ./fuzzer/build.sh
32
+
16
33
  # don't try to regenerate ragel or gperf files in CI, that should be a development-only action and
17
34
  # the generated files should be committed to SCM
18
35
  ifneq ($(CI),true)
@@ -81,6 +98,7 @@ coverage:
81
98
 
82
99
  clean:
83
100
  $(RM) -r build
101
+ $(RM) -r fuzzer/build fuzzer/src-* fuzzer/gumbo_corpus
84
102
 
85
103
  build/src/flags: | build/src
86
104
  @echo 'old_CC := $(CC)' > $@
@@ -44,8 +44,8 @@ const unsigned char _gumbo_ascii_table[0x80] = {
44
44
  };
45
45
 
46
46
  // Table generation code.
47
- // clang -DGUMBO_GEN_TABLE=1 ascii.c && ./a.out && rm a.out
48
- #if GUMBO_GEN_TABLE
47
+ // clang -DGUMBO_GEN_TABLE ascii.c && ./a.out && rm a.out
48
+ #ifdef GUMBO_GEN_TABLE
49
49
  #include <stdio.h>
50
50
 
51
51
  int main() {
@@ -46,33 +46,40 @@ static int PRINTF(2) print_message (
46
46
  args
47
47
  );
48
48
  va_end(args);
49
- #if _MSC_VER && _MSC_VER < 1900
49
+
50
+ #if (defined(_MSC_VER) && (_MSC_VER < 1900)) || defined(_RUBY_MSVCRT)
50
51
  if (bytes_written == -1) {
51
52
  // vsnprintf returns -1 on older MSVC++ if there's not enough capacity,
52
53
  // instead of returning the number of bytes that would've been written had
53
- // there been enough. In this case, we'll double the buffer size and hope
54
- // it fits when we retry (letting it fail and returning 0 if it doesn't),
55
- // since there's no way to smartly resize the buffer.
56
- gumbo_string_buffer_reserve(output->capacity * 2, output);
54
+ // there been enough. In this case, we can call vsnprintf() again but
55
+ // with a count of 0 to get the number of bytes written, not including
56
+ // the null terminator.
57
+ // https://learn.microsoft.com/en-us/cpp/c-runtime-library/reference/vsnprintf-vsnprintf-vsnprintf-l-vsnwprintf-vsnwprintf-l?view=msvc-140#behavior-summary
58
+
57
59
  va_start(args, format);
58
- int result = vsnprintf (
59
- output->data + output->length,
60
- remaining_capacity,
60
+ bytes_written = vsnprintf (
61
+ NULL,
62
+ 0,
61
63
  format,
62
64
  args
63
65
  );
64
66
  va_end(args);
65
- return result == -1 ? 0 : result;
66
67
  }
67
- #else
68
+ #endif
69
+
68
70
  // -1 in standard C99 indicates an encoding error. Return 0 and do nothing.
69
71
  if (bytes_written == -1) {
70
72
  return 0;
71
73
  }
72
- #endif
73
74
 
74
75
  if (bytes_written >= remaining_capacity) {
75
- gumbo_string_buffer_reserve(output->capacity + bytes_written, output);
76
+ // At least double the size of the buffer.
77
+ size_t new_capacity = output->capacity * 2;
78
+ if (new_capacity < output->length + bytes_written + 1) {
79
+ // The +1 is for the null terminator.
80
+ new_capacity = output->length + bytes_written + 1;
81
+ }
82
+ gumbo_string_buffer_reserve(new_capacity, output);
76
83
  remaining_capacity = output->capacity - output->length;
77
84
  va_start(args, format);
78
85
  bytes_written = vsnprintf (
@@ -96,8 +103,14 @@ static void print_tag_stack (
96
103
  if (i) {
97
104
  print_message(output, ", ");
98
105
  }
99
- GumboTag tag = (GumboTag)(intptr_t) error->tag_stack.data[i];
100
- print_message(output, "%s", gumbo_normalized_tagname(tag));
106
+ uintptr_t tag = (uintptr_t) error->tag_stack.data[i];
107
+ const char* tag_name;
108
+ if (tag > GUMBO_TAG_UNKNOWN) {
109
+ tag_name = error->tag_stack.data[i];
110
+ } else {
111
+ tag_name = gumbo_normalized_tagname((GumboTag)tag);
112
+ }
113
+ print_message(output, "%s", tag_name);
101
114
  }
102
115
  gumbo_string_buffer_append_codepoint('.', output);
103
116
  }
@@ -326,41 +339,45 @@ static void handle_parser_error (
326
339
  }
327
340
 
328
341
  switch (error->input_type) {
329
- case GUMBO_TOKEN_DOCTYPE:
330
- print_message(output, "This is not a legal doctype");
331
- return;
332
- case GUMBO_TOKEN_COMMENT:
333
- // Should never happen; comments are always legal.
334
- assert(0);
335
- // But just in case...
336
- print_message(output, "Comments aren't legal here");
337
- return;
338
- case GUMBO_TOKEN_CDATA:
339
- case GUMBO_TOKEN_WHITESPACE:
340
- case GUMBO_TOKEN_CHARACTER:
341
- print_message(output, "Character tokens aren't legal here");
342
- return;
343
- case GUMBO_TOKEN_NULL:
344
- print_message(output, "Null bytes are not allowed in HTML5");
345
- return;
346
- case GUMBO_TOKEN_EOF:
347
- if (error->parser_state == GUMBO_INSERTION_MODE_INITIAL) {
348
- print_message(output, "You must provide a doctype");
349
- } else {
350
- print_message(output, "Premature end of file.");
351
- print_tag_stack(error, output);
352
- }
353
- return;
354
- case GUMBO_TOKEN_START_TAG:
355
- print_message(output, "Start tag '%s' isn't allowed here.",
356
- gumbo_normalized_tagname(error->input_tag));
357
- print_tag_stack(error, output);
358
- return;
359
- case GUMBO_TOKEN_END_TAG:
360
- print_message(output, "End tag '%s' isn't allowed here.",
361
- gumbo_normalized_tagname(error->input_tag));
342
+ case GUMBO_TOKEN_DOCTYPE:
343
+ print_message(output, "This is not a legal doctype");
344
+ return;
345
+ case GUMBO_TOKEN_COMMENT:
346
+ // Should never happen; comments are always legal.
347
+ assert(0);
348
+ // But just in case...
349
+ print_message(output, "Comments aren't legal here");
350
+ return;
351
+ case GUMBO_TOKEN_CDATA:
352
+ case GUMBO_TOKEN_WHITESPACE:
353
+ case GUMBO_TOKEN_CHARACTER:
354
+ print_message(output, "Character tokens aren't legal here");
355
+ return;
356
+ case GUMBO_TOKEN_NULL:
357
+ print_message(output, "Null bytes are not allowed in HTML5");
358
+ return;
359
+ case GUMBO_TOKEN_EOF:
360
+ if (error->parser_state == GUMBO_INSERTION_MODE_INITIAL) {
361
+ print_message(output, "You must provide a doctype");
362
+ } else {
363
+ print_message(output, "Premature end of file.");
362
364
  print_tag_stack(error, output);
363
- return;
365
+ }
366
+ return;
367
+ case GUMBO_TOKEN_START_TAG:
368
+ case GUMBO_TOKEN_END_TAG:
369
+ {
370
+ const char* tag_name;
371
+ const char* which = error->input_type == GUMBO_TOKEN_START_TAG ? "Start" : "End";
372
+ if (error->input_name) {
373
+ tag_name = error->input_name;
374
+ } else {
375
+ tag_name = gumbo_normalized_tagname(error->input_tag);
376
+ }
377
+ print_message(output, "%s tag '%s' isn't allowed here.", which, tag_name);
378
+ print_tag_stack(error, output);
379
+ return;
380
+ }
364
381
  }
365
382
  }
366
383
 
@@ -613,6 +630,17 @@ void gumbo_print_caret_diagnostic (
613
630
 
614
631
  void gumbo_error_destroy(GumboError* error) {
615
632
  if (error->type == GUMBO_ERR_PARSER) {
633
+ // Free the tag name.
634
+ if (error->v.parser.input_name) {
635
+ gumbo_free(error->v.parser.input_name);
636
+ }
637
+
638
+ for (unsigned int i = 0; i < error->v.parser.tag_stack.length; ++i) {
639
+ intptr_t tag = (intptr_t) error->v.parser.tag_stack.data[i];
640
+ if (tag > GUMBO_TAG_UNKNOWN) {
641
+ gumbo_free(error->v.parser.tag_stack.data[i]);
642
+ }
643
+ }
616
644
  gumbo_vector_destroy(&error->v.parser.tag_stack);
617
645
  }
618
646
  gumbo_free(error);