nokogiri 1.16.8 → 1.18.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (95)
  1. checksums.yaml +4 -4
  2. data/Gemfile +11 -21
  3. data/LICENSE-DEPENDENCIES.md +6 -6
  4. data/README.md +8 -5
  5. data/dependencies.yml +6 -6
  6. data/ext/nokogiri/extconf.rb +188 -142
  7. data/ext/nokogiri/gumbo.c +69 -53
  8. data/ext/nokogiri/html4_document.c +10 -4
  9. data/ext/nokogiri/html4_element_description.c +18 -18
  10. data/ext/nokogiri/html4_sax_parser.c +40 -0
  11. data/ext/nokogiri/html4_sax_parser_context.c +48 -58
  12. data/ext/nokogiri/html4_sax_push_parser.c +25 -24
  13. data/ext/nokogiri/libxml2_polyfill.c +114 -0
  14. data/ext/nokogiri/nokogiri.c +9 -2
  15. data/ext/nokogiri/nokogiri.h +18 -33
  16. data/ext/nokogiri/xml_attr.c +1 -1
  17. data/ext/nokogiri/xml_cdata.c +2 -10
  18. data/ext/nokogiri/xml_comment.c +3 -8
  19. data/ext/nokogiri/xml_document.c +163 -156
  20. data/ext/nokogiri/xml_document_fragment.c +10 -25
  21. data/ext/nokogiri/xml_dtd.c +1 -1
  22. data/ext/nokogiri/xml_element_content.c +9 -9
  23. data/ext/nokogiri/xml_encoding_handler.c +4 -4
  24. data/ext/nokogiri/xml_namespace.c +6 -6
  25. data/ext/nokogiri/xml_node.c +134 -103
  26. data/ext/nokogiri/xml_node_set.c +46 -44
  27. data/ext/nokogiri/xml_reader.c +54 -58
  28. data/ext/nokogiri/xml_relax_ng.c +35 -56
  29. data/ext/nokogiri/xml_sax_parser.c +156 -88
  30. data/ext/nokogiri/xml_sax_parser_context.c +219 -131
  31. data/ext/nokogiri/xml_sax_push_parser.c +68 -49
  32. data/ext/nokogiri/xml_schema.c +50 -85
  33. data/ext/nokogiri/xml_syntax_error.c +19 -11
  34. data/ext/nokogiri/xml_text.c +2 -4
  35. data/ext/nokogiri/xml_xpath_context.c +103 -100
  36. data/ext/nokogiri/xslt_stylesheet.c +8 -8
  37. data/gumbo-parser/src/ascii.c +2 -2
  38. data/gumbo-parser/src/error.c +76 -48
  39. data/gumbo-parser/src/error.h +5 -1
  40. data/gumbo-parser/src/nokogiri_gumbo.h +11 -2
  41. data/gumbo-parser/src/parser.c +63 -25
  42. data/gumbo-parser/src/tokenizer.c +6 -6
  43. data/lib/nokogiri/class_resolver.rb +1 -1
  44. data/lib/nokogiri/css/node.rb +6 -2
  45. data/lib/nokogiri/css/parser.rb +6 -4
  46. data/lib/nokogiri/css/parser.y +2 -2
  47. data/lib/nokogiri/css/parser_extras.rb +6 -66
  48. data/lib/nokogiri/css/selector_cache.rb +38 -0
  49. data/lib/nokogiri/css/tokenizer.rb +4 -4
  50. data/lib/nokogiri/css/tokenizer.rex +9 -8
  51. data/lib/nokogiri/css/xpath_visitor.rb +43 -6
  52. data/lib/nokogiri/css.rb +86 -20
  53. data/lib/nokogiri/decorators/slop.rb +3 -5
  54. data/lib/nokogiri/encoding_handler.rb +2 -2
  55. data/lib/nokogiri/html4/document.rb +44 -23
  56. data/lib/nokogiri/html4/document_fragment.rb +124 -12
  57. data/lib/nokogiri/html4/encoding_reader.rb +1 -1
  58. data/lib/nokogiri/html4/sax/parser.rb +23 -38
  59. data/lib/nokogiri/html4/sax/parser_context.rb +4 -9
  60. data/lib/nokogiri/html4.rb +9 -14
  61. data/lib/nokogiri/html5/builder.rb +40 -0
  62. data/lib/nokogiri/html5/document.rb +61 -30
  63. data/lib/nokogiri/html5/document_fragment.rb +130 -20
  64. data/lib/nokogiri/html5/node.rb +4 -4
  65. data/lib/nokogiri/html5.rb +114 -72
  66. data/lib/nokogiri/version/constant.rb +1 -1
  67. data/lib/nokogiri/xml/builder.rb +8 -1
  68. data/lib/nokogiri/xml/document.rb +70 -26
  69. data/lib/nokogiri/xml/document_fragment.rb +84 -13
  70. data/lib/nokogiri/xml/node.rb +82 -11
  71. data/lib/nokogiri/xml/node_set.rb +9 -7
  72. data/lib/nokogiri/xml/parse_options.rb +1 -1
  73. data/lib/nokogiri/xml/pp/node.rb +6 -1
  74. data/lib/nokogiri/xml/reader.rb +46 -13
  75. data/lib/nokogiri/xml/relax_ng.rb +57 -20
  76. data/lib/nokogiri/xml/sax/document.rb +174 -83
  77. data/lib/nokogiri/xml/sax/parser.rb +115 -41
  78. data/lib/nokogiri/xml/sax/parser_context.rb +116 -8
  79. data/lib/nokogiri/xml/sax/push_parser.rb +3 -0
  80. data/lib/nokogiri/xml/sax.rb +48 -0
  81. data/lib/nokogiri/xml/schema.rb +112 -45
  82. data/lib/nokogiri/xml/searchable.rb +38 -42
  83. data/lib/nokogiri/xml/syntax_error.rb +22 -0
  84. data/lib/nokogiri/xml/xpath_context.rb +14 -3
  85. data/lib/nokogiri/xml.rb +13 -24
  86. data/lib/nokogiri/xslt.rb +3 -9
  87. data/lib/xsd/xmlparser/nokogiri.rb +3 -4
  88. data/patches/libxml2/0019-xpath-Use-separate-static-hash-table-for-standard-fu.patch +244 -0
  89. data/ports/archives/libxml2-2.13.5.tar.xz +0 -0
  90. data/ports/archives/libxslt-1.1.42.tar.xz +0 -0
  91. metadata +13 -12
  92. data/ext/nokogiri/libxml2_backwards_compat.c +0 -121
  93. data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +0 -25
  94. data/ports/archives/libxml2-2.12.9.tar.xz +0 -0
  95. data/ports/archives/libxslt-1.1.39.tar.xz +0 -0
@@ -12,23 +12,23 @@ static const xmlChar *NOKOGIRI_BUILTIN_PREFIX = (const xmlChar *)"nokogiri-built
12
12
  static const xmlChar *NOKOGIRI_BUILTIN_URI = (const xmlChar *)"https://www.nokogiri.org/default_ns/ruby/builtins";
13
13
 
14
14
  static void
15
- xml_xpath_context_deallocate(void *data)
15
+ _noko_xml_xpath_context_dfree(void *data)
16
16
  {
17
17
  xmlXPathContextPtr c_context = data;
18
18
  xmlXPathFreeContext(c_context);
19
19
  }
20
20
 
21
- static const rb_data_type_t xml_xpath_context_type = {
22
- .wrap_struct_name = "Nokogiri::XML::XPathContext",
21
+ static const rb_data_type_t _noko_xml_xpath_context_type = {
22
+ .wrap_struct_name = "xmlXPathContext",
23
23
  .function = {
24
- .dfree = xml_xpath_context_deallocate,
24
+ .dfree = _noko_xml_xpath_context_dfree,
25
25
  },
26
26
  .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
27
27
  };
28
28
 
29
29
  /* find a CSS class in an HTML element's `class` attribute */
30
30
  static const xmlChar *
31
- builtin_css_class(const xmlChar *str, const xmlChar *val)
31
+ _noko_xml_xpath_context__css_class(const xmlChar *str, const xmlChar *val)
32
32
  {
33
33
  int val_len;
34
34
 
@@ -62,9 +62,9 @@ builtin_css_class(const xmlChar *str, const xmlChar *val)
62
62
  return (NULL);
63
63
  }
64
64
 
65
- /* xmlXPathFunction to wrap builtin_css_class() */
65
+ /* xmlXPathFunction to wrap _noko_xml_xpath_context__css_class() */
66
66
  static void
67
- xpath_builtin_css_class(xmlXPathParserContextPtr ctxt, int nargs)
67
+ noko_xml_xpath_context_xpath_func_css_class(xmlXPathParserContextPtr ctxt, int nargs)
68
68
  {
69
69
  xmlXPathObjectPtr hay, needle;
70
70
 
@@ -85,7 +85,7 @@ xpath_builtin_css_class(xmlXPathParserContextPtr ctxt, int nargs)
85
85
  XP_ERROR(XPATH_INVALID_TYPE);
86
86
  }
87
87
 
88
- if (builtin_css_class(hay->stringval, needle->stringval)) {
88
+ if (_noko_xml_xpath_context__css_class(hay->stringval, needle->stringval)) {
89
89
  valuePush(ctxt, xmlXPathNewBoolean(1));
90
90
  } else {
91
91
  valuePush(ctxt, xmlXPathNewBoolean(0));
@@ -99,7 +99,7 @@ xpath_builtin_css_class(xmlXPathParserContextPtr ctxt, int nargs)
99
99
  /* xmlXPathFunction to select nodes whose local name matches, for HTML5 CSS queries that should
100
100
  * ignore namespaces */
101
101
  static void
102
- xpath_builtin_local_name_is(xmlXPathParserContextPtr ctxt, int nargs)
102
+ noko_xml_xpath_context_xpath_func_local_name_is(xmlXPathParserContextPtr ctxt, int nargs)
103
103
  {
104
104
  xmlXPathObjectPtr element_name;
105
105
 
@@ -124,25 +124,26 @@ xpath_builtin_local_name_is(xmlXPathParserContextPtr ctxt, int nargs)
124
124
  * register_ns(prefix, uri) → Nokogiri::XML::XPathContext
125
125
  *
126
126
  * Register the namespace with +prefix+ and +uri+ for use in future queries.
127
+ * Passing a uri of +nil+ will unregister the namespace.
127
128
  *
128
129
  * [Returns] +self+
129
130
  */
130
131
  static VALUE
131
- rb_xml_xpath_context_register_ns(VALUE rb_context, VALUE prefix, VALUE uri)
132
+ noko_xml_xpath_context_register_ns(VALUE rb_context, VALUE prefix, VALUE uri)
132
133
  {
133
134
  xmlXPathContextPtr c_context;
135
+ const xmlChar *ns_uri;
134
136
 
135
- TypedData_Get_Struct(
136
- rb_context,
137
- xmlXPathContext,
138
- &xml_xpath_context_type,
139
- c_context
140
- );
137
+ TypedData_Get_Struct(rb_context, xmlXPathContext, &_noko_xml_xpath_context_type, c_context);
138
+
139
+ if (NIL_P(uri)) {
140
+ ns_uri = NULL;
141
+ } else {
142
+ ns_uri = (const xmlChar *)StringValueCStr(uri);
143
+ }
144
+
145
+ xmlXPathRegisterNs(c_context, (const xmlChar *)StringValueCStr(prefix), ns_uri);
141
146
 
142
- xmlXPathRegisterNs(c_context,
143
- (const xmlChar *)StringValueCStr(prefix),
144
- (const xmlChar *)StringValueCStr(uri)
145
- );
146
147
  return rb_context;
147
148
  }
148
149
 
@@ -151,29 +152,25 @@ rb_xml_xpath_context_register_ns(VALUE rb_context, VALUE prefix, VALUE uri)
151
152
  * register_variable(name, value) → Nokogiri::XML::XPathContext
152
153
  *
153
154
  * Register the variable +name+ with +value+ for use in future queries.
155
+ * Passing a value of +nil+ will unregister the variable.
154
156
  *
155
157
  * [Returns] +self+
156
158
  */
157
159
  static VALUE
158
- rb_xml_xpath_context_register_variable(VALUE rb_context, VALUE name, VALUE value)
160
+ noko_xml_xpath_context_register_variable(VALUE rb_context, VALUE name, VALUE value)
159
161
  {
160
162
  xmlXPathContextPtr c_context;
161
163
  xmlXPathObjectPtr xmlValue;
162
164
 
163
- TypedData_Get_Struct(
164
- rb_context,
165
- xmlXPathContext,
166
- &xml_xpath_context_type,
167
- c_context
168
- );
165
+ TypedData_Get_Struct(rb_context, xmlXPathContext, &_noko_xml_xpath_context_type, c_context);
169
166
 
170
- xmlValue = xmlXPathNewCString(StringValueCStr(value));
167
+ if (NIL_P(value)) {
168
+ xmlValue = NULL;
169
+ } else {
170
+ xmlValue = xmlXPathNewCString(StringValueCStr(value));
171
+ }
171
172
 
172
- xmlXPathRegisterVariable(
173
- c_context,
174
- (const xmlChar *)StringValueCStr(name),
175
- xmlValue
176
- );
173
+ xmlXPathRegisterVariable(c_context, (const xmlChar *)StringValueCStr(name), xmlValue);
177
174
 
178
175
  return rb_context;
179
176
  }
@@ -184,7 +181,7 @@ rb_xml_xpath_context_register_variable(VALUE rb_context, VALUE name, VALUE value
184
181
  * returns Qundef if no conversion was possible.
185
182
  */
186
183
  static VALUE
187
- xpath2ruby(xmlXPathObjectPtr c_xpath_object, xmlXPathContextPtr c_context)
184
+ _noko_xml_xpath_context__xpath2ruby(xmlXPathObjectPtr c_xpath_object, xmlXPathContextPtr c_context)
188
185
  {
189
186
  VALUE rb_retval;
190
187
 
@@ -238,7 +235,7 @@ Nokogiri_marshal_xpath_funcall_and_return_values(
238
235
 
239
236
  for (int j = argc - 1 ; j >= 0 ; --j) {
240
237
  c_xpath_object = valuePop(ctxt);
241
- argv[j] = xpath2ruby(c_xpath_object, ctxt->context);
238
+ argv[j] = _noko_xml_xpath_context__xpath2ruby(c_xpath_object, ctxt->context);
242
239
  if (argv[j] == Qundef) {
243
240
  argv[j] = NOKOGIRI_STR_NEW2(xmlXPathCastToString(c_xpath_object));
244
241
  }
@@ -294,7 +291,7 @@ Nokogiri_marshal_xpath_funcall_and_return_values(
294
291
  }
295
292
 
296
293
  static void
297
- method_caller(xmlXPathParserContextPtr ctxt, int argc)
294
+ _noko_xml_xpath_context__handler_invoker(xmlXPathParserContextPtr ctxt, int argc)
298
295
  {
299
296
  VALUE rb_xpath_handler = Qnil;
300
297
  const char *method_name = NULL ;
@@ -316,15 +313,15 @@ method_caller(xmlXPathParserContextPtr ctxt, int argc)
316
313
  }
317
314
 
318
315
  static xmlXPathFunction
319
- handler_lookup(void *data, const xmlChar *c_name, const xmlChar *c_ns_uri)
316
+ _noko_xml_xpath_context_handler_lookup(void *data, const xmlChar *c_name, const xmlChar *c_ns_uri)
320
317
  {
321
318
  VALUE rb_handler = (VALUE)data;
322
319
  if (rb_respond_to(rb_handler, rb_intern((const char *)c_name))) {
323
320
  if (c_ns_uri == NULL) {
324
321
  NOKO_WARN_DEPRECATION("A custom XPath or CSS handler function named '%s' is being invoked without a namespace. Please update your query to reference this function as 'nokogiri:%s'. Invoking custom handler functions without a namespace is deprecated and will become an error in Nokogiri v1.17.0.",
325
- c_name, c_name); // deprecated in v1.15.0, remove in v1.17.0
322
+ c_name, c_name); // TODO deprecated in v1.15.0, remove in v1.19.0
326
323
  }
327
- return method_caller;
324
+ return _noko_xml_xpath_context__handler_invoker;
328
325
  }
329
326
 
330
327
  return NULL;
@@ -332,7 +329,7 @@ handler_lookup(void *data, const xmlChar *c_name, const xmlChar *c_ns_uri)
332
329
 
333
330
  PRINTFLIKE_DECL(2, 3)
334
331
  static void
335
- generic_exception_pusher(void *data, const char *msg, ...)
332
+ _noko_xml_xpath_context__generic_exception_pusher(void *data, const char *msg, ...)
336
333
  {
337
334
  VALUE rb_errors = (VALUE)data;
338
335
  VALUE rb_message;
@@ -343,7 +340,7 @@ generic_exception_pusher(void *data, const char *msg, ...)
343
340
  #ifdef TRUFFLERUBY_NOKOGIRI_SYSTEM_LIBRARIES
344
341
  /* It is not currently possible to pass var args from native
345
342
  functions to sulong, so we work around the issue here. */
346
- rb_message = rb_sprintf("generic_exception_pusher: %s", msg);
343
+ rb_message = rb_sprintf("_noko_xml_xpath_context__generic_exception_pusher: %s", msg);
347
344
  #else
348
345
  va_list args;
349
346
  va_start(args, msg);
@@ -365,58 +362,55 @@ generic_exception_pusher(void *data, const char *msg, ...)
365
362
  * a +Float+, or a boolean.
366
363
  */
367
364
  static VALUE
368
- rb_xml_xpath_context_evaluate(int argc, VALUE *argv, VALUE rb_context)
365
+ noko_xml_xpath_context_evaluate(int argc, VALUE *argv, VALUE rb_context)
369
366
  {
370
- VALUE search_path, xpath_handler;
371
- VALUE retval = Qnil;
372
367
  xmlXPathContextPtr c_context;
373
- xmlXPathObjectPtr xpath;
374
- xmlChar *query;
375
- VALUE errors = rb_ary_new();
376
-
377
- TypedData_Get_Struct(
378
- rb_context,
379
- xmlXPathContext,
380
- &xml_xpath_context_type,
381
- c_context
382
- );
368
+ VALUE rb_expression = Qnil;
369
+ VALUE rb_function_lookup_handler = Qnil;
370
+ xmlChar *c_expression_str = NULL;
371
+ VALUE rb_errors = rb_ary_new();
372
+ xmlXPathObjectPtr c_xpath_object;
373
+ VALUE rb_xpath_object = Qnil;
383
374
 
384
- if (rb_scan_args(argc, argv, "11", &search_path, &xpath_handler) == 1) {
385
- xpath_handler = Qnil;
386
- }
375
+ TypedData_Get_Struct(rb_context, xmlXPathContext, &_noko_xml_xpath_context_type, c_context);
376
+
377
+ rb_scan_args(argc, argv, "11", &rb_expression, &rb_function_lookup_handler);
387
378
 
388
- query = (xmlChar *)StringValueCStr(search_path);
379
+ c_expression_str = (xmlChar *)StringValueCStr(rb_expression);
389
380
 
390
- if (Qnil != xpath_handler) {
381
+ if (Qnil != rb_function_lookup_handler) {
391
382
  /* FIXME: not sure if this is the correct place to shove private data. */
392
- c_context->userData = (void *)xpath_handler;
383
+ c_context->userData = (void *)rb_function_lookup_handler;
393
384
  xmlXPathRegisterFuncLookup(
394
385
  c_context,
395
- handler_lookup,
396
- (void *)xpath_handler
386
+ _noko_xml_xpath_context_handler_lookup,
387
+ (void *)rb_function_lookup_handler
397
388
  );
398
389
  }
399
390
 
400
- xmlSetStructuredErrorFunc((void *)errors, Nokogiri_error_array_pusher);
401
- xmlSetGenericErrorFunc((void *)errors, generic_exception_pusher);
391
+ /* TODO: use xmlXPathSetErrorHandler (as of 2.13.0) */
392
+ xmlSetStructuredErrorFunc((void *)rb_errors, noko__error_array_pusher);
393
+ xmlSetGenericErrorFunc((void *)rb_errors, _noko_xml_xpath_context__generic_exception_pusher);
402
394
 
403
- xpath = xmlXPathEvalExpression(query, c_context);
395
+ c_xpath_object = xmlXPathEvalExpression(c_expression_str, c_context);
404
396
 
405
397
  xmlSetStructuredErrorFunc(NULL, NULL);
406
398
  xmlSetGenericErrorFunc(NULL, NULL);
407
399
 
408
- if (xpath == NULL) {
409
- rb_exc_raise(rb_ary_entry(errors, 0));
400
+ xmlXPathRegisterFuncLookup(c_context, NULL, NULL);
401
+
402
+ if (c_xpath_object == NULL) {
403
+ rb_exc_raise(rb_ary_entry(rb_errors, 0));
410
404
  }
411
405
 
412
- retval = xpath2ruby(xpath, c_context);
413
- if (retval == Qundef) {
414
- retval = noko_xml_node_set_wrap(NULL, DOC_RUBY_OBJECT(c_context->doc));
406
+ rb_xpath_object = _noko_xml_xpath_context__xpath2ruby(c_xpath_object, c_context);
407
+ if (rb_xpath_object == Qundef) {
408
+ rb_xpath_object = noko_xml_node_set_wrap(NULL, DOC_RUBY_OBJECT(c_context->doc));
415
409
  }
416
410
 
417
- xmlXPathFreeNodeSetList(xpath);
411
+ xmlXPathFreeNodeSetList(c_xpath_object);
418
412
 
419
- return retval;
413
+ return rb_xpath_object;
420
414
  }
421
415
 
422
416
  /*
@@ -426,45 +420,53 @@ rb_xml_xpath_context_evaluate(int argc, VALUE *argv, VALUE rb_context)
426
420
  * Create a new XPathContext with +node+ as the context node.
427
421
  */
428
422
  static VALUE
429
- rb_xml_xpath_context_new(VALUE klass, VALUE rb_node)
423
+ noko_xml_xpath_context_new(VALUE klass, VALUE rb_node)
430
424
  {
431
- xmlNodePtr node;
425
+ xmlNodePtr c_node;
432
426
  xmlXPathContextPtr c_context;
433
427
  VALUE rb_context;
434
428
 
435
- Noko_Node_Get_Struct(rb_node, xmlNode, node);
429
+ Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
436
430
 
437
431
  #if LIBXML_VERSION < 21000
438
- /* deprecated in 40483d0 */
439
- xmlXPathInit();
432
+ xmlXPathInit(); /* deprecated in 40483d0 */
440
433
  #endif
441
434
 
442
- c_context = xmlXPathNewContext(node->doc);
443
- c_context->node = node;
435
+ c_context = xmlXPathNewContext(c_node->doc);
436
+ c_context->node = c_node;
444
437
 
445
438
  xmlXPathRegisterNs(c_context, NOKOGIRI_PREFIX, NOKOGIRI_URI);
446
439
  xmlXPathRegisterNs(c_context, NOKOGIRI_BUILTIN_PREFIX, NOKOGIRI_BUILTIN_URI);
447
- xmlXPathRegisterFuncNS(
448
- c_context,
449
- (const xmlChar *)"css-class",
450
- NOKOGIRI_BUILTIN_URI,
451
- xpath_builtin_css_class
452
- );
453
- xmlXPathRegisterFuncNS(
454
- c_context,
455
- (const xmlChar *)"local-name-is",
456
- NOKOGIRI_BUILTIN_URI,
457
- xpath_builtin_local_name_is
458
- );
459
440
 
460
- rb_context = TypedData_Wrap_Struct(
461
- klass,
462
- &xml_xpath_context_type,
463
- c_context
464
- );
441
+ xmlXPathRegisterFuncNS(c_context,
442
+ (const xmlChar *)"css-class", NOKOGIRI_BUILTIN_URI,
443
+ noko_xml_xpath_context_xpath_func_css_class);
444
+ xmlXPathRegisterFuncNS(c_context,
445
+ (const xmlChar *)"local-name-is", NOKOGIRI_BUILTIN_URI,
446
+ noko_xml_xpath_context_xpath_func_local_name_is);
447
+
448
+ rb_context = TypedData_Wrap_Struct(klass, &_noko_xml_xpath_context_type, c_context);
449
+
465
450
  return rb_context;
466
451
  }
467
452
 
453
+
454
+ /* :nodoc: */
455
+ static VALUE
456
+ noko_xml_xpath_context_set_node(VALUE rb_context, VALUE rb_node)
457
+ {
458
+ xmlNodePtr c_node;
459
+ xmlXPathContextPtr c_context;
460
+
461
+ TypedData_Get_Struct(rb_context, xmlXPathContext, &_noko_xml_xpath_context_type, c_context);
462
+ Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
463
+
464
+ c_context->doc = c_node->doc;
465
+ c_context->node = c_node;
466
+
467
+ return rb_node;
468
+ }
469
+
468
470
  void
469
471
  noko_init_xml_xpath_context(void)
470
472
  {
@@ -475,9 +477,10 @@ noko_init_xml_xpath_context(void)
475
477
 
476
478
  rb_undef_alloc_func(cNokogiriXmlXpathContext);
477
479
 
478
- rb_define_singleton_method(cNokogiriXmlXpathContext, "new", rb_xml_xpath_context_new, 1);
480
+ rb_define_singleton_method(cNokogiriXmlXpathContext, "new", noko_xml_xpath_context_new, 1);
479
481
 
480
- rb_define_method(cNokogiriXmlXpathContext, "evaluate", rb_xml_xpath_context_evaluate, -1);
481
- rb_define_method(cNokogiriXmlXpathContext, "register_variable", rb_xml_xpath_context_register_variable, 2);
482
- rb_define_method(cNokogiriXmlXpathContext, "register_ns", rb_xml_xpath_context_register_ns, 2);
482
+ rb_define_method(cNokogiriXmlXpathContext, "evaluate", noko_xml_xpath_context_evaluate, -1);
483
+ rb_define_method(cNokogiriXmlXpathContext, "register_variable", noko_xml_xpath_context_register_variable, 2);
484
+ rb_define_method(cNokogiriXmlXpathContext, "register_ns", noko_xml_xpath_context_register_ns, 2);
485
+ rb_define_method(cNokogiriXmlXpathContext, "node=", noko_xml_xpath_context_set_node, 1);
483
486
  }
@@ -1,6 +1,6 @@
1
1
  #include <nokogiri.h>
2
2
 
3
- VALUE cNokogiriXsltStylesheet ;
3
+ VALUE cNokogiriXsltStylesheet;
4
4
 
5
5
  static void
6
6
  mark(void *data)
@@ -18,8 +18,8 @@ dealloc(void *data)
18
18
  ruby_xfree(wrapper);
19
19
  }
20
20
 
21
- static const rb_data_type_t xslt_stylesheet_type = {
22
- .wrap_struct_name = "Nokogiri::XSLT::Stylesheet",
21
+ static const rb_data_type_t nokogiri_xslt_stylesheet_tuple_type = {
22
+ .wrap_struct_name = "nokogiriXsltStylesheetTuple",
23
23
  .function = {
24
24
  .dmark = mark,
25
25
  .dfree = dealloc,
@@ -56,7 +56,7 @@ Nokogiri_wrap_xslt_stylesheet(xsltStylesheetPtr ss)
56
56
  self = TypedData_Make_Struct(
57
57
  cNokogiriXsltStylesheet,
58
58
  nokogiriXsltStylesheetTuple,
59
- &xslt_stylesheet_type,
59
+ &nokogiri_xslt_stylesheet_tuple_type,
60
60
  wrapper
61
61
  );
62
62
 
@@ -124,7 +124,7 @@ rb_xslt_stylesheet_serialize(VALUE self, VALUE xmlobj)
124
124
  TypedData_Get_Struct(
125
125
  self,
126
126
  nokogiriXsltStylesheetTuple,
127
- &xslt_stylesheet_type,
127
+ &nokogiri_xslt_stylesheet_tuple_type,
128
128
  wrapper
129
129
  );
130
130
  xsltSaveResultToString(&doc_ptr, &doc_len, xml, wrapper->ss);
@@ -273,7 +273,7 @@ rb_xslt_stylesheet_transform(int argc, VALUE *argv, VALUE self)
273
273
  Check_Type(rb_param, T_ARRAY);
274
274
 
275
275
  c_document = noko_xml_document_unwrap(rb_document);
276
- TypedData_Get_Struct(self, nokogiriXsltStylesheetTuple, &xslt_stylesheet_type, wrapper);
276
+ TypedData_Get_Struct(self, nokogiriXsltStylesheetTuple, &nokogiri_xslt_stylesheet_tuple_type, wrapper);
277
277
 
278
278
  param_len = RARRAY_LEN(rb_param);
279
279
  params = ruby_xcalloc((size_t)param_len + 1, sizeof(char *));
@@ -362,7 +362,7 @@ initFunc(xsltTransformContextPtr ctxt, const xmlChar *uri)
362
362
  TypedData_Get_Struct(
363
363
  (VALUE)ctxt->style->_private,
364
364
  nokogiriXsltStylesheetTuple,
365
- &xslt_stylesheet_type,
365
+ &nokogiri_xslt_stylesheet_tuple_type,
366
366
  wrapper
367
367
  );
368
368
  inst = rb_class_new_instance(0, NULL, obj);
@@ -380,7 +380,7 @@ shutdownFunc(xsltTransformContextPtr ctxt,
380
380
  TypedData_Get_Struct(
381
381
  (VALUE)ctxt->style->_private,
382
382
  nokogiriXsltStylesheetTuple,
383
- &xslt_stylesheet_type,
383
+ &nokogiri_xslt_stylesheet_tuple_type,
384
384
  wrapper
385
385
  );
386
386
 
@@ -44,8 +44,8 @@ const unsigned char _gumbo_ascii_table[0x80] = {
44
44
  };
45
45
 
46
46
  // Table generation code.
47
- // clang -DGUMBO_GEN_TABLE=1 ascii.c && ./a.out && rm a.out
48
- #if GUMBO_GEN_TABLE
47
+ // clang -DGUMBO_GEN_TABLE ascii.c && ./a.out && rm a.out
48
+ #ifdef GUMBO_GEN_TABLE
49
49
  #include <stdio.h>
50
50
 
51
51
  int main() {
@@ -46,33 +46,40 @@ static int PRINTF(2) print_message (
46
46
  args
47
47
  );
48
48
  va_end(args);
49
- #if _MSC_VER && _MSC_VER < 1900
49
+
50
+ #if (defined(_MSC_VER) && (_MSC_VER < 1900)) || defined(_RUBY_MSVCRT)
50
51
  if (bytes_written == -1) {
51
52
  // vsnprintf returns -1 on older MSVC++ if there's not enough capacity,
52
53
  // instead of returning the number of bytes that would've been written had
53
- // there been enough. In this case, we'll double the buffer size and hope
54
- // it fits when we retry (letting it fail and returning 0 if it doesn't),
55
- // since there's no way to smartly resize the buffer.
56
- gumbo_string_buffer_reserve(output->capacity * 2, output);
54
+ // there been enough. In this case, we can call vsnprintf() again but
55
+ // with a count of 0 to get the number of bytes that would be written, not including
56
+ // the null terminator.
57
+ // https://learn.microsoft.com/en-us/cpp/c-runtime-library/reference/vsnprintf-vsnprintf-vsnprintf-l-vsnwprintf-vsnwprintf-l?view=msvc-140#behavior-summary
58
+
57
59
  va_start(args, format);
58
- int result = vsnprintf (
59
- output->data + output->length,
60
- remaining_capacity,
60
+ bytes_written = vsnprintf (
61
+ NULL,
62
+ 0,
61
63
  format,
62
64
  args
63
65
  );
64
66
  va_end(args);
65
- return result == -1 ? 0 : result;
66
67
  }
67
- #else
68
+ #endif
69
+
68
70
  // -1 in standard C99 indicates an encoding error. Return 0 and do nothing.
69
71
  if (bytes_written == -1) {
70
72
  return 0;
71
73
  }
72
- #endif
73
74
 
74
75
  if (bytes_written >= remaining_capacity) {
75
- gumbo_string_buffer_reserve(output->capacity + bytes_written, output);
76
+ // At least double the size of the buffer.
77
+ size_t new_capacity = output->capacity * 2;
78
+ if (new_capacity < output->length + bytes_written + 1) {
79
+ // The +1 is for the null terminator.
80
+ new_capacity = output->length + bytes_written + 1;
81
+ }
82
+ gumbo_string_buffer_reserve(new_capacity, output);
76
83
  remaining_capacity = output->capacity - output->length;
77
84
  va_start(args, format);
78
85
  bytes_written = vsnprintf (
@@ -96,8 +103,14 @@ static void print_tag_stack (
96
103
  if (i) {
97
104
  print_message(output, ", ");
98
105
  }
99
- GumboTag tag = (GumboTag)(intptr_t) error->tag_stack.data[i];
100
- print_message(output, "%s", gumbo_normalized_tagname(tag));
106
+ uintptr_t tag = (uintptr_t) error->tag_stack.data[i];
107
+ const char* tag_name;
108
+ if (tag > GUMBO_TAG_UNKNOWN) {
109
+ tag_name = error->tag_stack.data[i];
110
+ } else {
111
+ tag_name = gumbo_normalized_tagname((GumboTag)tag);
112
+ }
113
+ print_message(output, "%s", tag_name);
101
114
  }
102
115
  gumbo_string_buffer_append_codepoint('.', output);
103
116
  }
@@ -326,41 +339,45 @@ static void handle_parser_error (
326
339
  }
327
340
 
328
341
  switch (error->input_type) {
329
- case GUMBO_TOKEN_DOCTYPE:
330
- print_message(output, "This is not a legal doctype");
331
- return;
332
- case GUMBO_TOKEN_COMMENT:
333
- // Should never happen; comments are always legal.
334
- assert(0);
335
- // But just in case...
336
- print_message(output, "Comments aren't legal here");
337
- return;
338
- case GUMBO_TOKEN_CDATA:
339
- case GUMBO_TOKEN_WHITESPACE:
340
- case GUMBO_TOKEN_CHARACTER:
341
- print_message(output, "Character tokens aren't legal here");
342
- return;
343
- case GUMBO_TOKEN_NULL:
344
- print_message(output, "Null bytes are not allowed in HTML5");
345
- return;
346
- case GUMBO_TOKEN_EOF:
347
- if (error->parser_state == GUMBO_INSERTION_MODE_INITIAL) {
348
- print_message(output, "You must provide a doctype");
349
- } else {
350
- print_message(output, "Premature end of file.");
351
- print_tag_stack(error, output);
352
- }
353
- return;
354
- case GUMBO_TOKEN_START_TAG:
355
- print_message(output, "Start tag '%s' isn't allowed here.",
356
- gumbo_normalized_tagname(error->input_tag));
357
- print_tag_stack(error, output);
358
- return;
359
- case GUMBO_TOKEN_END_TAG:
360
- print_message(output, "End tag '%s' isn't allowed here.",
361
- gumbo_normalized_tagname(error->input_tag));
342
+ case GUMBO_TOKEN_DOCTYPE:
343
+ print_message(output, "This is not a legal doctype");
344
+ return;
345
+ case GUMBO_TOKEN_COMMENT:
346
+ // Should never happen; comments are always legal.
347
+ assert(0);
348
+ // But just in case...
349
+ print_message(output, "Comments aren't legal here");
350
+ return;
351
+ case GUMBO_TOKEN_CDATA:
352
+ case GUMBO_TOKEN_WHITESPACE:
353
+ case GUMBO_TOKEN_CHARACTER:
354
+ print_message(output, "Character tokens aren't legal here");
355
+ return;
356
+ case GUMBO_TOKEN_NULL:
357
+ print_message(output, "Null bytes are not allowed in HTML5");
358
+ return;
359
+ case GUMBO_TOKEN_EOF:
360
+ if (error->parser_state == GUMBO_INSERTION_MODE_INITIAL) {
361
+ print_message(output, "You must provide a doctype");
362
+ } else {
363
+ print_message(output, "Premature end of file.");
362
364
  print_tag_stack(error, output);
363
- return;
365
+ }
366
+ return;
367
+ case GUMBO_TOKEN_START_TAG:
368
+ case GUMBO_TOKEN_END_TAG:
369
+ {
370
+ const char* tag_name;
371
+ const char* which = error->input_type == GUMBO_TOKEN_START_TAG ? "Start" : "End";
372
+ if (error->input_name) {
373
+ tag_name = error->input_name;
374
+ } else {
375
+ tag_name = gumbo_normalized_tagname(error->input_tag);
376
+ }
377
+ print_message(output, "%s tag '%s' isn't allowed here.", which, tag_name);
378
+ print_tag_stack(error, output);
379
+ return;
380
+ }
364
381
  }
365
382
  }
366
383
 
@@ -613,6 +630,17 @@ void gumbo_print_caret_diagnostic (
613
630
 
614
631
  void gumbo_error_destroy(GumboError* error) {
615
632
  if (error->type == GUMBO_ERR_PARSER) {
633
+ // Free the tag name.
634
+ if (error->v.parser.input_name) {
635
+ gumbo_free(error->v.parser.input_name);
636
+ }
637
+
638
+ for (unsigned int i = 0; i < error->v.parser.tag_stack.length; ++i) {
639
+ intptr_t tag = (intptr_t) error->v.parser.tag_stack.data[i];
640
+ if (tag > GUMBO_TAG_UNKNOWN) {
641
+ gumbo_free(error->v.parser.tag_stack.data[i]);
642
+ }
643
+ }
616
644
  gumbo_vector_destroy(&error->v.parser.tag_stack);
617
645
  }
618
646
  gumbo_free(error);
@@ -95,12 +95,16 @@ typedef struct GumboInternalParserError {
95
95
  // The HTML tag of the input token. TAG_UNKNOWN if this was not a tag token.
96
96
  GumboTag input_tag;
97
97
 
98
+ // The HTML tag of the input token if it was a nonstandard tag token. NULL otherwise.
99
+ char *input_name;
100
+
98
101
  // The insertion mode that the parser was in at the time.
99
102
  GumboInsertionMode parser_state;
100
103
 
101
104
  // The tag stack at the point of the error. Note that this is a GumboVector
102
105
  // of GumboTag's *stored by value* - cast the void* to a GumboTag directly to
103
- // get at the tag.
106
+ // get at the tag. For nonstandard tags, this is a pointer to an owned char *
107
+ // containing the tag name.
104
108
  GumboVector /* GumboTag */ tag_stack;
105
109
  } GumboParserError;
106
110
 
@@ -780,6 +780,15 @@ typedef struct GumboInternalOptions {
780
780
  * Default: `false`.
781
781
  */
782
782
  bool fragment_context_has_form_ancestor;
783
+
784
+ /**
785
+ * Parse `noscript` elements as if scripting was enabled. This causes the
786
+ * contents of the `noscript` element to be parsed as raw text, rather
787
+ * than as HTML elements.
788
+ *
789
+ * Default: `false`.
790
+ */
791
+ bool parse_noscript_content_as_text;
783
792
  } GumboOptions;
784
793
 
785
794
  /** Default options struct; use this with gumbo_parse_with_options. */
@@ -791,7 +800,7 @@ extern const GumboOptions kGumboDefaultOptions;
791
800
  */
792
801
  typedef enum {
793
802
  /**
794
- * Indicates that parsing completed successfuly. The resulting tree
803
+ * Indicates that parsing completed successfully. The resulting tree
795
804
  * will be a complete document.
796
805
  */
797
806
  GUMBO_STATUS_OK,
@@ -841,7 +850,7 @@ typedef struct GumboInternalOutput {
841
850
  GumboVector /* GumboError */ errors;
842
851
 
843
852
  /**
844
- * True if the parser encounted an error.
853
+ * True if the parser encountered an error.
845
854
  *
846
855
  * This can be true and `errors` an empty `GumboVector` if the `max_errors`
847
856
  * option was set to 0.