nokogiri 1.16.7 → 1.18.1

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (95) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +14 -22
  3. data/LICENSE-DEPENDENCIES.md +6 -6
  4. data/README.md +8 -5
  5. data/dependencies.yml +6 -6
  6. data/ext/nokogiri/extconf.rb +188 -142
  7. data/ext/nokogiri/gumbo.c +69 -53
  8. data/ext/nokogiri/html4_document.c +10 -4
  9. data/ext/nokogiri/html4_element_description.c +18 -18
  10. data/ext/nokogiri/html4_sax_parser.c +40 -0
  11. data/ext/nokogiri/html4_sax_parser_context.c +48 -58
  12. data/ext/nokogiri/html4_sax_push_parser.c +25 -24
  13. data/ext/nokogiri/libxml2_polyfill.c +114 -0
  14. data/ext/nokogiri/nokogiri.c +9 -2
  15. data/ext/nokogiri/nokogiri.h +18 -33
  16. data/ext/nokogiri/xml_attr.c +1 -1
  17. data/ext/nokogiri/xml_cdata.c +2 -10
  18. data/ext/nokogiri/xml_comment.c +3 -8
  19. data/ext/nokogiri/xml_document.c +163 -156
  20. data/ext/nokogiri/xml_document_fragment.c +10 -25
  21. data/ext/nokogiri/xml_dtd.c +1 -1
  22. data/ext/nokogiri/xml_element_content.c +9 -9
  23. data/ext/nokogiri/xml_encoding_handler.c +4 -4
  24. data/ext/nokogiri/xml_namespace.c +6 -6
  25. data/ext/nokogiri/xml_node.c +141 -104
  26. data/ext/nokogiri/xml_node_set.c +46 -44
  27. data/ext/nokogiri/xml_reader.c +54 -58
  28. data/ext/nokogiri/xml_relax_ng.c +35 -56
  29. data/ext/nokogiri/xml_sax_parser.c +156 -88
  30. data/ext/nokogiri/xml_sax_parser_context.c +219 -131
  31. data/ext/nokogiri/xml_sax_push_parser.c +68 -49
  32. data/ext/nokogiri/xml_schema.c +50 -85
  33. data/ext/nokogiri/xml_syntax_error.c +19 -11
  34. data/ext/nokogiri/xml_text.c +2 -4
  35. data/ext/nokogiri/xml_xpath_context.c +103 -100
  36. data/ext/nokogiri/xslt_stylesheet.c +8 -8
  37. data/gumbo-parser/src/ascii.c +2 -2
  38. data/gumbo-parser/src/error.c +76 -48
  39. data/gumbo-parser/src/error.h +5 -1
  40. data/gumbo-parser/src/nokogiri_gumbo.h +11 -2
  41. data/gumbo-parser/src/parser.c +63 -25
  42. data/gumbo-parser/src/tokenizer.c +6 -6
  43. data/lib/nokogiri/class_resolver.rb +1 -1
  44. data/lib/nokogiri/css/node.rb +6 -2
  45. data/lib/nokogiri/css/parser.rb +6 -4
  46. data/lib/nokogiri/css/parser.y +2 -2
  47. data/lib/nokogiri/css/parser_extras.rb +6 -66
  48. data/lib/nokogiri/css/selector_cache.rb +38 -0
  49. data/lib/nokogiri/css/tokenizer.rb +4 -4
  50. data/lib/nokogiri/css/tokenizer.rex +9 -8
  51. data/lib/nokogiri/css/xpath_visitor.rb +42 -6
  52. data/lib/nokogiri/css.rb +86 -20
  53. data/lib/nokogiri/decorators/slop.rb +3 -5
  54. data/lib/nokogiri/encoding_handler.rb +2 -2
  55. data/lib/nokogiri/html4/document.rb +44 -23
  56. data/lib/nokogiri/html4/document_fragment.rb +124 -12
  57. data/lib/nokogiri/html4/encoding_reader.rb +1 -1
  58. data/lib/nokogiri/html4/sax/parser.rb +23 -38
  59. data/lib/nokogiri/html4/sax/parser_context.rb +4 -9
  60. data/lib/nokogiri/html4.rb +9 -14
  61. data/lib/nokogiri/html5/builder.rb +40 -0
  62. data/lib/nokogiri/html5/document.rb +61 -30
  63. data/lib/nokogiri/html5/document_fragment.rb +130 -20
  64. data/lib/nokogiri/html5/node.rb +4 -4
  65. data/lib/nokogiri/html5.rb +114 -72
  66. data/lib/nokogiri/version/constant.rb +1 -1
  67. data/lib/nokogiri/xml/builder.rb +8 -1
  68. data/lib/nokogiri/xml/document.rb +70 -26
  69. data/lib/nokogiri/xml/document_fragment.rb +84 -13
  70. data/lib/nokogiri/xml/node.rb +82 -11
  71. data/lib/nokogiri/xml/node_set.rb +9 -7
  72. data/lib/nokogiri/xml/parse_options.rb +1 -1
  73. data/lib/nokogiri/xml/pp/node.rb +6 -1
  74. data/lib/nokogiri/xml/reader.rb +46 -13
  75. data/lib/nokogiri/xml/relax_ng.rb +57 -20
  76. data/lib/nokogiri/xml/sax/document.rb +174 -83
  77. data/lib/nokogiri/xml/sax/parser.rb +115 -41
  78. data/lib/nokogiri/xml/sax/parser_context.rb +116 -8
  79. data/lib/nokogiri/xml/sax/push_parser.rb +3 -0
  80. data/lib/nokogiri/xml/sax.rb +48 -0
  81. data/lib/nokogiri/xml/schema.rb +112 -45
  82. data/lib/nokogiri/xml/searchable.rb +38 -42
  83. data/lib/nokogiri/xml/syntax_error.rb +22 -0
  84. data/lib/nokogiri/xml/xpath_context.rb +14 -3
  85. data/lib/nokogiri/xml.rb +13 -24
  86. data/lib/nokogiri/xslt.rb +3 -9
  87. data/lib/xsd/xmlparser/nokogiri.rb +3 -4
  88. data/patches/libxml2/0019-xpath-Use-separate-static-hash-table-for-standard-fu.patch +244 -0
  89. data/ports/archives/libxml2-2.13.5.tar.xz +0 -0
  90. data/ports/archives/libxslt-1.1.42.tar.xz +0 -0
  91. metadata +13 -14
  92. data/ext/nokogiri/libxml2_backwards_compat.c +0 -121
  93. data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +0 -25
  94. data/ports/archives/libxml2-2.12.9.tar.xz +0 -0
  95. data/ports/archives/libxslt-1.1.39.tar.xz +0 -0
@@ -12,23 +12,23 @@ static const xmlChar *NOKOGIRI_BUILTIN_PREFIX = (const xmlChar *)"nokogiri-built
12
12
  static const xmlChar *NOKOGIRI_BUILTIN_URI = (const xmlChar *)"https://www.nokogiri.org/default_ns/ruby/builtins";
13
13
 
14
14
  static void
15
- xml_xpath_context_deallocate(void *data)
15
+ _noko_xml_xpath_context_dfree(void *data)
16
16
  {
17
17
  xmlXPathContextPtr c_context = data;
18
18
  xmlXPathFreeContext(c_context);
19
19
  }
20
20
 
21
- static const rb_data_type_t xml_xpath_context_type = {
22
- .wrap_struct_name = "Nokogiri::XML::XPathContext",
21
+ static const rb_data_type_t _noko_xml_xpath_context_type = {
22
+ .wrap_struct_name = "xmlXPathContext",
23
23
  .function = {
24
- .dfree = xml_xpath_context_deallocate,
24
+ .dfree = _noko_xml_xpath_context_dfree,
25
25
  },
26
26
  .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED,
27
27
  };
28
28
 
29
29
  /* find a CSS class in an HTML element's `class` attribute */
30
30
  static const xmlChar *
31
- builtin_css_class(const xmlChar *str, const xmlChar *val)
31
+ _noko_xml_xpath_context__css_class(const xmlChar *str, const xmlChar *val)
32
32
  {
33
33
  int val_len;
34
34
 
@@ -62,9 +62,9 @@ builtin_css_class(const xmlChar *str, const xmlChar *val)
62
62
  return (NULL);
63
63
  }
64
64
 
65
- /* xmlXPathFunction to wrap builtin_css_class() */
65
+ /* xmlXPathFunction to wrap _noko_xml_xpath_context__css_class() */
66
66
  static void
67
- xpath_builtin_css_class(xmlXPathParserContextPtr ctxt, int nargs)
67
+ noko_xml_xpath_context_xpath_func_css_class(xmlXPathParserContextPtr ctxt, int nargs)
68
68
  {
69
69
  xmlXPathObjectPtr hay, needle;
70
70
 
@@ -85,7 +85,7 @@ xpath_builtin_css_class(xmlXPathParserContextPtr ctxt, int nargs)
85
85
  XP_ERROR(XPATH_INVALID_TYPE);
86
86
  }
87
87
 
88
- if (builtin_css_class(hay->stringval, needle->stringval)) {
88
+ if (_noko_xml_xpath_context__css_class(hay->stringval, needle->stringval)) {
89
89
  valuePush(ctxt, xmlXPathNewBoolean(1));
90
90
  } else {
91
91
  valuePush(ctxt, xmlXPathNewBoolean(0));
@@ -99,7 +99,7 @@ xpath_builtin_css_class(xmlXPathParserContextPtr ctxt, int nargs)
99
99
  /* xmlXPathFunction to select nodes whose local name matches, for HTML5 CSS queries that should
100
100
  * ignore namespaces */
101
101
  static void
102
- xpath_builtin_local_name_is(xmlXPathParserContextPtr ctxt, int nargs)
102
+ noko_xml_xpath_context_xpath_func_local_name_is(xmlXPathParserContextPtr ctxt, int nargs)
103
103
  {
104
104
  xmlXPathObjectPtr element_name;
105
105
 
@@ -124,25 +124,26 @@ xpath_builtin_local_name_is(xmlXPathParserContextPtr ctxt, int nargs)
124
124
  * register_ns(prefix, uri) → Nokogiri::XML::XPathContext
125
125
  *
126
126
  * Register the namespace with +prefix+ and +uri+ for use in future queries.
127
+ * Passing a uri of +nil+ will unregister the namespace.
127
128
  *
128
129
  * [Returns] +self+
129
130
  */
130
131
  static VALUE
131
- rb_xml_xpath_context_register_ns(VALUE rb_context, VALUE prefix, VALUE uri)
132
+ noko_xml_xpath_context_register_ns(VALUE rb_context, VALUE prefix, VALUE uri)
132
133
  {
133
134
  xmlXPathContextPtr c_context;
135
+ const xmlChar *ns_uri;
134
136
 
135
- TypedData_Get_Struct(
136
- rb_context,
137
- xmlXPathContext,
138
- &xml_xpath_context_type,
139
- c_context
140
- );
137
+ TypedData_Get_Struct(rb_context, xmlXPathContext, &_noko_xml_xpath_context_type, c_context);
138
+
139
+ if (NIL_P(uri)) {
140
+ ns_uri = NULL;
141
+ } else {
142
+ ns_uri = (const xmlChar *)StringValueCStr(uri);
143
+ }
144
+
145
+ xmlXPathRegisterNs(c_context, (const xmlChar *)StringValueCStr(prefix), ns_uri);
141
146
 
142
- xmlXPathRegisterNs(c_context,
143
- (const xmlChar *)StringValueCStr(prefix),
144
- (const xmlChar *)StringValueCStr(uri)
145
- );
146
147
  return rb_context;
147
148
  }
148
149
 
@@ -151,29 +152,25 @@ rb_xml_xpath_context_register_ns(VALUE rb_context, VALUE prefix, VALUE uri)
151
152
  * register_variable(name, value) → Nokogiri::XML::XPathContext
152
153
  *
153
154
  * Register the variable +name+ with +value+ for use in future queries.
155
+ * Passing a value of +nil+ will unregister the variable.
154
156
  *
155
157
  * [Returns] +self+
156
158
  */
157
159
  static VALUE
158
- rb_xml_xpath_context_register_variable(VALUE rb_context, VALUE name, VALUE value)
160
+ noko_xml_xpath_context_register_variable(VALUE rb_context, VALUE name, VALUE value)
159
161
  {
160
162
  xmlXPathContextPtr c_context;
161
163
  xmlXPathObjectPtr xmlValue;
162
164
 
163
- TypedData_Get_Struct(
164
- rb_context,
165
- xmlXPathContext,
166
- &xml_xpath_context_type,
167
- c_context
168
- );
165
+ TypedData_Get_Struct(rb_context, xmlXPathContext, &_noko_xml_xpath_context_type, c_context);
169
166
 
170
- xmlValue = xmlXPathNewCString(StringValueCStr(value));
167
+ if (NIL_P(value)) {
168
+ xmlValue = NULL;
169
+ } else {
170
+ xmlValue = xmlXPathNewCString(StringValueCStr(value));
171
+ }
171
172
 
172
- xmlXPathRegisterVariable(
173
- c_context,
174
- (const xmlChar *)StringValueCStr(name),
175
- xmlValue
176
- );
173
+ xmlXPathRegisterVariable(c_context, (const xmlChar *)StringValueCStr(name), xmlValue);
177
174
 
178
175
  return rb_context;
179
176
  }
@@ -184,7 +181,7 @@ rb_xml_xpath_context_register_variable(VALUE rb_context, VALUE name, VALUE value
184
181
  * returns Qundef if no conversion was possible.
185
182
  */
186
183
  static VALUE
187
- xpath2ruby(xmlXPathObjectPtr c_xpath_object, xmlXPathContextPtr c_context)
184
+ _noko_xml_xpath_context__xpath2ruby(xmlXPathObjectPtr c_xpath_object, xmlXPathContextPtr c_context)
188
185
  {
189
186
  VALUE rb_retval;
190
187
 
@@ -238,7 +235,7 @@ Nokogiri_marshal_xpath_funcall_and_return_values(
238
235
 
239
236
  for (int j = argc - 1 ; j >= 0 ; --j) {
240
237
  c_xpath_object = valuePop(ctxt);
241
- argv[j] = xpath2ruby(c_xpath_object, ctxt->context);
238
+ argv[j] = _noko_xml_xpath_context__xpath2ruby(c_xpath_object, ctxt->context);
242
239
  if (argv[j] == Qundef) {
243
240
  argv[j] = NOKOGIRI_STR_NEW2(xmlXPathCastToString(c_xpath_object));
244
241
  }
@@ -294,7 +291,7 @@ Nokogiri_marshal_xpath_funcall_and_return_values(
294
291
  }
295
292
 
296
293
  static void
297
- method_caller(xmlXPathParserContextPtr ctxt, int argc)
294
+ _noko_xml_xpath_context__handler_invoker(xmlXPathParserContextPtr ctxt, int argc)
298
295
  {
299
296
  VALUE rb_xpath_handler = Qnil;
300
297
  const char *method_name = NULL ;
@@ -316,15 +313,15 @@ method_caller(xmlXPathParserContextPtr ctxt, int argc)
316
313
  }
317
314
 
318
315
  static xmlXPathFunction
319
- handler_lookup(void *data, const xmlChar *c_name, const xmlChar *c_ns_uri)
316
+ _noko_xml_xpath_context_handler_lookup(void *data, const xmlChar *c_name, const xmlChar *c_ns_uri)
320
317
  {
321
318
  VALUE rb_handler = (VALUE)data;
322
319
  if (rb_respond_to(rb_handler, rb_intern((const char *)c_name))) {
323
320
  if (c_ns_uri == NULL) {
324
321
  NOKO_WARN_DEPRECATION("A custom XPath or CSS handler function named '%s' is being invoked without a namespace. Please update your query to reference this function as 'nokogiri:%s'. Invoking custom handler functions without a namespace is deprecated and will become an error in Nokogiri v1.17.0.",
325
- c_name, c_name); // deprecated in v1.15.0, remove in v1.17.0
322
+ c_name, c_name); // TODO deprecated in v1.15.0, remove in v1.19.0
326
323
  }
327
- return method_caller;
324
+ return _noko_xml_xpath_context__handler_invoker;
328
325
  }
329
326
 
330
327
  return NULL;
@@ -332,7 +329,7 @@ handler_lookup(void *data, const xmlChar *c_name, const xmlChar *c_ns_uri)
332
329
 
333
330
  PRINTFLIKE_DECL(2, 3)
334
331
  static void
335
- generic_exception_pusher(void *data, const char *msg, ...)
332
+ _noko_xml_xpath_context__generic_exception_pusher(void *data, const char *msg, ...)
336
333
  {
337
334
  VALUE rb_errors = (VALUE)data;
338
335
  VALUE rb_message;
@@ -343,7 +340,7 @@ generic_exception_pusher(void *data, const char *msg, ...)
343
340
  #ifdef TRUFFLERUBY_NOKOGIRI_SYSTEM_LIBRARIES
344
341
  /* It is not currently possible to pass var args from native
345
342
  functions to sulong, so we work around the issue here. */
346
- rb_message = rb_sprintf("generic_exception_pusher: %s", msg);
343
+ rb_message = rb_sprintf("_noko_xml_xpath_context__generic_exception_pusher: %s", msg);
347
344
  #else
348
345
  va_list args;
349
346
  va_start(args, msg);
@@ -365,58 +362,55 @@ generic_exception_pusher(void *data, const char *msg, ...)
365
362
  * a +Float+, or a boolean.
366
363
  */
367
364
  static VALUE
368
- rb_xml_xpath_context_evaluate(int argc, VALUE *argv, VALUE rb_context)
365
+ noko_xml_xpath_context_evaluate(int argc, VALUE *argv, VALUE rb_context)
369
366
  {
370
- VALUE search_path, xpath_handler;
371
- VALUE retval = Qnil;
372
367
  xmlXPathContextPtr c_context;
373
- xmlXPathObjectPtr xpath;
374
- xmlChar *query;
375
- VALUE errors = rb_ary_new();
376
-
377
- TypedData_Get_Struct(
378
- rb_context,
379
- xmlXPathContext,
380
- &xml_xpath_context_type,
381
- c_context
382
- );
368
+ VALUE rb_expression = Qnil;
369
+ VALUE rb_function_lookup_handler = Qnil;
370
+ xmlChar *c_expression_str = NULL;
371
+ VALUE rb_errors = rb_ary_new();
372
+ xmlXPathObjectPtr c_xpath_object;
373
+ VALUE rb_xpath_object = Qnil;
383
374
 
384
- if (rb_scan_args(argc, argv, "11", &search_path, &xpath_handler) == 1) {
385
- xpath_handler = Qnil;
386
- }
375
+ TypedData_Get_Struct(rb_context, xmlXPathContext, &_noko_xml_xpath_context_type, c_context);
376
+
377
+ rb_scan_args(argc, argv, "11", &rb_expression, &rb_function_lookup_handler);
387
378
 
388
- query = (xmlChar *)StringValueCStr(search_path);
379
+ c_expression_str = (xmlChar *)StringValueCStr(rb_expression);
389
380
 
390
- if (Qnil != xpath_handler) {
381
+ if (Qnil != rb_function_lookup_handler) {
391
382
  /* FIXME: not sure if this is the correct place to shove private data. */
392
- c_context->userData = (void *)xpath_handler;
383
+ c_context->userData = (void *)rb_function_lookup_handler;
393
384
  xmlXPathRegisterFuncLookup(
394
385
  c_context,
395
- handler_lookup,
396
- (void *)xpath_handler
386
+ _noko_xml_xpath_context_handler_lookup,
387
+ (void *)rb_function_lookup_handler
397
388
  );
398
389
  }
399
390
 
400
- xmlSetStructuredErrorFunc((void *)errors, Nokogiri_error_array_pusher);
401
- xmlSetGenericErrorFunc((void *)errors, generic_exception_pusher);
391
+ /* TODO: use xmlXPathSetErrorHandler (as of 2.13.0) */
392
+ xmlSetStructuredErrorFunc((void *)rb_errors, noko__error_array_pusher);
393
+ xmlSetGenericErrorFunc((void *)rb_errors, _noko_xml_xpath_context__generic_exception_pusher);
402
394
 
403
- xpath = xmlXPathEvalExpression(query, c_context);
395
+ c_xpath_object = xmlXPathEvalExpression(c_expression_str, c_context);
404
396
 
405
397
  xmlSetStructuredErrorFunc(NULL, NULL);
406
398
  xmlSetGenericErrorFunc(NULL, NULL);
407
399
 
408
- if (xpath == NULL) {
409
- rb_exc_raise(rb_ary_entry(errors, 0));
400
+ xmlXPathRegisterFuncLookup(c_context, NULL, NULL);
401
+
402
+ if (c_xpath_object == NULL) {
403
+ rb_exc_raise(rb_ary_entry(rb_errors, 0));
410
404
  }
411
405
 
412
- retval = xpath2ruby(xpath, c_context);
413
- if (retval == Qundef) {
414
- retval = noko_xml_node_set_wrap(NULL, DOC_RUBY_OBJECT(c_context->doc));
406
+ rb_xpath_object = _noko_xml_xpath_context__xpath2ruby(c_xpath_object, c_context);
407
+ if (rb_xpath_object == Qundef) {
408
+ rb_xpath_object = noko_xml_node_set_wrap(NULL, DOC_RUBY_OBJECT(c_context->doc));
415
409
  }
416
410
 
417
- xmlXPathFreeNodeSetList(xpath);
411
+ xmlXPathFreeNodeSetList(c_xpath_object);
418
412
 
419
- return retval;
413
+ return rb_xpath_object;
420
414
  }
421
415
 
422
416
  /*
@@ -426,45 +420,53 @@ rb_xml_xpath_context_evaluate(int argc, VALUE *argv, VALUE rb_context)
426
420
  * Create a new XPathContext with +node+ as the context node.
427
421
  */
428
422
  static VALUE
429
- rb_xml_xpath_context_new(VALUE klass, VALUE rb_node)
423
+ noko_xml_xpath_context_new(VALUE klass, VALUE rb_node)
430
424
  {
431
- xmlNodePtr node;
425
+ xmlNodePtr c_node;
432
426
  xmlXPathContextPtr c_context;
433
427
  VALUE rb_context;
434
428
 
435
- Noko_Node_Get_Struct(rb_node, xmlNode, node);
429
+ Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
436
430
 
437
431
  #if LIBXML_VERSION < 21000
438
- /* deprecated in 40483d0 */
439
- xmlXPathInit();
432
+ xmlXPathInit(); /* deprecated in 40483d0 */
440
433
  #endif
441
434
 
442
- c_context = xmlXPathNewContext(node->doc);
443
- c_context->node = node;
435
+ c_context = xmlXPathNewContext(c_node->doc);
436
+ c_context->node = c_node;
444
437
 
445
438
  xmlXPathRegisterNs(c_context, NOKOGIRI_PREFIX, NOKOGIRI_URI);
446
439
  xmlXPathRegisterNs(c_context, NOKOGIRI_BUILTIN_PREFIX, NOKOGIRI_BUILTIN_URI);
447
- xmlXPathRegisterFuncNS(
448
- c_context,
449
- (const xmlChar *)"css-class",
450
- NOKOGIRI_BUILTIN_URI,
451
- xpath_builtin_css_class
452
- );
453
- xmlXPathRegisterFuncNS(
454
- c_context,
455
- (const xmlChar *)"local-name-is",
456
- NOKOGIRI_BUILTIN_URI,
457
- xpath_builtin_local_name_is
458
- );
459
440
 
460
- rb_context = TypedData_Wrap_Struct(
461
- klass,
462
- &xml_xpath_context_type,
463
- c_context
464
- );
441
+ xmlXPathRegisterFuncNS(c_context,
442
+ (const xmlChar *)"css-class", NOKOGIRI_BUILTIN_URI,
443
+ noko_xml_xpath_context_xpath_func_css_class);
444
+ xmlXPathRegisterFuncNS(c_context,
445
+ (const xmlChar *)"local-name-is", NOKOGIRI_BUILTIN_URI,
446
+ noko_xml_xpath_context_xpath_func_local_name_is);
447
+
448
+ rb_context = TypedData_Wrap_Struct(klass, &_noko_xml_xpath_context_type, c_context);
449
+
465
450
  return rb_context;
466
451
  }
467
452
 
453
+
454
+ /* :nodoc: */
455
+ static VALUE
456
+ noko_xml_xpath_context_set_node(VALUE rb_context, VALUE rb_node)
457
+ {
458
+ xmlNodePtr c_node;
459
+ xmlXPathContextPtr c_context;
460
+
461
+ TypedData_Get_Struct(rb_context, xmlXPathContext, &_noko_xml_xpath_context_type, c_context);
462
+ Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
463
+
464
+ c_context->doc = c_node->doc;
465
+ c_context->node = c_node;
466
+
467
+ return rb_node;
468
+ }
469
+
468
470
  void
469
471
  noko_init_xml_xpath_context(void)
470
472
  {
@@ -475,9 +477,10 @@ noko_init_xml_xpath_context(void)
475
477
 
476
478
  rb_undef_alloc_func(cNokogiriXmlXpathContext);
477
479
 
478
- rb_define_singleton_method(cNokogiriXmlXpathContext, "new", rb_xml_xpath_context_new, 1);
480
+ rb_define_singleton_method(cNokogiriXmlXpathContext, "new", noko_xml_xpath_context_new, 1);
479
481
 
480
- rb_define_method(cNokogiriXmlXpathContext, "evaluate", rb_xml_xpath_context_evaluate, -1);
481
- rb_define_method(cNokogiriXmlXpathContext, "register_variable", rb_xml_xpath_context_register_variable, 2);
482
- rb_define_method(cNokogiriXmlXpathContext, "register_ns", rb_xml_xpath_context_register_ns, 2);
482
+ rb_define_method(cNokogiriXmlXpathContext, "evaluate", noko_xml_xpath_context_evaluate, -1);
483
+ rb_define_method(cNokogiriXmlXpathContext, "register_variable", noko_xml_xpath_context_register_variable, 2);
484
+ rb_define_method(cNokogiriXmlXpathContext, "register_ns", noko_xml_xpath_context_register_ns, 2);
485
+ rb_define_method(cNokogiriXmlXpathContext, "node=", noko_xml_xpath_context_set_node, 1);
483
486
  }
@@ -1,6 +1,6 @@
1
1
  #include <nokogiri.h>
2
2
 
3
- VALUE cNokogiriXsltStylesheet ;
3
+ VALUE cNokogiriXsltStylesheet;
4
4
 
5
5
  static void
6
6
  mark(void *data)
@@ -18,8 +18,8 @@ dealloc(void *data)
18
18
  ruby_xfree(wrapper);
19
19
  }
20
20
 
21
- static const rb_data_type_t xslt_stylesheet_type = {
22
- .wrap_struct_name = "Nokogiri::XSLT::Stylesheet",
21
+ static const rb_data_type_t nokogiri_xslt_stylesheet_tuple_type = {
22
+ .wrap_struct_name = "nokogiriXsltStylesheetTuple",
23
23
  .function = {
24
24
  .dmark = mark,
25
25
  .dfree = dealloc,
@@ -56,7 +56,7 @@ Nokogiri_wrap_xslt_stylesheet(xsltStylesheetPtr ss)
56
56
  self = TypedData_Make_Struct(
57
57
  cNokogiriXsltStylesheet,
58
58
  nokogiriXsltStylesheetTuple,
59
- &xslt_stylesheet_type,
59
+ &nokogiri_xslt_stylesheet_tuple_type,
60
60
  wrapper
61
61
  );
62
62
 
@@ -124,7 +124,7 @@ rb_xslt_stylesheet_serialize(VALUE self, VALUE xmlobj)
124
124
  TypedData_Get_Struct(
125
125
  self,
126
126
  nokogiriXsltStylesheetTuple,
127
- &xslt_stylesheet_type,
127
+ &nokogiri_xslt_stylesheet_tuple_type,
128
128
  wrapper
129
129
  );
130
130
  xsltSaveResultToString(&doc_ptr, &doc_len, xml, wrapper->ss);
@@ -273,7 +273,7 @@ rb_xslt_stylesheet_transform(int argc, VALUE *argv, VALUE self)
273
273
  Check_Type(rb_param, T_ARRAY);
274
274
 
275
275
  c_document = noko_xml_document_unwrap(rb_document);
276
- TypedData_Get_Struct(self, nokogiriXsltStylesheetTuple, &xslt_stylesheet_type, wrapper);
276
+ TypedData_Get_Struct(self, nokogiriXsltStylesheetTuple, &nokogiri_xslt_stylesheet_tuple_type, wrapper);
277
277
 
278
278
  param_len = RARRAY_LEN(rb_param);
279
279
  params = ruby_xcalloc((size_t)param_len + 1, sizeof(char *));
@@ -362,7 +362,7 @@ initFunc(xsltTransformContextPtr ctxt, const xmlChar *uri)
362
362
  TypedData_Get_Struct(
363
363
  (VALUE)ctxt->style->_private,
364
364
  nokogiriXsltStylesheetTuple,
365
- &xslt_stylesheet_type,
365
+ &nokogiri_xslt_stylesheet_tuple_type,
366
366
  wrapper
367
367
  );
368
368
  inst = rb_class_new_instance(0, NULL, obj);
@@ -380,7 +380,7 @@ shutdownFunc(xsltTransformContextPtr ctxt,
380
380
  TypedData_Get_Struct(
381
381
  (VALUE)ctxt->style->_private,
382
382
  nokogiriXsltStylesheetTuple,
383
- &xslt_stylesheet_type,
383
+ &nokogiri_xslt_stylesheet_tuple_type,
384
384
  wrapper
385
385
  );
386
386
 
@@ -44,8 +44,8 @@ const unsigned char _gumbo_ascii_table[0x80] = {
44
44
  };
45
45
 
46
46
  // Table generation code.
47
- // clang -DGUMBO_GEN_TABLE=1 ascii.c && ./a.out && rm a.out
48
- #if GUMBO_GEN_TABLE
47
+ // clang -DGUMBO_GEN_TABLE ascii.c && ./a.out && rm a.out
48
+ #ifdef GUMBO_GEN_TABLE
49
49
  #include <stdio.h>
50
50
 
51
51
  int main() {
@@ -46,33 +46,40 @@ static int PRINTF(2) print_message (
46
46
  args
47
47
  );
48
48
  va_end(args);
49
- #if _MSC_VER && _MSC_VER < 1900
49
+
50
+ #if (defined(_MSC_VER) && (_MSC_VER < 1900)) || defined(_RUBY_MSVCRT)
50
51
  if (bytes_written == -1) {
51
52
  // vsnprintf returns -1 on older MSVC++ if there's not enough capacity,
52
53
  // instead of returning the number of bytes that would've been written had
53
- // there been enough. In this case, we'll double the buffer size and hope
54
- // it fits when we retry (letting it fail and returning 0 if it doesn't),
55
- // since there's no way to smartly resize the buffer.
56
- gumbo_string_buffer_reserve(output->capacity * 2, output);
54
+ // there been enough. In this case, we can call vsnprintf() again but
55
+ // with a count of 0 to get the number of bytes that would be written, not including
56
+ // the null terminator.
57
+ // https://learn.microsoft.com/en-us/cpp/c-runtime-library/reference/vsnprintf-vsnprintf-vsnprintf-l-vsnwprintf-vsnwprintf-l?view=msvc-140#behavior-summary
58
+
57
59
  va_start(args, format);
58
- int result = vsnprintf (
59
- output->data + output->length,
60
- remaining_capacity,
60
+ bytes_written = vsnprintf (
61
+ NULL,
62
+ 0,
61
63
  format,
62
64
  args
63
65
  );
64
66
  va_end(args);
65
- return result == -1 ? 0 : result;
66
67
  }
67
- #else
68
+ #endif
69
+
68
70
  // -1 in standard C99 indicates an encoding error. Return 0 and do nothing.
69
71
  if (bytes_written == -1) {
70
72
  return 0;
71
73
  }
72
- #endif
73
74
 
74
75
  if (bytes_written >= remaining_capacity) {
75
- gumbo_string_buffer_reserve(output->capacity + bytes_written, output);
76
+ // At least double the size of the buffer.
77
+ size_t new_capacity = output->capacity * 2;
78
+ if (new_capacity < output->length + bytes_written + 1) {
79
+ // The +1 is for the null terminator.
80
+ new_capacity = output->length + bytes_written + 1;
81
+ }
82
+ gumbo_string_buffer_reserve(new_capacity, output);
76
83
  remaining_capacity = output->capacity - output->length;
77
84
  va_start(args, format);
78
85
  bytes_written = vsnprintf (
@@ -96,8 +103,14 @@ static void print_tag_stack (
96
103
  if (i) {
97
104
  print_message(output, ", ");
98
105
  }
99
- GumboTag tag = (GumboTag)(intptr_t) error->tag_stack.data[i];
100
- print_message(output, "%s", gumbo_normalized_tagname(tag));
106
+ uintptr_t tag = (uintptr_t) error->tag_stack.data[i];
107
+ const char* tag_name;
108
+ if (tag > GUMBO_TAG_UNKNOWN) {
109
+ tag_name = error->tag_stack.data[i];
110
+ } else {
111
+ tag_name = gumbo_normalized_tagname((GumboTag)tag);
112
+ }
113
+ print_message(output, "%s", tag_name);
101
114
  }
102
115
  gumbo_string_buffer_append_codepoint('.', output);
103
116
  }
@@ -326,41 +339,45 @@ static void handle_parser_error (
326
339
  }
327
340
 
328
341
  switch (error->input_type) {
329
- case GUMBO_TOKEN_DOCTYPE:
330
- print_message(output, "This is not a legal doctype");
331
- return;
332
- case GUMBO_TOKEN_COMMENT:
333
- // Should never happen; comments are always legal.
334
- assert(0);
335
- // But just in case...
336
- print_message(output, "Comments aren't legal here");
337
- return;
338
- case GUMBO_TOKEN_CDATA:
339
- case GUMBO_TOKEN_WHITESPACE:
340
- case GUMBO_TOKEN_CHARACTER:
341
- print_message(output, "Character tokens aren't legal here");
342
- return;
343
- case GUMBO_TOKEN_NULL:
344
- print_message(output, "Null bytes are not allowed in HTML5");
345
- return;
346
- case GUMBO_TOKEN_EOF:
347
- if (error->parser_state == GUMBO_INSERTION_MODE_INITIAL) {
348
- print_message(output, "You must provide a doctype");
349
- } else {
350
- print_message(output, "Premature end of file.");
351
- print_tag_stack(error, output);
352
- }
353
- return;
354
- case GUMBO_TOKEN_START_TAG:
355
- print_message(output, "Start tag '%s' isn't allowed here.",
356
- gumbo_normalized_tagname(error->input_tag));
357
- print_tag_stack(error, output);
358
- return;
359
- case GUMBO_TOKEN_END_TAG:
360
- print_message(output, "End tag '%s' isn't allowed here.",
361
- gumbo_normalized_tagname(error->input_tag));
342
+ case GUMBO_TOKEN_DOCTYPE:
343
+ print_message(output, "This is not a legal doctype");
344
+ return;
345
+ case GUMBO_TOKEN_COMMENT:
346
+ // Should never happen; comments are always legal.
347
+ assert(0);
348
+ // But just in case...
349
+ print_message(output, "Comments aren't legal here");
350
+ return;
351
+ case GUMBO_TOKEN_CDATA:
352
+ case GUMBO_TOKEN_WHITESPACE:
353
+ case GUMBO_TOKEN_CHARACTER:
354
+ print_message(output, "Character tokens aren't legal here");
355
+ return;
356
+ case GUMBO_TOKEN_NULL:
357
+ print_message(output, "Null bytes are not allowed in HTML5");
358
+ return;
359
+ case GUMBO_TOKEN_EOF:
360
+ if (error->parser_state == GUMBO_INSERTION_MODE_INITIAL) {
361
+ print_message(output, "You must provide a doctype");
362
+ } else {
363
+ print_message(output, "Premature end of file.");
362
364
  print_tag_stack(error, output);
363
- return;
365
+ }
366
+ return;
367
+ case GUMBO_TOKEN_START_TAG:
368
+ case GUMBO_TOKEN_END_TAG:
369
+ {
370
+ const char* tag_name;
371
+ const char* which = error->input_type == GUMBO_TOKEN_START_TAG ? "Start" : "End";
372
+ if (error->input_name) {
373
+ tag_name = error->input_name;
374
+ } else {
375
+ tag_name = gumbo_normalized_tagname(error->input_tag);
376
+ }
377
+ print_message(output, "%s tag '%s' isn't allowed here.", which, tag_name);
378
+ print_tag_stack(error, output);
379
+ return;
380
+ }
364
381
  }
365
382
  }
366
383
 
@@ -613,6 +630,17 @@ void gumbo_print_caret_diagnostic (
613
630
 
614
631
  void gumbo_error_destroy(GumboError* error) {
615
632
  if (error->type == GUMBO_ERR_PARSER) {
633
+ // Free the tag name.
634
+ if (error->v.parser.input_name) {
635
+ gumbo_free(error->v.parser.input_name);
636
+ }
637
+
638
+ for (unsigned int i = 0; i < error->v.parser.tag_stack.length; ++i) {
639
+ intptr_t tag = (intptr_t) error->v.parser.tag_stack.data[i];
640
+ if (tag > GUMBO_TAG_UNKNOWN) {
641
+ gumbo_free(error->v.parser.tag_stack.data[i]);
642
+ }
643
+ }
616
644
  gumbo_vector_destroy(&error->v.parser.tag_stack);
617
645
  }
618
646
  gumbo_free(error);
@@ -95,12 +95,16 @@ typedef struct GumboInternalParserError {
95
95
  // The HTML tag of the input token. TAG_UNKNOWN if this was not a tag token.
96
96
  GumboTag input_tag;
97
97
 
98
+ // The tag name of the input token if it was a nonstandard tag token. NULL otherwise.
99
+ char *input_name;
100
+
98
101
  // The insertion mode that the parser was in at the time.
99
102
  GumboInsertionMode parser_state;
100
103
 
101
104
  // The tag stack at the point of the error. Note that this is a GumboVector
102
105
  // of GumboTag's *stored by value* - cast the void* to a GumboTag directly to
103
- // get at the tag.
106
+ // get at the tag. For nonstandard tags, this is a pointer to an owned char *
107
+ // containing the tag name.
104
108
  GumboVector /* GumboTag */ tag_stack;
105
109
  } GumboParserError;
106
110
 
@@ -780,6 +780,15 @@ typedef struct GumboInternalOptions {
780
780
  * Default: `false`.
781
781
  */
782
782
  bool fragment_context_has_form_ancestor;
783
+
784
+ /**
785
+ * Parse `noscript` elements as if scripting was enabled. This causes the
786
+ * contents of the `noscript` element to be parsed as raw text, rather
787
+ * than as HTML elements.
788
+ *
789
+ * Default: `false`.
790
+ */
791
+ bool parse_noscript_content_as_text;
783
792
  } GumboOptions;
784
793
 
785
794
  /** Default options struct; use this with gumbo_parse_with_options. */
@@ -791,7 +800,7 @@ extern const GumboOptions kGumboDefaultOptions;
791
800
  */
792
801
  typedef enum {
793
802
  /**
794
- * Indicates that parsing completed successfuly. The resulting tree
803
+ * Indicates that parsing completed successfully. The resulting tree
795
804
  * will be a complete document.
796
805
  */
797
806
  GUMBO_STATUS_OK,
@@ -841,7 +850,7 @@ typedef struct GumboInternalOutput {
841
850
  GumboVector /* GumboError */ errors;
842
851
 
843
852
  /**
844
- * True if the parser encounted an error.
853
+ * True if the parser encountered an error.
845
854
  *
846
855
  * This can be true and `errors` an empty `GumboVector` if the `max_errors`
847
856
  * option was set to 0.