nokogiri 1.14.5 → 1.16.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (84) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +19 -15
  3. data/README.md +4 -1
  4. data/dependencies.yml +9 -8
  5. data/ext/nokogiri/extconf.rb +69 -26
  6. data/ext/nokogiri/html4_document.c +1 -2
  7. data/ext/nokogiri/html4_element_description.c +19 -14
  8. data/ext/nokogiri/html4_sax_parser_context.c +10 -16
  9. data/ext/nokogiri/html4_sax_push_parser.c +3 -3
  10. data/ext/nokogiri/nokogiri.c +46 -24
  11. data/ext/nokogiri/nokogiri.h +23 -5
  12. data/ext/nokogiri/test_global_handlers.c +1 -1
  13. data/ext/nokogiri/xml_attr.c +1 -1
  14. data/ext/nokogiri/xml_cdata.c +30 -17
  15. data/ext/nokogiri/xml_comment.c +1 -1
  16. data/ext/nokogiri/xml_document.c +113 -25
  17. data/ext/nokogiri/xml_document_fragment.c +1 -1
  18. data/ext/nokogiri/xml_dtd.c +1 -1
  19. data/ext/nokogiri/xml_element_content.c +32 -29
  20. data/ext/nokogiri/xml_element_decl.c +5 -5
  21. data/ext/nokogiri/xml_encoding_handler.c +12 -4
  22. data/ext/nokogiri/xml_entity_reference.c +1 -1
  23. data/ext/nokogiri/xml_namespace.c +11 -16
  24. data/ext/nokogiri/xml_node.c +13 -16
  25. data/ext/nokogiri/xml_node_set.c +125 -105
  26. data/ext/nokogiri/xml_processing_instruction.c +1 -1
  27. data/ext/nokogiri/xml_reader.c +61 -74
  28. data/ext/nokogiri/xml_relax_ng.c +66 -79
  29. data/ext/nokogiri/xml_sax_parser.c +24 -5
  30. data/ext/nokogiri/xml_sax_parser_context.c +50 -25
  31. data/ext/nokogiri/xml_sax_push_parser.c +30 -9
  32. data/ext/nokogiri/xml_schema.c +94 -115
  33. data/ext/nokogiri/xml_syntax_error.c +3 -3
  34. data/ext/nokogiri/xml_text.c +26 -13
  35. data/ext/nokogiri/xml_xpath_context.c +153 -83
  36. data/ext/nokogiri/xslt_stylesheet.c +111 -53
  37. data/gumbo-parser/Makefile +18 -0
  38. data/gumbo-parser/src/error.c +8 -4
  39. data/gumbo-parser/src/foreign_attrs.c +13 -14
  40. data/gumbo-parser/src/foreign_attrs.gperf +1 -1
  41. data/gumbo-parser/src/parser.c +21 -5
  42. data/gumbo-parser/src/tokenizer.c +1 -0
  43. data/lib/nokogiri/css/parser_extras.rb +1 -1
  44. data/lib/nokogiri/css/xpath_visitor.rb +3 -23
  45. data/lib/nokogiri/extension.rb +1 -1
  46. data/lib/nokogiri/html4/document.rb +1 -1
  47. data/lib/nokogiri/html4/document_fragment.rb +1 -1
  48. data/lib/nokogiri/html4/element_description_defaults.rb +1821 -353
  49. data/lib/nokogiri/html4/encoding_reader.rb +1 -1
  50. data/lib/nokogiri/html5/document_fragment.rb +1 -1
  51. data/lib/nokogiri/html5/node.rb +5 -0
  52. data/lib/nokogiri/html5.rb +0 -63
  53. data/lib/nokogiri/version/constant.rb +1 -1
  54. data/lib/nokogiri/version/info.rb +6 -5
  55. data/lib/nokogiri/xml/attr.rb +2 -2
  56. data/lib/nokogiri/xml/attribute_decl.rb +4 -2
  57. data/lib/nokogiri/xml/document.rb +4 -5
  58. data/lib/nokogiri/xml/document_fragment.rb +3 -3
  59. data/lib/nokogiri/xml/element_content.rb +10 -2
  60. data/lib/nokogiri/xml/element_decl.rb +4 -2
  61. data/lib/nokogiri/xml/entity_decl.rb +4 -2
  62. data/lib/nokogiri/xml/namespace.rb +1 -2
  63. data/lib/nokogiri/xml/node/save_options.rb +8 -0
  64. data/lib/nokogiri/xml/node.rb +53 -37
  65. data/lib/nokogiri/xml/node_set.rb +3 -3
  66. data/lib/nokogiri/xml/pp/node.rb +23 -12
  67. data/lib/nokogiri/xml/reader.rb +10 -9
  68. data/lib/nokogiri/xml/sax/document.rb +1 -1
  69. data/lib/nokogiri/xml/searchable.rb +21 -13
  70. data/lib/nokogiri/xml/syntax_error.rb +1 -1
  71. data/lib/nokogiri/xml.rb +1 -1
  72. data/lib/nokogiri/xslt/stylesheet.rb +29 -7
  73. data/lib/nokogiri/xslt.rb +74 -4
  74. data/lib/nokogiri.rb +13 -5
  75. data/lib/xsd/xmlparser/nokogiri.rb +1 -1
  76. data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
  77. data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
  78. data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
  79. data/ports/archives/libxml2-2.12.7.tar.xz +0 -0
  80. data/ports/archives/libxslt-1.1.39.tar.xz +0 -0
  81. metadata +14 -12
  82. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +0 -3037
  83. data/ports/archives/libxml2-2.10.4.tar.xz +0 -0
  84. data/ports/archives/libxslt-1.1.37.tar.xz +0 -0
@@ -3,19 +3,30 @@
3
3
  VALUE cNokogiriXsltStylesheet ;
4
4
 
5
5
  static void
6
- mark(nokogiriXsltStylesheetTuple *wrapper)
6
+ mark(void *data)
7
7
  {
8
+ nokogiriXsltStylesheetTuple *wrapper = (nokogiriXsltStylesheetTuple *)data;
8
9
  rb_gc_mark(wrapper->func_instances);
9
10
  }
10
11
 
11
12
  static void
12
- dealloc(nokogiriXsltStylesheetTuple *wrapper)
13
+ dealloc(void *data)
13
14
  {
15
+ nokogiriXsltStylesheetTuple *wrapper = (nokogiriXsltStylesheetTuple *)data;
14
16
  xsltStylesheetPtr doc = wrapper->ss;
15
17
  xsltFreeStylesheet(doc);
16
18
  ruby_xfree(wrapper);
17
19
  }
18
20
 
21
+ static const rb_data_type_t xslt_stylesheet_type = {
22
+ .wrap_struct_name = "Nokogiri::XSLT::Stylesheet",
23
+ .function = {
24
+ .dmark = mark,
25
+ .dfree = dealloc,
26
+ },
27
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY
28
+ };
29
+
19
30
  PRINTFLIKE_DECL(2, 3)
20
31
  static void
21
32
  xslt_generic_error_handler(void *ctx, const char *msg, ...)
@@ -42,8 +53,12 @@ Nokogiri_wrap_xslt_stylesheet(xsltStylesheetPtr ss)
42
53
  VALUE self;
43
54
  nokogiriXsltStylesheetTuple *wrapper;
44
55
 
45
- self = Data_Make_Struct(cNokogiriXsltStylesheet, nokogiriXsltStylesheetTuple,
46
- mark, dealloc, wrapper);
56
+ self = TypedData_Make_Struct(
57
+ cNokogiriXsltStylesheet,
58
+ nokogiriXsltStylesheetTuple,
59
+ &xslt_stylesheet_type,
60
+ wrapper
61
+ );
47
62
 
48
63
  ss->_private = (void *)self;
49
64
  wrapper->ss = ss;
@@ -56,7 +71,12 @@ Nokogiri_wrap_xslt_stylesheet(xsltStylesheetPtr ss)
56
71
  * call-seq:
57
72
  * parse_stylesheet_doc(document)
58
73
  *
59
- * Parse a stylesheet from +document+.
74
+ * Parse an XSLT::Stylesheet from +document+.
75
+ *
76
+ * [Parameters]
77
+ * - +document+ (Nokogiri::XML::Document) the document to be parsed.
78
+ *
79
+ * [Returns] Nokogiri::XSLT::Stylesheet
60
80
  */
61
81
  static VALUE
62
82
  parse_stylesheet_doc(VALUE klass, VALUE xmldocobj)
@@ -64,7 +84,8 @@ parse_stylesheet_doc(VALUE klass, VALUE xmldocobj)
64
84
  xmlDocPtr xml, xml_cpy;
65
85
  VALUE errstr, exception;
66
86
  xsltStylesheetPtr ss ;
67
- Data_Get_Struct(xmldocobj, xmlDoc, xml);
87
+
88
+ xml = noko_xml_document_unwrap(xmldocobj);
68
89
 
69
90
  errstr = rb_str_new(0, 0);
70
91
  xsltSetGenericErrorFunc((void *)errstr, xslt_generic_error_handler);
@@ -88,10 +109,10 @@ parse_stylesheet_doc(VALUE klass, VALUE xmldocobj)
88
109
  * call-seq:
89
110
  * serialize(document)
90
111
  *
91
- * Serialize +document+ to an xml string.
112
+ * Serialize +document+ to an xml string, as specified by the +method+ parameter in the Stylesheet.
92
113
  */
93
114
  static VALUE
94
- serialize(VALUE self, VALUE xmlobj)
115
+ rb_xslt_stylesheet_serialize(VALUE self, VALUE xmlobj)
95
116
  {
96
117
  xmlDocPtr xml ;
97
118
  nokogiriXsltStylesheetTuple *wrapper;
@@ -99,8 +120,13 @@ serialize(VALUE self, VALUE xmlobj)
99
120
  int doc_len ;
100
121
  VALUE rval ;
101
122
 
102
- Data_Get_Struct(xmlobj, xmlDoc, xml);
103
- Data_Get_Struct(self, nokogiriXsltStylesheetTuple, wrapper);
123
+ xml = noko_xml_document_unwrap(xmlobj);
124
+ TypedData_Get_Struct(
125
+ self,
126
+ nokogiriXsltStylesheetTuple,
127
+ &xslt_stylesheet_type,
128
+ wrapper
129
+ );
104
130
  xsltSaveResultToString(&doc_ptr, &doc_len, xml, wrapper->ss);
105
131
  rval = NOKOGIRI_STR_NEW(doc_ptr, doc_len);
106
132
  xmlFree(doc_ptr);
@@ -112,7 +138,7 @@ serialize(VALUE self, VALUE xmlobj)
112
138
  * transform(document)
113
139
  * transform(document, params = {})
114
140
  *
115
- * Apply an XSLT stylesheet to an XML::Document.
141
+ * Transform an XML::Document as defined by an XSLT::Stylesheet.
116
142
  *
117
143
  * [Parameters]
118
144
  * - +document+ (Nokogiri::XML::Document) the document to be transformed.
@@ -221,60 +247,74 @@ serialize(VALUE self, VALUE xmlobj)
221
247
  * See: Nokogiri::XSLT.quote_params
222
248
  */
223
249
  static VALUE
224
- transform(int argc, VALUE *argv, VALUE self)
250
+ rb_xslt_stylesheet_transform(int argc, VALUE *argv, VALUE self)
225
251
  {
226
- VALUE xmldoc, paramobj, errstr, exception ;
227
- xmlDocPtr xml ;
228
- xmlDocPtr result ;
252
+ VALUE rb_document, rb_param, rb_error_str;
253
+ xmlDocPtr c_document ;
254
+ xmlDocPtr c_result_document ;
229
255
  nokogiriXsltStylesheetTuple *wrapper;
230
256
  const char **params ;
231
257
  long param_len, j ;
232
258
  int parse_error_occurred ;
259
+ int defensive_copy_p = 0;
233
260
 
234
- rb_scan_args(argc, argv, "11", &xmldoc, &paramobj);
235
- if (NIL_P(paramobj)) { paramobj = rb_ary_new2(0L) ; }
236
- if (!rb_obj_is_kind_of(xmldoc, cNokogiriXmlDocument)) {
261
+ rb_scan_args(argc, argv, "11", &rb_document, &rb_param);
262
+ if (NIL_P(rb_param)) { rb_param = rb_ary_new2(0L) ; }
263
+ if (!rb_obj_is_kind_of(rb_document, cNokogiriXmlDocument)) {
237
264
  rb_raise(rb_eArgError, "argument must be a Nokogiri::XML::Document");
238
265
  }
239
266
 
240
267
  /* handle hashes as arguments. */
241
- if (T_HASH == TYPE(paramobj)) {
242
- paramobj = rb_funcall(paramobj, rb_intern("to_a"), 0);
243
- paramobj = rb_funcall(paramobj, rb_intern("flatten"), 0);
268
+ if (T_HASH == TYPE(rb_param)) {
269
+ rb_param = rb_funcall(rb_param, rb_intern("to_a"), 0);
270
+ rb_param = rb_funcall(rb_param, rb_intern("flatten"), 0);
244
271
  }
245
272
 
246
- Check_Type(paramobj, T_ARRAY);
273
+ Check_Type(rb_param, T_ARRAY);
247
274
 
248
- Data_Get_Struct(xmldoc, xmlDoc, xml);
249
- Data_Get_Struct(self, nokogiriXsltStylesheetTuple, wrapper);
275
+ c_document = noko_xml_document_unwrap(rb_document);
276
+ TypedData_Get_Struct(self, nokogiriXsltStylesheetTuple, &xslt_stylesheet_type, wrapper);
250
277
 
251
- param_len = RARRAY_LEN(paramobj);
278
+ param_len = RARRAY_LEN(rb_param);
252
279
  params = ruby_xcalloc((size_t)param_len + 1, sizeof(char *));
253
280
  for (j = 0 ; j < param_len ; j++) {
254
- VALUE entry = rb_ary_entry(paramobj, j);
281
+ VALUE entry = rb_ary_entry(rb_param, j);
255
282
  const char *ptr = StringValueCStr(entry);
256
283
  params[j] = ptr;
257
284
  }
258
285
  params[param_len] = 0 ;
259
286
 
260
- errstr = rb_str_new(0, 0);
261
- xsltSetGenericErrorFunc((void *)errstr, xslt_generic_error_handler);
262
- xmlSetGenericErrorFunc((void *)errstr, xslt_generic_error_handler);
287
+ xsltTransformContextPtr c_transform_context = xsltNewTransformContext(wrapper->ss, c_document);
288
+ if (xsltNeedElemSpaceHandling(c_transform_context) &&
289
+ noko_xml_document_has_wrapped_blank_nodes_p(c_document)) {
290
+ // see https://github.com/sparklemotion/nokogiri/issues/2800
291
+ c_document = xmlCopyDoc(c_document, 1);
292
+ defensive_copy_p = 1;
293
+ }
294
+ xsltFreeTransformContext(c_transform_context);
295
+
296
+ rb_error_str = rb_str_new(0, 0);
297
+ xsltSetGenericErrorFunc((void *)rb_error_str, xslt_generic_error_handler);
298
+ xmlSetGenericErrorFunc((void *)rb_error_str, xslt_generic_error_handler);
299
+
300
+ c_result_document = xsltApplyStylesheet(wrapper->ss, c_document, params);
263
301
 
264
- result = xsltApplyStylesheet(wrapper->ss, xml, params);
265
302
  ruby_xfree(params);
303
+ if (defensive_copy_p) {
304
+ xmlFreeDoc(c_document);
305
+ c_document = NULL;
306
+ }
266
307
 
267
308
  xsltSetGenericErrorFunc(NULL, NULL);
268
309
  xmlSetGenericErrorFunc(NULL, NULL);
269
310
 
270
- parse_error_occurred = (Qfalse == rb_funcall(errstr, rb_intern("empty?"), 0));
311
+ parse_error_occurred = (Qfalse == rb_funcall(rb_error_str, rb_intern("empty?"), 0));
271
312
 
272
313
  if (parse_error_occurred) {
273
- exception = rb_exc_new3(rb_eRuntimeError, errstr);
274
- rb_exc_raise(exception);
314
+ rb_exc_raise(rb_exc_new3(rb_eRuntimeError, rb_error_str));
275
315
  }
276
316
 
277
- return noko_xml_document_wrap((VALUE)0, result) ;
317
+ return noko_xml_document_wrap((VALUE)0, c_result_document) ;
278
318
  }
279
319
 
280
320
  static void
@@ -290,7 +330,12 @@ method_caller(xmlXPathParserContextPtr ctxt, int nargs)
290
330
  handler = (VALUE)xsltGetExtData(transform, functionURI);
291
331
  function_name = (const char *)(ctxt->context->function);
292
332
 
293
- Nokogiri_marshal_xpath_funcall_and_return_values(ctxt, nargs, handler, (const char *)function_name);
333
+ Nokogiri_marshal_xpath_funcall_and_return_values(
334
+ ctxt,
335
+ nargs,
336
+ handler,
337
+ (const char *)function_name
338
+ );
294
339
  }
295
340
 
296
341
  static void *
@@ -306,12 +351,20 @@ initFunc(xsltTransformContextPtr ctxt, const xmlChar *uri)
306
351
 
307
352
  for (i = 0; i < RARRAY_LEN(methods); i++) {
308
353
  VALUE method_name = rb_obj_as_string(rb_ary_entry(methods, i));
309
- xsltRegisterExtFunction(ctxt,
310
- (unsigned char *)StringValueCStr(method_name), uri, method_caller);
354
+ xsltRegisterExtFunction(
355
+ ctxt,
356
+ (unsigned char *)StringValueCStr(method_name),
357
+ uri,
358
+ method_caller
359
+ );
311
360
  }
312
361
 
313
- Data_Get_Struct((VALUE)ctxt->style->_private, nokogiriXsltStylesheetTuple,
314
- wrapper);
362
+ TypedData_Get_Struct(
363
+ (VALUE)ctxt->style->_private,
364
+ nokogiriXsltStylesheetTuple,
365
+ &xslt_stylesheet_type,
366
+ wrapper
367
+ );
315
368
  inst = rb_class_new_instance(0, NULL, obj);
316
369
  rb_ary_push(wrapper->func_instances, inst);
317
370
 
@@ -324,33 +377,38 @@ shutdownFunc(xsltTransformContextPtr ctxt,
324
377
  {
325
378
  nokogiriXsltStylesheetTuple *wrapper;
326
379
 
327
- Data_Get_Struct((VALUE)ctxt->style->_private, nokogiriXsltStylesheetTuple,
328
- wrapper);
380
+ TypedData_Get_Struct(
381
+ (VALUE)ctxt->style->_private,
382
+ nokogiriXsltStylesheetTuple,
383
+ &xslt_stylesheet_type,
384
+ wrapper
385
+ );
329
386
 
330
387
  rb_ary_clear(wrapper->func_instances);
331
388
  }
332
389
 
333
- /*
334
- * call-seq:
335
- * register(uri, custom_handler_class)
336
- *
337
- * Register a class that implements custom XSLT transformation functions.
338
- */
390
+ /* docstring is in lib/nokogiri/xslt.rb */
339
391
  static VALUE
340
- registr(VALUE self, VALUE uri, VALUE obj)
392
+ rb_xslt_s_register(VALUE self, VALUE uri, VALUE obj)
341
393
  {
342
394
  VALUE modules = rb_iv_get(self, "@modules");
343
- if (NIL_P(modules)) { rb_raise(rb_eRuntimeError, "wtf! @modules isn't set"); }
395
+ if (NIL_P(modules)) {
396
+ rb_raise(rb_eRuntimeError, "internal error: @modules not set");
397
+ }
344
398
 
345
399
  rb_hash_aset(modules, uri, obj);
346
- xsltRegisterExtModule((unsigned char *)StringValueCStr(uri), initFunc, shutdownFunc);
400
+ xsltRegisterExtModule(
401
+ (unsigned char *)StringValueCStr(uri),
402
+ initFunc,
403
+ shutdownFunc
404
+ );
347
405
  return self;
348
406
  }
349
407
 
350
408
  void
351
409
  noko_init_xslt_stylesheet(void)
352
410
  {
353
- rb_define_singleton_method(mNokogiriXslt, "register", registr, 2);
411
+ rb_define_singleton_method(mNokogiriXslt, "register", rb_xslt_s_register, 2);
354
412
  rb_iv_set(mNokogiriXslt, "@modules", rb_hash_new());
355
413
 
356
414
  cNokogiriXsltStylesheet = rb_define_class_under(mNokogiriXslt, "Stylesheet", rb_cObject);
@@ -358,6 +416,6 @@ noko_init_xslt_stylesheet(void)
358
416
  rb_undef_alloc_func(cNokogiriXsltStylesheet);
359
417
 
360
418
  rb_define_singleton_method(cNokogiriXsltStylesheet, "parse_stylesheet_doc", parse_stylesheet_doc, 1);
361
- rb_define_method(cNokogiriXsltStylesheet, "serialize", serialize, 1);
362
- rb_define_method(cNokogiriXsltStylesheet, "transform", transform, -1);
419
+ rb_define_method(cNokogiriXsltStylesheet, "serialize", rb_xslt_stylesheet_serialize, 1);
420
+ rb_define_method(cNokogiriXsltStylesheet, "transform", rb_xslt_stylesheet_transform, -1);
363
421
  }
@@ -13,6 +13,23 @@ LDFLAGS := -pthread
13
13
 
14
14
  all: check
15
15
 
16
+ oss-fuzz:
17
+ ./fuzzer/build-ossfuzz.sh
18
+
19
+ fuzzers: fuzzer-normal fuzzer-asan fuzzer-ubsan fuzzer-msan
20
+
21
+ fuzzer-normal:
22
+ ./fuzzer/build.sh
23
+
24
+ fuzzer-asan:
25
+ SANITIZER=asan ./fuzzer/build.sh
26
+
27
+ fuzzer-ubsan:
28
+ SANITIZER=ubsan ./fuzzer/build.sh
29
+
30
+ fuzzer-msan:
31
+ SANITIZER=msan ./fuzzer/build.sh
32
+
16
33
  # don't try to regenerate ragel or gperf files in CI, that should be a development-only action and
17
34
  # the generated files should be committed to SCM
18
35
  ifneq ($(CI),true)
@@ -81,6 +98,7 @@ coverage:
81
98
 
82
99
  clean:
83
100
  $(RM) -r build
101
+ $(RM) -r fuzzer/build fuzzer/src-* fuzzer/gumbo_corpus
84
102
 
85
103
  build/src/flags: | build/src
86
104
  @echo 'old_CC := $(CC)' > $@
@@ -91,7 +91,7 @@ static void print_tag_stack (
91
91
  const GumboParserError* error,
92
92
  GumboStringBuffer* output
93
93
  ) {
94
- print_message(output, " Currently open tags: ");
94
+ print_message(output, " Currently open tags: ");
95
95
  for (unsigned int i = 0; i < error->tag_stack.length; ++i) {
96
96
  if (i) {
97
97
  print_message(output, ", ");
@@ -347,15 +347,19 @@ static void handle_parser_error (
347
347
  if (error->parser_state == GUMBO_INSERTION_MODE_INITIAL) {
348
348
  print_message(output, "You must provide a doctype");
349
349
  } else {
350
- print_message(output, "Premature end of file");
350
+ print_message(output, "Premature end of file.");
351
351
  print_tag_stack(error, output);
352
352
  }
353
353
  return;
354
354
  case GUMBO_TOKEN_START_TAG:
355
+ print_message(output, "Start tag '%s' isn't allowed here.",
356
+ gumbo_normalized_tagname(error->input_tag));
357
+ print_tag_stack(error, output);
358
+ return;
355
359
  case GUMBO_TOKEN_END_TAG:
356
- print_message(output, "That tag isn't allowed here");
360
+ print_message(output, "End tag '%s' isn't allowed here.",
361
+ gumbo_normalized_tagname(error->input_tag));
357
362
  print_tag_stack(error, output);
358
- // TODO(jdtang): Give more specific messaging.
359
363
  return;
360
364
  }
361
365
  }
@@ -1,6 +1,6 @@
1
1
  /* ANSI-C code produced by gperf version 3.1 */
2
2
  /* Command-line: gperf -m100 -n src/foreign_attrs.gperf */
3
- /* Computed positions: -k'2,8' */
3
+ /* Computed positions: -k'8-9' */
4
4
  /* Filtered by: gperf-filter.sed */
5
5
 
6
6
  #include "replacement.h"
@@ -29,9 +29,9 @@ hash (register const char *str, register size_t len)
29
29
  11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
30
30
  11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
31
31
  11, 11, 11, 11, 11, 11, 11, 11, 11, 2,
32
- 11, 10, 11, 9, 7, 6, 11, 11, 1, 0,
33
- 11, 5, 11, 11, 4, 11, 11, 11, 11, 11,
34
- 11, 3, 11, 11, 11, 11, 11, 11, 11, 11,
32
+ 11, 1, 11, 10, 4, 4, 11, 11, 3, 11,
33
+ 11, 5, 3, 11, 0, 11, 2, 11, 11, 11,
34
+ 11, 2, 11, 11, 11, 11, 11, 11, 11, 11,
35
35
  11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
36
36
  11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
37
37
  11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
@@ -51,15 +51,14 @@ hash (register const char *str, register size_t len)
51
51
  switch (len)
52
52
  {
53
53
  default:
54
+ hval += asso_values[(unsigned char)str[8]];
55
+ /*FALLTHROUGH*/
56
+ case 8:
54
57
  hval += asso_values[(unsigned char)str[7]];
55
58
  /*FALLTHROUGH*/
56
59
  case 7:
57
60
  case 6:
58
61
  case 5:
59
- case 4:
60
- case 3:
61
- case 2:
62
- hval += asso_values[(unsigned char)str[1]];
63
62
  break;
64
63
  }
65
64
  return hval;
@@ -70,21 +69,21 @@ gumbo_get_foreign_attr_replacement (register const char *str, register size_t le
70
69
  {
71
70
  static const unsigned char lengthtable[] =
72
71
  {
73
- 5, 11, 9, 13, 10, 10, 10, 11, 10, 8, 8
72
+ 5, 10, 13, 9, 13, 10, 11, 11, 10, 10, 8
74
73
  };
75
74
  static const ForeignAttrReplacement wordlist[] =
76
75
  {
77
76
  {"xmlns", "xmlns", GUMBO_ATTR_NAMESPACE_XMLNS},
78
- {"xmlns:xlink", "xlink", GUMBO_ATTR_NAMESPACE_XMLNS},
77
+ {"xlink:href", "href", GUMBO_ATTR_NAMESPACE_XLINK},
78
+ {"xlink:arcrole", "arcrole", GUMBO_ATTR_NAMESPACE_XLINK},
79
79
  {"xml:space", "space", GUMBO_ATTR_NAMESPACE_XML},
80
80
  {"xlink:actuate", "actuate", GUMBO_ATTR_NAMESPACE_XLINK},
81
81
  {"xlink:type", "type", GUMBO_ATTR_NAMESPACE_XLINK},
82
- {"xlink:href", "href", GUMBO_ATTR_NAMESPACE_XLINK},
83
- {"xlink:role", "role", GUMBO_ATTR_NAMESPACE_XLINK},
84
82
  {"xlink:title", "title", GUMBO_ATTR_NAMESPACE_XLINK},
83
+ {"xmlns:xlink", "xlink", GUMBO_ATTR_NAMESPACE_XMLNS},
84
+ {"xlink:role", "role", GUMBO_ATTR_NAMESPACE_XLINK},
85
85
  {"xlink:show", "show", GUMBO_ATTR_NAMESPACE_XLINK},
86
- {"xml:lang", "lang", GUMBO_ATTR_NAMESPACE_XML},
87
- {"xml:base", "base", GUMBO_ATTR_NAMESPACE_XML}
86
+ {"xml:lang", "lang", GUMBO_ATTR_NAMESPACE_XML}
88
87
  };
89
88
 
90
89
  if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
@@ -15,12 +15,12 @@ ForeignAttrReplacement;
15
15
 
16
16
  %%
17
17
  "xlink:actuate", "actuate", GUMBO_ATTR_NAMESPACE_XLINK
18
+ "xlink:arcrole", "arcrole", GUMBO_ATTR_NAMESPACE_XLINK
18
19
  "xlink:href", "href", GUMBO_ATTR_NAMESPACE_XLINK
19
20
  "xlink:role", "role", GUMBO_ATTR_NAMESPACE_XLINK
20
21
  "xlink:show", "show", GUMBO_ATTR_NAMESPACE_XLINK
21
22
  "xlink:title", "title", GUMBO_ATTR_NAMESPACE_XLINK
22
23
  "xlink:type", "type", GUMBO_ATTR_NAMESPACE_XLINK
23
- "xml:base", "base", GUMBO_ATTR_NAMESPACE_XML
24
24
  "xml:lang", "lang", GUMBO_ATTR_NAMESPACE_XML
25
25
  "xml:space", "space", GUMBO_ATTR_NAMESPACE_XML
26
26
  "xmlns", "xmlns", GUMBO_ATTR_NAMESPACE_XMLNS
@@ -2678,6 +2678,7 @@ static void handle_in_head(GumboParser* parser, GumboToken* token) {
2678
2678
  static void handle_in_head_noscript(GumboParser* parser, GumboToken* token) {
2679
2679
  if (token->type == GUMBO_TOKEN_DOCTYPE) {
2680
2680
  parser_add_parse_error(parser, token);
2681
+ ignore_token(parser);
2681
2682
  return;
2682
2683
  }
2683
2684
  if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
@@ -3963,6 +3964,18 @@ static void handle_in_select(GumboParser* parser, GumboToken* token) {
3963
3964
  insert_element_from_token(parser, token);
3964
3965
  return;
3965
3966
  }
3967
+ if (tag_is(token, kStartTag, GUMBO_TAG_HR)) {
3968
+ if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTION)) {
3969
+ pop_current_node(parser);
3970
+ }
3971
+ if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTGROUP)) {
3972
+ pop_current_node(parser);
3973
+ }
3974
+ insert_element_from_token(parser, token);
3975
+ pop_current_node(parser);
3976
+ acknowledge_self_closing_tag(parser);
3977
+ return;
3978
+ }
3966
3979
  if (tag_is(token, kEndTag, GUMBO_TAG_OPTGROUP)) {
3967
3980
  GumboVector* open_elements = &parser->_parser_state->_open_elements;
3968
3981
  if (
@@ -4813,14 +4826,17 @@ GumboOutput* gumbo_parse_with_options (
4813
4826
  // to a token.
4814
4827
  if (token.type == GUMBO_TOKEN_END_TAG &&
4815
4828
  token.v.end_tag.tag == GUMBO_TAG_UNKNOWN)
4829
+ {
4816
4830
  gumbo_free(token.v.end_tag.name);
4831
+ token.v.end_tag.name = NULL;
4832
+ }
4833
+ if (unlikely(state->_open_elements.length > max_tree_depth)) {
4834
+ parser._output->status = GUMBO_STATUS_TREE_TOO_DEEP;
4835
+ gumbo_debug("Tree depth limit exceeded.\n");
4836
+ break;
4837
+ }
4817
4838
  }
4818
4839
 
4819
- if (unlikely(state->_open_elements.length > max_tree_depth)) {
4820
- parser._output->status = GUMBO_STATUS_TREE_TOO_DEEP;
4821
- gumbo_debug("Tree depth limit exceeded.\n");
4822
- break;
4823
- }
4824
4840
 
4825
4841
  ++loop_count;
4826
4842
  assert(loop_count < 1000000000UL);
@@ -506,6 +506,7 @@ static void abandon_current_tag(GumboParser* parser) {
506
506
  for (unsigned int i = 0; i < tag_state->_attributes.length; ++i) {
507
507
  gumbo_destroy_attribute(tag_state->_attributes.data[i]);
508
508
  }
509
+ gumbo_free(tag_state->_name);
509
510
  gumbo_free(tag_state->_attributes.data);
510
511
  mark_tag_state_as_empty(tag_state);
511
512
  gumbo_string_buffer_destroy(&tag_state->_buffer);
@@ -23,7 +23,7 @@ module Nokogiri
23
23
 
24
24
  # Get the css selector in +string+ from the cache
25
25
  def [](string)
26
- return nil unless cache_on?
26
+ return unless cache_on?
27
27
 
28
28
  @mutex.synchronize { @cache[string] }
29
29
  end
@@ -133,7 +133,7 @@ module Nokogiri
133
133
  args += node.value[1..-1].map do |n|
134
134
  n.is_a?(Nokogiri::CSS::Node) ? n.accept(self) : n
135
135
  end
136
- "#{node.value.first}#{args.join(",")})"
136
+ "nokogiri:#{node.value.first}#{args.join(",")})"
137
137
  end
138
138
  end
139
139
 
@@ -207,7 +207,7 @@ module Nokogiri
207
207
  when "parent" then "node()"
208
208
  when "root" then "not(parent::*)"
209
209
  else
210
- node.value.first + "(.)"
210
+ "nokogiri:#{node.value.first}(.)"
211
211
  end
212
212
  end
213
213
  end
@@ -302,7 +302,7 @@ module Nokogiri
302
302
  end
303
303
 
304
304
  def read_a_and_positive_b(values)
305
- op = values[2]
305
+ op = values[2].strip
306
306
  if op == "+"
307
307
  a = values[0].to_i
308
308
  b = values[3].to_i
@@ -335,25 +335,5 @@ module Nokogiri
335
335
  end
336
336
  end
337
337
  end
338
-
339
- module XPathVisitorAlwaysUseBuiltins # :nodoc:
340
- def self.new
341
- warn(
342
- "Nokogiri::CSS::XPathVisitorAlwaysUseBuiltins is deprecated and will be removed in a future version of Nokogiri",
343
- { uplevel: 1 },
344
- )
345
- XPathVisitor.new(builtins: :always)
346
- end
347
- end
348
-
349
- module XPathVisitorOptimallyUseBuiltins # :nodoc:
350
- def self.new
351
- warn(
352
- "Nokogiri::CSS::XPathVisitorOptimallyUseBuiltins is deprecated and will be removed in a future version of Nokogiri",
353
- { uplevel: 1 },
354
- )
355
- XPathVisitor.new(builtins: :optimal)
356
- end
357
- end
358
338
  end
359
339
  end
@@ -6,7 +6,7 @@ begin
6
6
  RUBY_VERSION =~ /(\d+\.\d+)/
7
7
  require_relative "#{Regexp.last_match(1)}/nokogiri"
8
8
  rescue LoadError => e
9
- if /GLIBC/.match?(e.message)
9
+ if e.message.include?("GLIBC")
10
10
  warn(<<~EOM)
11
11
 
12
12
  ERROR: It looks like you're trying to use Nokogiri as a precompiled native gem on a system
@@ -92,7 +92,7 @@ module Nokogiri
92
92
  title = XML::Node.new("title", self) << tnode
93
93
  if (head = at_xpath("//head"))
94
94
  head << title
95
- elsif (meta = (at_xpath("//meta[@charset]") || meta_content_type))
95
+ elsif (meta = at_xpath("//meta[@charset]") || meta_content_type)
96
96
  # better put after charset declaration
97
97
  meta.add_next_sibling(title)
98
98
  else
@@ -24,7 +24,7 @@ module Nokogiri
24
24
  new(doc, tags, nil, options, &block)
25
25
  end
26
26
 
27
- def initialize(document, tags = nil, ctx = nil, options = XML::ParseOptions::DEFAULT_HTML)
27
+ def initialize(document, tags = nil, ctx = nil, options = XML::ParseOptions::DEFAULT_HTML) # rubocop:disable Lint/MissingSuper
28
28
  return self unless tags
29
29
 
30
30
  options = Nokogiri::XML::ParseOptions.new(options) if Integer === options