nokogiri 1.13.8 → 1.15.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (120)
  1. checksums.yaml +4 -4
  2. data/Gemfile +40 -0
  3. data/LICENSE-DEPENDENCIES.md +830 -509
  4. data/LICENSE.md +1 -1
  5. data/README.md +18 -11
  6. data/dependencies.yml +33 -15
  7. data/ext/nokogiri/extconf.rb +164 -46
  8. data/ext/nokogiri/gumbo.c +20 -10
  9. data/ext/nokogiri/html4_document.c +3 -4
  10. data/ext/nokogiri/html4_element_description.c +20 -15
  11. data/ext/nokogiri/html4_entity_lookup.c +2 -2
  12. data/ext/nokogiri/html4_sax_parser_context.c +11 -22
  13. data/ext/nokogiri/html4_sax_push_parser.c +3 -3
  14. data/ext/nokogiri/nokogiri.c +84 -75
  15. data/ext/nokogiri/nokogiri.h +31 -16
  16. data/ext/nokogiri/test_global_handlers.c +1 -1
  17. data/ext/nokogiri/xml_attr.c +2 -2
  18. data/ext/nokogiri/xml_attribute_decl.c +2 -2
  19. data/ext/nokogiri/xml_cdata.c +32 -18
  20. data/ext/nokogiri/xml_comment.c +2 -2
  21. data/ext/nokogiri/xml_document.c +127 -34
  22. data/ext/nokogiri/xml_document_fragment.c +2 -2
  23. data/ext/nokogiri/xml_dtd.c +2 -2
  24. data/ext/nokogiri/xml_element_content.c +34 -31
  25. data/ext/nokogiri/xml_element_decl.c +7 -7
  26. data/ext/nokogiri/xml_encoding_handler.c +15 -7
  27. data/ext/nokogiri/xml_entity_decl.c +1 -1
  28. data/ext/nokogiri/xml_entity_reference.c +2 -2
  29. data/ext/nokogiri/xml_namespace.c +79 -14
  30. data/ext/nokogiri/xml_node.c +300 -34
  31. data/ext/nokogiri/xml_node_set.c +125 -107
  32. data/ext/nokogiri/xml_processing_instruction.c +2 -2
  33. data/ext/nokogiri/xml_reader.c +81 -48
  34. data/ext/nokogiri/xml_relax_ng.c +66 -81
  35. data/ext/nokogiri/xml_sax_parser.c +45 -20
  36. data/ext/nokogiri/xml_sax_parser_context.c +46 -30
  37. data/ext/nokogiri/xml_sax_push_parser.c +30 -11
  38. data/ext/nokogiri/xml_schema.c +95 -117
  39. data/ext/nokogiri/xml_syntax_error.c +1 -1
  40. data/ext/nokogiri/xml_text.c +28 -14
  41. data/ext/nokogiri/xml_xpath_context.c +216 -136
  42. data/ext/nokogiri/xslt_stylesheet.c +118 -64
  43. data/gumbo-parser/Makefile +10 -0
  44. data/gumbo-parser/src/attribute.h +1 -1
  45. data/gumbo-parser/src/error.c +10 -6
  46. data/gumbo-parser/src/error.h +1 -1
  47. data/gumbo-parser/src/foreign_attrs.c +15 -16
  48. data/gumbo-parser/src/foreign_attrs.gperf +1 -1
  49. data/gumbo-parser/src/{gumbo.h → nokogiri_gumbo.h} +1 -0
  50. data/gumbo-parser/src/parser.c +21 -5
  51. data/gumbo-parser/src/replacement.h +1 -1
  52. data/gumbo-parser/src/string_buffer.h +1 -1
  53. data/gumbo-parser/src/string_piece.c +1 -1
  54. data/gumbo-parser/src/svg_attrs.c +2 -2
  55. data/gumbo-parser/src/svg_tags.c +2 -2
  56. data/gumbo-parser/src/tag.c +2 -1
  57. data/gumbo-parser/src/tag_lookup.c +7 -7
  58. data/gumbo-parser/src/tag_lookup.gperf +1 -0
  59. data/gumbo-parser/src/tag_lookup.h +1 -1
  60. data/gumbo-parser/src/token_buffer.h +1 -1
  61. data/gumbo-parser/src/tokenizer.c +1 -1
  62. data/gumbo-parser/src/tokenizer.h +1 -1
  63. data/gumbo-parser/src/utf8.c +1 -1
  64. data/gumbo-parser/src/utf8.h +1 -1
  65. data/gumbo-parser/src/util.c +1 -3
  66. data/gumbo-parser/src/util.h +4 -0
  67. data/gumbo-parser/src/vector.h +1 -1
  68. data/lib/nokogiri/css/node.rb +2 -2
  69. data/lib/nokogiri/css/xpath_visitor.rb +7 -5
  70. data/lib/nokogiri/css.rb +6 -0
  71. data/lib/nokogiri/decorators/slop.rb +1 -1
  72. data/lib/nokogiri/encoding_handler.rb +57 -0
  73. data/lib/nokogiri/extension.rb +4 -3
  74. data/lib/nokogiri/html4/document.rb +2 -121
  75. data/lib/nokogiri/html4/document_fragment.rb +1 -1
  76. data/lib/nokogiri/html4/element_description_defaults.rb +1827 -365
  77. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  78. data/lib/nokogiri/html4.rb +1 -0
  79. data/lib/nokogiri/html5/document.rb +113 -36
  80. data/lib/nokogiri/html5/document_fragment.rb +10 -3
  81. data/lib/nokogiri/html5/node.rb +8 -5
  82. data/lib/nokogiri/html5.rb +130 -216
  83. data/lib/nokogiri/jruby/dependencies.rb +1 -19
  84. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  85. data/lib/nokogiri/version/constant.rb +1 -1
  86. data/lib/nokogiri/version/info.rb +11 -10
  87. data/lib/nokogiri/xml/attr.rb +49 -0
  88. data/lib/nokogiri/xml/attribute_decl.rb +4 -2
  89. data/lib/nokogiri/xml/builder.rb +1 -1
  90. data/lib/nokogiri/xml/document.rb +102 -55
  91. data/lib/nokogiri/xml/document_fragment.rb +50 -7
  92. data/lib/nokogiri/xml/element_content.rb +10 -2
  93. data/lib/nokogiri/xml/element_decl.rb +4 -2
  94. data/lib/nokogiri/xml/entity_decl.rb +4 -2
  95. data/lib/nokogiri/xml/namespace.rb +42 -0
  96. data/lib/nokogiri/xml/node/save_options.rb +14 -4
  97. data/lib/nokogiri/xml/node.rb +212 -48
  98. data/lib/nokogiri/xml/node_set.rb +88 -9
  99. data/lib/nokogiri/xml/parse_options.rb +129 -50
  100. data/lib/nokogiri/xml/pp/node.rb +28 -15
  101. data/lib/nokogiri/xml/processing_instruction.rb +2 -1
  102. data/lib/nokogiri/xml/sax/document.rb +1 -1
  103. data/lib/nokogiri/xml/sax/parser.rb +2 -3
  104. data/lib/nokogiri/xml/searchable.rb +18 -10
  105. data/lib/nokogiri/xslt.rb +74 -4
  106. data/lib/nokogiri.rb +15 -15
  107. data/lib/xsd/xmlparser/nokogiri.rb +4 -2
  108. data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
  109. data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
  110. data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
  111. data/ports/archives/libxml2-2.11.7.tar.xz +0 -0
  112. data/ports/archives/libxslt-1.1.39.tar.xz +0 -0
  113. metadata +19 -242
  114. data/patches/libxml2/0004-use-glibc-strlen.patch +0 -53
  115. data/patches/libxml2/0005-avoid-isnan-isinf.patch +0 -81
  116. data/patches/libxml2/0006-update-automake-files-for-arm64.patch +0 -3040
  117. data/patches/libxml2/0008-htmlParseComment-handle-abruptly-closed-comments.patch +0 -61
  118. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +0 -3037
  119. data/ports/archives/libxml2-2.9.14.tar.xz +0 -0
  120. data/ports/archives/libxslt-1.1.35.tar.xz +0 -0
@@ -3,36 +3,48 @@
3
3
  VALUE cNokogiriXsltStylesheet ;
4
4
 
5
5
  static void
6
- mark(nokogiriXsltStylesheetTuple *wrapper)
6
+ mark(void *data)
7
7
  {
8
+ nokogiriXsltStylesheetTuple *wrapper = (nokogiriXsltStylesheetTuple *)data;
8
9
  rb_gc_mark(wrapper->func_instances);
9
10
  }
10
11
 
11
12
  static void
12
- dealloc(nokogiriXsltStylesheetTuple *wrapper)
13
+ dealloc(void *data)
13
14
  {
15
+ nokogiriXsltStylesheetTuple *wrapper = (nokogiriXsltStylesheetTuple *)data;
14
16
  xsltStylesheetPtr doc = wrapper->ss;
15
-
16
- NOKOGIRI_DEBUG_START(doc);
17
- xsltFreeStylesheet(doc); /* commented out for now. */
18
- NOKOGIRI_DEBUG_END(doc);
19
-
20
- free(wrapper);
17
+ xsltFreeStylesheet(doc);
18
+ ruby_xfree(wrapper);
21
19
  }
22
20
 
21
+ static const rb_data_type_t xslt_stylesheet_type = {
22
+ .wrap_struct_name = "Nokogiri::XSLT::Stylesheet",
23
+ .function = {
24
+ .dmark = mark,
25
+ .dfree = dealloc,
26
+ },
27
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY
28
+ };
29
+
30
+ PRINTFLIKE_DECL(2, 3)
23
31
  static void
24
32
  xslt_generic_error_handler(void *ctx, const char *msg, ...)
25
33
  {
26
- char *message;
34
+ VALUE message;
27
35
 
36
+ #ifdef TRUFFLERUBY_NOKOGIRI_SYSTEM_LIBRARIES
37
+ /* It is not currently possible to pass var args from native
38
+ functions to sulong, so we work around the issue here. */
39
+ message = rb_sprintf("xslt_generic_error_handler: %s", msg);
40
+ #else
28
41
  va_list args;
29
42
  va_start(args, msg);
30
- vasprintf(&message, msg, args);
43
+ message = rb_vsprintf(msg, args);
31
44
  va_end(args);
45
+ #endif
32
46
 
33
- rb_str_cat2((VALUE)ctx, message);
34
-
35
- free(message);
47
+ rb_str_concat((VALUE)ctx, message);
36
48
  }
37
49
 
38
50
  VALUE
@@ -41,8 +53,12 @@ Nokogiri_wrap_xslt_stylesheet(xsltStylesheetPtr ss)
41
53
  VALUE self;
42
54
  nokogiriXsltStylesheetTuple *wrapper;
43
55
 
44
- self = Data_Make_Struct(cNokogiriXsltStylesheet, nokogiriXsltStylesheetTuple,
45
- mark, dealloc, wrapper);
56
+ self = TypedData_Make_Struct(
57
+ cNokogiriXsltStylesheet,
58
+ nokogiriXsltStylesheetTuple,
59
+ &xslt_stylesheet_type,
60
+ wrapper
61
+ );
46
62
 
47
63
  ss->_private = (void *)self;
48
64
  wrapper->ss = ss;
@@ -63,7 +79,8 @@ parse_stylesheet_doc(VALUE klass, VALUE xmldocobj)
63
79
  xmlDocPtr xml, xml_cpy;
64
80
  VALUE errstr, exception;
65
81
  xsltStylesheetPtr ss ;
66
- Data_Get_Struct(xmldocobj, xmlDoc, xml);
82
+
83
+ xml = noko_xml_document_unwrap(xmldocobj);
67
84
 
68
85
  errstr = rb_str_new(0, 0);
69
86
  xsltSetGenericErrorFunc((void *)errstr, xslt_generic_error_handler);
@@ -90,7 +107,7 @@ parse_stylesheet_doc(VALUE klass, VALUE xmldocobj)
90
107
  * Serialize +document+ to an xml string.
91
108
  */
92
109
  static VALUE
93
- serialize(VALUE self, VALUE xmlobj)
110
+ rb_xslt_stylesheet_serialize(VALUE self, VALUE xmlobj)
94
111
  {
95
112
  xmlDocPtr xml ;
96
113
  nokogiriXsltStylesheetTuple *wrapper;
@@ -98,8 +115,13 @@ serialize(VALUE self, VALUE xmlobj)
98
115
  int doc_len ;
99
116
  VALUE rval ;
100
117
 
101
- Data_Get_Struct(xmlobj, xmlDoc, xml);
102
- Data_Get_Struct(self, nokogiriXsltStylesheetTuple, wrapper);
118
+ xml = noko_xml_document_unwrap(xmlobj);
119
+ TypedData_Get_Struct(
120
+ self,
121
+ nokogiriXsltStylesheetTuple,
122
+ &xslt_stylesheet_type,
123
+ wrapper
124
+ );
103
125
  xsltSaveResultToString(&doc_ptr, &doc_len, xml, wrapper->ss);
104
126
  rval = NOKOGIRI_STR_NEW(doc_ptr, doc_len);
105
127
  xmlFree(doc_ptr);
@@ -220,60 +242,74 @@ serialize(VALUE self, VALUE xmlobj)
220
242
  * See: Nokogiri::XSLT.quote_params
221
243
  */
222
244
  static VALUE
223
- transform(int argc, VALUE *argv, VALUE self)
245
+ rb_xslt_stylesheet_transform(int argc, VALUE *argv, VALUE self)
224
246
  {
225
- VALUE xmldoc, paramobj, errstr, exception ;
226
- xmlDocPtr xml ;
227
- xmlDocPtr result ;
247
+ VALUE rb_document, rb_param, rb_error_str;
248
+ xmlDocPtr c_document ;
249
+ xmlDocPtr c_result_document ;
228
250
  nokogiriXsltStylesheetTuple *wrapper;
229
251
  const char **params ;
230
252
  long param_len, j ;
231
253
  int parse_error_occurred ;
254
+ int defensive_copy_p = 0;
232
255
 
233
- rb_scan_args(argc, argv, "11", &xmldoc, &paramobj);
234
- if (NIL_P(paramobj)) { paramobj = rb_ary_new2(0L) ; }
235
- if (!rb_obj_is_kind_of(xmldoc, cNokogiriXmlDocument)) {
256
+ rb_scan_args(argc, argv, "11", &rb_document, &rb_param);
257
+ if (NIL_P(rb_param)) { rb_param = rb_ary_new2(0L) ; }
258
+ if (!rb_obj_is_kind_of(rb_document, cNokogiriXmlDocument)) {
236
259
  rb_raise(rb_eArgError, "argument must be a Nokogiri::XML::Document");
237
260
  }
238
261
 
239
262
  /* handle hashes as arguments. */
240
- if (T_HASH == TYPE(paramobj)) {
241
- paramobj = rb_funcall(paramobj, rb_intern("to_a"), 0);
242
- paramobj = rb_funcall(paramobj, rb_intern("flatten"), 0);
263
+ if (T_HASH == TYPE(rb_param)) {
264
+ rb_param = rb_funcall(rb_param, rb_intern("to_a"), 0);
265
+ rb_param = rb_funcall(rb_param, rb_intern("flatten"), 0);
243
266
  }
244
267
 
245
- Check_Type(paramobj, T_ARRAY);
268
+ Check_Type(rb_param, T_ARRAY);
246
269
 
247
- Data_Get_Struct(xmldoc, xmlDoc, xml);
248
- Data_Get_Struct(self, nokogiriXsltStylesheetTuple, wrapper);
270
+ c_document = noko_xml_document_unwrap(rb_document);
271
+ TypedData_Get_Struct(self, nokogiriXsltStylesheetTuple, &xslt_stylesheet_type, wrapper);
249
272
 
250
- param_len = RARRAY_LEN(paramobj);
251
- params = calloc((size_t)param_len + 1, sizeof(char *));
273
+ param_len = RARRAY_LEN(rb_param);
274
+ params = ruby_xcalloc((size_t)param_len + 1, sizeof(char *));
252
275
  for (j = 0 ; j < param_len ; j++) {
253
- VALUE entry = rb_ary_entry(paramobj, j);
276
+ VALUE entry = rb_ary_entry(rb_param, j);
254
277
  const char *ptr = StringValueCStr(entry);
255
278
  params[j] = ptr;
256
279
  }
257
280
  params[param_len] = 0 ;
258
281
 
259
- errstr = rb_str_new(0, 0);
260
- xsltSetGenericErrorFunc((void *)errstr, xslt_generic_error_handler);
261
- xmlSetGenericErrorFunc((void *)errstr, xslt_generic_error_handler);
282
+ xsltTransformContextPtr c_transform_context = xsltNewTransformContext(wrapper->ss, c_document);
283
+ if (xsltNeedElemSpaceHandling(c_transform_context) &&
284
+ noko_xml_document_has_wrapped_blank_nodes_p(c_document)) {
285
+ // see https://github.com/sparklemotion/nokogiri/issues/2800
286
+ c_document = xmlCopyDoc(c_document, 1);
287
+ defensive_copy_p = 1;
288
+ }
289
+ xsltFreeTransformContext(c_transform_context);
262
290
 
263
- result = xsltApplyStylesheet(wrapper->ss, xml, params);
264
- free(params);
291
+ rb_error_str = rb_str_new(0, 0);
292
+ xsltSetGenericErrorFunc((void *)rb_error_str, xslt_generic_error_handler);
293
+ xmlSetGenericErrorFunc((void *)rb_error_str, xslt_generic_error_handler);
294
+
295
+ c_result_document = xsltApplyStylesheet(wrapper->ss, c_document, params);
296
+
297
+ ruby_xfree(params);
298
+ if (defensive_copy_p) {
299
+ xmlFreeDoc(c_document);
300
+ c_document = NULL;
301
+ }
265
302
 
266
303
  xsltSetGenericErrorFunc(NULL, NULL);
267
304
  xmlSetGenericErrorFunc(NULL, NULL);
268
305
 
269
- parse_error_occurred = (Qfalse == rb_funcall(errstr, rb_intern("empty?"), 0));
306
+ parse_error_occurred = (Qfalse == rb_funcall(rb_error_str, rb_intern("empty?"), 0));
270
307
 
271
308
  if (parse_error_occurred) {
272
- exception = rb_exc_new3(rb_eRuntimeError, errstr);
273
- rb_exc_raise(exception);
309
+ rb_exc_raise(rb_exc_new3(rb_eRuntimeError, rb_error_str));
274
310
  }
275
311
 
276
- return noko_xml_document_wrap((VALUE)0, result) ;
312
+ return noko_xml_document_wrap((VALUE)0, c_result_document) ;
277
313
  }
278
314
 
279
315
  static void
@@ -289,7 +325,12 @@ method_caller(xmlXPathParserContextPtr ctxt, int nargs)
289
325
  handler = (VALUE)xsltGetExtData(transform, functionURI);
290
326
  function_name = (const char *)(ctxt->context->function);
291
327
 
292
- Nokogiri_marshal_xpath_funcall_and_return_values(ctxt, nargs, handler, (const char *)function_name);
328
+ Nokogiri_marshal_xpath_funcall_and_return_values(
329
+ ctxt,
330
+ nargs,
331
+ handler,
332
+ (const char *)function_name
333
+ );
293
334
  }
294
335
 
295
336
  static void *
@@ -305,12 +346,20 @@ initFunc(xsltTransformContextPtr ctxt, const xmlChar *uri)
305
346
 
306
347
  for (i = 0; i < RARRAY_LEN(methods); i++) {
307
348
  VALUE method_name = rb_obj_as_string(rb_ary_entry(methods, i));
308
- xsltRegisterExtFunction(ctxt,
309
- (unsigned char *)StringValueCStr(method_name), uri, method_caller);
349
+ xsltRegisterExtFunction(
350
+ ctxt,
351
+ (unsigned char *)StringValueCStr(method_name),
352
+ uri,
353
+ method_caller
354
+ );
310
355
  }
311
356
 
312
- Data_Get_Struct((VALUE)ctxt->style->_private, nokogiriXsltStylesheetTuple,
313
- wrapper);
357
+ TypedData_Get_Struct(
358
+ (VALUE)ctxt->style->_private,
359
+ nokogiriXsltStylesheetTuple,
360
+ &xslt_stylesheet_type,
361
+ wrapper
362
+ );
314
363
  inst = rb_class_new_instance(0, NULL, obj);
315
364
  rb_ary_push(wrapper->func_instances, inst);
316
365
 
@@ -323,33 +372,38 @@ shutdownFunc(xsltTransformContextPtr ctxt,
323
372
  {
324
373
  nokogiriXsltStylesheetTuple *wrapper;
325
374
 
326
- Data_Get_Struct((VALUE)ctxt->style->_private, nokogiriXsltStylesheetTuple,
327
- wrapper);
375
+ TypedData_Get_Struct(
376
+ (VALUE)ctxt->style->_private,
377
+ nokogiriXsltStylesheetTuple,
378
+ &xslt_stylesheet_type,
379
+ wrapper
380
+ );
328
381
 
329
382
  rb_ary_clear(wrapper->func_instances);
330
383
  }
331
384
 
332
- /*
333
- * call-seq:
334
- * register(uri, custom_handler_class)
335
- *
336
- * Register a class that implements custom XSLT transformation functions.
337
- */
385
+ /* docstring is in lib/nokogiri/xslt.rb */
338
386
  static VALUE
339
- registr(VALUE self, VALUE uri, VALUE obj)
387
+ rb_xslt_s_register(VALUE self, VALUE uri, VALUE obj)
340
388
  {
341
389
  VALUE modules = rb_iv_get(self, "@modules");
342
- if (NIL_P(modules)) { rb_raise(rb_eRuntimeError, "wtf! @modules isn't set"); }
390
+ if (NIL_P(modules)) {
391
+ rb_raise(rb_eRuntimeError, "internal error: @modules not set");
392
+ }
343
393
 
344
394
  rb_hash_aset(modules, uri, obj);
345
- xsltRegisterExtModule((unsigned char *)StringValueCStr(uri), initFunc, shutdownFunc);
395
+ xsltRegisterExtModule(
396
+ (unsigned char *)StringValueCStr(uri),
397
+ initFunc,
398
+ shutdownFunc
399
+ );
346
400
  return self;
347
401
  }
348
402
 
349
403
  void
350
- noko_init_xslt_stylesheet()
404
+ noko_init_xslt_stylesheet(void)
351
405
  {
352
- rb_define_singleton_method(mNokogiriXslt, "register", registr, 2);
406
+ rb_define_singleton_method(mNokogiriXslt, "register", rb_xslt_s_register, 2);
353
407
  rb_iv_set(mNokogiriXslt, "@modules", rb_hash_new());
354
408
 
355
409
  cNokogiriXsltStylesheet = rb_define_class_under(mNokogiriXslt, "Stylesheet", rb_cObject);
@@ -357,6 +411,6 @@ noko_init_xslt_stylesheet()
357
411
  rb_undef_alloc_func(cNokogiriXsltStylesheet);
358
412
 
359
413
  rb_define_singleton_method(cNokogiriXsltStylesheet, "parse_stylesheet_doc", parse_stylesheet_doc, 1);
360
- rb_define_method(cNokogiriXsltStylesheet, "serialize", serialize, 1);
361
- rb_define_method(cNokogiriXsltStylesheet, "transform", transform, -1);
414
+ rb_define_method(cNokogiriXsltStylesheet, "serialize", rb_xslt_stylesheet_serialize, 1);
415
+ rb_define_method(cNokogiriXsltStylesheet, "transform", rb_xslt_stylesheet_transform, -1);
362
416
  }
@@ -13,8 +13,18 @@ LDFLAGS := -pthread
13
13
 
14
14
  all: check
15
15
 
16
+ # don't try to regenerate ragel or gperf files in CI, that should be a development-only action and
17
+ # the generated files should be committed to SCM
18
+ ifneq ($(CI),true)
19
+ src/foreign_attrs.c: src/foreign_attrs.gperf
20
+ gperf -m100 -n $< | ./gperf-filter.sed > $@
21
+
22
+ src/%.c: src/%.gperf
23
+ gperf -m100 $< | ./gperf-filter.sed > $@
24
+
16
25
  src/%.c: src/%.rl
17
26
  ragel -F1 -o $@ $<
27
+ endif
18
28
 
19
29
  build/src:
20
30
  mkdir -p $@
@@ -1,7 +1,7 @@
1
1
  #ifndef GUMBO_ATTRIBUTE_H_
2
2
  #define GUMBO_ATTRIBUTE_H_
3
3
 
4
- #include "gumbo.h"
4
+ #include "nokogiri_gumbo.h"
5
5
 
6
6
  #ifdef __cplusplus
7
7
  extern "C" {
@@ -21,7 +21,7 @@
21
21
  #include <string.h>
22
22
  #include "ascii.h"
23
23
  #include "error.h"
24
- #include "gumbo.h"
24
+ #include "nokogiri_gumbo.h"
25
25
  #include "macros.h"
26
26
  #include "parser.h"
27
27
  #include "string_buffer.h"
@@ -91,12 +91,12 @@ static void print_tag_stack (
91
91
  const GumboParserError* error,
92
92
  GumboStringBuffer* output
93
93
  ) {
94
- print_message(output, " Currently open tags: ");
94
+ print_message(output, " Currently open tags: ");
95
95
  for (unsigned int i = 0; i < error->tag_stack.length; ++i) {
96
96
  if (i) {
97
97
  print_message(output, ", ");
98
98
  }
99
- GumboTag tag = (GumboTag) error->tag_stack.data[i];
99
+ GumboTag tag = (GumboTag)(intptr_t) error->tag_stack.data[i];
100
100
  print_message(output, "%s", gumbo_normalized_tagname(tag));
101
101
  }
102
102
  gumbo_string_buffer_append_codepoint('.', output);
@@ -347,15 +347,19 @@ static void handle_parser_error (
347
347
  if (error->parser_state == GUMBO_INSERTION_MODE_INITIAL) {
348
348
  print_message(output, "You must provide a doctype");
349
349
  } else {
350
- print_message(output, "Premature end of file");
350
+ print_message(output, "Premature end of file.");
351
351
  print_tag_stack(error, output);
352
352
  }
353
353
  return;
354
354
  case GUMBO_TOKEN_START_TAG:
355
+ print_message(output, "Start tag '%s' isn't allowed here.",
356
+ gumbo_normalized_tagname(error->input_tag));
357
+ print_tag_stack(error, output);
358
+ return;
355
359
  case GUMBO_TOKEN_END_TAG:
356
- print_message(output, "That tag isn't allowed here");
360
+ print_message(output, "End tag '%s' isn't allowed here.",
361
+ gumbo_normalized_tagname(error->input_tag));
357
362
  print_tag_stack(error, output);
358
- // TODO(jdtang): Give more specific messaging.
359
363
  return;
360
364
  }
361
365
  }
@@ -3,7 +3,7 @@
3
3
 
4
4
  #include <stdint.h>
5
5
 
6
- #include "gumbo.h"
6
+ #include "nokogiri_gumbo.h"
7
7
  #include "insertion_mode.h"
8
8
  #include "string_buffer.h"
9
9
  #include "token_type.h"
@@ -1,7 +1,7 @@
1
1
  /* ANSI-C code produced by gperf version 3.1 */
2
- /* Command-line: gperf -m100 -n lib/foreign_attrs.gperf */
3
- /* Computed positions: -k'2,8' */
4
- /* Filtered by: mk/gperf-filter.sed */
2
+ /* Command-line: gperf -m100 -n src/foreign_attrs.gperf */
3
+ /* Computed positions: -k'8-9' */
4
+ /* Filtered by: gperf-filter.sed */
5
5
 
6
6
  #include "replacement.h"
7
7
  #include "macros.h"
@@ -29,9 +29,9 @@ hash (register const char *str, register size_t len)
29
29
  11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
30
30
  11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
31
31
  11, 11, 11, 11, 11, 11, 11, 11, 11, 2,
32
- 11, 10, 11, 9, 7, 6, 11, 11, 1, 0,
33
- 11, 5, 11, 11, 4, 11, 11, 11, 11, 11,
34
- 11, 3, 11, 11, 11, 11, 11, 11, 11, 11,
32
+ 11, 1, 11, 10, 4, 4, 11, 11, 3, 11,
33
+ 11, 5, 3, 11, 0, 11, 2, 11, 11, 11,
34
+ 11, 2, 11, 11, 11, 11, 11, 11, 11, 11,
35
35
  11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
36
36
  11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
37
37
  11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
@@ -51,15 +51,14 @@ hash (register const char *str, register size_t len)
51
51
  switch (len)
52
52
  {
53
53
  default:
54
+ hval += asso_values[(unsigned char)str[8]];
55
+ /*FALLTHROUGH*/
56
+ case 8:
54
57
  hval += asso_values[(unsigned char)str[7]];
55
58
  /*FALLTHROUGH*/
56
59
  case 7:
57
60
  case 6:
58
61
  case 5:
59
- case 4:
60
- case 3:
61
- case 2:
62
- hval += asso_values[(unsigned char)str[1]];
63
62
  break;
64
63
  }
65
64
  return hval;
@@ -70,21 +69,21 @@ gumbo_get_foreign_attr_replacement (register const char *str, register size_t le
70
69
  {
71
70
  static const unsigned char lengthtable[] =
72
71
  {
73
- 5, 11, 9, 13, 10, 10, 10, 11, 10, 8, 8
72
+ 5, 10, 13, 9, 13, 10, 11, 11, 10, 10, 8
74
73
  };
75
74
  static const ForeignAttrReplacement wordlist[] =
76
75
  {
77
76
  {"xmlns", "xmlns", GUMBO_ATTR_NAMESPACE_XMLNS},
78
- {"xmlns:xlink", "xlink", GUMBO_ATTR_NAMESPACE_XMLNS},
77
+ {"xlink:href", "href", GUMBO_ATTR_NAMESPACE_XLINK},
78
+ {"xlink:arcrole", "arcrole", GUMBO_ATTR_NAMESPACE_XLINK},
79
79
  {"xml:space", "space", GUMBO_ATTR_NAMESPACE_XML},
80
80
  {"xlink:actuate", "actuate", GUMBO_ATTR_NAMESPACE_XLINK},
81
81
  {"xlink:type", "type", GUMBO_ATTR_NAMESPACE_XLINK},
82
- {"xlink:href", "href", GUMBO_ATTR_NAMESPACE_XLINK},
83
- {"xlink:role", "role", GUMBO_ATTR_NAMESPACE_XLINK},
84
82
  {"xlink:title", "title", GUMBO_ATTR_NAMESPACE_XLINK},
83
+ {"xmlns:xlink", "xlink", GUMBO_ATTR_NAMESPACE_XMLNS},
84
+ {"xlink:role", "role", GUMBO_ATTR_NAMESPACE_XLINK},
85
85
  {"xlink:show", "show", GUMBO_ATTR_NAMESPACE_XLINK},
86
- {"xml:lang", "lang", GUMBO_ATTR_NAMESPACE_XML},
87
- {"xml:base", "base", GUMBO_ATTR_NAMESPACE_XML}
86
+ {"xml:lang", "lang", GUMBO_ATTR_NAMESPACE_XML}
88
87
  };
89
88
 
90
89
  if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
@@ -15,12 +15,12 @@ ForeignAttrReplacement;
15
15
 
16
16
  %%
17
17
  "xlink:actuate", "actuate", GUMBO_ATTR_NAMESPACE_XLINK
18
+ "xlink:arcrole", "arcrole", GUMBO_ATTR_NAMESPACE_XLINK
18
19
  "xlink:href", "href", GUMBO_ATTR_NAMESPACE_XLINK
19
20
  "xlink:role", "role", GUMBO_ATTR_NAMESPACE_XLINK
20
21
  "xlink:show", "show", GUMBO_ATTR_NAMESPACE_XLINK
21
22
  "xlink:title", "title", GUMBO_ATTR_NAMESPACE_XLINK
22
23
  "xlink:type", "type", GUMBO_ATTR_NAMESPACE_XLINK
23
- "xml:base", "base", GUMBO_ATTR_NAMESPACE_XML
24
24
  "xml:lang", "lang", GUMBO_ATTR_NAMESPACE_XML
25
25
  "xml:space", "space", GUMBO_ATTR_NAMESPACE_XML
26
26
  "xmlns", "xmlns", GUMBO_ATTR_NAMESPACE_XMLNS
@@ -292,6 +292,7 @@ typedef enum {
292
292
  GUMBO_TAG_TT,
293
293
  GUMBO_TAG_RTC,
294
294
  GUMBO_TAG_DIALOG,
295
+ GUMBO_TAG_SEARCH,
295
296
  // Used for all tags that don't have special handling in HTML.
296
297
  GUMBO_TAG_UNKNOWN,
297
298
  // A marker value to indicate the end of the enum, for iterating over it.
@@ -24,7 +24,7 @@
24
24
  #include "ascii.h"
25
25
  #include "attribute.h"
26
26
  #include "error.h"
27
- #include "gumbo.h"
27
+ #include "nokogiri_gumbo.h"
28
28
  #include "insertion_mode.h"
29
29
  #include "macros.h"
30
30
  #include "parser.h"
@@ -537,7 +537,7 @@ static GumboInsertionMode get_current_template_insertion_mode (
537
537
  if (modes->length == 0) {
538
538
  return GUMBO_INSERTION_MODE_INITIAL;
539
539
  }
540
- return (GumboInsertionMode) modes->data[(modes->length - 1)];
540
+ return (GumboInsertionMode)(intptr_t) modes->data[(modes->length - 1)];
541
541
  }
542
542
 
543
543
  // Returns true if the specified token is either a start or end tag
@@ -2678,6 +2678,7 @@ static void handle_in_head(GumboParser* parser, GumboToken* token) {
2678
2678
  static void handle_in_head_noscript(GumboParser* parser, GumboToken* token) {
2679
2679
  if (token->type == GUMBO_TOKEN_DOCTYPE) {
2680
2680
  parser_add_parse_error(parser, token);
2681
+ ignore_token(parser);
2681
2682
  return;
2682
2683
  }
2683
2684
  if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
@@ -2940,7 +2941,7 @@ static void handle_in_body(GumboParser* parser, GumboToken* token) {
2940
2941
  TAG(DETAILS), TAG(DIALOG), TAG(DIR), TAG(DIV), TAG(DL), TAG(FIELDSET),
2941
2942
  TAG(FIGCAPTION), TAG(FIGURE), TAG(FOOTER), TAG(HEADER), TAG(HGROUP),
2942
2943
  TAG(MAIN), TAG(MENU), TAG(NAV), TAG(OL), TAG(P), TAG(SECTION),
2943
- TAG(SUMMARY), TAG(UL)
2944
+ TAG(SUMMARY), TAG(UL), TAG(SEARCH)
2944
2945
  })
2945
2946
  ) {
2946
2947
  maybe_implicitly_close_p_tag(parser, token);
@@ -3018,7 +3019,7 @@ static void handle_in_body(GumboParser* parser, GumboToken* token) {
3018
3019
  TAG(CENTER), TAG(DETAILS), TAG(DIALOG), TAG(DIR), TAG(DIV), TAG(DL),
3019
3020
  TAG(FIELDSET), TAG(FIGCAPTION), TAG(FIGURE), TAG(FOOTER), TAG(HEADER),
3020
3021
  TAG(HGROUP), TAG(LISTING), TAG(MAIN), TAG(MENU), TAG(NAV), TAG(OL),
3021
- TAG(PRE), TAG(SECTION), TAG(SUMMARY), TAG(UL)
3022
+ TAG(PRE), TAG(SECTION), TAG(SUMMARY), TAG(UL), TAG(SEARCH)
3022
3023
  })
3023
3024
  ) {
3024
3025
  GumboTag tag = token->v.end_tag.tag;
@@ -3057,6 +3058,9 @@ static void handle_in_body(GumboParser* parser, GumboToken* token) {
3057
3058
  ignore_token(parser);
3058
3059
  return;
3059
3060
  }
3061
+ // Since we remove the form node without popping, we need to make sure
3062
+ // that we flush any text nodes at the end of the form.
3063
+ maybe_flush_text_node_buffer(parser);
3060
3064
  // This differs from implicitly_close_tags because we remove *only* the
3061
3065
  // <form> element; other nodes are left in scope.
3062
3066
  generate_implied_end_tags(parser, GUMBO_TAG_LAST, NULL);
@@ -3434,7 +3438,7 @@ static void handle_in_table(GumboParser* parser, GumboToken* token) {
3434
3438
  || token->type == GUMBO_TOKEN_WHITESPACE
3435
3439
  || token->type == GUMBO_TOKEN_NULL)
3436
3440
  && node_tag_in_set(get_current_node(parser), &(const TagSet) {
3437
- TAG(TABLE), TAG(TBODY), TAG(TFOOT), TAG(THEAD), TAG(TR)
3441
+ TAG(TABLE), TAG(TBODY), TAG(TEMPLATE), TAG(TFOOT), TAG(THEAD), TAG(TR)
3438
3442
  })
3439
3443
  ) {
3440
3444
  // The "pending table character tokens" list described in the spec is
@@ -3960,6 +3964,18 @@ static void handle_in_select(GumboParser* parser, GumboToken* token) {
3960
3964
  insert_element_from_token(parser, token);
3961
3965
  return;
3962
3966
  }
3967
+ if (tag_is(token, kStartTag, GUMBO_TAG_HR)) {
3968
+ if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTION)) {
3969
+ pop_current_node(parser);
3970
+ }
3971
+ if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTGROUP)) {
3972
+ pop_current_node(parser);
3973
+ }
3974
+ insert_element_from_token(parser, token);
3975
+ pop_current_node(parser);
3976
+ acknowledge_self_closing_tag(parser);
3977
+ return;
3978
+ }
3963
3979
  if (tag_is(token, kEndTag, GUMBO_TAG_OPTGROUP)) {
3964
3980
  GumboVector* open_elements = &parser->_parser_state->_open_elements;
3965
3981
  if (
@@ -2,7 +2,7 @@
2
2
  #define GUMBO_REPLACEMENT_H_
3
3
 
4
4
  #include <stddef.h>
5
- #include "gumbo.h"
5
+ #include "nokogiri_gumbo.h"
6
6
 
7
7
  typedef struct {
8
8
  const char *const from;
@@ -4,7 +4,7 @@
4
4
  #include <stdbool.h>
5
5
  #include <stddef.h>
6
6
 
7
- #include "gumbo.h"
7
+ #include "nokogiri_gumbo.h"
8
8
 
9
9
  #ifdef __cplusplus
10
10
  extern "C" {
@@ -17,7 +17,7 @@
17
17
 
18
18
  #include <stddef.h>
19
19
  #include <string.h>
20
- #include "gumbo.h"
20
+ #include "nokogiri_gumbo.h"
21
21
  #include "ascii.h"
22
22
 
23
23
  bool gumbo_string_equals (
@@ -1,7 +1,7 @@
1
1
  /* ANSI-C code produced by gperf version 3.1 */
2
- /* Command-line: gperf -m100 lib/svg_attrs.gperf */
2
+ /* Command-line: gperf -m100 src/svg_attrs.gperf */
3
3
  /* Computed positions: -k'1,10,$' */
4
- /* Filtered by: mk/gperf-filter.sed */
4
+ /* Filtered by: gperf-filter.sed */
5
5
 
6
6
  #include "replacement.h"
7
7
  #include "macros.h"
@@ -1,7 +1,7 @@
1
1
  /* ANSI-C code produced by gperf version 3.1 */
2
- /* Command-line: gperf -m100 lib/svg_tags.gperf */
2
+ /* Command-line: gperf -m100 src/svg_tags.gperf */
3
3
  /* Computed positions: -k'3,7' */
4
- /* Filtered by: mk/gperf-filter.sed */
4
+ /* Filtered by: gperf-filter.sed */
5
5
 
6
6
  #include "replacement.h"
7
7
  #include "macros.h"
@@ -14,7 +14,7 @@
14
14
  limitations under the License.
15
15
  */
16
16
 
17
- #include "gumbo.h"
17
+ #include "nokogiri_gumbo.h"
18
18
  #include "util.h"
19
19
  #include "tag_lookup.h"
20
20
 
@@ -172,6 +172,7 @@ static const char kGumboTagNames[GUMBO_TAG_LAST+1][15] = {
172
172
  [GUMBO_TAG_TT] = "tt",
173
173
  [GUMBO_TAG_RTC] = "rtc",
174
174
  [GUMBO_TAG_DIALOG] = "dialog",
175
+ [GUMBO_TAG_SEARCH] = "search",
175
176
 
176
177
  [GUMBO_TAG_UNKNOWN] = "",
177
178
  [GUMBO_TAG_LAST] = "",