nokogiri 1.13.8 → 1.15.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +40 -0
- data/LICENSE-DEPENDENCIES.md +830 -509
- data/LICENSE.md +1 -1
- data/README.md +18 -11
- data/dependencies.yml +33 -15
- data/ext/nokogiri/extconf.rb +164 -46
- data/ext/nokogiri/gumbo.c +20 -10
- data/ext/nokogiri/html4_document.c +3 -4
- data/ext/nokogiri/html4_element_description.c +20 -15
- data/ext/nokogiri/html4_entity_lookup.c +2 -2
- data/ext/nokogiri/html4_sax_parser_context.c +11 -22
- data/ext/nokogiri/html4_sax_push_parser.c +3 -3
- data/ext/nokogiri/nokogiri.c +84 -75
- data/ext/nokogiri/nokogiri.h +31 -16
- data/ext/nokogiri/test_global_handlers.c +1 -1
- data/ext/nokogiri/xml_attr.c +2 -2
- data/ext/nokogiri/xml_attribute_decl.c +2 -2
- data/ext/nokogiri/xml_cdata.c +32 -18
- data/ext/nokogiri/xml_comment.c +2 -2
- data/ext/nokogiri/xml_document.c +127 -34
- data/ext/nokogiri/xml_document_fragment.c +2 -2
- data/ext/nokogiri/xml_dtd.c +2 -2
- data/ext/nokogiri/xml_element_content.c +34 -31
- data/ext/nokogiri/xml_element_decl.c +7 -7
- data/ext/nokogiri/xml_encoding_handler.c +15 -7
- data/ext/nokogiri/xml_entity_decl.c +1 -1
- data/ext/nokogiri/xml_entity_reference.c +2 -2
- data/ext/nokogiri/xml_namespace.c +79 -14
- data/ext/nokogiri/xml_node.c +300 -34
- data/ext/nokogiri/xml_node_set.c +125 -107
- data/ext/nokogiri/xml_processing_instruction.c +2 -2
- data/ext/nokogiri/xml_reader.c +81 -48
- data/ext/nokogiri/xml_relax_ng.c +66 -81
- data/ext/nokogiri/xml_sax_parser.c +45 -20
- data/ext/nokogiri/xml_sax_parser_context.c +46 -30
- data/ext/nokogiri/xml_sax_push_parser.c +30 -11
- data/ext/nokogiri/xml_schema.c +95 -117
- data/ext/nokogiri/xml_syntax_error.c +1 -1
- data/ext/nokogiri/xml_text.c +28 -14
- data/ext/nokogiri/xml_xpath_context.c +216 -136
- data/ext/nokogiri/xslt_stylesheet.c +118 -64
- data/gumbo-parser/Makefile +10 -0
- data/gumbo-parser/src/attribute.h +1 -1
- data/gumbo-parser/src/error.c +10 -6
- data/gumbo-parser/src/error.h +1 -1
- data/gumbo-parser/src/foreign_attrs.c +15 -16
- data/gumbo-parser/src/foreign_attrs.gperf +1 -1
- data/gumbo-parser/src/{gumbo.h → nokogiri_gumbo.h} +1 -0
- data/gumbo-parser/src/parser.c +21 -5
- data/gumbo-parser/src/replacement.h +1 -1
- data/gumbo-parser/src/string_buffer.h +1 -1
- data/gumbo-parser/src/string_piece.c +1 -1
- data/gumbo-parser/src/svg_attrs.c +2 -2
- data/gumbo-parser/src/svg_tags.c +2 -2
- data/gumbo-parser/src/tag.c +2 -1
- data/gumbo-parser/src/tag_lookup.c +7 -7
- data/gumbo-parser/src/tag_lookup.gperf +1 -0
- data/gumbo-parser/src/tag_lookup.h +1 -1
- data/gumbo-parser/src/token_buffer.h +1 -1
- data/gumbo-parser/src/tokenizer.c +1 -1
- data/gumbo-parser/src/tokenizer.h +1 -1
- data/gumbo-parser/src/utf8.c +1 -1
- data/gumbo-parser/src/utf8.h +1 -1
- data/gumbo-parser/src/util.c +1 -3
- data/gumbo-parser/src/util.h +4 -0
- data/gumbo-parser/src/vector.h +1 -1
- data/lib/nokogiri/css/node.rb +2 -2
- data/lib/nokogiri/css/xpath_visitor.rb +7 -5
- data/lib/nokogiri/css.rb +6 -0
- data/lib/nokogiri/decorators/slop.rb +1 -1
- data/lib/nokogiri/encoding_handler.rb +57 -0
- data/lib/nokogiri/extension.rb +4 -3
- data/lib/nokogiri/html4/document.rb +2 -121
- data/lib/nokogiri/html4/document_fragment.rb +1 -1
- data/lib/nokogiri/html4/element_description_defaults.rb +1827 -365
- data/lib/nokogiri/html4/encoding_reader.rb +121 -0
- data/lib/nokogiri/html4.rb +1 -0
- data/lib/nokogiri/html5/document.rb +113 -36
- data/lib/nokogiri/html5/document_fragment.rb +10 -3
- data/lib/nokogiri/html5/node.rb +8 -5
- data/lib/nokogiri/html5.rb +130 -216
- data/lib/nokogiri/jruby/dependencies.rb +1 -19
- data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
- data/lib/nokogiri/version/constant.rb +1 -1
- data/lib/nokogiri/version/info.rb +11 -10
- data/lib/nokogiri/xml/attr.rb +49 -0
- data/lib/nokogiri/xml/attribute_decl.rb +4 -2
- data/lib/nokogiri/xml/builder.rb +1 -1
- data/lib/nokogiri/xml/document.rb +102 -55
- data/lib/nokogiri/xml/document_fragment.rb +50 -7
- data/lib/nokogiri/xml/element_content.rb +10 -2
- data/lib/nokogiri/xml/element_decl.rb +4 -2
- data/lib/nokogiri/xml/entity_decl.rb +4 -2
- data/lib/nokogiri/xml/namespace.rb +42 -0
- data/lib/nokogiri/xml/node/save_options.rb +14 -4
- data/lib/nokogiri/xml/node.rb +212 -48
- data/lib/nokogiri/xml/node_set.rb +88 -9
- data/lib/nokogiri/xml/parse_options.rb +129 -50
- data/lib/nokogiri/xml/pp/node.rb +28 -15
- data/lib/nokogiri/xml/processing_instruction.rb +2 -1
- data/lib/nokogiri/xml/sax/document.rb +1 -1
- data/lib/nokogiri/xml/sax/parser.rb +2 -3
- data/lib/nokogiri/xml/searchable.rb +18 -10
- data/lib/nokogiri/xslt.rb +74 -4
- data/lib/nokogiri.rb +15 -15
- data/lib/xsd/xmlparser/nokogiri.rb +4 -2
- data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
- data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
- data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
- data/ports/archives/libxml2-2.11.7.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.39.tar.xz +0 -0
- metadata +19 -242
- data/patches/libxml2/0004-use-glibc-strlen.patch +0 -53
- data/patches/libxml2/0005-avoid-isnan-isinf.patch +0 -81
- data/patches/libxml2/0006-update-automake-files-for-arm64.patch +0 -3040
- data/patches/libxml2/0008-htmlParseComment-handle-abruptly-closed-comments.patch +0 -61
- data/patches/libxslt/0001-update-automake-files-for-arm64.patch +0 -3037
- data/ports/archives/libxml2-2.9.14.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.35.tar.xz +0 -0
@@ -3,36 +3,48 @@
|
|
3
3
|
VALUE cNokogiriXsltStylesheet ;
|
4
4
|
|
5
5
|
static void
|
6
|
-
mark(
|
6
|
+
mark(void *data)
|
7
7
|
{
|
8
|
+
nokogiriXsltStylesheetTuple *wrapper = (nokogiriXsltStylesheetTuple *)data;
|
8
9
|
rb_gc_mark(wrapper->func_instances);
|
9
10
|
}
|
10
11
|
|
11
12
|
static void
|
12
|
-
dealloc(
|
13
|
+
dealloc(void *data)
|
13
14
|
{
|
15
|
+
nokogiriXsltStylesheetTuple *wrapper = (nokogiriXsltStylesheetTuple *)data;
|
14
16
|
xsltStylesheetPtr doc = wrapper->ss;
|
15
|
-
|
16
|
-
|
17
|
-
xsltFreeStylesheet(doc); /* commented out for now. */
|
18
|
-
NOKOGIRI_DEBUG_END(doc);
|
19
|
-
|
20
|
-
free(wrapper);
|
17
|
+
xsltFreeStylesheet(doc);
|
18
|
+
ruby_xfree(wrapper);
|
21
19
|
}
|
22
20
|
|
21
|
+
static const rb_data_type_t xslt_stylesheet_type = {
|
22
|
+
.wrap_struct_name = "Nokogiri::XSLT::Stylesheet",
|
23
|
+
.function = {
|
24
|
+
.dmark = mark,
|
25
|
+
.dfree = dealloc,
|
26
|
+
},
|
27
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY
|
28
|
+
};
|
29
|
+
|
30
|
+
PRINTFLIKE_DECL(2, 3)
|
23
31
|
static void
|
24
32
|
xslt_generic_error_handler(void *ctx, const char *msg, ...)
|
25
33
|
{
|
26
|
-
|
34
|
+
VALUE message;
|
27
35
|
|
36
|
+
#ifdef TRUFFLERUBY_NOKOGIRI_SYSTEM_LIBRARIES
|
37
|
+
/* It is not currently possible to pass var args from native
|
38
|
+
functions to sulong, so we work around the issue here. */
|
39
|
+
message = rb_sprintf("xslt_generic_error_handler: %s", msg);
|
40
|
+
#else
|
28
41
|
va_list args;
|
29
42
|
va_start(args, msg);
|
30
|
-
|
43
|
+
message = rb_vsprintf(msg, args);
|
31
44
|
va_end(args);
|
45
|
+
#endif
|
32
46
|
|
33
|
-
|
34
|
-
|
35
|
-
free(message);
|
47
|
+
rb_str_concat((VALUE)ctx, message);
|
36
48
|
}
|
37
49
|
|
38
50
|
VALUE
|
@@ -41,8 +53,12 @@ Nokogiri_wrap_xslt_stylesheet(xsltStylesheetPtr ss)
|
|
41
53
|
VALUE self;
|
42
54
|
nokogiriXsltStylesheetTuple *wrapper;
|
43
55
|
|
44
|
-
self =
|
45
|
-
|
56
|
+
self = TypedData_Make_Struct(
|
57
|
+
cNokogiriXsltStylesheet,
|
58
|
+
nokogiriXsltStylesheetTuple,
|
59
|
+
&xslt_stylesheet_type,
|
60
|
+
wrapper
|
61
|
+
);
|
46
62
|
|
47
63
|
ss->_private = (void *)self;
|
48
64
|
wrapper->ss = ss;
|
@@ -63,7 +79,8 @@ parse_stylesheet_doc(VALUE klass, VALUE xmldocobj)
|
|
63
79
|
xmlDocPtr xml, xml_cpy;
|
64
80
|
VALUE errstr, exception;
|
65
81
|
xsltStylesheetPtr ss ;
|
66
|
-
|
82
|
+
|
83
|
+
xml = noko_xml_document_unwrap(xmldocobj);
|
67
84
|
|
68
85
|
errstr = rb_str_new(0, 0);
|
69
86
|
xsltSetGenericErrorFunc((void *)errstr, xslt_generic_error_handler);
|
@@ -90,7 +107,7 @@ parse_stylesheet_doc(VALUE klass, VALUE xmldocobj)
|
|
90
107
|
* Serialize +document+ to an xml string.
|
91
108
|
*/
|
92
109
|
static VALUE
|
93
|
-
|
110
|
+
rb_xslt_stylesheet_serialize(VALUE self, VALUE xmlobj)
|
94
111
|
{
|
95
112
|
xmlDocPtr xml ;
|
96
113
|
nokogiriXsltStylesheetTuple *wrapper;
|
@@ -98,8 +115,13 @@ serialize(VALUE self, VALUE xmlobj)
|
|
98
115
|
int doc_len ;
|
99
116
|
VALUE rval ;
|
100
117
|
|
101
|
-
|
102
|
-
|
118
|
+
xml = noko_xml_document_unwrap(xmlobj);
|
119
|
+
TypedData_Get_Struct(
|
120
|
+
self,
|
121
|
+
nokogiriXsltStylesheetTuple,
|
122
|
+
&xslt_stylesheet_type,
|
123
|
+
wrapper
|
124
|
+
);
|
103
125
|
xsltSaveResultToString(&doc_ptr, &doc_len, xml, wrapper->ss);
|
104
126
|
rval = NOKOGIRI_STR_NEW(doc_ptr, doc_len);
|
105
127
|
xmlFree(doc_ptr);
|
@@ -220,60 +242,74 @@ serialize(VALUE self, VALUE xmlobj)
|
|
220
242
|
* See: Nokogiri::XSLT.quote_params
|
221
243
|
*/
|
222
244
|
static VALUE
|
223
|
-
|
245
|
+
rb_xslt_stylesheet_transform(int argc, VALUE *argv, VALUE self)
|
224
246
|
{
|
225
|
-
VALUE
|
226
|
-
xmlDocPtr
|
227
|
-
xmlDocPtr
|
247
|
+
VALUE rb_document, rb_param, rb_error_str;
|
248
|
+
xmlDocPtr c_document ;
|
249
|
+
xmlDocPtr c_result_document ;
|
228
250
|
nokogiriXsltStylesheetTuple *wrapper;
|
229
251
|
const char **params ;
|
230
252
|
long param_len, j ;
|
231
253
|
int parse_error_occurred ;
|
254
|
+
int defensive_copy_p = 0;
|
232
255
|
|
233
|
-
rb_scan_args(argc, argv, "11", &
|
234
|
-
if (NIL_P(
|
235
|
-
if (!rb_obj_is_kind_of(
|
256
|
+
rb_scan_args(argc, argv, "11", &rb_document, &rb_param);
|
257
|
+
if (NIL_P(rb_param)) { rb_param = rb_ary_new2(0L) ; }
|
258
|
+
if (!rb_obj_is_kind_of(rb_document, cNokogiriXmlDocument)) {
|
236
259
|
rb_raise(rb_eArgError, "argument must be a Nokogiri::XML::Document");
|
237
260
|
}
|
238
261
|
|
239
262
|
/* handle hashes as arguments. */
|
240
|
-
if (T_HASH == TYPE(
|
241
|
-
|
242
|
-
|
263
|
+
if (T_HASH == TYPE(rb_param)) {
|
264
|
+
rb_param = rb_funcall(rb_param, rb_intern("to_a"), 0);
|
265
|
+
rb_param = rb_funcall(rb_param, rb_intern("flatten"), 0);
|
243
266
|
}
|
244
267
|
|
245
|
-
Check_Type(
|
268
|
+
Check_Type(rb_param, T_ARRAY);
|
246
269
|
|
247
|
-
|
248
|
-
|
270
|
+
c_document = noko_xml_document_unwrap(rb_document);
|
271
|
+
TypedData_Get_Struct(self, nokogiriXsltStylesheetTuple, &xslt_stylesheet_type, wrapper);
|
249
272
|
|
250
|
-
param_len = RARRAY_LEN(
|
251
|
-
params =
|
273
|
+
param_len = RARRAY_LEN(rb_param);
|
274
|
+
params = ruby_xcalloc((size_t)param_len + 1, sizeof(char *));
|
252
275
|
for (j = 0 ; j < param_len ; j++) {
|
253
|
-
VALUE entry = rb_ary_entry(
|
276
|
+
VALUE entry = rb_ary_entry(rb_param, j);
|
254
277
|
const char *ptr = StringValueCStr(entry);
|
255
278
|
params[j] = ptr;
|
256
279
|
}
|
257
280
|
params[param_len] = 0 ;
|
258
281
|
|
259
|
-
|
260
|
-
|
261
|
-
|
282
|
+
xsltTransformContextPtr c_transform_context = xsltNewTransformContext(wrapper->ss, c_document);
|
283
|
+
if (xsltNeedElemSpaceHandling(c_transform_context) &&
|
284
|
+
noko_xml_document_has_wrapped_blank_nodes_p(c_document)) {
|
285
|
+
// see https://github.com/sparklemotion/nokogiri/issues/2800
|
286
|
+
c_document = xmlCopyDoc(c_document, 1);
|
287
|
+
defensive_copy_p = 1;
|
288
|
+
}
|
289
|
+
xsltFreeTransformContext(c_transform_context);
|
262
290
|
|
263
|
-
|
264
|
-
|
291
|
+
rb_error_str = rb_str_new(0, 0);
|
292
|
+
xsltSetGenericErrorFunc((void *)rb_error_str, xslt_generic_error_handler);
|
293
|
+
xmlSetGenericErrorFunc((void *)rb_error_str, xslt_generic_error_handler);
|
294
|
+
|
295
|
+
c_result_document = xsltApplyStylesheet(wrapper->ss, c_document, params);
|
296
|
+
|
297
|
+
ruby_xfree(params);
|
298
|
+
if (defensive_copy_p) {
|
299
|
+
xmlFreeDoc(c_document);
|
300
|
+
c_document = NULL;
|
301
|
+
}
|
265
302
|
|
266
303
|
xsltSetGenericErrorFunc(NULL, NULL);
|
267
304
|
xmlSetGenericErrorFunc(NULL, NULL);
|
268
305
|
|
269
|
-
parse_error_occurred = (Qfalse == rb_funcall(
|
306
|
+
parse_error_occurred = (Qfalse == rb_funcall(rb_error_str, rb_intern("empty?"), 0));
|
270
307
|
|
271
308
|
if (parse_error_occurred) {
|
272
|
-
|
273
|
-
rb_exc_raise(exception);
|
309
|
+
rb_exc_raise(rb_exc_new3(rb_eRuntimeError, rb_error_str));
|
274
310
|
}
|
275
311
|
|
276
|
-
return noko_xml_document_wrap((VALUE)0,
|
312
|
+
return noko_xml_document_wrap((VALUE)0, c_result_document) ;
|
277
313
|
}
|
278
314
|
|
279
315
|
static void
|
@@ -289,7 +325,12 @@ method_caller(xmlXPathParserContextPtr ctxt, int nargs)
|
|
289
325
|
handler = (VALUE)xsltGetExtData(transform, functionURI);
|
290
326
|
function_name = (const char *)(ctxt->context->function);
|
291
327
|
|
292
|
-
Nokogiri_marshal_xpath_funcall_and_return_values(
|
328
|
+
Nokogiri_marshal_xpath_funcall_and_return_values(
|
329
|
+
ctxt,
|
330
|
+
nargs,
|
331
|
+
handler,
|
332
|
+
(const char *)function_name
|
333
|
+
);
|
293
334
|
}
|
294
335
|
|
295
336
|
static void *
|
@@ -305,12 +346,20 @@ initFunc(xsltTransformContextPtr ctxt, const xmlChar *uri)
|
|
305
346
|
|
306
347
|
for (i = 0; i < RARRAY_LEN(methods); i++) {
|
307
348
|
VALUE method_name = rb_obj_as_string(rb_ary_entry(methods, i));
|
308
|
-
xsltRegisterExtFunction(
|
309
|
-
|
349
|
+
xsltRegisterExtFunction(
|
350
|
+
ctxt,
|
351
|
+
(unsigned char *)StringValueCStr(method_name),
|
352
|
+
uri,
|
353
|
+
method_caller
|
354
|
+
);
|
310
355
|
}
|
311
356
|
|
312
|
-
|
313
|
-
|
357
|
+
TypedData_Get_Struct(
|
358
|
+
(VALUE)ctxt->style->_private,
|
359
|
+
nokogiriXsltStylesheetTuple,
|
360
|
+
&xslt_stylesheet_type,
|
361
|
+
wrapper
|
362
|
+
);
|
314
363
|
inst = rb_class_new_instance(0, NULL, obj);
|
315
364
|
rb_ary_push(wrapper->func_instances, inst);
|
316
365
|
|
@@ -323,33 +372,38 @@ shutdownFunc(xsltTransformContextPtr ctxt,
|
|
323
372
|
{
|
324
373
|
nokogiriXsltStylesheetTuple *wrapper;
|
325
374
|
|
326
|
-
|
327
|
-
|
375
|
+
TypedData_Get_Struct(
|
376
|
+
(VALUE)ctxt->style->_private,
|
377
|
+
nokogiriXsltStylesheetTuple,
|
378
|
+
&xslt_stylesheet_type,
|
379
|
+
wrapper
|
380
|
+
);
|
328
381
|
|
329
382
|
rb_ary_clear(wrapper->func_instances);
|
330
383
|
}
|
331
384
|
|
332
|
-
/*
|
333
|
-
* call-seq:
|
334
|
-
* register(uri, custom_handler_class)
|
335
|
-
*
|
336
|
-
* Register a class that implements custom XSLT transformation functions.
|
337
|
-
*/
|
385
|
+
/* docstring is in lib/nokogiri/xslt.rb */
|
338
386
|
static VALUE
|
339
|
-
|
387
|
+
rb_xslt_s_register(VALUE self, VALUE uri, VALUE obj)
|
340
388
|
{
|
341
389
|
VALUE modules = rb_iv_get(self, "@modules");
|
342
|
-
if (NIL_P(modules)) {
|
390
|
+
if (NIL_P(modules)) {
|
391
|
+
rb_raise(rb_eRuntimeError, "internal error: @modules not set");
|
392
|
+
}
|
343
393
|
|
344
394
|
rb_hash_aset(modules, uri, obj);
|
345
|
-
xsltRegisterExtModule(
|
395
|
+
xsltRegisterExtModule(
|
396
|
+
(unsigned char *)StringValueCStr(uri),
|
397
|
+
initFunc,
|
398
|
+
shutdownFunc
|
399
|
+
);
|
346
400
|
return self;
|
347
401
|
}
|
348
402
|
|
349
403
|
void
|
350
|
-
noko_init_xslt_stylesheet()
|
404
|
+
noko_init_xslt_stylesheet(void)
|
351
405
|
{
|
352
|
-
rb_define_singleton_method(mNokogiriXslt, "register",
|
406
|
+
rb_define_singleton_method(mNokogiriXslt, "register", rb_xslt_s_register, 2);
|
353
407
|
rb_iv_set(mNokogiriXslt, "@modules", rb_hash_new());
|
354
408
|
|
355
409
|
cNokogiriXsltStylesheet = rb_define_class_under(mNokogiriXslt, "Stylesheet", rb_cObject);
|
@@ -357,6 +411,6 @@ noko_init_xslt_stylesheet()
|
|
357
411
|
rb_undef_alloc_func(cNokogiriXsltStylesheet);
|
358
412
|
|
359
413
|
rb_define_singleton_method(cNokogiriXsltStylesheet, "parse_stylesheet_doc", parse_stylesheet_doc, 1);
|
360
|
-
rb_define_method(cNokogiriXsltStylesheet, "serialize",
|
361
|
-
rb_define_method(cNokogiriXsltStylesheet, "transform",
|
414
|
+
rb_define_method(cNokogiriXsltStylesheet, "serialize", rb_xslt_stylesheet_serialize, 1);
|
415
|
+
rb_define_method(cNokogiriXsltStylesheet, "transform", rb_xslt_stylesheet_transform, -1);
|
362
416
|
}
|
data/gumbo-parser/Makefile
CHANGED
@@ -13,8 +13,18 @@ LDFLAGS := -pthread
|
|
13
13
|
|
14
14
|
all: check
|
15
15
|
|
16
|
+
# don't try to regenerate ragel or gperf files in CI, that should be a development-only action and
|
17
|
+
# the generated files should be committed to SCM
|
18
|
+
ifneq ($(CI),true)
|
19
|
+
src/foreign_attrs.c: src/foreign_attrs.gperf
|
20
|
+
gperf -m100 -n $< | ./gperf-filter.sed > $@
|
21
|
+
|
22
|
+
src/%.c: src/%.gperf
|
23
|
+
gperf -m100 $< | ./gperf-filter.sed > $@
|
24
|
+
|
16
25
|
src/%.c: src/%.rl
|
17
26
|
ragel -F1 -o $@ $<
|
27
|
+
endif
|
18
28
|
|
19
29
|
build/src:
|
20
30
|
mkdir -p $@
|
data/gumbo-parser/src/error.c
CHANGED
@@ -21,7 +21,7 @@
|
|
21
21
|
#include <string.h>
|
22
22
|
#include "ascii.h"
|
23
23
|
#include "error.h"
|
24
|
-
#include "
|
24
|
+
#include "nokogiri_gumbo.h"
|
25
25
|
#include "macros.h"
|
26
26
|
#include "parser.h"
|
27
27
|
#include "string_buffer.h"
|
@@ -91,12 +91,12 @@ static void print_tag_stack (
|
|
91
91
|
const GumboParserError* error,
|
92
92
|
GumboStringBuffer* output
|
93
93
|
) {
|
94
|
-
print_message(output, "
|
94
|
+
print_message(output, " Currently open tags: ");
|
95
95
|
for (unsigned int i = 0; i < error->tag_stack.length; ++i) {
|
96
96
|
if (i) {
|
97
97
|
print_message(output, ", ");
|
98
98
|
}
|
99
|
-
GumboTag tag = (GumboTag) error->tag_stack.data[i];
|
99
|
+
GumboTag tag = (GumboTag)(intptr_t) error->tag_stack.data[i];
|
100
100
|
print_message(output, "%s", gumbo_normalized_tagname(tag));
|
101
101
|
}
|
102
102
|
gumbo_string_buffer_append_codepoint('.', output);
|
@@ -347,15 +347,19 @@ static void handle_parser_error (
|
|
347
347
|
if (error->parser_state == GUMBO_INSERTION_MODE_INITIAL) {
|
348
348
|
print_message(output, "You must provide a doctype");
|
349
349
|
} else {
|
350
|
-
print_message(output, "Premature end of file");
|
350
|
+
print_message(output, "Premature end of file.");
|
351
351
|
print_tag_stack(error, output);
|
352
352
|
}
|
353
353
|
return;
|
354
354
|
case GUMBO_TOKEN_START_TAG:
|
355
|
+
print_message(output, "Start tag '%s' isn't allowed here.",
|
356
|
+
gumbo_normalized_tagname(error->input_tag));
|
357
|
+
print_tag_stack(error, output);
|
358
|
+
return;
|
355
359
|
case GUMBO_TOKEN_END_TAG:
|
356
|
-
print_message(output, "
|
360
|
+
print_message(output, "End tag '%s' isn't allowed here.",
|
361
|
+
gumbo_normalized_tagname(error->input_tag));
|
357
362
|
print_tag_stack(error, output);
|
358
|
-
// TODO(jdtang): Give more specific messaging.
|
359
363
|
return;
|
360
364
|
}
|
361
365
|
}
|
data/gumbo-parser/src/error.h
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
/* ANSI-C code produced by gperf version 3.1 */
|
2
|
-
/* Command-line: gperf -m100 -n
|
3
|
-
/* Computed positions: -k'
|
4
|
-
/* Filtered by:
|
2
|
+
/* Command-line: gperf -m100 -n src/foreign_attrs.gperf */
|
3
|
+
/* Computed positions: -k'8-9' */
|
4
|
+
/* Filtered by: gperf-filter.sed */
|
5
5
|
|
6
6
|
#include "replacement.h"
|
7
7
|
#include "macros.h"
|
@@ -29,9 +29,9 @@ hash (register const char *str, register size_t len)
|
|
29
29
|
11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
|
30
30
|
11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
|
31
31
|
11, 11, 11, 11, 11, 11, 11, 11, 11, 2,
|
32
|
-
11,
|
33
|
-
11, 5,
|
34
|
-
11,
|
32
|
+
11, 1, 11, 10, 4, 4, 11, 11, 3, 11,
|
33
|
+
11, 5, 3, 11, 0, 11, 2, 11, 11, 11,
|
34
|
+
11, 2, 11, 11, 11, 11, 11, 11, 11, 11,
|
35
35
|
11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
|
36
36
|
11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
|
37
37
|
11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
|
@@ -51,15 +51,14 @@ hash (register const char *str, register size_t len)
|
|
51
51
|
switch (len)
|
52
52
|
{
|
53
53
|
default:
|
54
|
+
hval += asso_values[(unsigned char)str[8]];
|
55
|
+
/*FALLTHROUGH*/
|
56
|
+
case 8:
|
54
57
|
hval += asso_values[(unsigned char)str[7]];
|
55
58
|
/*FALLTHROUGH*/
|
56
59
|
case 7:
|
57
60
|
case 6:
|
58
61
|
case 5:
|
59
|
-
case 4:
|
60
|
-
case 3:
|
61
|
-
case 2:
|
62
|
-
hval += asso_values[(unsigned char)str[1]];
|
63
62
|
break;
|
64
63
|
}
|
65
64
|
return hval;
|
@@ -70,21 +69,21 @@ gumbo_get_foreign_attr_replacement (register const char *str, register size_t le
|
|
70
69
|
{
|
71
70
|
static const unsigned char lengthtable[] =
|
72
71
|
{
|
73
|
-
5,
|
72
|
+
5, 10, 13, 9, 13, 10, 11, 11, 10, 10, 8
|
74
73
|
};
|
75
74
|
static const ForeignAttrReplacement wordlist[] =
|
76
75
|
{
|
77
76
|
{"xmlns", "xmlns", GUMBO_ATTR_NAMESPACE_XMLNS},
|
78
|
-
{"
|
77
|
+
{"xlink:href", "href", GUMBO_ATTR_NAMESPACE_XLINK},
|
78
|
+
{"xlink:arcrole", "arcrole", GUMBO_ATTR_NAMESPACE_XLINK},
|
79
79
|
{"xml:space", "space", GUMBO_ATTR_NAMESPACE_XML},
|
80
80
|
{"xlink:actuate", "actuate", GUMBO_ATTR_NAMESPACE_XLINK},
|
81
81
|
{"xlink:type", "type", GUMBO_ATTR_NAMESPACE_XLINK},
|
82
|
-
{"xlink:href", "href", GUMBO_ATTR_NAMESPACE_XLINK},
|
83
|
-
{"xlink:role", "role", GUMBO_ATTR_NAMESPACE_XLINK},
|
84
82
|
{"xlink:title", "title", GUMBO_ATTR_NAMESPACE_XLINK},
|
83
|
+
{"xmlns:xlink", "xlink", GUMBO_ATTR_NAMESPACE_XMLNS},
|
84
|
+
{"xlink:role", "role", GUMBO_ATTR_NAMESPACE_XLINK},
|
85
85
|
{"xlink:show", "show", GUMBO_ATTR_NAMESPACE_XLINK},
|
86
|
-
{"xml:lang", "lang", GUMBO_ATTR_NAMESPACE_XML}
|
87
|
-
{"xml:base", "base", GUMBO_ATTR_NAMESPACE_XML}
|
86
|
+
{"xml:lang", "lang", GUMBO_ATTR_NAMESPACE_XML}
|
88
87
|
};
|
89
88
|
|
90
89
|
if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
|
@@ -15,12 +15,12 @@ ForeignAttrReplacement;
|
|
15
15
|
|
16
16
|
%%
|
17
17
|
"xlink:actuate", "actuate", GUMBO_ATTR_NAMESPACE_XLINK
|
18
|
+
"xlink:arcrole", "arcrole", GUMBO_ATTR_NAMESPACE_XLINK
|
18
19
|
"xlink:href", "href", GUMBO_ATTR_NAMESPACE_XLINK
|
19
20
|
"xlink:role", "role", GUMBO_ATTR_NAMESPACE_XLINK
|
20
21
|
"xlink:show", "show", GUMBO_ATTR_NAMESPACE_XLINK
|
21
22
|
"xlink:title", "title", GUMBO_ATTR_NAMESPACE_XLINK
|
22
23
|
"xlink:type", "type", GUMBO_ATTR_NAMESPACE_XLINK
|
23
|
-
"xml:base", "base", GUMBO_ATTR_NAMESPACE_XML
|
24
24
|
"xml:lang", "lang", GUMBO_ATTR_NAMESPACE_XML
|
25
25
|
"xml:space", "space", GUMBO_ATTR_NAMESPACE_XML
|
26
26
|
"xmlns", "xmlns", GUMBO_ATTR_NAMESPACE_XMLNS
|
@@ -292,6 +292,7 @@ typedef enum {
|
|
292
292
|
GUMBO_TAG_TT,
|
293
293
|
GUMBO_TAG_RTC,
|
294
294
|
GUMBO_TAG_DIALOG,
|
295
|
+
GUMBO_TAG_SEARCH,
|
295
296
|
// Used for all tags that don't have special handling in HTML.
|
296
297
|
GUMBO_TAG_UNKNOWN,
|
297
298
|
// A marker value to indicate the end of the enum, for iterating over it.
|
data/gumbo-parser/src/parser.c
CHANGED
@@ -24,7 +24,7 @@
|
|
24
24
|
#include "ascii.h"
|
25
25
|
#include "attribute.h"
|
26
26
|
#include "error.h"
|
27
|
-
#include "
|
27
|
+
#include "nokogiri_gumbo.h"
|
28
28
|
#include "insertion_mode.h"
|
29
29
|
#include "macros.h"
|
30
30
|
#include "parser.h"
|
@@ -537,7 +537,7 @@ static GumboInsertionMode get_current_template_insertion_mode (
|
|
537
537
|
if (modes->length == 0) {
|
538
538
|
return GUMBO_INSERTION_MODE_INITIAL;
|
539
539
|
}
|
540
|
-
return (GumboInsertionMode) modes->data[(modes->length - 1)];
|
540
|
+
return (GumboInsertionMode)(intptr_t) modes->data[(modes->length - 1)];
|
541
541
|
}
|
542
542
|
|
543
543
|
// Returns true if the specified token is either a start or end tag
|
@@ -2678,6 +2678,7 @@ static void handle_in_head(GumboParser* parser, GumboToken* token) {
|
|
2678
2678
|
static void handle_in_head_noscript(GumboParser* parser, GumboToken* token) {
|
2679
2679
|
if (token->type == GUMBO_TOKEN_DOCTYPE) {
|
2680
2680
|
parser_add_parse_error(parser, token);
|
2681
|
+
ignore_token(parser);
|
2681
2682
|
return;
|
2682
2683
|
}
|
2683
2684
|
if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
|
@@ -2940,7 +2941,7 @@ static void handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2940
2941
|
TAG(DETAILS), TAG(DIALOG), TAG(DIR), TAG(DIV), TAG(DL), TAG(FIELDSET),
|
2941
2942
|
TAG(FIGCAPTION), TAG(FIGURE), TAG(FOOTER), TAG(HEADER), TAG(HGROUP),
|
2942
2943
|
TAG(MAIN), TAG(MENU), TAG(NAV), TAG(OL), TAG(P), TAG(SECTION),
|
2943
|
-
TAG(SUMMARY), TAG(UL)
|
2944
|
+
TAG(SUMMARY), TAG(UL), TAG(SEARCH)
|
2944
2945
|
})
|
2945
2946
|
) {
|
2946
2947
|
maybe_implicitly_close_p_tag(parser, token);
|
@@ -3018,7 +3019,7 @@ static void handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
3018
3019
|
TAG(CENTER), TAG(DETAILS), TAG(DIALOG), TAG(DIR), TAG(DIV), TAG(DL),
|
3019
3020
|
TAG(FIELDSET), TAG(FIGCAPTION), TAG(FIGURE), TAG(FOOTER), TAG(HEADER),
|
3020
3021
|
TAG(HGROUP), TAG(LISTING), TAG(MAIN), TAG(MENU), TAG(NAV), TAG(OL),
|
3021
|
-
TAG(PRE), TAG(SECTION), TAG(SUMMARY), TAG(UL)
|
3022
|
+
TAG(PRE), TAG(SECTION), TAG(SUMMARY), TAG(UL), TAG(SEARCH)
|
3022
3023
|
})
|
3023
3024
|
) {
|
3024
3025
|
GumboTag tag = token->v.end_tag.tag;
|
@@ -3057,6 +3058,9 @@ static void handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
3057
3058
|
ignore_token(parser);
|
3058
3059
|
return;
|
3059
3060
|
}
|
3061
|
+
// Since we remove the form node without popping, we need to make sure
|
3062
|
+
// that we flush any text nodes at the end of the form.
|
3063
|
+
maybe_flush_text_node_buffer(parser);
|
3060
3064
|
// This differs from implicitly_close_tags because we remove *only* the
|
3061
3065
|
// <form> element; other nodes are left in scope.
|
3062
3066
|
generate_implied_end_tags(parser, GUMBO_TAG_LAST, NULL);
|
@@ -3434,7 +3438,7 @@ static void handle_in_table(GumboParser* parser, GumboToken* token) {
|
|
3434
3438
|
|| token->type == GUMBO_TOKEN_WHITESPACE
|
3435
3439
|
|| token->type == GUMBO_TOKEN_NULL)
|
3436
3440
|
&& node_tag_in_set(get_current_node(parser), &(const TagSet) {
|
3437
|
-
TAG(TABLE), TAG(TBODY), TAG(TFOOT), TAG(THEAD), TAG(TR)
|
3441
|
+
TAG(TABLE), TAG(TBODY), TAG(TEMPLATE), TAG(TFOOT), TAG(THEAD), TAG(TR)
|
3438
3442
|
})
|
3439
3443
|
) {
|
3440
3444
|
// The "pending table character tokens" list described in the spec is
|
@@ -3960,6 +3964,18 @@ static void handle_in_select(GumboParser* parser, GumboToken* token) {
|
|
3960
3964
|
insert_element_from_token(parser, token);
|
3961
3965
|
return;
|
3962
3966
|
}
|
3967
|
+
if (tag_is(token, kStartTag, GUMBO_TAG_HR)) {
|
3968
|
+
if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTION)) {
|
3969
|
+
pop_current_node(parser);
|
3970
|
+
}
|
3971
|
+
if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTGROUP)) {
|
3972
|
+
pop_current_node(parser);
|
3973
|
+
}
|
3974
|
+
insert_element_from_token(parser, token);
|
3975
|
+
pop_current_node(parser);
|
3976
|
+
acknowledge_self_closing_tag(parser);
|
3977
|
+
return;
|
3978
|
+
}
|
3963
3979
|
if (tag_is(token, kEndTag, GUMBO_TAG_OPTGROUP)) {
|
3964
3980
|
GumboVector* open_elements = &parser->_parser_state->_open_elements;
|
3965
3981
|
if (
|
@@ -1,7 +1,7 @@
|
|
1
1
|
/* ANSI-C code produced by gperf version 3.1 */
|
2
|
-
/* Command-line: gperf -m100
|
2
|
+
/* Command-line: gperf -m100 src/svg_attrs.gperf */
|
3
3
|
/* Computed positions: -k'1,10,$' */
|
4
|
-
/* Filtered by:
|
4
|
+
/* Filtered by: gperf-filter.sed */
|
5
5
|
|
6
6
|
#include "replacement.h"
|
7
7
|
#include "macros.h"
|
data/gumbo-parser/src/svg_tags.c
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
/* ANSI-C code produced by gperf version 3.1 */
|
2
|
-
/* Command-line: gperf -m100
|
2
|
+
/* Command-line: gperf -m100 src/svg_tags.gperf */
|
3
3
|
/* Computed positions: -k'3,7' */
|
4
|
-
/* Filtered by:
|
4
|
+
/* Filtered by: gperf-filter.sed */
|
5
5
|
|
6
6
|
#include "replacement.h"
|
7
7
|
#include "macros.h"
|
data/gumbo-parser/src/tag.c
CHANGED
@@ -14,7 +14,7 @@
|
|
14
14
|
limitations under the License.
|
15
15
|
*/
|
16
16
|
|
17
|
-
#include "
|
17
|
+
#include "nokogiri_gumbo.h"
|
18
18
|
#include "util.h"
|
19
19
|
#include "tag_lookup.h"
|
20
20
|
|
@@ -172,6 +172,7 @@ static const char kGumboTagNames[GUMBO_TAG_LAST+1][15] = {
|
|
172
172
|
[GUMBO_TAG_TT] = "tt",
|
173
173
|
[GUMBO_TAG_RTC] = "rtc",
|
174
174
|
[GUMBO_TAG_DIALOG] = "dialog",
|
175
|
+
[GUMBO_TAG_SEARCH] = "search",
|
175
176
|
|
176
177
|
[GUMBO_TAG_UNKNOWN] = "",
|
177
178
|
[GUMBO_TAG_LAST] = "",
|