nokogiri 1.14.2 → 1.16.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +19 -15
- data/README.md +4 -1
- data/dependencies.yml +9 -8
- data/ext/nokogiri/extconf.rb +69 -26
- data/ext/nokogiri/html4_document.c +1 -2
- data/ext/nokogiri/html4_element_description.c +19 -14
- data/ext/nokogiri/html4_sax_parser_context.c +10 -16
- data/ext/nokogiri/html4_sax_push_parser.c +3 -3
- data/ext/nokogiri/nokogiri.c +46 -24
- data/ext/nokogiri/nokogiri.h +23 -5
- data/ext/nokogiri/test_global_handlers.c +1 -1
- data/ext/nokogiri/xml_attr.c +1 -1
- data/ext/nokogiri/xml_cdata.c +30 -17
- data/ext/nokogiri/xml_comment.c +1 -1
- data/ext/nokogiri/xml_document.c +113 -25
- data/ext/nokogiri/xml_document_fragment.c +1 -1
- data/ext/nokogiri/xml_dtd.c +1 -1
- data/ext/nokogiri/xml_element_content.c +32 -29
- data/ext/nokogiri/xml_element_decl.c +5 -5
- data/ext/nokogiri/xml_encoding_handler.c +12 -4
- data/ext/nokogiri/xml_entity_reference.c +1 -1
- data/ext/nokogiri/xml_namespace.c +11 -16
- data/ext/nokogiri/xml_node.c +13 -16
- data/ext/nokogiri/xml_node_set.c +125 -105
- data/ext/nokogiri/xml_processing_instruction.c +1 -1
- data/ext/nokogiri/xml_reader.c +61 -74
- data/ext/nokogiri/xml_relax_ng.c +66 -79
- data/ext/nokogiri/xml_sax_parser.c +24 -5
- data/ext/nokogiri/xml_sax_parser_context.c +50 -25
- data/ext/nokogiri/xml_sax_push_parser.c +30 -9
- data/ext/nokogiri/xml_schema.c +94 -115
- data/ext/nokogiri/xml_syntax_error.c +3 -3
- data/ext/nokogiri/xml_text.c +26 -13
- data/ext/nokogiri/xml_xpath_context.c +153 -83
- data/ext/nokogiri/xslt_stylesheet.c +111 -53
- data/gumbo-parser/Makefile +18 -0
- data/gumbo-parser/src/error.c +8 -4
- data/gumbo-parser/src/foreign_attrs.c +13 -14
- data/gumbo-parser/src/foreign_attrs.gperf +1 -1
- data/gumbo-parser/src/parser.c +21 -5
- data/gumbo-parser/src/tokenizer.c +1 -0
- data/lib/nokogiri/css/parser_extras.rb +1 -1
- data/lib/nokogiri/css/xpath_visitor.rb +3 -23
- data/lib/nokogiri/extension.rb +1 -1
- data/lib/nokogiri/html4/document.rb +1 -1
- data/lib/nokogiri/html4/document_fragment.rb +1 -1
- data/lib/nokogiri/html4/element_description_defaults.rb +1821 -353
- data/lib/nokogiri/html4/encoding_reader.rb +1 -1
- data/lib/nokogiri/html5/document_fragment.rb +1 -1
- data/lib/nokogiri/html5/node.rb +5 -0
- data/lib/nokogiri/html5.rb +0 -63
- data/lib/nokogiri/jruby/nokogiri_jars.rb +9 -9
- data/lib/nokogiri/version/constant.rb +1 -1
- data/lib/nokogiri/version/info.rb +6 -5
- data/lib/nokogiri/xml/attr.rb +2 -2
- data/lib/nokogiri/xml/attribute_decl.rb +4 -2
- data/lib/nokogiri/xml/document.rb +4 -5
- data/lib/nokogiri/xml/document_fragment.rb +3 -3
- data/lib/nokogiri/xml/element_content.rb +10 -2
- data/lib/nokogiri/xml/element_decl.rb +4 -2
- data/lib/nokogiri/xml/entity_decl.rb +4 -2
- data/lib/nokogiri/xml/namespace.rb +1 -2
- data/lib/nokogiri/xml/node/save_options.rb +8 -0
- data/lib/nokogiri/xml/node.rb +53 -37
- data/lib/nokogiri/xml/node_set.rb +3 -3
- data/lib/nokogiri/xml/pp/node.rb +23 -12
- data/lib/nokogiri/xml/reader.rb +10 -9
- data/lib/nokogiri/xml/sax/document.rb +1 -1
- data/lib/nokogiri/xml/searchable.rb +21 -13
- data/lib/nokogiri/xml/syntax_error.rb +1 -1
- data/lib/nokogiri/xml.rb +1 -1
- data/lib/nokogiri/xslt/stylesheet.rb +29 -7
- data/lib/nokogiri/xslt.rb +74 -4
- data/lib/nokogiri.rb +13 -5
- data/lib/xsd/xmlparser/nokogiri.rb +1 -1
- data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
- data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
- data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
- data/ports/archives/libxml2-2.12.7.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.39.tar.xz +0 -0
- metadata +16 -12
- data/patches/libxslt/0001-update-automake-files-for-arm64.patch +0 -3037
- data/ports/archives/libxml2-2.10.3.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.37.tar.xz +0 -0
@@ -3,19 +3,30 @@
|
|
3
3
|
VALUE cNokogiriXsltStylesheet ;
|
4
4
|
|
5
5
|
static void
|
6
|
-
mark(
|
6
|
+
mark(void *data)
|
7
7
|
{
|
8
|
+
nokogiriXsltStylesheetTuple *wrapper = (nokogiriXsltStylesheetTuple *)data;
|
8
9
|
rb_gc_mark(wrapper->func_instances);
|
9
10
|
}
|
10
11
|
|
11
12
|
static void
|
12
|
-
dealloc(
|
13
|
+
dealloc(void *data)
|
13
14
|
{
|
15
|
+
nokogiriXsltStylesheetTuple *wrapper = (nokogiriXsltStylesheetTuple *)data;
|
14
16
|
xsltStylesheetPtr doc = wrapper->ss;
|
15
17
|
xsltFreeStylesheet(doc);
|
16
18
|
ruby_xfree(wrapper);
|
17
19
|
}
|
18
20
|
|
21
|
+
static const rb_data_type_t xslt_stylesheet_type = {
|
22
|
+
.wrap_struct_name = "Nokogiri::XSLT::Stylesheet",
|
23
|
+
.function = {
|
24
|
+
.dmark = mark,
|
25
|
+
.dfree = dealloc,
|
26
|
+
},
|
27
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY
|
28
|
+
};
|
29
|
+
|
19
30
|
PRINTFLIKE_DECL(2, 3)
|
20
31
|
static void
|
21
32
|
xslt_generic_error_handler(void *ctx, const char *msg, ...)
|
@@ -42,8 +53,12 @@ Nokogiri_wrap_xslt_stylesheet(xsltStylesheetPtr ss)
|
|
42
53
|
VALUE self;
|
43
54
|
nokogiriXsltStylesheetTuple *wrapper;
|
44
55
|
|
45
|
-
self =
|
46
|
-
|
56
|
+
self = TypedData_Make_Struct(
|
57
|
+
cNokogiriXsltStylesheet,
|
58
|
+
nokogiriXsltStylesheetTuple,
|
59
|
+
&xslt_stylesheet_type,
|
60
|
+
wrapper
|
61
|
+
);
|
47
62
|
|
48
63
|
ss->_private = (void *)self;
|
49
64
|
wrapper->ss = ss;
|
@@ -56,7 +71,12 @@ Nokogiri_wrap_xslt_stylesheet(xsltStylesheetPtr ss)
|
|
56
71
|
* call-seq:
|
57
72
|
* parse_stylesheet_doc(document)
|
58
73
|
*
|
59
|
-
* Parse
|
74
|
+
* Parse an XSLT::Stylesheet from +document+.
|
75
|
+
*
|
76
|
+
* [Parameters]
|
77
|
+
* - +document+ (Nokogiri::XML::Document) the document to be parsed.
|
78
|
+
*
|
79
|
+
* [Returns] Nokogiri::XSLT::Stylesheet
|
60
80
|
*/
|
61
81
|
static VALUE
|
62
82
|
parse_stylesheet_doc(VALUE klass, VALUE xmldocobj)
|
@@ -64,7 +84,8 @@ parse_stylesheet_doc(VALUE klass, VALUE xmldocobj)
|
|
64
84
|
xmlDocPtr xml, xml_cpy;
|
65
85
|
VALUE errstr, exception;
|
66
86
|
xsltStylesheetPtr ss ;
|
67
|
-
|
87
|
+
|
88
|
+
xml = noko_xml_document_unwrap(xmldocobj);
|
68
89
|
|
69
90
|
errstr = rb_str_new(0, 0);
|
70
91
|
xsltSetGenericErrorFunc((void *)errstr, xslt_generic_error_handler);
|
@@ -88,10 +109,10 @@ parse_stylesheet_doc(VALUE klass, VALUE xmldocobj)
|
|
88
109
|
* call-seq:
|
89
110
|
* serialize(document)
|
90
111
|
*
|
91
|
-
* Serialize +document+ to an xml string.
|
112
|
+
* Serialize +document+ to an xml string, as specified by the +method+ parameter in the Stylesheet.
|
92
113
|
*/
|
93
114
|
static VALUE
|
94
|
-
|
115
|
+
rb_xslt_stylesheet_serialize(VALUE self, VALUE xmlobj)
|
95
116
|
{
|
96
117
|
xmlDocPtr xml ;
|
97
118
|
nokogiriXsltStylesheetTuple *wrapper;
|
@@ -99,8 +120,13 @@ serialize(VALUE self, VALUE xmlobj)
|
|
99
120
|
int doc_len ;
|
100
121
|
VALUE rval ;
|
101
122
|
|
102
|
-
|
103
|
-
|
123
|
+
xml = noko_xml_document_unwrap(xmlobj);
|
124
|
+
TypedData_Get_Struct(
|
125
|
+
self,
|
126
|
+
nokogiriXsltStylesheetTuple,
|
127
|
+
&xslt_stylesheet_type,
|
128
|
+
wrapper
|
129
|
+
);
|
104
130
|
xsltSaveResultToString(&doc_ptr, &doc_len, xml, wrapper->ss);
|
105
131
|
rval = NOKOGIRI_STR_NEW(doc_ptr, doc_len);
|
106
132
|
xmlFree(doc_ptr);
|
@@ -112,7 +138,7 @@ serialize(VALUE self, VALUE xmlobj)
|
|
112
138
|
* transform(document)
|
113
139
|
* transform(document, params = {})
|
114
140
|
*
|
115
|
-
*
|
141
|
+
* Transform an XML::Document as defined by an XSLT::Stylesheet.
|
116
142
|
*
|
117
143
|
* [Parameters]
|
118
144
|
* - +document+ (Nokogiri::XML::Document) the document to be transformed.
|
@@ -221,60 +247,74 @@ serialize(VALUE self, VALUE xmlobj)
|
|
221
247
|
* See: Nokogiri::XSLT.quote_params
|
222
248
|
*/
|
223
249
|
static VALUE
|
224
|
-
|
250
|
+
rb_xslt_stylesheet_transform(int argc, VALUE *argv, VALUE self)
|
225
251
|
{
|
226
|
-
VALUE
|
227
|
-
xmlDocPtr
|
228
|
-
xmlDocPtr
|
252
|
+
VALUE rb_document, rb_param, rb_error_str;
|
253
|
+
xmlDocPtr c_document ;
|
254
|
+
xmlDocPtr c_result_document ;
|
229
255
|
nokogiriXsltStylesheetTuple *wrapper;
|
230
256
|
const char **params ;
|
231
257
|
long param_len, j ;
|
232
258
|
int parse_error_occurred ;
|
259
|
+
int defensive_copy_p = 0;
|
233
260
|
|
234
|
-
rb_scan_args(argc, argv, "11", &
|
235
|
-
if (NIL_P(
|
236
|
-
if (!rb_obj_is_kind_of(
|
261
|
+
rb_scan_args(argc, argv, "11", &rb_document, &rb_param);
|
262
|
+
if (NIL_P(rb_param)) { rb_param = rb_ary_new2(0L) ; }
|
263
|
+
if (!rb_obj_is_kind_of(rb_document, cNokogiriXmlDocument)) {
|
237
264
|
rb_raise(rb_eArgError, "argument must be a Nokogiri::XML::Document");
|
238
265
|
}
|
239
266
|
|
240
267
|
/* handle hashes as arguments. */
|
241
|
-
if (T_HASH == TYPE(
|
242
|
-
|
243
|
-
|
268
|
+
if (T_HASH == TYPE(rb_param)) {
|
269
|
+
rb_param = rb_funcall(rb_param, rb_intern("to_a"), 0);
|
270
|
+
rb_param = rb_funcall(rb_param, rb_intern("flatten"), 0);
|
244
271
|
}
|
245
272
|
|
246
|
-
Check_Type(
|
273
|
+
Check_Type(rb_param, T_ARRAY);
|
247
274
|
|
248
|
-
|
249
|
-
|
275
|
+
c_document = noko_xml_document_unwrap(rb_document);
|
276
|
+
TypedData_Get_Struct(self, nokogiriXsltStylesheetTuple, &xslt_stylesheet_type, wrapper);
|
250
277
|
|
251
|
-
param_len = RARRAY_LEN(
|
278
|
+
param_len = RARRAY_LEN(rb_param);
|
252
279
|
params = ruby_xcalloc((size_t)param_len + 1, sizeof(char *));
|
253
280
|
for (j = 0 ; j < param_len ; j++) {
|
254
|
-
VALUE entry = rb_ary_entry(
|
281
|
+
VALUE entry = rb_ary_entry(rb_param, j);
|
255
282
|
const char *ptr = StringValueCStr(entry);
|
256
283
|
params[j] = ptr;
|
257
284
|
}
|
258
285
|
params[param_len] = 0 ;
|
259
286
|
|
260
|
-
|
261
|
-
|
262
|
-
|
287
|
+
xsltTransformContextPtr c_transform_context = xsltNewTransformContext(wrapper->ss, c_document);
|
288
|
+
if (xsltNeedElemSpaceHandling(c_transform_context) &&
|
289
|
+
noko_xml_document_has_wrapped_blank_nodes_p(c_document)) {
|
290
|
+
// see https://github.com/sparklemotion/nokogiri/issues/2800
|
291
|
+
c_document = xmlCopyDoc(c_document, 1);
|
292
|
+
defensive_copy_p = 1;
|
293
|
+
}
|
294
|
+
xsltFreeTransformContext(c_transform_context);
|
295
|
+
|
296
|
+
rb_error_str = rb_str_new(0, 0);
|
297
|
+
xsltSetGenericErrorFunc((void *)rb_error_str, xslt_generic_error_handler);
|
298
|
+
xmlSetGenericErrorFunc((void *)rb_error_str, xslt_generic_error_handler);
|
299
|
+
|
300
|
+
c_result_document = xsltApplyStylesheet(wrapper->ss, c_document, params);
|
263
301
|
|
264
|
-
result = xsltApplyStylesheet(wrapper->ss, xml, params);
|
265
302
|
ruby_xfree(params);
|
303
|
+
if (defensive_copy_p) {
|
304
|
+
xmlFreeDoc(c_document);
|
305
|
+
c_document = NULL;
|
306
|
+
}
|
266
307
|
|
267
308
|
xsltSetGenericErrorFunc(NULL, NULL);
|
268
309
|
xmlSetGenericErrorFunc(NULL, NULL);
|
269
310
|
|
270
|
-
parse_error_occurred = (Qfalse == rb_funcall(
|
311
|
+
parse_error_occurred = (Qfalse == rb_funcall(rb_error_str, rb_intern("empty?"), 0));
|
271
312
|
|
272
313
|
if (parse_error_occurred) {
|
273
|
-
|
274
|
-
rb_exc_raise(exception);
|
314
|
+
rb_exc_raise(rb_exc_new3(rb_eRuntimeError, rb_error_str));
|
275
315
|
}
|
276
316
|
|
277
|
-
return noko_xml_document_wrap((VALUE)0,
|
317
|
+
return noko_xml_document_wrap((VALUE)0, c_result_document) ;
|
278
318
|
}
|
279
319
|
|
280
320
|
static void
|
@@ -290,7 +330,12 @@ method_caller(xmlXPathParserContextPtr ctxt, int nargs)
|
|
290
330
|
handler = (VALUE)xsltGetExtData(transform, functionURI);
|
291
331
|
function_name = (const char *)(ctxt->context->function);
|
292
332
|
|
293
|
-
Nokogiri_marshal_xpath_funcall_and_return_values(
|
333
|
+
Nokogiri_marshal_xpath_funcall_and_return_values(
|
334
|
+
ctxt,
|
335
|
+
nargs,
|
336
|
+
handler,
|
337
|
+
(const char *)function_name
|
338
|
+
);
|
294
339
|
}
|
295
340
|
|
296
341
|
static void *
|
@@ -306,12 +351,20 @@ initFunc(xsltTransformContextPtr ctxt, const xmlChar *uri)
|
|
306
351
|
|
307
352
|
for (i = 0; i < RARRAY_LEN(methods); i++) {
|
308
353
|
VALUE method_name = rb_obj_as_string(rb_ary_entry(methods, i));
|
309
|
-
xsltRegisterExtFunction(
|
310
|
-
|
354
|
+
xsltRegisterExtFunction(
|
355
|
+
ctxt,
|
356
|
+
(unsigned char *)StringValueCStr(method_name),
|
357
|
+
uri,
|
358
|
+
method_caller
|
359
|
+
);
|
311
360
|
}
|
312
361
|
|
313
|
-
|
314
|
-
|
362
|
+
TypedData_Get_Struct(
|
363
|
+
(VALUE)ctxt->style->_private,
|
364
|
+
nokogiriXsltStylesheetTuple,
|
365
|
+
&xslt_stylesheet_type,
|
366
|
+
wrapper
|
367
|
+
);
|
315
368
|
inst = rb_class_new_instance(0, NULL, obj);
|
316
369
|
rb_ary_push(wrapper->func_instances, inst);
|
317
370
|
|
@@ -324,33 +377,38 @@ shutdownFunc(xsltTransformContextPtr ctxt,
|
|
324
377
|
{
|
325
378
|
nokogiriXsltStylesheetTuple *wrapper;
|
326
379
|
|
327
|
-
|
328
|
-
|
380
|
+
TypedData_Get_Struct(
|
381
|
+
(VALUE)ctxt->style->_private,
|
382
|
+
nokogiriXsltStylesheetTuple,
|
383
|
+
&xslt_stylesheet_type,
|
384
|
+
wrapper
|
385
|
+
);
|
329
386
|
|
330
387
|
rb_ary_clear(wrapper->func_instances);
|
331
388
|
}
|
332
389
|
|
333
|
-
/*
|
334
|
-
* call-seq:
|
335
|
-
* register(uri, custom_handler_class)
|
336
|
-
*
|
337
|
-
* Register a class that implements custom XSLT transformation functions.
|
338
|
-
*/
|
390
|
+
/* docstring is in lib/nokogiri/xslt.rb */
|
339
391
|
static VALUE
|
340
|
-
|
392
|
+
rb_xslt_s_register(VALUE self, VALUE uri, VALUE obj)
|
341
393
|
{
|
342
394
|
VALUE modules = rb_iv_get(self, "@modules");
|
343
|
-
if (NIL_P(modules)) {
|
395
|
+
if (NIL_P(modules)) {
|
396
|
+
rb_raise(rb_eRuntimeError, "internal error: @modules not set");
|
397
|
+
}
|
344
398
|
|
345
399
|
rb_hash_aset(modules, uri, obj);
|
346
|
-
xsltRegisterExtModule(
|
400
|
+
xsltRegisterExtModule(
|
401
|
+
(unsigned char *)StringValueCStr(uri),
|
402
|
+
initFunc,
|
403
|
+
shutdownFunc
|
404
|
+
);
|
347
405
|
return self;
|
348
406
|
}
|
349
407
|
|
350
408
|
void
|
351
409
|
noko_init_xslt_stylesheet(void)
|
352
410
|
{
|
353
|
-
rb_define_singleton_method(mNokogiriXslt, "register",
|
411
|
+
rb_define_singleton_method(mNokogiriXslt, "register", rb_xslt_s_register, 2);
|
354
412
|
rb_iv_set(mNokogiriXslt, "@modules", rb_hash_new());
|
355
413
|
|
356
414
|
cNokogiriXsltStylesheet = rb_define_class_under(mNokogiriXslt, "Stylesheet", rb_cObject);
|
@@ -358,6 +416,6 @@ noko_init_xslt_stylesheet(void)
|
|
358
416
|
rb_undef_alloc_func(cNokogiriXsltStylesheet);
|
359
417
|
|
360
418
|
rb_define_singleton_method(cNokogiriXsltStylesheet, "parse_stylesheet_doc", parse_stylesheet_doc, 1);
|
361
|
-
rb_define_method(cNokogiriXsltStylesheet, "serialize",
|
362
|
-
rb_define_method(cNokogiriXsltStylesheet, "transform",
|
419
|
+
rb_define_method(cNokogiriXsltStylesheet, "serialize", rb_xslt_stylesheet_serialize, 1);
|
420
|
+
rb_define_method(cNokogiriXsltStylesheet, "transform", rb_xslt_stylesheet_transform, -1);
|
363
421
|
}
|
data/gumbo-parser/Makefile
CHANGED
@@ -13,6 +13,23 @@ LDFLAGS := -pthread
|
|
13
13
|
|
14
14
|
all: check
|
15
15
|
|
16
|
+
oss-fuzz:
|
17
|
+
./fuzzer/build-ossfuzz.sh
|
18
|
+
|
19
|
+
fuzzers: fuzzer-normal fuzzer-asan fuzzer-ubsan fuzzer-msan
|
20
|
+
|
21
|
+
fuzzer-normal:
|
22
|
+
./fuzzer/build.sh
|
23
|
+
|
24
|
+
fuzzer-asan:
|
25
|
+
SANITIZER=asan ./fuzzer/build.sh
|
26
|
+
|
27
|
+
fuzzer-ubsan:
|
28
|
+
SANITIZER=ubsan ./fuzzer/build.sh
|
29
|
+
|
30
|
+
fuzzer-msan:
|
31
|
+
SANITIZER=msan ./fuzzer/build.sh
|
32
|
+
|
16
33
|
# don't try to regenerate ragel or gperf files in CI, that should be a development-only action and
|
17
34
|
# the generated files should be committed to SCM
|
18
35
|
ifneq ($(CI),true)
|
@@ -81,6 +98,7 @@ coverage:
|
|
81
98
|
|
82
99
|
clean:
|
83
100
|
$(RM) -r build
|
101
|
+
$(RM) -r fuzzer/build fuzzer/src-* fuzzer/gumbo_corpus
|
84
102
|
|
85
103
|
build/src/flags: | build/src
|
86
104
|
@echo 'old_CC := $(CC)' > $@
|
data/gumbo-parser/src/error.c
CHANGED
@@ -91,7 +91,7 @@ static void print_tag_stack (
|
|
91
91
|
const GumboParserError* error,
|
92
92
|
GumboStringBuffer* output
|
93
93
|
) {
|
94
|
-
print_message(output, "
|
94
|
+
print_message(output, " Currently open tags: ");
|
95
95
|
for (unsigned int i = 0; i < error->tag_stack.length; ++i) {
|
96
96
|
if (i) {
|
97
97
|
print_message(output, ", ");
|
@@ -347,15 +347,19 @@ static void handle_parser_error (
|
|
347
347
|
if (error->parser_state == GUMBO_INSERTION_MODE_INITIAL) {
|
348
348
|
print_message(output, "You must provide a doctype");
|
349
349
|
} else {
|
350
|
-
print_message(output, "Premature end of file");
|
350
|
+
print_message(output, "Premature end of file.");
|
351
351
|
print_tag_stack(error, output);
|
352
352
|
}
|
353
353
|
return;
|
354
354
|
case GUMBO_TOKEN_START_TAG:
|
355
|
+
print_message(output, "Start tag '%s' isn't allowed here.",
|
356
|
+
gumbo_normalized_tagname(error->input_tag));
|
357
|
+
print_tag_stack(error, output);
|
358
|
+
return;
|
355
359
|
case GUMBO_TOKEN_END_TAG:
|
356
|
-
print_message(output, "
|
360
|
+
print_message(output, "End tag '%s' isn't allowed here.",
|
361
|
+
gumbo_normalized_tagname(error->input_tag));
|
357
362
|
print_tag_stack(error, output);
|
358
|
-
// TODO(jdtang): Give more specific messaging.
|
359
363
|
return;
|
360
364
|
}
|
361
365
|
}
|
@@ -1,6 +1,6 @@
|
|
1
1
|
/* ANSI-C code produced by gperf version 3.1 */
|
2
2
|
/* Command-line: gperf -m100 -n src/foreign_attrs.gperf */
|
3
|
-
/* Computed positions: -k'
|
3
|
+
/* Computed positions: -k'8-9' */
|
4
4
|
/* Filtered by: gperf-filter.sed */
|
5
5
|
|
6
6
|
#include "replacement.h"
|
@@ -29,9 +29,9 @@ hash (register const char *str, register size_t len)
|
|
29
29
|
11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
|
30
30
|
11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
|
31
31
|
11, 11, 11, 11, 11, 11, 11, 11, 11, 2,
|
32
|
-
11,
|
33
|
-
11, 5,
|
34
|
-
11,
|
32
|
+
11, 1, 11, 10, 4, 4, 11, 11, 3, 11,
|
33
|
+
11, 5, 3, 11, 0, 11, 2, 11, 11, 11,
|
34
|
+
11, 2, 11, 11, 11, 11, 11, 11, 11, 11,
|
35
35
|
11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
|
36
36
|
11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
|
37
37
|
11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
|
@@ -51,15 +51,14 @@ hash (register const char *str, register size_t len)
|
|
51
51
|
switch (len)
|
52
52
|
{
|
53
53
|
default:
|
54
|
+
hval += asso_values[(unsigned char)str[8]];
|
55
|
+
/*FALLTHROUGH*/
|
56
|
+
case 8:
|
54
57
|
hval += asso_values[(unsigned char)str[7]];
|
55
58
|
/*FALLTHROUGH*/
|
56
59
|
case 7:
|
57
60
|
case 6:
|
58
61
|
case 5:
|
59
|
-
case 4:
|
60
|
-
case 3:
|
61
|
-
case 2:
|
62
|
-
hval += asso_values[(unsigned char)str[1]];
|
63
62
|
break;
|
64
63
|
}
|
65
64
|
return hval;
|
@@ -70,21 +69,21 @@ gumbo_get_foreign_attr_replacement (register const char *str, register size_t le
|
|
70
69
|
{
|
71
70
|
static const unsigned char lengthtable[] =
|
72
71
|
{
|
73
|
-
5,
|
72
|
+
5, 10, 13, 9, 13, 10, 11, 11, 10, 10, 8
|
74
73
|
};
|
75
74
|
static const ForeignAttrReplacement wordlist[] =
|
76
75
|
{
|
77
76
|
{"xmlns", "xmlns", GUMBO_ATTR_NAMESPACE_XMLNS},
|
78
|
-
{"
|
77
|
+
{"xlink:href", "href", GUMBO_ATTR_NAMESPACE_XLINK},
|
78
|
+
{"xlink:arcrole", "arcrole", GUMBO_ATTR_NAMESPACE_XLINK},
|
79
79
|
{"xml:space", "space", GUMBO_ATTR_NAMESPACE_XML},
|
80
80
|
{"xlink:actuate", "actuate", GUMBO_ATTR_NAMESPACE_XLINK},
|
81
81
|
{"xlink:type", "type", GUMBO_ATTR_NAMESPACE_XLINK},
|
82
|
-
{"xlink:href", "href", GUMBO_ATTR_NAMESPACE_XLINK},
|
83
|
-
{"xlink:role", "role", GUMBO_ATTR_NAMESPACE_XLINK},
|
84
82
|
{"xlink:title", "title", GUMBO_ATTR_NAMESPACE_XLINK},
|
83
|
+
{"xmlns:xlink", "xlink", GUMBO_ATTR_NAMESPACE_XMLNS},
|
84
|
+
{"xlink:role", "role", GUMBO_ATTR_NAMESPACE_XLINK},
|
85
85
|
{"xlink:show", "show", GUMBO_ATTR_NAMESPACE_XLINK},
|
86
|
-
{"xml:lang", "lang", GUMBO_ATTR_NAMESPACE_XML}
|
87
|
-
{"xml:base", "base", GUMBO_ATTR_NAMESPACE_XML}
|
86
|
+
{"xml:lang", "lang", GUMBO_ATTR_NAMESPACE_XML}
|
88
87
|
};
|
89
88
|
|
90
89
|
if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
|
@@ -15,12 +15,12 @@ ForeignAttrReplacement;
|
|
15
15
|
|
16
16
|
%%
|
17
17
|
"xlink:actuate", "actuate", GUMBO_ATTR_NAMESPACE_XLINK
|
18
|
+
"xlink:arcrole", "arcrole", GUMBO_ATTR_NAMESPACE_XLINK
|
18
19
|
"xlink:href", "href", GUMBO_ATTR_NAMESPACE_XLINK
|
19
20
|
"xlink:role", "role", GUMBO_ATTR_NAMESPACE_XLINK
|
20
21
|
"xlink:show", "show", GUMBO_ATTR_NAMESPACE_XLINK
|
21
22
|
"xlink:title", "title", GUMBO_ATTR_NAMESPACE_XLINK
|
22
23
|
"xlink:type", "type", GUMBO_ATTR_NAMESPACE_XLINK
|
23
|
-
"xml:base", "base", GUMBO_ATTR_NAMESPACE_XML
|
24
24
|
"xml:lang", "lang", GUMBO_ATTR_NAMESPACE_XML
|
25
25
|
"xml:space", "space", GUMBO_ATTR_NAMESPACE_XML
|
26
26
|
"xmlns", "xmlns", GUMBO_ATTR_NAMESPACE_XMLNS
|
data/gumbo-parser/src/parser.c
CHANGED
@@ -2678,6 +2678,7 @@ static void handle_in_head(GumboParser* parser, GumboToken* token) {
|
|
2678
2678
|
static void handle_in_head_noscript(GumboParser* parser, GumboToken* token) {
|
2679
2679
|
if (token->type == GUMBO_TOKEN_DOCTYPE) {
|
2680
2680
|
parser_add_parse_error(parser, token);
|
2681
|
+
ignore_token(parser);
|
2681
2682
|
return;
|
2682
2683
|
}
|
2683
2684
|
if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
|
@@ -3963,6 +3964,18 @@ static void handle_in_select(GumboParser* parser, GumboToken* token) {
|
|
3963
3964
|
insert_element_from_token(parser, token);
|
3964
3965
|
return;
|
3965
3966
|
}
|
3967
|
+
if (tag_is(token, kStartTag, GUMBO_TAG_HR)) {
|
3968
|
+
if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTION)) {
|
3969
|
+
pop_current_node(parser);
|
3970
|
+
}
|
3971
|
+
if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTGROUP)) {
|
3972
|
+
pop_current_node(parser);
|
3973
|
+
}
|
3974
|
+
insert_element_from_token(parser, token);
|
3975
|
+
pop_current_node(parser);
|
3976
|
+
acknowledge_self_closing_tag(parser);
|
3977
|
+
return;
|
3978
|
+
}
|
3966
3979
|
if (tag_is(token, kEndTag, GUMBO_TAG_OPTGROUP)) {
|
3967
3980
|
GumboVector* open_elements = &parser->_parser_state->_open_elements;
|
3968
3981
|
if (
|
@@ -4813,14 +4826,17 @@ GumboOutput* gumbo_parse_with_options (
|
|
4813
4826
|
// to a token.
|
4814
4827
|
if (token.type == GUMBO_TOKEN_END_TAG &&
|
4815
4828
|
token.v.end_tag.tag == GUMBO_TAG_UNKNOWN)
|
4829
|
+
{
|
4816
4830
|
gumbo_free(token.v.end_tag.name);
|
4831
|
+
token.v.end_tag.name = NULL;
|
4832
|
+
}
|
4833
|
+
if (unlikely(state->_open_elements.length > max_tree_depth)) {
|
4834
|
+
parser._output->status = GUMBO_STATUS_TREE_TOO_DEEP;
|
4835
|
+
gumbo_debug("Tree depth limit exceeded.\n");
|
4836
|
+
break;
|
4837
|
+
}
|
4817
4838
|
}
|
4818
4839
|
|
4819
|
-
if (unlikely(state->_open_elements.length > max_tree_depth)) {
|
4820
|
-
parser._output->status = GUMBO_STATUS_TREE_TOO_DEEP;
|
4821
|
-
gumbo_debug("Tree depth limit exceeded.\n");
|
4822
|
-
break;
|
4823
|
-
}
|
4824
4840
|
|
4825
4841
|
++loop_count;
|
4826
4842
|
assert(loop_count < 1000000000UL);
|
@@ -506,6 +506,7 @@ static void abandon_current_tag(GumboParser* parser) {
|
|
506
506
|
for (unsigned int i = 0; i < tag_state->_attributes.length; ++i) {
|
507
507
|
gumbo_destroy_attribute(tag_state->_attributes.data[i]);
|
508
508
|
}
|
509
|
+
gumbo_free(tag_state->_name);
|
509
510
|
gumbo_free(tag_state->_attributes.data);
|
510
511
|
mark_tag_state_as_empty(tag_state);
|
511
512
|
gumbo_string_buffer_destroy(&tag_state->_buffer);
|
@@ -133,7 +133,7 @@ module Nokogiri
|
|
133
133
|
args += node.value[1..-1].map do |n|
|
134
134
|
n.is_a?(Nokogiri::CSS::Node) ? n.accept(self) : n
|
135
135
|
end
|
136
|
-
"
|
136
|
+
"nokogiri:#{node.value.first}#{args.join(",")})"
|
137
137
|
end
|
138
138
|
end
|
139
139
|
|
@@ -207,7 +207,7 @@ module Nokogiri
|
|
207
207
|
when "parent" then "node()"
|
208
208
|
when "root" then "not(parent::*)"
|
209
209
|
else
|
210
|
-
node.value.first
|
210
|
+
"nokogiri:#{node.value.first}(.)"
|
211
211
|
end
|
212
212
|
end
|
213
213
|
end
|
@@ -302,7 +302,7 @@ module Nokogiri
|
|
302
302
|
end
|
303
303
|
|
304
304
|
def read_a_and_positive_b(values)
|
305
|
-
op = values[2]
|
305
|
+
op = values[2].strip
|
306
306
|
if op == "+"
|
307
307
|
a = values[0].to_i
|
308
308
|
b = values[3].to_i
|
@@ -335,25 +335,5 @@ module Nokogiri
|
|
335
335
|
end
|
336
336
|
end
|
337
337
|
end
|
338
|
-
|
339
|
-
module XPathVisitorAlwaysUseBuiltins # :nodoc:
|
340
|
-
def self.new
|
341
|
-
warn(
|
342
|
-
"Nokogiri::CSS::XPathVisitorAlwaysUseBuiltins is deprecated and will be removed in a future version of Nokogiri",
|
343
|
-
{ uplevel: 1 },
|
344
|
-
)
|
345
|
-
XPathVisitor.new(builtins: :always)
|
346
|
-
end
|
347
|
-
end
|
348
|
-
|
349
|
-
module XPathVisitorOptimallyUseBuiltins # :nodoc:
|
350
|
-
def self.new
|
351
|
-
warn(
|
352
|
-
"Nokogiri::CSS::XPathVisitorOptimallyUseBuiltins is deprecated and will be removed in a future version of Nokogiri",
|
353
|
-
{ uplevel: 1 },
|
354
|
-
)
|
355
|
-
XPathVisitor.new(builtins: :optimal)
|
356
|
-
end
|
357
|
-
end
|
358
338
|
end
|
359
339
|
end
|
data/lib/nokogiri/extension.rb
CHANGED
@@ -6,7 +6,7 @@ begin
|
|
6
6
|
RUBY_VERSION =~ /(\d+\.\d+)/
|
7
7
|
require_relative "#{Regexp.last_match(1)}/nokogiri"
|
8
8
|
rescue LoadError => e
|
9
|
-
if
|
9
|
+
if e.message.include?("GLIBC")
|
10
10
|
warn(<<~EOM)
|
11
11
|
|
12
12
|
ERROR: It looks like you're trying to use Nokogiri as a precompiled native gem on a system
|
@@ -92,7 +92,7 @@ module Nokogiri
|
|
92
92
|
title = XML::Node.new("title", self) << tnode
|
93
93
|
if (head = at_xpath("//head"))
|
94
94
|
head << title
|
95
|
-
elsif (meta =
|
95
|
+
elsif (meta = at_xpath("//meta[@charset]") || meta_content_type)
|
96
96
|
# better put after charset declaration
|
97
97
|
meta.add_next_sibling(title)
|
98
98
|
else
|
@@ -24,7 +24,7 @@ module Nokogiri
|
|
24
24
|
new(doc, tags, nil, options, &block)
|
25
25
|
end
|
26
26
|
|
27
|
-
def initialize(document, tags = nil, ctx = nil, options = XML::ParseOptions::DEFAULT_HTML)
|
27
|
+
def initialize(document, tags = nil, ctx = nil, options = XML::ParseOptions::DEFAULT_HTML) # rubocop:disable Lint/MissingSuper
|
28
28
|
return self unless tags
|
29
29
|
|
30
30
|
options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
|