nokogiri 1.14.3 → 1.16.2
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +19 -15
- data/README.md +4 -1
- data/dependencies.yml +9 -8
- data/ext/nokogiri/extconf.rb +69 -26
- data/ext/nokogiri/html4_document.c +1 -2
- data/ext/nokogiri/html4_element_description.c +19 -14
- data/ext/nokogiri/html4_sax_parser_context.c +10 -16
- data/ext/nokogiri/html4_sax_push_parser.c +3 -3
- data/ext/nokogiri/nokogiri.c +46 -24
- data/ext/nokogiri/nokogiri.h +23 -5
- data/ext/nokogiri/test_global_handlers.c +1 -1
- data/ext/nokogiri/xml_attr.c +1 -1
- data/ext/nokogiri/xml_cdata.c +30 -17
- data/ext/nokogiri/xml_comment.c +1 -1
- data/ext/nokogiri/xml_document.c +113 -25
- data/ext/nokogiri/xml_document_fragment.c +1 -1
- data/ext/nokogiri/xml_dtd.c +1 -1
- data/ext/nokogiri/xml_element_content.c +32 -29
- data/ext/nokogiri/xml_element_decl.c +5 -5
- data/ext/nokogiri/xml_encoding_handler.c +12 -4
- data/ext/nokogiri/xml_entity_reference.c +1 -1
- data/ext/nokogiri/xml_namespace.c +11 -16
- data/ext/nokogiri/xml_node.c +13 -16
- data/ext/nokogiri/xml_node_set.c +125 -105
- data/ext/nokogiri/xml_processing_instruction.c +1 -1
- data/ext/nokogiri/xml_reader.c +60 -74
- data/ext/nokogiri/xml_relax_ng.c +66 -79
- data/ext/nokogiri/xml_sax_parser.c +24 -5
- data/ext/nokogiri/xml_sax_parser_context.c +50 -25
- data/ext/nokogiri/xml_sax_push_parser.c +30 -9
- data/ext/nokogiri/xml_schema.c +94 -115
- data/ext/nokogiri/xml_syntax_error.c +3 -3
- data/ext/nokogiri/xml_text.c +26 -13
- data/ext/nokogiri/xml_xpath_context.c +153 -83
- data/ext/nokogiri/xslt_stylesheet.c +111 -53
- data/gumbo-parser/Makefile +18 -0
- data/gumbo-parser/src/error.c +8 -4
- data/gumbo-parser/src/foreign_attrs.c +13 -14
- data/gumbo-parser/src/foreign_attrs.gperf +1 -1
- data/gumbo-parser/src/parser.c +21 -5
- data/gumbo-parser/src/tokenizer.c +1 -0
- data/lib/nokogiri/css/parser_extras.rb +1 -1
- data/lib/nokogiri/css/xpath_visitor.rb +3 -23
- data/lib/nokogiri/extension.rb +1 -1
- data/lib/nokogiri/html4/document.rb +1 -1
- data/lib/nokogiri/html4/document_fragment.rb +1 -1
- data/lib/nokogiri/html4/element_description_defaults.rb +1821 -353
- data/lib/nokogiri/html4/encoding_reader.rb +1 -1
- data/lib/nokogiri/html5/document_fragment.rb +1 -1
- data/lib/nokogiri/html5/node.rb +5 -0
- data/lib/nokogiri/html5.rb +0 -63
- data/lib/nokogiri/jruby/nokogiri_jars.rb +9 -9
- data/lib/nokogiri/version/constant.rb +1 -1
- data/lib/nokogiri/version/info.rb +6 -5
- data/lib/nokogiri/xml/attr.rb +2 -2
- data/lib/nokogiri/xml/attribute_decl.rb +4 -2
- data/lib/nokogiri/xml/document.rb +4 -5
- data/lib/nokogiri/xml/document_fragment.rb +3 -3
- data/lib/nokogiri/xml/element_content.rb +10 -2
- data/lib/nokogiri/xml/element_decl.rb +4 -2
- data/lib/nokogiri/xml/entity_decl.rb +4 -2
- data/lib/nokogiri/xml/namespace.rb +1 -2
- data/lib/nokogiri/xml/node/save_options.rb +8 -0
- data/lib/nokogiri/xml/node.rb +53 -37
- data/lib/nokogiri/xml/node_set.rb +3 -3
- data/lib/nokogiri/xml/pp/node.rb +23 -12
- data/lib/nokogiri/xml/reader.rb +10 -9
- data/lib/nokogiri/xml/sax/document.rb +1 -1
- data/lib/nokogiri/xml/searchable.rb +21 -13
- data/lib/nokogiri/xml/syntax_error.rb +1 -1
- data/lib/nokogiri/xml.rb +1 -1
- data/lib/nokogiri/xslt/stylesheet.rb +29 -7
- data/lib/nokogiri/xslt.rb +74 -4
- data/lib/nokogiri.rb +13 -5
- data/lib/xsd/xmlparser/nokogiri.rb +1 -1
- data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
- data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
- data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
- data/ports/archives/libxml2-2.12.5.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.39.tar.xz +0 -0
- metadata +11 -9
- data/patches/libxslt/0001-update-automake-files-for-arm64.patch +0 -3037
- data/ports/archives/libxml2-2.10.4.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.37.tar.xz +0 -0
@@ -3,19 +3,30 @@
|
|
3
3
|
VALUE cNokogiriXsltStylesheet ;
|
4
4
|
|
5
5
|
static void
|
6
|
-
mark(
|
6
|
+
mark(void *data)
|
7
7
|
{
|
8
|
+
nokogiriXsltStylesheetTuple *wrapper = (nokogiriXsltStylesheetTuple *)data;
|
8
9
|
rb_gc_mark(wrapper->func_instances);
|
9
10
|
}
|
10
11
|
|
11
12
|
static void
|
12
|
-
dealloc(
|
13
|
+
dealloc(void *data)
|
13
14
|
{
|
15
|
+
nokogiriXsltStylesheetTuple *wrapper = (nokogiriXsltStylesheetTuple *)data;
|
14
16
|
xsltStylesheetPtr doc = wrapper->ss;
|
15
17
|
xsltFreeStylesheet(doc);
|
16
18
|
ruby_xfree(wrapper);
|
17
19
|
}
|
18
20
|
|
21
|
+
static const rb_data_type_t xslt_stylesheet_type = {
|
22
|
+
.wrap_struct_name = "Nokogiri::XSLT::Stylesheet",
|
23
|
+
.function = {
|
24
|
+
.dmark = mark,
|
25
|
+
.dfree = dealloc,
|
26
|
+
},
|
27
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY
|
28
|
+
};
|
29
|
+
|
19
30
|
PRINTFLIKE_DECL(2, 3)
|
20
31
|
static void
|
21
32
|
xslt_generic_error_handler(void *ctx, const char *msg, ...)
|
@@ -42,8 +53,12 @@ Nokogiri_wrap_xslt_stylesheet(xsltStylesheetPtr ss)
|
|
42
53
|
VALUE self;
|
43
54
|
nokogiriXsltStylesheetTuple *wrapper;
|
44
55
|
|
45
|
-
self =
|
46
|
-
|
56
|
+
self = TypedData_Make_Struct(
|
57
|
+
cNokogiriXsltStylesheet,
|
58
|
+
nokogiriXsltStylesheetTuple,
|
59
|
+
&xslt_stylesheet_type,
|
60
|
+
wrapper
|
61
|
+
);
|
47
62
|
|
48
63
|
ss->_private = (void *)self;
|
49
64
|
wrapper->ss = ss;
|
@@ -56,7 +71,12 @@ Nokogiri_wrap_xslt_stylesheet(xsltStylesheetPtr ss)
|
|
56
71
|
* call-seq:
|
57
72
|
* parse_stylesheet_doc(document)
|
58
73
|
*
|
59
|
-
* Parse
|
74
|
+
* Parse an XSLT::Stylesheet from +document+.
|
75
|
+
*
|
76
|
+
* [Parameters]
|
77
|
+
* - +document+ (Nokogiri::XML::Document) the document to be parsed.
|
78
|
+
*
|
79
|
+
* [Returns] Nokogiri::XSLT::Stylesheet
|
60
80
|
*/
|
61
81
|
static VALUE
|
62
82
|
parse_stylesheet_doc(VALUE klass, VALUE xmldocobj)
|
@@ -64,7 +84,8 @@ parse_stylesheet_doc(VALUE klass, VALUE xmldocobj)
|
|
64
84
|
xmlDocPtr xml, xml_cpy;
|
65
85
|
VALUE errstr, exception;
|
66
86
|
xsltStylesheetPtr ss ;
|
67
|
-
|
87
|
+
|
88
|
+
xml = noko_xml_document_unwrap(xmldocobj);
|
68
89
|
|
69
90
|
errstr = rb_str_new(0, 0);
|
70
91
|
xsltSetGenericErrorFunc((void *)errstr, xslt_generic_error_handler);
|
@@ -88,10 +109,10 @@ parse_stylesheet_doc(VALUE klass, VALUE xmldocobj)
|
|
88
109
|
* call-seq:
|
89
110
|
* serialize(document)
|
90
111
|
*
|
91
|
-
* Serialize +document+ to an xml string.
|
112
|
+
* Serialize +document+ to an xml string, as specified by the +method+ parameter in the Stylesheet.
|
92
113
|
*/
|
93
114
|
static VALUE
|
94
|
-
|
115
|
+
rb_xslt_stylesheet_serialize(VALUE self, VALUE xmlobj)
|
95
116
|
{
|
96
117
|
xmlDocPtr xml ;
|
97
118
|
nokogiriXsltStylesheetTuple *wrapper;
|
@@ -99,8 +120,13 @@ serialize(VALUE self, VALUE xmlobj)
|
|
99
120
|
int doc_len ;
|
100
121
|
VALUE rval ;
|
101
122
|
|
102
|
-
|
103
|
-
|
123
|
+
xml = noko_xml_document_unwrap(xmlobj);
|
124
|
+
TypedData_Get_Struct(
|
125
|
+
self,
|
126
|
+
nokogiriXsltStylesheetTuple,
|
127
|
+
&xslt_stylesheet_type,
|
128
|
+
wrapper
|
129
|
+
);
|
104
130
|
xsltSaveResultToString(&doc_ptr, &doc_len, xml, wrapper->ss);
|
105
131
|
rval = NOKOGIRI_STR_NEW(doc_ptr, doc_len);
|
106
132
|
xmlFree(doc_ptr);
|
@@ -112,7 +138,7 @@ serialize(VALUE self, VALUE xmlobj)
|
|
112
138
|
* transform(document)
|
113
139
|
* transform(document, params = {})
|
114
140
|
*
|
115
|
-
*
|
141
|
+
* Transform an XML::Document as defined by an XSLT::Stylesheet.
|
116
142
|
*
|
117
143
|
* [Parameters]
|
118
144
|
* - +document+ (Nokogiri::XML::Document) the document to be transformed.
|
@@ -221,60 +247,74 @@ serialize(VALUE self, VALUE xmlobj)
|
|
221
247
|
* See: Nokogiri::XSLT.quote_params
|
222
248
|
*/
|
223
249
|
static VALUE
|
224
|
-
|
250
|
+
rb_xslt_stylesheet_transform(int argc, VALUE *argv, VALUE self)
|
225
251
|
{
|
226
|
-
VALUE
|
227
|
-
xmlDocPtr
|
228
|
-
xmlDocPtr
|
252
|
+
VALUE rb_document, rb_param, rb_error_str;
|
253
|
+
xmlDocPtr c_document ;
|
254
|
+
xmlDocPtr c_result_document ;
|
229
255
|
nokogiriXsltStylesheetTuple *wrapper;
|
230
256
|
const char **params ;
|
231
257
|
long param_len, j ;
|
232
258
|
int parse_error_occurred ;
|
259
|
+
int defensive_copy_p = 0;
|
233
260
|
|
234
|
-
rb_scan_args(argc, argv, "11", &
|
235
|
-
if (NIL_P(
|
236
|
-
if (!rb_obj_is_kind_of(
|
261
|
+
rb_scan_args(argc, argv, "11", &rb_document, &rb_param);
|
262
|
+
if (NIL_P(rb_param)) { rb_param = rb_ary_new2(0L) ; }
|
263
|
+
if (!rb_obj_is_kind_of(rb_document, cNokogiriXmlDocument)) {
|
237
264
|
rb_raise(rb_eArgError, "argument must be a Nokogiri::XML::Document");
|
238
265
|
}
|
239
266
|
|
240
267
|
/* handle hashes as arguments. */
|
241
|
-
if (T_HASH == TYPE(
|
242
|
-
|
243
|
-
|
268
|
+
if (T_HASH == TYPE(rb_param)) {
|
269
|
+
rb_param = rb_funcall(rb_param, rb_intern("to_a"), 0);
|
270
|
+
rb_param = rb_funcall(rb_param, rb_intern("flatten"), 0);
|
244
271
|
}
|
245
272
|
|
246
|
-
Check_Type(
|
273
|
+
Check_Type(rb_param, T_ARRAY);
|
247
274
|
|
248
|
-
|
249
|
-
|
275
|
+
c_document = noko_xml_document_unwrap(rb_document);
|
276
|
+
TypedData_Get_Struct(self, nokogiriXsltStylesheetTuple, &xslt_stylesheet_type, wrapper);
|
250
277
|
|
251
|
-
param_len = RARRAY_LEN(
|
278
|
+
param_len = RARRAY_LEN(rb_param);
|
252
279
|
params = ruby_xcalloc((size_t)param_len + 1, sizeof(char *));
|
253
280
|
for (j = 0 ; j < param_len ; j++) {
|
254
|
-
VALUE entry = rb_ary_entry(
|
281
|
+
VALUE entry = rb_ary_entry(rb_param, j);
|
255
282
|
const char *ptr = StringValueCStr(entry);
|
256
283
|
params[j] = ptr;
|
257
284
|
}
|
258
285
|
params[param_len] = 0 ;
|
259
286
|
|
260
|
-
|
261
|
-
|
262
|
-
|
287
|
+
xsltTransformContextPtr c_transform_context = xsltNewTransformContext(wrapper->ss, c_document);
|
288
|
+
if (xsltNeedElemSpaceHandling(c_transform_context) &&
|
289
|
+
noko_xml_document_has_wrapped_blank_nodes_p(c_document)) {
|
290
|
+
// see https://github.com/sparklemotion/nokogiri/issues/2800
|
291
|
+
c_document = xmlCopyDoc(c_document, 1);
|
292
|
+
defensive_copy_p = 1;
|
293
|
+
}
|
294
|
+
xsltFreeTransformContext(c_transform_context);
|
295
|
+
|
296
|
+
rb_error_str = rb_str_new(0, 0);
|
297
|
+
xsltSetGenericErrorFunc((void *)rb_error_str, xslt_generic_error_handler);
|
298
|
+
xmlSetGenericErrorFunc((void *)rb_error_str, xslt_generic_error_handler);
|
299
|
+
|
300
|
+
c_result_document = xsltApplyStylesheet(wrapper->ss, c_document, params);
|
263
301
|
|
264
|
-
result = xsltApplyStylesheet(wrapper->ss, xml, params);
|
265
302
|
ruby_xfree(params);
|
303
|
+
if (defensive_copy_p) {
|
304
|
+
xmlFreeDoc(c_document);
|
305
|
+
c_document = NULL;
|
306
|
+
}
|
266
307
|
|
267
308
|
xsltSetGenericErrorFunc(NULL, NULL);
|
268
309
|
xmlSetGenericErrorFunc(NULL, NULL);
|
269
310
|
|
270
|
-
parse_error_occurred = (Qfalse == rb_funcall(
|
311
|
+
parse_error_occurred = (Qfalse == rb_funcall(rb_error_str, rb_intern("empty?"), 0));
|
271
312
|
|
272
313
|
if (parse_error_occurred) {
|
273
|
-
|
274
|
-
rb_exc_raise(exception);
|
314
|
+
rb_exc_raise(rb_exc_new3(rb_eRuntimeError, rb_error_str));
|
275
315
|
}
|
276
316
|
|
277
|
-
return noko_xml_document_wrap((VALUE)0,
|
317
|
+
return noko_xml_document_wrap((VALUE)0, c_result_document) ;
|
278
318
|
}
|
279
319
|
|
280
320
|
static void
|
@@ -290,7 +330,12 @@ method_caller(xmlXPathParserContextPtr ctxt, int nargs)
|
|
290
330
|
handler = (VALUE)xsltGetExtData(transform, functionURI);
|
291
331
|
function_name = (const char *)(ctxt->context->function);
|
292
332
|
|
293
|
-
Nokogiri_marshal_xpath_funcall_and_return_values(
|
333
|
+
Nokogiri_marshal_xpath_funcall_and_return_values(
|
334
|
+
ctxt,
|
335
|
+
nargs,
|
336
|
+
handler,
|
337
|
+
(const char *)function_name
|
338
|
+
);
|
294
339
|
}
|
295
340
|
|
296
341
|
static void *
|
@@ -306,12 +351,20 @@ initFunc(xsltTransformContextPtr ctxt, const xmlChar *uri)
|
|
306
351
|
|
307
352
|
for (i = 0; i < RARRAY_LEN(methods); i++) {
|
308
353
|
VALUE method_name = rb_obj_as_string(rb_ary_entry(methods, i));
|
309
|
-
xsltRegisterExtFunction(
|
310
|
-
|
354
|
+
xsltRegisterExtFunction(
|
355
|
+
ctxt,
|
356
|
+
(unsigned char *)StringValueCStr(method_name),
|
357
|
+
uri,
|
358
|
+
method_caller
|
359
|
+
);
|
311
360
|
}
|
312
361
|
|
313
|
-
|
314
|
-
|
362
|
+
TypedData_Get_Struct(
|
363
|
+
(VALUE)ctxt->style->_private,
|
364
|
+
nokogiriXsltStylesheetTuple,
|
365
|
+
&xslt_stylesheet_type,
|
366
|
+
wrapper
|
367
|
+
);
|
315
368
|
inst = rb_class_new_instance(0, NULL, obj);
|
316
369
|
rb_ary_push(wrapper->func_instances, inst);
|
317
370
|
|
@@ -324,33 +377,38 @@ shutdownFunc(xsltTransformContextPtr ctxt,
|
|
324
377
|
{
|
325
378
|
nokogiriXsltStylesheetTuple *wrapper;
|
326
379
|
|
327
|
-
|
328
|
-
|
380
|
+
TypedData_Get_Struct(
|
381
|
+
(VALUE)ctxt->style->_private,
|
382
|
+
nokogiriXsltStylesheetTuple,
|
383
|
+
&xslt_stylesheet_type,
|
384
|
+
wrapper
|
385
|
+
);
|
329
386
|
|
330
387
|
rb_ary_clear(wrapper->func_instances);
|
331
388
|
}
|
332
389
|
|
333
|
-
/*
|
334
|
-
* call-seq:
|
335
|
-
* register(uri, custom_handler_class)
|
336
|
-
*
|
337
|
-
* Register a class that implements custom XSLT transformation functions.
|
338
|
-
*/
|
390
|
+
/* docstring is in lib/nokogiri/xslt.rb */
|
339
391
|
static VALUE
|
340
|
-
|
392
|
+
rb_xslt_s_register(VALUE self, VALUE uri, VALUE obj)
|
341
393
|
{
|
342
394
|
VALUE modules = rb_iv_get(self, "@modules");
|
343
|
-
if (NIL_P(modules)) {
|
395
|
+
if (NIL_P(modules)) {
|
396
|
+
rb_raise(rb_eRuntimeError, "internal error: @modules not set");
|
397
|
+
}
|
344
398
|
|
345
399
|
rb_hash_aset(modules, uri, obj);
|
346
|
-
xsltRegisterExtModule(
|
400
|
+
xsltRegisterExtModule(
|
401
|
+
(unsigned char *)StringValueCStr(uri),
|
402
|
+
initFunc,
|
403
|
+
shutdownFunc
|
404
|
+
);
|
347
405
|
return self;
|
348
406
|
}
|
349
407
|
|
350
408
|
void
|
351
409
|
noko_init_xslt_stylesheet(void)
|
352
410
|
{
|
353
|
-
rb_define_singleton_method(mNokogiriXslt, "register",
|
411
|
+
rb_define_singleton_method(mNokogiriXslt, "register", rb_xslt_s_register, 2);
|
354
412
|
rb_iv_set(mNokogiriXslt, "@modules", rb_hash_new());
|
355
413
|
|
356
414
|
cNokogiriXsltStylesheet = rb_define_class_under(mNokogiriXslt, "Stylesheet", rb_cObject);
|
@@ -358,6 +416,6 @@ noko_init_xslt_stylesheet(void)
|
|
358
416
|
rb_undef_alloc_func(cNokogiriXsltStylesheet);
|
359
417
|
|
360
418
|
rb_define_singleton_method(cNokogiriXsltStylesheet, "parse_stylesheet_doc", parse_stylesheet_doc, 1);
|
361
|
-
rb_define_method(cNokogiriXsltStylesheet, "serialize",
|
362
|
-
rb_define_method(cNokogiriXsltStylesheet, "transform",
|
419
|
+
rb_define_method(cNokogiriXsltStylesheet, "serialize", rb_xslt_stylesheet_serialize, 1);
|
420
|
+
rb_define_method(cNokogiriXsltStylesheet, "transform", rb_xslt_stylesheet_transform, -1);
|
363
421
|
}
|
data/gumbo-parser/Makefile
CHANGED
@@ -13,6 +13,23 @@ LDFLAGS := -pthread
|
|
13
13
|
|
14
14
|
all: check
|
15
15
|
|
16
|
+
oss-fuzz:
|
17
|
+
./fuzzer/build-ossfuzz.sh
|
18
|
+
|
19
|
+
fuzzers: fuzzer-normal fuzzer-asan fuzzer-ubsan fuzzer-msan
|
20
|
+
|
21
|
+
fuzzer-normal:
|
22
|
+
./fuzzer/build.sh
|
23
|
+
|
24
|
+
fuzzer-asan:
|
25
|
+
SANITIZER=asan ./fuzzer/build.sh
|
26
|
+
|
27
|
+
fuzzer-ubsan:
|
28
|
+
SANITIZER=ubsan ./fuzzer/build.sh
|
29
|
+
|
30
|
+
fuzzer-msan:
|
31
|
+
SANITIZER=msan ./fuzzer/build.sh
|
32
|
+
|
16
33
|
# don't try to regenerate ragel or gperf files in CI, that should be a development-only action and
|
17
34
|
# the generated files should be committed to SCM
|
18
35
|
ifneq ($(CI),true)
|
@@ -81,6 +98,7 @@ coverage:
|
|
81
98
|
|
82
99
|
clean:
|
83
100
|
$(RM) -r build
|
101
|
+
$(RM) -r fuzzer/build fuzzer/src-* fuzzer/gumbo_corpus
|
84
102
|
|
85
103
|
build/src/flags: | build/src
|
86
104
|
@echo 'old_CC := $(CC)' > $@
|
data/gumbo-parser/src/error.c
CHANGED
@@ -91,7 +91,7 @@ static void print_tag_stack (
|
|
91
91
|
const GumboParserError* error,
|
92
92
|
GumboStringBuffer* output
|
93
93
|
) {
|
94
|
-
print_message(output, "
|
94
|
+
print_message(output, " Currently open tags: ");
|
95
95
|
for (unsigned int i = 0; i < error->tag_stack.length; ++i) {
|
96
96
|
if (i) {
|
97
97
|
print_message(output, ", ");
|
@@ -347,15 +347,19 @@ static void handle_parser_error (
|
|
347
347
|
if (error->parser_state == GUMBO_INSERTION_MODE_INITIAL) {
|
348
348
|
print_message(output, "You must provide a doctype");
|
349
349
|
} else {
|
350
|
-
print_message(output, "Premature end of file");
|
350
|
+
print_message(output, "Premature end of file.");
|
351
351
|
print_tag_stack(error, output);
|
352
352
|
}
|
353
353
|
return;
|
354
354
|
case GUMBO_TOKEN_START_TAG:
|
355
|
+
print_message(output, "Start tag '%s' isn't allowed here.",
|
356
|
+
gumbo_normalized_tagname(error->input_tag));
|
357
|
+
print_tag_stack(error, output);
|
358
|
+
return;
|
355
359
|
case GUMBO_TOKEN_END_TAG:
|
356
|
-
print_message(output, "
|
360
|
+
print_message(output, "End tag '%s' isn't allowed here.",
|
361
|
+
gumbo_normalized_tagname(error->input_tag));
|
357
362
|
print_tag_stack(error, output);
|
358
|
-
// TODO(jdtang): Give more specific messaging.
|
359
363
|
return;
|
360
364
|
}
|
361
365
|
}
|
@@ -1,6 +1,6 @@
|
|
1
1
|
/* ANSI-C code produced by gperf version 3.1 */
|
2
2
|
/* Command-line: gperf -m100 -n src/foreign_attrs.gperf */
|
3
|
-
/* Computed positions: -k'
|
3
|
+
/* Computed positions: -k'8-9' */
|
4
4
|
/* Filtered by: gperf-filter.sed */
|
5
5
|
|
6
6
|
#include "replacement.h"
|
@@ -29,9 +29,9 @@ hash (register const char *str, register size_t len)
|
|
29
29
|
11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
|
30
30
|
11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
|
31
31
|
11, 11, 11, 11, 11, 11, 11, 11, 11, 2,
|
32
|
-
11,
|
33
|
-
11, 5,
|
34
|
-
11,
|
32
|
+
11, 1, 11, 10, 4, 4, 11, 11, 3, 11,
|
33
|
+
11, 5, 3, 11, 0, 11, 2, 11, 11, 11,
|
34
|
+
11, 2, 11, 11, 11, 11, 11, 11, 11, 11,
|
35
35
|
11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
|
36
36
|
11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
|
37
37
|
11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
|
@@ -51,15 +51,14 @@ hash (register const char *str, register size_t len)
|
|
51
51
|
switch (len)
|
52
52
|
{
|
53
53
|
default:
|
54
|
+
hval += asso_values[(unsigned char)str[8]];
|
55
|
+
/*FALLTHROUGH*/
|
56
|
+
case 8:
|
54
57
|
hval += asso_values[(unsigned char)str[7]];
|
55
58
|
/*FALLTHROUGH*/
|
56
59
|
case 7:
|
57
60
|
case 6:
|
58
61
|
case 5:
|
59
|
-
case 4:
|
60
|
-
case 3:
|
61
|
-
case 2:
|
62
|
-
hval += asso_values[(unsigned char)str[1]];
|
63
62
|
break;
|
64
63
|
}
|
65
64
|
return hval;
|
@@ -70,21 +69,21 @@ gumbo_get_foreign_attr_replacement (register const char *str, register size_t le
|
|
70
69
|
{
|
71
70
|
static const unsigned char lengthtable[] =
|
72
71
|
{
|
73
|
-
5,
|
72
|
+
5, 10, 13, 9, 13, 10, 11, 11, 10, 10, 8
|
74
73
|
};
|
75
74
|
static const ForeignAttrReplacement wordlist[] =
|
76
75
|
{
|
77
76
|
{"xmlns", "xmlns", GUMBO_ATTR_NAMESPACE_XMLNS},
|
78
|
-
{"
|
77
|
+
{"xlink:href", "href", GUMBO_ATTR_NAMESPACE_XLINK},
|
78
|
+
{"xlink:arcrole", "arcrole", GUMBO_ATTR_NAMESPACE_XLINK},
|
79
79
|
{"xml:space", "space", GUMBO_ATTR_NAMESPACE_XML},
|
80
80
|
{"xlink:actuate", "actuate", GUMBO_ATTR_NAMESPACE_XLINK},
|
81
81
|
{"xlink:type", "type", GUMBO_ATTR_NAMESPACE_XLINK},
|
82
|
-
{"xlink:href", "href", GUMBO_ATTR_NAMESPACE_XLINK},
|
83
|
-
{"xlink:role", "role", GUMBO_ATTR_NAMESPACE_XLINK},
|
84
82
|
{"xlink:title", "title", GUMBO_ATTR_NAMESPACE_XLINK},
|
83
|
+
{"xmlns:xlink", "xlink", GUMBO_ATTR_NAMESPACE_XMLNS},
|
84
|
+
{"xlink:role", "role", GUMBO_ATTR_NAMESPACE_XLINK},
|
85
85
|
{"xlink:show", "show", GUMBO_ATTR_NAMESPACE_XLINK},
|
86
|
-
{"xml:lang", "lang", GUMBO_ATTR_NAMESPACE_XML}
|
87
|
-
{"xml:base", "base", GUMBO_ATTR_NAMESPACE_XML}
|
86
|
+
{"xml:lang", "lang", GUMBO_ATTR_NAMESPACE_XML}
|
88
87
|
};
|
89
88
|
|
90
89
|
if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
|
@@ -15,12 +15,12 @@ ForeignAttrReplacement;
|
|
15
15
|
|
16
16
|
%%
|
17
17
|
"xlink:actuate", "actuate", GUMBO_ATTR_NAMESPACE_XLINK
|
18
|
+
"xlink:arcrole", "arcrole", GUMBO_ATTR_NAMESPACE_XLINK
|
18
19
|
"xlink:href", "href", GUMBO_ATTR_NAMESPACE_XLINK
|
19
20
|
"xlink:role", "role", GUMBO_ATTR_NAMESPACE_XLINK
|
20
21
|
"xlink:show", "show", GUMBO_ATTR_NAMESPACE_XLINK
|
21
22
|
"xlink:title", "title", GUMBO_ATTR_NAMESPACE_XLINK
|
22
23
|
"xlink:type", "type", GUMBO_ATTR_NAMESPACE_XLINK
|
23
|
-
"xml:base", "base", GUMBO_ATTR_NAMESPACE_XML
|
24
24
|
"xml:lang", "lang", GUMBO_ATTR_NAMESPACE_XML
|
25
25
|
"xml:space", "space", GUMBO_ATTR_NAMESPACE_XML
|
26
26
|
"xmlns", "xmlns", GUMBO_ATTR_NAMESPACE_XMLNS
|
data/gumbo-parser/src/parser.c
CHANGED
@@ -2678,6 +2678,7 @@ static void handle_in_head(GumboParser* parser, GumboToken* token) {
|
|
2678
2678
|
static void handle_in_head_noscript(GumboParser* parser, GumboToken* token) {
|
2679
2679
|
if (token->type == GUMBO_TOKEN_DOCTYPE) {
|
2680
2680
|
parser_add_parse_error(parser, token);
|
2681
|
+
ignore_token(parser);
|
2681
2682
|
return;
|
2682
2683
|
}
|
2683
2684
|
if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
|
@@ -3963,6 +3964,18 @@ static void handle_in_select(GumboParser* parser, GumboToken* token) {
|
|
3963
3964
|
insert_element_from_token(parser, token);
|
3964
3965
|
return;
|
3965
3966
|
}
|
3967
|
+
if (tag_is(token, kStartTag, GUMBO_TAG_HR)) {
|
3968
|
+
if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTION)) {
|
3969
|
+
pop_current_node(parser);
|
3970
|
+
}
|
3971
|
+
if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTGROUP)) {
|
3972
|
+
pop_current_node(parser);
|
3973
|
+
}
|
3974
|
+
insert_element_from_token(parser, token);
|
3975
|
+
pop_current_node(parser);
|
3976
|
+
acknowledge_self_closing_tag(parser);
|
3977
|
+
return;
|
3978
|
+
}
|
3966
3979
|
if (tag_is(token, kEndTag, GUMBO_TAG_OPTGROUP)) {
|
3967
3980
|
GumboVector* open_elements = &parser->_parser_state->_open_elements;
|
3968
3981
|
if (
|
@@ -4813,14 +4826,17 @@ GumboOutput* gumbo_parse_with_options (
|
|
4813
4826
|
// to a token.
|
4814
4827
|
if (token.type == GUMBO_TOKEN_END_TAG &&
|
4815
4828
|
token.v.end_tag.tag == GUMBO_TAG_UNKNOWN)
|
4829
|
+
{
|
4816
4830
|
gumbo_free(token.v.end_tag.name);
|
4831
|
+
token.v.end_tag.name = NULL;
|
4832
|
+
}
|
4833
|
+
if (unlikely(state->_open_elements.length > max_tree_depth)) {
|
4834
|
+
parser._output->status = GUMBO_STATUS_TREE_TOO_DEEP;
|
4835
|
+
gumbo_debug("Tree depth limit exceeded.\n");
|
4836
|
+
break;
|
4837
|
+
}
|
4817
4838
|
}
|
4818
4839
|
|
4819
|
-
if (unlikely(state->_open_elements.length > max_tree_depth)) {
|
4820
|
-
parser._output->status = GUMBO_STATUS_TREE_TOO_DEEP;
|
4821
|
-
gumbo_debug("Tree depth limit exceeded.\n");
|
4822
|
-
break;
|
4823
|
-
}
|
4824
4840
|
|
4825
4841
|
++loop_count;
|
4826
4842
|
assert(loop_count < 1000000000UL);
|
@@ -506,6 +506,7 @@ static void abandon_current_tag(GumboParser* parser) {
|
|
506
506
|
for (unsigned int i = 0; i < tag_state->_attributes.length; ++i) {
|
507
507
|
gumbo_destroy_attribute(tag_state->_attributes.data[i]);
|
508
508
|
}
|
509
|
+
gumbo_free(tag_state->_name);
|
509
510
|
gumbo_free(tag_state->_attributes.data);
|
510
511
|
mark_tag_state_as_empty(tag_state);
|
511
512
|
gumbo_string_buffer_destroy(&tag_state->_buffer);
|
@@ -133,7 +133,7 @@ module Nokogiri
|
|
133
133
|
args += node.value[1..-1].map do |n|
|
134
134
|
n.is_a?(Nokogiri::CSS::Node) ? n.accept(self) : n
|
135
135
|
end
|
136
|
-
"
|
136
|
+
"nokogiri:#{node.value.first}#{args.join(",")})"
|
137
137
|
end
|
138
138
|
end
|
139
139
|
|
@@ -207,7 +207,7 @@ module Nokogiri
|
|
207
207
|
when "parent" then "node()"
|
208
208
|
when "root" then "not(parent::*)"
|
209
209
|
else
|
210
|
-
node.value.first
|
210
|
+
"nokogiri:#{node.value.first}(.)"
|
211
211
|
end
|
212
212
|
end
|
213
213
|
end
|
@@ -302,7 +302,7 @@ module Nokogiri
|
|
302
302
|
end
|
303
303
|
|
304
304
|
def read_a_and_positive_b(values)
|
305
|
-
op = values[2]
|
305
|
+
op = values[2].strip
|
306
306
|
if op == "+"
|
307
307
|
a = values[0].to_i
|
308
308
|
b = values[3].to_i
|
@@ -335,25 +335,5 @@ module Nokogiri
|
|
335
335
|
end
|
336
336
|
end
|
337
337
|
end
|
338
|
-
|
339
|
-
module XPathVisitorAlwaysUseBuiltins # :nodoc:
|
340
|
-
def self.new
|
341
|
-
warn(
|
342
|
-
"Nokogiri::CSS::XPathVisitorAlwaysUseBuiltins is deprecated and will be removed in a future version of Nokogiri",
|
343
|
-
{ uplevel: 1 },
|
344
|
-
)
|
345
|
-
XPathVisitor.new(builtins: :always)
|
346
|
-
end
|
347
|
-
end
|
348
|
-
|
349
|
-
module XPathVisitorOptimallyUseBuiltins # :nodoc:
|
350
|
-
def self.new
|
351
|
-
warn(
|
352
|
-
"Nokogiri::CSS::XPathVisitorOptimallyUseBuiltins is deprecated and will be removed in a future version of Nokogiri",
|
353
|
-
{ uplevel: 1 },
|
354
|
-
)
|
355
|
-
XPathVisitor.new(builtins: :optimal)
|
356
|
-
end
|
357
|
-
end
|
358
338
|
end
|
359
339
|
end
|
data/lib/nokogiri/extension.rb
CHANGED
@@ -6,7 +6,7 @@ begin
|
|
6
6
|
RUBY_VERSION =~ /(\d+\.\d+)/
|
7
7
|
require_relative "#{Regexp.last_match(1)}/nokogiri"
|
8
8
|
rescue LoadError => e
|
9
|
-
if
|
9
|
+
if e.message.include?("GLIBC")
|
10
10
|
warn(<<~EOM)
|
11
11
|
|
12
12
|
ERROR: It looks like you're trying to use Nokogiri as a precompiled native gem on a system
|
@@ -92,7 +92,7 @@ module Nokogiri
|
|
92
92
|
title = XML::Node.new("title", self) << tnode
|
93
93
|
if (head = at_xpath("//head"))
|
94
94
|
head << title
|
95
|
-
elsif (meta =
|
95
|
+
elsif (meta = at_xpath("//meta[@charset]") || meta_content_type)
|
96
96
|
# better put after charset declaration
|
97
97
|
meta.add_next_sibling(title)
|
98
98
|
else
|
@@ -24,7 +24,7 @@ module Nokogiri
|
|
24
24
|
new(doc, tags, nil, options, &block)
|
25
25
|
end
|
26
26
|
|
27
|
-
def initialize(document, tags = nil, ctx = nil, options = XML::ParseOptions::DEFAULT_HTML)
|
27
|
+
def initialize(document, tags = nil, ctx = nil, options = XML::ParseOptions::DEFAULT_HTML) # rubocop:disable Lint/MissingSuper
|
28
28
|
return self unless tags
|
29
29
|
|
30
30
|
options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
|