nokogumbo 1.1.12 → 1.1.13
Sign up to get free protection for your applications and to get access to all the features.
- data/ext/nokogumboc/extconf.rb +1 -1
- data/gumbo-parser/src/char_ref.c +22828 -2291
- data/gumbo-parser/src/char_ref.rl +2548 -0
- data/gumbo-parser/src/error.c +21 -0
- data/gumbo-parser/src/parser.c +109 -105
- data/gumbo-parser/src/tokenizer.c +103 -103
- data/gumbo-parser/src/utf8.c +114 -120
- data/gumbo-parser/src/utf8.h +6 -0
- metadata +3 -2
data/gumbo-parser/src/error.c
CHANGED
@@ -39,6 +39,27 @@ static int print_message(GumboParser* parser, GumboStringBuffer* output,
   int remaining_capacity = output->capacity - output->length;
   int bytes_written = vsnprintf(output->data + output->length,
                                 remaining_capacity, format, args);
+#ifdef _MSC_VER
+  if (bytes_written == -1) {
+    // vsnprintf returns -1 on MSVC++ if there's not enough capacity, instead of
+    // returning the number of bytes that would've been written had there been
+    // enough. In this case, we'll double the buffer size and hope it fits when
+    // we retry (letting it fail and returning 0 if it doesn't), since there's
+    // no way to smartly resize the buffer.
+    gumbo_string_buffer_reserve(parser, output->capacity * 2, output);
+    int result = vsnprintf(output->data + output->length,
+                           remaining_capacity, format, args);
+    va_end(args);
+    return result == -1 ? 0 : result;
+  }
+#else
+  // -1 in standard C99 indicates an encoding error. Return 0 and do nothing.
+  if (bytes_written == -1) {
+    va_end(args);
+    return 0;
+  }
+#endif
+
   if (bytes_written > remaining_capacity) {
     gumbo_string_buffer_reserve(
         parser, output->capacity + bytes_written, output);
|
data/gumbo-parser/src/parser.c
CHANGED
@@ -597,7 +597,7 @@ static void reset_insertion_mode_appropriately(GumboParser* parser) {
|
|
597
597
|
assert(0);
|
598
598
|
}
|
599
599
|
|
600
|
-
static GumboError*
|
600
|
+
static GumboError* parser_add_parse_error(GumboParser* parser, const GumboToken* token) {
|
601
601
|
gumbo_debug("Adding parse error.\n");
|
602
602
|
GumboError* error = gumbo_add_error(parser);
|
603
603
|
if (!error) {
|
@@ -1022,13 +1022,13 @@ static GumboNode* insert_foreign_element(
|
|
1022
1022
|
kLegalXmlns[tag_namespace])) {
|
1023
1023
|
// TODO(jdtang): Since there're multiple possible error codes here, we
|
1024
1024
|
// eventually need reason codes to differentiate them.
|
1025
|
-
|
1025
|
+
parser_add_parse_error(parser, token);
|
1026
1026
|
}
|
1027
1027
|
if (token_has_attribute(token, "xmlns:xlink") &&
|
1028
1028
|
!attribute_matches_case_sensitive(
|
1029
1029
|
&token->v.start_tag.attributes,
|
1030
1030
|
"xmlns:xlink", "http://www.w3.org/1999/xlink")) {
|
1031
|
-
|
1031
|
+
parser_add_parse_error(parser, token);
|
1032
1032
|
}
|
1033
1033
|
return element;
|
1034
1034
|
}
|
@@ -1478,7 +1478,7 @@ static bool close_table_cell(GumboParser* parser, const GumboToken* token,
|
|
1478
1478
|
generate_implied_end_tags(parser, GUMBO_TAG_LAST);
|
1479
1479
|
const GumboNode* node = get_current_node(parser);
|
1480
1480
|
if (!node_tag_is(node, cell_tag)) {
|
1481
|
-
|
1481
|
+
parser_add_parse_error(parser, token);
|
1482
1482
|
result = false;
|
1483
1483
|
}
|
1484
1484
|
do {
|
@@ -1564,7 +1564,7 @@ static bool implicitly_close_tags(
|
|
1564
1564
|
bool result = true;
|
1565
1565
|
generate_implied_end_tags(parser, target);
|
1566
1566
|
if (!node_tag_is(get_current_node(parser), target)) {
|
1567
|
-
|
1567
|
+
parser_add_parse_error(parser, token);
|
1568
1568
|
while (!node_tag_is(get_current_node(parser), target)) {
|
1569
1569
|
pop_current_node(parser);
|
1570
1570
|
}
|
@@ -1728,7 +1728,7 @@ static bool maybe_add_doctype_error(
|
|
1728
1728
|
&kSystemIdXhtmlStrict1_1, false) ||
|
1729
1729
|
doctype_matches(doctype, &kPublicIdXhtml1_1,
|
1730
1730
|
&kSystemIdXhtml1_1, false)))) {
|
1731
|
-
|
1731
|
+
parser_add_parse_error(parser, token);
|
1732
1732
|
return false;
|
1733
1733
|
}
|
1734
1734
|
return true;
|
@@ -1801,12 +1801,12 @@ static bool adoption_agency_algorithm(
|
|
1801
1801
|
}
|
1802
1802
|
|
1803
1803
|
if (!has_an_element_in_scope(parser, formatting_node->v.element.tag)) {
|
1804
|
-
|
1804
|
+
parser_add_parse_error(parser, token);
|
1805
1805
|
gumbo_debug("Element not in scope.\n");
|
1806
1806
|
return false;
|
1807
1807
|
}
|
1808
1808
|
if (formatting_node != get_current_node(parser)) {
|
1809
|
-
|
1809
|
+
parser_add_parse_error(parser, token); // But continue onwards.
|
1810
1810
|
}
|
1811
1811
|
assert(formatting_node);
|
1812
1812
|
assert(!node_tag_is(formatting_node, GUMBO_TAG_HTML));
|
@@ -2018,7 +2018,7 @@ static bool handle_initial(GumboParser* parser, GumboToken* token) {
|
|
2018
2018
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_BEFORE_HTML);
|
2019
2019
|
return maybe_add_doctype_error(parser, token);
|
2020
2020
|
}
|
2021
|
-
|
2021
|
+
parser_add_parse_error(parser, token);
|
2022
2022
|
document->doc_type_quirks_mode = GUMBO_DOCTYPE_QUIRKS;
|
2023
2023
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_BEFORE_HTML);
|
2024
2024
|
parser->_parser_state->_reprocess_current_token = true;
|
@@ -2028,7 +2028,7 @@ static bool handle_initial(GumboParser* parser, GumboToken* token) {
|
|
2028
2028
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#the-before-html-insertion-mode
|
2029
2029
|
static bool handle_before_html(GumboParser* parser, GumboToken* token) {
|
2030
2030
|
if (token->type == GUMBO_TOKEN_DOCTYPE) {
|
2031
|
-
|
2031
|
+
parser_add_parse_error(parser, token);
|
2032
2032
|
ignore_token(parser);
|
2033
2033
|
return false;
|
2034
2034
|
} else if (token->type == GUMBO_TOKEN_COMMENT) {
|
@@ -2045,7 +2045,7 @@ static bool handle_before_html(GumboParser* parser, GumboToken* token) {
|
|
2045
2045
|
} else if (token->type == GUMBO_TOKEN_END_TAG && !tag_in(
|
2046
2046
|
token, false, GUMBO_TAG_HEAD, GUMBO_TAG_BODY, GUMBO_TAG_HTML,
|
2047
2047
|
GUMBO_TAG_BR, GUMBO_TAG_LAST)) {
|
2048
|
-
|
2048
|
+
parser_add_parse_error(parser, token);
|
2049
2049
|
ignore_token(parser);
|
2050
2050
|
return false;
|
2051
2051
|
} else {
|
@@ -2062,7 +2062,7 @@ static bool handle_before_html(GumboParser* parser, GumboToken* token) {
|
|
2062
2062
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#the-before-head-insertion-mode
|
2063
2063
|
static bool handle_before_head(GumboParser* parser, GumboToken* token) {
|
2064
2064
|
if (token->type == GUMBO_TOKEN_DOCTYPE) {
|
2065
|
-
|
2065
|
+
parser_add_parse_error(parser, token);
|
2066
2066
|
ignore_token(parser);
|
2067
2067
|
return false;
|
2068
2068
|
} else if (token->type == GUMBO_TOKEN_COMMENT) {
|
@@ -2079,7 +2079,7 @@ static bool handle_before_head(GumboParser* parser, GumboToken* token) {
|
|
2079
2079
|
} else if (token->type == GUMBO_TOKEN_END_TAG && !tag_in(
|
2080
2080
|
token, false, GUMBO_TAG_HEAD, GUMBO_TAG_BODY, GUMBO_TAG_HTML,
|
2081
2081
|
GUMBO_TAG_BR, GUMBO_TAG_LAST)) {
|
2082
|
-
|
2082
|
+
parser_add_parse_error(parser, token);
|
2083
2083
|
ignore_token(parser);
|
2084
2084
|
return false;
|
2085
2085
|
} else {
|
@@ -2102,7 +2102,7 @@ static bool handle_in_head(GumboParser* parser, GumboToken* token) {
|
|
2102
2102
|
insert_text_token(parser, token);
|
2103
2103
|
return true;
|
2104
2104
|
} else if (token->type == GUMBO_TOKEN_DOCTYPE) {
|
2105
|
-
|
2105
|
+
parser_add_parse_error(parser, token);
|
2106
2106
|
ignore_token(parser);
|
2107
2107
|
return false;
|
2108
2108
|
} else if (token->type == GUMBO_TOKEN_COMMENT) {
|
@@ -2147,14 +2147,18 @@ static bool handle_in_head(GumboParser* parser, GumboToken* token) {
|
|
2147
2147
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_HEAD);
|
2148
2148
|
return true;
|
2149
2149
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_HEAD)) {
|
2150
|
-
|
2150
|
+
parser_add_parse_error(parser, token);
|
2151
2151
|
ignore_token(parser);
|
2152
2152
|
return false;
|
2153
2153
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_HEAD) ||
|
2154
2154
|
(token->type == GUMBO_TOKEN_END_TAG &&
|
2155
2155
|
!tag_in(token, kEndTag, GUMBO_TAG_BODY, GUMBO_TAG_HTML,
|
2156
2156
|
GUMBO_TAG_BR, GUMBO_TAG_LAST))) {
|
2157
|
-
|
2157
|
+
parser_add_parse_error(parser, token);
|
2158
|
+
return false;
|
2159
|
+
} else if (tag_is(token, kStartTag, GUMBO_TAG_UNKNOWN) && token->v.start_tag.is_self_closing) {
|
2160
|
+
parser_add_parse_error(parser, token);
|
2161
|
+
ignore_token(parser);
|
2158
2162
|
return false;
|
2159
2163
|
} else {
|
2160
2164
|
const GumboNode* node = pop_current_node(parser);
|
@@ -2171,7 +2175,7 @@ static bool handle_in_head(GumboParser* parser, GumboToken* token) {
|
|
2171
2175
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-inheadnoscript
|
2172
2176
|
static bool handle_in_head_noscript(GumboParser* parser, GumboToken* token) {
|
2173
2177
|
if (token->type == GUMBO_TOKEN_DOCTYPE) {
|
2174
|
-
|
2178
|
+
parser_add_parse_error(parser, token);
|
2175
2179
|
return false;
|
2176
2180
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
|
2177
2181
|
return handle_in_body(parser, token);
|
@@ -2191,11 +2195,11 @@ static bool handle_in_head_noscript(GumboParser* parser, GumboToken* token) {
|
|
2191
2195
|
GUMBO_TAG_LAST) ||
|
2192
2196
|
(token->type == GUMBO_TOKEN_END_TAG &&
|
2193
2197
|
!tag_is(token, kEndTag, GUMBO_TAG_BR))) {
|
2194
|
-
|
2198
|
+
parser_add_parse_error(parser, token);
|
2195
2199
|
ignore_token(parser);
|
2196
2200
|
return false;
|
2197
2201
|
} else {
|
2198
|
-
|
2202
|
+
parser_add_parse_error(parser, token);
|
2199
2203
|
const GumboNode* node = pop_current_node(parser);
|
2200
2204
|
assert(node_tag_is(node, GUMBO_TAG_NOSCRIPT));
|
2201
2205
|
AVOID_UNUSED_VARIABLE_WARNING(node);
|
@@ -2212,7 +2216,7 @@ static bool handle_after_head(GumboParser* parser, GumboToken* token) {
|
|
2212
2216
|
insert_text_token(parser, token);
|
2213
2217
|
return true;
|
2214
2218
|
} else if (token->type == GUMBO_TOKEN_DOCTYPE) {
|
2215
|
-
|
2219
|
+
parser_add_parse_error(parser, token);
|
2216
2220
|
ignore_token(parser);
|
2217
2221
|
return false;
|
2218
2222
|
} else if (token->type == GUMBO_TOKEN_COMMENT) {
|
@@ -2233,7 +2237,7 @@ static bool handle_after_head(GumboParser* parser, GumboToken* token) {
|
|
2233
2237
|
GUMBO_TAG_BGSOUND, GUMBO_TAG_LINK, GUMBO_TAG_META,
|
2234
2238
|
GUMBO_TAG_NOFRAMES, GUMBO_TAG_SCRIPT, GUMBO_TAG_STYLE,
|
2235
2239
|
GUMBO_TAG_TITLE, GUMBO_TAG_LAST)) {
|
2236
|
-
|
2240
|
+
parser_add_parse_error(parser, token);
|
2237
2241
|
assert(state->_head_element != NULL);
|
2238
2242
|
// This must be flushed before we push the head element on, as there may be
|
2239
2243
|
// pending character tokens that should be attached to the root.
|
@@ -2246,7 +2250,7 @@ static bool handle_after_head(GumboParser* parser, GumboToken* token) {
|
|
2246
2250
|
(token->type == GUMBO_TOKEN_END_TAG &&
|
2247
2251
|
!tag_in(token, kEndTag, GUMBO_TAG_BODY, GUMBO_TAG_HTML,
|
2248
2252
|
GUMBO_TAG_BR, GUMBO_TAG_LAST))) {
|
2249
|
-
|
2253
|
+
parser_add_parse_error(parser, token);
|
2250
2254
|
ignore_token(parser);
|
2251
2255
|
return false;
|
2252
2256
|
} else {
|
@@ -2296,7 +2300,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2296
2300
|
GumboParserState* state = parser->_parser_state;
|
2297
2301
|
assert(state->_open_elements.length > 0);
|
2298
2302
|
if (token->type == GUMBO_TOKEN_NULL) {
|
2299
|
-
|
2303
|
+
parser_add_parse_error(parser, token);
|
2300
2304
|
ignore_token(parser);
|
2301
2305
|
return false;
|
2302
2306
|
} else if (token->type == GUMBO_TOKEN_WHITESPACE) {
|
@@ -2312,13 +2316,13 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2312
2316
|
append_comment_node(parser, get_current_node(parser), token);
|
2313
2317
|
return true;
|
2314
2318
|
} else if (token->type == GUMBO_TOKEN_DOCTYPE) {
|
2315
|
-
|
2319
|
+
parser_add_parse_error(parser, token);
|
2316
2320
|
ignore_token(parser);
|
2317
2321
|
return false;
|
2318
2322
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
|
2319
2323
|
assert(parser->_output->root != NULL);
|
2320
2324
|
assert(parser->_output->root->type == GUMBO_NODE_ELEMENT);
|
2321
|
-
|
2325
|
+
parser_add_parse_error(parser, token);
|
2322
2326
|
merge_attributes(parser, token, parser->_output->root);
|
2323
2327
|
return false;
|
2324
2328
|
} else if (tag_in(token, kStartTag, GUMBO_TAG_BASE, GUMBO_TAG_BASEFONT,
|
@@ -2327,7 +2331,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2327
2331
|
GUMBO_TAG_STYLE, GUMBO_TAG_TITLE, GUMBO_TAG_LAST)) {
|
2328
2332
|
return handle_in_head(parser, token);
|
2329
2333
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_BODY)) {
|
2330
|
-
|
2334
|
+
parser_add_parse_error(parser, token);
|
2331
2335
|
if (state->_open_elements.length < 2 ||
|
2332
2336
|
!node_tag_is(state->_open_elements.data[1], GUMBO_TAG_BODY)) {
|
2333
2337
|
ignore_token(parser);
|
@@ -2337,7 +2341,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2337
2341
|
merge_attributes(parser, token, state->_open_elements.data[1]);
|
2338
2342
|
return false;
|
2339
2343
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_FRAMESET)) {
|
2340
|
-
|
2344
|
+
parser_add_parse_error(parser, token);
|
2341
2345
|
if (state->_open_elements.length < 2 ||
|
2342
2346
|
!node_tag_is(state->_open_elements.data[1], GUMBO_TAG_BODY) ||
|
2343
2347
|
!state->_frameset_ok) {
|
@@ -2382,7 +2386,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2382
2386
|
GUMBO_TAG_TD, GUMBO_TAG_TFOOT, GUMBO_TAG_TH,
|
2383
2387
|
GUMBO_TAG_THEAD, GUMBO_TAG_TR, GUMBO_TAG_BODY,
|
2384
2388
|
GUMBO_TAG_HTML, GUMBO_TAG_LAST)) {
|
2385
|
-
|
2389
|
+
parser_add_parse_error(parser, token);
|
2386
2390
|
return false;
|
2387
2391
|
}
|
2388
2392
|
}
|
@@ -2390,7 +2394,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2390
2394
|
} else if (tag_in(token, kEndTag, GUMBO_TAG_BODY, GUMBO_TAG_HTML,
|
2391
2395
|
GUMBO_TAG_LAST)) {
|
2392
2396
|
if (!has_an_element_in_scope(parser, GUMBO_TAG_BODY)) {
|
2393
|
-
|
2397
|
+
parser_add_parse_error(parser, token);
|
2394
2398
|
ignore_token(parser);
|
2395
2399
|
return false;
|
2396
2400
|
}
|
@@ -2403,7 +2407,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2403
2407
|
GUMBO_TAG_TFOOT, GUMBO_TAG_TH, GUMBO_TAG_THEAD,
|
2404
2408
|
GUMBO_TAG_TR, GUMBO_TAG_BODY, GUMBO_TAG_HTML,
|
2405
2409
|
GUMBO_TAG_LAST)) {
|
2406
|
-
|
2410
|
+
parser_add_parse_error(parser, token);
|
2407
2411
|
success = false;
|
2408
2412
|
break;
|
2409
2413
|
}
|
@@ -2434,7 +2438,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2434
2438
|
if (node_tag_in(get_current_node(parser), GUMBO_TAG_H1, GUMBO_TAG_H2,
|
2435
2439
|
GUMBO_TAG_H3, GUMBO_TAG_H4, GUMBO_TAG_H5, GUMBO_TAG_H6,
|
2436
2440
|
GUMBO_TAG_LAST)) {
|
2437
|
-
|
2441
|
+
parser_add_parse_error(parser, token);
|
2438
2442
|
pop_current_node(parser);
|
2439
2443
|
result = false;
|
2440
2444
|
}
|
@@ -2450,7 +2454,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2450
2454
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_FORM)) {
|
2451
2455
|
if (state->_form_element != NULL) {
|
2452
2456
|
gumbo_debug("Ignoring nested form.\n");
|
2453
|
-
|
2457
|
+
parser_add_parse_error(parser, token);
|
2454
2458
|
ignore_token(parser);
|
2455
2459
|
return false;
|
2456
2460
|
}
|
@@ -2476,7 +2480,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2476
2480
|
return result;
|
2477
2481
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_BUTTON)) {
|
2478
2482
|
if (has_an_element_in_scope(parser, GUMBO_TAG_BUTTON)) {
|
2479
|
-
|
2483
|
+
parser_add_parse_error(parser, token);
|
2480
2484
|
implicitly_close_tags(parser, token, GUMBO_TAG_BUTTON);
|
2481
2485
|
state->_reprocess_current_token = true;
|
2482
2486
|
return false;
|
@@ -2496,7 +2500,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2496
2500
|
GUMBO_TAG_LAST)) {
|
2497
2501
|
GumboTag tag = token->v.end_tag;
|
2498
2502
|
if (!has_an_element_in_scope(parser, tag)) {
|
2499
|
-
|
2503
|
+
parser_add_parse_error(parser, token);
|
2500
2504
|
ignore_token(parser);
|
2501
2505
|
return false;
|
2502
2506
|
}
|
@@ -2509,7 +2513,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2509
2513
|
state->_form_element = NULL;
|
2510
2514
|
if (!node || !has_node_in_scope(parser, node)) {
|
2511
2515
|
gumbo_debug("Closing an unopened form.\n");
|
2512
|
-
|
2516
|
+
parser_add_parse_error(parser, token);
|
2513
2517
|
ignore_token(parser);
|
2514
2518
|
return false;
|
2515
2519
|
}
|
@@ -2517,7 +2521,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2517
2521
|
// <form> element; other nodes are left in scope.
|
2518
2522
|
generate_implied_end_tags(parser, GUMBO_TAG_LAST);
|
2519
2523
|
if (get_current_node(parser) != node) {
|
2520
|
-
|
2524
|
+
parser_add_parse_error(parser, token);
|
2521
2525
|
result = false;
|
2522
2526
|
}
|
2523
2527
|
|
@@ -2529,7 +2533,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2529
2533
|
return result;
|
2530
2534
|
} else if (tag_is(token, kEndTag, GUMBO_TAG_P)) {
|
2531
2535
|
if (!has_an_element_in_button_scope(parser, GUMBO_TAG_P)) {
|
2532
|
-
|
2536
|
+
parser_add_parse_error(parser, token);
|
2533
2537
|
reconstruct_active_formatting_elements(parser);
|
2534
2538
|
insert_element_of_tag_type(
|
2535
2539
|
parser, GUMBO_TAG_P, GUMBO_INSERTION_CONVERTED_FROM_END_TAG);
|
@@ -2539,7 +2543,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2539
2543
|
return implicitly_close_tags(parser, token, GUMBO_TAG_P);
|
2540
2544
|
} else if (tag_is(token, kEndTag, GUMBO_TAG_LI)) {
|
2541
2545
|
if (!has_an_element_in_list_scope(parser, GUMBO_TAG_LI)) {
|
2542
|
-
|
2546
|
+
parser_add_parse_error(parser, token);
|
2543
2547
|
ignore_token(parser);
|
2544
2548
|
return false;
|
2545
2549
|
}
|
@@ -2549,7 +2553,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2549
2553
|
assert(token->type == GUMBO_TOKEN_END_TAG);
|
2550
2554
|
GumboTag token_tag = token->v.end_tag;
|
2551
2555
|
if (!has_an_element_in_scope(parser, token_tag)) {
|
2552
|
-
|
2556
|
+
parser_add_parse_error(parser, token);
|
2553
2557
|
ignore_token(parser);
|
2554
2558
|
return false;
|
2555
2559
|
}
|
@@ -2560,7 +2564,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2560
2564
|
parser, GUMBO_TAG_H1, GUMBO_TAG_H2, GUMBO_TAG_H3, GUMBO_TAG_H4,
|
2561
2565
|
GUMBO_TAG_H5, GUMBO_TAG_H6, GUMBO_TAG_LAST)) {
|
2562
2566
|
// No heading open; ignore the token entirely.
|
2563
|
-
|
2567
|
+
parser_add_parse_error(parser, token);
|
2564
2568
|
ignore_token(parser);
|
2565
2569
|
return false;
|
2566
2570
|
} else {
|
@@ -2572,7 +2576,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2572
2576
|
// record a parse error.
|
2573
2577
|
// TODO(jdtang): Add a way to distinguish this error case from the one
|
2574
2578
|
// above.
|
2575
|
-
|
2579
|
+
parser_add_parse_error(parser, token);
|
2576
2580
|
}
|
2577
2581
|
do {
|
2578
2582
|
current_node = pop_current_node(parser);
|
@@ -2587,7 +2591,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2587
2591
|
int has_matching_a = find_last_anchor_index(parser, &last_a);
|
2588
2592
|
if (has_matching_a) {
|
2589
2593
|
assert(has_matching_a == 1);
|
2590
|
-
|
2594
|
+
parser_add_parse_error(parser, token);
|
2591
2595
|
adoption_agency_algorithm(parser, token, GUMBO_TAG_A);
|
2592
2596
|
// The adoption agency algorithm usually removes all instances of <a>
|
2593
2597
|
// from the list of active formatting elements, but in case it doesn't,
|
@@ -2617,7 +2621,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2617
2621
|
reconstruct_active_formatting_elements(parser);
|
2618
2622
|
if (has_an_element_in_scope(parser, GUMBO_TAG_NOBR)) {
|
2619
2623
|
result = false;
|
2620
|
-
|
2624
|
+
parser_add_parse_error(parser, token);
|
2621
2625
|
adoption_agency_algorithm(parser, token, GUMBO_TAG_NOBR);
|
2622
2626
|
reconstruct_active_formatting_elements(parser);
|
2623
2627
|
}
|
@@ -2641,7 +2645,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2641
2645
|
GUMBO_TAG_OBJECT, GUMBO_TAG_LAST)) {
|
2642
2646
|
GumboTag token_tag = token->v.end_tag;
|
2643
2647
|
if (!has_an_element_in_table_scope(parser, token_tag)) {
|
2644
|
-
|
2648
|
+
parser_add_parse_error(parser, token);
|
2645
2649
|
ignore_token(parser);
|
2646
2650
|
return false;
|
2647
2651
|
}
|
@@ -2663,14 +2667,14 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2663
2667
|
bool success = true;
|
2664
2668
|
if (tag_is(token, kStartTag, GUMBO_TAG_IMAGE)) {
|
2665
2669
|
success = false;
|
2666
|
-
|
2670
|
+
parser_add_parse_error(parser, token);
|
2667
2671
|
token->v.start_tag.tag = GUMBO_TAG_IMG;
|
2668
2672
|
}
|
2669
2673
|
reconstruct_active_formatting_elements(parser);
|
2670
2674
|
GumboNode* node = insert_element_from_token(parser, token);
|
2671
2675
|
if (tag_is(token, kStartTag, GUMBO_TAG_IMAGE)) {
|
2672
2676
|
success = false;
|
2673
|
-
|
2677
|
+
parser_add_parse_error(parser, token);
|
2674
2678
|
node->v.element.tag = GUMBO_TAG_IMG;
|
2675
2679
|
node->parse_flags |= GUMBO_INSERTION_FROM_IMAGE;
|
2676
2680
|
}
|
@@ -2703,7 +2707,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2703
2707
|
set_frameset_not_ok(parser);
|
2704
2708
|
return result;
|
2705
2709
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_ISINDEX)) {
|
2706
|
-
|
2710
|
+
parser_add_parse_error(parser, token);
|
2707
2711
|
if (parser->_parser_state->_form_element != NULL) {
|
2708
2712
|
ignore_token(parser);
|
2709
2713
|
return false;
|
@@ -2715,7 +2719,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2715
2719
|
GumboVector* token_attrs = &token->v.start_tag.attributes;
|
2716
2720
|
GumboAttribute* prompt_attr = gumbo_get_attribute(token_attrs, "prompt");
|
2717
2721
|
GumboAttribute* action_attr = gumbo_get_attribute(token_attrs, "action");
|
2718
|
-
GumboAttribute* name_attr = gumbo_get_attribute(token_attrs, "
|
2722
|
+
GumboAttribute* name_attr = gumbo_get_attribute(token_attrs, "name");
|
2719
2723
|
|
2720
2724
|
GumboNode* form = insert_element_of_tag_type(
|
2721
2725
|
parser, GUMBO_TAG_FORM, GUMBO_INSERTION_FROM_ISINDEX);
|
@@ -2831,13 +2835,13 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2831
2835
|
generate_implied_end_tags(parser, GUMBO_TAG_LAST);
|
2832
2836
|
}
|
2833
2837
|
if (!node_tag_is(get_current_node(parser), GUMBO_TAG_RUBY)) {
|
2834
|
-
|
2838
|
+
parser_add_parse_error(parser, token);
|
2835
2839
|
success = false;
|
2836
2840
|
}
|
2837
2841
|
insert_element_from_token(parser, token);
|
2838
2842
|
return success;
|
2839
2843
|
} else if (tag_is(token, kEndTag, GUMBO_TAG_BR)) {
|
2840
|
-
|
2844
|
+
parser_add_parse_error(parser, token);
|
2841
2845
|
reconstruct_active_formatting_elements(parser);
|
2842
2846
|
insert_element_of_tag_type(
|
2843
2847
|
parser, GUMBO_TAG_BR, GUMBO_INSERTION_CONVERTED_FROM_END_TAG);
|
@@ -2868,7 +2872,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2868
2872
|
GUMBO_TAG_TBODY, GUMBO_TAG_TD, GUMBO_TAG_TFOOT,
|
2869
2873
|
GUMBO_TAG_TH, GUMBO_TAG_THEAD, GUMBO_TAG_TR,
|
2870
2874
|
GUMBO_TAG_LAST)) {
|
2871
|
-
|
2875
|
+
parser_add_parse_error(parser, token);
|
2872
2876
|
ignore_token(parser);
|
2873
2877
|
return false;
|
2874
2878
|
} else if (token->type == GUMBO_TOKEN_START_TAG) {
|
@@ -2897,7 +2901,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2897
2901
|
while (node != pop_current_node(parser)); // Pop everything.
|
2898
2902
|
return true;
|
2899
2903
|
} else if (is_special_node(node)) {
|
2900
|
-
|
2904
|
+
parser_add_parse_error(parser, token);
|
2901
2905
|
ignore_token(parser);
|
2902
2906
|
return false;
|
2903
2907
|
}
|
@@ -2921,7 +2925,7 @@ static bool handle_text(GumboParser* parser, GumboToken* token) {
|
|
2921
2925
|
// This behavior doesn't support document.write of partial HTML elements,
|
2922
2926
|
// but should be adequate for almost all other scripting support.
|
2923
2927
|
if (token->type == GUMBO_TOKEN_EOF) {
|
2924
|
-
|
2928
|
+
parser_add_parse_error(parser, token);
|
2925
2929
|
parser->_parser_state->_reprocess_current_token = true;
|
2926
2930
|
}
|
2927
2931
|
pop_current_node(parser);
|
@@ -2946,7 +2950,7 @@ static bool handle_in_table(GumboParser* parser, GumboToken* token) {
|
|
2946
2950
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_TEXT);
|
2947
2951
|
return true;
|
2948
2952
|
} else if (token->type == GUMBO_TOKEN_DOCTYPE) {
|
2949
|
-
|
2953
|
+
parser_add_parse_error(parser, token);
|
2950
2954
|
ignore_token(parser);
|
2951
2955
|
return false;
|
2952
2956
|
} else if (token->type == GUMBO_TOKEN_COMMENT) {
|
@@ -2985,7 +2989,7 @@ static bool handle_in_table(GumboParser* parser, GumboToken* token) {
|
|
2985
2989
|
}
|
2986
2990
|
return true;
|
2987
2991
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_TABLE)) {
|
2988
|
-
|
2992
|
+
parser_add_parse_error(parser, token);
|
2989
2993
|
if (close_table(parser)) {
|
2990
2994
|
parser->_parser_state->_reprocess_current_token = true;
|
2991
2995
|
} else {
|
@@ -2994,7 +2998,7 @@ static bool handle_in_table(GumboParser* parser, GumboToken* token) {
|
|
2994
2998
|
return false;
|
2995
2999
|
} else if (tag_is(token, kEndTag, GUMBO_TAG_TABLE)) {
|
2996
3000
|
if (!close_table(parser)) {
|
2997
|
-
|
3001
|
+
parser_add_parse_error(parser, token);
|
2998
3002
|
return false;
|
2999
3003
|
}
|
3000
3004
|
return true;
|
@@ -3003,7 +3007,7 @@ static bool handle_in_table(GumboParser* parser, GumboToken* token) {
|
|
3003
3007
|
GUMBO_TAG_TBODY, GUMBO_TAG_TD, GUMBO_TAG_TFOOT,
|
3004
3008
|
GUMBO_TAG_TH, GUMBO_TAG_THEAD, GUMBO_TAG_TR,
|
3005
3009
|
GUMBO_TAG_LAST)) {
|
3006
|
-
|
3010
|
+
parser_add_parse_error(parser, token);
|
3007
3011
|
ignore_token(parser);
|
3008
3012
|
return false;
|
3009
3013
|
} else if (tag_in(token, kStartTag, GUMBO_TAG_STYLE, GUMBO_TAG_SCRIPT,
|
@@ -3012,12 +3016,12 @@ static bool handle_in_table(GumboParser* parser, GumboToken* token) {
|
|
3012
3016
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_INPUT) &&
|
3013
3017
|
attribute_matches(&token->v.start_tag.attributes,
|
3014
3018
|
"type", "hidden")) {
|
3015
|
-
|
3019
|
+
parser_add_parse_error(parser, token);
|
3016
3020
|
insert_element_from_token(parser, token);
|
3017
3021
|
pop_current_node(parser);
|
3018
3022
|
return false;
|
3019
3023
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_FORM)) {
|
3020
|
-
|
3024
|
+
parser_add_parse_error(parser, token);
|
3021
3025
|
if (state->_form_element) {
|
3022
3026
|
ignore_token(parser);
|
3023
3027
|
return false;
|
@@ -3027,12 +3031,12 @@ static bool handle_in_table(GumboParser* parser, GumboToken* token) {
|
|
3027
3031
|
return false;
|
3028
3032
|
} else if (token->type == GUMBO_TOKEN_EOF) {
|
3029
3033
|
if (!node_tag_is(get_current_node(parser), GUMBO_TAG_HTML)) {
|
3030
|
-
|
3034
|
+
parser_add_parse_error(parser, token);
|
3031
3035
|
return false;
|
3032
3036
|
}
|
3033
3037
|
return true;
|
3034
3038
|
} else {
|
3035
|
-
|
3039
|
+
parser_add_parse_error(parser, token);
|
3036
3040
|
state->_foster_parent_insertions = true;
|
3037
3041
|
bool result = handle_in_body(parser, token);
|
3038
3042
|
state->_foster_parent_insertions = false;
|
@@ -3043,7 +3047,7 @@ static bool handle_in_table(GumboParser* parser, GumboToken* token) {
|
|
3043
3047
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-intabletext
|
3044
3048
|
static bool handle_in_table_text(GumboParser* parser, GumboToken* token) {
|
3045
3049
|
if (token->type == GUMBO_TOKEN_NULL) {
|
3046
|
-
|
3050
|
+
parser_add_parse_error(parser, token);
|
3047
3051
|
ignore_token(parser);
|
3048
3052
|
return false;
|
3049
3053
|
} else if (token->type == GUMBO_TOKEN_CHARACTER ||
|
@@ -3082,18 +3086,18 @@ static bool handle_in_caption(GumboParser* parser, GumboToken* token) {
|
|
3082
3086
|
tag_in(token, kEndTag, GUMBO_TAG_CAPTION, GUMBO_TAG_TABLE,
|
3083
3087
|
GUMBO_TAG_LAST)) {
|
3084
3088
|
if (!has_an_element_in_table_scope(parser, GUMBO_TAG_CAPTION)) {
|
3085
|
-
|
3089
|
+
parser_add_parse_error(parser, token);
|
3086
3090
|
ignore_token(parser);
|
3087
3091
|
return false;
|
3088
3092
|
}
|
3089
3093
|
if (!tag_is(token, kEndTag, GUMBO_TAG_CAPTION)) {
|
3090
|
-
|
3094
|
+
parser_add_parse_error(parser, token);
|
3091
3095
|
parser->_parser_state->_reprocess_current_token = true;
|
3092
3096
|
}
|
3093
3097
|
generate_implied_end_tags(parser, GUMBO_TAG_LAST);
|
3094
3098
|
bool result = true;
|
3095
3099
|
if (!node_tag_is(get_current_node(parser), GUMBO_TAG_CAPTION)) {
|
3096
|
-
|
3100
|
+
parser_add_parse_error(parser, token);
|
3097
3101
|
while (!node_tag_is(get_current_node(parser), GUMBO_TAG_CAPTION)) {
|
3098
3102
|
pop_current_node(parser);
|
3099
3103
|
}
|
@@ -3107,7 +3111,7 @@ static bool handle_in_caption(GumboParser* parser, GumboToken* token) {
|
|
3107
3111
|
GUMBO_TAG_COLGROUP, GUMBO_TAG_HTML, GUMBO_TAG_TBODY,
|
3108
3112
|
GUMBO_TAG_TD, GUMBO_TAG_TFOOT, GUMBO_TAG_TH,
|
3109
3113
|
GUMBO_TAG_THEAD, GUMBO_TAG_TR, GUMBO_TAG_LAST)) {
|
3110
|
-
|
3114
|
+
parser_add_parse_error(parser, token);
|
3111
3115
|
ignore_token(parser);
|
3112
3116
|
return false;
|
3113
3117
|
} else {
|
@@ -3121,7 +3125,7 @@ static bool handle_in_column_group(GumboParser* parser, GumboToken* token) {
|
|
3121
3125
|
insert_text_token(parser, token);
|
3122
3126
|
return true;
|
3123
3127
|
} else if (token->type == GUMBO_TOKEN_DOCTYPE) {
|
3124
|
-
|
3128
|
+
parser_add_parse_error(parser, token);
|
3125
3129
|
ignore_token(parser);
|
3126
3130
|
return false;
|
3127
3131
|
} else if (token->type == GUMBO_TOKEN_COMMENT) {
|
@@ -3135,7 +3139,7 @@ static bool handle_in_column_group(GumboParser* parser, GumboToken* token) {
|
|
3135
3139
|
acknowledge_self_closing_tag(parser);
|
3136
3140
|
return true;
|
3137
3141
|
} else if (tag_is(token, kEndTag, GUMBO_TAG_COL)) {
|
3138
|
-
|
3142
|
+
parser_add_parse_error(parser, token);
|
3139
3143
|
ignore_token(parser);
|
3140
3144
|
return false;
|
3141
3145
|
} else if (token->type == GUMBO_TOKEN_EOF &&
|
@@ -3143,7 +3147,7 @@ static bool handle_in_column_group(GumboParser* parser, GumboToken* token) {
|
|
3143
3147
|
return true;
|
3144
3148
|
} else {
|
3145
3149
|
if (get_current_node(parser) == parser->_output->root) {
|
3146
|
-
|
3150
|
+
parser_add_parse_error(parser, token);
|
3147
3151
|
return false;
|
3148
3152
|
}
|
3149
3153
|
assert(node_tag_is(get_current_node(parser), GUMBO_TAG_COLGROUP));
|
@@ -3165,7 +3169,7 @@ static bool handle_in_table_body(GumboParser* parser, GumboToken* token) {
|
|
3165
3169
|
return true;
|
3166
3170
|
} else if (tag_in(token, kStartTag, GUMBO_TAG_TD, GUMBO_TAG_TH,
|
3167
3171
|
GUMBO_TAG_LAST)) {
|
3168
|
-
|
3172
|
+
parser_add_parse_error(parser, token);
|
3169
3173
|
clear_stack_to_table_body_context(parser);
|
3170
3174
|
insert_element_of_tag_type(parser, GUMBO_TAG_TR, GUMBO_INSERTION_IMPLIED);
|
3171
3175
|
parser->_parser_state->_reprocess_current_token = true;
|
@@ -3174,7 +3178,7 @@ static bool handle_in_table_body(GumboParser* parser, GumboToken* token) {
|
|
3174
3178
|
} else if (tag_in(token, kEndTag, GUMBO_TAG_TBODY, GUMBO_TAG_TFOOT,
|
3175
3179
|
GUMBO_TAG_THEAD, GUMBO_TAG_LAST)) {
|
3176
3180
|
if (!has_an_element_in_table_scope(parser, token->v.end_tag)) {
|
3177
|
-
|
3181
|
+
parser_add_parse_error(parser, token);
|
3178
3182
|
ignore_token(parser);
|
3179
3183
|
return false;
|
3180
3184
|
}
|
@@ -3189,7 +3193,7 @@ static bool handle_in_table_body(GumboParser* parser, GumboToken* token) {
|
|
3189
3193
|
if (!(has_an_element_in_table_scope(parser, GUMBO_TAG_TBODY) ||
|
3190
3194
|
has_an_element_in_table_scope(parser, GUMBO_TAG_THEAD) ||
|
3191
3195
|
has_an_element_in_table_scope(parser, GUMBO_TAG_TFOOT))) {
|
3192
|
-
|
3196
|
+
parser_add_parse_error(parser, token);
|
3193
3197
|
ignore_token(parser);
|
3194
3198
|
return false;
|
3195
3199
|
}
|
@@ -3202,7 +3206,7 @@ static bool handle_in_table_body(GumboParser* parser, GumboToken* token) {
|
|
3202
3206
|
GUMBO_TAG_COL, GUMBO_TAG_TR, GUMBO_TAG_COLGROUP,
|
3203
3207
|
GUMBO_TAG_HTML, GUMBO_TAG_TD, GUMBO_TAG_TH, GUMBO_TAG_LAST))
|
3204
3208
|
{
|
3205
|
-
|
3209
|
+
parser_add_parse_error(parser, token);
|
3206
3210
|
ignore_token(parser);
|
3207
3211
|
return false;
|
3208
3212
|
} else {
|
@@ -3238,7 +3242,7 @@ static bool handle_in_row(GumboParser* parser, GumboToken* token) {
|
|
3238
3242
|
const GumboNode* node = parser->_parser_state->_open_elements.data[i];
|
3239
3243
|
gumbo_debug("%s\n", gumbo_normalized_tagname(node->v.element.tag));
|
3240
3244
|
}
|
3241
|
-
|
3245
|
+
parser_add_parse_error(parser, token);
|
3242
3246
|
ignore_token(parser);
|
3243
3247
|
return false;
|
3244
3248
|
}
|
@@ -3254,7 +3258,7 @@ static bool handle_in_row(GumboParser* parser, GumboToken* token) {
|
|
3254
3258
|
} else if (tag_in(token, kEndTag, GUMBO_TAG_BODY, GUMBO_TAG_CAPTION,
|
3255
3259
|
GUMBO_TAG_COL, GUMBO_TAG_COLGROUP, GUMBO_TAG_HTML,
|
3256
3260
|
GUMBO_TAG_TD, GUMBO_TAG_TH, GUMBO_TAG_LAST)) {
|
3257
|
-
|
3261
|
+
parser_add_parse_error(parser, token);
|
3258
3262
|
ignore_token(parser);
|
3259
3263
|
return false;
|
3260
3264
|
} else {
|
@@ -3267,7 +3271,7 @@ static bool handle_in_cell(GumboParser* parser, GumboToken* token) {
|
|
3267
3271
|
if (tag_in(token, kEndTag, GUMBO_TAG_TD, GUMBO_TAG_TH, GUMBO_TAG_LAST)) {
|
3268
3272
|
GumboTag token_tag = token->v.end_tag;
|
3269
3273
|
if (!has_an_element_in_table_scope(parser, token_tag)) {
|
3270
|
-
|
3274
|
+
parser_add_parse_error(parser, token);
|
3271
3275
|
return false;
|
3272
3276
|
}
|
3273
3277
|
return close_table_cell(parser, token, token_tag);
|
@@ -3279,7 +3283,7 @@ static bool handle_in_cell(GumboParser* parser, GumboToken* token) {
|
|
3279
3283
|
if (!has_an_element_in_table_scope(parser, GUMBO_TAG_TH) &&
|
3280
3284
|
!has_an_element_in_table_scope(parser, GUMBO_TAG_TD)) {
|
3281
3285
|
gumbo_debug("Bailing out because there's no <td> or <th> in scope.\n");
|
3282
|
-
|
3286
|
+
parser_add_parse_error(parser, token);
|
3283
3287
|
ignore_token(parser);
|
3284
3288
|
return false;
|
3285
3289
|
}
|
@@ -3288,14 +3292,14 @@ static bool handle_in_cell(GumboParser* parser, GumboToken* token) {
|
|
3288
3292
|
} else if (tag_in(token, kEndTag, GUMBO_TAG_BODY, GUMBO_TAG_CAPTION,
|
3289
3293
|
GUMBO_TAG_COL, GUMBO_TAG_COLGROUP, GUMBO_TAG_HTML,
|
3290
3294
|
GUMBO_TAG_LAST)) {
|
3291
|
-
|
3295
|
+
parser_add_parse_error(parser, token);
|
3292
3296
|
ignore_token(parser);
|
3293
3297
|
return false;
|
3294
3298
|
} else if (tag_in(token, kEndTag, GUMBO_TAG_TABLE, GUMBO_TAG_TBODY,
|
3295
3299
|
GUMBO_TAG_TFOOT, GUMBO_TAG_THEAD, GUMBO_TAG_TR,
|
3296
3300
|
GUMBO_TAG_LAST)) {
|
3297
3301
|
if (!has_an_element_in_table_scope(parser, token->v.end_tag)) {
|
3298
|
-
|
3302
|
+
parser_add_parse_error(parser, token);
|
3299
3303
|
ignore_token(parser);
|
3300
3304
|
return false;
|
3301
3305
|
}
|
@@ -3309,7 +3313,7 @@ static bool handle_in_cell(GumboParser* parser, GumboToken* token) {
|
|
3309
3313
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-inselect
|
3310
3314
|
static bool handle_in_select(GumboParser* parser, GumboToken* token) {
|
3311
3315
|
if (token->type == GUMBO_TOKEN_NULL) {
|
3312
|
-
|
3316
|
+
parser_add_parse_error(parser, token);
|
3313
3317
|
ignore_token(parser);
|
3314
3318
|
return false;
|
3315
3319
|
} else if (token->type == GUMBO_TOKEN_CHARACTER ||
|
@@ -3317,7 +3321,7 @@ static bool handle_in_select(GumboParser* parser, GumboToken* token) {
|
|
3317
3321
|
insert_text_token(parser, token);
|
3318
3322
|
return true;
|
3319
3323
|
} else if (token->type == GUMBO_TOKEN_DOCTYPE) {
|
3320
|
-
|
3324
|
+
parser_add_parse_error(parser, token);
|
3321
3325
|
ignore_token(parser);
|
3322
3326
|
return false;
|
3323
3327
|
} else if (token->type == GUMBO_TOKEN_COMMENT) {
|
@@ -3351,7 +3355,7 @@ static bool handle_in_select(GumboParser* parser, GumboToken* token) {
|
|
3351
3355
|
pop_current_node(parser);
|
3352
3356
|
return true;
|
3353
3357
|
} else {
|
3354
|
-
|
3358
|
+
parser_add_parse_error(parser, token);
|
3355
3359
|
ignore_token(parser);
|
3356
3360
|
return false;
|
3357
3361
|
}
|
@@ -3360,26 +3364,26 @@ static bool handle_in_select(GumboParser* parser, GumboToken* token) {
|
|
3360
3364
|
pop_current_node(parser);
|
3361
3365
|
return true;
|
3362
3366
|
} else {
|
3363
|
-
|
3367
|
+
parser_add_parse_error(parser, token);
|
3364
3368
|
ignore_token(parser);
|
3365
3369
|
return false;
|
3366
3370
|
}
|
3367
3371
|
} else if (tag_is(token, kEndTag, GUMBO_TAG_SELECT)) {
|
3368
3372
|
if (!has_an_element_in_select_scope(parser, GUMBO_TAG_SELECT)) {
|
3369
|
-
|
3373
|
+
parser_add_parse_error(parser, token);
|
3370
3374
|
ignore_token(parser);
|
3371
3375
|
return false;
|
3372
3376
|
}
|
3373
3377
|
close_current_select(parser);
|
3374
3378
|
return true;
|
3375
3379
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_SELECT)) {
|
3376
|
-
|
3380
|
+
parser_add_parse_error(parser, token);
|
3377
3381
|
ignore_token(parser);
|
3378
3382
|
close_current_select(parser);
|
3379
3383
|
return false;
|
3380
3384
|
} else if (tag_in(token, kStartTag, GUMBO_TAG_INPUT, GUMBO_TAG_KEYGEN,
|
3381
3385
|
GUMBO_TAG_TEXTAREA, GUMBO_TAG_LAST)) {
|
3382
|
-
|
3386
|
+
parser_add_parse_error(parser, token);
|
3383
3387
|
if (!has_an_element_in_select_scope(parser, GUMBO_TAG_SELECT)) {
|
3384
3388
|
ignore_token(parser);
|
3385
3389
|
} else {
|
@@ -3391,12 +3395,12 @@ static bool handle_in_select(GumboParser* parser, GumboToken* token) {
|
|
3391
3395
|
return handle_in_head(parser, token);
|
3392
3396
|
} else if (token->type == GUMBO_TOKEN_EOF) {
|
3393
3397
|
if (get_current_node(parser) != parser->_output->root) {
|
3394
|
-
|
3398
|
+
parser_add_parse_error(parser, token);
|
3395
3399
|
return false;
|
3396
3400
|
}
|
3397
3401
|
return true;
|
3398
3402
|
} else {
|
3399
|
-
|
3403
|
+
parser_add_parse_error(parser, token);
|
3400
3404
|
ignore_token(parser);
|
3401
3405
|
return false;
|
3402
3406
|
}
|
@@ -3407,14 +3411,14 @@ static bool handle_in_select_in_table(GumboParser* parser, GumboToken* token) {
|
|
3407
3411
|
if (tag_in(token, kStartTag, GUMBO_TAG_CAPTION, GUMBO_TAG_TABLE,
|
3408
3412
|
GUMBO_TAG_TBODY, GUMBO_TAG_TFOOT, GUMBO_TAG_THEAD, GUMBO_TAG_TR,
|
3409
3413
|
GUMBO_TAG_TD, GUMBO_TAG_TH, GUMBO_TAG_LAST)) {
|
3410
|
-
|
3414
|
+
parser_add_parse_error(parser, token);
|
3411
3415
|
close_current_select(parser);
|
3412
3416
|
parser->_parser_state->_reprocess_current_token = true;
|
3413
3417
|
return false;
|
3414
3418
|
} else if (tag_in(token, kEndTag, GUMBO_TAG_CAPTION, GUMBO_TAG_TABLE,
|
3415
3419
|
GUMBO_TAG_TBODY, GUMBO_TAG_TFOOT, GUMBO_TAG_THEAD,
|
3416
3420
|
GUMBO_TAG_TR, GUMBO_TAG_TD, GUMBO_TAG_TH, GUMBO_TAG_LAST)) {
|
3417
|
-
|
3421
|
+
parser_add_parse_error(parser, token);
|
3418
3422
|
if (has_an_element_in_table_scope(parser, token->v.end_tag)) {
|
3419
3423
|
close_current_select(parser);
|
3420
3424
|
reset_insertion_mode_appropriately(parser);
|
@@ -3445,7 +3449,7 @@ static bool handle_after_body(GumboParser* parser, GumboToken* token) {
|
|
3445
3449
|
append_comment_node(parser, html_node, token);
|
3446
3450
|
return true;
|
3447
3451
|
} else if (token->type == GUMBO_TOKEN_DOCTYPE) {
|
3448
|
-
|
3452
|
+
parser_add_parse_error(parser, token);
|
3449
3453
|
ignore_token(parser);
|
3450
3454
|
return false;
|
3451
3455
|
} else if (tag_is(token, kEndTag, GUMBO_TAG_HTML)) {
|
@@ -3459,7 +3463,7 @@ static bool handle_after_body(GumboParser* parser, GumboToken* token) {
|
|
3459
3463
|
} else if (token->type == GUMBO_TOKEN_EOF) {
|
3460
3464
|
return true;
|
3461
3465
|
} else {
|
3462
|
-
|
3466
|
+
parser_add_parse_error(parser, token);
|
3463
3467
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_BODY);
|
3464
3468
|
parser->_parser_state->_reprocess_current_token = true;
|
3465
3469
|
return false;
|
@@ -3475,7 +3479,7 @@ static bool handle_in_frameset(GumboParser* parser, GumboToken* token) {
|
|
3475
3479
|
append_comment_node(parser, get_current_node(parser), token);
|
3476
3480
|
return true;
|
3477
3481
|
} else if (token->type == GUMBO_TOKEN_DOCTYPE) {
|
3478
|
-
|
3482
|
+
parser_add_parse_error(parser, token);
|
3479
3483
|
ignore_token(parser);
|
3480
3484
|
return false;
|
3481
3485
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
|
@@ -3485,7 +3489,7 @@ static bool handle_in_frameset(GumboParser* parser, GumboToken* token) {
|
|
3485
3489
|
return true;
|
3486
3490
|
} else if (tag_is(token, kEndTag, GUMBO_TAG_FRAMESET)) {
|
3487
3491
|
if (node_tag_is(get_current_node(parser), GUMBO_TAG_HTML)) {
|
3488
|
-
|
3492
|
+
parser_add_parse_error(parser, token);
|
3489
3493
|
ignore_token(parser);
|
3490
3494
|
return false;
|
3491
3495
|
}
|
@@ -3505,12 +3509,12 @@ static bool handle_in_frameset(GumboParser* parser, GumboToken* token) {
|
|
3505
3509
|
return handle_in_head(parser, token);
|
3506
3510
|
} else if (token->type == GUMBO_TOKEN_EOF) {
|
3507
3511
|
if (!node_tag_is(get_current_node(parser), GUMBO_TAG_HTML)) {
|
3508
|
-
|
3512
|
+
parser_add_parse_error(parser, token);
|
3509
3513
|
return false;
|
3510
3514
|
}
|
3511
3515
|
return true;
|
3512
3516
|
} else {
|
3513
|
-
|
3517
|
+
parser_add_parse_error(parser, token);
|
3514
3518
|
ignore_token(parser);
|
3515
3519
|
return false;
|
3516
3520
|
}
|
@@ -3525,7 +3529,7 @@ static bool handle_after_frameset(GumboParser* parser, GumboToken* token) {
|
|
3525
3529
|
append_comment_node(parser, get_current_node(parser), token);
|
3526
3530
|
return true;
|
3527
3531
|
} else if (token->type == GUMBO_TOKEN_DOCTYPE) {
|
3528
|
-
|
3532
|
+
parser_add_parse_error(parser, token);
|
3529
3533
|
ignore_token(parser);
|
3530
3534
|
return false;
|
3531
3535
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
|
@@ -3542,7 +3546,7 @@ static bool handle_after_frameset(GumboParser* parser, GumboToken* token) {
|
|
3542
3546
|
} else if (token->type == GUMBO_TOKEN_EOF) {
|
3543
3547
|
return true;
|
3544
3548
|
} else {
|
3545
|
-
|
3549
|
+
parser_add_parse_error(parser, token);
|
3546
3550
|
ignore_token(parser);
|
3547
3551
|
return false;
|
3548
3552
|
}
|
@@ -3560,7 +3564,7 @@ static bool handle_after_after_body(GumboParser* parser, GumboToken* token) {
|
|
3560
3564
|
} else if (token->type == GUMBO_TOKEN_EOF) {
|
3561
3565
|
return true;
|
3562
3566
|
} else {
|
3563
|
-
|
3567
|
+
parser_add_parse_error(parser, token);
|
3564
3568
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_BODY);
|
3565
3569
|
parser->_parser_state->_reprocess_current_token = true;
|
3566
3570
|
return false;
|
@@ -3582,7 +3586,7 @@ static bool handle_after_after_frameset(
|
|
3582
3586
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_NOFRAMES)) {
|
3583
3587
|
return handle_in_head(parser, token);
|
3584
3588
|
} else {
|
3585
|
-
|
3589
|
+
parser_add_parse_error(parser, token);
|
3586
3590
|
ignore_token(parser);
|
3587
3591
|
return false;
|
3588
3592
|
}
|
@@ -3626,7 +3630,7 @@ static bool handle_html_content(GumboParser* parser, GumboToken* token) {
|
|
3626
3630
|
static bool handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
|
3627
3631
|
switch (token->type) {
|
3628
3632
|
case GUMBO_TOKEN_NULL:
|
3629
|
-
|
3633
|
+
parser_add_parse_error(parser, token);
|
3630
3634
|
token->type = GUMBO_TOKEN_CHARACTER;
|
3631
3635
|
token->v.character = kUtf8ReplacementChar;
|
3632
3636
|
insert_text_token(parser, token);
|
@@ -3642,7 +3646,7 @@ static bool handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
|
|
3642
3646
|
append_comment_node(parser, get_current_node(parser), token);
|
3643
3647
|
return true;
|
3644
3648
|
case GUMBO_TOKEN_DOCTYPE:
|
3645
|
-
|
3649
|
+
parser_add_parse_error(parser, token);
|
3646
3650
|
ignore_token(parser);
|
3647
3651
|
return false;
|
3648
3652
|
default:
|
@@ -3667,7 +3671,7 @@ static bool handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
|
|
3667
3671
|
token_has_attribute(token, "color") ||
|
3668
3672
|
token_has_attribute(token, "face") ||
|
3669
3673
|
token_has_attribute(token, "size")))) {
|
3670
|
-
|
3674
|
+
parser_add_parse_error(parser, token);
|
3671
3675
|
do {
|
3672
3676
|
pop_current_node(parser);
|
3673
3677
|
} while(!(is_mathml_integration_point(get_current_node(parser)) ||
|
@@ -3707,7 +3711,7 @@ static bool handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
|
|
3707
3711
|
|
3708
3712
|
bool is_success = true;
|
3709
3713
|
if (!gumbo_string_equals_ignore_case(&node_tagname, &token_tagname)) {
|
3710
|
-
|
3714
|
+
parser_add_parse_error(parser, token);
|
3711
3715
|
is_success = false;
|
3712
3716
|
}
|
3713
3717
|
int i = parser->_parser_state->_open_elements.length;
|
@@ -3864,7 +3868,7 @@ GumboOutput* gumbo_parse_with_options(
|
|
3864
3868
|
token.v.start_tag.attributes.data == NULL);
|
3865
3869
|
|
3866
3870
|
if (!state->_self_closing_flag_acknowledged) {
|
3867
|
-
GumboError* error =
|
3871
|
+
GumboError* error = parser_add_parse_error(&parser, &token);
|
3868
3872
|
if (error) {
|
3869
3873
|
error->type = GUMBO_ERR_UNACKNOWLEDGED_SELF_CLOSING_TAG;
|
3870
3874
|
}
|