nokogumbo 1.1.12 → 1.1.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ext/nokogumboc/extconf.rb +1 -1
- data/gumbo-parser/src/char_ref.c +22828 -2291
- data/gumbo-parser/src/char_ref.rl +2548 -0
- data/gumbo-parser/src/error.c +21 -0
- data/gumbo-parser/src/parser.c +109 -105
- data/gumbo-parser/src/tokenizer.c +103 -103
- data/gumbo-parser/src/utf8.c +114 -120
- data/gumbo-parser/src/utf8.h +6 -0
- metadata +3 -2
data/gumbo-parser/src/error.c
CHANGED
@@ -39,6 +39,27 @@ static int print_message(GumboParser* parser, GumboStringBuffer* output,
|
|
39
39
|
int remaining_capacity = output->capacity - output->length;
|
40
40
|
int bytes_written = vsnprintf(output->data + output->length,
|
41
41
|
remaining_capacity, format, args);
|
42
|
+
#ifdef _MSC_VER
|
43
|
+
if (bytes_written == -1) {
|
44
|
+
// vsnprintf returns -1 on MSVC++ if there's not enough capacity, instead of
|
45
|
+
// returning the number of bytes that would've been written had there been
|
46
|
+
// enough. In this case, we'll double the buffer size and hope it fits when
|
47
|
+
// we retry (letting it fail and returning 0 if it doesn't), since there's
|
48
|
+
// no way to smartly resize the buffer.
|
49
|
+
gumbo_string_buffer_reserve(parser, output->capacity * 2, output);
|
50
|
+
int result = vsnprintf(output->data + output->length,
|
51
|
+
remaining_capacity, format, args);
|
52
|
+
va_end(args);
|
53
|
+
return result == -1 ? 0 : result;
|
54
|
+
}
|
55
|
+
#else
|
56
|
+
// -1 in standard C99 indicates an encoding error. Return 0 and do nothing.
|
57
|
+
if (bytes_written == -1) {
|
58
|
+
va_end(args);
|
59
|
+
return 0;
|
60
|
+
}
|
61
|
+
#endif
|
62
|
+
|
42
63
|
if (bytes_written > remaining_capacity) {
|
43
64
|
gumbo_string_buffer_reserve(
|
44
65
|
parser, output->capacity + bytes_written, output);
|
data/gumbo-parser/src/parser.c
CHANGED
@@ -597,7 +597,7 @@ static void reset_insertion_mode_appropriately(GumboParser* parser) {
|
|
597
597
|
assert(0);
|
598
598
|
}
|
599
599
|
|
600
|
-
static GumboError*
|
600
|
+
static GumboError* parser_add_parse_error(GumboParser* parser, const GumboToken* token) {
|
601
601
|
gumbo_debug("Adding parse error.\n");
|
602
602
|
GumboError* error = gumbo_add_error(parser);
|
603
603
|
if (!error) {
|
@@ -1022,13 +1022,13 @@ static GumboNode* insert_foreign_element(
|
|
1022
1022
|
kLegalXmlns[tag_namespace])) {
|
1023
1023
|
// TODO(jdtang): Since there're multiple possible error codes here, we
|
1024
1024
|
// eventually need reason codes to differentiate them.
|
1025
|
-
|
1025
|
+
parser_add_parse_error(parser, token);
|
1026
1026
|
}
|
1027
1027
|
if (token_has_attribute(token, "xmlns:xlink") &&
|
1028
1028
|
!attribute_matches_case_sensitive(
|
1029
1029
|
&token->v.start_tag.attributes,
|
1030
1030
|
"xmlns:xlink", "http://www.w3.org/1999/xlink")) {
|
1031
|
-
|
1031
|
+
parser_add_parse_error(parser, token);
|
1032
1032
|
}
|
1033
1033
|
return element;
|
1034
1034
|
}
|
@@ -1478,7 +1478,7 @@ static bool close_table_cell(GumboParser* parser, const GumboToken* token,
|
|
1478
1478
|
generate_implied_end_tags(parser, GUMBO_TAG_LAST);
|
1479
1479
|
const GumboNode* node = get_current_node(parser);
|
1480
1480
|
if (!node_tag_is(node, cell_tag)) {
|
1481
|
-
|
1481
|
+
parser_add_parse_error(parser, token);
|
1482
1482
|
result = false;
|
1483
1483
|
}
|
1484
1484
|
do {
|
@@ -1564,7 +1564,7 @@ static bool implicitly_close_tags(
|
|
1564
1564
|
bool result = true;
|
1565
1565
|
generate_implied_end_tags(parser, target);
|
1566
1566
|
if (!node_tag_is(get_current_node(parser), target)) {
|
1567
|
-
|
1567
|
+
parser_add_parse_error(parser, token);
|
1568
1568
|
while (!node_tag_is(get_current_node(parser), target)) {
|
1569
1569
|
pop_current_node(parser);
|
1570
1570
|
}
|
@@ -1728,7 +1728,7 @@ static bool maybe_add_doctype_error(
|
|
1728
1728
|
&kSystemIdXhtmlStrict1_1, false) ||
|
1729
1729
|
doctype_matches(doctype, &kPublicIdXhtml1_1,
|
1730
1730
|
&kSystemIdXhtml1_1, false)))) {
|
1731
|
-
|
1731
|
+
parser_add_parse_error(parser, token);
|
1732
1732
|
return false;
|
1733
1733
|
}
|
1734
1734
|
return true;
|
@@ -1801,12 +1801,12 @@ static bool adoption_agency_algorithm(
|
|
1801
1801
|
}
|
1802
1802
|
|
1803
1803
|
if (!has_an_element_in_scope(parser, formatting_node->v.element.tag)) {
|
1804
|
-
|
1804
|
+
parser_add_parse_error(parser, token);
|
1805
1805
|
gumbo_debug("Element not in scope.\n");
|
1806
1806
|
return false;
|
1807
1807
|
}
|
1808
1808
|
if (formatting_node != get_current_node(parser)) {
|
1809
|
-
|
1809
|
+
parser_add_parse_error(parser, token); // But continue onwards.
|
1810
1810
|
}
|
1811
1811
|
assert(formatting_node);
|
1812
1812
|
assert(!node_tag_is(formatting_node, GUMBO_TAG_HTML));
|
@@ -2018,7 +2018,7 @@ static bool handle_initial(GumboParser* parser, GumboToken* token) {
|
|
2018
2018
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_BEFORE_HTML);
|
2019
2019
|
return maybe_add_doctype_error(parser, token);
|
2020
2020
|
}
|
2021
|
-
|
2021
|
+
parser_add_parse_error(parser, token);
|
2022
2022
|
document->doc_type_quirks_mode = GUMBO_DOCTYPE_QUIRKS;
|
2023
2023
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_BEFORE_HTML);
|
2024
2024
|
parser->_parser_state->_reprocess_current_token = true;
|
@@ -2028,7 +2028,7 @@ static bool handle_initial(GumboParser* parser, GumboToken* token) {
|
|
2028
2028
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#the-before-html-insertion-mode
|
2029
2029
|
static bool handle_before_html(GumboParser* parser, GumboToken* token) {
|
2030
2030
|
if (token->type == GUMBO_TOKEN_DOCTYPE) {
|
2031
|
-
|
2031
|
+
parser_add_parse_error(parser, token);
|
2032
2032
|
ignore_token(parser);
|
2033
2033
|
return false;
|
2034
2034
|
} else if (token->type == GUMBO_TOKEN_COMMENT) {
|
@@ -2045,7 +2045,7 @@ static bool handle_before_html(GumboParser* parser, GumboToken* token) {
|
|
2045
2045
|
} else if (token->type == GUMBO_TOKEN_END_TAG && !tag_in(
|
2046
2046
|
token, false, GUMBO_TAG_HEAD, GUMBO_TAG_BODY, GUMBO_TAG_HTML,
|
2047
2047
|
GUMBO_TAG_BR, GUMBO_TAG_LAST)) {
|
2048
|
-
|
2048
|
+
parser_add_parse_error(parser, token);
|
2049
2049
|
ignore_token(parser);
|
2050
2050
|
return false;
|
2051
2051
|
} else {
|
@@ -2062,7 +2062,7 @@ static bool handle_before_html(GumboParser* parser, GumboToken* token) {
|
|
2062
2062
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#the-before-head-insertion-mode
|
2063
2063
|
static bool handle_before_head(GumboParser* parser, GumboToken* token) {
|
2064
2064
|
if (token->type == GUMBO_TOKEN_DOCTYPE) {
|
2065
|
-
|
2065
|
+
parser_add_parse_error(parser, token);
|
2066
2066
|
ignore_token(parser);
|
2067
2067
|
return false;
|
2068
2068
|
} else if (token->type == GUMBO_TOKEN_COMMENT) {
|
@@ -2079,7 +2079,7 @@ static bool handle_before_head(GumboParser* parser, GumboToken* token) {
|
|
2079
2079
|
} else if (token->type == GUMBO_TOKEN_END_TAG && !tag_in(
|
2080
2080
|
token, false, GUMBO_TAG_HEAD, GUMBO_TAG_BODY, GUMBO_TAG_HTML,
|
2081
2081
|
GUMBO_TAG_BR, GUMBO_TAG_LAST)) {
|
2082
|
-
|
2082
|
+
parser_add_parse_error(parser, token);
|
2083
2083
|
ignore_token(parser);
|
2084
2084
|
return false;
|
2085
2085
|
} else {
|
@@ -2102,7 +2102,7 @@ static bool handle_in_head(GumboParser* parser, GumboToken* token) {
|
|
2102
2102
|
insert_text_token(parser, token);
|
2103
2103
|
return true;
|
2104
2104
|
} else if (token->type == GUMBO_TOKEN_DOCTYPE) {
|
2105
|
-
|
2105
|
+
parser_add_parse_error(parser, token);
|
2106
2106
|
ignore_token(parser);
|
2107
2107
|
return false;
|
2108
2108
|
} else if (token->type == GUMBO_TOKEN_COMMENT) {
|
@@ -2147,14 +2147,18 @@ static bool handle_in_head(GumboParser* parser, GumboToken* token) {
|
|
2147
2147
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_HEAD);
|
2148
2148
|
return true;
|
2149
2149
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_HEAD)) {
|
2150
|
-
|
2150
|
+
parser_add_parse_error(parser, token);
|
2151
2151
|
ignore_token(parser);
|
2152
2152
|
return false;
|
2153
2153
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_HEAD) ||
|
2154
2154
|
(token->type == GUMBO_TOKEN_END_TAG &&
|
2155
2155
|
!tag_in(token, kEndTag, GUMBO_TAG_BODY, GUMBO_TAG_HTML,
|
2156
2156
|
GUMBO_TAG_BR, GUMBO_TAG_LAST))) {
|
2157
|
-
|
2157
|
+
parser_add_parse_error(parser, token);
|
2158
|
+
return false;
|
2159
|
+
} else if (tag_is(token, kStartTag, GUMBO_TAG_UNKNOWN) && token->v.start_tag.is_self_closing) {
|
2160
|
+
parser_add_parse_error(parser, token);
|
2161
|
+
ignore_token(parser);
|
2158
2162
|
return false;
|
2159
2163
|
} else {
|
2160
2164
|
const GumboNode* node = pop_current_node(parser);
|
@@ -2171,7 +2175,7 @@ static bool handle_in_head(GumboParser* parser, GumboToken* token) {
|
|
2171
2175
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-inheadnoscript
|
2172
2176
|
static bool handle_in_head_noscript(GumboParser* parser, GumboToken* token) {
|
2173
2177
|
if (token->type == GUMBO_TOKEN_DOCTYPE) {
|
2174
|
-
|
2178
|
+
parser_add_parse_error(parser, token);
|
2175
2179
|
return false;
|
2176
2180
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
|
2177
2181
|
return handle_in_body(parser, token);
|
@@ -2191,11 +2195,11 @@ static bool handle_in_head_noscript(GumboParser* parser, GumboToken* token) {
|
|
2191
2195
|
GUMBO_TAG_LAST) ||
|
2192
2196
|
(token->type == GUMBO_TOKEN_END_TAG &&
|
2193
2197
|
!tag_is(token, kEndTag, GUMBO_TAG_BR))) {
|
2194
|
-
|
2198
|
+
parser_add_parse_error(parser, token);
|
2195
2199
|
ignore_token(parser);
|
2196
2200
|
return false;
|
2197
2201
|
} else {
|
2198
|
-
|
2202
|
+
parser_add_parse_error(parser, token);
|
2199
2203
|
const GumboNode* node = pop_current_node(parser);
|
2200
2204
|
assert(node_tag_is(node, GUMBO_TAG_NOSCRIPT));
|
2201
2205
|
AVOID_UNUSED_VARIABLE_WARNING(node);
|
@@ -2212,7 +2216,7 @@ static bool handle_after_head(GumboParser* parser, GumboToken* token) {
|
|
2212
2216
|
insert_text_token(parser, token);
|
2213
2217
|
return true;
|
2214
2218
|
} else if (token->type == GUMBO_TOKEN_DOCTYPE) {
|
2215
|
-
|
2219
|
+
parser_add_parse_error(parser, token);
|
2216
2220
|
ignore_token(parser);
|
2217
2221
|
return false;
|
2218
2222
|
} else if (token->type == GUMBO_TOKEN_COMMENT) {
|
@@ -2233,7 +2237,7 @@ static bool handle_after_head(GumboParser* parser, GumboToken* token) {
|
|
2233
2237
|
GUMBO_TAG_BGSOUND, GUMBO_TAG_LINK, GUMBO_TAG_META,
|
2234
2238
|
GUMBO_TAG_NOFRAMES, GUMBO_TAG_SCRIPT, GUMBO_TAG_STYLE,
|
2235
2239
|
GUMBO_TAG_TITLE, GUMBO_TAG_LAST)) {
|
2236
|
-
|
2240
|
+
parser_add_parse_error(parser, token);
|
2237
2241
|
assert(state->_head_element != NULL);
|
2238
2242
|
// This must be flushed before we push the head element on, as there may be
|
2239
2243
|
// pending character tokens that should be attached to the root.
|
@@ -2246,7 +2250,7 @@ static bool handle_after_head(GumboParser* parser, GumboToken* token) {
|
|
2246
2250
|
(token->type == GUMBO_TOKEN_END_TAG &&
|
2247
2251
|
!tag_in(token, kEndTag, GUMBO_TAG_BODY, GUMBO_TAG_HTML,
|
2248
2252
|
GUMBO_TAG_BR, GUMBO_TAG_LAST))) {
|
2249
|
-
|
2253
|
+
parser_add_parse_error(parser, token);
|
2250
2254
|
ignore_token(parser);
|
2251
2255
|
return false;
|
2252
2256
|
} else {
|
@@ -2296,7 +2300,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2296
2300
|
GumboParserState* state = parser->_parser_state;
|
2297
2301
|
assert(state->_open_elements.length > 0);
|
2298
2302
|
if (token->type == GUMBO_TOKEN_NULL) {
|
2299
|
-
|
2303
|
+
parser_add_parse_error(parser, token);
|
2300
2304
|
ignore_token(parser);
|
2301
2305
|
return false;
|
2302
2306
|
} else if (token->type == GUMBO_TOKEN_WHITESPACE) {
|
@@ -2312,13 +2316,13 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2312
2316
|
append_comment_node(parser, get_current_node(parser), token);
|
2313
2317
|
return true;
|
2314
2318
|
} else if (token->type == GUMBO_TOKEN_DOCTYPE) {
|
2315
|
-
|
2319
|
+
parser_add_parse_error(parser, token);
|
2316
2320
|
ignore_token(parser);
|
2317
2321
|
return false;
|
2318
2322
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
|
2319
2323
|
assert(parser->_output->root != NULL);
|
2320
2324
|
assert(parser->_output->root->type == GUMBO_NODE_ELEMENT);
|
2321
|
-
|
2325
|
+
parser_add_parse_error(parser, token);
|
2322
2326
|
merge_attributes(parser, token, parser->_output->root);
|
2323
2327
|
return false;
|
2324
2328
|
} else if (tag_in(token, kStartTag, GUMBO_TAG_BASE, GUMBO_TAG_BASEFONT,
|
@@ -2327,7 +2331,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2327
2331
|
GUMBO_TAG_STYLE, GUMBO_TAG_TITLE, GUMBO_TAG_LAST)) {
|
2328
2332
|
return handle_in_head(parser, token);
|
2329
2333
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_BODY)) {
|
2330
|
-
|
2334
|
+
parser_add_parse_error(parser, token);
|
2331
2335
|
if (state->_open_elements.length < 2 ||
|
2332
2336
|
!node_tag_is(state->_open_elements.data[1], GUMBO_TAG_BODY)) {
|
2333
2337
|
ignore_token(parser);
|
@@ -2337,7 +2341,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2337
2341
|
merge_attributes(parser, token, state->_open_elements.data[1]);
|
2338
2342
|
return false;
|
2339
2343
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_FRAMESET)) {
|
2340
|
-
|
2344
|
+
parser_add_parse_error(parser, token);
|
2341
2345
|
if (state->_open_elements.length < 2 ||
|
2342
2346
|
!node_tag_is(state->_open_elements.data[1], GUMBO_TAG_BODY) ||
|
2343
2347
|
!state->_frameset_ok) {
|
@@ -2382,7 +2386,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2382
2386
|
GUMBO_TAG_TD, GUMBO_TAG_TFOOT, GUMBO_TAG_TH,
|
2383
2387
|
GUMBO_TAG_THEAD, GUMBO_TAG_TR, GUMBO_TAG_BODY,
|
2384
2388
|
GUMBO_TAG_HTML, GUMBO_TAG_LAST)) {
|
2385
|
-
|
2389
|
+
parser_add_parse_error(parser, token);
|
2386
2390
|
return false;
|
2387
2391
|
}
|
2388
2392
|
}
|
@@ -2390,7 +2394,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2390
2394
|
} else if (tag_in(token, kEndTag, GUMBO_TAG_BODY, GUMBO_TAG_HTML,
|
2391
2395
|
GUMBO_TAG_LAST)) {
|
2392
2396
|
if (!has_an_element_in_scope(parser, GUMBO_TAG_BODY)) {
|
2393
|
-
|
2397
|
+
parser_add_parse_error(parser, token);
|
2394
2398
|
ignore_token(parser);
|
2395
2399
|
return false;
|
2396
2400
|
}
|
@@ -2403,7 +2407,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2403
2407
|
GUMBO_TAG_TFOOT, GUMBO_TAG_TH, GUMBO_TAG_THEAD,
|
2404
2408
|
GUMBO_TAG_TR, GUMBO_TAG_BODY, GUMBO_TAG_HTML,
|
2405
2409
|
GUMBO_TAG_LAST)) {
|
2406
|
-
|
2410
|
+
parser_add_parse_error(parser, token);
|
2407
2411
|
success = false;
|
2408
2412
|
break;
|
2409
2413
|
}
|
@@ -2434,7 +2438,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2434
2438
|
if (node_tag_in(get_current_node(parser), GUMBO_TAG_H1, GUMBO_TAG_H2,
|
2435
2439
|
GUMBO_TAG_H3, GUMBO_TAG_H4, GUMBO_TAG_H5, GUMBO_TAG_H6,
|
2436
2440
|
GUMBO_TAG_LAST)) {
|
2437
|
-
|
2441
|
+
parser_add_parse_error(parser, token);
|
2438
2442
|
pop_current_node(parser);
|
2439
2443
|
result = false;
|
2440
2444
|
}
|
@@ -2450,7 +2454,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2450
2454
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_FORM)) {
|
2451
2455
|
if (state->_form_element != NULL) {
|
2452
2456
|
gumbo_debug("Ignoring nested form.\n");
|
2453
|
-
|
2457
|
+
parser_add_parse_error(parser, token);
|
2454
2458
|
ignore_token(parser);
|
2455
2459
|
return false;
|
2456
2460
|
}
|
@@ -2476,7 +2480,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2476
2480
|
return result;
|
2477
2481
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_BUTTON)) {
|
2478
2482
|
if (has_an_element_in_scope(parser, GUMBO_TAG_BUTTON)) {
|
2479
|
-
|
2483
|
+
parser_add_parse_error(parser, token);
|
2480
2484
|
implicitly_close_tags(parser, token, GUMBO_TAG_BUTTON);
|
2481
2485
|
state->_reprocess_current_token = true;
|
2482
2486
|
return false;
|
@@ -2496,7 +2500,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2496
2500
|
GUMBO_TAG_LAST)) {
|
2497
2501
|
GumboTag tag = token->v.end_tag;
|
2498
2502
|
if (!has_an_element_in_scope(parser, tag)) {
|
2499
|
-
|
2503
|
+
parser_add_parse_error(parser, token);
|
2500
2504
|
ignore_token(parser);
|
2501
2505
|
return false;
|
2502
2506
|
}
|
@@ -2509,7 +2513,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2509
2513
|
state->_form_element = NULL;
|
2510
2514
|
if (!node || !has_node_in_scope(parser, node)) {
|
2511
2515
|
gumbo_debug("Closing an unopened form.\n");
|
2512
|
-
|
2516
|
+
parser_add_parse_error(parser, token);
|
2513
2517
|
ignore_token(parser);
|
2514
2518
|
return false;
|
2515
2519
|
}
|
@@ -2517,7 +2521,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2517
2521
|
// <form> element; other nodes are left in scope.
|
2518
2522
|
generate_implied_end_tags(parser, GUMBO_TAG_LAST);
|
2519
2523
|
if (get_current_node(parser) != node) {
|
2520
|
-
|
2524
|
+
parser_add_parse_error(parser, token);
|
2521
2525
|
result = false;
|
2522
2526
|
}
|
2523
2527
|
|
@@ -2529,7 +2533,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2529
2533
|
return result;
|
2530
2534
|
} else if (tag_is(token, kEndTag, GUMBO_TAG_P)) {
|
2531
2535
|
if (!has_an_element_in_button_scope(parser, GUMBO_TAG_P)) {
|
2532
|
-
|
2536
|
+
parser_add_parse_error(parser, token);
|
2533
2537
|
reconstruct_active_formatting_elements(parser);
|
2534
2538
|
insert_element_of_tag_type(
|
2535
2539
|
parser, GUMBO_TAG_P, GUMBO_INSERTION_CONVERTED_FROM_END_TAG);
|
@@ -2539,7 +2543,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2539
2543
|
return implicitly_close_tags(parser, token, GUMBO_TAG_P);
|
2540
2544
|
} else if (tag_is(token, kEndTag, GUMBO_TAG_LI)) {
|
2541
2545
|
if (!has_an_element_in_list_scope(parser, GUMBO_TAG_LI)) {
|
2542
|
-
|
2546
|
+
parser_add_parse_error(parser, token);
|
2543
2547
|
ignore_token(parser);
|
2544
2548
|
return false;
|
2545
2549
|
}
|
@@ -2549,7 +2553,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2549
2553
|
assert(token->type == GUMBO_TOKEN_END_TAG);
|
2550
2554
|
GumboTag token_tag = token->v.end_tag;
|
2551
2555
|
if (!has_an_element_in_scope(parser, token_tag)) {
|
2552
|
-
|
2556
|
+
parser_add_parse_error(parser, token);
|
2553
2557
|
ignore_token(parser);
|
2554
2558
|
return false;
|
2555
2559
|
}
|
@@ -2560,7 +2564,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2560
2564
|
parser, GUMBO_TAG_H1, GUMBO_TAG_H2, GUMBO_TAG_H3, GUMBO_TAG_H4,
|
2561
2565
|
GUMBO_TAG_H5, GUMBO_TAG_H6, GUMBO_TAG_LAST)) {
|
2562
2566
|
// No heading open; ignore the token entirely.
|
2563
|
-
|
2567
|
+
parser_add_parse_error(parser, token);
|
2564
2568
|
ignore_token(parser);
|
2565
2569
|
return false;
|
2566
2570
|
} else {
|
@@ -2572,7 +2576,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2572
2576
|
// record a parse error.
|
2573
2577
|
// TODO(jdtang): Add a way to distinguish this error case from the one
|
2574
2578
|
// above.
|
2575
|
-
|
2579
|
+
parser_add_parse_error(parser, token);
|
2576
2580
|
}
|
2577
2581
|
do {
|
2578
2582
|
current_node = pop_current_node(parser);
|
@@ -2587,7 +2591,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2587
2591
|
int has_matching_a = find_last_anchor_index(parser, &last_a);
|
2588
2592
|
if (has_matching_a) {
|
2589
2593
|
assert(has_matching_a == 1);
|
2590
|
-
|
2594
|
+
parser_add_parse_error(parser, token);
|
2591
2595
|
adoption_agency_algorithm(parser, token, GUMBO_TAG_A);
|
2592
2596
|
// The adoption agency algorithm usually removes all instances of <a>
|
2593
2597
|
// from the list of active formatting elements, but in case it doesn't,
|
@@ -2617,7 +2621,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2617
2621
|
reconstruct_active_formatting_elements(parser);
|
2618
2622
|
if (has_an_element_in_scope(parser, GUMBO_TAG_NOBR)) {
|
2619
2623
|
result = false;
|
2620
|
-
|
2624
|
+
parser_add_parse_error(parser, token);
|
2621
2625
|
adoption_agency_algorithm(parser, token, GUMBO_TAG_NOBR);
|
2622
2626
|
reconstruct_active_formatting_elements(parser);
|
2623
2627
|
}
|
@@ -2641,7 +2645,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2641
2645
|
GUMBO_TAG_OBJECT, GUMBO_TAG_LAST)) {
|
2642
2646
|
GumboTag token_tag = token->v.end_tag;
|
2643
2647
|
if (!has_an_element_in_table_scope(parser, token_tag)) {
|
2644
|
-
|
2648
|
+
parser_add_parse_error(parser, token);
|
2645
2649
|
ignore_token(parser);
|
2646
2650
|
return false;
|
2647
2651
|
}
|
@@ -2663,14 +2667,14 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2663
2667
|
bool success = true;
|
2664
2668
|
if (tag_is(token, kStartTag, GUMBO_TAG_IMAGE)) {
|
2665
2669
|
success = false;
|
2666
|
-
|
2670
|
+
parser_add_parse_error(parser, token);
|
2667
2671
|
token->v.start_tag.tag = GUMBO_TAG_IMG;
|
2668
2672
|
}
|
2669
2673
|
reconstruct_active_formatting_elements(parser);
|
2670
2674
|
GumboNode* node = insert_element_from_token(parser, token);
|
2671
2675
|
if (tag_is(token, kStartTag, GUMBO_TAG_IMAGE)) {
|
2672
2676
|
success = false;
|
2673
|
-
|
2677
|
+
parser_add_parse_error(parser, token);
|
2674
2678
|
node->v.element.tag = GUMBO_TAG_IMG;
|
2675
2679
|
node->parse_flags |= GUMBO_INSERTION_FROM_IMAGE;
|
2676
2680
|
}
|
@@ -2703,7 +2707,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2703
2707
|
set_frameset_not_ok(parser);
|
2704
2708
|
return result;
|
2705
2709
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_ISINDEX)) {
|
2706
|
-
|
2710
|
+
parser_add_parse_error(parser, token);
|
2707
2711
|
if (parser->_parser_state->_form_element != NULL) {
|
2708
2712
|
ignore_token(parser);
|
2709
2713
|
return false;
|
@@ -2715,7 +2719,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2715
2719
|
GumboVector* token_attrs = &token->v.start_tag.attributes;
|
2716
2720
|
GumboAttribute* prompt_attr = gumbo_get_attribute(token_attrs, "prompt");
|
2717
2721
|
GumboAttribute* action_attr = gumbo_get_attribute(token_attrs, "action");
|
2718
|
-
GumboAttribute* name_attr = gumbo_get_attribute(token_attrs, "
|
2722
|
+
GumboAttribute* name_attr = gumbo_get_attribute(token_attrs, "name");
|
2719
2723
|
|
2720
2724
|
GumboNode* form = insert_element_of_tag_type(
|
2721
2725
|
parser, GUMBO_TAG_FORM, GUMBO_INSERTION_FROM_ISINDEX);
|
@@ -2831,13 +2835,13 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2831
2835
|
generate_implied_end_tags(parser, GUMBO_TAG_LAST);
|
2832
2836
|
}
|
2833
2837
|
if (!node_tag_is(get_current_node(parser), GUMBO_TAG_RUBY)) {
|
2834
|
-
|
2838
|
+
parser_add_parse_error(parser, token);
|
2835
2839
|
success = false;
|
2836
2840
|
}
|
2837
2841
|
insert_element_from_token(parser, token);
|
2838
2842
|
return success;
|
2839
2843
|
} else if (tag_is(token, kEndTag, GUMBO_TAG_BR)) {
|
2840
|
-
|
2844
|
+
parser_add_parse_error(parser, token);
|
2841
2845
|
reconstruct_active_formatting_elements(parser);
|
2842
2846
|
insert_element_of_tag_type(
|
2843
2847
|
parser, GUMBO_TAG_BR, GUMBO_INSERTION_CONVERTED_FROM_END_TAG);
|
@@ -2868,7 +2872,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2868
2872
|
GUMBO_TAG_TBODY, GUMBO_TAG_TD, GUMBO_TAG_TFOOT,
|
2869
2873
|
GUMBO_TAG_TH, GUMBO_TAG_THEAD, GUMBO_TAG_TR,
|
2870
2874
|
GUMBO_TAG_LAST)) {
|
2871
|
-
|
2875
|
+
parser_add_parse_error(parser, token);
|
2872
2876
|
ignore_token(parser);
|
2873
2877
|
return false;
|
2874
2878
|
} else if (token->type == GUMBO_TOKEN_START_TAG) {
|
@@ -2897,7 +2901,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2897
2901
|
while (node != pop_current_node(parser)); // Pop everything.
|
2898
2902
|
return true;
|
2899
2903
|
} else if (is_special_node(node)) {
|
2900
|
-
|
2904
|
+
parser_add_parse_error(parser, token);
|
2901
2905
|
ignore_token(parser);
|
2902
2906
|
return false;
|
2903
2907
|
}
|
@@ -2921,7 +2925,7 @@ static bool handle_text(GumboParser* parser, GumboToken* token) {
|
|
2921
2925
|
// This behavior doesn't support document.write of partial HTML elements,
|
2922
2926
|
// but should be adequate for almost all other scripting support.
|
2923
2927
|
if (token->type == GUMBO_TOKEN_EOF) {
|
2924
|
-
|
2928
|
+
parser_add_parse_error(parser, token);
|
2925
2929
|
parser->_parser_state->_reprocess_current_token = true;
|
2926
2930
|
}
|
2927
2931
|
pop_current_node(parser);
|
@@ -2946,7 +2950,7 @@ static bool handle_in_table(GumboParser* parser, GumboToken* token) {
|
|
2946
2950
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_TEXT);
|
2947
2951
|
return true;
|
2948
2952
|
} else if (token->type == GUMBO_TOKEN_DOCTYPE) {
|
2949
|
-
|
2953
|
+
parser_add_parse_error(parser, token);
|
2950
2954
|
ignore_token(parser);
|
2951
2955
|
return false;
|
2952
2956
|
} else if (token->type == GUMBO_TOKEN_COMMENT) {
|
@@ -2985,7 +2989,7 @@ static bool handle_in_table(GumboParser* parser, GumboToken* token) {
|
|
2985
2989
|
}
|
2986
2990
|
return true;
|
2987
2991
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_TABLE)) {
|
2988
|
-
|
2992
|
+
parser_add_parse_error(parser, token);
|
2989
2993
|
if (close_table(parser)) {
|
2990
2994
|
parser->_parser_state->_reprocess_current_token = true;
|
2991
2995
|
} else {
|
@@ -2994,7 +2998,7 @@ static bool handle_in_table(GumboParser* parser, GumboToken* token) {
|
|
2994
2998
|
return false;
|
2995
2999
|
} else if (tag_is(token, kEndTag, GUMBO_TAG_TABLE)) {
|
2996
3000
|
if (!close_table(parser)) {
|
2997
|
-
|
3001
|
+
parser_add_parse_error(parser, token);
|
2998
3002
|
return false;
|
2999
3003
|
}
|
3000
3004
|
return true;
|
@@ -3003,7 +3007,7 @@ static bool handle_in_table(GumboParser* parser, GumboToken* token) {
|
|
3003
3007
|
GUMBO_TAG_TBODY, GUMBO_TAG_TD, GUMBO_TAG_TFOOT,
|
3004
3008
|
GUMBO_TAG_TH, GUMBO_TAG_THEAD, GUMBO_TAG_TR,
|
3005
3009
|
GUMBO_TAG_LAST)) {
|
3006
|
-
|
3010
|
+
parser_add_parse_error(parser, token);
|
3007
3011
|
ignore_token(parser);
|
3008
3012
|
return false;
|
3009
3013
|
} else if (tag_in(token, kStartTag, GUMBO_TAG_STYLE, GUMBO_TAG_SCRIPT,
|
@@ -3012,12 +3016,12 @@ static bool handle_in_table(GumboParser* parser, GumboToken* token) {
|
|
3012
3016
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_INPUT) &&
|
3013
3017
|
attribute_matches(&token->v.start_tag.attributes,
|
3014
3018
|
"type", "hidden")) {
|
3015
|
-
|
3019
|
+
parser_add_parse_error(parser, token);
|
3016
3020
|
insert_element_from_token(parser, token);
|
3017
3021
|
pop_current_node(parser);
|
3018
3022
|
return false;
|
3019
3023
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_FORM)) {
|
3020
|
-
|
3024
|
+
parser_add_parse_error(parser, token);
|
3021
3025
|
if (state->_form_element) {
|
3022
3026
|
ignore_token(parser);
|
3023
3027
|
return false;
|
@@ -3027,12 +3031,12 @@ static bool handle_in_table(GumboParser* parser, GumboToken* token) {
|
|
3027
3031
|
return false;
|
3028
3032
|
} else if (token->type == GUMBO_TOKEN_EOF) {
|
3029
3033
|
if (!node_tag_is(get_current_node(parser), GUMBO_TAG_HTML)) {
|
3030
|
-
|
3034
|
+
parser_add_parse_error(parser, token);
|
3031
3035
|
return false;
|
3032
3036
|
}
|
3033
3037
|
return true;
|
3034
3038
|
} else {
|
3035
|
-
|
3039
|
+
parser_add_parse_error(parser, token);
|
3036
3040
|
state->_foster_parent_insertions = true;
|
3037
3041
|
bool result = handle_in_body(parser, token);
|
3038
3042
|
state->_foster_parent_insertions = false;
|
@@ -3043,7 +3047,7 @@ static bool handle_in_table(GumboParser* parser, GumboToken* token) {
|
|
3043
3047
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-intabletext
|
3044
3048
|
static bool handle_in_table_text(GumboParser* parser, GumboToken* token) {
|
3045
3049
|
if (token->type == GUMBO_TOKEN_NULL) {
|
3046
|
-
|
3050
|
+
parser_add_parse_error(parser, token);
|
3047
3051
|
ignore_token(parser);
|
3048
3052
|
return false;
|
3049
3053
|
} else if (token->type == GUMBO_TOKEN_CHARACTER ||
|
@@ -3082,18 +3086,18 @@ static bool handle_in_caption(GumboParser* parser, GumboToken* token) {
|
|
3082
3086
|
tag_in(token, kEndTag, GUMBO_TAG_CAPTION, GUMBO_TAG_TABLE,
|
3083
3087
|
GUMBO_TAG_LAST)) {
|
3084
3088
|
if (!has_an_element_in_table_scope(parser, GUMBO_TAG_CAPTION)) {
|
3085
|
-
|
3089
|
+
parser_add_parse_error(parser, token);
|
3086
3090
|
ignore_token(parser);
|
3087
3091
|
return false;
|
3088
3092
|
}
|
3089
3093
|
if (!tag_is(token, kEndTag, GUMBO_TAG_CAPTION)) {
|
3090
|
-
|
3094
|
+
parser_add_parse_error(parser, token);
|
3091
3095
|
parser->_parser_state->_reprocess_current_token = true;
|
3092
3096
|
}
|
3093
3097
|
generate_implied_end_tags(parser, GUMBO_TAG_LAST);
|
3094
3098
|
bool result = true;
|
3095
3099
|
if (!node_tag_is(get_current_node(parser), GUMBO_TAG_CAPTION)) {
|
3096
|
-
|
3100
|
+
parser_add_parse_error(parser, token);
|
3097
3101
|
while (!node_tag_is(get_current_node(parser), GUMBO_TAG_CAPTION)) {
|
3098
3102
|
pop_current_node(parser);
|
3099
3103
|
}
|
@@ -3107,7 +3111,7 @@ static bool handle_in_caption(GumboParser* parser, GumboToken* token) {
|
|
3107
3111
|
GUMBO_TAG_COLGROUP, GUMBO_TAG_HTML, GUMBO_TAG_TBODY,
|
3108
3112
|
GUMBO_TAG_TD, GUMBO_TAG_TFOOT, GUMBO_TAG_TH,
|
3109
3113
|
GUMBO_TAG_THEAD, GUMBO_TAG_TR, GUMBO_TAG_LAST)) {
|
3110
|
-
|
3114
|
+
parser_add_parse_error(parser, token);
|
3111
3115
|
ignore_token(parser);
|
3112
3116
|
return false;
|
3113
3117
|
} else {
|
@@ -3121,7 +3125,7 @@ static bool handle_in_column_group(GumboParser* parser, GumboToken* token) {
|
|
3121
3125
|
insert_text_token(parser, token);
|
3122
3126
|
return true;
|
3123
3127
|
} else if (token->type == GUMBO_TOKEN_DOCTYPE) {
|
3124
|
-
|
3128
|
+
parser_add_parse_error(parser, token);
|
3125
3129
|
ignore_token(parser);
|
3126
3130
|
return false;
|
3127
3131
|
} else if (token->type == GUMBO_TOKEN_COMMENT) {
|
@@ -3135,7 +3139,7 @@ static bool handle_in_column_group(GumboParser* parser, GumboToken* token) {
|
|
3135
3139
|
acknowledge_self_closing_tag(parser);
|
3136
3140
|
return true;
|
3137
3141
|
} else if (tag_is(token, kEndTag, GUMBO_TAG_COL)) {
|
3138
|
-
|
3142
|
+
parser_add_parse_error(parser, token);
|
3139
3143
|
ignore_token(parser);
|
3140
3144
|
return false;
|
3141
3145
|
} else if (token->type == GUMBO_TOKEN_EOF &&
|
@@ -3143,7 +3147,7 @@ static bool handle_in_column_group(GumboParser* parser, GumboToken* token) {
|
|
3143
3147
|
return true;
|
3144
3148
|
} else {
|
3145
3149
|
if (get_current_node(parser) == parser->_output->root) {
|
3146
|
-
|
3150
|
+
parser_add_parse_error(parser, token);
|
3147
3151
|
return false;
|
3148
3152
|
}
|
3149
3153
|
assert(node_tag_is(get_current_node(parser), GUMBO_TAG_COLGROUP));
|
@@ -3165,7 +3169,7 @@ static bool handle_in_table_body(GumboParser* parser, GumboToken* token) {
|
|
3165
3169
|
return true;
|
3166
3170
|
} else if (tag_in(token, kStartTag, GUMBO_TAG_TD, GUMBO_TAG_TH,
|
3167
3171
|
GUMBO_TAG_LAST)) {
|
3168
|
-
|
3172
|
+
parser_add_parse_error(parser, token);
|
3169
3173
|
clear_stack_to_table_body_context(parser);
|
3170
3174
|
insert_element_of_tag_type(parser, GUMBO_TAG_TR, GUMBO_INSERTION_IMPLIED);
|
3171
3175
|
parser->_parser_state->_reprocess_current_token = true;
|
@@ -3174,7 +3178,7 @@ static bool handle_in_table_body(GumboParser* parser, GumboToken* token) {
|
|
3174
3178
|
} else if (tag_in(token, kEndTag, GUMBO_TAG_TBODY, GUMBO_TAG_TFOOT,
|
3175
3179
|
GUMBO_TAG_THEAD, GUMBO_TAG_LAST)) {
|
3176
3180
|
if (!has_an_element_in_table_scope(parser, token->v.end_tag)) {
|
3177
|
-
|
3181
|
+
parser_add_parse_error(parser, token);
|
3178
3182
|
ignore_token(parser);
|
3179
3183
|
return false;
|
3180
3184
|
}
|
@@ -3189,7 +3193,7 @@ static bool handle_in_table_body(GumboParser* parser, GumboToken* token) {
|
|
3189
3193
|
if (!(has_an_element_in_table_scope(parser, GUMBO_TAG_TBODY) ||
|
3190
3194
|
has_an_element_in_table_scope(parser, GUMBO_TAG_THEAD) ||
|
3191
3195
|
has_an_element_in_table_scope(parser, GUMBO_TAG_TFOOT))) {
|
3192
|
-
|
3196
|
+
parser_add_parse_error(parser, token);
|
3193
3197
|
ignore_token(parser);
|
3194
3198
|
return false;
|
3195
3199
|
}
|
@@ -3202,7 +3206,7 @@ static bool handle_in_table_body(GumboParser* parser, GumboToken* token) {
|
|
3202
3206
|
GUMBO_TAG_COL, GUMBO_TAG_TR, GUMBO_TAG_COLGROUP,
|
3203
3207
|
GUMBO_TAG_HTML, GUMBO_TAG_TD, GUMBO_TAG_TH, GUMBO_TAG_LAST))
|
3204
3208
|
{
|
3205
|
-
|
3209
|
+
parser_add_parse_error(parser, token);
|
3206
3210
|
ignore_token(parser);
|
3207
3211
|
return false;
|
3208
3212
|
} else {
|
@@ -3238,7 +3242,7 @@ static bool handle_in_row(GumboParser* parser, GumboToken* token) {
|
|
3238
3242
|
const GumboNode* node = parser->_parser_state->_open_elements.data[i];
|
3239
3243
|
gumbo_debug("%s\n", gumbo_normalized_tagname(node->v.element.tag));
|
3240
3244
|
}
|
3241
|
-
|
3245
|
+
parser_add_parse_error(parser, token);
|
3242
3246
|
ignore_token(parser);
|
3243
3247
|
return false;
|
3244
3248
|
}
|
@@ -3254,7 +3258,7 @@ static bool handle_in_row(GumboParser* parser, GumboToken* token) {
|
|
3254
3258
|
} else if (tag_in(token, kEndTag, GUMBO_TAG_BODY, GUMBO_TAG_CAPTION,
|
3255
3259
|
GUMBO_TAG_COL, GUMBO_TAG_COLGROUP, GUMBO_TAG_HTML,
|
3256
3260
|
GUMBO_TAG_TD, GUMBO_TAG_TH, GUMBO_TAG_LAST)) {
|
3257
|
-
|
3261
|
+
parser_add_parse_error(parser, token);
|
3258
3262
|
ignore_token(parser);
|
3259
3263
|
return false;
|
3260
3264
|
} else {
|
@@ -3267,7 +3271,7 @@ static bool handle_in_cell(GumboParser* parser, GumboToken* token) {
|
|
3267
3271
|
if (tag_in(token, kEndTag, GUMBO_TAG_TD, GUMBO_TAG_TH, GUMBO_TAG_LAST)) {
|
3268
3272
|
GumboTag token_tag = token->v.end_tag;
|
3269
3273
|
if (!has_an_element_in_table_scope(parser, token_tag)) {
|
3270
|
-
|
3274
|
+
parser_add_parse_error(parser, token);
|
3271
3275
|
return false;
|
3272
3276
|
}
|
3273
3277
|
return close_table_cell(parser, token, token_tag);
|
@@ -3279,7 +3283,7 @@ static bool handle_in_cell(GumboParser* parser, GumboToken* token) {
|
|
3279
3283
|
if (!has_an_element_in_table_scope(parser, GUMBO_TAG_TH) &&
|
3280
3284
|
!has_an_element_in_table_scope(parser, GUMBO_TAG_TD)) {
|
3281
3285
|
gumbo_debug("Bailing out because there's no <td> or <th> in scope.\n");
|
3282
|
-
|
3286
|
+
parser_add_parse_error(parser, token);
|
3283
3287
|
ignore_token(parser);
|
3284
3288
|
return false;
|
3285
3289
|
}
|
@@ -3288,14 +3292,14 @@ static bool handle_in_cell(GumboParser* parser, GumboToken* token) {
|
|
3288
3292
|
} else if (tag_in(token, kEndTag, GUMBO_TAG_BODY, GUMBO_TAG_CAPTION,
|
3289
3293
|
GUMBO_TAG_COL, GUMBO_TAG_COLGROUP, GUMBO_TAG_HTML,
|
3290
3294
|
GUMBO_TAG_LAST)) {
|
3291
|
-
|
3295
|
+
parser_add_parse_error(parser, token);
|
3292
3296
|
ignore_token(parser);
|
3293
3297
|
return false;
|
3294
3298
|
} else if (tag_in(token, kEndTag, GUMBO_TAG_TABLE, GUMBO_TAG_TBODY,
|
3295
3299
|
GUMBO_TAG_TFOOT, GUMBO_TAG_THEAD, GUMBO_TAG_TR,
|
3296
3300
|
GUMBO_TAG_LAST)) {
|
3297
3301
|
if (!has_an_element_in_table_scope(parser, token->v.end_tag)) {
|
3298
|
-
|
3302
|
+
parser_add_parse_error(parser, token);
|
3299
3303
|
ignore_token(parser);
|
3300
3304
|
return false;
|
3301
3305
|
}
|
@@ -3309,7 +3313,7 @@ static bool handle_in_cell(GumboParser* parser, GumboToken* token) {
|
|
3309
3313
|
// http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-inselect
|
3310
3314
|
static bool handle_in_select(GumboParser* parser, GumboToken* token) {
|
3311
3315
|
if (token->type == GUMBO_TOKEN_NULL) {
|
3312
|
-
|
3316
|
+
parser_add_parse_error(parser, token);
|
3313
3317
|
ignore_token(parser);
|
3314
3318
|
return false;
|
3315
3319
|
} else if (token->type == GUMBO_TOKEN_CHARACTER ||
|
@@ -3317,7 +3321,7 @@ static bool handle_in_select(GumboParser* parser, GumboToken* token) {
|
|
3317
3321
|
insert_text_token(parser, token);
|
3318
3322
|
return true;
|
3319
3323
|
} else if (token->type == GUMBO_TOKEN_DOCTYPE) {
|
3320
|
-
|
3324
|
+
parser_add_parse_error(parser, token);
|
3321
3325
|
ignore_token(parser);
|
3322
3326
|
return false;
|
3323
3327
|
} else if (token->type == GUMBO_TOKEN_COMMENT) {
|
@@ -3351,7 +3355,7 @@ static bool handle_in_select(GumboParser* parser, GumboToken* token) {
|
|
3351
3355
|
pop_current_node(parser);
|
3352
3356
|
return true;
|
3353
3357
|
} else {
|
3354
|
-
|
3358
|
+
parser_add_parse_error(parser, token);
|
3355
3359
|
ignore_token(parser);
|
3356
3360
|
return false;
|
3357
3361
|
}
|
@@ -3360,26 +3364,26 @@ static bool handle_in_select(GumboParser* parser, GumboToken* token) {
|
|
3360
3364
|
pop_current_node(parser);
|
3361
3365
|
return true;
|
3362
3366
|
} else {
|
3363
|
-
|
3367
|
+
parser_add_parse_error(parser, token);
|
3364
3368
|
ignore_token(parser);
|
3365
3369
|
return false;
|
3366
3370
|
}
|
3367
3371
|
} else if (tag_is(token, kEndTag, GUMBO_TAG_SELECT)) {
|
3368
3372
|
if (!has_an_element_in_select_scope(parser, GUMBO_TAG_SELECT)) {
|
3369
|
-
|
3373
|
+
parser_add_parse_error(parser, token);
|
3370
3374
|
ignore_token(parser);
|
3371
3375
|
return false;
|
3372
3376
|
}
|
3373
3377
|
close_current_select(parser);
|
3374
3378
|
return true;
|
3375
3379
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_SELECT)) {
|
3376
|
-
|
3380
|
+
parser_add_parse_error(parser, token);
|
3377
3381
|
ignore_token(parser);
|
3378
3382
|
close_current_select(parser);
|
3379
3383
|
return false;
|
3380
3384
|
} else if (tag_in(token, kStartTag, GUMBO_TAG_INPUT, GUMBO_TAG_KEYGEN,
|
3381
3385
|
GUMBO_TAG_TEXTAREA, GUMBO_TAG_LAST)) {
|
3382
|
-
|
3386
|
+
parser_add_parse_error(parser, token);
|
3383
3387
|
if (!has_an_element_in_select_scope(parser, GUMBO_TAG_SELECT)) {
|
3384
3388
|
ignore_token(parser);
|
3385
3389
|
} else {
|
@@ -3391,12 +3395,12 @@ static bool handle_in_select(GumboParser* parser, GumboToken* token) {
|
|
3391
3395
|
return handle_in_head(parser, token);
|
3392
3396
|
} else if (token->type == GUMBO_TOKEN_EOF) {
|
3393
3397
|
if (get_current_node(parser) != parser->_output->root) {
|
3394
|
-
|
3398
|
+
parser_add_parse_error(parser, token);
|
3395
3399
|
return false;
|
3396
3400
|
}
|
3397
3401
|
return true;
|
3398
3402
|
} else {
|
3399
|
-
|
3403
|
+
parser_add_parse_error(parser, token);
|
3400
3404
|
ignore_token(parser);
|
3401
3405
|
return false;
|
3402
3406
|
}
|
@@ -3407,14 +3411,14 @@ static bool handle_in_select_in_table(GumboParser* parser, GumboToken* token) {
|
|
3407
3411
|
if (tag_in(token, kStartTag, GUMBO_TAG_CAPTION, GUMBO_TAG_TABLE,
|
3408
3412
|
GUMBO_TAG_TBODY, GUMBO_TAG_TFOOT, GUMBO_TAG_THEAD, GUMBO_TAG_TR,
|
3409
3413
|
GUMBO_TAG_TD, GUMBO_TAG_TH, GUMBO_TAG_LAST)) {
|
3410
|
-
|
3414
|
+
parser_add_parse_error(parser, token);
|
3411
3415
|
close_current_select(parser);
|
3412
3416
|
parser->_parser_state->_reprocess_current_token = true;
|
3413
3417
|
return false;
|
3414
3418
|
} else if (tag_in(token, kEndTag, GUMBO_TAG_CAPTION, GUMBO_TAG_TABLE,
|
3415
3419
|
GUMBO_TAG_TBODY, GUMBO_TAG_TFOOT, GUMBO_TAG_THEAD,
|
3416
3420
|
GUMBO_TAG_TR, GUMBO_TAG_TD, GUMBO_TAG_TH, GUMBO_TAG_LAST)) {
|
3417
|
-
|
3421
|
+
parser_add_parse_error(parser, token);
|
3418
3422
|
if (has_an_element_in_table_scope(parser, token->v.end_tag)) {
|
3419
3423
|
close_current_select(parser);
|
3420
3424
|
reset_insertion_mode_appropriately(parser);
|
@@ -3445,7 +3449,7 @@ static bool handle_after_body(GumboParser* parser, GumboToken* token) {
|
|
3445
3449
|
append_comment_node(parser, html_node, token);
|
3446
3450
|
return true;
|
3447
3451
|
} else if (token->type == GUMBO_TOKEN_DOCTYPE) {
|
3448
|
-
|
3452
|
+
parser_add_parse_error(parser, token);
|
3449
3453
|
ignore_token(parser);
|
3450
3454
|
return false;
|
3451
3455
|
} else if (tag_is(token, kEndTag, GUMBO_TAG_HTML)) {
|
@@ -3459,7 +3463,7 @@ static bool handle_after_body(GumboParser* parser, GumboToken* token) {
|
|
3459
3463
|
} else if (token->type == GUMBO_TOKEN_EOF) {
|
3460
3464
|
return true;
|
3461
3465
|
} else {
|
3462
|
-
|
3466
|
+
parser_add_parse_error(parser, token);
|
3463
3467
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_BODY);
|
3464
3468
|
parser->_parser_state->_reprocess_current_token = true;
|
3465
3469
|
return false;
|
@@ -3475,7 +3479,7 @@ static bool handle_in_frameset(GumboParser* parser, GumboToken* token) {
|
|
3475
3479
|
append_comment_node(parser, get_current_node(parser), token);
|
3476
3480
|
return true;
|
3477
3481
|
} else if (token->type == GUMBO_TOKEN_DOCTYPE) {
|
3478
|
-
|
3482
|
+
parser_add_parse_error(parser, token);
|
3479
3483
|
ignore_token(parser);
|
3480
3484
|
return false;
|
3481
3485
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
|
@@ -3485,7 +3489,7 @@ static bool handle_in_frameset(GumboParser* parser, GumboToken* token) {
|
|
3485
3489
|
return true;
|
3486
3490
|
} else if (tag_is(token, kEndTag, GUMBO_TAG_FRAMESET)) {
|
3487
3491
|
if (node_tag_is(get_current_node(parser), GUMBO_TAG_HTML)) {
|
3488
|
-
|
3492
|
+
parser_add_parse_error(parser, token);
|
3489
3493
|
ignore_token(parser);
|
3490
3494
|
return false;
|
3491
3495
|
}
|
@@ -3505,12 +3509,12 @@ static bool handle_in_frameset(GumboParser* parser, GumboToken* token) {
|
|
3505
3509
|
return handle_in_head(parser, token);
|
3506
3510
|
} else if (token->type == GUMBO_TOKEN_EOF) {
|
3507
3511
|
if (!node_tag_is(get_current_node(parser), GUMBO_TAG_HTML)) {
|
3508
|
-
|
3512
|
+
parser_add_parse_error(parser, token);
|
3509
3513
|
return false;
|
3510
3514
|
}
|
3511
3515
|
return true;
|
3512
3516
|
} else {
|
3513
|
-
|
3517
|
+
parser_add_parse_error(parser, token);
|
3514
3518
|
ignore_token(parser);
|
3515
3519
|
return false;
|
3516
3520
|
}
|
@@ -3525,7 +3529,7 @@ static bool handle_after_frameset(GumboParser* parser, GumboToken* token) {
|
|
3525
3529
|
append_comment_node(parser, get_current_node(parser), token);
|
3526
3530
|
return true;
|
3527
3531
|
} else if (token->type == GUMBO_TOKEN_DOCTYPE) {
|
3528
|
-
|
3532
|
+
parser_add_parse_error(parser, token);
|
3529
3533
|
ignore_token(parser);
|
3530
3534
|
return false;
|
3531
3535
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
|
@@ -3542,7 +3546,7 @@ static bool handle_after_frameset(GumboParser* parser, GumboToken* token) {
|
|
3542
3546
|
} else if (token->type == GUMBO_TOKEN_EOF) {
|
3543
3547
|
return true;
|
3544
3548
|
} else {
|
3545
|
-
|
3549
|
+
parser_add_parse_error(parser, token);
|
3546
3550
|
ignore_token(parser);
|
3547
3551
|
return false;
|
3548
3552
|
}
|
@@ -3560,7 +3564,7 @@ static bool handle_after_after_body(GumboParser* parser, GumboToken* token) {
|
|
3560
3564
|
} else if (token->type == GUMBO_TOKEN_EOF) {
|
3561
3565
|
return true;
|
3562
3566
|
} else {
|
3563
|
-
|
3567
|
+
parser_add_parse_error(parser, token);
|
3564
3568
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_BODY);
|
3565
3569
|
parser->_parser_state->_reprocess_current_token = true;
|
3566
3570
|
return false;
|
@@ -3582,7 +3586,7 @@ static bool handle_after_after_frameset(
|
|
3582
3586
|
} else if (tag_is(token, kStartTag, GUMBO_TAG_NOFRAMES)) {
|
3583
3587
|
return handle_in_head(parser, token);
|
3584
3588
|
} else {
|
3585
|
-
|
3589
|
+
parser_add_parse_error(parser, token);
|
3586
3590
|
ignore_token(parser);
|
3587
3591
|
return false;
|
3588
3592
|
}
|
@@ -3626,7 +3630,7 @@ static bool handle_html_content(GumboParser* parser, GumboToken* token) {
|
|
3626
3630
|
static bool handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
|
3627
3631
|
switch (token->type) {
|
3628
3632
|
case GUMBO_TOKEN_NULL:
|
3629
|
-
|
3633
|
+
parser_add_parse_error(parser, token);
|
3630
3634
|
token->type = GUMBO_TOKEN_CHARACTER;
|
3631
3635
|
token->v.character = kUtf8ReplacementChar;
|
3632
3636
|
insert_text_token(parser, token);
|
@@ -3642,7 +3646,7 @@ static bool handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
|
|
3642
3646
|
append_comment_node(parser, get_current_node(parser), token);
|
3643
3647
|
return true;
|
3644
3648
|
case GUMBO_TOKEN_DOCTYPE:
|
3645
|
-
|
3649
|
+
parser_add_parse_error(parser, token);
|
3646
3650
|
ignore_token(parser);
|
3647
3651
|
return false;
|
3648
3652
|
default:
|
@@ -3667,7 +3671,7 @@ static bool handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
|
|
3667
3671
|
token_has_attribute(token, "color") ||
|
3668
3672
|
token_has_attribute(token, "face") ||
|
3669
3673
|
token_has_attribute(token, "size")))) {
|
3670
|
-
|
3674
|
+
parser_add_parse_error(parser, token);
|
3671
3675
|
do {
|
3672
3676
|
pop_current_node(parser);
|
3673
3677
|
} while(!(is_mathml_integration_point(get_current_node(parser)) ||
|
@@ -3707,7 +3711,7 @@ static bool handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
|
|
3707
3711
|
|
3708
3712
|
bool is_success = true;
|
3709
3713
|
if (!gumbo_string_equals_ignore_case(&node_tagname, &token_tagname)) {
|
3710
|
-
|
3714
|
+
parser_add_parse_error(parser, token);
|
3711
3715
|
is_success = false;
|
3712
3716
|
}
|
3713
3717
|
int i = parser->_parser_state->_open_elements.length;
|
@@ -3864,7 +3868,7 @@ GumboOutput* gumbo_parse_with_options(
|
|
3864
3868
|
token.v.start_tag.attributes.data == NULL);
|
3865
3869
|
|
3866
3870
|
if (!state->_self_closing_flag_acknowledged) {
|
3867
|
-
GumboError* error =
|
3871
|
+
GumboError* error = parser_add_parse_error(&parser, &token);
|
3868
3872
|
if (error) {
|
3869
3873
|
error->type = GUMBO_ERR_UNACKNOWLEDGED_SELF_CLOSING_TAG;
|
3870
3874
|
}
|