nokogumbo 2.0.0 → 2.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +20 -4
- data/ext/nokogumbo/extconf.rb +50 -27
- data/ext/nokogumbo/nokogumbo.c +63 -14
- data/gumbo-parser/src/error.c +17 -8
- data/gumbo-parser/src/gumbo.h +27 -0
- data/gumbo-parser/src/parser.c +476 -480
- data/gumbo-parser/src/tokenizer.c +24 -27
- data/gumbo-parser/src/tokenizer.h +2 -13
- data/gumbo-parser/src/utf8.c +5 -0
- data/gumbo-parser/src/utf8.h +1 -0
- data/lib/nokogumbo.rb +22 -9
- data/lib/nokogumbo/html5.rb +15 -14
- data/lib/nokogumbo/html5/document.rb +7 -2
- data/lib/nokogumbo/html5/document_fragment.rb +2 -1
- data/lib/nokogumbo/version.rb +1 -1
- metadata +3 -4
data/gumbo-parser/src/parser.c
CHANGED
@@ -48,6 +48,7 @@ typedef uint8_t TagSet[GUMBO_TAG_LAST + 1];
|
|
48
48
|
const GumboOptions kGumboDefaultOptions = {
|
49
49
|
.tab_stop = 8,
|
50
50
|
.stop_on_first_error = false,
|
51
|
+
.max_attributes = 400,
|
51
52
|
.max_tree_depth = 400,
|
52
53
|
.max_errors = -1,
|
53
54
|
.fragment_context = NULL,
|
@@ -336,6 +337,7 @@ static void output_init(GumboParser* parser) {
|
|
336
337
|
GumboOutput* output = gumbo_alloc(sizeof(GumboOutput));
|
337
338
|
output->root = NULL;
|
338
339
|
output->document = new_document_node();
|
340
|
+
output->document_error = false;
|
339
341
|
output->status = GUMBO_STATUS_OK;
|
340
342
|
parser->_output = output;
|
341
343
|
gumbo_init_errors(parser);
|
@@ -608,6 +610,14 @@ static bool node_qualified_tagname_is (
|
|
608
610
|
return !gumbo_ascii_strcasecmp(element_name, name);
|
609
611
|
}
|
610
612
|
|
613
|
+
static bool node_html_tagname_is (
|
614
|
+
const GumboNode* node,
|
615
|
+
GumboTag tag,
|
616
|
+
const char *name
|
617
|
+
) {
|
618
|
+
return node_qualified_tagname_is(node, GUMBO_NAMESPACE_HTML, tag, name);
|
619
|
+
}
|
620
|
+
|
611
621
|
static bool node_tagname_is (
|
612
622
|
const GumboNode* node,
|
613
623
|
GumboTag tag,
|
@@ -633,7 +643,6 @@ static bool node_qualified_tag_is (
|
|
633
643
|
|
634
644
|
// Like node_tag_in, but for the single-tag case in the HTML namespace
|
635
645
|
static bool node_html_tag_is(const GumboNode* node, GumboTag tag) {
|
636
|
-
assert(tag != GUMBO_TAG_UNKNOWN);
|
637
646
|
return node_qualified_tag_is(node, GUMBO_NAMESPACE_HTML, tag);
|
638
647
|
}
|
639
648
|
|
@@ -1675,14 +1684,18 @@ static bool has_an_element_in_select_scope(const GumboParser* parser, GumboTag t
|
|
1675
1684
|
// https://html.spec.whatwg.org/multipage/parsing.html#generate-implied-end-tags
|
1676
1685
|
// "exception" is the "element to exclude from the process" listed in the spec.
|
1677
1686
|
// Pass GUMBO_TAG_LAST to not exclude any of them.
|
1678
|
-
static void generate_implied_end_tags(
|
1687
|
+
static void generate_implied_end_tags (
|
1688
|
+
GumboParser* parser,
|
1689
|
+
GumboTag exception,
|
1690
|
+
const char* exception_name
|
1691
|
+
) {
|
1679
1692
|
static const TagSet tags = {
|
1680
1693
|
TAG(DD), TAG(DT), TAG(LI), TAG(OPTGROUP), TAG(OPTION),
|
1681
1694
|
TAG(P), TAG(RB), TAG(RP), TAG(RT), TAG(RTC)
|
1682
1695
|
};
|
1683
1696
|
while (
|
1684
1697
|
node_tag_in_set(get_current_node(parser), &tags)
|
1685
|
-
&& !
|
1698
|
+
&& !node_html_tagname_is(get_current_node(parser), exception, exception_name)
|
1686
1699
|
) {
|
1687
1700
|
pop_current_node(parser);
|
1688
1701
|
}
|
@@ -1741,30 +1754,26 @@ static bool close_table(GumboParser* parser) {
|
|
1741
1754
|
|
1742
1755
|
// This factors out the clauses relating to "act as if an end tag token with tag
|
1743
1756
|
// name `cell_tag` had been seen".
|
1744
|
-
static
|
1757
|
+
static void close_table_cell (
|
1745
1758
|
GumboParser* parser,
|
1746
1759
|
const GumboToken* token,
|
1747
1760
|
GumboTag cell_tag
|
1748
1761
|
) {
|
1749
|
-
|
1750
|
-
generate_implied_end_tags(parser, GUMBO_TAG_LAST);
|
1762
|
+
generate_implied_end_tags(parser, GUMBO_TAG_LAST, NULL);
|
1751
1763
|
const GumboNode* node = get_current_node(parser);
|
1752
|
-
if (!node_html_tag_is(node, cell_tag))
|
1764
|
+
if (!node_html_tag_is(node, cell_tag))
|
1753
1765
|
parser_add_parse_error(parser, token);
|
1754
|
-
result = false;
|
1755
|
-
}
|
1756
1766
|
do {
|
1757
1767
|
node = pop_current_node(parser);
|
1758
1768
|
} while (!node_html_tag_is(node, cell_tag));
|
1759
1769
|
|
1760
1770
|
clear_active_formatting_elements(parser);
|
1761
1771
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_ROW);
|
1762
|
-
return result;
|
1763
1772
|
}
|
1764
1773
|
|
1765
1774
|
// https://html.spec.whatwg.org/multipage/parsing.html#close-the-cell
|
1766
1775
|
// This holds the logic to determine whether we should close a <td> or a <th>.
|
1767
|
-
static bool close_current_cell(GumboParser* parser, const GumboToken* token) {
|
1776
|
+
static void close_current_cell(GumboParser* parser, const GumboToken* token) {
|
1768
1777
|
GumboTag cell_tag;
|
1769
1778
|
if (has_an_element_in_table_scope(parser, GUMBO_TAG_TD)) {
|
1770
1779
|
assert(!has_an_element_in_table_scope(parser, GUMBO_TAG_TH));
|
@@ -1773,7 +1782,7 @@ static bool close_current_cell(GumboParser* parser, const GumboToken* token) {
|
|
1773
1782
|
assert(has_an_element_in_table_scope(parser, GUMBO_TAG_TH));
|
1774
1783
|
cell_tag = GUMBO_TAG_TH;
|
1775
1784
|
}
|
1776
|
-
|
1785
|
+
close_table_cell(parser, token, cell_tag);
|
1777
1786
|
}
|
1778
1787
|
|
1779
1788
|
// This factors out the "act as if an end tag of tag name 'select' had been
|
@@ -1830,14 +1839,14 @@ static bool is_special_node(const GumboNode* node) {
|
|
1830
1839
|
// specified qualified name. If the elements closed are in the set handled by
|
1831
1840
|
// generate_implied_end_tags, this is normal operation. Otherwise, a parse error
|
1832
1841
|
// is recorded and the intervening elements are popped. Returns nothing.
|
1833
|
-
static bool implicitly_close_tags (
|
1842
|
+
static void implicitly_close_tags (
|
1834
1843
|
GumboParser* parser,
|
1835
1844
|
GumboToken* token,
|
1836
1845
|
GumboNamespaceEnum target_ns,
|
1837
1846
|
GumboTag target
|
1838
1847
|
) {
|
1839
|
-
|
1840
|
-
generate_implied_end_tags(parser, target);
|
1848
|
+
assert(target != GUMBO_TAG_UNKNOWN);
|
1849
|
+
generate_implied_end_tags(parser, target, NULL);
|
1841
1850
|
if (!node_qualified_tag_is(get_current_node(parser), target_ns, target)) {
|
1842
1851
|
parser_add_parse_error(parser, token);
|
1843
1852
|
while (
|
@@ -1845,35 +1854,32 @@ static bool implicitly_close_tags (
|
|
1845
1854
|
) {
|
1846
1855
|
pop_current_node(parser);
|
1847
1856
|
}
|
1848
|
-
result = false;
|
1849
1857
|
}
|
1850
1858
|
assert(node_qualified_tag_is(get_current_node(parser), target_ns, target));
|
1851
1859
|
pop_current_node(parser);
|
1852
|
-
return result;
|
1853
1860
|
}
|
1854
1861
|
|
1855
1862
|
// If the stack of open elements has a <p> tag in button scope, this acts as if
|
1856
1863
|
// a </p> tag was encountered, implicitly closing tags. Any parse error that
|
1857
1864
|
// occurs is recorded. This is a convenience function because this particular
|
1858
1865
|
// clause appears several times in the spec.
|
1859
|
-
static
|
1866
|
+
static void maybe_implicitly_close_p_tag (
|
1860
1867
|
GumboParser* parser,
|
1861
1868
|
GumboToken* token
|
1862
1869
|
) {
|
1863
1870
|
if (has_an_element_in_button_scope(parser, GUMBO_TAG_P)) {
|
1864
|
-
|
1871
|
+
implicitly_close_tags (
|
1865
1872
|
parser,
|
1866
1873
|
token,
|
1867
1874
|
GUMBO_NAMESPACE_HTML,
|
1868
1875
|
GUMBO_TAG_P
|
1869
1876
|
);
|
1870
1877
|
}
|
1871
|
-
return true;
|
1872
1878
|
}
|
1873
1879
|
|
1874
1880
|
// Convenience function to encapsulate the logic for closing <li> or <dd>/<dt>
|
1875
1881
|
// tags. Pass true to is_li for handling <li> tags, false for <dd> and <dt>.
|
1876
|
-
static bool maybe_implicitly_close_list_tag (
|
1882
|
+
static void maybe_implicitly_close_list_tag (
|
1877
1883
|
GumboParser* parser,
|
1878
1884
|
GumboToken* token,
|
1879
1885
|
bool is_li
|
@@ -1887,21 +1893,22 @@ static bool maybe_implicitly_close_list_tag (
|
|
1887
1893
|
: node_tag_in_set(node, &dd_dt_tags)
|
1888
1894
|
;
|
1889
1895
|
if (is_list_tag) {
|
1890
|
-
|
1896
|
+
implicitly_close_tags (
|
1891
1897
|
parser,
|
1892
1898
|
token,
|
1893
1899
|
node->v.element.tag_namespace,
|
1894
1900
|
node->v.element.tag
|
1895
1901
|
);
|
1902
|
+
return;
|
1896
1903
|
}
|
1904
|
+
|
1897
1905
|
if (
|
1898
1906
|
is_special_node(node)
|
1899
1907
|
&& !node_tag_in_set(node, &(const TagSet){TAG(ADDRESS), TAG(DIV), TAG(P)})
|
1900
1908
|
) {
|
1901
|
-
return
|
1909
|
+
return;
|
1902
1910
|
}
|
1903
1911
|
}
|
1904
|
-
return true;
|
1905
1912
|
}
|
1906
1913
|
|
1907
1914
|
static void merge_attributes (
|
@@ -2020,7 +2027,7 @@ static void adjust_mathml_attributes(GumboToken* token) {
|
|
2020
2027
|
attr->name = gumbo_strdup("definitionURL");
|
2021
2028
|
}
|
2022
2029
|
|
2023
|
-
static bool maybe_add_doctype_error (
|
2030
|
+
static void maybe_add_doctype_error (
|
2024
2031
|
GumboParser* parser,
|
2025
2032
|
const GumboToken* token
|
2026
2033
|
) {
|
@@ -2032,9 +2039,7 @@ static bool maybe_add_doctype_error (
|
|
2032
2039
|
&& strcmp(doctype->system_identifier, "about:legacy-compat"))
|
2033
2040
|
) {
|
2034
2041
|
parser_add_parse_error(parser, token);
|
2035
|
-
return false;
|
2036
2042
|
}
|
2037
|
-
return true;
|
2038
2043
|
}
|
2039
2044
|
|
2040
2045
|
static void remove_from_parent(GumboNode* node) {
|
@@ -2059,30 +2064,103 @@ static void remove_from_parent(GumboNode* node) {
|
|
2059
2064
|
}
|
2060
2065
|
}
|
2061
2066
|
|
2067
|
+
// This is here to clean up memory when the spec says "Ignore current token."
|
2068
|
+
static void ignore_token(GumboParser* parser) {
|
2069
|
+
GumboToken* token = parser->_parser_state->_current_token;
|
2070
|
+
// Ownership of the token's internal buffers is normally transferred to the
|
2071
|
+
// element, but if no element is emitted (as happens in non-verbatim-mode
|
2072
|
+
// when a token is ignored), we need to free it here to prevent a memory
|
2073
|
+
// leak.
|
2074
|
+
gumbo_token_destroy(token);
|
2075
|
+
#ifndef NDEBUG
|
2076
|
+
if (token->type == GUMBO_TOKEN_START_TAG) {
|
2077
|
+
// Mark this sentinel so the assertion in the main loop knows it's been
|
2078
|
+
// destroyed.
|
2079
|
+
token->v.start_tag.attributes = kGumboEmptyVector;
|
2080
|
+
token->v.start_tag.name = NULL;
|
2081
|
+
}
|
2082
|
+
#endif
|
2083
|
+
}
|
2084
|
+
|
2085
|
+
// The token is usually an end tag; however, the adoption agency algorithm may
|
2086
|
+
// invoke this for an 'a' or 'nobr' start tag.
|
2087
|
+
// Parse errors are recorded on the parser; this function returns nothing.
|
2088
|
+
static void in_body_any_other_end_tag(GumboParser* parser, GumboToken* token)
|
2089
|
+
{
|
2090
|
+
GumboParserState* state = parser->_parser_state;
|
2091
|
+
GumboTag tag;
|
2092
|
+
const char* tagname;
|
2093
|
+
|
2094
|
+
if (token->type == GUMBO_TOKEN_END_TAG) {
|
2095
|
+
tag = token->v.end_tag.tag;
|
2096
|
+
tagname = token->v.end_tag.name;
|
2097
|
+
} else {
|
2098
|
+
assert(token->type == GUMBO_TOKEN_START_TAG);
|
2099
|
+
tag = token->v.start_tag.tag;
|
2100
|
+
tagname = token->v.start_tag.name;
|
2101
|
+
}
|
2102
|
+
|
2103
|
+
assert(state->_open_elements.length > 0);
|
2104
|
+
assert(node_html_tag_is(state->_open_elements.data[0], GUMBO_TAG_HTML));
|
2105
|
+
// Walk up the stack of open elements until we find one that either:
|
2106
|
+
// a) Matches the tag name we saw
|
2107
|
+
// b) Is in the "special" category.
|
2108
|
+
// If we see a), implicitly close everything up to and including it. If we
|
2109
|
+
// see b), then record a parse error, don't close anything (except the
|
2110
|
+
// implied end tags) and ignore the end tag token.
|
2111
|
+
for (int i = state->_open_elements.length; --i >= 0;) {
|
2112
|
+
const GumboNode* node = state->_open_elements.data[i];
|
2113
|
+
if (node_qualified_tagname_is(node, GUMBO_NAMESPACE_HTML, tag, tagname)) {
|
2114
|
+
generate_implied_end_tags(parser, tag, tagname);
|
2115
|
+
// <!DOCTYPE><body><sarcasm><foo></sarcasm> is an example of an error.
|
2116
|
+
// foo is the "current node" but sarcasm is node.
|
2117
|
+
// XXX: Write a test for this.
|
2118
|
+
if (node != get_current_node(parser)) {
|
2119
|
+
parser_add_parse_error(parser, token);
|
2120
|
+
}
|
2121
|
+
while (node != pop_current_node(parser))
|
2122
|
+
; // Pop everything.
|
2123
|
+
return;
|
2124
|
+
} else if (is_special_node(node)) {
|
2125
|
+
parser_add_parse_error(parser, token);
|
2126
|
+
ignore_token(parser);
|
2127
|
+
return;
|
2128
|
+
}
|
2129
|
+
}
|
2130
|
+
// <html> is in the special category, so we should never get here.
|
2131
|
+
assert(0 && "unreachable");
|
2132
|
+
}
|
2133
|
+
|
2062
2134
|
// https://html.spec.whatwg.org/multipage/parsing.html#an-introduction-to-error-handling-and-strange-cases-in-the-parser
|
2063
2135
|
// Also described in the "in body" handling for end formatting tags.
|
2064
|
-
// Returns
|
2065
|
-
|
2066
|
-
|
2067
|
-
GumboParser* parser,
|
2068
|
-
GumboToken* token,
|
2069
|
-
GumboTag subject
|
2070
|
-
) {
|
2136
|
+
// Parse errors are recorded on the parser; this function returns nothing.
|
2137
|
+
static void adoption_agency_algorithm(GumboParser* parser, GumboToken* token)
|
2138
|
+
{
|
2071
2139
|
GumboParserState* state = parser->_parser_state;
|
2072
2140
|
gumbo_debug("Entering adoption agency algorithm.\n");
|
2141
|
+
// Step 1.
|
2142
|
+
GumboTag subject;
|
2143
|
+
if (token->type == GUMBO_TOKEN_START_TAG) {
|
2144
|
+
subject = token->v.start_tag.tag;
|
2145
|
+
} else {
|
2146
|
+
assert(token->type == GUMBO_TOKEN_END_TAG);
|
2147
|
+
subject = token->v.end_tag.tag;
|
2148
|
+
}
|
2149
|
+
assert(subject != GUMBO_TAG_UNKNOWN);
|
2150
|
+
|
2073
2151
|
// Step 2.
|
2074
2152
|
GumboNode* current_node = get_current_node(parser);
|
2075
2153
|
if (
|
2076
|
-
current_node
|
2077
|
-
&& current_node->v.element.tag == subject
|
2154
|
+
node_html_tag_is(current_node, subject)
|
2078
2155
|
&& -1 == gumbo_vector_index_of (
|
2079
2156
|
&state->_active_formatting_elements,
|
2080
2157
|
current_node
|
2081
2158
|
)
|
2082
2159
|
) {
|
2083
2160
|
pop_current_node(parser);
|
2084
|
-
return
|
2161
|
+
return;
|
2085
2162
|
}
|
2163
|
+
|
2086
2164
|
// Steps 3-5 & 21:
|
2087
2165
|
for (unsigned int i = 0; i < 8; ++i) {
|
2088
2166
|
// Step 6.
|
@@ -2093,8 +2171,8 @@ static bool adoption_agency_algorithm (
|
|
2093
2171
|
if (current_node == &kActiveFormattingScopeMarker) {
|
2094
2172
|
gumbo_debug("Broke on scope marker; aborting.\n");
|
2095
2173
|
// Last scope marker; abort the algorithm and handle according to "any
|
2096
|
-
// other end tag.
|
2097
|
-
|
2174
|
+
// other end tag" (below).
|
2175
|
+
break;
|
2098
2176
|
}
|
2099
2177
|
if (node_html_tag_is(current_node, subject)) {
|
2100
2178
|
// Found it.
|
@@ -2116,7 +2194,8 @@ static bool adoption_agency_algorithm (
|
|
2116
2194
|
// "any other end tag" clause (which may potentially add a parse error,
|
2117
2195
|
// but not always).
|
2118
2196
|
gumbo_debug("No active formatting elements; aborting.\n");
|
2119
|
-
|
2197
|
+
in_body_any_other_end_tag(parser, token);
|
2198
|
+
return;
|
2120
2199
|
}
|
2121
2200
|
|
2122
2201
|
// Step 7
|
@@ -2127,20 +2206,19 @@ static bool adoption_agency_algorithm (
|
|
2127
2206
|
formatting_node,
|
2128
2207
|
&state->_active_formatting_elements
|
2129
2208
|
);
|
2130
|
-
return
|
2209
|
+
return;
|
2131
2210
|
}
|
2132
2211
|
|
2133
2212
|
// Step 8
|
2134
2213
|
if (!has_an_element_in_scope(parser, formatting_node->v.element.tag)) {
|
2135
2214
|
parser_add_parse_error(parser, token);
|
2136
2215
|
gumbo_debug("Element not in scope.\n");
|
2137
|
-
return
|
2216
|
+
return;
|
2138
2217
|
}
|
2139
2218
|
|
2140
2219
|
// Step 9
|
2141
|
-
if (formatting_node != get_current_node(parser))
|
2220
|
+
if (formatting_node != get_current_node(parser))
|
2142
2221
|
parser_add_parse_error(parser, token); // But continue onwards.
|
2143
|
-
}
|
2144
2222
|
assert(formatting_node);
|
2145
2223
|
assert(!node_html_tag_is(formatting_node, GUMBO_TAG_HTML));
|
2146
2224
|
assert(!node_html_tag_is(formatting_node, GUMBO_TAG_BODY));
|
@@ -2167,7 +2245,7 @@ static bool adoption_agency_algorithm (
|
|
2167
2245
|
formatting_node,
|
2168
2246
|
&state->_active_formatting_elements
|
2169
2247
|
);
|
2170
|
-
return
|
2248
|
+
return;
|
2171
2249
|
}
|
2172
2250
|
assert(!node_html_tag_is(furthest_block, GUMBO_TAG_HTML));
|
2173
2251
|
|
@@ -2348,25 +2426,6 @@ static bool adoption_agency_algorithm (
|
|
2348
2426
|
&state->_open_elements
|
2349
2427
|
);
|
2350
2428
|
} // Step 21.
|
2351
|
-
return true;
|
2352
|
-
}
|
2353
|
-
|
2354
|
-
// This is here to clean up memory when the spec says "Ignore current token."
|
2355
|
-
static void ignore_token(GumboParser* parser) {
|
2356
|
-
GumboToken* token = parser->_parser_state->_current_token;
|
2357
|
-
// Ownership of the token's internal buffers are normally transferred to the
|
2358
|
-
// element, but if no element is emitted (as happens in non-verbatim-mode
|
2359
|
-
// when a token is ignored), we need to free it here to prevent a memory
|
2360
|
-
// leak.
|
2361
|
-
gumbo_token_destroy(token);
|
2362
|
-
#ifndef NDEBUG
|
2363
|
-
if (token->type == GUMBO_TOKEN_START_TAG) {
|
2364
|
-
// Mark this sentinel so the assertion in the main loop knows it's been
|
2365
|
-
// destroyed.
|
2366
|
-
token->v.start_tag.attributes = kGumboEmptyVector;
|
2367
|
-
token->v.start_tag.name = NULL;
|
2368
|
-
}
|
2369
|
-
#endif
|
2370
2429
|
}
|
2371
2430
|
|
2372
2431
|
// https://html.spec.whatwg.org/multipage/parsing.html#the-end
|
@@ -2391,15 +2450,15 @@ static void finish_parsing(GumboParser* parser) {
|
|
2391
2450
|
; // Pop them all.
|
2392
2451
|
}
|
2393
2452
|
|
2394
|
-
static bool handle_initial(GumboParser* parser, GumboToken* token) {
|
2453
|
+
static void handle_initial(GumboParser* parser, GumboToken* token) {
|
2395
2454
|
GumboDocument* document = &get_document_node(parser)->v.document;
|
2396
2455
|
if (token->type == GUMBO_TOKEN_WHITESPACE) {
|
2397
2456
|
ignore_token(parser);
|
2398
|
-
return
|
2457
|
+
return;
|
2399
2458
|
}
|
2400
2459
|
if (token->type == GUMBO_TOKEN_COMMENT) {
|
2401
2460
|
append_comment_node(parser, get_document_node(parser), token);
|
2402
|
-
return
|
2461
|
+
return;
|
2403
2462
|
}
|
2404
2463
|
if (token->type == GUMBO_TOKEN_DOCTYPE) {
|
2405
2464
|
document->has_doctype = true;
|
@@ -2408,35 +2467,35 @@ static bool handle_initial(GumboParser* parser, GumboToken* token) {
|
|
2408
2467
|
document->system_identifier = token->v.doc_type.system_identifier;
|
2409
2468
|
document->doc_type_quirks_mode = compute_quirks_mode(&token->v.doc_type);
|
2410
2469
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_BEFORE_HTML);
|
2411
|
-
|
2470
|
+
maybe_add_doctype_error(parser, token);
|
2471
|
+
return;
|
2412
2472
|
}
|
2413
2473
|
parser_add_parse_error(parser, token);
|
2414
2474
|
document->doc_type_quirks_mode = GUMBO_DOCTYPE_QUIRKS;
|
2415
2475
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_BEFORE_HTML);
|
2416
2476
|
parser->_parser_state->_reprocess_current_token = true;
|
2417
|
-
return true;
|
2418
2477
|
}
|
2419
2478
|
|
2420
2479
|
// https://html.spec.whatwg.org/multipage/parsing.html#the-before-html-insertion-mode
|
2421
|
-
static bool handle_before_html(GumboParser* parser, GumboToken* token) {
|
2480
|
+
static void handle_before_html(GumboParser* parser, GumboToken* token) {
|
2422
2481
|
if (token->type == GUMBO_TOKEN_DOCTYPE) {
|
2423
2482
|
parser_add_parse_error(parser, token);
|
2424
2483
|
ignore_token(parser);
|
2425
|
-
return
|
2484
|
+
return;
|
2426
2485
|
}
|
2427
2486
|
if (token->type == GUMBO_TOKEN_COMMENT) {
|
2428
2487
|
append_comment_node(parser, get_document_node(parser), token);
|
2429
|
-
return
|
2488
|
+
return;
|
2430
2489
|
}
|
2431
2490
|
if (token->type == GUMBO_TOKEN_WHITESPACE) {
|
2432
2491
|
ignore_token(parser);
|
2433
|
-
return
|
2492
|
+
return;
|
2434
2493
|
}
|
2435
2494
|
if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
|
2436
2495
|
GumboNode* html_node = insert_element_from_token(parser, token);
|
2437
2496
|
parser->_output->root = html_node;
|
2438
2497
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_BEFORE_HEAD);
|
2439
|
-
return
|
2498
|
+
return;
|
2440
2499
|
}
|
2441
2500
|
if (
|
2442
2501
|
token->type == GUMBO_TOKEN_END_TAG
|
@@ -2444,7 +2503,7 @@ static bool handle_before_html(GumboParser* parser, GumboToken* token) {
|
|
2444
2503
|
) {
|
2445
2504
|
parser_add_parse_error(parser, token);
|
2446
2505
|
ignore_token(parser);
|
2447
|
-
return
|
2506
|
+
return;
|
2448
2507
|
}
|
2449
2508
|
GumboNode* html_node = insert_element_of_tag_type (
|
2450
2509
|
parser,
|
@@ -2455,37 +2514,37 @@ static bool handle_before_html(GumboParser* parser, GumboToken* token) {
|
|
2455
2514
|
parser->_output->root = html_node;
|
2456
2515
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_BEFORE_HEAD);
|
2457
2516
|
parser->_parser_state->_reprocess_current_token = true;
|
2458
|
-
return true;
|
2459
2517
|
}
|
2460
2518
|
|
2461
2519
|
// Forward declarations because of mutual dependencies.
|
2462
|
-
static
|
2463
|
-
static
|
2464
|
-
static
|
2520
|
+
static void handle_token(GumboParser* parser, GumboToken* token);
|
2521
|
+
static void handle_in_body(GumboParser* parser, GumboToken* token);
|
2522
|
+
static void handle_in_template(GumboParser* parser, GumboToken* token);
|
2465
2523
|
|
2466
2524
|
// https://html.spec.whatwg.org/multipage/parsing.html#the-before-head-insertion-mode
|
2467
|
-
static bool handle_before_head(GumboParser* parser, GumboToken* token) {
|
2525
|
+
static void handle_before_head(GumboParser* parser, GumboToken* token) {
|
2468
2526
|
if (token->type == GUMBO_TOKEN_WHITESPACE) {
|
2469
2527
|
ignore_token(parser);
|
2470
|
-
return
|
2528
|
+
return;
|
2471
2529
|
}
|
2472
2530
|
if (token->type == GUMBO_TOKEN_COMMENT) {
|
2473
2531
|
append_comment_node(parser, get_current_node(parser), token);
|
2474
|
-
return
|
2532
|
+
return;
|
2475
2533
|
}
|
2476
2534
|
if (token->type == GUMBO_TOKEN_DOCTYPE) {
|
2477
2535
|
parser_add_parse_error(parser, token);
|
2478
2536
|
ignore_token(parser);
|
2479
|
-
return
|
2537
|
+
return;
|
2480
2538
|
}
|
2481
2539
|
if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
|
2482
|
-
|
2540
|
+
handle_in_body(parser, token);
|
2541
|
+
return;
|
2483
2542
|
}
|
2484
2543
|
if (tag_is(token, kStartTag, GUMBO_TAG_HEAD)) {
|
2485
2544
|
GumboNode* node = insert_element_from_token(parser, token);
|
2486
2545
|
parser->_parser_state->_head_element = node;
|
2487
2546
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_HEAD);
|
2488
|
-
return
|
2547
|
+
return;
|
2489
2548
|
}
|
2490
2549
|
if (
|
2491
2550
|
token->type == GUMBO_TOKEN_END_TAG
|
@@ -2493,7 +2552,7 @@ static bool handle_before_head(GumboParser* parser, GumboToken* token) {
|
|
2493
2552
|
) {
|
2494
2553
|
parser_add_parse_error(parser, token);
|
2495
2554
|
ignore_token(parser);
|
2496
|
-
return
|
2555
|
+
return;
|
2497
2556
|
}
|
2498
2557
|
GumboNode* node = insert_element_of_tag_type (
|
2499
2558
|
parser,
|
@@ -2503,23 +2562,22 @@ static bool handle_before_head(GumboParser* parser, GumboToken* token) {
|
|
2503
2562
|
parser->_parser_state->_head_element = node;
|
2504
2563
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_HEAD);
|
2505
2564
|
parser->_parser_state->_reprocess_current_token = true;
|
2506
|
-
return true;
|
2507
2565
|
}
|
2508
2566
|
|
2509
2567
|
// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inhead
|
2510
|
-
static bool handle_in_head(GumboParser* parser, GumboToken* token) {
|
2568
|
+
static void handle_in_head(GumboParser* parser, GumboToken* token) {
|
2511
2569
|
if (token->type == GUMBO_TOKEN_WHITESPACE) {
|
2512
2570
|
insert_text_token(parser, token);
|
2513
|
-
return
|
2571
|
+
return;
|
2514
2572
|
}
|
2515
2573
|
if (token->type == GUMBO_TOKEN_COMMENT) {
|
2516
2574
|
append_comment_node(parser, get_current_node(parser), token);
|
2517
|
-
return
|
2575
|
+
return;
|
2518
2576
|
}
|
2519
2577
|
if (token->type == GUMBO_TOKEN_DOCTYPE) {
|
2520
2578
|
parser_add_parse_error(parser, token);
|
2521
2579
|
ignore_token(parser);
|
2522
|
-
return
|
2580
|
+
return;
|
2523
2581
|
}
|
2524
2582
|
if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
|
2525
2583
|
return handle_in_body(parser, token);
|
@@ -2532,7 +2590,7 @@ static bool handle_in_head(GumboParser* parser, GumboToken* token) {
|
|
2532
2590
|
insert_element_from_token(parser, token);
|
2533
2591
|
pop_current_node(parser);
|
2534
2592
|
acknowledge_self_closing_tag(parser);
|
2535
|
-
return
|
2593
|
+
return;
|
2536
2594
|
}
|
2537
2595
|
if (tag_is(token, kStartTag, GUMBO_TAG_META)) {
|
2538
2596
|
insert_element_from_token(parser, token);
|
@@ -2542,33 +2600,33 @@ static bool handle_in_head(GumboParser* parser, GumboToken* token) {
|
|
2542
2600
|
// spec doesn't apply. If clients want to handle meta-tag re-encoding, they
|
2543
2601
|
// should specifically look for that string in the document and re-encode it
|
2544
2602
|
// before passing to Gumbo.
|
2545
|
-
return
|
2603
|
+
return;
|
2546
2604
|
}
|
2547
2605
|
if (tag_is(token, kStartTag, GUMBO_TAG_TITLE)) {
|
2548
2606
|
run_generic_parsing_algorithm(parser, token, GUMBO_LEX_RCDATA);
|
2549
|
-
return
|
2607
|
+
return;
|
2550
2608
|
}
|
2551
2609
|
if (
|
2552
2610
|
tag_in(token, kStartTag, &(const TagSet){TAG(NOFRAMES), TAG(STYLE)})
|
2553
2611
|
) {
|
2554
2612
|
run_generic_parsing_algorithm(parser, token, GUMBO_LEX_RAWTEXT);
|
2555
|
-
return
|
2613
|
+
return;
|
2556
2614
|
}
|
2557
2615
|
if (tag_is(token, kStartTag, GUMBO_TAG_NOSCRIPT)) {
|
2558
2616
|
insert_element_from_token(parser, token);
|
2559
2617
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_HEAD_NOSCRIPT);
|
2560
|
-
return
|
2618
|
+
return;
|
2561
2619
|
}
|
2562
2620
|
if (tag_is(token, kStartTag, GUMBO_TAG_SCRIPT)) {
|
2563
2621
|
run_generic_parsing_algorithm(parser, token, GUMBO_LEX_SCRIPT_DATA);
|
2564
|
-
return
|
2622
|
+
return;
|
2565
2623
|
}
|
2566
2624
|
if (tag_is(token, kEndTag, GUMBO_TAG_HEAD)) {
|
2567
2625
|
GumboNode* head = pop_current_node(parser);
|
2568
2626
|
UNUSED_IF_NDEBUG(head);
|
2569
2627
|
assert(node_html_tag_is(head, GUMBO_TAG_HEAD));
|
2570
2628
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_HEAD);
|
2571
|
-
return
|
2629
|
+
return;
|
2572
2630
|
}
|
2573
2631
|
if (
|
2574
2632
|
tag_in(token, kEndTag, &(const TagSet){TAG(BODY), TAG(HTML), TAG(BR)})
|
@@ -2576,7 +2634,7 @@ static bool handle_in_head(GumboParser* parser, GumboToken* token) {
|
|
2576
2634
|
pop_current_node(parser);
|
2577
2635
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_HEAD);
|
2578
2636
|
parser->_parser_state->_reprocess_current_token = true;
|
2579
|
-
return
|
2637
|
+
return;
|
2580
2638
|
}
|
2581
2639
|
if (tag_is(token, kStartTag, GUMBO_TAG_TEMPLATE)) {
|
2582
2640
|
insert_element_from_token(parser, token);
|
@@ -2584,26 +2642,23 @@ static bool handle_in_head(GumboParser* parser, GumboToken* token) {
|
|
2584
2642
|
set_frameset_not_ok(parser);
|
2585
2643
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TEMPLATE);
|
2586
2644
|
push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TEMPLATE);
|
2587
|
-
return
|
2645
|
+
return;
|
2588
2646
|
}
|
2589
2647
|
if (tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
|
2590
2648
|
if (!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
|
2591
2649
|
parser_add_parse_error(parser, token);
|
2592
2650
|
ignore_token(parser);
|
2593
|
-
return
|
2651
|
+
return;
|
2594
2652
|
}
|
2595
2653
|
generate_all_implied_end_tags_thoroughly(parser);
|
2596
|
-
|
2597
|
-
if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_TEMPLATE)) {
|
2654
|
+
if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_TEMPLATE))
|
2598
2655
|
parser_add_parse_error(parser, token);
|
2599
|
-
success = false;
|
2600
|
-
}
|
2601
2656
|
while (!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_TEMPLATE))
|
2602
2657
|
;
|
2603
2658
|
clear_active_formatting_elements(parser);
|
2604
2659
|
pop_template_insertion_mode(parser);
|
2605
2660
|
reset_insertion_mode_appropriately(parser);
|
2606
|
-
return
|
2661
|
+
return;
|
2607
2662
|
}
|
2608
2663
|
if (
|
2609
2664
|
tag_is(token, kStartTag, GUMBO_TAG_HEAD)
|
@@ -2611,29 +2666,30 @@ static bool handle_in_head(GumboParser* parser, GumboToken* token) {
|
|
2611
2666
|
) {
|
2612
2667
|
parser_add_parse_error(parser, token);
|
2613
2668
|
ignore_token(parser);
|
2614
|
-
return
|
2669
|
+
return;
|
2615
2670
|
}
|
2616
2671
|
pop_current_node(parser);
|
2617
2672
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_HEAD);
|
2618
2673
|
parser->_parser_state->_reprocess_current_token = true;
|
2619
|
-
return
|
2674
|
+
return;
|
2620
2675
|
}
|
2621
2676
|
|
2622
2677
|
// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inheadnoscript
|
2623
|
-
static bool handle_in_head_noscript(GumboParser* parser, GumboToken* token) {
|
2678
|
+
static void handle_in_head_noscript(GumboParser* parser, GumboToken* token) {
|
2624
2679
|
if (token->type == GUMBO_TOKEN_DOCTYPE) {
|
2625
2680
|
parser_add_parse_error(parser, token);
|
2626
|
-
return
|
2681
|
+
return;
|
2627
2682
|
}
|
2628
2683
|
if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
|
2629
|
-
|
2684
|
+
handle_in_body(parser, token);
|
2685
|
+
return;
|
2630
2686
|
}
|
2631
2687
|
if (tag_is(token, kEndTag, GUMBO_TAG_NOSCRIPT)) {
|
2632
2688
|
const GumboNode* node = pop_current_node(parser);
|
2633
2689
|
assert(node_html_tag_is(node, GUMBO_TAG_NOSCRIPT));
|
2634
2690
|
UNUSED_IF_NDEBUG(node);
|
2635
2691
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_HEAD);
|
2636
|
-
return
|
2692
|
+
return;
|
2637
2693
|
}
|
2638
2694
|
if (
|
2639
2695
|
token->type == GUMBO_TOKEN_WHITESPACE
|
@@ -2643,7 +2699,8 @@ static bool handle_in_head_noscript(GumboParser* parser, GumboToken* token) {
|
|
2643
2699
|
TAG(META), TAG(NOFRAMES), TAG(STYLE)
|
2644
2700
|
})
|
2645
2701
|
) {
|
2646
|
-
|
2702
|
+
handle_in_head(parser, token);
|
2703
|
+
return;
|
2647
2704
|
}
|
2648
2705
|
if (
|
2649
2706
|
tag_in(token, kStartTag, &(const TagSet){TAG(HEAD), TAG(NOSCRIPT)})
|
@@ -2654,7 +2711,7 @@ static bool handle_in_head_noscript(GumboParser* parser, GumboToken* token) {
|
|
2654
2711
|
) {
|
2655
2712
|
parser_add_parse_error(parser, token);
|
2656
2713
|
ignore_token(parser);
|
2657
|
-
return
|
2714
|
+
return;
|
2658
2715
|
}
|
2659
2716
|
parser_add_parse_error(parser, token);
|
2660
2717
|
const GumboNode* node = pop_current_node(parser);
|
@@ -2662,38 +2719,38 @@ static bool handle_in_head_noscript(GumboParser* parser, GumboToken* token) {
|
|
2662
2719
|
UNUSED_IF_NDEBUG(node);
|
2663
2720
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_HEAD);
|
2664
2721
|
parser->_parser_state->_reprocess_current_token = true;
|
2665
|
-
return false;
|
2666
2722
|
}
|
2667
2723
|
|
2668
2724
|
// https://html.spec.whatwg.org/multipage/parsing.html#the-after-head-insertion-mode
|
2669
|
-
static bool handle_after_head(GumboParser* parser, GumboToken* token) {
|
2725
|
+
static void handle_after_head(GumboParser* parser, GumboToken* token) {
|
2670
2726
|
GumboParserState* state = parser->_parser_state;
|
2671
2727
|
if (token->type == GUMBO_TOKEN_WHITESPACE) {
|
2672
2728
|
insert_text_token(parser, token);
|
2673
|
-
return
|
2729
|
+
return;
|
2674
2730
|
}
|
2675
2731
|
if (token->type == GUMBO_TOKEN_COMMENT) {
|
2676
2732
|
append_comment_node(parser, get_current_node(parser), token);
|
2677
|
-
return
|
2733
|
+
return;
|
2678
2734
|
}
|
2679
2735
|
if (token->type == GUMBO_TOKEN_DOCTYPE) {
|
2680
2736
|
parser_add_parse_error(parser, token);
|
2681
2737
|
ignore_token(parser);
|
2682
|
-
return
|
2738
|
+
return;
|
2683
2739
|
}
|
2684
2740
|
if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
|
2685
|
-
|
2741
|
+
handle_in_body(parser, token);
|
2742
|
+
return;
|
2686
2743
|
}
|
2687
2744
|
if (tag_is(token, kStartTag, GUMBO_TAG_BODY)) {
|
2688
2745
|
insert_element_from_token(parser, token);
|
2689
2746
|
set_frameset_not_ok(parser);
|
2690
2747
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_BODY);
|
2691
|
-
return
|
2748
|
+
return;
|
2692
2749
|
}
|
2693
2750
|
if (tag_is(token, kStartTag, GUMBO_TAG_FRAMESET)) {
|
2694
2751
|
insert_element_from_token(parser, token);
|
2695
2752
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_FRAMESET);
|
2696
|
-
return
|
2753
|
+
return;
|
2697
2754
|
}
|
2698
2755
|
if (
|
2699
2756
|
tag_in(token, kStartTag, &(const TagSet) {
|
@@ -2709,10 +2766,11 @@ static bool handle_after_head(GumboParser* parser, GumboToken* token) {
|
|
2709
2766
|
gumbo_vector_add(state->_head_element, &state->_open_elements);
|
2710
2767
|
handle_in_head(parser, token);
|
2711
2768
|
gumbo_vector_remove(state->_head_element, &state->_open_elements);
|
2712
|
-
return
|
2769
|
+
return;
|
2713
2770
|
}
|
2714
2771
|
if (tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
|
2715
|
-
|
2772
|
+
handle_in_head(parser, token);
|
2773
|
+
return;
|
2716
2774
|
}
|
2717
2775
|
if (
|
2718
2776
|
tag_is(token, kStartTag, GUMBO_TAG_HEAD)
|
@@ -2723,27 +2781,26 @@ static bool handle_after_head(GumboParser* parser, GumboToken* token) {
|
|
2723
2781
|
) {
|
2724
2782
|
parser_add_parse_error(parser, token);
|
2725
2783
|
ignore_token(parser);
|
2726
|
-
return
|
2784
|
+
return;
|
2727
2785
|
}
|
2728
2786
|
insert_element_of_tag_type(parser, GUMBO_TAG_BODY, GUMBO_INSERTION_IMPLIED);
|
2729
2787
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_BODY);
|
2730
2788
|
state->_reprocess_current_token = true;
|
2731
|
-
return true;
|
2732
2789
|
}
|
2733
2790
|
|
2734
2791
|
// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inbody
|
2735
|
-
static
|
2792
|
+
static void handle_in_body(GumboParser* parser, GumboToken* token) {
|
2736
2793
|
GumboParserState* state = parser->_parser_state;
|
2737
2794
|
assert(state->_open_elements.length > 0);
|
2738
2795
|
if (token->type == GUMBO_TOKEN_NULL) {
|
2739
2796
|
parser_add_parse_error(parser, token);
|
2740
2797
|
ignore_token(parser);
|
2741
|
-
return
|
2798
|
+
return;
|
2742
2799
|
}
|
2743
2800
|
if (token->type == GUMBO_TOKEN_WHITESPACE) {
|
2744
2801
|
reconstruct_active_formatting_elements(parser);
|
2745
2802
|
insert_text_token(parser, token);
|
2746
|
-
return
|
2803
|
+
return;
|
2747
2804
|
}
|
2748
2805
|
if (
|
2749
2806
|
token->type == GUMBO_TOKEN_CHARACTER
|
@@ -2752,27 +2809,27 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2752
2809
|
reconstruct_active_formatting_elements(parser);
|
2753
2810
|
insert_text_token(parser, token);
|
2754
2811
|
set_frameset_not_ok(parser);
|
2755
|
-
return
|
2812
|
+
return;
|
2756
2813
|
}
|
2757
2814
|
if (token->type == GUMBO_TOKEN_COMMENT) {
|
2758
2815
|
append_comment_node(parser, get_current_node(parser), token);
|
2759
|
-
return
|
2816
|
+
return;
|
2760
2817
|
}
|
2761
2818
|
if (token->type == GUMBO_TOKEN_DOCTYPE) {
|
2762
2819
|
parser_add_parse_error(parser, token);
|
2763
2820
|
ignore_token(parser);
|
2764
|
-
return
|
2821
|
+
return;
|
2765
2822
|
}
|
2766
2823
|
if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
|
2767
2824
|
parser_add_parse_error(parser, token);
|
2768
2825
|
if (has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
|
2769
2826
|
ignore_token(parser);
|
2770
|
-
return
|
2827
|
+
return;
|
2771
2828
|
}
|
2772
2829
|
assert(parser->_output->root != NULL);
|
2773
2830
|
assert(parser->_output->root->type == GUMBO_NODE_ELEMENT);
|
2774
2831
|
merge_attributes(token, parser->_output->root);
|
2775
|
-
return
|
2832
|
+
return;
|
2776
2833
|
}
|
2777
2834
|
if (
|
2778
2835
|
tag_in(token, kStartTag, &(const TagSet) {
|
@@ -2782,7 +2839,8 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2782
2839
|
})
|
2783
2840
|
|| tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)
|
2784
2841
|
) {
|
2785
|
-
|
2842
|
+
handle_in_head(parser, token);
|
2843
|
+
return;
|
2786
2844
|
}
|
2787
2845
|
if (tag_is(token, kStartTag, GUMBO_TAG_BODY)) {
|
2788
2846
|
parser_add_parse_error(parser, token);
|
@@ -2796,7 +2854,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2796
2854
|
set_frameset_not_ok(parser);
|
2797
2855
|
merge_attributes(token, state->_open_elements.data[1]);
|
2798
2856
|
}
|
2799
|
-
return
|
2857
|
+
return;
|
2800
2858
|
}
|
2801
2859
|
if (tag_is(token, kStartTag, GUMBO_TAG_FRAMESET)) {
|
2802
2860
|
parser_add_parse_error(parser, token);
|
@@ -2806,7 +2864,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2806
2864
|
|| !state->_frameset_ok
|
2807
2865
|
) {
|
2808
2866
|
ignore_token(parser);
|
2809
|
-
return
|
2867
|
+
return;
|
2810
2868
|
}
|
2811
2869
|
// Save the body node for later removal.
|
2812
2870
|
GumboNode* body_node = state->_open_elements.data[1];
|
@@ -2838,50 +2896,43 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2838
2896
|
// Insert the <frameset>, and switch the insertion mode.
|
2839
2897
|
insert_element_from_token(parser, token);
|
2840
2898
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_FRAMESET);
|
2841
|
-
return
|
2899
|
+
return;
|
2842
2900
|
}
|
2843
2901
|
if (token->type == GUMBO_TOKEN_EOF) {
|
2844
2902
|
if (get_current_template_insertion_mode(parser) !=
|
2845
2903
|
GUMBO_INSERTION_MODE_INITIAL) {
|
2846
|
-
|
2904
|
+
handle_in_template(parser, token);
|
2905
|
+
return;
|
2847
2906
|
}
|
2848
|
-
if (stack_contains_nonclosable_element(parser))
|
2907
|
+
if (stack_contains_nonclosable_element(parser))
|
2849
2908
|
parser_add_parse_error(parser, token);
|
2850
|
-
|
2851
|
-
}
|
2852
|
-
return true;
|
2909
|
+
return;
|
2853
2910
|
}
|
2854
2911
|
if (tag_is(token, kEndTag, GUMBO_TAG_BODY)) {
|
2855
2912
|
if (!has_an_element_in_scope(parser, GUMBO_TAG_BODY)) {
|
2856
2913
|
parser_add_parse_error(parser, token);
|
2857
2914
|
ignore_token(parser);
|
2858
|
-
return
|
2915
|
+
return;
|
2859
2916
|
}
|
2860
|
-
|
2861
|
-
if (stack_contains_nonclosable_element(parser)) {
|
2917
|
+
if (stack_contains_nonclosable_element(parser))
|
2862
2918
|
parser_add_parse_error(parser, token);
|
2863
|
-
success = false;
|
2864
|
-
}
|
2865
2919
|
GumboNode* body = state->_open_elements.data[1];
|
2866
2920
|
assert(node_html_tag_is(body, GUMBO_TAG_BODY));
|
2867
2921
|
record_end_of_element(state->_current_token, &body->v.element);
|
2868
2922
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_BODY);
|
2869
|
-
return
|
2923
|
+
return;
|
2870
2924
|
}
|
2871
2925
|
if (tag_is(token, kEndTag, GUMBO_TAG_HTML)) {
|
2872
2926
|
if (!has_an_element_in_scope(parser, GUMBO_TAG_BODY)) {
|
2873
2927
|
parser_add_parse_error(parser, token);
|
2874
2928
|
ignore_token(parser);
|
2875
|
-
return
|
2929
|
+
return;
|
2876
2930
|
}
|
2877
|
-
|
2878
|
-
if (stack_contains_nonclosable_element(parser)) {
|
2931
|
+
if (stack_contains_nonclosable_element(parser))
|
2879
2932
|
parser_add_parse_error(parser, token);
|
2880
|
-
success = false;
|
2881
|
-
}
|
2882
2933
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_BODY);
|
2883
2934
|
parser->_parser_state->_reprocess_current_token = true;
|
2884
|
-
return
|
2935
|
+
return;
|
2885
2936
|
}
|
2886
2937
|
if (
|
2887
2938
|
tag_in(token, kStartTag, &(const TagSet) {
|
@@ -2892,26 +2943,25 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2892
2943
|
TAG(SUMMARY), TAG(UL)
|
2893
2944
|
})
|
2894
2945
|
) {
|
2895
|
-
|
2946
|
+
maybe_implicitly_close_p_tag(parser, token);
|
2896
2947
|
insert_element_from_token(parser, token);
|
2897
|
-
return
|
2948
|
+
return;
|
2898
2949
|
}
|
2899
2950
|
if (tag_in(token, kStartTag, &heading_tags)) {
|
2900
|
-
|
2951
|
+
maybe_implicitly_close_p_tag(parser, token);
|
2901
2952
|
if (node_tag_in_set(get_current_node(parser), &heading_tags)) {
|
2902
2953
|
parser_add_parse_error(parser, token);
|
2903
2954
|
pop_current_node(parser);
|
2904
|
-
result = false;
|
2905
2955
|
}
|
2906
2956
|
insert_element_from_token(parser, token);
|
2907
|
-
return
|
2957
|
+
return;
|
2908
2958
|
}
|
2909
2959
|
if (tag_in(token, kStartTag, &(const TagSet){TAG(PRE), TAG(LISTING)})) {
|
2910
|
-
|
2960
|
+
maybe_implicitly_close_p_tag(parser, token);
|
2911
2961
|
insert_element_from_token(parser, token);
|
2912
2962
|
state->_ignore_next_linefeed = true;
|
2913
2963
|
set_frameset_not_ok(parser);
|
2914
|
-
return
|
2964
|
+
return;
|
2915
2965
|
}
|
2916
2966
|
if (tag_is(token, kStartTag, GUMBO_TAG_FORM)) {
|
2917
2967
|
if (
|
@@ -2921,48 +2971,46 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2921
2971
|
gumbo_debug("Ignoring nested form.\n");
|
2922
2972
|
parser_add_parse_error(parser, token);
|
2923
2973
|
ignore_token(parser);
|
2924
|
-
return
|
2974
|
+
return;
|
2925
2975
|
}
|
2926
|
-
|
2976
|
+
maybe_implicitly_close_p_tag(parser, token);
|
2927
2977
|
GumboNode* form_element = insert_element_from_token(parser, token);
|
2928
2978
|
if (!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
|
2929
2979
|
state->_form_element = form_element;
|
2930
2980
|
}
|
2931
|
-
return
|
2981
|
+
return;
|
2932
2982
|
}
|
2933
2983
|
if (tag_is(token, kStartTag, GUMBO_TAG_LI)) {
|
2934
|
-
|
2935
|
-
|
2984
|
+
maybe_implicitly_close_list_tag(parser, token, true);
|
2985
|
+
maybe_implicitly_close_p_tag(parser, token);
|
2936
2986
|
insert_element_from_token(parser, token);
|
2937
|
-
return
|
2987
|
+
return;
|
2938
2988
|
}
|
2939
2989
|
if (tag_in(token, kStartTag, &dd_dt_tags)) {
|
2940
|
-
|
2941
|
-
|
2990
|
+
maybe_implicitly_close_list_tag(parser, token, false);
|
2991
|
+
maybe_implicitly_close_p_tag(parser, token);
|
2942
2992
|
insert_element_from_token(parser, token);
|
2943
|
-
return
|
2993
|
+
return;
|
2944
2994
|
}
|
2945
2995
|
if (tag_is(token, kStartTag, GUMBO_TAG_PLAINTEXT)) {
|
2946
|
-
|
2996
|
+
maybe_implicitly_close_p_tag(parser, token);
|
2947
2997
|
insert_element_from_token(parser, token);
|
2948
2998
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_PLAINTEXT);
|
2949
|
-
return
|
2999
|
+
return;
|
2950
3000
|
}
|
2951
3001
|
if (tag_is(token, kStartTag, GUMBO_TAG_BUTTON)) {
|
2952
|
-
bool success = true;
|
2953
3002
|
if (has_an_element_in_scope(parser, GUMBO_TAG_BUTTON)) {
|
2954
3003
|
parser_add_parse_error(parser, token);
|
2955
|
-
success = false;
|
2956
3004
|
// We don't want to use implicitly_close_tags here because it may add an
|
2957
3005
|
// error and we've already added the only error the standard specifies.
|
2958
|
-
generate_implied_end_tags(parser, GUMBO_TAG_LAST);
|
3006
|
+
generate_implied_end_tags(parser, GUMBO_TAG_LAST, NULL);
|
2959
3007
|
while (!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_BUTTON))
|
2960
3008
|
;
|
2961
3009
|
}
|
2962
3010
|
reconstruct_active_formatting_elements(parser);
|
2963
3011
|
insert_element_from_token(parser, token);
|
2964
3012
|
set_frameset_not_ok(parser);
|
2965
|
-
return
|
3013
|
+
return;
|
2966
3014
|
}
|
2967
3015
|
if (
|
2968
3016
|
tag_in(token, kEndTag, &(const TagSet) {
|
@@ -2977,7 +3025,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2977
3025
|
if (!has_an_element_in_scope(parser, tag)) {
|
2978
3026
|
parser_add_parse_error(parser, token);
|
2979
3027
|
ignore_token(parser);
|
2980
|
-
return
|
3028
|
+
return;
|
2981
3029
|
}
|
2982
3030
|
return implicitly_close_tags (
|
2983
3031
|
parser,
|
@@ -2991,19 +3039,15 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2991
3039
|
if (!has_an_element_in_scope(parser, GUMBO_TAG_FORM)) {
|
2992
3040
|
parser_add_parse_error(parser, token);
|
2993
3041
|
ignore_token(parser);
|
2994
|
-
return
|
3042
|
+
return;
|
2995
3043
|
}
|
2996
|
-
|
2997
|
-
|
2998
|
-
if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_FORM)) {
|
3044
|
+
generate_implied_end_tags(parser, GUMBO_TAG_LAST, NULL);
|
3045
|
+
if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_FORM))
|
2999
3046
|
parser_add_parse_error(parser, token);
|
3000
|
-
success = false;
|
3001
|
-
}
|
3002
3047
|
while (!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_FORM))
|
3003
3048
|
;
|
3004
|
-
return
|
3049
|
+
return;
|
3005
3050
|
} else {
|
3006
|
-
bool result = true;
|
3007
3051
|
GumboNode* node = state->_form_element;
|
3008
3052
|
assert(!node || node->type == GUMBO_NODE_ELEMENT);
|
3009
3053
|
state->_form_element = NULL;
|
@@ -3011,27 +3055,24 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
3011
3055
|
gumbo_debug("Closing an unopened form.\n");
|
3012
3056
|
parser_add_parse_error(parser, token);
|
3013
3057
|
ignore_token(parser);
|
3014
|
-
return
|
3058
|
+
return;
|
3015
3059
|
}
|
3016
3060
|
// This differs from implicitly_close_tags because we remove *only* the
|
3017
3061
|
// <form> element; other nodes are left in scope.
|
3018
|
-
generate_implied_end_tags(parser, GUMBO_TAG_LAST);
|
3019
|
-
if (get_current_node(parser) != node)
|
3062
|
+
generate_implied_end_tags(parser, GUMBO_TAG_LAST, NULL);
|
3063
|
+
if (get_current_node(parser) != node)
|
3020
3064
|
parser_add_parse_error(parser, token);
|
3021
|
-
|
3022
|
-
} else {
|
3065
|
+
else
|
3023
3066
|
record_end_of_element(token, &node->v.element);
|
3024
|
-
}
|
3025
3067
|
|
3026
3068
|
GumboVector* open_elements = &state->_open_elements;
|
3027
3069
|
int index = gumbo_vector_index_of(open_elements, node);
|
3028
3070
|
assert(index >= 0);
|
3029
3071
|
gumbo_vector_remove_at(index, open_elements);
|
3030
|
-
return
|
3072
|
+
return;
|
3031
3073
|
}
|
3032
3074
|
}
|
3033
3075
|
if (tag_is(token, kEndTag, GUMBO_TAG_P)) {
|
3034
|
-
bool success = true;
|
3035
3076
|
if (!has_an_element_in_button_scope(parser, GUMBO_TAG_P)) {
|
3036
3077
|
parser_add_parse_error(parser, token);
|
3037
3078
|
// reconstruct_active_formatting_elements(parser);
|
@@ -3040,41 +3081,43 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
3040
3081
|
GUMBO_TAG_P,
|
3041
3082
|
GUMBO_INSERTION_CONVERTED_FROM_END_TAG
|
3042
3083
|
);
|
3043
|
-
success = false;
|
3044
3084
|
}
|
3045
|
-
|
3085
|
+
implicitly_close_tags (
|
3046
3086
|
parser,
|
3047
3087
|
token,
|
3048
3088
|
GUMBO_NAMESPACE_HTML,
|
3049
3089
|
GUMBO_TAG_P
|
3050
|
-
)
|
3090
|
+
);
|
3091
|
+
return;
|
3051
3092
|
}
|
3052
3093
|
if (tag_is(token, kEndTag, GUMBO_TAG_LI)) {
|
3053
3094
|
if (!has_an_element_in_list_scope(parser, GUMBO_TAG_LI)) {
|
3054
3095
|
parser_add_parse_error(parser, token);
|
3055
3096
|
ignore_token(parser);
|
3056
|
-
return
|
3097
|
+
return;
|
3057
3098
|
}
|
3058
|
-
|
3099
|
+
implicitly_close_tags (
|
3059
3100
|
parser,
|
3060
3101
|
token,
|
3061
3102
|
GUMBO_NAMESPACE_HTML,
|
3062
3103
|
GUMBO_TAG_LI
|
3063
3104
|
);
|
3105
|
+
return;
|
3064
3106
|
}
|
3065
3107
|
if (tag_in(token, kEndTag, &dd_dt_tags)) {
|
3066
3108
|
GumboTag token_tag = token->v.end_tag.tag;
|
3067
3109
|
if (!has_an_element_in_scope(parser, token_tag)) {
|
3068
3110
|
parser_add_parse_error(parser, token);
|
3069
3111
|
ignore_token(parser);
|
3070
|
-
return
|
3112
|
+
return;
|
3071
3113
|
}
|
3072
|
-
|
3114
|
+
implicitly_close_tags (
|
3073
3115
|
parser,
|
3074
3116
|
token,
|
3075
3117
|
GUMBO_NAMESPACE_HTML,
|
3076
3118
|
token_tag
|
3077
3119
|
);
|
3120
|
+
return;
|
3078
3121
|
}
|
3079
3122
|
if (tag_in(token, kEndTag, &heading_tags)) {
|
3080
3123
|
if (
|
@@ -3086,12 +3129,11 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
3086
3129
|
// No heading open; ignore the token entirely.
|
3087
3130
|
parser_add_parse_error(parser, token);
|
3088
3131
|
ignore_token(parser);
|
3089
|
-
return
|
3132
|
+
return;
|
3090
3133
|
}
|
3091
|
-
generate_implied_end_tags(parser, GUMBO_TAG_LAST);
|
3134
|
+
generate_implied_end_tags(parser, GUMBO_TAG_LAST, NULL);
|
3092
3135
|
const GumboNode* current_node = get_current_node(parser);
|
3093
|
-
|
3094
|
-
if (!success) {
|
3136
|
+
if (!node_html_tag_is(current_node, token->v.end_tag.tag)) {
|
3095
3137
|
// There're children of the heading currently open; close them below and
|
3096
3138
|
// record a parse error.
|
3097
3139
|
// TODO(jdtang): Add a way to distinguish this error case from the one
|
@@ -3101,17 +3143,15 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
3101
3143
|
do {
|
3102
3144
|
current_node = pop_current_node(parser);
|
3103
3145
|
} while (!node_tag_in_set(current_node, &heading_tags));
|
3104
|
-
return
|
3146
|
+
return;
|
3105
3147
|
}
|
3106
3148
|
if (tag_is(token, kStartTag, GUMBO_TAG_A)) {
|
3107
|
-
bool success = true;
|
3108
3149
|
int last_a;
|
3109
3150
|
int has_matching_a = find_last_anchor_index(parser, &last_a);
|
3110
3151
|
if (has_matching_a) {
|
3111
3152
|
assert(has_matching_a == 1);
|
3112
3153
|
parser_add_parse_error(parser, token);
|
3113
|
-
|
3114
|
-
assert(handled);
|
3154
|
+
(void)adoption_agency_algorithm(parser, token);
|
3115
3155
|
// The adoption agency algorithm usually removes all instances of <a>
|
3116
3156
|
// from the list of active formatting elements, but in case it doesn't,
|
3117
3157
|
// we're supposed to do this. (The conditions where it might not are
|
@@ -3123,11 +3163,10 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
3123
3163
|
);
|
3124
3164
|
gumbo_vector_remove(last_element, &state->_open_elements);
|
3125
3165
|
}
|
3126
|
-
success = false;
|
3127
3166
|
}
|
3128
3167
|
reconstruct_active_formatting_elements(parser);
|
3129
3168
|
add_formatting_element(parser, insert_element_from_token(parser, token));
|
3130
|
-
return
|
3169
|
+
return;
|
3131
3170
|
}
|
3132
3171
|
if (
|
3133
3172
|
tag_in(token, kStartTag, &(const TagSet) {
|
@@ -3137,21 +3176,18 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
3137
3176
|
) {
|
3138
3177
|
reconstruct_active_formatting_elements(parser);
|
3139
3178
|
add_formatting_element(parser, insert_element_from_token(parser, token));
|
3140
|
-
return
|
3179
|
+
return;
|
3141
3180
|
}
|
3142
3181
|
if (tag_is(token, kStartTag, GUMBO_TAG_NOBR)) {
|
3143
|
-
bool result = true;
|
3144
3182
|
reconstruct_active_formatting_elements(parser);
|
3145
3183
|
if (has_an_element_in_scope(parser, GUMBO_TAG_NOBR)) {
|
3146
|
-
result = false;
|
3147
3184
|
parser_add_parse_error(parser, token);
|
3148
|
-
|
3149
|
-
assert(handled);
|
3185
|
+
adoption_agency_algorithm(parser, token);
|
3150
3186
|
reconstruct_active_formatting_elements(parser);
|
3151
3187
|
}
|
3152
3188
|
insert_element_from_token(parser, token);
|
3153
3189
|
add_formatting_element(parser, get_current_node(parser));
|
3154
|
-
return
|
3190
|
+
return;
|
3155
3191
|
}
|
3156
3192
|
if (
|
3157
3193
|
tag_in(token, kEndTag, &(const TagSet) {
|
@@ -3160,9 +3196,8 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
3160
3196
|
TAG(U)
|
3161
3197
|
})
|
3162
3198
|
) {
|
3163
|
-
|
3164
|
-
|
3165
|
-
return true;
|
3199
|
+
adoption_agency_algorithm(parser, token);
|
3200
|
+
return;
|
3166
3201
|
}
|
3167
3202
|
if (
|
3168
3203
|
tag_in(token, kStartTag, &(const TagSet){TAG(APPLET), TAG(MARQUEE), TAG(OBJECT)})
|
@@ -3171,7 +3206,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
3171
3206
|
insert_element_from_token(parser, token);
|
3172
3207
|
add_formatting_element(parser, &kActiveFormattingScopeMarker);
|
3173
3208
|
set_frameset_not_ok(parser);
|
3174
|
-
return
|
3209
|
+
return;
|
3175
3210
|
}
|
3176
3211
|
if (
|
3177
3212
|
tag_in(token, kEndTag, &(const TagSet){TAG(APPLET), TAG(MARQUEE), TAG(OBJECT)})
|
@@ -3180,11 +3215,11 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
3180
3215
|
if (!has_an_element_in_scope(parser, token_tag)) {
|
3181
3216
|
parser_add_parse_error(parser, token);
|
3182
3217
|
ignore_token(parser);
|
3183
|
-
return
|
3218
|
+
return;
|
3184
3219
|
}
|
3185
|
-
|
3220
|
+
implicitly_close_tags(parser, token, GUMBO_NAMESPACE_HTML, token_tag);
|
3186
3221
|
clear_active_formatting_elements(parser);
|
3187
|
-
return
|
3222
|
+
return;
|
3188
3223
|
}
|
3189
3224
|
if (tag_is(token, kStartTag, GUMBO_TAG_TABLE)) {
|
3190
3225
|
if (
|
@@ -3196,7 +3231,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
3196
3231
|
insert_element_from_token(parser, token);
|
3197
3232
|
set_frameset_not_ok(parser);
|
3198
3233
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
|
3199
|
-
return
|
3234
|
+
return;
|
3200
3235
|
}
|
3201
3236
|
if (tag_is(token, kEndTag, GUMBO_TAG_BR)) {
|
3202
3237
|
parser_add_parse_error(parser, token);
|
@@ -3209,7 +3244,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
3209
3244
|
pop_current_node(parser);
|
3210
3245
|
acknowledge_self_closing_tag(parser);
|
3211
3246
|
set_frameset_not_ok(parser);
|
3212
|
-
return
|
3247
|
+
return;
|
3213
3248
|
}
|
3214
3249
|
if (
|
3215
3250
|
tag_in(token, kStartTag, &(const TagSet) {
|
@@ -3229,7 +3264,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
3229
3264
|
pop_current_node(parser);
|
3230
3265
|
acknowledge_self_closing_tag(parser);
|
3231
3266
|
set_frameset_not_ok(parser);
|
3232
|
-
return
|
3267
|
+
return;
|
3233
3268
|
}
|
3234
3269
|
if (tag_is(token, kStartTag, GUMBO_TAG_INPUT)) {
|
3235
3270
|
reconstruct_active_formatting_elements(parser);
|
@@ -3238,7 +3273,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
3238
3273
|
acknowledge_self_closing_tag(parser);
|
3239
3274
|
if (!attribute_matches(&input->v.element.attributes, "type", "hidden"))
|
3240
3275
|
set_frameset_not_ok(parser);
|
3241
|
-
return
|
3276
|
+
return;
|
3242
3277
|
}
|
3243
3278
|
if (
|
3244
3279
|
tag_in(token, kStartTag, &(const TagSet){TAG(PARAM), TAG(SOURCE), TAG(TRACK)})
|
@@ -3246,37 +3281,37 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
3246
3281
|
insert_element_from_token(parser, token);
|
3247
3282
|
pop_current_node(parser);
|
3248
3283
|
acknowledge_self_closing_tag(parser);
|
3249
|
-
return
|
3284
|
+
return;
|
3250
3285
|
}
|
3251
3286
|
if (tag_is(token, kStartTag, GUMBO_TAG_HR)) {
|
3252
|
-
|
3287
|
+
maybe_implicitly_close_p_tag(parser, token);
|
3253
3288
|
insert_element_from_token(parser, token);
|
3254
3289
|
pop_current_node(parser);
|
3255
3290
|
acknowledge_self_closing_tag(parser);
|
3256
3291
|
set_frameset_not_ok(parser);
|
3257
|
-
return
|
3292
|
+
return;
|
3258
3293
|
}
|
3259
3294
|
if (tag_is(token, kStartTag, GUMBO_TAG_TEXTAREA)) {
|
3260
3295
|
run_generic_parsing_algorithm(parser, token, GUMBO_LEX_RCDATA);
|
3261
3296
|
parser->_parser_state->_ignore_next_linefeed = true;
|
3262
3297
|
set_frameset_not_ok(parser);
|
3263
|
-
return
|
3298
|
+
return;
|
3264
3299
|
}
|
3265
3300
|
if (tag_is(token, kStartTag, GUMBO_TAG_XMP)) {
|
3266
|
-
|
3301
|
+
maybe_implicitly_close_p_tag(parser, token);
|
3267
3302
|
reconstruct_active_formatting_elements(parser);
|
3268
3303
|
set_frameset_not_ok(parser);
|
3269
3304
|
run_generic_parsing_algorithm(parser, token, GUMBO_LEX_RAWTEXT);
|
3270
|
-
return
|
3305
|
+
return;
|
3271
3306
|
}
|
3272
3307
|
if (tag_is(token, kStartTag, GUMBO_TAG_IFRAME)) {
|
3273
3308
|
set_frameset_not_ok(parser);
|
3274
3309
|
run_generic_parsing_algorithm(parser, token, GUMBO_LEX_RAWTEXT);
|
3275
|
-
return
|
3310
|
+
return;
|
3276
3311
|
}
|
3277
3312
|
if (tag_is(token, kStartTag, GUMBO_TAG_NOEMBED)) {
|
3278
3313
|
run_generic_parsing_algorithm(parser, token, GUMBO_LEX_RAWTEXT);
|
3279
|
-
return
|
3314
|
+
return;
|
3280
3315
|
}
|
3281
3316
|
if (tag_is(token, kStartTag, GUMBO_TAG_SELECT)) {
|
3282
3317
|
reconstruct_active_formatting_elements(parser);
|
@@ -3294,7 +3329,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
3294
3329
|
} else {
|
3295
3330
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_SELECT);
|
3296
3331
|
}
|
3297
|
-
return
|
3332
|
+
return;
|
3298
3333
|
}
|
3299
3334
|
if (
|
3300
3335
|
tag_in(token, kStartTag, &(const TagSet){TAG(OPTGROUP), TAG(OPTION)})
|
@@ -3304,33 +3339,28 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
3304
3339
|
}
|
3305
3340
|
reconstruct_active_formatting_elements(parser);
|
3306
3341
|
insert_element_from_token(parser, token);
|
3307
|
-
return
|
3342
|
+
return;
|
3308
3343
|
}
|
3309
3344
|
if (tag_in(token, kStartTag, &(const TagSet){TAG(RB), TAG(RTC)})) {
|
3310
|
-
bool success = true;
|
3311
3345
|
if (has_an_element_in_scope(parser, GUMBO_TAG_RUBY)) {
|
3312
|
-
generate_implied_end_tags(parser, GUMBO_TAG_LAST);
|
3313
|
-
if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_RUBY))
|
3346
|
+
generate_implied_end_tags(parser, GUMBO_TAG_LAST, NULL);
|
3347
|
+
if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_RUBY))
|
3314
3348
|
parser_add_parse_error(parser, token);
|
3315
|
-
success = false;
|
3316
|
-
}
|
3317
3349
|
}
|
3318
3350
|
insert_element_from_token(parser, token);
|
3319
|
-
return
|
3351
|
+
return;
|
3320
3352
|
}
|
3321
3353
|
if (tag_in(token, kStartTag, &(const TagSet){TAG(RP), TAG(RT)})) {
|
3322
|
-
bool success = true;
|
3323
3354
|
if (has_an_element_in_scope(parser, GUMBO_TAG_RUBY)) {
|
3324
|
-
generate_implied_end_tags(parser, GUMBO_TAG_RTC);
|
3355
|
+
generate_implied_end_tags(parser, GUMBO_TAG_RTC, NULL);
|
3325
3356
|
GumboNode* current = get_current_node(parser);
|
3326
3357
|
if (!node_html_tag_is(current, GUMBO_TAG_RUBY) &&
|
3327
3358
|
!node_html_tag_is(current, GUMBO_TAG_RTC)) {
|
3328
3359
|
parser_add_parse_error(parser, token);
|
3329
|
-
success = false;
|
3330
3360
|
}
|
3331
3361
|
}
|
3332
3362
|
insert_element_from_token(parser, token);
|
3333
|
-
return
|
3363
|
+
return;
|
3334
3364
|
}
|
3335
3365
|
if (tag_is(token, kStartTag, GUMBO_TAG_MATH)) {
|
3336
3366
|
reconstruct_active_formatting_elements(parser);
|
@@ -3341,7 +3371,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
3341
3371
|
pop_current_node(parser);
|
3342
3372
|
acknowledge_self_closing_tag(parser);
|
3343
3373
|
}
|
3344
|
-
return
|
3374
|
+
return;
|
3345
3375
|
}
|
3346
3376
|
if (tag_is(token, kStartTag, GUMBO_TAG_SVG)) {
|
3347
3377
|
reconstruct_active_formatting_elements(parser);
|
@@ -3352,7 +3382,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
3352
3382
|
pop_current_node(parser);
|
3353
3383
|
acknowledge_self_closing_tag(parser);
|
3354
3384
|
}
|
3355
|
-
return
|
3385
|
+
return;
|
3356
3386
|
}
|
3357
3387
|
if (
|
3358
3388
|
tag_in(token, kStartTag, &(const TagSet) {
|
@@ -3362,60 +3392,24 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
3362
3392
|
) {
|
3363
3393
|
parser_add_parse_error(parser, token);
|
3364
3394
|
ignore_token(parser);
|
3365
|
-
return
|
3395
|
+
return;
|
3366
3396
|
}
|
3367
3397
|
if (token->type == GUMBO_TOKEN_START_TAG) {
|
3368
3398
|
reconstruct_active_formatting_elements(parser);
|
3369
3399
|
insert_element_from_token(parser, token);
|
3370
|
-
return
|
3371
|
-
}
|
3372
|
-
any_other_end_tag:
|
3373
|
-
assert(token->type == GUMBO_TOKEN_END_TAG);
|
3374
|
-
GumboTag end_tag = token->v.end_tag.tag;
|
3375
|
-
const char *end_tagname = token->v.end_tag.name;
|
3376
|
-
assert(state->_open_elements.length > 0);
|
3377
|
-
assert(node_html_tag_is(state->_open_elements.data[0], GUMBO_TAG_HTML));
|
3378
|
-
// Walk up the stack of open elements until we find one that either:
|
3379
|
-
// a) Matches the tag name we saw
|
3380
|
-
// b) Is in the "special" category.
|
3381
|
-
// If we see a), implicitly close everything up to and including it. If we
|
3382
|
-
// see b), then record a parse error, don't close anything (except the
|
3383
|
-
// implied end tags) and ignore the end tag token.
|
3384
|
-
for (int i = state->_open_elements.length; --i >= 0;) {
|
3385
|
-
const GumboNode* node = state->_open_elements.data[i];
|
3386
|
-
if (node_qualified_tagname_is(node, GUMBO_NAMESPACE_HTML, end_tag, end_tagname)) {
|
3387
|
-
generate_implied_end_tags(parser, end_tag);
|
3388
|
-
// TODO(jdtang): Do I need to add a parse error here? The condition in
|
3389
|
-
// the spec seems like it's the inverse of the loop condition above, and
|
3390
|
-
// so would never fire.
|
3391
|
-
// sfc: Yes, an error is needed here.
|
3392
|
-
// <!DOCTYPE><body><sarcasm><foo></sarcasm> is an example.
|
3393
|
-
// foo is the "current node" but sarcasm is node.
|
3394
|
-
// XXX: Write a test for this.
|
3395
|
-
if (node != get_current_node(parser))
|
3396
|
-
parser_add_parse_error(parser, token);
|
3397
|
-
while (node != pop_current_node(parser))
|
3398
|
-
; // Pop everything.
|
3399
|
-
return true;
|
3400
|
-
} else if (is_special_node(node)) {
|
3401
|
-
parser_add_parse_error(parser, token);
|
3402
|
-
ignore_token(parser);
|
3403
|
-
return false;
|
3404
|
-
}
|
3400
|
+
return;
|
3405
3401
|
}
|
3406
|
-
|
3407
|
-
assert(0);
|
3408
|
-
return false;
|
3402
|
+
in_body_any_other_end_tag(parser, token);
|
3409
3403
|
}
|
3410
3404
|
|
3411
3405
|
// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-incdata
|
3412
|
-
static
|
3406
|
+
static void handle_text(GumboParser* parser, GumboToken* token) {
|
3413
3407
|
if (
|
3414
3408
|
token->type == GUMBO_TOKEN_CHARACTER
|
3415
3409
|
|| token->type == GUMBO_TOKEN_WHITESPACE
|
3416
3410
|
) {
|
3417
3411
|
insert_text_token(parser, token);
|
3418
|
-
return
|
3412
|
+
return;
|
3419
3413
|
}
|
3420
3414
|
// We provide only bare-bones script handling that doesn't involve any of
|
3421
3415
|
// the parser-pause/already-started/script-nesting flags or re-entrant
|
@@ -3424,19 +3418,16 @@ static bool handle_text(GumboParser* parser, GumboToken* token) {
|
|
3424
3418
|
// provide the script body as a text-node child of the <script> element.
|
3425
3419
|
// This behavior doesn't support document.write of partial HTML elements,
|
3426
3420
|
// but should be adequate for almost all other scripting support.
|
3427
|
-
bool success = true;
|
3428
3421
|
if (token->type == GUMBO_TOKEN_EOF) {
|
3429
3422
|
parser_add_parse_error(parser, token);
|
3430
|
-
success = false;
|
3431
3423
|
parser->_parser_state->_reprocess_current_token = true;
|
3432
3424
|
}
|
3433
3425
|
pop_current_node(parser);
|
3434
3426
|
set_insertion_mode(parser, parser->_parser_state->_original_insertion_mode);
|
3435
|
-
return success;
|
3436
3427
|
}
|
3437
3428
|
|
3438
3429
|
// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-intable
|
3439
|
-
static
|
3430
|
+
static void handle_in_table(GumboParser* parser, GumboToken* token) {
|
3440
3431
|
GumboParserState* state = parser->_parser_state;
|
3441
3432
|
if (
|
3442
3433
|
(token->type == GUMBO_TOKEN_CHARACTER
|
@@ -3456,29 +3447,29 @@ static bool handle_in_table(GumboParser* parser, GumboToken* token) {
|
|
3456
3447
|
state->_original_insertion_mode = state->_insertion_mode;
|
3457
3448
|
state->_reprocess_current_token = true;
|
3458
3449
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_TEXT);
|
3459
|
-
return
|
3450
|
+
return;
|
3460
3451
|
}
|
3461
3452
|
if (token->type == GUMBO_TOKEN_COMMENT) {
|
3462
3453
|
append_comment_node(parser, get_current_node(parser), token);
|
3463
|
-
return
|
3454
|
+
return;
|
3464
3455
|
}
|
3465
3456
|
if (token->type == GUMBO_TOKEN_DOCTYPE) {
|
3466
3457
|
parser_add_parse_error(parser, token);
|
3467
3458
|
ignore_token(parser);
|
3468
|
-
return
|
3459
|
+
return;
|
3469
3460
|
}
|
3470
3461
|
if (tag_is(token, kStartTag, GUMBO_TAG_CAPTION)) {
|
3471
3462
|
clear_stack_to_table_context(parser);
|
3472
3463
|
add_formatting_element(parser, &kActiveFormattingScopeMarker);
|
3473
3464
|
insert_element_from_token(parser, token);
|
3474
3465
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_CAPTION);
|
3475
|
-
return
|
3466
|
+
return;
|
3476
3467
|
}
|
3477
3468
|
if (tag_is(token, kStartTag, GUMBO_TAG_COLGROUP)) {
|
3478
3469
|
clear_stack_to_table_context(parser);
|
3479
3470
|
insert_element_from_token(parser, token);
|
3480
3471
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_COLUMN_GROUP);
|
3481
|
-
return
|
3472
|
+
return;
|
3482
3473
|
}
|
3483
3474
|
if (tag_is(token, kStartTag, GUMBO_TAG_COL)) {
|
3484
3475
|
clear_stack_to_table_context(parser);
|
@@ -3489,7 +3480,7 @@ static bool handle_in_table(GumboParser* parser, GumboToken* token) {
|
|
3489
3480
|
);
|
3490
3481
|
state->_reprocess_current_token = true;
|
3491
3482
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_COLUMN_GROUP);
|
3492
|
-
return
|
3483
|
+
return;
|
3493
3484
|
}
|
3494
3485
|
if (
|
3495
3486
|
tag_in(token, kStartTag, &(const TagSet) {
|
@@ -3499,7 +3490,7 @@ static bool handle_in_table(GumboParser* parser, GumboToken* token) {
|
|
3499
3490
|
clear_stack_to_table_context(parser);
|
3500
3491
|
insert_element_from_token(parser, token);
|
3501
3492
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
|
3502
|
-
return
|
3493
|
+
return;
|
3503
3494
|
}
|
3504
3495
|
if (
|
3505
3496
|
tag_in(token, kStartTag, &(const TagSet) {
|
@@ -3514,7 +3505,7 @@ static bool handle_in_table(GumboParser* parser, GumboToken* token) {
|
|
3514
3505
|
);
|
3515
3506
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
|
3516
3507
|
state->_reprocess_current_token = true;
|
3517
|
-
return
|
3508
|
+
return;
|
3518
3509
|
}
|
3519
3510
|
if (tag_is(token, kStartTag, GUMBO_TAG_TABLE)) {
|
3520
3511
|
parser_add_parse_error(parser, token);
|
@@ -3523,14 +3514,14 @@ static bool handle_in_table(GumboParser* parser, GumboToken* token) {
|
|
3523
3514
|
} else {
|
3524
3515
|
ignore_token(parser);
|
3525
3516
|
}
|
3526
|
-
return
|
3517
|
+
return;
|
3527
3518
|
}
|
3528
3519
|
if (tag_is(token, kEndTag, GUMBO_TAG_TABLE)) {
|
3529
3520
|
if (!close_table(parser)) {
|
3530
3521
|
parser_add_parse_error(parser, token);
|
3531
|
-
return
|
3522
|
+
return;
|
3532
3523
|
}
|
3533
|
-
return
|
3524
|
+
return;
|
3534
3525
|
}
|
3535
3526
|
if (
|
3536
3527
|
tag_in(token, kEndTag, &(const TagSet) {
|
@@ -3540,13 +3531,14 @@ static bool handle_in_table(GumboParser* parser, GumboToken* token) {
|
|
3540
3531
|
) {
|
3541
3532
|
parser_add_parse_error(parser, token);
|
3542
3533
|
ignore_token(parser);
|
3543
|
-
return
|
3534
|
+
return;
|
3544
3535
|
}
|
3545
3536
|
if (
|
3546
3537
|
tag_in(token, kStartTag, &(const TagSet){TAG(STYLE), TAG(SCRIPT), TAG(TEMPLATE)})
|
3547
3538
|
|| (tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE))
|
3548
3539
|
) {
|
3549
|
-
|
3540
|
+
handle_in_head(parser, token);
|
3541
|
+
return;
|
3550
3542
|
}
|
3551
3543
|
if (
|
3552
3544
|
tag_is(token, kStartTag, GUMBO_TAG_INPUT)
|
@@ -3556,35 +3548,35 @@ static bool handle_in_table(GumboParser* parser, GumboToken* token) {
|
|
3556
3548
|
insert_element_from_token(parser, token);
|
3557
3549
|
pop_current_node(parser);
|
3558
3550
|
acknowledge_self_closing_tag(parser);
|
3559
|
-
return
|
3551
|
+
return;
|
3560
3552
|
}
|
3561
3553
|
if (tag_is(token, kStartTag, GUMBO_TAG_FORM)) {
|
3562
3554
|
parser_add_parse_error(parser, token);
|
3563
3555
|
if (state->_form_element || has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
|
3564
3556
|
ignore_token(parser);
|
3565
|
-
return
|
3557
|
+
return;
|
3566
3558
|
}
|
3567
3559
|
state->_form_element = insert_element_from_token(parser, token);
|
3568
3560
|
pop_current_node(parser);
|
3569
|
-
return
|
3561
|
+
return;
|
3570
3562
|
}
|
3571
3563
|
if (token->type == GUMBO_TOKEN_EOF) {
|
3572
|
-
|
3564
|
+
handle_in_body(parser, token);
|
3565
|
+
return;
|
3573
3566
|
}
|
3574
3567
|
// foster-parenting-start-tag or foster-parenting-end-tag error
|
3575
3568
|
parser_add_parse_error(parser, token);
|
3576
3569
|
state->_foster_parent_insertions = true;
|
3577
|
-
|
3570
|
+
handle_in_body(parser, token);
|
3578
3571
|
state->_foster_parent_insertions = false;
|
3579
|
-
return result;
|
3580
3572
|
}
|
3581
3573
|
|
3582
3574
|
// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-intabletext
|
3583
|
-
static
|
3575
|
+
static void handle_in_table_text(GumboParser* parser, GumboToken* token) {
|
3584
3576
|
if (token->type == GUMBO_TOKEN_NULL) {
|
3585
3577
|
parser_add_parse_error(parser, token);
|
3586
3578
|
ignore_token(parser);
|
3587
|
-
return
|
3579
|
+
return;
|
3588
3580
|
}
|
3589
3581
|
GumboParserState* state = parser->_parser_state;
|
3590
3582
|
// Non-whitespace tokens will cause parse errors later.
|
@@ -3594,7 +3586,7 @@ static bool handle_in_table_text(GumboParser* parser, GumboToken* token) {
|
|
3594
3586
|
|| token->type == GUMBO_TOKEN_CHARACTER) {
|
3595
3587
|
insert_text_token(parser, token);
|
3596
3588
|
gumbo_character_token_buffer_append(token, &state->_table_character_tokens);
|
3597
|
-
return
|
3589
|
+
return;
|
3598
3590
|
}
|
3599
3591
|
|
3600
3592
|
GumboCharacterTokenBuffer* buffer = &state->_table_character_tokens;
|
@@ -3616,26 +3608,24 @@ static bool handle_in_table_text(GumboParser* parser, GumboToken* token) {
|
|
3616
3608
|
state->_foster_parent_insertions = false;
|
3617
3609
|
state->_reprocess_current_token = true;
|
3618
3610
|
state->_insertion_mode = state->_original_insertion_mode;
|
3619
|
-
return true;
|
3620
3611
|
}
|
3621
3612
|
|
3622
3613
|
// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-incaption
|
3623
|
-
static
|
3614
|
+
static void handle_in_caption(GumboParser* parser, GumboToken* token) {
|
3624
3615
|
if (tag_is(token, kEndTag, GUMBO_TAG_CAPTION)) {
|
3625
3616
|
if (!has_an_element_in_table_scope(parser, GUMBO_TAG_CAPTION)) {
|
3626
3617
|
parser_add_parse_error(parser, token);
|
3627
3618
|
ignore_token(parser);
|
3628
|
-
return
|
3619
|
+
return;
|
3629
3620
|
}
|
3630
|
-
generate_implied_end_tags(parser, GUMBO_TAG_LAST);
|
3631
|
-
|
3632
|
-
if (!result)
|
3621
|
+
generate_implied_end_tags(parser, GUMBO_TAG_LAST, NULL);
|
3622
|
+
if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_CAPTION))
|
3633
3623
|
parser_add_parse_error(parser, token);
|
3634
3624
|
while (!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_CAPTION))
|
3635
3625
|
;
|
3636
3626
|
clear_active_formatting_elements(parser);
|
3637
3627
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
|
3638
|
-
return
|
3628
|
+
return;
|
3639
3629
|
}
|
3640
3630
|
if (
|
3641
3631
|
tag_in(token, kStartTag, &(const TagSet) {
|
@@ -3647,18 +3637,17 @@ static bool handle_in_caption(GumboParser* parser, GumboToken* token) {
|
|
3647
3637
|
if (!has_an_element_in_table_scope(parser, GUMBO_TAG_CAPTION)) {
|
3648
3638
|
parser_add_parse_error(parser, token);
|
3649
3639
|
ignore_token(parser);
|
3650
|
-
return
|
3640
|
+
return;
|
3651
3641
|
}
|
3652
|
-
generate_implied_end_tags(parser, GUMBO_TAG_LAST);
|
3653
|
-
|
3654
|
-
if (!result)
|
3642
|
+
generate_implied_end_tags(parser, GUMBO_TAG_LAST, NULL);
|
3643
|
+
if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_CAPTION))
|
3655
3644
|
parser_add_parse_error(parser, token);
|
3656
3645
|
while (!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_CAPTION))
|
3657
3646
|
;
|
3658
3647
|
clear_active_formatting_elements(parser);
|
3659
3648
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
|
3660
3649
|
parser->_parser_state->_reprocess_current_token = true;
|
3661
|
-
return
|
3650
|
+
return;
|
3662
3651
|
}
|
3663
3652
|
if (
|
3664
3653
|
tag_in(token, kEndTag, &(const TagSet) {
|
@@ -3668,77 +3657,79 @@ static bool handle_in_caption(GumboParser* parser, GumboToken* token) {
|
|
3668
3657
|
) {
|
3669
3658
|
parser_add_parse_error(parser, token);
|
3670
3659
|
ignore_token(parser);
|
3671
|
-
return
|
3660
|
+
return;
|
3672
3661
|
}
|
3673
|
-
|
3662
|
+
handle_in_body(parser, token);
|
3674
3663
|
}
|
3675
3664
|
|
3676
3665
|
// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-incolgroup
|
3677
|
-
static
|
3666
|
+
static void handle_in_column_group(GumboParser* parser, GumboToken* token) {
|
3678
3667
|
if (token->type == GUMBO_TOKEN_WHITESPACE) {
|
3679
3668
|
insert_text_token(parser, token);
|
3680
|
-
return
|
3669
|
+
return;
|
3681
3670
|
}
|
3682
3671
|
if (token->type == GUMBO_TOKEN_COMMENT) {
|
3683
3672
|
append_comment_node(parser, get_current_node(parser), token);
|
3684
|
-
return
|
3673
|
+
return;
|
3685
3674
|
}
|
3686
3675
|
if (token->type == GUMBO_TOKEN_DOCTYPE) {
|
3687
3676
|
parser_add_parse_error(parser, token);
|
3688
3677
|
ignore_token(parser);
|
3689
|
-
return
|
3678
|
+
return;
|
3690
3679
|
}
|
3691
3680
|
if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
|
3692
|
-
|
3681
|
+
handle_in_body(parser, token);
|
3682
|
+
return;
|
3693
3683
|
}
|
3694
3684
|
if (tag_is(token, kStartTag, GUMBO_TAG_COL)) {
|
3695
3685
|
insert_element_from_token(parser, token);
|
3696
3686
|
pop_current_node(parser);
|
3697
3687
|
acknowledge_self_closing_tag(parser);
|
3698
|
-
return
|
3688
|
+
return;
|
3699
3689
|
}
|
3700
3690
|
if (tag_is(token, kEndTag, GUMBO_TAG_COLGROUP)) {
|
3701
3691
|
if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_COLGROUP)) {
|
3702
3692
|
parser_add_parse_error(parser, token);
|
3703
3693
|
ignore_token(parser);
|
3704
|
-
return
|
3694
|
+
return;
|
3705
3695
|
}
|
3706
3696
|
pop_current_node(parser);
|
3707
3697
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
|
3708
|
-
return
|
3698
|
+
return;
|
3709
3699
|
}
|
3710
3700
|
if (tag_is(token, kEndTag, GUMBO_TAG_COL)) {
|
3711
3701
|
parser_add_parse_error(parser, token);
|
3712
3702
|
ignore_token(parser);
|
3713
|
-
return
|
3703
|
+
return;
|
3714
3704
|
}
|
3715
3705
|
if (
|
3716
3706
|
tag_is(token, kStartTag, GUMBO_TAG_TEMPLATE)
|
3717
3707
|
|| tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)
|
3718
3708
|
) {
|
3719
|
-
|
3709
|
+
handle_in_head(parser, token);
|
3710
|
+
return;
|
3720
3711
|
}
|
3721
3712
|
if (token->type == GUMBO_TOKEN_EOF) {
|
3722
|
-
|
3713
|
+
handle_in_body(parser, token);
|
3714
|
+
return;
|
3723
3715
|
}
|
3724
3716
|
if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_COLGROUP)) {
|
3725
3717
|
parser_add_parse_error(parser, token);
|
3726
3718
|
ignore_token(parser);
|
3727
|
-
return
|
3719
|
+
return;
|
3728
3720
|
}
|
3729
3721
|
pop_current_node(parser);
|
3730
3722
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
|
3731
3723
|
parser->_parser_state->_reprocess_current_token = true;
|
3732
|
-
return true;
|
3733
3724
|
}
|
3734
3725
|
|
3735
3726
|
// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-intbody
|
3736
|
-
static
|
3727
|
+
static void handle_in_table_body(GumboParser* parser, GumboToken* token) {
|
3737
3728
|
if (tag_is(token, kStartTag, GUMBO_TAG_TR)) {
|
3738
3729
|
clear_stack_to_table_body_context(parser);
|
3739
3730
|
insert_element_from_token(parser, token);
|
3740
3731
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_ROW);
|
3741
|
-
return
|
3732
|
+
return;
|
3742
3733
|
}
|
3743
3734
|
if (tag_in(token, kStartTag, &td_th_tags)) {
|
3744
3735
|
parser_add_parse_error(parser, token);
|
@@ -3746,7 +3737,7 @@ static bool handle_in_table_body(GumboParser* parser, GumboToken* token) {
|
|
3746
3737
|
insert_element_of_tag_type(parser, GUMBO_TAG_TR, GUMBO_INSERTION_IMPLIED);
|
3747
3738
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_ROW);
|
3748
3739
|
parser->_parser_state->_reprocess_current_token = true;
|
3749
|
-
return
|
3740
|
+
return;
|
3750
3741
|
}
|
3751
3742
|
if (
|
3752
3743
|
tag_in(token, kEndTag, &(const TagSet){TAG(TBODY), TAG(TFOOT), TAG(THEAD)})
|
@@ -3754,12 +3745,12 @@ static bool handle_in_table_body(GumboParser* parser, GumboToken* token) {
|
|
3754
3745
|
if (!has_an_element_in_table_scope(parser, token->v.end_tag.tag)) {
|
3755
3746
|
parser_add_parse_error(parser, token);
|
3756
3747
|
ignore_token(parser);
|
3757
|
-
return
|
3748
|
+
return;
|
3758
3749
|
}
|
3759
3750
|
clear_stack_to_table_body_context(parser);
|
3760
3751
|
pop_current_node(parser);
|
3761
3752
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
|
3762
|
-
return
|
3753
|
+
return;
|
3763
3754
|
}
|
3764
3755
|
if (
|
3765
3756
|
tag_in(token, kStartTag, &(const TagSet) {
|
@@ -3777,13 +3768,13 @@ static bool handle_in_table_body(GumboParser* parser, GumboToken* token) {
|
|
3777
3768
|
) {
|
3778
3769
|
parser_add_parse_error(parser, token);
|
3779
3770
|
ignore_token(parser);
|
3780
|
-
return
|
3771
|
+
return;
|
3781
3772
|
}
|
3782
3773
|
clear_stack_to_table_body_context(parser);
|
3783
3774
|
pop_current_node(parser);
|
3784
3775
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
|
3785
3776
|
parser->_parser_state->_reprocess_current_token = true;
|
3786
|
-
return
|
3777
|
+
return;
|
3787
3778
|
}
|
3788
3779
|
if (
|
3789
3780
|
tag_in(token, kEndTag, &(const TagSet) {
|
@@ -3793,30 +3784,30 @@ static bool handle_in_table_body(GumboParser* parser, GumboToken* token) {
|
|
3793
3784
|
) {
|
3794
3785
|
parser_add_parse_error(parser, token);
|
3795
3786
|
ignore_token(parser);
|
3796
|
-
return
|
3787
|
+
return;
|
3797
3788
|
}
|
3798
|
-
|
3789
|
+
handle_in_table(parser, token);
|
3799
3790
|
}
|
3800
3791
|
|
3801
3792
|
// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-intr
|
3802
|
-
static
|
3793
|
+
static void handle_in_row(GumboParser* parser, GumboToken* token) {
|
3803
3794
|
if (tag_in(token, kStartTag, &td_th_tags)) {
|
3804
3795
|
clear_stack_to_table_row_context(parser);
|
3805
3796
|
insert_element_from_token(parser, token);
|
3806
3797
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_CELL);
|
3807
3798
|
add_formatting_element(parser, &kActiveFormattingScopeMarker);
|
3808
|
-
return
|
3799
|
+
return;
|
3809
3800
|
}
|
3810
3801
|
if (tag_is(token, kEndTag, GUMBO_TAG_TR)) {
|
3811
3802
|
if (!has_an_element_in_table_scope(parser, GUMBO_TAG_TR)) {
|
3812
3803
|
parser_add_parse_error(parser, token);
|
3813
3804
|
ignore_token(parser);
|
3814
|
-
return
|
3805
|
+
return;
|
3815
3806
|
}
|
3816
3807
|
clear_stack_to_table_row_context(parser);
|
3817
3808
|
pop_current_node(parser);
|
3818
3809
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
|
3819
|
-
return
|
3810
|
+
return;
|
3820
3811
|
}
|
3821
3812
|
if (
|
3822
3813
|
tag_in(token, kStartTag, &(const TagSet) {
|
@@ -3828,13 +3819,13 @@ static bool handle_in_row(GumboParser* parser, GumboToken* token) {
|
|
3828
3819
|
if (!has_an_element_in_table_scope(parser, GUMBO_TAG_TR)) {
|
3829
3820
|
parser_add_parse_error(parser, token);
|
3830
3821
|
ignore_token(parser);
|
3831
|
-
return
|
3822
|
+
return;
|
3832
3823
|
}
|
3833
3824
|
clear_stack_to_table_row_context(parser);
|
3834
3825
|
pop_current_node(parser);
|
3835
3826
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
|
3836
3827
|
parser->_parser_state->_reprocess_current_token = true;
|
3837
|
-
return
|
3828
|
+
return;
|
3838
3829
|
}
|
3839
3830
|
if (
|
3840
3831
|
tag_in(token, kEndTag, &(const TagSet) {TAG(TBODY), TAG(TFOOT), TAG(THEAD)})
|
@@ -3842,17 +3833,17 @@ static bool handle_in_row(GumboParser* parser, GumboToken* token) {
|
|
3842
3833
|
if (!has_an_element_in_table_scope(parser, token->v.end_tag.tag)) {
|
3843
3834
|
parser_add_parse_error(parser, token);
|
3844
3835
|
ignore_token(parser);
|
3845
|
-
return
|
3836
|
+
return;
|
3846
3837
|
}
|
3847
3838
|
if (!has_an_element_in_table_scope(parser, GUMBO_TAG_TR)) {
|
3848
3839
|
ignore_token(parser);
|
3849
|
-
return
|
3840
|
+
return;
|
3850
3841
|
}
|
3851
3842
|
clear_stack_to_table_row_context(parser);
|
3852
3843
|
pop_current_node(parser);
|
3853
3844
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
|
3854
3845
|
parser->_parser_state->_reprocess_current_token = true;
|
3855
|
-
return
|
3846
|
+
return;
|
3856
3847
|
}
|
3857
3848
|
if (
|
3858
3849
|
tag_in(token, kEndTag, &(const TagSet) {
|
@@ -3862,21 +3853,22 @@ static bool handle_in_row(GumboParser* parser, GumboToken* token) {
|
|
3862
3853
|
) {
|
3863
3854
|
parser_add_parse_error(parser, token);
|
3864
3855
|
ignore_token(parser);
|
3865
|
-
return
|
3856
|
+
return;
|
3866
3857
|
}
|
3867
|
-
|
3858
|
+
handle_in_table(parser, token);
|
3868
3859
|
}
|
3869
3860
|
|
3870
3861
|
// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-intd
|
3871
|
-
static
|
3862
|
+
static void handle_in_cell(GumboParser* parser, GumboToken* token) {
|
3872
3863
|
if (tag_in(token, kEndTag, &td_th_tags)) {
|
3873
3864
|
GumboTag token_tag = token->v.end_tag.tag;
|
3874
3865
|
if (!has_an_element_in_table_scope(parser, token_tag)) {
|
3875
3866
|
parser_add_parse_error(parser, token);
|
3876
3867
|
ignore_token(parser);
|
3877
|
-
return
|
3868
|
+
return;
|
3878
3869
|
}
|
3879
|
-
|
3870
|
+
close_table_cell(parser, token, token_tag);
|
3871
|
+
return;
|
3880
3872
|
}
|
3881
3873
|
if (
|
3882
3874
|
tag_in(token, kStartTag, &(const TagSet) {
|
@@ -3892,10 +3884,11 @@ static bool handle_in_cell(GumboParser* parser, GumboToken* token) {
|
|
3892
3884
|
gumbo_debug("Bailing out because there's no <td> or <th> in scope.\n");
|
3893
3885
|
parser_add_parse_error(parser, token);
|
3894
3886
|
ignore_token(parser);
|
3895
|
-
return
|
3887
|
+
return;
|
3896
3888
|
}
|
3897
3889
|
parser->_parser_state->_reprocess_current_token = true;
|
3898
|
-
|
3890
|
+
close_current_cell(parser, token);
|
3891
|
+
return;
|
3899
3892
|
}
|
3900
3893
|
if (
|
3901
3894
|
tag_in(token, kEndTag, &(const TagSet) {
|
@@ -3904,7 +3897,7 @@ static bool handle_in_cell(GumboParser* parser, GumboToken* token) {
|
|
3904
3897
|
) {
|
3905
3898
|
parser_add_parse_error(parser, token);
|
3906
3899
|
ignore_token(parser);
|
3907
|
-
return
|
3900
|
+
return;
|
3908
3901
|
}
|
3909
3902
|
if (
|
3910
3903
|
tag_in(token, kEndTag, &(const TagSet) {
|
@@ -3914,46 +3907,48 @@ static bool handle_in_cell(GumboParser* parser, GumboToken* token) {
|
|
3914
3907
|
if (!has_an_element_in_table_scope(parser, token->v.end_tag.tag)) {
|
3915
3908
|
parser_add_parse_error(parser, token);
|
3916
3909
|
ignore_token(parser);
|
3917
|
-
return
|
3910
|
+
return;
|
3918
3911
|
}
|
3919
3912
|
parser->_parser_state->_reprocess_current_token = true;
|
3920
|
-
|
3913
|
+
close_current_cell(parser, token);
|
3914
|
+
return;
|
3921
3915
|
}
|
3922
|
-
|
3916
|
+
handle_in_body(parser, token);
|
3923
3917
|
}
|
3924
3918
|
|
3925
3919
|
// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inselect
|
3926
|
-
static
|
3920
|
+
static void handle_in_select(GumboParser* parser, GumboToken* token) {
|
3927
3921
|
if (token->type == GUMBO_TOKEN_NULL) {
|
3928
3922
|
parser_add_parse_error(parser, token);
|
3929
3923
|
ignore_token(parser);
|
3930
|
-
return
|
3924
|
+
return;
|
3931
3925
|
}
|
3932
3926
|
if (
|
3933
3927
|
token->type == GUMBO_TOKEN_CHARACTER
|
3934
3928
|
|| token->type == GUMBO_TOKEN_WHITESPACE
|
3935
3929
|
) {
|
3936
3930
|
insert_text_token(parser, token);
|
3937
|
-
return
|
3931
|
+
return;
|
3938
3932
|
}
|
3939
3933
|
if (token->type == GUMBO_TOKEN_COMMENT) {
|
3940
3934
|
append_comment_node(parser, get_current_node(parser), token);
|
3941
|
-
return
|
3935
|
+
return;
|
3942
3936
|
}
|
3943
3937
|
if (token->type == GUMBO_TOKEN_DOCTYPE) {
|
3944
3938
|
parser_add_parse_error(parser, token);
|
3945
3939
|
ignore_token(parser);
|
3946
|
-
return
|
3940
|
+
return;
|
3947
3941
|
}
|
3948
3942
|
if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
|
3949
|
-
|
3943
|
+
handle_in_body(parser, token);
|
3944
|
+
return;
|
3950
3945
|
}
|
3951
3946
|
if (tag_is(token, kStartTag, GUMBO_TAG_OPTION)) {
|
3952
3947
|
if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTION)) {
|
3953
3948
|
pop_current_node(parser);
|
3954
3949
|
}
|
3955
3950
|
insert_element_from_token(parser, token);
|
3956
|
-
return
|
3951
|
+
return;
|
3957
3952
|
}
|
3958
3953
|
if (tag_is(token, kStartTag, GUMBO_TAG_OPTGROUP)) {
|
3959
3954
|
if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTION)) {
|
@@ -3963,7 +3958,7 @@ static bool handle_in_select(GumboParser* parser, GumboToken* token) {
|
|
3963
3958
|
pop_current_node(parser);
|
3964
3959
|
}
|
3965
3960
|
insert_element_from_token(parser, token);
|
3966
|
-
return
|
3961
|
+
return;
|
3967
3962
|
}
|
3968
3963
|
if (tag_is(token, kEndTag, GUMBO_TAG_OPTGROUP)) {
|
3969
3964
|
GumboVector* open_elements = &parser->_parser_state->_open_elements;
|
@@ -3978,29 +3973,29 @@ static bool handle_in_select(GumboParser* parser, GumboToken* token) {
|
|
3978
3973
|
}
|
3979
3974
|
if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTGROUP)) {
|
3980
3975
|
pop_current_node(parser);
|
3981
|
-
return
|
3976
|
+
return;
|
3982
3977
|
}
|
3983
3978
|
parser_add_parse_error(parser, token);
|
3984
3979
|
ignore_token(parser);
|
3985
|
-
return
|
3980
|
+
return;
|
3986
3981
|
}
|
3987
3982
|
if (tag_is(token, kEndTag, GUMBO_TAG_OPTION)) {
|
3988
3983
|
if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTION)) {
|
3989
3984
|
pop_current_node(parser);
|
3990
|
-
return
|
3985
|
+
return;
|
3991
3986
|
}
|
3992
3987
|
parser_add_parse_error(parser, token);
|
3993
3988
|
ignore_token(parser);
|
3994
|
-
return
|
3989
|
+
return;
|
3995
3990
|
}
|
3996
3991
|
if (tag_is(token, kEndTag, GUMBO_TAG_SELECT)) {
|
3997
3992
|
if (!has_an_element_in_select_scope(parser, GUMBO_TAG_SELECT)) {
|
3998
3993
|
parser_add_parse_error(parser, token);
|
3999
3994
|
ignore_token(parser);
|
4000
|
-
return
|
3995
|
+
return;
|
4001
3996
|
}
|
4002
3997
|
close_current_select(parser);
|
4003
|
-
return
|
3998
|
+
return;
|
4004
3999
|
}
|
4005
4000
|
if (tag_is(token, kStartTag, GUMBO_TAG_SELECT)) {
|
4006
4001
|
parser_add_parse_error(parser, token);
|
@@ -4008,7 +4003,7 @@ static bool handle_in_select(GumboParser* parser, GumboToken* token) {
|
|
4008
4003
|
if (has_an_element_in_select_scope(parser, GUMBO_TAG_SELECT)) {
|
4009
4004
|
close_current_select(parser);
|
4010
4005
|
}
|
4011
|
-
return
|
4006
|
+
return;
|
4012
4007
|
}
|
4013
4008
|
if (
|
4014
4009
|
tag_in(token, kStartTag, &(const TagSet) {TAG(INPUT), TAG(KEYGEN), TAG(TEXTAREA)})
|
@@ -4020,23 +4015,25 @@ static bool handle_in_select(GumboParser* parser, GumboToken* token) {
|
|
4020
4015
|
close_current_select(parser);
|
4021
4016
|
parser->_parser_state->_reprocess_current_token = true;
|
4022
4017
|
}
|
4023
|
-
return
|
4018
|
+
return;
|
4024
4019
|
}
|
4025
4020
|
if (
|
4026
4021
|
tag_in(token, kStartTag, &(const TagSet){TAG(SCRIPT), TAG(TEMPLATE)})
|
4027
4022
|
|| tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)
|
4028
4023
|
) {
|
4029
|
-
|
4024
|
+
handle_in_head(parser, token);
|
4025
|
+
return;
|
4026
|
+
}
|
4027
|
+
if (token->type == GUMBO_TOKEN_EOF) {
|
4028
|
+
handle_in_body(parser, token);
|
4029
|
+
return;
|
4030
4030
|
}
|
4031
|
-
if (token->type == GUMBO_TOKEN_EOF)
|
4032
|
-
return handle_in_body(parser, token);
|
4033
4031
|
parser_add_parse_error(parser, token);
|
4034
4032
|
ignore_token(parser);
|
4035
|
-
return false;
|
4036
4033
|
}
|
4037
4034
|
|
4038
4035
|
// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inselectintable
|
4039
|
-
static
|
4036
|
+
static void handle_in_select_in_table(GumboParser* parser, GumboToken* token) {
|
4040
4037
|
static const TagSet tags = {
|
4041
4038
|
TAG(CAPTION), TAG(TABLE), TAG(TBODY), TAG(TFOOT), TAG(THEAD),
|
4042
4039
|
TAG(TR), TAG(TD), TAG(TH)
|
@@ -4045,23 +4042,23 @@ static bool handle_in_select_in_table(GumboParser* parser, GumboToken* token) {
|
|
4045
4042
|
parser_add_parse_error(parser, token);
|
4046
4043
|
close_current_select(parser);
|
4047
4044
|
parser->_parser_state->_reprocess_current_token = true;
|
4048
|
-
return
|
4045
|
+
return;
|
4049
4046
|
}
|
4050
4047
|
if (tag_in(token, kEndTag, &tags)) {
|
4051
4048
|
parser_add_parse_error(parser, token);
|
4052
4049
|
if (!has_an_element_in_table_scope(parser, token->v.end_tag.tag)) {
|
4053
4050
|
ignore_token(parser);
|
4054
|
-
return
|
4051
|
+
return;
|
4055
4052
|
}
|
4056
4053
|
close_current_select(parser);
|
4057
4054
|
parser->_parser_state->_reprocess_current_token = true;
|
4058
|
-
return
|
4055
|
+
return;
|
4059
4056
|
}
|
4060
|
-
|
4057
|
+
handle_in_select(parser, token);
|
4061
4058
|
}
|
4062
4059
|
|
4063
4060
|
// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-intemplate
|
4064
|
-
static
|
4061
|
+
static void handle_in_template(GumboParser* parser, GumboToken* token) {
|
4065
4062
|
GumboParserState* state = parser->_parser_state;
|
4066
4063
|
switch (token->type) {
|
4067
4064
|
case GUMBO_TOKEN_WHITESPACE:
|
@@ -4069,7 +4066,8 @@ static bool handle_in_template(GumboParser* parser, GumboToken* token) {
|
|
4069
4066
|
case GUMBO_TOKEN_COMMENT:
|
4070
4067
|
case GUMBO_TOKEN_NULL:
|
4071
4068
|
case GUMBO_TOKEN_DOCTYPE:
|
4072
|
-
|
4069
|
+
handle_in_body(parser, token);
|
4070
|
+
return;
|
4073
4071
|
default:
|
4074
4072
|
break;
|
4075
4073
|
}
|
@@ -4080,7 +4078,8 @@ static bool handle_in_template(GumboParser* parser, GumboToken* token) {
|
|
4080
4078
|
})
|
4081
4079
|
|| tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)
|
4082
4080
|
) {
|
4083
|
-
|
4081
|
+
handle_in_head(parser, token);
|
4082
|
+
return;
|
4084
4083
|
}
|
4085
4084
|
if (
|
4086
4085
|
tag_in(token, kStartTag, &(const TagSet) {
|
@@ -4091,45 +4090,45 @@ static bool handle_in_template(GumboParser* parser, GumboToken* token) {
|
|
4091
4090
|
push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
|
4092
4091
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
|
4093
4092
|
state->_reprocess_current_token = true;
|
4094
|
-
return
|
4093
|
+
return;
|
4095
4094
|
}
|
4096
4095
|
if (tag_is(token, kStartTag, GUMBO_TAG_COL)) {
|
4097
4096
|
pop_template_insertion_mode(parser);
|
4098
4097
|
push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_COLUMN_GROUP);
|
4099
4098
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_COLUMN_GROUP);
|
4100
4099
|
state->_reprocess_current_token = true;
|
4101
|
-
return
|
4100
|
+
return;
|
4102
4101
|
}
|
4103
4102
|
if (tag_is(token, kStartTag, GUMBO_TAG_TR)) {
|
4104
4103
|
pop_template_insertion_mode(parser);
|
4105
4104
|
push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
|
4106
4105
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
|
4107
4106
|
state->_reprocess_current_token = true;
|
4108
|
-
return
|
4107
|
+
return;
|
4109
4108
|
}
|
4110
4109
|
if (tag_in(token, kStartTag, &td_th_tags)) {
|
4111
4110
|
pop_template_insertion_mode(parser);
|
4112
4111
|
push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_ROW);
|
4113
4112
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_ROW);
|
4114
4113
|
state->_reprocess_current_token = true;
|
4115
|
-
return
|
4114
|
+
return;
|
4116
4115
|
}
|
4117
4116
|
if (token->type == GUMBO_TOKEN_START_TAG) {
|
4118
4117
|
pop_template_insertion_mode(parser);
|
4119
4118
|
push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_BODY);
|
4120
4119
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_BODY);
|
4121
4120
|
state->_reprocess_current_token = true;
|
4122
|
-
return
|
4121
|
+
return;
|
4123
4122
|
}
|
4124
4123
|
if (token->type == GUMBO_TOKEN_END_TAG) {
|
4125
4124
|
parser_add_parse_error(parser, token);
|
4126
4125
|
ignore_token(parser);
|
4127
|
-
return
|
4126
|
+
return;
|
4128
4127
|
}
|
4129
4128
|
if (token->type == GUMBO_TOKEN_EOF) {
|
4130
4129
|
if (!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
|
4131
4130
|
// Stop parsing.
|
4132
|
-
return
|
4131
|
+
return;
|
4133
4132
|
}
|
4134
4133
|
parser_add_parse_error(parser, token);
|
4135
4134
|
while (!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_TEMPLATE))
|
@@ -4138,40 +4137,41 @@ static bool handle_in_template(GumboParser* parser, GumboToken* token) {
|
|
4138
4137
|
pop_template_insertion_mode(parser);
|
4139
4138
|
reset_insertion_mode_appropriately(parser);
|
4140
4139
|
state->_reprocess_current_token = true;
|
4141
|
-
return
|
4140
|
+
return;
|
4142
4141
|
}
|
4143
4142
|
assert(0 && "unreachable");
|
4144
|
-
return false;
|
4145
4143
|
}
|
4146
4144
|
|
4147
4145
|
// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-afterbody
|
4148
|
-
static
|
4146
|
+
static void handle_after_body(GumboParser* parser, GumboToken* token) {
|
4149
4147
|
if (
|
4150
4148
|
token->type == GUMBO_TOKEN_WHITESPACE
|
4151
4149
|
|| tag_is(token, kStartTag, GUMBO_TAG_HTML)
|
4152
4150
|
) {
|
4153
|
-
|
4151
|
+
handle_in_body(parser, token);
|
4152
|
+
return;
|
4154
4153
|
}
|
4155
4154
|
if (token->type == GUMBO_TOKEN_COMMENT) {
|
4156
4155
|
GumboNode* html_node = parser->_output->root;
|
4157
4156
|
assert(html_node != NULL);
|
4158
4157
|
append_comment_node(parser, html_node, token);
|
4159
|
-
return
|
4158
|
+
return;
|
4160
4159
|
}
|
4161
4160
|
if (token->type == GUMBO_TOKEN_DOCTYPE) {
|
4162
4161
|
parser_add_parse_error(parser, token);
|
4163
4162
|
ignore_token(parser);
|
4164
|
-
return
|
4163
|
+
return;
|
4165
4164
|
}
|
4166
4165
|
if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
|
4167
|
-
|
4166
|
+
handle_in_body(parser, token);
|
4167
|
+
return;
|
4168
4168
|
}
|
4169
4169
|
if (tag_is(token, kEndTag, GUMBO_TAG_HTML)) {
|
4170
4170
|
/* fragment case: ignore the closing HTML token */
|
4171
4171
|
if (is_fragment_parser(parser)) {
|
4172
4172
|
parser_add_parse_error(parser, token);
|
4173
4173
|
ignore_token(parser);
|
4174
|
-
return
|
4174
|
+
return;
|
4175
4175
|
}
|
4176
4176
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_AFTER_BODY);
|
4177
4177
|
GumboNode* html = parser->_parser_state->_open_elements.data[0];
|
@@ -4180,44 +4180,44 @@ static bool handle_after_body(GumboParser* parser, GumboToken* token) {
|
|
4180
4180
|
parser->_parser_state->_current_token,
|
4181
4181
|
&html->v.element
|
4182
4182
|
);
|
4183
|
-
return
|
4183
|
+
return;
|
4184
4184
|
}
|
4185
4185
|
if (token->type == GUMBO_TOKEN_EOF) {
|
4186
|
-
return
|
4186
|
+
return;
|
4187
4187
|
}
|
4188
4188
|
parser_add_parse_error(parser, token);
|
4189
4189
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_BODY);
|
4190
4190
|
parser->_parser_state->_reprocess_current_token = true;
|
4191
|
-
return false;
|
4192
4191
|
}
|
4193
4192
|
|
4194
4193
|
// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inframeset
|
4195
|
-
static
|
4194
|
+
static void handle_in_frameset(GumboParser* parser, GumboToken* token) {
|
4196
4195
|
if (token->type == GUMBO_TOKEN_WHITESPACE) {
|
4197
4196
|
insert_text_token(parser, token);
|
4198
|
-
return
|
4197
|
+
return;
|
4199
4198
|
}
|
4200
4199
|
if (token->type == GUMBO_TOKEN_COMMENT) {
|
4201
4200
|
append_comment_node(parser, get_current_node(parser), token);
|
4202
|
-
return
|
4201
|
+
return;
|
4203
4202
|
}
|
4204
4203
|
if (token->type == GUMBO_TOKEN_DOCTYPE) {
|
4205
4204
|
parser_add_parse_error(parser, token);
|
4206
4205
|
ignore_token(parser);
|
4207
|
-
return
|
4206
|
+
return;
|
4208
4207
|
}
|
4209
4208
|
if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
|
4210
|
-
|
4209
|
+
handle_in_body(parser, token);
|
4210
|
+
return;
|
4211
4211
|
}
|
4212
4212
|
if (tag_is(token, kStartTag, GUMBO_TAG_FRAMESET)) {
|
4213
4213
|
insert_element_from_token(parser, token);
|
4214
|
-
return
|
4214
|
+
return;
|
4215
4215
|
}
|
4216
4216
|
if (tag_is(token, kEndTag, GUMBO_TAG_FRAMESET)) {
|
4217
4217
|
if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_HTML)) {
|
4218
4218
|
parser_add_parse_error(parser, token);
|
4219
4219
|
ignore_token(parser);
|
4220
|
-
return
|
4220
|
+
return;
|
4221
4221
|
}
|
4222
4222
|
pop_current_node(parser);
|
4223
4223
|
if (
|
@@ -4226,46 +4226,45 @@ static bool handle_in_frameset(GumboParser* parser, GumboToken* token) {
|
|
4226
4226
|
) {
|
4227
4227
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_FRAMESET);
|
4228
4228
|
}
|
4229
|
-
return
|
4229
|
+
return;
|
4230
4230
|
}
|
4231
4231
|
if (tag_is(token, kStartTag, GUMBO_TAG_FRAME)) {
|
4232
4232
|
insert_element_from_token(parser, token);
|
4233
4233
|
pop_current_node(parser);
|
4234
4234
|
acknowledge_self_closing_tag(parser);
|
4235
|
-
return
|
4235
|
+
return;
|
4236
4236
|
}
|
4237
4237
|
if (tag_is(token, kStartTag, GUMBO_TAG_NOFRAMES)) {
|
4238
|
-
|
4238
|
+
handle_in_head(parser, token);
|
4239
|
+
return;
|
4239
4240
|
}
|
4240
4241
|
if (token->type == GUMBO_TOKEN_EOF) {
|
4241
|
-
if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_HTML))
|
4242
|
+
if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_HTML))
|
4242
4243
|
parser_add_parse_error(parser, token);
|
4243
|
-
|
4244
|
-
}
|
4245
|
-
return true;
|
4244
|
+
return;
|
4246
4245
|
}
|
4247
4246
|
parser_add_parse_error(parser, token);
|
4248
4247
|
ignore_token(parser);
|
4249
|
-
return false;
|
4250
4248
|
}
|
4251
4249
|
|
4252
4250
|
// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-afterframeset
|
4253
|
-
static
|
4251
|
+
static void handle_after_frameset(GumboParser* parser, GumboToken* token) {
|
4254
4252
|
if (token->type == GUMBO_TOKEN_WHITESPACE) {
|
4255
4253
|
insert_text_token(parser, token);
|
4256
|
-
return
|
4254
|
+
return;
|
4257
4255
|
}
|
4258
4256
|
if (token->type == GUMBO_TOKEN_COMMENT) {
|
4259
4257
|
append_comment_node(parser, get_current_node(parser), token);
|
4260
|
-
return
|
4258
|
+
return;
|
4261
4259
|
}
|
4262
4260
|
if (token->type == GUMBO_TOKEN_DOCTYPE) {
|
4263
4261
|
parser_add_parse_error(parser, token);
|
4264
4262
|
ignore_token(parser);
|
4265
|
-
return
|
4263
|
+
return;
|
4266
4264
|
}
|
4267
4265
|
if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
|
4268
|
-
|
4266
|
+
handle_in_body(parser, token);
|
4267
|
+
return;
|
4269
4268
|
}
|
4270
4269
|
if (tag_is(token, kEndTag, GUMBO_TAG_HTML)) {
|
4271
4270
|
GumboNode* html = parser->_parser_state->_open_elements.data[0];
|
@@ -4275,71 +4274,71 @@ static bool handle_after_frameset(GumboParser* parser, GumboToken* token) {
|
|
4275
4274
|
&html->v.element
|
4276
4275
|
);
|
4277
4276
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_AFTER_FRAMESET);
|
4278
|
-
return
|
4277
|
+
return;
|
4279
4278
|
}
|
4280
4279
|
if (tag_is(token, kStartTag, GUMBO_TAG_NOFRAMES)) {
|
4281
4280
|
return handle_in_head(parser, token);
|
4282
4281
|
}
|
4283
4282
|
if (token->type == GUMBO_TOKEN_EOF) {
|
4284
|
-
return
|
4283
|
+
return;
|
4285
4284
|
}
|
4286
4285
|
parser_add_parse_error(parser, token);
|
4287
4286
|
ignore_token(parser);
|
4288
|
-
return false;
|
4289
4287
|
}
|
4290
4288
|
|
4291
4289
|
// https://html.spec.whatwg.org/multipage/parsing.html#the-after-after-body-insertion-mode
|
4292
|
-
static
|
4290
|
+
static void handle_after_after_body(GumboParser* parser, GumboToken* token) {
|
4293
4291
|
if (token->type == GUMBO_TOKEN_COMMENT) {
|
4294
4292
|
append_comment_node(parser, get_document_node(parser), token);
|
4295
|
-
return
|
4293
|
+
return;
|
4296
4294
|
}
|
4297
4295
|
if (
|
4298
4296
|
token->type == GUMBO_TOKEN_DOCTYPE
|
4299
4297
|
|| token->type == GUMBO_TOKEN_WHITESPACE
|
4300
4298
|
|| tag_is(token, kStartTag, GUMBO_TAG_HTML)
|
4301
4299
|
) {
|
4302
|
-
|
4300
|
+
handle_in_body(parser, token);
|
4301
|
+
return;
|
4303
4302
|
}
|
4304
4303
|
if (token->type == GUMBO_TOKEN_EOF) {
|
4305
|
-
return
|
4304
|
+
return;
|
4306
4305
|
}
|
4307
4306
|
parser_add_parse_error(parser, token);
|
4308
4307
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_BODY);
|
4309
4308
|
parser->_parser_state->_reprocess_current_token = true;
|
4310
|
-
return false;
|
4311
4309
|
}
|
4312
4310
|
|
4313
4311
|
// https://html.spec.whatwg.org/multipage/parsing.html#the-after-after-frameset-insertion-mode
|
4314
|
-
static
|
4312
|
+
static void handle_after_after_frameset (
|
4315
4313
|
GumboParser* parser,
|
4316
4314
|
GumboToken* token
|
4317
4315
|
) {
|
4318
4316
|
if (token->type == GUMBO_TOKEN_COMMENT) {
|
4319
4317
|
append_comment_node(parser, get_document_node(parser), token);
|
4320
|
-
return
|
4318
|
+
return;
|
4321
4319
|
}
|
4322
4320
|
if (
|
4323
4321
|
token->type == GUMBO_TOKEN_DOCTYPE
|
4324
4322
|
|| token->type == GUMBO_TOKEN_WHITESPACE
|
4325
4323
|
|| tag_is(token, kStartTag, GUMBO_TAG_HTML)
|
4326
4324
|
) {
|
4327
|
-
|
4325
|
+
handle_in_body(parser, token);
|
4326
|
+
return;
|
4328
4327
|
}
|
4329
4328
|
if (token->type == GUMBO_TOKEN_EOF) {
|
4330
|
-
return
|
4329
|
+
return;
|
4331
4330
|
}
|
4332
4331
|
if (tag_is(token, kStartTag, GUMBO_TAG_NOFRAMES)) {
|
4333
|
-
|
4332
|
+
handle_in_head(parser, token);
|
4333
|
+
return;
|
4334
4334
|
}
|
4335
4335
|
parser_add_parse_error(parser, token);
|
4336
4336
|
ignore_token(parser);
|
4337
|
-
return false;
|
4338
4337
|
}
|
4339
4338
|
|
4340
4339
|
// Function pointers for each insertion mode.
|
4341
4340
|
// Keep in sync with insertion_mode.h.
|
4342
|
-
typedef
|
4341
|
+
typedef void (*TokenHandler)(GumboParser* parser, GumboToken* token);
|
4343
4342
|
static const TokenHandler kTokenHandlers[] = {
|
4344
4343
|
handle_initial,
|
4345
4344
|
handle_before_html,
|
@@ -4366,36 +4365,36 @@ static const TokenHandler kTokenHandlers[] = {
|
|
4366
4365
|
handle_after_after_frameset
|
4367
4366
|
};
|
4368
4367
|
|
4369
|
-
static
|
4368
|
+
static void handle_html_content(GumboParser* parser, GumboToken* token) {
|
4370
4369
|
const GumboInsertionMode mode = parser->_parser_state->_insertion_mode;
|
4371
4370
|
const TokenHandler handler = kTokenHandlers[mode];
|
4372
|
-
|
4371
|
+
handler(parser, token);
|
4373
4372
|
}
|
4374
4373
|
|
4375
4374
|
// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inforeign
|
4376
|
-
static
|
4375
|
+
static void handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
|
4377
4376
|
gumbo_debug("Handling foreign content");
|
4378
4377
|
switch (token->type) {
|
4379
4378
|
case GUMBO_TOKEN_NULL:
|
4380
4379
|
parser_add_parse_error(parser, token);
|
4381
4380
|
token->v.character = kUtf8ReplacementChar;
|
4382
4381
|
insert_text_token(parser, token);
|
4383
|
-
return
|
4382
|
+
return;
|
4384
4383
|
case GUMBO_TOKEN_WHITESPACE:
|
4385
4384
|
insert_text_token(parser, token);
|
4386
|
-
return
|
4385
|
+
return;
|
4387
4386
|
case GUMBO_TOKEN_CDATA:
|
4388
4387
|
case GUMBO_TOKEN_CHARACTER:
|
4389
4388
|
insert_text_token(parser, token);
|
4390
4389
|
set_frameset_not_ok(parser);
|
4391
|
-
return
|
4390
|
+
return;
|
4392
4391
|
case GUMBO_TOKEN_COMMENT:
|
4393
4392
|
append_comment_node(parser, get_current_node(parser), token);
|
4394
|
-
return
|
4393
|
+
return;
|
4395
4394
|
case GUMBO_TOKEN_DOCTYPE:
|
4396
4395
|
parser_add_parse_error(parser, token);
|
4397
4396
|
ignore_token(parser);
|
4398
|
-
return
|
4397
|
+
return;
|
4399
4398
|
default:
|
4400
4399
|
// Fall through to the if-statements below.
|
4401
4400
|
break;
|
@@ -4439,7 +4438,7 @@ static bool handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
|
|
4439
4438
|
)
|
4440
4439
|
);
|
4441
4440
|
parser->_parser_state->_reprocess_current_token = true;
|
4442
|
-
return
|
4441
|
+
return;
|
4443
4442
|
}
|
4444
4443
|
// This is a start tag so the next if's then branch will be taken.
|
4445
4444
|
}
|
@@ -4460,7 +4459,7 @@ static bool handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
|
|
4460
4459
|
pop_current_node(parser);
|
4461
4460
|
acknowledge_self_closing_tag(parser);
|
4462
4461
|
}
|
4463
|
-
return
|
4462
|
+
return;
|
4464
4463
|
// </script> tags are handled like any other end tag, putting the script's
|
4465
4464
|
// text into a text node child and closing the current node.
|
4466
4465
|
}
|
@@ -4470,11 +4469,8 @@ static bool handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
|
|
4470
4469
|
const char* name = token->v.end_tag.name;
|
4471
4470
|
assert(node != NULL);
|
4472
4471
|
|
4473
|
-
|
4474
|
-
if (!node_tagname_is(node, tag, name)) {
|
4472
|
+
if (!node_tagname_is(node, tag, name))
|
4475
4473
|
parser_add_parse_error(parser, token);
|
4476
|
-
is_success = false;
|
4477
|
-
}
|
4478
4474
|
int i = parser->_parser_state->_open_elements.length;
|
4479
4475
|
for (--i; i > 0;) {
|
4480
4476
|
// Here we move up the stack until we find an HTML element (in which
|
@@ -4489,7 +4485,7 @@ static bool handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
|
|
4489
4485
|
// be an element on the stack of open elements (set below), so
|
4490
4486
|
// this loop is guaranteed to terminate.
|
4491
4487
|
}
|
4492
|
-
return
|
4488
|
+
return;
|
4493
4489
|
}
|
4494
4490
|
--i;
|
4495
4491
|
node = parser->_parser_state->_open_elements.data[i];
|
@@ -4500,22 +4496,22 @@ static bool handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
|
|
4500
4496
|
}
|
4501
4497
|
assert(node->v.element.tag_namespace == GUMBO_NAMESPACE_HTML);
|
4502
4498
|
if (i == 0)
|
4503
|
-
return
|
4499
|
+
return;
|
4504
4500
|
// We can't call handle_token directly because the current node is still in
|
4505
4501
|
// a foreign namespace, so it would re-enter this and result in infinite
|
4506
4502
|
// recursion.
|
4507
|
-
|
4503
|
+
handle_html_content(parser, token);
|
4508
4504
|
}
|
4509
4505
|
|
4510
4506
|
// https://html.spec.whatwg.org/multipage/parsing.html#tree-construction
|
4511
|
-
static
|
4507
|
+
static void handle_token(GumboParser* parser, GumboToken* token) {
|
4512
4508
|
if (
|
4513
4509
|
parser->_parser_state->_ignore_next_linefeed
|
4514
4510
|
&& token->type == GUMBO_TOKEN_WHITESPACE && token->v.character == '\n'
|
4515
4511
|
) {
|
4516
4512
|
parser->_parser_state->_ignore_next_linefeed = false;
|
4517
4513
|
ignore_token(parser);
|
4518
|
-
return
|
4514
|
+
return;
|
4519
4515
|
}
|
4520
4516
|
// This needs to be reset both here and in the conditional above to catch both
|
4521
4517
|
// the case where the next token is not whitespace (so we don't ignore
|
@@ -4557,9 +4553,9 @@ static bool handle_token(GumboParser* parser, GumboToken* token) {
|
|
4557
4553
|
token->type == GUMBO_TOKEN_NULL ||
|
4558
4554
|
token->type == GUMBO_TOKEN_WHITESPACE)) ||
|
4559
4555
|
token->type == GUMBO_TOKEN_EOF) {
|
4560
|
-
|
4556
|
+
handle_html_content(parser, token);
|
4561
4557
|
} else {
|
4562
|
-
|
4558
|
+
handle_in_foreign_content(parser, token);
|
4563
4559
|
}
|
4564
4560
|
}
|
4565
4561
|
|
@@ -4746,7 +4742,6 @@ GumboOutput* gumbo_parse_with_options (
|
|
4746
4742
|
|
4747
4743
|
const unsigned int max_tree_depth = options->max_tree_depth;
|
4748
4744
|
GumboToken token;
|
4749
|
-
bool has_error = false;
|
4750
4745
|
|
4751
4746
|
do {
|
4752
4747
|
if (state->_reprocess_current_token) {
|
@@ -4758,7 +4753,7 @@ GumboOutput* gumbo_parse_with_options (
|
|
4758
4753
|
adjusted_current_node &&
|
4759
4754
|
adjusted_current_node->v.element.tag_namespace != GUMBO_NAMESPACE_HTML
|
4760
4755
|
);
|
4761
|
-
|
4756
|
+
gumbo_lex(&parser, &token);
|
4762
4757
|
}
|
4763
4758
|
|
4764
4759
|
const char* token_type = "text";
|
@@ -4792,7 +4787,7 @@ GumboOutput* gumbo_parse_with_options (
|
|
4792
4787
|
state->_current_token = &token;
|
4793
4788
|
state->_self_closing_flag_acknowledged = false;
|
4794
4789
|
|
4795
|
-
|
4790
|
+
handle_token(&parser, &token);
|
4796
4791
|
|
4797
4792
|
// Check for memory leaks when ownership is transferred from start tag
|
4798
4793
|
// tokens to nodes.
|
@@ -4809,7 +4804,6 @@ GumboOutput* gumbo_parse_with_options (
|
|
4809
4804
|
if (token.type == GUMBO_TOKEN_START_TAG &&
|
4810
4805
|
token.v.start_tag.is_self_closing &&
|
4811
4806
|
!state->_self_closing_flag_acknowledged) {
|
4812
|
-
has_error = true;
|
4813
4807
|
GumboError* error = gumbo_add_error(&parser);
|
4814
4808
|
if (error) {
|
4815
4809
|
// This is essentially a tokenizer error that's only caught during
|
@@ -4837,7 +4831,7 @@ GumboOutput* gumbo_parse_with_options (
|
|
4837
4831
|
|
4838
4832
|
} while (
|
4839
4833
|
(token.type != GUMBO_TOKEN_EOF || state->_reprocess_current_token)
|
4840
|
-
&& !(options->stop_on_first_error &&
|
4834
|
+
&& !(options->stop_on_first_error && parser._output->document_error)
|
4841
4835
|
);
|
4842
4836
|
|
4843
4837
|
finish_parsing(&parser);
|
@@ -4865,6 +4859,8 @@ const char* gumbo_status_to_string(GumboOutputStatus status) {
|
|
4865
4859
|
return "OK";
|
4866
4860
|
case GUMBO_STATUS_OUT_OF_MEMORY:
|
4867
4861
|
return "System allocator returned NULL during parsing";
|
4862
|
+
case GUMBO_STATUS_TOO_MANY_ATTRIBUTES:
|
4863
|
+
return "Attributes per element limit exceeded";
|
4868
4864
|
case GUMBO_STATUS_TREE_TOO_DEEP:
|
4869
4865
|
return "Document tree depth limit exceeded";
|
4870
4866
|
default:
|