nokogumbo 2.0.1 → 2.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/nokogumbo/extconf.rb +1 -1
- data/ext/nokogumbo/nokogumbo.c +1 -0
- data/gumbo-parser/src/error.c +17 -8
- data/gumbo-parser/src/gumbo.h +8 -0
- data/gumbo-parser/src/parser.c +473 -480
- data/gumbo-parser/src/tokenizer.c +12 -25
- data/gumbo-parser/src/tokenizer.h +2 -13
- data/lib/nokogumbo/version.rb +1 -1
- metadata +3 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: dbc23d3b7a9665b48d0516f523756407ca46733286e89edaf2b5b01b05820ffd
|
4
|
+
data.tar.gz: 9d777f65fe5170fe66fde53dd9fa0d2904e2ca2d73d25d6fd240fc383cb20804
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 602ecadc3afd998eb380a9d4fed204cdd7fd48e5ee8539e4ae5713289fa65a923d27c4b67835546c048522f1a04a165d87f257f1389c591e41e127be06e98109
|
7
|
+
data.tar.gz: 88a714a552e5cc6f11b65da15bd9816d21bfc383dd5c68e1b6e8ab82ce842adf3159ad56579428b87b7c1dba002b9e38758494865153285f2563aeae9edeba54
|
data/ext/nokogumbo/extconf.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
require 'rubygems'
|
1
2
|
require 'fileutils'
|
2
3
|
require 'mkmf'
|
3
4
|
require 'nokogiri'
|
@@ -23,7 +24,6 @@ def download_headers
|
|
23
24
|
return nil if dep_index.nil?
|
24
25
|
requirement = NG_SPEC.dependencies[dep_index].requirement.to_s
|
25
26
|
|
26
|
-
require 'rubygems'
|
27
27
|
gem 'mini_portile2', requirement
|
28
28
|
require 'mini_portile2'
|
29
29
|
p = MiniPortile::new('libxml2', version).tap do |r|
|
data/ext/nokogumbo/nokogumbo.c
CHANGED
data/gumbo-parser/src/error.c
CHANGED
@@ -365,11 +365,14 @@ static void handle_parser_error (
|
|
365
365
|
// pointer to the beginning of the string if this is the first line.
|
366
366
|
static const char* find_prev_newline (
|
367
367
|
const char* source_text,
|
368
|
+
size_t source_length,
|
368
369
|
const char* error_location
|
369
370
|
) {
|
371
|
+
const char* source_end = source_text + source_length;
|
370
372
|
assert(error_location >= source_text);
|
373
|
+
assert(error_location <= source_end);
|
371
374
|
const char* c = error_location;
|
372
|
-
if (*c == '\n'
|
375
|
+
if (c != source_text && (error_location == source_end || *c == '\n'))
|
373
376
|
--c;
|
374
377
|
while (c != source_text && *c != '\n')
|
375
378
|
--c;
|
@@ -377,20 +380,25 @@ static const char* find_prev_newline (
|
|
377
380
|
}
|
378
381
|
|
379
382
|
// Finds the next newline in the original source buffer from a given byte
|
380
|
-
// location. Returns a character pointer to that newline, or a pointer to
|
381
|
-
//
|
383
|
+
// location. Returns a character pointer to that newline, or a pointer to
|
384
|
+
// source_text + source_length if this is the last line.
|
382
385
|
static const char* find_next_newline(
|
383
|
-
const char*
|
386
|
+
const char* source_text,
|
387
|
+
size_t source_length,
|
384
388
|
const char* error_location
|
385
389
|
) {
|
386
|
-
|
390
|
+
const char* source_end = source_text + source_length;
|
391
|
+
assert(error_location >= source_text);
|
392
|
+
assert(error_location <= source_end);
|
387
393
|
const char* c = error_location;
|
388
|
-
while (c !=
|
394
|
+
while (c != source_end && *c != '\n')
|
389
395
|
++c;
|
390
396
|
return c;
|
391
397
|
}
|
392
398
|
|
393
399
|
GumboError* gumbo_add_error(GumboParser* parser) {
|
400
|
+
parser->_output->document_error = true;
|
401
|
+
|
394
402
|
int max_errors = parser->_options->max_errors;
|
395
403
|
if (max_errors >= 0 && parser->_output->errors.length >= (unsigned int) max_errors) {
|
396
404
|
return NULL;
|
@@ -547,8 +555,9 @@ void caret_diagnostic_to_string (
|
|
547
555
|
) {
|
548
556
|
error_to_string(error, output);
|
549
557
|
|
550
|
-
const char*
|
551
|
-
const char*
|
558
|
+
const char* error_text = error->original_text.data;
|
559
|
+
const char* line_start = find_prev_newline(source_text, source_length, error_text);
|
560
|
+
const char* line_end = find_next_newline(source_text, source_length, error_text);
|
552
561
|
GumboStringPiece original_line;
|
553
562
|
original_line.data = line_start;
|
554
563
|
original_line.length = line_end - line_start;
|
data/gumbo-parser/src/gumbo.h
CHANGED
@@ -820,6 +820,14 @@ typedef struct GumboInternalOutput {
|
|
820
820
|
*/
|
821
821
|
GumboVector /* GumboError */ errors;
|
822
822
|
|
823
|
+
/**
|
824
|
+
* True if the parser encountered an error.
|
825
|
+
*
|
826
|
+
* This can be true and `errors` an empty `GumboVector` if the `max_errors`
|
827
|
+
* option was set to 0.
|
828
|
+
*/
|
829
|
+
bool document_error;
|
830
|
+
|
823
831
|
/**
|
824
832
|
* A status code indicating whether parsing finished successfully or was
|
825
833
|
* stopped mid-document due to exceptional circumstances.
|
data/gumbo-parser/src/parser.c
CHANGED
@@ -336,6 +336,7 @@ static void output_init(GumboParser* parser) {
|
|
336
336
|
GumboOutput* output = gumbo_alloc(sizeof(GumboOutput));
|
337
337
|
output->root = NULL;
|
338
338
|
output->document = new_document_node();
|
339
|
+
output->document_error = false;
|
339
340
|
output->status = GUMBO_STATUS_OK;
|
340
341
|
parser->_output = output;
|
341
342
|
gumbo_init_errors(parser);
|
@@ -608,6 +609,14 @@ static bool node_qualified_tagname_is (
|
|
608
609
|
return !gumbo_ascii_strcasecmp(element_name, name);
|
609
610
|
}
|
610
611
|
|
612
|
+
static bool node_html_tagname_is (
|
613
|
+
const GumboNode* node,
|
614
|
+
GumboTag tag,
|
615
|
+
const char *name
|
616
|
+
) {
|
617
|
+
return node_qualified_tagname_is(node, GUMBO_NAMESPACE_HTML, tag, name);
|
618
|
+
}
|
619
|
+
|
611
620
|
static bool node_tagname_is (
|
612
621
|
const GumboNode* node,
|
613
622
|
GumboTag tag,
|
@@ -633,7 +642,6 @@ static bool node_qualified_tag_is (
|
|
633
642
|
|
634
643
|
// Like node_tag_in, but for the single-tag case in the HTML namespace
|
635
644
|
static bool node_html_tag_is(const GumboNode* node, GumboTag tag) {
|
636
|
-
assert(tag != GUMBO_TAG_UNKNOWN);
|
637
645
|
return node_qualified_tag_is(node, GUMBO_NAMESPACE_HTML, tag);
|
638
646
|
}
|
639
647
|
|
@@ -1675,14 +1683,18 @@ static bool has_an_element_in_select_scope(const GumboParser* parser, GumboTag t
|
|
1675
1683
|
// https://html.spec.whatwg.org/multipage/parsing.html#generate-implied-end-tags
|
1676
1684
|
// "exception" is the "element to exclude from the process" listed in the spec.
|
1677
1685
|
// Pass GUMBO_TAG_LAST to not exclude any of them.
|
1678
|
-
static void generate_implied_end_tags(
|
1686
|
+
static void generate_implied_end_tags (
|
1687
|
+
GumboParser* parser,
|
1688
|
+
GumboTag exception,
|
1689
|
+
const char* exception_name
|
1690
|
+
) {
|
1679
1691
|
static const TagSet tags = {
|
1680
1692
|
TAG(DD), TAG(DT), TAG(LI), TAG(OPTGROUP), TAG(OPTION),
|
1681
1693
|
TAG(P), TAG(RB), TAG(RP), TAG(RT), TAG(RTC)
|
1682
1694
|
};
|
1683
1695
|
while (
|
1684
1696
|
node_tag_in_set(get_current_node(parser), &tags)
|
1685
|
-
&& !
|
1697
|
+
&& !node_html_tagname_is(get_current_node(parser), exception, exception_name)
|
1686
1698
|
) {
|
1687
1699
|
pop_current_node(parser);
|
1688
1700
|
}
|
@@ -1741,30 +1753,26 @@ static bool close_table(GumboParser* parser) {
|
|
1741
1753
|
|
1742
1754
|
// This factors out the clauses relating to "act as if an end tag token with tag
|
1743
1755
|
// name `cell_tag` had been seen".
|
1744
|
-
static
|
1756
|
+
static void close_table_cell (
|
1745
1757
|
GumboParser* parser,
|
1746
1758
|
const GumboToken* token,
|
1747
1759
|
GumboTag cell_tag
|
1748
1760
|
) {
|
1749
|
-
|
1750
|
-
generate_implied_end_tags(parser, GUMBO_TAG_LAST);
|
1761
|
+
generate_implied_end_tags(parser, GUMBO_TAG_LAST, NULL);
|
1751
1762
|
const GumboNode* node = get_current_node(parser);
|
1752
|
-
if (!node_html_tag_is(node, cell_tag))
|
1763
|
+
if (!node_html_tag_is(node, cell_tag))
|
1753
1764
|
parser_add_parse_error(parser, token);
|
1754
|
-
result = false;
|
1755
|
-
}
|
1756
1765
|
do {
|
1757
1766
|
node = pop_current_node(parser);
|
1758
1767
|
} while (!node_html_tag_is(node, cell_tag));
|
1759
1768
|
|
1760
1769
|
clear_active_formatting_elements(parser);
|
1761
1770
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_ROW);
|
1762
|
-
return result;
|
1763
1771
|
}
|
1764
1772
|
|
1765
1773
|
// https://html.spec.whatwg.org/multipage/parsing.html#close-the-cell
|
1766
1774
|
// This holds the logic to determine whether we should close a <td> or a <th>.
|
1767
|
-
static
|
1775
|
+
static void close_current_cell(GumboParser* parser, const GumboToken* token) {
|
1768
1776
|
GumboTag cell_tag;
|
1769
1777
|
if (has_an_element_in_table_scope(parser, GUMBO_TAG_TD)) {
|
1770
1778
|
assert(!has_an_element_in_table_scope(parser, GUMBO_TAG_TH));
|
@@ -1773,7 +1781,7 @@ static bool close_current_cell(GumboParser* parser, const GumboToken* token) {
|
|
1773
1781
|
assert(has_an_element_in_table_scope(parser, GUMBO_TAG_TH));
|
1774
1782
|
cell_tag = GUMBO_TAG_TH;
|
1775
1783
|
}
|
1776
|
-
|
1784
|
+
close_table_cell(parser, token, cell_tag);
|
1777
1785
|
}
|
1778
1786
|
|
1779
1787
|
// This factors out the "act as if an end tag of tag name 'select' had been
|
@@ -1830,14 +1838,14 @@ static bool is_special_node(const GumboNode* node) {
|
|
1830
1838
|
// specified qualified name. If the elements closed are in the set handled by
|
1831
1839
|
// generate_implied_end_tags, this is normal operation. Otherwise, a parse
|
1832
1840
|
// error is recorded. This function does not return a value.
|
1833
|
-
static
|
1841
|
+
static void implicitly_close_tags (
|
1834
1842
|
GumboParser* parser,
|
1835
1843
|
GumboToken* token,
|
1836
1844
|
GumboNamespaceEnum target_ns,
|
1837
1845
|
GumboTag target
|
1838
1846
|
) {
|
1839
|
-
|
1840
|
-
generate_implied_end_tags(parser, target);
|
1847
|
+
assert(target != GUMBO_TAG_UNKNOWN);
|
1848
|
+
generate_implied_end_tags(parser, target, NULL);
|
1841
1849
|
if (!node_qualified_tag_is(get_current_node(parser), target_ns, target)) {
|
1842
1850
|
parser_add_parse_error(parser, token);
|
1843
1851
|
while (
|
@@ -1845,35 +1853,32 @@ static bool implicitly_close_tags (
|
|
1845
1853
|
) {
|
1846
1854
|
pop_current_node(parser);
|
1847
1855
|
}
|
1848
|
-
result = false;
|
1849
1856
|
}
|
1850
1857
|
assert(node_qualified_tag_is(get_current_node(parser), target_ns, target));
|
1851
1858
|
pop_current_node(parser);
|
1852
|
-
return result;
|
1853
1859
|
}
|
1854
1860
|
|
1855
1861
|
// If the stack of open elements has a <p> tag in button scope, this acts as if
|
1856
1862
|
// a </p> tag was encountered, implicitly closing tags. Any parse error is
|
1857
1863
|
// recorded on the parser. This is a convenience function because this particular
|
1858
1864
|
// clause appears several times in the spec.
|
1859
|
-
static
|
1865
|
+
static void maybe_implicitly_close_p_tag (
|
1860
1866
|
GumboParser* parser,
|
1861
1867
|
GumboToken* token
|
1862
1868
|
) {
|
1863
1869
|
if (has_an_element_in_button_scope(parser, GUMBO_TAG_P)) {
|
1864
|
-
|
1870
|
+
implicitly_close_tags (
|
1865
1871
|
parser,
|
1866
1872
|
token,
|
1867
1873
|
GUMBO_NAMESPACE_HTML,
|
1868
1874
|
GUMBO_TAG_P
|
1869
1875
|
);
|
1870
1876
|
}
|
1871
|
-
return true;
|
1872
1877
|
}
|
1873
1878
|
|
1874
1879
|
// Convenience function to encapsulate the logic for closing <li> or <dd>/<dt>
|
1875
1880
|
// tags. Pass true to is_li for handling <li> tags, false for <dd> and <dt>.
|
1876
|
-
static
|
1881
|
+
static void maybe_implicitly_close_list_tag (
|
1877
1882
|
GumboParser* parser,
|
1878
1883
|
GumboToken* token,
|
1879
1884
|
bool is_li
|
@@ -1887,21 +1892,22 @@ static bool maybe_implicitly_close_list_tag (
|
|
1887
1892
|
: node_tag_in_set(node, &dd_dt_tags)
|
1888
1893
|
;
|
1889
1894
|
if (is_list_tag) {
|
1890
|
-
|
1895
|
+
implicitly_close_tags (
|
1891
1896
|
parser,
|
1892
1897
|
token,
|
1893
1898
|
node->v.element.tag_namespace,
|
1894
1899
|
node->v.element.tag
|
1895
1900
|
);
|
1901
|
+
return;
|
1896
1902
|
}
|
1903
|
+
|
1897
1904
|
if (
|
1898
1905
|
is_special_node(node)
|
1899
1906
|
&& !node_tag_in_set(node, &(const TagSet){TAG(ADDRESS), TAG(DIV), TAG(P)})
|
1900
1907
|
) {
|
1901
|
-
return
|
1908
|
+
return;
|
1902
1909
|
}
|
1903
1910
|
}
|
1904
|
-
return true;
|
1905
1911
|
}
|
1906
1912
|
|
1907
1913
|
static void merge_attributes (
|
@@ -2020,7 +2026,7 @@ static void adjust_mathml_attributes(GumboToken* token) {
|
|
2020
2026
|
attr->name = gumbo_strdup("definitionURL");
|
2021
2027
|
}
|
2022
2028
|
|
2023
|
-
static
|
2029
|
+
static void maybe_add_doctype_error (
|
2024
2030
|
GumboParser* parser,
|
2025
2031
|
const GumboToken* token
|
2026
2032
|
) {
|
@@ -2032,9 +2038,7 @@ static bool maybe_add_doctype_error (
|
|
2032
2038
|
&& strcmp(doctype->system_identifier, "about:legacy-compat"))
|
2033
2039
|
) {
|
2034
2040
|
parser_add_parse_error(parser, token);
|
2035
|
-
return false;
|
2036
2041
|
}
|
2037
|
-
return true;
|
2038
2042
|
}
|
2039
2043
|
|
2040
2044
|
static void remove_from_parent(GumboNode* node) {
|
@@ -2059,30 +2063,103 @@ static void remove_from_parent(GumboNode* node) {
|
|
2059
2063
|
}
|
2060
2064
|
}
|
2061
2065
|
|
2066
|
+
// This is here to clean up memory when the spec says "Ignore current token."
|
2067
|
+
static void ignore_token(GumboParser* parser) {
|
2068
|
+
GumboToken* token = parser->_parser_state->_current_token;
|
2069
|
+
// Ownership of the token's internal buffers are normally transferred to the
|
2070
|
+
// element, but if no element is emitted (as happens in non-verbatim-mode
|
2071
|
+
// when a token is ignored), we need to free it here to prevent a memory
|
2072
|
+
// leak.
|
2073
|
+
gumbo_token_destroy(token);
|
2074
|
+
#ifndef NDEBUG
|
2075
|
+
if (token->type == GUMBO_TOKEN_START_TAG) {
|
2076
|
+
// Mark this sentinel so the assertion in the main loop knows it's been
|
2077
|
+
// destroyed.
|
2078
|
+
token->v.start_tag.attributes = kGumboEmptyVector;
|
2079
|
+
token->v.start_tag.name = NULL;
|
2080
|
+
}
|
2081
|
+
#endif
|
2082
|
+
}
|
2083
|
+
|
2084
|
+
// The token is usually an end tag; however, the adoption agency algorithm may
|
2085
|
+
// invoke this for an 'a' or 'nobr' start tag.
|
2086
|
+
// Parse errors are recorded via parser_add_parse_error; nothing is returned.
|
2087
|
+
static void in_body_any_other_end_tag(GumboParser* parser, GumboToken* token)
|
2088
|
+
{
|
2089
|
+
GumboParserState* state = parser->_parser_state;
|
2090
|
+
GumboTag tag;
|
2091
|
+
const char* tagname;
|
2092
|
+
|
2093
|
+
if (token->type == GUMBO_TOKEN_END_TAG) {
|
2094
|
+
tag = token->v.end_tag.tag;
|
2095
|
+
tagname = token->v.end_tag.name;
|
2096
|
+
} else {
|
2097
|
+
assert(token->type == GUMBO_TOKEN_START_TAG);
|
2098
|
+
tag = token->v.start_tag.tag;
|
2099
|
+
tagname = token->v.start_tag.name;
|
2100
|
+
}
|
2101
|
+
|
2102
|
+
assert(state->_open_elements.length > 0);
|
2103
|
+
assert(node_html_tag_is(state->_open_elements.data[0], GUMBO_TAG_HTML));
|
2104
|
+
// Walk up the stack of open elements until we find one that either:
|
2105
|
+
// a) Matches the tag name we saw
|
2106
|
+
// b) Is in the "special" category.
|
2107
|
+
// If we see a), implicitly close everything up to and including it. If we
|
2108
|
+
// see b), then record a parse error, don't close anything (except the
|
2109
|
+
// implied end tags) and ignore the end tag token.
|
2110
|
+
for (int i = state->_open_elements.length; --i >= 0;) {
|
2111
|
+
const GumboNode* node = state->_open_elements.data[i];
|
2112
|
+
if (node_qualified_tagname_is(node, GUMBO_NAMESPACE_HTML, tag, tagname)) {
|
2113
|
+
generate_implied_end_tags(parser, tag, tagname);
|
2114
|
+
// <!DOCTYPE><body><sarcasm><foo></sarcasm> is an example of an error.
|
2115
|
+
// foo is the "current node" but sarcasm is node.
|
2116
|
+
// XXX: Write a test for this.
|
2117
|
+
if (node != get_current_node(parser)) {
|
2118
|
+
parser_add_parse_error(parser, token);
|
2119
|
+
}
|
2120
|
+
while (node != pop_current_node(parser))
|
2121
|
+
; // Pop everything.
|
2122
|
+
return;
|
2123
|
+
} else if (is_special_node(node)) {
|
2124
|
+
parser_add_parse_error(parser, token);
|
2125
|
+
ignore_token(parser);
|
2126
|
+
return;
|
2127
|
+
}
|
2128
|
+
}
|
2129
|
+
// <html> is in the special category, so we should never get here.
|
2130
|
+
assert(0 && "unreachable");
|
2131
|
+
}
|
2132
|
+
|
2062
2133
|
// https://html.spec.whatwg.org/multipage/parsing.html#an-introduction-to-error-handling-and-strange-cases-in-the-parser
|
2063
2134
|
// Also described in the "in body" handling for end formatting tags.
|
2064
|
-
// Returns
|
2065
|
-
|
2066
|
-
|
2067
|
-
GumboParser* parser,
|
2068
|
-
GumboToken* token,
|
2069
|
-
GumboTag subject
|
2070
|
-
) {
|
2135
|
+
// Parse errors are recorded via parser_add_parse_error; nothing is returned.
|
2136
|
+
static void adoption_agency_algorithm(GumboParser* parser, GumboToken* token)
|
2137
|
+
{
|
2071
2138
|
GumboParserState* state = parser->_parser_state;
|
2072
2139
|
gumbo_debug("Entering adoption agency algorithm.\n");
|
2140
|
+
// Step 1.
|
2141
|
+
GumboTag subject;
|
2142
|
+
if (token->type == GUMBO_TOKEN_START_TAG) {
|
2143
|
+
subject = token->v.start_tag.tag;
|
2144
|
+
} else {
|
2145
|
+
assert(token->type == GUMBO_TOKEN_END_TAG);
|
2146
|
+
subject = token->v.end_tag.tag;
|
2147
|
+
}
|
2148
|
+
assert(subject != GUMBO_TAG_UNKNOWN);
|
2149
|
+
|
2073
2150
|
// Step 2.
|
2074
2151
|
GumboNode* current_node = get_current_node(parser);
|
2075
2152
|
if (
|
2076
|
-
current_node
|
2077
|
-
&& current_node->v.element.tag == subject
|
2153
|
+
node_html_tag_is(current_node, subject)
|
2078
2154
|
&& -1 == gumbo_vector_index_of (
|
2079
2155
|
&state->_active_formatting_elements,
|
2080
2156
|
current_node
|
2081
2157
|
)
|
2082
2158
|
) {
|
2083
2159
|
pop_current_node(parser);
|
2084
|
-
return
|
2160
|
+
return;
|
2085
2161
|
}
|
2162
|
+
|
2086
2163
|
// Steps 3-5 & 21:
|
2087
2164
|
for (unsigned int i = 0; i < 8; ++i) {
|
2088
2165
|
// Step 6.
|
@@ -2093,8 +2170,8 @@ static bool adoption_agency_algorithm (
|
|
2093
2170
|
if (current_node == &kActiveFormattingScopeMarker) {
|
2094
2171
|
gumbo_debug("Broke on scope marker; aborting.\n");
|
2095
2172
|
// Last scope marker; abort the algorithm and handle according to "any
|
2096
|
-
// other end tag.
|
2097
|
-
|
2173
|
+
// other end tag" (below).
|
2174
|
+
break;
|
2098
2175
|
}
|
2099
2176
|
if (node_html_tag_is(current_node, subject)) {
|
2100
2177
|
// Found it.
|
@@ -2116,7 +2193,8 @@ static bool adoption_agency_algorithm (
|
|
2116
2193
|
// "any other end tag" clause (which may potentially add a parse error,
|
2117
2194
|
// but not always).
|
2118
2195
|
gumbo_debug("No active formatting elements; aborting.\n");
|
2119
|
-
|
2196
|
+
in_body_any_other_end_tag(parser, token);
|
2197
|
+
return;
|
2120
2198
|
}
|
2121
2199
|
|
2122
2200
|
// Step 7
|
@@ -2127,20 +2205,19 @@ static bool adoption_agency_algorithm (
|
|
2127
2205
|
formatting_node,
|
2128
2206
|
&state->_active_formatting_elements
|
2129
2207
|
);
|
2130
|
-
return
|
2208
|
+
return;
|
2131
2209
|
}
|
2132
2210
|
|
2133
2211
|
// Step 8
|
2134
2212
|
if (!has_an_element_in_scope(parser, formatting_node->v.element.tag)) {
|
2135
2213
|
parser_add_parse_error(parser, token);
|
2136
2214
|
gumbo_debug("Element not in scope.\n");
|
2137
|
-
return
|
2215
|
+
return;
|
2138
2216
|
}
|
2139
2217
|
|
2140
2218
|
// Step 9
|
2141
|
-
if (formatting_node != get_current_node(parser))
|
2219
|
+
if (formatting_node != get_current_node(parser))
|
2142
2220
|
parser_add_parse_error(parser, token); // But continue onwards.
|
2143
|
-
}
|
2144
2221
|
assert(formatting_node);
|
2145
2222
|
assert(!node_html_tag_is(formatting_node, GUMBO_TAG_HTML));
|
2146
2223
|
assert(!node_html_tag_is(formatting_node, GUMBO_TAG_BODY));
|
@@ -2167,7 +2244,7 @@ static bool adoption_agency_algorithm (
|
|
2167
2244
|
formatting_node,
|
2168
2245
|
&state->_active_formatting_elements
|
2169
2246
|
);
|
2170
|
-
return
|
2247
|
+
return;
|
2171
2248
|
}
|
2172
2249
|
assert(!node_html_tag_is(furthest_block, GUMBO_TAG_HTML));
|
2173
2250
|
|
@@ -2348,25 +2425,6 @@ static bool adoption_agency_algorithm (
|
|
2348
2425
|
&state->_open_elements
|
2349
2426
|
);
|
2350
2427
|
} // Step 21.
|
2351
|
-
return true;
|
2352
|
-
}
|
2353
|
-
|
2354
|
-
// This is here to clean up memory when the spec says "Ignore current token."
|
2355
|
-
static void ignore_token(GumboParser* parser) {
|
2356
|
-
GumboToken* token = parser->_parser_state->_current_token;
|
2357
|
-
// Ownership of the token's internal buffers are normally transferred to the
|
2358
|
-
// element, but if no element is emitted (as happens in non-verbatim-mode
|
2359
|
-
// when a token is ignored), we need to free it here to prevent a memory
|
2360
|
-
// leak.
|
2361
|
-
gumbo_token_destroy(token);
|
2362
|
-
#ifndef NDEBUG
|
2363
|
-
if (token->type == GUMBO_TOKEN_START_TAG) {
|
2364
|
-
// Mark this sentinel so the assertion in the main loop knows it's been
|
2365
|
-
// destroyed.
|
2366
|
-
token->v.start_tag.attributes = kGumboEmptyVector;
|
2367
|
-
token->v.start_tag.name = NULL;
|
2368
|
-
}
|
2369
|
-
#endif
|
2370
2428
|
}
|
2371
2429
|
|
2372
2430
|
// https://html.spec.whatwg.org/multipage/parsing.html#the-end
|
@@ -2391,15 +2449,15 @@ static void finish_parsing(GumboParser* parser) {
|
|
2391
2449
|
; // Pop them all.
|
2392
2450
|
}
|
2393
2451
|
|
2394
|
-
static
|
2452
|
+
static void handle_initial(GumboParser* parser, GumboToken* token) {
|
2395
2453
|
GumboDocument* document = &get_document_node(parser)->v.document;
|
2396
2454
|
if (token->type == GUMBO_TOKEN_WHITESPACE) {
|
2397
2455
|
ignore_token(parser);
|
2398
|
-
return
|
2456
|
+
return;
|
2399
2457
|
}
|
2400
2458
|
if (token->type == GUMBO_TOKEN_COMMENT) {
|
2401
2459
|
append_comment_node(parser, get_document_node(parser), token);
|
2402
|
-
return
|
2460
|
+
return;
|
2403
2461
|
}
|
2404
2462
|
if (token->type == GUMBO_TOKEN_DOCTYPE) {
|
2405
2463
|
document->has_doctype = true;
|
@@ -2408,35 +2466,35 @@ static bool handle_initial(GumboParser* parser, GumboToken* token) {
|
|
2408
2466
|
document->system_identifier = token->v.doc_type.system_identifier;
|
2409
2467
|
document->doc_type_quirks_mode = compute_quirks_mode(&token->v.doc_type);
|
2410
2468
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_BEFORE_HTML);
|
2411
|
-
|
2469
|
+
maybe_add_doctype_error(parser, token);
|
2470
|
+
return;
|
2412
2471
|
}
|
2413
2472
|
parser_add_parse_error(parser, token);
|
2414
2473
|
document->doc_type_quirks_mode = GUMBO_DOCTYPE_QUIRKS;
|
2415
2474
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_BEFORE_HTML);
|
2416
2475
|
parser->_parser_state->_reprocess_current_token = true;
|
2417
|
-
return true;
|
2418
2476
|
}
|
2419
2477
|
|
2420
2478
|
// https://html.spec.whatwg.org/multipage/parsing.html#the-before-html-insertion-mode
|
2421
|
-
static
|
2479
|
+
static void handle_before_html(GumboParser* parser, GumboToken* token) {
|
2422
2480
|
if (token->type == GUMBO_TOKEN_DOCTYPE) {
|
2423
2481
|
parser_add_parse_error(parser, token);
|
2424
2482
|
ignore_token(parser);
|
2425
|
-
return
|
2483
|
+
return;
|
2426
2484
|
}
|
2427
2485
|
if (token->type == GUMBO_TOKEN_COMMENT) {
|
2428
2486
|
append_comment_node(parser, get_document_node(parser), token);
|
2429
|
-
return
|
2487
|
+
return;
|
2430
2488
|
}
|
2431
2489
|
if (token->type == GUMBO_TOKEN_WHITESPACE) {
|
2432
2490
|
ignore_token(parser);
|
2433
|
-
return
|
2491
|
+
return;
|
2434
2492
|
}
|
2435
2493
|
if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
|
2436
2494
|
GumboNode* html_node = insert_element_from_token(parser, token);
|
2437
2495
|
parser->_output->root = html_node;
|
2438
2496
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_BEFORE_HEAD);
|
2439
|
-
return
|
2497
|
+
return;
|
2440
2498
|
}
|
2441
2499
|
if (
|
2442
2500
|
token->type == GUMBO_TOKEN_END_TAG
|
@@ -2444,7 +2502,7 @@ static bool handle_before_html(GumboParser* parser, GumboToken* token) {
|
|
2444
2502
|
) {
|
2445
2503
|
parser_add_parse_error(parser, token);
|
2446
2504
|
ignore_token(parser);
|
2447
|
-
return
|
2505
|
+
return;
|
2448
2506
|
}
|
2449
2507
|
GumboNode* html_node = insert_element_of_tag_type (
|
2450
2508
|
parser,
|
@@ -2455,37 +2513,37 @@ static bool handle_before_html(GumboParser* parser, GumboToken* token) {
|
|
2455
2513
|
parser->_output->root = html_node;
|
2456
2514
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_BEFORE_HEAD);
|
2457
2515
|
parser->_parser_state->_reprocess_current_token = true;
|
2458
|
-
return true;
|
2459
2516
|
}
|
2460
2517
|
|
2461
2518
|
// Forward declarations because of mutual dependencies.
|
2462
|
-
static
|
2463
|
-
static
|
2464
|
-
static
|
2519
|
+
static void handle_token(GumboParser* parser, GumboToken* token);
|
2520
|
+
static void handle_in_body(GumboParser* parser, GumboToken* token);
|
2521
|
+
static void handle_in_template(GumboParser* parser, GumboToken* token);
|
2465
2522
|
|
2466
2523
|
// https://html.spec.whatwg.org/multipage/parsing.html#the-before-head-insertion-mode
|
2467
|
-
static
|
2524
|
+
static void handle_before_head(GumboParser* parser, GumboToken* token) {
|
2468
2525
|
if (token->type == GUMBO_TOKEN_WHITESPACE) {
|
2469
2526
|
ignore_token(parser);
|
2470
|
-
return
|
2527
|
+
return;
|
2471
2528
|
}
|
2472
2529
|
if (token->type == GUMBO_TOKEN_COMMENT) {
|
2473
2530
|
append_comment_node(parser, get_current_node(parser), token);
|
2474
|
-
return
|
2531
|
+
return;
|
2475
2532
|
}
|
2476
2533
|
if (token->type == GUMBO_TOKEN_DOCTYPE) {
|
2477
2534
|
parser_add_parse_error(parser, token);
|
2478
2535
|
ignore_token(parser);
|
2479
|
-
return
|
2536
|
+
return;
|
2480
2537
|
}
|
2481
2538
|
if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
|
2482
|
-
|
2539
|
+
handle_in_body(parser, token);
|
2540
|
+
return;
|
2483
2541
|
}
|
2484
2542
|
if (tag_is(token, kStartTag, GUMBO_TAG_HEAD)) {
|
2485
2543
|
GumboNode* node = insert_element_from_token(parser, token);
|
2486
2544
|
parser->_parser_state->_head_element = node;
|
2487
2545
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_HEAD);
|
2488
|
-
return
|
2546
|
+
return;
|
2489
2547
|
}
|
2490
2548
|
if (
|
2491
2549
|
token->type == GUMBO_TOKEN_END_TAG
|
@@ -2493,7 +2551,7 @@ static bool handle_before_head(GumboParser* parser, GumboToken* token) {
|
|
2493
2551
|
) {
|
2494
2552
|
parser_add_parse_error(parser, token);
|
2495
2553
|
ignore_token(parser);
|
2496
|
-
return
|
2554
|
+
return;
|
2497
2555
|
}
|
2498
2556
|
GumboNode* node = insert_element_of_tag_type (
|
2499
2557
|
parser,
|
@@ -2503,23 +2561,22 @@ static bool handle_before_head(GumboParser* parser, GumboToken* token) {
|
|
2503
2561
|
parser->_parser_state->_head_element = node;
|
2504
2562
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_HEAD);
|
2505
2563
|
parser->_parser_state->_reprocess_current_token = true;
|
2506
|
-
return true;
|
2507
2564
|
}
|
2508
2565
|
|
2509
2566
|
// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inhead
|
2510
|
-
static
|
2567
|
+
static void handle_in_head(GumboParser* parser, GumboToken* token) {
|
2511
2568
|
if (token->type == GUMBO_TOKEN_WHITESPACE) {
|
2512
2569
|
insert_text_token(parser, token);
|
2513
|
-
return
|
2570
|
+
return;
|
2514
2571
|
}
|
2515
2572
|
if (token->type == GUMBO_TOKEN_COMMENT) {
|
2516
2573
|
append_comment_node(parser, get_current_node(parser), token);
|
2517
|
-
return
|
2574
|
+
return;
|
2518
2575
|
}
|
2519
2576
|
if (token->type == GUMBO_TOKEN_DOCTYPE) {
|
2520
2577
|
parser_add_parse_error(parser, token);
|
2521
2578
|
ignore_token(parser);
|
2522
|
-
return
|
2579
|
+
return;
|
2523
2580
|
}
|
2524
2581
|
if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
|
2525
2582
|
return handle_in_body(parser, token);
|
@@ -2532,7 +2589,7 @@ static bool handle_in_head(GumboParser* parser, GumboToken* token) {
|
|
2532
2589
|
insert_element_from_token(parser, token);
|
2533
2590
|
pop_current_node(parser);
|
2534
2591
|
acknowledge_self_closing_tag(parser);
|
2535
|
-
return
|
2592
|
+
return;
|
2536
2593
|
}
|
2537
2594
|
if (tag_is(token, kStartTag, GUMBO_TAG_META)) {
|
2538
2595
|
insert_element_from_token(parser, token);
|
@@ -2542,33 +2599,33 @@ static bool handle_in_head(GumboParser* parser, GumboToken* token) {
|
|
2542
2599
|
// spec doesn't apply. If clients want to handle meta-tag re-encoding, they
|
2543
2600
|
// should specifically look for that string in the document and re-encode it
|
2544
2601
|
// before passing to Gumbo.
|
2545
|
-
return
|
2602
|
+
return;
|
2546
2603
|
}
|
2547
2604
|
if (tag_is(token, kStartTag, GUMBO_TAG_TITLE)) {
|
2548
2605
|
run_generic_parsing_algorithm(parser, token, GUMBO_LEX_RCDATA);
|
2549
|
-
return
|
2606
|
+
return;
|
2550
2607
|
}
|
2551
2608
|
if (
|
2552
2609
|
tag_in(token, kStartTag, &(const TagSet){TAG(NOFRAMES), TAG(STYLE)})
|
2553
2610
|
) {
|
2554
2611
|
run_generic_parsing_algorithm(parser, token, GUMBO_LEX_RAWTEXT);
|
2555
|
-
return
|
2612
|
+
return;
|
2556
2613
|
}
|
2557
2614
|
if (tag_is(token, kStartTag, GUMBO_TAG_NOSCRIPT)) {
|
2558
2615
|
insert_element_from_token(parser, token);
|
2559
2616
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_HEAD_NOSCRIPT);
|
2560
|
-
return
|
2617
|
+
return;
|
2561
2618
|
}
|
2562
2619
|
if (tag_is(token, kStartTag, GUMBO_TAG_SCRIPT)) {
|
2563
2620
|
run_generic_parsing_algorithm(parser, token, GUMBO_LEX_SCRIPT_DATA);
|
2564
|
-
return
|
2621
|
+
return;
|
2565
2622
|
}
|
2566
2623
|
if (tag_is(token, kEndTag, GUMBO_TAG_HEAD)) {
|
2567
2624
|
GumboNode* head = pop_current_node(parser);
|
2568
2625
|
UNUSED_IF_NDEBUG(head);
|
2569
2626
|
assert(node_html_tag_is(head, GUMBO_TAG_HEAD));
|
2570
2627
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_HEAD);
|
2571
|
-
return
|
2628
|
+
return;
|
2572
2629
|
}
|
2573
2630
|
if (
|
2574
2631
|
tag_in(token, kEndTag, &(const TagSet){TAG(BODY), TAG(HTML), TAG(BR)})
|
@@ -2576,7 +2633,7 @@ static bool handle_in_head(GumboParser* parser, GumboToken* token) {
|
|
2576
2633
|
pop_current_node(parser);
|
2577
2634
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_HEAD);
|
2578
2635
|
parser->_parser_state->_reprocess_current_token = true;
|
2579
|
-
return
|
2636
|
+
return;
|
2580
2637
|
}
|
2581
2638
|
if (tag_is(token, kStartTag, GUMBO_TAG_TEMPLATE)) {
|
2582
2639
|
insert_element_from_token(parser, token);
|
@@ -2584,26 +2641,23 @@ static bool handle_in_head(GumboParser* parser, GumboToken* token) {
|
|
2584
2641
|
set_frameset_not_ok(parser);
|
2585
2642
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TEMPLATE);
|
2586
2643
|
push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TEMPLATE);
|
2587
|
-
return
|
2644
|
+
return;
|
2588
2645
|
}
|
2589
2646
|
if (tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
|
2590
2647
|
if (!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
|
2591
2648
|
parser_add_parse_error(parser, token);
|
2592
2649
|
ignore_token(parser);
|
2593
|
-
return
|
2650
|
+
return;
|
2594
2651
|
}
|
2595
2652
|
generate_all_implied_end_tags_thoroughly(parser);
|
2596
|
-
|
2597
|
-
if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_TEMPLATE)) {
|
2653
|
+
if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_TEMPLATE))
|
2598
2654
|
parser_add_parse_error(parser, token);
|
2599
|
-
success = false;
|
2600
|
-
}
|
2601
2655
|
while (!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_TEMPLATE))
|
2602
2656
|
;
|
2603
2657
|
clear_active_formatting_elements(parser);
|
2604
2658
|
pop_template_insertion_mode(parser);
|
2605
2659
|
reset_insertion_mode_appropriately(parser);
|
2606
|
-
return
|
2660
|
+
return;
|
2607
2661
|
}
|
2608
2662
|
if (
|
2609
2663
|
tag_is(token, kStartTag, GUMBO_TAG_HEAD)
|
@@ -2611,29 +2665,30 @@ static bool handle_in_head(GumboParser* parser, GumboToken* token) {
|
|
2611
2665
|
) {
|
2612
2666
|
parser_add_parse_error(parser, token);
|
2613
2667
|
ignore_token(parser);
|
2614
|
-
return
|
2668
|
+
return;
|
2615
2669
|
}
|
2616
2670
|
pop_current_node(parser);
|
2617
2671
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_HEAD);
|
2618
2672
|
parser->_parser_state->_reprocess_current_token = true;
|
2619
|
-
return
|
2673
|
+
return;
|
2620
2674
|
}
|
2621
2675
|
|
2622
2676
|
// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inheadnoscript
|
2623
|
-
static
|
2677
|
+
static void handle_in_head_noscript(GumboParser* parser, GumboToken* token) {
|
2624
2678
|
if (token->type == GUMBO_TOKEN_DOCTYPE) {
|
2625
2679
|
parser_add_parse_error(parser, token);
|
2626
|
-
return
|
2680
|
+
return;
|
2627
2681
|
}
|
2628
2682
|
if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
|
2629
|
-
|
2683
|
+
handle_in_body(parser, token);
|
2684
|
+
return;
|
2630
2685
|
}
|
2631
2686
|
if (tag_is(token, kEndTag, GUMBO_TAG_NOSCRIPT)) {
|
2632
2687
|
const GumboNode* node = pop_current_node(parser);
|
2633
2688
|
assert(node_html_tag_is(node, GUMBO_TAG_NOSCRIPT));
|
2634
2689
|
UNUSED_IF_NDEBUG(node);
|
2635
2690
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_HEAD);
|
2636
|
-
return
|
2691
|
+
return;
|
2637
2692
|
}
|
2638
2693
|
if (
|
2639
2694
|
token->type == GUMBO_TOKEN_WHITESPACE
|
@@ -2643,7 +2698,8 @@ static bool handle_in_head_noscript(GumboParser* parser, GumboToken* token) {
|
|
2643
2698
|
TAG(META), TAG(NOFRAMES), TAG(STYLE)
|
2644
2699
|
})
|
2645
2700
|
) {
|
2646
|
-
|
2701
|
+
handle_in_head(parser, token);
|
2702
|
+
return;
|
2647
2703
|
}
|
2648
2704
|
if (
|
2649
2705
|
tag_in(token, kStartTag, &(const TagSet){TAG(HEAD), TAG(NOSCRIPT)})
|
@@ -2654,7 +2710,7 @@ static bool handle_in_head_noscript(GumboParser* parser, GumboToken* token) {
|
|
2654
2710
|
) {
|
2655
2711
|
parser_add_parse_error(parser, token);
|
2656
2712
|
ignore_token(parser);
|
2657
|
-
return
|
2713
|
+
return;
|
2658
2714
|
}
|
2659
2715
|
parser_add_parse_error(parser, token);
|
2660
2716
|
const GumboNode* node = pop_current_node(parser);
|
@@ -2662,38 +2718,38 @@ static bool handle_in_head_noscript(GumboParser* parser, GumboToken* token) {
|
|
2662
2718
|
UNUSED_IF_NDEBUG(node);
|
2663
2719
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_HEAD);
|
2664
2720
|
parser->_parser_state->_reprocess_current_token = true;
|
2665
|
-
return false;
|
2666
2721
|
}
|
2667
2722
|
|
2668
2723
|
// https://html.spec.whatwg.org/multipage/parsing.html#the-after-head-insertion-mode
|
2669
|
-
static
|
2724
|
+
static void handle_after_head(GumboParser* parser, GumboToken* token) {
|
2670
2725
|
GumboParserState* state = parser->_parser_state;
|
2671
2726
|
if (token->type == GUMBO_TOKEN_WHITESPACE) {
|
2672
2727
|
insert_text_token(parser, token);
|
2673
|
-
return
|
2728
|
+
return;
|
2674
2729
|
}
|
2675
2730
|
if (token->type == GUMBO_TOKEN_COMMENT) {
|
2676
2731
|
append_comment_node(parser, get_current_node(parser), token);
|
2677
|
-
return
|
2732
|
+
return;
|
2678
2733
|
}
|
2679
2734
|
if (token->type == GUMBO_TOKEN_DOCTYPE) {
|
2680
2735
|
parser_add_parse_error(parser, token);
|
2681
2736
|
ignore_token(parser);
|
2682
|
-
return
|
2737
|
+
return;
|
2683
2738
|
}
|
2684
2739
|
if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
|
2685
|
-
|
2740
|
+
handle_in_body(parser, token);
|
2741
|
+
return;
|
2686
2742
|
}
|
2687
2743
|
if (tag_is(token, kStartTag, GUMBO_TAG_BODY)) {
|
2688
2744
|
insert_element_from_token(parser, token);
|
2689
2745
|
set_frameset_not_ok(parser);
|
2690
2746
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_BODY);
|
2691
|
-
return
|
2747
|
+
return;
|
2692
2748
|
}
|
2693
2749
|
if (tag_is(token, kStartTag, GUMBO_TAG_FRAMESET)) {
|
2694
2750
|
insert_element_from_token(parser, token);
|
2695
2751
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_FRAMESET);
|
2696
|
-
return
|
2752
|
+
return;
|
2697
2753
|
}
|
2698
2754
|
if (
|
2699
2755
|
tag_in(token, kStartTag, &(const TagSet) {
|
@@ -2709,10 +2765,11 @@ static bool handle_after_head(GumboParser* parser, GumboToken* token) {
|
|
2709
2765
|
gumbo_vector_add(state->_head_element, &state->_open_elements);
|
2710
2766
|
handle_in_head(parser, token);
|
2711
2767
|
gumbo_vector_remove(state->_head_element, &state->_open_elements);
|
2712
|
-
return
|
2768
|
+
return;
|
2713
2769
|
}
|
2714
2770
|
if (tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
|
2715
|
-
|
2771
|
+
handle_in_head(parser, token);
|
2772
|
+
return;
|
2716
2773
|
}
|
2717
2774
|
if (
|
2718
2775
|
tag_is(token, kStartTag, GUMBO_TAG_HEAD)
|
@@ -2723,27 +2780,26 @@ static bool handle_after_head(GumboParser* parser, GumboToken* token) {
|
|
2723
2780
|
) {
|
2724
2781
|
parser_add_parse_error(parser, token);
|
2725
2782
|
ignore_token(parser);
|
2726
|
-
return
|
2783
|
+
return;
|
2727
2784
|
}
|
2728
2785
|
insert_element_of_tag_type(parser, GUMBO_TAG_BODY, GUMBO_INSERTION_IMPLIED);
|
2729
2786
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_BODY);
|
2730
2787
|
state->_reprocess_current_token = true;
|
2731
|
-
return true;
|
2732
2788
|
}
|
2733
2789
|
|
2734
2790
|
// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inbody
|
2735
|
-
static
|
2791
|
+
static void handle_in_body(GumboParser* parser, GumboToken* token) {
|
2736
2792
|
GumboParserState* state = parser->_parser_state;
|
2737
2793
|
assert(state->_open_elements.length > 0);
|
2738
2794
|
if (token->type == GUMBO_TOKEN_NULL) {
|
2739
2795
|
parser_add_parse_error(parser, token);
|
2740
2796
|
ignore_token(parser);
|
2741
|
-
return
|
2797
|
+
return;
|
2742
2798
|
}
|
2743
2799
|
if (token->type == GUMBO_TOKEN_WHITESPACE) {
|
2744
2800
|
reconstruct_active_formatting_elements(parser);
|
2745
2801
|
insert_text_token(parser, token);
|
2746
|
-
return
|
2802
|
+
return;
|
2747
2803
|
}
|
2748
2804
|
if (
|
2749
2805
|
token->type == GUMBO_TOKEN_CHARACTER
|
@@ -2752,27 +2808,27 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2752
2808
|
reconstruct_active_formatting_elements(parser);
|
2753
2809
|
insert_text_token(parser, token);
|
2754
2810
|
set_frameset_not_ok(parser);
|
2755
|
-
return
|
2811
|
+
return;
|
2756
2812
|
}
|
2757
2813
|
if (token->type == GUMBO_TOKEN_COMMENT) {
|
2758
2814
|
append_comment_node(parser, get_current_node(parser), token);
|
2759
|
-
return
|
2815
|
+
return;
|
2760
2816
|
}
|
2761
2817
|
if (token->type == GUMBO_TOKEN_DOCTYPE) {
|
2762
2818
|
parser_add_parse_error(parser, token);
|
2763
2819
|
ignore_token(parser);
|
2764
|
-
return
|
2820
|
+
return;
|
2765
2821
|
}
|
2766
2822
|
if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
|
2767
2823
|
parser_add_parse_error(parser, token);
|
2768
2824
|
if (has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
|
2769
2825
|
ignore_token(parser);
|
2770
|
-
return
|
2826
|
+
return;
|
2771
2827
|
}
|
2772
2828
|
assert(parser->_output->root != NULL);
|
2773
2829
|
assert(parser->_output->root->type == GUMBO_NODE_ELEMENT);
|
2774
2830
|
merge_attributes(token, parser->_output->root);
|
2775
|
-
return
|
2831
|
+
return;
|
2776
2832
|
}
|
2777
2833
|
if (
|
2778
2834
|
tag_in(token, kStartTag, &(const TagSet) {
|
@@ -2782,7 +2838,8 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2782
2838
|
})
|
2783
2839
|
|| tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)
|
2784
2840
|
) {
|
2785
|
-
|
2841
|
+
handle_in_head(parser, token);
|
2842
|
+
return;
|
2786
2843
|
}
|
2787
2844
|
if (tag_is(token, kStartTag, GUMBO_TAG_BODY)) {
|
2788
2845
|
parser_add_parse_error(parser, token);
|
@@ -2796,7 +2853,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2796
2853
|
set_frameset_not_ok(parser);
|
2797
2854
|
merge_attributes(token, state->_open_elements.data[1]);
|
2798
2855
|
}
|
2799
|
-
return
|
2856
|
+
return;
|
2800
2857
|
}
|
2801
2858
|
if (tag_is(token, kStartTag, GUMBO_TAG_FRAMESET)) {
|
2802
2859
|
parser_add_parse_error(parser, token);
|
@@ -2806,7 +2863,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2806
2863
|
|| !state->_frameset_ok
|
2807
2864
|
) {
|
2808
2865
|
ignore_token(parser);
|
2809
|
-
return
|
2866
|
+
return;
|
2810
2867
|
}
|
2811
2868
|
// Save the body node for later removal.
|
2812
2869
|
GumboNode* body_node = state->_open_elements.data[1];
|
@@ -2838,50 +2895,43 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2838
2895
|
// Insert the <frameset>, and switch the insertion mode.
|
2839
2896
|
insert_element_from_token(parser, token);
|
2840
2897
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_FRAMESET);
|
2841
|
-
return
|
2898
|
+
return;
|
2842
2899
|
}
|
2843
2900
|
if (token->type == GUMBO_TOKEN_EOF) {
|
2844
2901
|
if (get_current_template_insertion_mode(parser) !=
|
2845
2902
|
GUMBO_INSERTION_MODE_INITIAL) {
|
2846
|
-
|
2903
|
+
handle_in_template(parser, token);
|
2904
|
+
return;
|
2847
2905
|
}
|
2848
|
-
if (stack_contains_nonclosable_element(parser))
|
2906
|
+
if (stack_contains_nonclosable_element(parser))
|
2849
2907
|
parser_add_parse_error(parser, token);
|
2850
|
-
|
2851
|
-
}
|
2852
|
-
return true;
|
2908
|
+
return;
|
2853
2909
|
}
|
2854
2910
|
if (tag_is(token, kEndTag, GUMBO_TAG_BODY)) {
|
2855
2911
|
if (!has_an_element_in_scope(parser, GUMBO_TAG_BODY)) {
|
2856
2912
|
parser_add_parse_error(parser, token);
|
2857
2913
|
ignore_token(parser);
|
2858
|
-
return
|
2914
|
+
return;
|
2859
2915
|
}
|
2860
|
-
|
2861
|
-
if (stack_contains_nonclosable_element(parser)) {
|
2916
|
+
if (stack_contains_nonclosable_element(parser))
|
2862
2917
|
parser_add_parse_error(parser, token);
|
2863
|
-
success = false;
|
2864
|
-
}
|
2865
2918
|
GumboNode* body = state->_open_elements.data[1];
|
2866
2919
|
assert(node_html_tag_is(body, GUMBO_TAG_BODY));
|
2867
2920
|
record_end_of_element(state->_current_token, &body->v.element);
|
2868
2921
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_BODY);
|
2869
|
-
return
|
2922
|
+
return;
|
2870
2923
|
}
|
2871
2924
|
if (tag_is(token, kEndTag, GUMBO_TAG_HTML)) {
|
2872
2925
|
if (!has_an_element_in_scope(parser, GUMBO_TAG_BODY)) {
|
2873
2926
|
parser_add_parse_error(parser, token);
|
2874
2927
|
ignore_token(parser);
|
2875
|
-
return
|
2928
|
+
return;
|
2876
2929
|
}
|
2877
|
-
|
2878
|
-
if (stack_contains_nonclosable_element(parser)) {
|
2930
|
+
if (stack_contains_nonclosable_element(parser))
|
2879
2931
|
parser_add_parse_error(parser, token);
|
2880
|
-
success = false;
|
2881
|
-
}
|
2882
2932
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_BODY);
|
2883
2933
|
parser->_parser_state->_reprocess_current_token = true;
|
2884
|
-
return
|
2934
|
+
return;
|
2885
2935
|
}
|
2886
2936
|
if (
|
2887
2937
|
tag_in(token, kStartTag, &(const TagSet) {
|
@@ -2892,26 +2942,25 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2892
2942
|
TAG(SUMMARY), TAG(UL)
|
2893
2943
|
})
|
2894
2944
|
) {
|
2895
|
-
|
2945
|
+
maybe_implicitly_close_p_tag(parser, token);
|
2896
2946
|
insert_element_from_token(parser, token);
|
2897
|
-
return
|
2947
|
+
return;
|
2898
2948
|
}
|
2899
2949
|
if (tag_in(token, kStartTag, &heading_tags)) {
|
2900
|
-
|
2950
|
+
maybe_implicitly_close_p_tag(parser, token);
|
2901
2951
|
if (node_tag_in_set(get_current_node(parser), &heading_tags)) {
|
2902
2952
|
parser_add_parse_error(parser, token);
|
2903
2953
|
pop_current_node(parser);
|
2904
|
-
result = false;
|
2905
2954
|
}
|
2906
2955
|
insert_element_from_token(parser, token);
|
2907
|
-
return
|
2956
|
+
return;
|
2908
2957
|
}
|
2909
2958
|
if (tag_in(token, kStartTag, &(const TagSet){TAG(PRE), TAG(LISTING)})) {
|
2910
|
-
|
2959
|
+
maybe_implicitly_close_p_tag(parser, token);
|
2911
2960
|
insert_element_from_token(parser, token);
|
2912
2961
|
state->_ignore_next_linefeed = true;
|
2913
2962
|
set_frameset_not_ok(parser);
|
2914
|
-
return
|
2963
|
+
return;
|
2915
2964
|
}
|
2916
2965
|
if (tag_is(token, kStartTag, GUMBO_TAG_FORM)) {
|
2917
2966
|
if (
|
@@ -2921,48 +2970,46 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2921
2970
|
gumbo_debug("Ignoring nested form.\n");
|
2922
2971
|
parser_add_parse_error(parser, token);
|
2923
2972
|
ignore_token(parser);
|
2924
|
-
return
|
2973
|
+
return;
|
2925
2974
|
}
|
2926
|
-
|
2975
|
+
maybe_implicitly_close_p_tag(parser, token);
|
2927
2976
|
GumboNode* form_element = insert_element_from_token(parser, token);
|
2928
2977
|
if (!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
|
2929
2978
|
state->_form_element = form_element;
|
2930
2979
|
}
|
2931
|
-
return
|
2980
|
+
return;
|
2932
2981
|
}
|
2933
2982
|
if (tag_is(token, kStartTag, GUMBO_TAG_LI)) {
|
2934
|
-
|
2935
|
-
|
2983
|
+
maybe_implicitly_close_list_tag(parser, token, true);
|
2984
|
+
maybe_implicitly_close_p_tag(parser, token);
|
2936
2985
|
insert_element_from_token(parser, token);
|
2937
|
-
return
|
2986
|
+
return;
|
2938
2987
|
}
|
2939
2988
|
if (tag_in(token, kStartTag, &dd_dt_tags)) {
|
2940
|
-
|
2941
|
-
|
2989
|
+
maybe_implicitly_close_list_tag(parser, token, false);
|
2990
|
+
maybe_implicitly_close_p_tag(parser, token);
|
2942
2991
|
insert_element_from_token(parser, token);
|
2943
|
-
return
|
2992
|
+
return;
|
2944
2993
|
}
|
2945
2994
|
if (tag_is(token, kStartTag, GUMBO_TAG_PLAINTEXT)) {
|
2946
|
-
|
2995
|
+
maybe_implicitly_close_p_tag(parser, token);
|
2947
2996
|
insert_element_from_token(parser, token);
|
2948
2997
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_PLAINTEXT);
|
2949
|
-
return
|
2998
|
+
return;
|
2950
2999
|
}
|
2951
3000
|
if (tag_is(token, kStartTag, GUMBO_TAG_BUTTON)) {
|
2952
|
-
bool success = true;
|
2953
3001
|
if (has_an_element_in_scope(parser, GUMBO_TAG_BUTTON)) {
|
2954
3002
|
parser_add_parse_error(parser, token);
|
2955
|
-
success = false;
|
2956
3003
|
// We don't want to use implicitly_close_tags here because it may add an
|
2957
3004
|
// error and we've already added the only error the standard specifies.
|
2958
|
-
generate_implied_end_tags(parser, GUMBO_TAG_LAST);
|
3005
|
+
generate_implied_end_tags(parser, GUMBO_TAG_LAST, NULL);
|
2959
3006
|
while (!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_BUTTON))
|
2960
3007
|
;
|
2961
3008
|
}
|
2962
3009
|
reconstruct_active_formatting_elements(parser);
|
2963
3010
|
insert_element_from_token(parser, token);
|
2964
3011
|
set_frameset_not_ok(parser);
|
2965
|
-
return
|
3012
|
+
return;
|
2966
3013
|
}
|
2967
3014
|
if (
|
2968
3015
|
tag_in(token, kEndTag, &(const TagSet) {
|
@@ -2977,7 +3024,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2977
3024
|
if (!has_an_element_in_scope(parser, tag)) {
|
2978
3025
|
parser_add_parse_error(parser, token);
|
2979
3026
|
ignore_token(parser);
|
2980
|
-
return
|
3027
|
+
return;
|
2981
3028
|
}
|
2982
3029
|
return implicitly_close_tags (
|
2983
3030
|
parser,
|
@@ -2991,19 +3038,15 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
2991
3038
|
if (!has_an_element_in_scope(parser, GUMBO_TAG_FORM)) {
|
2992
3039
|
parser_add_parse_error(parser, token);
|
2993
3040
|
ignore_token(parser);
|
2994
|
-
return
|
3041
|
+
return;
|
2995
3042
|
}
|
2996
|
-
|
2997
|
-
|
2998
|
-
if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_FORM)) {
|
3043
|
+
generate_implied_end_tags(parser, GUMBO_TAG_LAST, NULL);
|
3044
|
+
if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_FORM))
|
2999
3045
|
parser_add_parse_error(parser, token);
|
3000
|
-
success = false;
|
3001
|
-
}
|
3002
3046
|
while (!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_FORM))
|
3003
3047
|
;
|
3004
|
-
return
|
3048
|
+
return;
|
3005
3049
|
} else {
|
3006
|
-
bool result = true;
|
3007
3050
|
GumboNode* node = state->_form_element;
|
3008
3051
|
assert(!node || node->type == GUMBO_NODE_ELEMENT);
|
3009
3052
|
state->_form_element = NULL;
|
@@ -3011,27 +3054,24 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
3011
3054
|
gumbo_debug("Closing an unopened form.\n");
|
3012
3055
|
parser_add_parse_error(parser, token);
|
3013
3056
|
ignore_token(parser);
|
3014
|
-
return
|
3057
|
+
return;
|
3015
3058
|
}
|
3016
3059
|
// This differs from implicitly_close_tags because we remove *only* the
|
3017
3060
|
// <form> element; other nodes are left in scope.
|
3018
|
-
generate_implied_end_tags(parser, GUMBO_TAG_LAST);
|
3019
|
-
if (get_current_node(parser) != node)
|
3061
|
+
generate_implied_end_tags(parser, GUMBO_TAG_LAST, NULL);
|
3062
|
+
if (get_current_node(parser) != node)
|
3020
3063
|
parser_add_parse_error(parser, token);
|
3021
|
-
|
3022
|
-
} else {
|
3064
|
+
else
|
3023
3065
|
record_end_of_element(token, &node->v.element);
|
3024
|
-
}
|
3025
3066
|
|
3026
3067
|
GumboVector* open_elements = &state->_open_elements;
|
3027
3068
|
int index = gumbo_vector_index_of(open_elements, node);
|
3028
3069
|
assert(index >= 0);
|
3029
3070
|
gumbo_vector_remove_at(index, open_elements);
|
3030
|
-
return
|
3071
|
+
return;
|
3031
3072
|
}
|
3032
3073
|
}
|
3033
3074
|
if (tag_is(token, kEndTag, GUMBO_TAG_P)) {
|
3034
|
-
bool success = true;
|
3035
3075
|
if (!has_an_element_in_button_scope(parser, GUMBO_TAG_P)) {
|
3036
3076
|
parser_add_parse_error(parser, token);
|
3037
3077
|
// reconstruct_active_formatting_elements(parser);
|
@@ -3040,41 +3080,43 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
3040
3080
|
GUMBO_TAG_P,
|
3041
3081
|
GUMBO_INSERTION_CONVERTED_FROM_END_TAG
|
3042
3082
|
);
|
3043
|
-
success = false;
|
3044
3083
|
}
|
3045
|
-
|
3084
|
+
implicitly_close_tags (
|
3046
3085
|
parser,
|
3047
3086
|
token,
|
3048
3087
|
GUMBO_NAMESPACE_HTML,
|
3049
3088
|
GUMBO_TAG_P
|
3050
|
-
)
|
3089
|
+
);
|
3090
|
+
return;
|
3051
3091
|
}
|
3052
3092
|
if (tag_is(token, kEndTag, GUMBO_TAG_LI)) {
|
3053
3093
|
if (!has_an_element_in_list_scope(parser, GUMBO_TAG_LI)) {
|
3054
3094
|
parser_add_parse_error(parser, token);
|
3055
3095
|
ignore_token(parser);
|
3056
|
-
return
|
3096
|
+
return;
|
3057
3097
|
}
|
3058
|
-
|
3098
|
+
implicitly_close_tags (
|
3059
3099
|
parser,
|
3060
3100
|
token,
|
3061
3101
|
GUMBO_NAMESPACE_HTML,
|
3062
3102
|
GUMBO_TAG_LI
|
3063
3103
|
);
|
3104
|
+
return;
|
3064
3105
|
}
|
3065
3106
|
if (tag_in(token, kEndTag, &dd_dt_tags)) {
|
3066
3107
|
GumboTag token_tag = token->v.end_tag.tag;
|
3067
3108
|
if (!has_an_element_in_scope(parser, token_tag)) {
|
3068
3109
|
parser_add_parse_error(parser, token);
|
3069
3110
|
ignore_token(parser);
|
3070
|
-
return
|
3111
|
+
return;
|
3071
3112
|
}
|
3072
|
-
|
3113
|
+
implicitly_close_tags (
|
3073
3114
|
parser,
|
3074
3115
|
token,
|
3075
3116
|
GUMBO_NAMESPACE_HTML,
|
3076
3117
|
token_tag
|
3077
3118
|
);
|
3119
|
+
return;
|
3078
3120
|
}
|
3079
3121
|
if (tag_in(token, kEndTag, &heading_tags)) {
|
3080
3122
|
if (
|
@@ -3086,12 +3128,11 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
3086
3128
|
// No heading open; ignore the token entirely.
|
3087
3129
|
parser_add_parse_error(parser, token);
|
3088
3130
|
ignore_token(parser);
|
3089
|
-
return
|
3131
|
+
return;
|
3090
3132
|
}
|
3091
|
-
generate_implied_end_tags(parser, GUMBO_TAG_LAST);
|
3133
|
+
generate_implied_end_tags(parser, GUMBO_TAG_LAST, NULL);
|
3092
3134
|
const GumboNode* current_node = get_current_node(parser);
|
3093
|
-
|
3094
|
-
if (!success) {
|
3135
|
+
if (!node_html_tag_is(current_node, token->v.end_tag.tag)) {
|
3095
3136
|
// There're children of the heading currently open; close them below and
|
3096
3137
|
// record a parse error.
|
3097
3138
|
// TODO(jdtang): Add a way to distinguish this error case from the one
|
@@ -3101,17 +3142,15 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
3101
3142
|
do {
|
3102
3143
|
current_node = pop_current_node(parser);
|
3103
3144
|
} while (!node_tag_in_set(current_node, &heading_tags));
|
3104
|
-
return
|
3145
|
+
return;
|
3105
3146
|
}
|
3106
3147
|
if (tag_is(token, kStartTag, GUMBO_TAG_A)) {
|
3107
|
-
bool success = true;
|
3108
3148
|
int last_a;
|
3109
3149
|
int has_matching_a = find_last_anchor_index(parser, &last_a);
|
3110
3150
|
if (has_matching_a) {
|
3111
3151
|
assert(has_matching_a == 1);
|
3112
3152
|
parser_add_parse_error(parser, token);
|
3113
|
-
|
3114
|
-
assert(handled);
|
3153
|
+
(void)adoption_agency_algorithm(parser, token);
|
3115
3154
|
// The adoption agency algorithm usually removes all instances of <a>
|
3116
3155
|
// from the list of active formatting elements, but in case it doesn't,
|
3117
3156
|
// we're supposed to do this. (The conditions where it might not are
|
@@ -3123,11 +3162,10 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
3123
3162
|
);
|
3124
3163
|
gumbo_vector_remove(last_element, &state->_open_elements);
|
3125
3164
|
}
|
3126
|
-
success = false;
|
3127
3165
|
}
|
3128
3166
|
reconstruct_active_formatting_elements(parser);
|
3129
3167
|
add_formatting_element(parser, insert_element_from_token(parser, token));
|
3130
|
-
return
|
3168
|
+
return;
|
3131
3169
|
}
|
3132
3170
|
if (
|
3133
3171
|
tag_in(token, kStartTag, &(const TagSet) {
|
@@ -3137,21 +3175,18 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
3137
3175
|
) {
|
3138
3176
|
reconstruct_active_formatting_elements(parser);
|
3139
3177
|
add_formatting_element(parser, insert_element_from_token(parser, token));
|
3140
|
-
return
|
3178
|
+
return;
|
3141
3179
|
}
|
3142
3180
|
if (tag_is(token, kStartTag, GUMBO_TAG_NOBR)) {
|
3143
|
-
bool result = true;
|
3144
3181
|
reconstruct_active_formatting_elements(parser);
|
3145
3182
|
if (has_an_element_in_scope(parser, GUMBO_TAG_NOBR)) {
|
3146
|
-
result = false;
|
3147
3183
|
parser_add_parse_error(parser, token);
|
3148
|
-
|
3149
|
-
assert(handled);
|
3184
|
+
adoption_agency_algorithm(parser, token);
|
3150
3185
|
reconstruct_active_formatting_elements(parser);
|
3151
3186
|
}
|
3152
3187
|
insert_element_from_token(parser, token);
|
3153
3188
|
add_formatting_element(parser, get_current_node(parser));
|
3154
|
-
return
|
3189
|
+
return;
|
3155
3190
|
}
|
3156
3191
|
if (
|
3157
3192
|
tag_in(token, kEndTag, &(const TagSet) {
|
@@ -3160,9 +3195,8 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
3160
3195
|
TAG(U)
|
3161
3196
|
})
|
3162
3197
|
) {
|
3163
|
-
|
3164
|
-
|
3165
|
-
return true;
|
3198
|
+
adoption_agency_algorithm(parser, token);
|
3199
|
+
return;
|
3166
3200
|
}
|
3167
3201
|
if (
|
3168
3202
|
tag_in(token, kStartTag, &(const TagSet){TAG(APPLET), TAG(MARQUEE), TAG(OBJECT)})
|
@@ -3171,7 +3205,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
3171
3205
|
insert_element_from_token(parser, token);
|
3172
3206
|
add_formatting_element(parser, &kActiveFormattingScopeMarker);
|
3173
3207
|
set_frameset_not_ok(parser);
|
3174
|
-
return
|
3208
|
+
return;
|
3175
3209
|
}
|
3176
3210
|
if (
|
3177
3211
|
tag_in(token, kEndTag, &(const TagSet){TAG(APPLET), TAG(MARQUEE), TAG(OBJECT)})
|
@@ -3180,11 +3214,11 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
3180
3214
|
if (!has_an_element_in_scope(parser, token_tag)) {
|
3181
3215
|
parser_add_parse_error(parser, token);
|
3182
3216
|
ignore_token(parser);
|
3183
|
-
return
|
3217
|
+
return;
|
3184
3218
|
}
|
3185
|
-
|
3219
|
+
implicitly_close_tags(parser, token, GUMBO_NAMESPACE_HTML, token_tag);
|
3186
3220
|
clear_active_formatting_elements(parser);
|
3187
|
-
return
|
3221
|
+
return;
|
3188
3222
|
}
|
3189
3223
|
if (tag_is(token, kStartTag, GUMBO_TAG_TABLE)) {
|
3190
3224
|
if (
|
@@ -3196,7 +3230,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
3196
3230
|
insert_element_from_token(parser, token);
|
3197
3231
|
set_frameset_not_ok(parser);
|
3198
3232
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
|
3199
|
-
return
|
3233
|
+
return;
|
3200
3234
|
}
|
3201
3235
|
if (tag_is(token, kEndTag, GUMBO_TAG_BR)) {
|
3202
3236
|
parser_add_parse_error(parser, token);
|
@@ -3209,7 +3243,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
3209
3243
|
pop_current_node(parser);
|
3210
3244
|
acknowledge_self_closing_tag(parser);
|
3211
3245
|
set_frameset_not_ok(parser);
|
3212
|
-
return
|
3246
|
+
return;
|
3213
3247
|
}
|
3214
3248
|
if (
|
3215
3249
|
tag_in(token, kStartTag, &(const TagSet) {
|
@@ -3229,7 +3263,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
3229
3263
|
pop_current_node(parser);
|
3230
3264
|
acknowledge_self_closing_tag(parser);
|
3231
3265
|
set_frameset_not_ok(parser);
|
3232
|
-
return
|
3266
|
+
return;
|
3233
3267
|
}
|
3234
3268
|
if (tag_is(token, kStartTag, GUMBO_TAG_INPUT)) {
|
3235
3269
|
reconstruct_active_formatting_elements(parser);
|
@@ -3238,7 +3272,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
3238
3272
|
acknowledge_self_closing_tag(parser);
|
3239
3273
|
if (!attribute_matches(&input->v.element.attributes, "type", "hidden"))
|
3240
3274
|
set_frameset_not_ok(parser);
|
3241
|
-
return
|
3275
|
+
return;
|
3242
3276
|
}
|
3243
3277
|
if (
|
3244
3278
|
tag_in(token, kStartTag, &(const TagSet){TAG(PARAM), TAG(SOURCE), TAG(TRACK)})
|
@@ -3246,37 +3280,37 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
3246
3280
|
insert_element_from_token(parser, token);
|
3247
3281
|
pop_current_node(parser);
|
3248
3282
|
acknowledge_self_closing_tag(parser);
|
3249
|
-
return
|
3283
|
+
return;
|
3250
3284
|
}
|
3251
3285
|
if (tag_is(token, kStartTag, GUMBO_TAG_HR)) {
|
3252
|
-
|
3286
|
+
maybe_implicitly_close_p_tag(parser, token);
|
3253
3287
|
insert_element_from_token(parser, token);
|
3254
3288
|
pop_current_node(parser);
|
3255
3289
|
acknowledge_self_closing_tag(parser);
|
3256
3290
|
set_frameset_not_ok(parser);
|
3257
|
-
return
|
3291
|
+
return;
|
3258
3292
|
}
|
3259
3293
|
if (tag_is(token, kStartTag, GUMBO_TAG_TEXTAREA)) {
|
3260
3294
|
run_generic_parsing_algorithm(parser, token, GUMBO_LEX_RCDATA);
|
3261
3295
|
parser->_parser_state->_ignore_next_linefeed = true;
|
3262
3296
|
set_frameset_not_ok(parser);
|
3263
|
-
return
|
3297
|
+
return;
|
3264
3298
|
}
|
3265
3299
|
if (tag_is(token, kStartTag, GUMBO_TAG_XMP)) {
|
3266
|
-
|
3300
|
+
maybe_implicitly_close_p_tag(parser, token);
|
3267
3301
|
reconstruct_active_formatting_elements(parser);
|
3268
3302
|
set_frameset_not_ok(parser);
|
3269
3303
|
run_generic_parsing_algorithm(parser, token, GUMBO_LEX_RAWTEXT);
|
3270
|
-
return
|
3304
|
+
return;
|
3271
3305
|
}
|
3272
3306
|
if (tag_is(token, kStartTag, GUMBO_TAG_IFRAME)) {
|
3273
3307
|
set_frameset_not_ok(parser);
|
3274
3308
|
run_generic_parsing_algorithm(parser, token, GUMBO_LEX_RAWTEXT);
|
3275
|
-
return
|
3309
|
+
return;
|
3276
3310
|
}
|
3277
3311
|
if (tag_is(token, kStartTag, GUMBO_TAG_NOEMBED)) {
|
3278
3312
|
run_generic_parsing_algorithm(parser, token, GUMBO_LEX_RAWTEXT);
|
3279
|
-
return
|
3313
|
+
return;
|
3280
3314
|
}
|
3281
3315
|
if (tag_is(token, kStartTag, GUMBO_TAG_SELECT)) {
|
3282
3316
|
reconstruct_active_formatting_elements(parser);
|
@@ -3294,7 +3328,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
3294
3328
|
} else {
|
3295
3329
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_SELECT);
|
3296
3330
|
}
|
3297
|
-
return
|
3331
|
+
return;
|
3298
3332
|
}
|
3299
3333
|
if (
|
3300
3334
|
tag_in(token, kStartTag, &(const TagSet){TAG(OPTGROUP), TAG(OPTION)})
|
@@ -3304,33 +3338,28 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
3304
3338
|
}
|
3305
3339
|
reconstruct_active_formatting_elements(parser);
|
3306
3340
|
insert_element_from_token(parser, token);
|
3307
|
-
return
|
3341
|
+
return;
|
3308
3342
|
}
|
3309
3343
|
if (tag_in(token, kStartTag, &(const TagSet){TAG(RB), TAG(RTC)})) {
|
3310
|
-
bool success = true;
|
3311
3344
|
if (has_an_element_in_scope(parser, GUMBO_TAG_RUBY)) {
|
3312
|
-
generate_implied_end_tags(parser, GUMBO_TAG_LAST);
|
3313
|
-
if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_RUBY))
|
3345
|
+
generate_implied_end_tags(parser, GUMBO_TAG_LAST, NULL);
|
3346
|
+
if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_RUBY))
|
3314
3347
|
parser_add_parse_error(parser, token);
|
3315
|
-
success = false;
|
3316
|
-
}
|
3317
3348
|
}
|
3318
3349
|
insert_element_from_token(parser, token);
|
3319
|
-
return
|
3350
|
+
return;
|
3320
3351
|
}
|
3321
3352
|
if (tag_in(token, kStartTag, &(const TagSet){TAG(RP), TAG(RT)})) {
|
3322
|
-
bool success = true;
|
3323
3353
|
if (has_an_element_in_scope(parser, GUMBO_TAG_RUBY)) {
|
3324
|
-
generate_implied_end_tags(parser, GUMBO_TAG_RTC);
|
3354
|
+
generate_implied_end_tags(parser, GUMBO_TAG_RTC, NULL);
|
3325
3355
|
GumboNode* current = get_current_node(parser);
|
3326
3356
|
if (!node_html_tag_is(current, GUMBO_TAG_RUBY) &&
|
3327
3357
|
!node_html_tag_is(current, GUMBO_TAG_RTC)) {
|
3328
3358
|
parser_add_parse_error(parser, token);
|
3329
|
-
success = false;
|
3330
3359
|
}
|
3331
3360
|
}
|
3332
3361
|
insert_element_from_token(parser, token);
|
3333
|
-
return
|
3362
|
+
return;
|
3334
3363
|
}
|
3335
3364
|
if (tag_is(token, kStartTag, GUMBO_TAG_MATH)) {
|
3336
3365
|
reconstruct_active_formatting_elements(parser);
|
@@ -3341,7 +3370,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
3341
3370
|
pop_current_node(parser);
|
3342
3371
|
acknowledge_self_closing_tag(parser);
|
3343
3372
|
}
|
3344
|
-
return
|
3373
|
+
return;
|
3345
3374
|
}
|
3346
3375
|
if (tag_is(token, kStartTag, GUMBO_TAG_SVG)) {
|
3347
3376
|
reconstruct_active_formatting_elements(parser);
|
@@ -3352,7 +3381,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
3352
3381
|
pop_current_node(parser);
|
3353
3382
|
acknowledge_self_closing_tag(parser);
|
3354
3383
|
}
|
3355
|
-
return
|
3384
|
+
return;
|
3356
3385
|
}
|
3357
3386
|
if (
|
3358
3387
|
tag_in(token, kStartTag, &(const TagSet) {
|
@@ -3362,60 +3391,24 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
|
|
3362
3391
|
) {
|
3363
3392
|
parser_add_parse_error(parser, token);
|
3364
3393
|
ignore_token(parser);
|
3365
|
-
return
|
3394
|
+
return;
|
3366
3395
|
}
|
3367
3396
|
if (token->type == GUMBO_TOKEN_START_TAG) {
|
3368
3397
|
reconstruct_active_formatting_elements(parser);
|
3369
3398
|
insert_element_from_token(parser, token);
|
3370
|
-
return
|
3371
|
-
}
|
3372
|
-
any_other_end_tag:
|
3373
|
-
assert(token->type == GUMBO_TOKEN_END_TAG);
|
3374
|
-
GumboTag end_tag = token->v.end_tag.tag;
|
3375
|
-
const char *end_tagname = token->v.end_tag.name;
|
3376
|
-
assert(state->_open_elements.length > 0);
|
3377
|
-
assert(node_html_tag_is(state->_open_elements.data[0], GUMBO_TAG_HTML));
|
3378
|
-
// Walk up the stack of open elements until we find one that either:
|
3379
|
-
// a) Matches the tag name we saw
|
3380
|
-
// b) Is in the "special" category.
|
3381
|
-
// If we see a), implicitly close everything up to and including it. If we
|
3382
|
-
// see b), then record a parse error, don't close anything (except the
|
3383
|
-
// implied end tags) and ignore the end tag token.
|
3384
|
-
for (int i = state->_open_elements.length; --i >= 0;) {
|
3385
|
-
const GumboNode* node = state->_open_elements.data[i];
|
3386
|
-
if (node_qualified_tagname_is(node, GUMBO_NAMESPACE_HTML, end_tag, end_tagname)) {
|
3387
|
-
generate_implied_end_tags(parser, end_tag);
|
3388
|
-
// TODO(jdtang): Do I need to add a parse error here? The condition in
|
3389
|
-
// the spec seems like it's the inverse of the loop condition above, and
|
3390
|
-
// so would never fire.
|
3391
|
-
// sfc: Yes, an error is needed here.
|
3392
|
-
// <!DOCTYPE><body><sarcasm><foo></sarcasm> is an example.
|
3393
|
-
// foo is the "current node" but sarcasm is node.
|
3394
|
-
// XXX: Write a test for this.
|
3395
|
-
if (node != get_current_node(parser))
|
3396
|
-
parser_add_parse_error(parser, token);
|
3397
|
-
while (node != pop_current_node(parser))
|
3398
|
-
; // Pop everything.
|
3399
|
-
return true;
|
3400
|
-
} else if (is_special_node(node)) {
|
3401
|
-
parser_add_parse_error(parser, token);
|
3402
|
-
ignore_token(parser);
|
3403
|
-
return false;
|
3404
|
-
}
|
3399
|
+
return;
|
3405
3400
|
}
|
3406
|
-
|
3407
|
-
assert(0);
|
3408
|
-
return false;
|
3401
|
+
in_body_any_other_end_tag(parser, token);
|
3409
3402
|
}
|
3410
3403
|
|
3411
3404
|
// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-incdata
|
3412
|
-
static
|
3405
|
+
static void handle_text(GumboParser* parser, GumboToken* token) {
|
3413
3406
|
if (
|
3414
3407
|
token->type == GUMBO_TOKEN_CHARACTER
|
3415
3408
|
|| token->type == GUMBO_TOKEN_WHITESPACE
|
3416
3409
|
) {
|
3417
3410
|
insert_text_token(parser, token);
|
3418
|
-
return
|
3411
|
+
return;
|
3419
3412
|
}
|
3420
3413
|
// We provide only bare-bones script handling that doesn't involve any of
|
3421
3414
|
// the parser-pause/already-started/script-nesting flags or re-entrant
|
@@ -3424,19 +3417,16 @@ static bool handle_text(GumboParser* parser, GumboToken* token) {
|
|
3424
3417
|
// provide the script body as a text-node child of the <script> element.
|
3425
3418
|
// This behavior doesn't support document.write of partial HTML elements,
|
3426
3419
|
// but should be adequate for almost all other scripting support.
|
3427
|
-
bool success = true;
|
3428
3420
|
if (token->type == GUMBO_TOKEN_EOF) {
|
3429
3421
|
parser_add_parse_error(parser, token);
|
3430
|
-
success = false;
|
3431
3422
|
parser->_parser_state->_reprocess_current_token = true;
|
3432
3423
|
}
|
3433
3424
|
pop_current_node(parser);
|
3434
3425
|
set_insertion_mode(parser, parser->_parser_state->_original_insertion_mode);
|
3435
|
-
return success;
|
3436
3426
|
}
|
3437
3427
|
|
3438
3428
|
// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-intable
|
3439
|
-
static
|
3429
|
+
static void handle_in_table(GumboParser* parser, GumboToken* token) {
|
3440
3430
|
GumboParserState* state = parser->_parser_state;
|
3441
3431
|
if (
|
3442
3432
|
(token->type == GUMBO_TOKEN_CHARACTER
|
@@ -3456,29 +3446,29 @@ static bool handle_in_table(GumboParser* parser, GumboToken* token) {
|
|
3456
3446
|
state->_original_insertion_mode = state->_insertion_mode;
|
3457
3447
|
state->_reprocess_current_token = true;
|
3458
3448
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_TEXT);
|
3459
|
-
return
|
3449
|
+
return;
|
3460
3450
|
}
|
3461
3451
|
if (token->type == GUMBO_TOKEN_COMMENT) {
|
3462
3452
|
append_comment_node(parser, get_current_node(parser), token);
|
3463
|
-
return
|
3453
|
+
return;
|
3464
3454
|
}
|
3465
3455
|
if (token->type == GUMBO_TOKEN_DOCTYPE) {
|
3466
3456
|
parser_add_parse_error(parser, token);
|
3467
3457
|
ignore_token(parser);
|
3468
|
-
return
|
3458
|
+
return;
|
3469
3459
|
}
|
3470
3460
|
if (tag_is(token, kStartTag, GUMBO_TAG_CAPTION)) {
|
3471
3461
|
clear_stack_to_table_context(parser);
|
3472
3462
|
add_formatting_element(parser, &kActiveFormattingScopeMarker);
|
3473
3463
|
insert_element_from_token(parser, token);
|
3474
3464
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_CAPTION);
|
3475
|
-
return
|
3465
|
+
return;
|
3476
3466
|
}
|
3477
3467
|
if (tag_is(token, kStartTag, GUMBO_TAG_COLGROUP)) {
|
3478
3468
|
clear_stack_to_table_context(parser);
|
3479
3469
|
insert_element_from_token(parser, token);
|
3480
3470
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_COLUMN_GROUP);
|
3481
|
-
return
|
3471
|
+
return;
|
3482
3472
|
}
|
3483
3473
|
if (tag_is(token, kStartTag, GUMBO_TAG_COL)) {
|
3484
3474
|
clear_stack_to_table_context(parser);
|
@@ -3489,7 +3479,7 @@ static bool handle_in_table(GumboParser* parser, GumboToken* token) {
|
|
3489
3479
|
);
|
3490
3480
|
state->_reprocess_current_token = true;
|
3491
3481
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_COLUMN_GROUP);
|
3492
|
-
return
|
3482
|
+
return;
|
3493
3483
|
}
|
3494
3484
|
if (
|
3495
3485
|
tag_in(token, kStartTag, &(const TagSet) {
|
@@ -3499,7 +3489,7 @@ static bool handle_in_table(GumboParser* parser, GumboToken* token) {
|
|
3499
3489
|
clear_stack_to_table_context(parser);
|
3500
3490
|
insert_element_from_token(parser, token);
|
3501
3491
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
|
3502
|
-
return
|
3492
|
+
return;
|
3503
3493
|
}
|
3504
3494
|
if (
|
3505
3495
|
tag_in(token, kStartTag, &(const TagSet) {
|
@@ -3514,7 +3504,7 @@ static bool handle_in_table(GumboParser* parser, GumboToken* token) {
|
|
3514
3504
|
);
|
3515
3505
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
|
3516
3506
|
state->_reprocess_current_token = true;
|
3517
|
-
return
|
3507
|
+
return;
|
3518
3508
|
}
|
3519
3509
|
if (tag_is(token, kStartTag, GUMBO_TAG_TABLE)) {
|
3520
3510
|
parser_add_parse_error(parser, token);
|
@@ -3523,14 +3513,14 @@ static bool handle_in_table(GumboParser* parser, GumboToken* token) {
|
|
3523
3513
|
} else {
|
3524
3514
|
ignore_token(parser);
|
3525
3515
|
}
|
3526
|
-
return
|
3516
|
+
return;
|
3527
3517
|
}
|
3528
3518
|
if (tag_is(token, kEndTag, GUMBO_TAG_TABLE)) {
|
3529
3519
|
if (!close_table(parser)) {
|
3530
3520
|
parser_add_parse_error(parser, token);
|
3531
|
-
return
|
3521
|
+
return;
|
3532
3522
|
}
|
3533
|
-
return
|
3523
|
+
return;
|
3534
3524
|
}
|
3535
3525
|
if (
|
3536
3526
|
tag_in(token, kEndTag, &(const TagSet) {
|
@@ -3540,13 +3530,14 @@ static bool handle_in_table(GumboParser* parser, GumboToken* token) {
|
|
3540
3530
|
) {
|
3541
3531
|
parser_add_parse_error(parser, token);
|
3542
3532
|
ignore_token(parser);
|
3543
|
-
return
|
3533
|
+
return;
|
3544
3534
|
}
|
3545
3535
|
if (
|
3546
3536
|
tag_in(token, kStartTag, &(const TagSet){TAG(STYLE), TAG(SCRIPT), TAG(TEMPLATE)})
|
3547
3537
|
|| (tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE))
|
3548
3538
|
) {
|
3549
|
-
|
3539
|
+
handle_in_head(parser, token);
|
3540
|
+
return;
|
3550
3541
|
}
|
3551
3542
|
if (
|
3552
3543
|
tag_is(token, kStartTag, GUMBO_TAG_INPUT)
|
@@ -3556,35 +3547,35 @@ static bool handle_in_table(GumboParser* parser, GumboToken* token) {
|
|
3556
3547
|
insert_element_from_token(parser, token);
|
3557
3548
|
pop_current_node(parser);
|
3558
3549
|
acknowledge_self_closing_tag(parser);
|
3559
|
-
return
|
3550
|
+
return;
|
3560
3551
|
}
|
3561
3552
|
if (tag_is(token, kStartTag, GUMBO_TAG_FORM)) {
|
3562
3553
|
parser_add_parse_error(parser, token);
|
3563
3554
|
if (state->_form_element || has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
|
3564
3555
|
ignore_token(parser);
|
3565
|
-
return
|
3556
|
+
return;
|
3566
3557
|
}
|
3567
3558
|
state->_form_element = insert_element_from_token(parser, token);
|
3568
3559
|
pop_current_node(parser);
|
3569
|
-
return
|
3560
|
+
return;
|
3570
3561
|
}
|
3571
3562
|
if (token->type == GUMBO_TOKEN_EOF) {
|
3572
|
-
|
3563
|
+
handle_in_body(parser, token);
|
3564
|
+
return;
|
3573
3565
|
}
|
3574
3566
|
// foster-parenting-start-tag or foster-parenting-end-tag error
|
3575
3567
|
parser_add_parse_error(parser, token);
|
3576
3568
|
state->_foster_parent_insertions = true;
|
3577
|
-
|
3569
|
+
handle_in_body(parser, token);
|
3578
3570
|
state->_foster_parent_insertions = false;
|
3579
|
-
return result;
|
3580
3571
|
}
|
3581
3572
|
|
3582
3573
|
// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-intabletext
|
3583
|
-
static
|
3574
|
+
static void handle_in_table_text(GumboParser* parser, GumboToken* token) {
|
3584
3575
|
if (token->type == GUMBO_TOKEN_NULL) {
|
3585
3576
|
parser_add_parse_error(parser, token);
|
3586
3577
|
ignore_token(parser);
|
3587
|
-
return
|
3578
|
+
return;
|
3588
3579
|
}
|
3589
3580
|
GumboParserState* state = parser->_parser_state;
|
3590
3581
|
// Non-whitespace tokens will cause parse errors later.
|
@@ -3594,7 +3585,7 @@ static bool handle_in_table_text(GumboParser* parser, GumboToken* token) {
|
|
3594
3585
|
|| token->type == GUMBO_TOKEN_CHARACTER) {
|
3595
3586
|
insert_text_token(parser, token);
|
3596
3587
|
gumbo_character_token_buffer_append(token, &state->_table_character_tokens);
|
3597
|
-
return
|
3588
|
+
return;
|
3598
3589
|
}
|
3599
3590
|
|
3600
3591
|
GumboCharacterTokenBuffer* buffer = &state->_table_character_tokens;
|
@@ -3616,26 +3607,24 @@ static bool handle_in_table_text(GumboParser* parser, GumboToken* token) {
|
|
3616
3607
|
state->_foster_parent_insertions = false;
|
3617
3608
|
state->_reprocess_current_token = true;
|
3618
3609
|
state->_insertion_mode = state->_original_insertion_mode;
|
3619
|
-
return true;
|
3620
3610
|
}
|
3621
3611
|
|
3622
3612
|
// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-incaption
|
3623
|
-
static
|
3613
|
+
static void handle_in_caption(GumboParser* parser, GumboToken* token) {
|
3624
3614
|
if (tag_is(token, kEndTag, GUMBO_TAG_CAPTION)) {
|
3625
3615
|
if (!has_an_element_in_table_scope(parser, GUMBO_TAG_CAPTION)) {
|
3626
3616
|
parser_add_parse_error(parser, token);
|
3627
3617
|
ignore_token(parser);
|
3628
|
-
return
|
3618
|
+
return;
|
3629
3619
|
}
|
3630
|
-
generate_implied_end_tags(parser, GUMBO_TAG_LAST);
|
3631
|
-
|
3632
|
-
if (!result)
|
3620
|
+
generate_implied_end_tags(parser, GUMBO_TAG_LAST, NULL);
|
3621
|
+
if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_CAPTION))
|
3633
3622
|
parser_add_parse_error(parser, token);
|
3634
3623
|
while (!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_CAPTION))
|
3635
3624
|
;
|
3636
3625
|
clear_active_formatting_elements(parser);
|
3637
3626
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
|
3638
|
-
return
|
3627
|
+
return;
|
3639
3628
|
}
|
3640
3629
|
if (
|
3641
3630
|
tag_in(token, kStartTag, &(const TagSet) {
|
@@ -3647,18 +3636,17 @@ static bool handle_in_caption(GumboParser* parser, GumboToken* token) {
|
|
3647
3636
|
if (!has_an_element_in_table_scope(parser, GUMBO_TAG_CAPTION)) {
|
3648
3637
|
parser_add_parse_error(parser, token);
|
3649
3638
|
ignore_token(parser);
|
3650
|
-
return
|
3639
|
+
return;
|
3651
3640
|
}
|
3652
|
-
generate_implied_end_tags(parser, GUMBO_TAG_LAST);
|
3653
|
-
|
3654
|
-
if (!result)
|
3641
|
+
generate_implied_end_tags(parser, GUMBO_TAG_LAST, NULL);
|
3642
|
+
if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_CAPTION))
|
3655
3643
|
parser_add_parse_error(parser, token);
|
3656
3644
|
while (!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_CAPTION))
|
3657
3645
|
;
|
3658
3646
|
clear_active_formatting_elements(parser);
|
3659
3647
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
|
3660
3648
|
parser->_parser_state->_reprocess_current_token = true;
|
3661
|
-
return
|
3649
|
+
return;
|
3662
3650
|
}
|
3663
3651
|
if (
|
3664
3652
|
tag_in(token, kEndTag, &(const TagSet) {
|
@@ -3668,77 +3656,79 @@ static bool handle_in_caption(GumboParser* parser, GumboToken* token) {
|
|
3668
3656
|
) {
|
3669
3657
|
parser_add_parse_error(parser, token);
|
3670
3658
|
ignore_token(parser);
|
3671
|
-
return
|
3659
|
+
return;
|
3672
3660
|
}
|
3673
|
-
|
3661
|
+
handle_in_body(parser, token);
|
3674
3662
|
}
|
3675
3663
|
|
3676
3664
|
// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-incolgroup
|
3677
|
-
static
|
3665
|
+
static void handle_in_column_group(GumboParser* parser, GumboToken* token) {
|
3678
3666
|
if (token->type == GUMBO_TOKEN_WHITESPACE) {
|
3679
3667
|
insert_text_token(parser, token);
|
3680
|
-
return
|
3668
|
+
return;
|
3681
3669
|
}
|
3682
3670
|
if (token->type == GUMBO_TOKEN_COMMENT) {
|
3683
3671
|
append_comment_node(parser, get_current_node(parser), token);
|
3684
|
-
return
|
3672
|
+
return;
|
3685
3673
|
}
|
3686
3674
|
if (token->type == GUMBO_TOKEN_DOCTYPE) {
|
3687
3675
|
parser_add_parse_error(parser, token);
|
3688
3676
|
ignore_token(parser);
|
3689
|
-
return
|
3677
|
+
return;
|
3690
3678
|
}
|
3691
3679
|
if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
|
3692
|
-
|
3680
|
+
handle_in_body(parser, token);
|
3681
|
+
return;
|
3693
3682
|
}
|
3694
3683
|
if (tag_is(token, kStartTag, GUMBO_TAG_COL)) {
|
3695
3684
|
insert_element_from_token(parser, token);
|
3696
3685
|
pop_current_node(parser);
|
3697
3686
|
acknowledge_self_closing_tag(parser);
|
3698
|
-
return
|
3687
|
+
return;
|
3699
3688
|
}
|
3700
3689
|
if (tag_is(token, kEndTag, GUMBO_TAG_COLGROUP)) {
|
3701
3690
|
if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_COLGROUP)) {
|
3702
3691
|
parser_add_parse_error(parser, token);
|
3703
3692
|
ignore_token(parser);
|
3704
|
-
return
|
3693
|
+
return;
|
3705
3694
|
}
|
3706
3695
|
pop_current_node(parser);
|
3707
3696
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
|
3708
|
-
return
|
3697
|
+
return;
|
3709
3698
|
}
|
3710
3699
|
if (tag_is(token, kEndTag, GUMBO_TAG_COL)) {
|
3711
3700
|
parser_add_parse_error(parser, token);
|
3712
3701
|
ignore_token(parser);
|
3713
|
-
return
|
3702
|
+
return;
|
3714
3703
|
}
|
3715
3704
|
if (
|
3716
3705
|
tag_is(token, kStartTag, GUMBO_TAG_TEMPLATE)
|
3717
3706
|
|| tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)
|
3718
3707
|
) {
|
3719
|
-
|
3708
|
+
handle_in_head(parser, token);
|
3709
|
+
return;
|
3720
3710
|
}
|
3721
3711
|
if (token->type == GUMBO_TOKEN_EOF) {
|
3722
|
-
|
3712
|
+
handle_in_body(parser, token);
|
3713
|
+
return;
|
3723
3714
|
}
|
3724
3715
|
if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_COLGROUP)) {
|
3725
3716
|
parser_add_parse_error(parser, token);
|
3726
3717
|
ignore_token(parser);
|
3727
|
-
return
|
3718
|
+
return;
|
3728
3719
|
}
|
3729
3720
|
pop_current_node(parser);
|
3730
3721
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
|
3731
3722
|
parser->_parser_state->_reprocess_current_token = true;
|
3732
|
-
return true;
|
3733
3723
|
}
|
3734
3724
|
|
3735
3725
|
// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-intbody
|
3736
|
-
static
|
3726
|
+
static void handle_in_table_body(GumboParser* parser, GumboToken* token) {
|
3737
3727
|
if (tag_is(token, kStartTag, GUMBO_TAG_TR)) {
|
3738
3728
|
clear_stack_to_table_body_context(parser);
|
3739
3729
|
insert_element_from_token(parser, token);
|
3740
3730
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_ROW);
|
3741
|
-
return
|
3731
|
+
return;
|
3742
3732
|
}
|
3743
3733
|
if (tag_in(token, kStartTag, &td_th_tags)) {
|
3744
3734
|
parser_add_parse_error(parser, token);
|
@@ -3746,7 +3736,7 @@ static bool handle_in_table_body(GumboParser* parser, GumboToken* token) {
|
|
3746
3736
|
insert_element_of_tag_type(parser, GUMBO_TAG_TR, GUMBO_INSERTION_IMPLIED);
|
3747
3737
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_ROW);
|
3748
3738
|
parser->_parser_state->_reprocess_current_token = true;
|
3749
|
-
return
|
3739
|
+
return;
|
3750
3740
|
}
|
3751
3741
|
if (
|
3752
3742
|
tag_in(token, kEndTag, &(const TagSet){TAG(TBODY), TAG(TFOOT), TAG(THEAD)})
|
@@ -3754,12 +3744,12 @@ static bool handle_in_table_body(GumboParser* parser, GumboToken* token) {
|
|
3754
3744
|
if (!has_an_element_in_table_scope(parser, token->v.end_tag.tag)) {
|
3755
3745
|
parser_add_parse_error(parser, token);
|
3756
3746
|
ignore_token(parser);
|
3757
|
-
return
|
3747
|
+
return;
|
3758
3748
|
}
|
3759
3749
|
clear_stack_to_table_body_context(parser);
|
3760
3750
|
pop_current_node(parser);
|
3761
3751
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
|
3762
|
-
return
|
3752
|
+
return;
|
3763
3753
|
}
|
3764
3754
|
if (
|
3765
3755
|
tag_in(token, kStartTag, &(const TagSet) {
|
@@ -3777,13 +3767,13 @@ static bool handle_in_table_body(GumboParser* parser, GumboToken* token) {
|
|
3777
3767
|
) {
|
3778
3768
|
parser_add_parse_error(parser, token);
|
3779
3769
|
ignore_token(parser);
|
3780
|
-
return
|
3770
|
+
return;
|
3781
3771
|
}
|
3782
3772
|
clear_stack_to_table_body_context(parser);
|
3783
3773
|
pop_current_node(parser);
|
3784
3774
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
|
3785
3775
|
parser->_parser_state->_reprocess_current_token = true;
|
3786
|
-
return
|
3776
|
+
return;
|
3787
3777
|
}
|
3788
3778
|
if (
|
3789
3779
|
tag_in(token, kEndTag, &(const TagSet) {
|
@@ -3793,30 +3783,30 @@ static bool handle_in_table_body(GumboParser* parser, GumboToken* token) {
|
|
3793
3783
|
) {
|
3794
3784
|
parser_add_parse_error(parser, token);
|
3795
3785
|
ignore_token(parser);
|
3796
|
-
return
|
3786
|
+
return;
|
3797
3787
|
}
|
3798
|
-
|
3788
|
+
handle_in_table(parser, token);
|
3799
3789
|
}
|
3800
3790
|
|
3801
3791
|
// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-intr
|
3802
|
-
static
|
3792
|
+
static void handle_in_row(GumboParser* parser, GumboToken* token) {
|
3803
3793
|
if (tag_in(token, kStartTag, &td_th_tags)) {
|
3804
3794
|
clear_stack_to_table_row_context(parser);
|
3805
3795
|
insert_element_from_token(parser, token);
|
3806
3796
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_CELL);
|
3807
3797
|
add_formatting_element(parser, &kActiveFormattingScopeMarker);
|
3808
|
-
return
|
3798
|
+
return;
|
3809
3799
|
}
|
3810
3800
|
if (tag_is(token, kEndTag, GUMBO_TAG_TR)) {
|
3811
3801
|
if (!has_an_element_in_table_scope(parser, GUMBO_TAG_TR)) {
|
3812
3802
|
parser_add_parse_error(parser, token);
|
3813
3803
|
ignore_token(parser);
|
3814
|
-
return
|
3804
|
+
return;
|
3815
3805
|
}
|
3816
3806
|
clear_stack_to_table_row_context(parser);
|
3817
3807
|
pop_current_node(parser);
|
3818
3808
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
|
3819
|
-
return
|
3809
|
+
return;
|
3820
3810
|
}
|
3821
3811
|
if (
|
3822
3812
|
tag_in(token, kStartTag, &(const TagSet) {
|
@@ -3828,13 +3818,13 @@ static bool handle_in_row(GumboParser* parser, GumboToken* token) {
|
|
3828
3818
|
if (!has_an_element_in_table_scope(parser, GUMBO_TAG_TR)) {
|
3829
3819
|
parser_add_parse_error(parser, token);
|
3830
3820
|
ignore_token(parser);
|
3831
|
-
return
|
3821
|
+
return;
|
3832
3822
|
}
|
3833
3823
|
clear_stack_to_table_row_context(parser);
|
3834
3824
|
pop_current_node(parser);
|
3835
3825
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
|
3836
3826
|
parser->_parser_state->_reprocess_current_token = true;
|
3837
|
-
return
|
3827
|
+
return;
|
3838
3828
|
}
|
3839
3829
|
if (
|
3840
3830
|
tag_in(token, kEndTag, &(const TagSet) {TAG(TBODY), TAG(TFOOT), TAG(THEAD)})
|
@@ -3842,17 +3832,17 @@ static bool handle_in_row(GumboParser* parser, GumboToken* token) {
|
|
3842
3832
|
if (!has_an_element_in_table_scope(parser, token->v.end_tag.tag)) {
|
3843
3833
|
parser_add_parse_error(parser, token);
|
3844
3834
|
ignore_token(parser);
|
3845
|
-
return
|
3835
|
+
return;
|
3846
3836
|
}
|
3847
3837
|
if (!has_an_element_in_table_scope(parser, GUMBO_TAG_TR)) {
|
3848
3838
|
ignore_token(parser);
|
3849
|
-
return
|
3839
|
+
return;
|
3850
3840
|
}
|
3851
3841
|
clear_stack_to_table_row_context(parser);
|
3852
3842
|
pop_current_node(parser);
|
3853
3843
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
|
3854
3844
|
parser->_parser_state->_reprocess_current_token = true;
|
3855
|
-
return
|
3845
|
+
return;
|
3856
3846
|
}
|
3857
3847
|
if (
|
3858
3848
|
tag_in(token, kEndTag, &(const TagSet) {
|
@@ -3862,21 +3852,22 @@ static bool handle_in_row(GumboParser* parser, GumboToken* token) {
|
|
3862
3852
|
) {
|
3863
3853
|
parser_add_parse_error(parser, token);
|
3864
3854
|
ignore_token(parser);
|
3865
|
-
return
|
3855
|
+
return;
|
3866
3856
|
}
|
3867
|
-
|
3857
|
+
handle_in_table(parser, token);
|
3868
3858
|
}
|
3869
3859
|
|
3870
3860
|
// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-intd
|
3871
|
-
static
|
3861
|
+
static void handle_in_cell(GumboParser* parser, GumboToken* token) {
|
3872
3862
|
if (tag_in(token, kEndTag, &td_th_tags)) {
|
3873
3863
|
GumboTag token_tag = token->v.end_tag.tag;
|
3874
3864
|
if (!has_an_element_in_table_scope(parser, token_tag)) {
|
3875
3865
|
parser_add_parse_error(parser, token);
|
3876
3866
|
ignore_token(parser);
|
3877
|
-
return
|
3867
|
+
return;
|
3878
3868
|
}
|
3879
|
-
|
3869
|
+
close_table_cell(parser, token, token_tag);
|
3870
|
+
return;
|
3880
3871
|
}
|
3881
3872
|
if (
|
3882
3873
|
tag_in(token, kStartTag, &(const TagSet) {
|
@@ -3892,10 +3883,11 @@ static bool handle_in_cell(GumboParser* parser, GumboToken* token) {
|
|
3892
3883
|
gumbo_debug("Bailing out because there's no <td> or <th> in scope.\n");
|
3893
3884
|
parser_add_parse_error(parser, token);
|
3894
3885
|
ignore_token(parser);
|
3895
|
-
return
|
3886
|
+
return;
|
3896
3887
|
}
|
3897
3888
|
parser->_parser_state->_reprocess_current_token = true;
|
3898
|
-
|
3889
|
+
close_current_cell(parser, token);
|
3890
|
+
return;
|
3899
3891
|
}
|
3900
3892
|
if (
|
3901
3893
|
tag_in(token, kEndTag, &(const TagSet) {
|
@@ -3904,7 +3896,7 @@ static bool handle_in_cell(GumboParser* parser, GumboToken* token) {
|
|
3904
3896
|
) {
|
3905
3897
|
parser_add_parse_error(parser, token);
|
3906
3898
|
ignore_token(parser);
|
3907
|
-
return
|
3899
|
+
return;
|
3908
3900
|
}
|
3909
3901
|
if (
|
3910
3902
|
tag_in(token, kEndTag, &(const TagSet) {
|
@@ -3914,46 +3906,48 @@ static bool handle_in_cell(GumboParser* parser, GumboToken* token) {
|
|
3914
3906
|
if (!has_an_element_in_table_scope(parser, token->v.end_tag.tag)) {
|
3915
3907
|
parser_add_parse_error(parser, token);
|
3916
3908
|
ignore_token(parser);
|
3917
|
-
return
|
3909
|
+
return;
|
3918
3910
|
}
|
3919
3911
|
parser->_parser_state->_reprocess_current_token = true;
|
3920
|
-
|
3912
|
+
close_current_cell(parser, token);
|
3913
|
+
return;
|
3921
3914
|
}
|
3922
|
-
|
3915
|
+
handle_in_body(parser, token);
|
3923
3916
|
}
|
3924
3917
|
|
3925
3918
|
// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inselect
|
3926
|
-
static
|
3919
|
+
static void handle_in_select(GumboParser* parser, GumboToken* token) {
|
3927
3920
|
if (token->type == GUMBO_TOKEN_NULL) {
|
3928
3921
|
parser_add_parse_error(parser, token);
|
3929
3922
|
ignore_token(parser);
|
3930
|
-
return
|
3923
|
+
return;
|
3931
3924
|
}
|
3932
3925
|
if (
|
3933
3926
|
token->type == GUMBO_TOKEN_CHARACTER
|
3934
3927
|
|| token->type == GUMBO_TOKEN_WHITESPACE
|
3935
3928
|
) {
|
3936
3929
|
insert_text_token(parser, token);
|
3937
|
-
return
|
3930
|
+
return;
|
3938
3931
|
}
|
3939
3932
|
if (token->type == GUMBO_TOKEN_COMMENT) {
|
3940
3933
|
append_comment_node(parser, get_current_node(parser), token);
|
3941
|
-
return
|
3934
|
+
return;
|
3942
3935
|
}
|
3943
3936
|
if (token->type == GUMBO_TOKEN_DOCTYPE) {
|
3944
3937
|
parser_add_parse_error(parser, token);
|
3945
3938
|
ignore_token(parser);
|
3946
|
-
return
|
3939
|
+
return;
|
3947
3940
|
}
|
3948
3941
|
if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
|
3949
|
-
|
3942
|
+
handle_in_body(parser, token);
|
3943
|
+
return;
|
3950
3944
|
}
|
3951
3945
|
if (tag_is(token, kStartTag, GUMBO_TAG_OPTION)) {
|
3952
3946
|
if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTION)) {
|
3953
3947
|
pop_current_node(parser);
|
3954
3948
|
}
|
3955
3949
|
insert_element_from_token(parser, token);
|
3956
|
-
return
|
3950
|
+
return;
|
3957
3951
|
}
|
3958
3952
|
if (tag_is(token, kStartTag, GUMBO_TAG_OPTGROUP)) {
|
3959
3953
|
if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTION)) {
|
@@ -3963,7 +3957,7 @@ static bool handle_in_select(GumboParser* parser, GumboToken* token) {
|
|
3963
3957
|
pop_current_node(parser);
|
3964
3958
|
}
|
3965
3959
|
insert_element_from_token(parser, token);
|
3966
|
-
return
|
3960
|
+
return;
|
3967
3961
|
}
|
3968
3962
|
if (tag_is(token, kEndTag, GUMBO_TAG_OPTGROUP)) {
|
3969
3963
|
GumboVector* open_elements = &parser->_parser_state->_open_elements;
|
@@ -3978,29 +3972,29 @@ static bool handle_in_select(GumboParser* parser, GumboToken* token) {
|
|
3978
3972
|
}
|
3979
3973
|
if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTGROUP)) {
|
3980
3974
|
pop_current_node(parser);
|
3981
|
-
return
|
3975
|
+
return;
|
3982
3976
|
}
|
3983
3977
|
parser_add_parse_error(parser, token);
|
3984
3978
|
ignore_token(parser);
|
3985
|
-
return
|
3979
|
+
return;
|
3986
3980
|
}
|
3987
3981
|
if (tag_is(token, kEndTag, GUMBO_TAG_OPTION)) {
|
3988
3982
|
if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTION)) {
|
3989
3983
|
pop_current_node(parser);
|
3990
|
-
return
|
3984
|
+
return;
|
3991
3985
|
}
|
3992
3986
|
parser_add_parse_error(parser, token);
|
3993
3987
|
ignore_token(parser);
|
3994
|
-
return
|
3988
|
+
return;
|
3995
3989
|
}
|
3996
3990
|
if (tag_is(token, kEndTag, GUMBO_TAG_SELECT)) {
|
3997
3991
|
if (!has_an_element_in_select_scope(parser, GUMBO_TAG_SELECT)) {
|
3998
3992
|
parser_add_parse_error(parser, token);
|
3999
3993
|
ignore_token(parser);
|
4000
|
-
return
|
3994
|
+
return;
|
4001
3995
|
}
|
4002
3996
|
close_current_select(parser);
|
4003
|
-
return
|
3997
|
+
return;
|
4004
3998
|
}
|
4005
3999
|
if (tag_is(token, kStartTag, GUMBO_TAG_SELECT)) {
|
4006
4000
|
parser_add_parse_error(parser, token);
|
@@ -4008,7 +4002,7 @@ static bool handle_in_select(GumboParser* parser, GumboToken* token) {
|
|
4008
4002
|
if (has_an_element_in_select_scope(parser, GUMBO_TAG_SELECT)) {
|
4009
4003
|
close_current_select(parser);
|
4010
4004
|
}
|
4011
|
-
return
|
4005
|
+
return;
|
4012
4006
|
}
|
4013
4007
|
if (
|
4014
4008
|
tag_in(token, kStartTag, &(const TagSet) {TAG(INPUT), TAG(KEYGEN), TAG(TEXTAREA)})
|
@@ -4020,23 +4014,25 @@ static bool handle_in_select(GumboParser* parser, GumboToken* token) {
|
|
4020
4014
|
close_current_select(parser);
|
4021
4015
|
parser->_parser_state->_reprocess_current_token = true;
|
4022
4016
|
}
|
4023
|
-
return
|
4017
|
+
return;
|
4024
4018
|
}
|
4025
4019
|
if (
|
4026
4020
|
tag_in(token, kStartTag, &(const TagSet){TAG(SCRIPT), TAG(TEMPLATE)})
|
4027
4021
|
|| tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)
|
4028
4022
|
) {
|
4029
|
-
|
4023
|
+
handle_in_head(parser, token);
|
4024
|
+
return;
|
4025
|
+
}
|
4026
|
+
if (token->type == GUMBO_TOKEN_EOF) {
|
4027
|
+
handle_in_body(parser, token);
|
4028
|
+
return;
|
4030
4029
|
}
|
4031
|
-
if (token->type == GUMBO_TOKEN_EOF)
|
4032
|
-
return handle_in_body(parser, token);
|
4033
4030
|
parser_add_parse_error(parser, token);
|
4034
4031
|
ignore_token(parser);
|
4035
|
-
return false;
|
4036
4032
|
}
|
4037
4033
|
|
4038
4034
|
// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inselectintable
|
4039
|
-
static
|
4035
|
+
static void handle_in_select_in_table(GumboParser* parser, GumboToken* token) {
|
4040
4036
|
static const TagSet tags = {
|
4041
4037
|
TAG(CAPTION), TAG(TABLE), TAG(TBODY), TAG(TFOOT), TAG(THEAD),
|
4042
4038
|
TAG(TR), TAG(TD), TAG(TH)
|
@@ -4045,23 +4041,23 @@ static bool handle_in_select_in_table(GumboParser* parser, GumboToken* token) {
|
|
4045
4041
|
parser_add_parse_error(parser, token);
|
4046
4042
|
close_current_select(parser);
|
4047
4043
|
parser->_parser_state->_reprocess_current_token = true;
|
4048
|
-
return
|
4044
|
+
return;
|
4049
4045
|
}
|
4050
4046
|
if (tag_in(token, kEndTag, &tags)) {
|
4051
4047
|
parser_add_parse_error(parser, token);
|
4052
4048
|
if (!has_an_element_in_table_scope(parser, token->v.end_tag.tag)) {
|
4053
4049
|
ignore_token(parser);
|
4054
|
-
return
|
4050
|
+
return;
|
4055
4051
|
}
|
4056
4052
|
close_current_select(parser);
|
4057
4053
|
parser->_parser_state->_reprocess_current_token = true;
|
4058
|
-
return
|
4054
|
+
return;
|
4059
4055
|
}
|
4060
|
-
|
4056
|
+
handle_in_select(parser, token);
|
4061
4057
|
}
|
4062
4058
|
|
4063
4059
|
// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-intemplate
|
4064
|
-
static
|
4060
|
+
static void handle_in_template(GumboParser* parser, GumboToken* token) {
|
4065
4061
|
GumboParserState* state = parser->_parser_state;
|
4066
4062
|
switch (token->type) {
|
4067
4063
|
case GUMBO_TOKEN_WHITESPACE:
|
@@ -4069,7 +4065,8 @@ static bool handle_in_template(GumboParser* parser, GumboToken* token) {
|
|
4069
4065
|
case GUMBO_TOKEN_COMMENT:
|
4070
4066
|
case GUMBO_TOKEN_NULL:
|
4071
4067
|
case GUMBO_TOKEN_DOCTYPE:
|
4072
|
-
|
4068
|
+
handle_in_body(parser, token);
|
4069
|
+
return;
|
4073
4070
|
default:
|
4074
4071
|
break;
|
4075
4072
|
}
|
@@ -4080,7 +4077,8 @@ static bool handle_in_template(GumboParser* parser, GumboToken* token) {
|
|
4080
4077
|
})
|
4081
4078
|
|| tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)
|
4082
4079
|
) {
|
4083
|
-
|
4080
|
+
handle_in_head(parser, token);
|
4081
|
+
return;
|
4084
4082
|
}
|
4085
4083
|
if (
|
4086
4084
|
tag_in(token, kStartTag, &(const TagSet) {
|
@@ -4091,45 +4089,45 @@ static bool handle_in_template(GumboParser* parser, GumboToken* token) {
|
|
4091
4089
|
push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
|
4092
4090
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
|
4093
4091
|
state->_reprocess_current_token = true;
|
4094
|
-
return
|
4092
|
+
return;
|
4095
4093
|
}
|
4096
4094
|
if (tag_is(token, kStartTag, GUMBO_TAG_COL)) {
|
4097
4095
|
pop_template_insertion_mode(parser);
|
4098
4096
|
push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_COLUMN_GROUP);
|
4099
4097
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_COLUMN_GROUP);
|
4100
4098
|
state->_reprocess_current_token = true;
|
4101
|
-
return
|
4099
|
+
return;
|
4102
4100
|
}
|
4103
4101
|
if (tag_is(token, kStartTag, GUMBO_TAG_TR)) {
|
4104
4102
|
pop_template_insertion_mode(parser);
|
4105
4103
|
push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
|
4106
4104
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
|
4107
4105
|
state->_reprocess_current_token = true;
|
4108
|
-
return
|
4106
|
+
return;
|
4109
4107
|
}
|
4110
4108
|
if (tag_in(token, kStartTag, &td_th_tags)) {
|
4111
4109
|
pop_template_insertion_mode(parser);
|
4112
4110
|
push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_ROW);
|
4113
4111
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_ROW);
|
4114
4112
|
state->_reprocess_current_token = true;
|
4115
|
-
return
|
4113
|
+
return;
|
4116
4114
|
}
|
4117
4115
|
if (token->type == GUMBO_TOKEN_START_TAG) {
|
4118
4116
|
pop_template_insertion_mode(parser);
|
4119
4117
|
push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_BODY);
|
4120
4118
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_BODY);
|
4121
4119
|
state->_reprocess_current_token = true;
|
4122
|
-
return
|
4120
|
+
return;
|
4123
4121
|
}
|
4124
4122
|
if (token->type == GUMBO_TOKEN_END_TAG) {
|
4125
4123
|
parser_add_parse_error(parser, token);
|
4126
4124
|
ignore_token(parser);
|
4127
|
-
return
|
4125
|
+
return;
|
4128
4126
|
}
|
4129
4127
|
if (token->type == GUMBO_TOKEN_EOF) {
|
4130
4128
|
if (!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
|
4131
4129
|
// Stop parsing.
|
4132
|
-
return
|
4130
|
+
return;
|
4133
4131
|
}
|
4134
4132
|
parser_add_parse_error(parser, token);
|
4135
4133
|
while (!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_TEMPLATE))
|
@@ -4138,40 +4136,41 @@ static bool handle_in_template(GumboParser* parser, GumboToken* token) {
|
|
4138
4136
|
pop_template_insertion_mode(parser);
|
4139
4137
|
reset_insertion_mode_appropriately(parser);
|
4140
4138
|
state->_reprocess_current_token = true;
|
4141
|
-
return
|
4139
|
+
return;
|
4142
4140
|
}
|
4143
4141
|
assert(0 && "unreachable");
|
4144
|
-
return false;
|
4145
4142
|
}
|
4146
4143
|
|
4147
4144
|
// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-afterbody
|
4148
|
-
static
|
4145
|
+
static void handle_after_body(GumboParser* parser, GumboToken* token) {
|
4149
4146
|
if (
|
4150
4147
|
token->type == GUMBO_TOKEN_WHITESPACE
|
4151
4148
|
|| tag_is(token, kStartTag, GUMBO_TAG_HTML)
|
4152
4149
|
) {
|
4153
|
-
|
4150
|
+
handle_in_body(parser, token);
|
4151
|
+
return;
|
4154
4152
|
}
|
4155
4153
|
if (token->type == GUMBO_TOKEN_COMMENT) {
|
4156
4154
|
GumboNode* html_node = parser->_output->root;
|
4157
4155
|
assert(html_node != NULL);
|
4158
4156
|
append_comment_node(parser, html_node, token);
|
4159
|
-
return
|
4157
|
+
return;
|
4160
4158
|
}
|
4161
4159
|
if (token->type == GUMBO_TOKEN_DOCTYPE) {
|
4162
4160
|
parser_add_parse_error(parser, token);
|
4163
4161
|
ignore_token(parser);
|
4164
|
-
return
|
4162
|
+
return;
|
4165
4163
|
}
|
4166
4164
|
if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
|
4167
|
-
|
4165
|
+
handle_in_body(parser, token);
|
4166
|
+
return;
|
4168
4167
|
}
|
4169
4168
|
if (tag_is(token, kEndTag, GUMBO_TAG_HTML)) {
|
4170
4169
|
/* fragment case: ignore the closing HTML token */
|
4171
4170
|
if (is_fragment_parser(parser)) {
|
4172
4171
|
parser_add_parse_error(parser, token);
|
4173
4172
|
ignore_token(parser);
|
4174
|
-
return
|
4173
|
+
return;
|
4175
4174
|
}
|
4176
4175
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_AFTER_BODY);
|
4177
4176
|
GumboNode* html = parser->_parser_state->_open_elements.data[0];
|
@@ -4180,44 +4179,44 @@ static bool handle_after_body(GumboParser* parser, GumboToken* token) {
|
|
4180
4179
|
parser->_parser_state->_current_token,
|
4181
4180
|
&html->v.element
|
4182
4181
|
);
|
4183
|
-
return
|
4182
|
+
return;
|
4184
4183
|
}
|
4185
4184
|
if (token->type == GUMBO_TOKEN_EOF) {
|
4186
|
-
return
|
4185
|
+
return;
|
4187
4186
|
}
|
4188
4187
|
parser_add_parse_error(parser, token);
|
4189
4188
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_BODY);
|
4190
4189
|
parser->_parser_state->_reprocess_current_token = true;
|
4191
|
-
return false;
|
4192
4190
|
}
|
4193
4191
|
|
4194
4192
|
// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inframeset
|
4195
|
-
static
|
4193
|
+
static void handle_in_frameset(GumboParser* parser, GumboToken* token) {
|
4196
4194
|
if (token->type == GUMBO_TOKEN_WHITESPACE) {
|
4197
4195
|
insert_text_token(parser, token);
|
4198
|
-
return
|
4196
|
+
return;
|
4199
4197
|
}
|
4200
4198
|
if (token->type == GUMBO_TOKEN_COMMENT) {
|
4201
4199
|
append_comment_node(parser, get_current_node(parser), token);
|
4202
|
-
return
|
4200
|
+
return;
|
4203
4201
|
}
|
4204
4202
|
if (token->type == GUMBO_TOKEN_DOCTYPE) {
|
4205
4203
|
parser_add_parse_error(parser, token);
|
4206
4204
|
ignore_token(parser);
|
4207
|
-
return
|
4205
|
+
return;
|
4208
4206
|
}
|
4209
4207
|
if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
|
4210
|
-
|
4208
|
+
handle_in_body(parser, token);
|
4209
|
+
return;
|
4211
4210
|
}
|
4212
4211
|
if (tag_is(token, kStartTag, GUMBO_TAG_FRAMESET)) {
|
4213
4212
|
insert_element_from_token(parser, token);
|
4214
|
-
return
|
4213
|
+
return;
|
4215
4214
|
}
|
4216
4215
|
if (tag_is(token, kEndTag, GUMBO_TAG_FRAMESET)) {
|
4217
4216
|
if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_HTML)) {
|
4218
4217
|
parser_add_parse_error(parser, token);
|
4219
4218
|
ignore_token(parser);
|
4220
|
-
return
|
4219
|
+
return;
|
4221
4220
|
}
|
4222
4221
|
pop_current_node(parser);
|
4223
4222
|
if (
|
@@ -4226,46 +4225,45 @@ static bool handle_in_frameset(GumboParser* parser, GumboToken* token) {
|
|
4226
4225
|
) {
|
4227
4226
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_FRAMESET);
|
4228
4227
|
}
|
4229
|
-
return
|
4228
|
+
return;
|
4230
4229
|
}
|
4231
4230
|
if (tag_is(token, kStartTag, GUMBO_TAG_FRAME)) {
|
4232
4231
|
insert_element_from_token(parser, token);
|
4233
4232
|
pop_current_node(parser);
|
4234
4233
|
acknowledge_self_closing_tag(parser);
|
4235
|
-
return
|
4234
|
+
return;
|
4236
4235
|
}
|
4237
4236
|
if (tag_is(token, kStartTag, GUMBO_TAG_NOFRAMES)) {
|
4238
|
-
|
4237
|
+
handle_in_head(parser, token);
|
4238
|
+
return;
|
4239
4239
|
}
|
4240
4240
|
if (token->type == GUMBO_TOKEN_EOF) {
|
4241
|
-
if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_HTML))
|
4241
|
+
if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_HTML))
|
4242
4242
|
parser_add_parse_error(parser, token);
|
4243
|
-
|
4244
|
-
}
|
4245
|
-
return true;
|
4243
|
+
return;
|
4246
4244
|
}
|
4247
4245
|
parser_add_parse_error(parser, token);
|
4248
4246
|
ignore_token(parser);
|
4249
|
-
return false;
|
4250
4247
|
}
|
4251
4248
|
|
4252
4249
|
// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-afterframeset
|
4253
|
-
static
|
4250
|
+
static void handle_after_frameset(GumboParser* parser, GumboToken* token) {
|
4254
4251
|
if (token->type == GUMBO_TOKEN_WHITESPACE) {
|
4255
4252
|
insert_text_token(parser, token);
|
4256
|
-
return
|
4253
|
+
return;
|
4257
4254
|
}
|
4258
4255
|
if (token->type == GUMBO_TOKEN_COMMENT) {
|
4259
4256
|
append_comment_node(parser, get_current_node(parser), token);
|
4260
|
-
return
|
4257
|
+
return;
|
4261
4258
|
}
|
4262
4259
|
if (token->type == GUMBO_TOKEN_DOCTYPE) {
|
4263
4260
|
parser_add_parse_error(parser, token);
|
4264
4261
|
ignore_token(parser);
|
4265
|
-
return
|
4262
|
+
return;
|
4266
4263
|
}
|
4267
4264
|
if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
|
4268
|
-
|
4265
|
+
handle_in_body(parser, token);
|
4266
|
+
return;
|
4269
4267
|
}
|
4270
4268
|
if (tag_is(token, kEndTag, GUMBO_TAG_HTML)) {
|
4271
4269
|
GumboNode* html = parser->_parser_state->_open_elements.data[0];
|
@@ -4275,71 +4273,71 @@ static bool handle_after_frameset(GumboParser* parser, GumboToken* token) {
|
|
4275
4273
|
&html->v.element
|
4276
4274
|
);
|
4277
4275
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_AFTER_FRAMESET);
|
4278
|
-
return
|
4276
|
+
return;
|
4279
4277
|
}
|
4280
4278
|
if (tag_is(token, kStartTag, GUMBO_TAG_NOFRAMES)) {
|
4281
4279
|
return handle_in_head(parser, token);
|
4282
4280
|
}
|
4283
4281
|
if (token->type == GUMBO_TOKEN_EOF) {
|
4284
|
-
return
|
4282
|
+
return;
|
4285
4283
|
}
|
4286
4284
|
parser_add_parse_error(parser, token);
|
4287
4285
|
ignore_token(parser);
|
4288
|
-
return false;
|
4289
4286
|
}
|
4290
4287
|
|
4291
4288
|
// https://html.spec.whatwg.org/multipage/parsing.html#the-after-after-body-insertion-mode
|
4292
|
-
static
|
4289
|
+
static void handle_after_after_body(GumboParser* parser, GumboToken* token) {
|
4293
4290
|
if (token->type == GUMBO_TOKEN_COMMENT) {
|
4294
4291
|
append_comment_node(parser, get_document_node(parser), token);
|
4295
|
-
return
|
4292
|
+
return;
|
4296
4293
|
}
|
4297
4294
|
if (
|
4298
4295
|
token->type == GUMBO_TOKEN_DOCTYPE
|
4299
4296
|
|| token->type == GUMBO_TOKEN_WHITESPACE
|
4300
4297
|
|| tag_is(token, kStartTag, GUMBO_TAG_HTML)
|
4301
4298
|
) {
|
4302
|
-
|
4299
|
+
handle_in_body(parser, token);
|
4300
|
+
return;
|
4303
4301
|
}
|
4304
4302
|
if (token->type == GUMBO_TOKEN_EOF) {
|
4305
|
-
return
|
4303
|
+
return;
|
4306
4304
|
}
|
4307
4305
|
parser_add_parse_error(parser, token);
|
4308
4306
|
set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_BODY);
|
4309
4307
|
parser->_parser_state->_reprocess_current_token = true;
|
4310
|
-
return false;
|
4311
4308
|
}
|
4312
4309
|
|
4313
4310
|
// https://html.spec.whatwg.org/multipage/parsing.html#the-after-after-frameset-insertion-mode
|
4314
|
-
static
|
4311
|
+
static void handle_after_after_frameset (
|
4315
4312
|
GumboParser* parser,
|
4316
4313
|
GumboToken* token
|
4317
4314
|
) {
|
4318
4315
|
if (token->type == GUMBO_TOKEN_COMMENT) {
|
4319
4316
|
append_comment_node(parser, get_document_node(parser), token);
|
4320
|
-
return
|
4317
|
+
return;
|
4321
4318
|
}
|
4322
4319
|
if (
|
4323
4320
|
token->type == GUMBO_TOKEN_DOCTYPE
|
4324
4321
|
|| token->type == GUMBO_TOKEN_WHITESPACE
|
4325
4322
|
|| tag_is(token, kStartTag, GUMBO_TAG_HTML)
|
4326
4323
|
) {
|
4327
|
-
|
4324
|
+
handle_in_body(parser, token);
|
4325
|
+
return;
|
4328
4326
|
}
|
4329
4327
|
if (token->type == GUMBO_TOKEN_EOF) {
|
4330
|
-
return
|
4328
|
+
return;
|
4331
4329
|
}
|
4332
4330
|
if (tag_is(token, kStartTag, GUMBO_TAG_NOFRAMES)) {
|
4333
|
-
|
4331
|
+
handle_in_head(parser, token);
|
4332
|
+
return;
|
4334
4333
|
}
|
4335
4334
|
parser_add_parse_error(parser, token);
|
4336
4335
|
ignore_token(parser);
|
4337
|
-
return false;
|
4338
4336
|
}
|
4339
4337
|
|
4340
4338
|
// Function pointers for each insertion mode.
|
4341
4339
|
// Keep in sync with insertion_mode.h.
|
4342
|
-
typedef
|
4340
|
+
typedef void (*TokenHandler)(GumboParser* parser, GumboToken* token);
|
4343
4341
|
static const TokenHandler kTokenHandlers[] = {
|
4344
4342
|
handle_initial,
|
4345
4343
|
handle_before_html,
|
@@ -4366,36 +4364,36 @@ static const TokenHandler kTokenHandlers[] = {
|
|
4366
4364
|
handle_after_after_frameset
|
4367
4365
|
};
|
4368
4366
|
|
4369
|
-
static
|
4367
|
+
static void handle_html_content(GumboParser* parser, GumboToken* token) {
|
4370
4368
|
const GumboInsertionMode mode = parser->_parser_state->_insertion_mode;
|
4371
4369
|
const TokenHandler handler = kTokenHandlers[mode];
|
4372
|
-
|
4370
|
+
handler(parser, token);
|
4373
4371
|
}
|
4374
4372
|
|
4375
4373
|
// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inforeign
|
4376
|
-
static
|
4374
|
+
static void handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
|
4377
4375
|
gumbo_debug("Handling foreign content");
|
4378
4376
|
switch (token->type) {
|
4379
4377
|
case GUMBO_TOKEN_NULL:
|
4380
4378
|
parser_add_parse_error(parser, token);
|
4381
4379
|
token->v.character = kUtf8ReplacementChar;
|
4382
4380
|
insert_text_token(parser, token);
|
4383
|
-
return
|
4381
|
+
return;
|
4384
4382
|
case GUMBO_TOKEN_WHITESPACE:
|
4385
4383
|
insert_text_token(parser, token);
|
4386
|
-
return
|
4384
|
+
return;
|
4387
4385
|
case GUMBO_TOKEN_CDATA:
|
4388
4386
|
case GUMBO_TOKEN_CHARACTER:
|
4389
4387
|
insert_text_token(parser, token);
|
4390
4388
|
set_frameset_not_ok(parser);
|
4391
|
-
return
|
4389
|
+
return;
|
4392
4390
|
case GUMBO_TOKEN_COMMENT:
|
4393
4391
|
append_comment_node(parser, get_current_node(parser), token);
|
4394
|
-
return
|
4392
|
+
return;
|
4395
4393
|
case GUMBO_TOKEN_DOCTYPE:
|
4396
4394
|
parser_add_parse_error(parser, token);
|
4397
4395
|
ignore_token(parser);
|
4398
|
-
return
|
4396
|
+
return;
|
4399
4397
|
default:
|
4400
4398
|
// Fall through to the if-statements below.
|
4401
4399
|
break;
|
@@ -4439,7 +4437,7 @@ static bool handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
|
|
4439
4437
|
)
|
4440
4438
|
);
|
4441
4439
|
parser->_parser_state->_reprocess_current_token = true;
|
4442
|
-
return
|
4440
|
+
return;
|
4443
4441
|
}
|
4444
4442
|
// This is a start tag so the next if's then branch will be taken.
|
4445
4443
|
}
|
@@ -4460,7 +4458,7 @@ static bool handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
|
|
4460
4458
|
pop_current_node(parser);
|
4461
4459
|
acknowledge_self_closing_tag(parser);
|
4462
4460
|
}
|
4463
|
-
return
|
4461
|
+
return;
|
4464
4462
|
// </script> tags are handled like any other end tag, putting the script's
|
4465
4463
|
// text into a text node child and closing the current node.
|
4466
4464
|
}
|
@@ -4470,11 +4468,8 @@ static bool handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
|
|
4470
4468
|
const char* name = token->v.end_tag.name;
|
4471
4469
|
assert(node != NULL);
|
4472
4470
|
|
4473
|
-
|
4474
|
-
if (!node_tagname_is(node, tag, name)) {
|
4471
|
+
if (!node_tagname_is(node, tag, name))
|
4475
4472
|
parser_add_parse_error(parser, token);
|
4476
|
-
is_success = false;
|
4477
|
-
}
|
4478
4473
|
int i = parser->_parser_state->_open_elements.length;
|
4479
4474
|
for (--i; i > 0;) {
|
4480
4475
|
// Here we move up the stack until we find an HTML element (in which
|
@@ -4489,7 +4484,7 @@ static bool handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
|
|
4489
4484
|
// be an element on the stack of open elements (set below), so
|
4490
4485
|
// this loop is guaranteed to terminate.
|
4491
4486
|
}
|
4492
|
-
return
|
4487
|
+
return;
|
4493
4488
|
}
|
4494
4489
|
--i;
|
4495
4490
|
node = parser->_parser_state->_open_elements.data[i];
|
@@ -4500,22 +4495,22 @@ static bool handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
|
|
4500
4495
|
}
|
4501
4496
|
assert(node->v.element.tag_namespace == GUMBO_NAMESPACE_HTML);
|
4502
4497
|
if (i == 0)
|
4503
|
-
return
|
4498
|
+
return;
|
4504
4499
|
// We can't call handle_token directly because the current node is still in
|
4505
4500
|
// a foreign namespace, so it would re-enter this and result in infinite
|
4506
4501
|
// recursion.
|
4507
|
-
|
4502
|
+
handle_html_content(parser, token);
|
4508
4503
|
}
|
4509
4504
|
|
4510
4505
|
// https://html.spec.whatwg.org/multipage/parsing.html#tree-construction
|
4511
|
-
static
|
4506
|
+
static void handle_token(GumboParser* parser, GumboToken* token) {
|
4512
4507
|
if (
|
4513
4508
|
parser->_parser_state->_ignore_next_linefeed
|
4514
4509
|
&& token->type == GUMBO_TOKEN_WHITESPACE && token->v.character == '\n'
|
4515
4510
|
) {
|
4516
4511
|
parser->_parser_state->_ignore_next_linefeed = false;
|
4517
4512
|
ignore_token(parser);
|
4518
|
-
return
|
4513
|
+
return;
|
4519
4514
|
}
|
4520
4515
|
// This needs to be reset both here and in the conditional above to catch both
|
4521
4516
|
// the case where the next token is not whitespace (so we don't ignore
|
@@ -4557,9 +4552,9 @@ static bool handle_token(GumboParser* parser, GumboToken* token) {
|
|
4557
4552
|
token->type == GUMBO_TOKEN_NULL ||
|
4558
4553
|
token->type == GUMBO_TOKEN_WHITESPACE)) ||
|
4559
4554
|
token->type == GUMBO_TOKEN_EOF) {
|
4560
|
-
|
4555
|
+
handle_html_content(parser, token);
|
4561
4556
|
} else {
|
4562
|
-
|
4557
|
+
handle_in_foreign_content(parser, token);
|
4563
4558
|
}
|
4564
4559
|
}
|
4565
4560
|
|
@@ -4746,7 +4741,6 @@ GumboOutput* gumbo_parse_with_options (
|
|
4746
4741
|
|
4747
4742
|
const unsigned int max_tree_depth = options->max_tree_depth;
|
4748
4743
|
GumboToken token;
|
4749
|
-
bool has_error = false;
|
4750
4744
|
|
4751
4745
|
do {
|
4752
4746
|
if (state->_reprocess_current_token) {
|
@@ -4758,7 +4752,7 @@ GumboOutput* gumbo_parse_with_options (
|
|
4758
4752
|
adjusted_current_node &&
|
4759
4753
|
adjusted_current_node->v.element.tag_namespace != GUMBO_NAMESPACE_HTML
|
4760
4754
|
);
|
4761
|
-
|
4755
|
+
gumbo_lex(&parser, &token);
|
4762
4756
|
}
|
4763
4757
|
|
4764
4758
|
const char* token_type = "text";
|
@@ -4792,7 +4786,7 @@ GumboOutput* gumbo_parse_with_options (
|
|
4792
4786
|
state->_current_token = &token;
|
4793
4787
|
state->_self_closing_flag_acknowledged = false;
|
4794
4788
|
|
4795
|
-
|
4789
|
+
handle_token(&parser, &token);
|
4796
4790
|
|
4797
4791
|
// Check for memory leaks when ownership is transferred from start tag
|
4798
4792
|
// tokens to nodes.
|
@@ -4809,7 +4803,6 @@ GumboOutput* gumbo_parse_with_options (
|
|
4809
4803
|
if (token.type == GUMBO_TOKEN_START_TAG &&
|
4810
4804
|
token.v.start_tag.is_self_closing &&
|
4811
4805
|
!state->_self_closing_flag_acknowledged) {
|
4812
|
-
has_error = true;
|
4813
4806
|
GumboError* error = gumbo_add_error(&parser);
|
4814
4807
|
if (error) {
|
4815
4808
|
// This is essentially a tokenizer error that's only caught during
|
@@ -4837,7 +4830,7 @@ GumboOutput* gumbo_parse_with_options (
|
|
4837
4830
|
|
4838
4831
|
} while (
|
4839
4832
|
(token.type != GUMBO_TOKEN_EOF || state->_reprocess_current_token)
|
4840
|
-
&& !(options->stop_on_first_error &&
|
4833
|
+
&& !(options->stop_on_first_error && parser._output->document_error)
|
4841
4834
|
);
|
4842
4835
|
|
4843
4836
|
finish_parsing(&parser);
|