nokogumbo 1.1.12 → 1.1.13
Sign up to get free protection for your applications and to get access to all the features.
- data/ext/nokogumboc/extconf.rb +1 -1
- data/gumbo-parser/src/char_ref.c +22828 -2291
- data/gumbo-parser/src/char_ref.rl +2548 -0
- data/gumbo-parser/src/error.c +21 -0
- data/gumbo-parser/src/parser.c +109 -105
- data/gumbo-parser/src/tokenizer.c +103 -103
- data/gumbo-parser/src/utf8.c +114 -120
- data/gumbo-parser/src/utf8.h +6 -0
- metadata +3 -2
@@ -196,7 +196,7 @@ typedef struct GumboInternalTokenizerState {
|
|
196
196
|
} GumboTokenizerState;
|
197
197
|
|
198
198
|
// Adds a parse error of the given type to the parser's error struct.
|
199
|
-
static void
|
199
|
+
static void tokenizer_add_parse_error(GumboParser* parser, GumboErrorType type) {
|
200
200
|
GumboError* error = gumbo_add_error(parser);
|
201
201
|
if (!error) {
|
202
202
|
return;
|
@@ -485,7 +485,7 @@ static void emit_char(GumboParser* parser, int c, GumboToken* output) {
|
|
485
485
|
static StateResult emit_replacement_char(
|
486
486
|
GumboParser* parser, GumboToken* output) {
|
487
487
|
// In all cases, this is because of a null byte in the input stream.
|
488
|
-
|
488
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_UTF8_NULL);
|
489
489
|
emit_char(parser, kUtf8ReplacementChar, output);
|
490
490
|
return RETURN_ERROR;
|
491
491
|
}
|
@@ -906,7 +906,7 @@ static StateResult handle_data_state(
|
|
906
906
|
append_char_to_temporary_buffer(parser, '<');
|
907
907
|
return NEXT_CHAR;
|
908
908
|
case '\0':
|
909
|
-
|
909
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_UTF8_NULL);
|
910
910
|
emit_char(parser, c, output);
|
911
911
|
return RETURN_ERROR;
|
912
912
|
default:
|
@@ -1023,7 +1023,7 @@ static StateResult handle_tag_open_state(
|
|
1023
1023
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_BOGUS_COMMENT);
|
1024
1024
|
clear_temporary_buffer(parser);
|
1025
1025
|
append_char_to_temporary_buffer(parser, '?');
|
1026
|
-
|
1026
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_TAG_STARTS_WITH_QUESTION);
|
1027
1027
|
return NEXT_CHAR;
|
1028
1028
|
default:
|
1029
1029
|
if (is_alpha(c)) {
|
@@ -1031,7 +1031,7 @@ static StateResult handle_tag_open_state(
|
|
1031
1031
|
start_new_tag(parser, true);
|
1032
1032
|
return NEXT_CHAR;
|
1033
1033
|
} else {
|
1034
|
-
|
1034
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_TAG_INVALID);
|
1035
1035
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
1036
1036
|
emit_temporary_buffer(parser, output);
|
1037
1037
|
return RETURN_ERROR;
|
@@ -1046,11 +1046,11 @@ static StateResult handle_end_tag_open_state(
|
|
1046
1046
|
assert(temporary_buffer_equals(parser, "</"));
|
1047
1047
|
switch (c) {
|
1048
1048
|
case '>':
|
1049
|
-
|
1049
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_CLOSE_TAG_EMPTY);
|
1050
1050
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
1051
1051
|
return NEXT_CHAR;
|
1052
1052
|
case -1:
|
1053
|
-
|
1053
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_CLOSE_TAG_EOF);
|
1054
1054
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
1055
1055
|
return emit_temporary_buffer(parser, output);
|
1056
1056
|
default:
|
@@ -1058,7 +1058,7 @@ static StateResult handle_end_tag_open_state(
|
|
1058
1058
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_TAG_NAME);
|
1059
1059
|
start_new_tag(parser, false);
|
1060
1060
|
} else {
|
1061
|
-
|
1061
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_CLOSE_TAG_INVALID);
|
1062
1062
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_BOGUS_COMMENT);
|
1063
1063
|
clear_temporary_buffer(parser);
|
1064
1064
|
append_char_to_temporary_buffer(parser, c);
|
@@ -1088,11 +1088,11 @@ static StateResult handle_tag_name_state(
|
|
1088
1088
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
1089
1089
|
return emit_current_tag(parser, output);
|
1090
1090
|
case '\0':
|
1091
|
-
|
1091
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_UTF8_NULL);
|
1092
1092
|
append_char_to_tag_buffer(parser, kUtf8ReplacementChar, true);
|
1093
1093
|
return NEXT_CHAR;
|
1094
1094
|
case -1:
|
1095
|
-
|
1095
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_TAG_EOF);
|
1096
1096
|
abandon_current_tag(parser);
|
1097
1097
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
1098
1098
|
return NEXT_CHAR;
|
@@ -1349,7 +1349,7 @@ static StateResult handle_script_escaped_state(
|
|
1349
1349
|
case '\0':
|
1350
1350
|
return emit_replacement_char(parser, output);
|
1351
1351
|
case -1:
|
1352
|
-
|
1352
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_SCRIPT_EOF);
|
1353
1353
|
return emit_eof(parser, output);
|
1354
1354
|
default:
|
1355
1355
|
return emit_current_char(parser, output);
|
@@ -1373,7 +1373,7 @@ static StateResult handle_script_escaped_dash_state(
|
|
1373
1373
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT_ESCAPED);
|
1374
1374
|
return emit_replacement_char(parser, output);
|
1375
1375
|
case -1:
|
1376
|
-
|
1376
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_SCRIPT_EOF);
|
1377
1377
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
1378
1378
|
return NEXT_CHAR;
|
1379
1379
|
default:
|
@@ -1401,7 +1401,7 @@ static StateResult handle_script_escaped_dash_dash_state(
|
|
1401
1401
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT_ESCAPED);
|
1402
1402
|
return emit_replacement_char(parser, output);
|
1403
1403
|
case -1:
|
1404
|
-
|
1404
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_SCRIPT_EOF);
|
1405
1405
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
1406
1406
|
return NEXT_CHAR;
|
1407
1407
|
default:
|
@@ -1523,7 +1523,7 @@ static StateResult handle_script_double_escaped_state(
|
|
1523
1523
|
case '\0':
|
1524
1524
|
return emit_replacement_char(parser, output);
|
1525
1525
|
case -1:
|
1526
|
-
|
1526
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_SCRIPT_EOF);
|
1527
1527
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
1528
1528
|
return NEXT_CHAR;
|
1529
1529
|
default:
|
@@ -1547,7 +1547,7 @@ static StateResult handle_script_double_escaped_dash_state(
|
|
1547
1547
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT_DOUBLE_ESCAPED);
|
1548
1548
|
return emit_replacement_char(parser, output);
|
1549
1549
|
case -1:
|
1550
|
-
|
1550
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_SCRIPT_EOF);
|
1551
1551
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
1552
1552
|
return NEXT_CHAR;
|
1553
1553
|
default:
|
@@ -1573,7 +1573,7 @@ static StateResult handle_script_double_escaped_dash_dash_state(
|
|
1573
1573
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT_DOUBLE_ESCAPED);
|
1574
1574
|
return emit_replacement_char(parser, output);
|
1575
1575
|
case -1:
|
1576
|
-
|
1576
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_SCRIPT_EOF);
|
1577
1577
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
1578
1578
|
return NEXT_CHAR;
|
1579
1579
|
default:
|
@@ -1644,12 +1644,12 @@ static StateResult handle_before_attr_name_state(
|
|
1644
1644
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
1645
1645
|
return emit_current_tag(parser, output);
|
1646
1646
|
case '\0':
|
1647
|
-
|
1647
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_UTF8_NULL);
|
1648
1648
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_ATTR_NAME);
|
1649
1649
|
append_char_to_temporary_buffer(parser, 0xfffd);
|
1650
1650
|
return NEXT_CHAR;
|
1651
1651
|
case -1:
|
1652
|
-
|
1652
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_ATTR_NAME_EOF);
|
1653
1653
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
1654
1654
|
abandon_current_tag(parser);
|
1655
1655
|
return NEXT_CHAR;
|
@@ -1657,7 +1657,7 @@ static StateResult handle_before_attr_name_state(
|
|
1657
1657
|
case '\'':
|
1658
1658
|
case '<':
|
1659
1659
|
case '=':
|
1660
|
-
|
1660
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_ATTR_NAME_INVALID);
|
1661
1661
|
// Fall through.
|
1662
1662
|
default:
|
1663
1663
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_ATTR_NAME);
|
@@ -1691,18 +1691,18 @@ static StateResult handle_attr_name_state(
|
|
1691
1691
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
1692
1692
|
return emit_current_tag(parser, output);
|
1693
1693
|
case '\0':
|
1694
|
-
|
1694
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_UTF8_NULL);
|
1695
1695
|
append_char_to_tag_buffer(parser, kUtf8ReplacementChar, true);
|
1696
1696
|
return NEXT_CHAR;
|
1697
1697
|
case -1:
|
1698
1698
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
1699
1699
|
abandon_current_tag(parser);
|
1700
|
-
|
1700
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_ATTR_NAME_EOF);
|
1701
1701
|
return NEXT_CHAR;
|
1702
1702
|
case '"':
|
1703
1703
|
case '\'':
|
1704
1704
|
case '<':
|
1705
|
-
|
1705
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_ATTR_NAME_INVALID);
|
1706
1706
|
// Fall through.
|
1707
1707
|
default:
|
1708
1708
|
append_char_to_tag_buffer(parser, ensure_lowercase(c), true);
|
@@ -1730,19 +1730,19 @@ static StateResult handle_after_attr_name_state(
|
|
1730
1730
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
1731
1731
|
return emit_current_tag(parser, output);
|
1732
1732
|
case '\0':
|
1733
|
-
|
1733
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_UTF8_NULL);
|
1734
1734
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_ATTR_NAME);
|
1735
1735
|
append_char_to_temporary_buffer(parser, 0xfffd);
|
1736
1736
|
return NEXT_CHAR;
|
1737
1737
|
case -1:
|
1738
|
-
|
1738
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_ATTR_NAME_EOF);
|
1739
1739
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
1740
1740
|
abandon_current_tag(parser);
|
1741
1741
|
return NEXT_CHAR;
|
1742
1742
|
case '"':
|
1743
1743
|
case '\'':
|
1744
1744
|
case '<':
|
1745
|
-
|
1745
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_ATTR_NAME_INVALID);
|
1746
1746
|
// Fall through.
|
1747
1747
|
default:
|
1748
1748
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_ATTR_NAME);
|
@@ -1774,25 +1774,25 @@ static StateResult handle_before_attr_value_state(
|
|
1774
1774
|
reset_tag_buffer_start_point(parser);
|
1775
1775
|
return NEXT_CHAR;
|
1776
1776
|
case '\0':
|
1777
|
-
|
1777
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_UTF8_NULL);
|
1778
1778
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_ATTR_VALUE_UNQUOTED);
|
1779
1779
|
append_char_to_tag_buffer(parser, kUtf8ReplacementChar, true);
|
1780
1780
|
return NEXT_CHAR;
|
1781
1781
|
case -1:
|
1782
|
-
|
1782
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_ATTR_UNQUOTED_EOF);
|
1783
1783
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
1784
1784
|
abandon_current_tag(parser);
|
1785
1785
|
tokenizer->_reconsume_current_input = true;
|
1786
1786
|
return NEXT_CHAR;
|
1787
1787
|
case '>':
|
1788
|
-
|
1788
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_ATTR_UNQUOTED_RIGHT_BRACKET);
|
1789
1789
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
1790
1790
|
emit_current_tag(parser, output);
|
1791
1791
|
return RETURN_ERROR;
|
1792
1792
|
case '<':
|
1793
1793
|
case '=':
|
1794
1794
|
case '`':
|
1795
|
-
|
1795
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_ATTR_UNQUOTED_EQUALS);
|
1796
1796
|
// Fall through.
|
1797
1797
|
default:
|
1798
1798
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_ATTR_VALUE_UNQUOTED);
|
@@ -1815,11 +1815,11 @@ static StateResult handle_attr_value_double_quoted_state(
|
|
1815
1815
|
tokenizer->_reconsume_current_input = true;
|
1816
1816
|
return NEXT_CHAR;
|
1817
1817
|
case '\0':
|
1818
|
-
|
1818
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_UTF8_NULL);
|
1819
1819
|
append_char_to_tag_buffer(parser, kUtf8ReplacementChar, false);
|
1820
1820
|
return NEXT_CHAR;
|
1821
1821
|
case -1:
|
1822
|
-
|
1822
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_ATTR_DOUBLE_QUOTE_EOF);
|
1823
1823
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
1824
1824
|
abandon_current_tag(parser);
|
1825
1825
|
tokenizer->_reconsume_current_input = true;
|
@@ -1844,11 +1844,11 @@ static StateResult handle_attr_value_single_quoted_state(
|
|
1844
1844
|
tokenizer->_reconsume_current_input = true;
|
1845
1845
|
return NEXT_CHAR;
|
1846
1846
|
case '\0':
|
1847
|
-
|
1847
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_UTF8_NULL);
|
1848
1848
|
append_char_to_tag_buffer(parser, kUtf8ReplacementChar, false);
|
1849
1849
|
return NEXT_CHAR;
|
1850
1850
|
case -1:
|
1851
|
-
|
1851
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_ATTR_SINGLE_QUOTE_EOF);
|
1852
1852
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
1853
1853
|
abandon_current_tag(parser);
|
1854
1854
|
tokenizer->_reconsume_current_input = true;
|
@@ -1881,11 +1881,11 @@ static StateResult handle_attr_value_unquoted_state(
|
|
1881
1881
|
finish_attribute_value(parser);
|
1882
1882
|
return emit_current_tag(parser, output);
|
1883
1883
|
case '\0':
|
1884
|
-
|
1884
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_UTF8_NULL);
|
1885
1885
|
append_char_to_tag_buffer(parser, kUtf8ReplacementChar, true);
|
1886
1886
|
return NEXT_CHAR;
|
1887
1887
|
case -1:
|
1888
|
-
|
1888
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_ATTR_UNQUOTED_EOF);
|
1889
1889
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
1890
1890
|
tokenizer->_reconsume_current_input = true;
|
1891
1891
|
abandon_current_tag(parser);
|
@@ -1895,7 +1895,7 @@ static StateResult handle_attr_value_unquoted_state(
|
|
1895
1895
|
case '"':
|
1896
1896
|
case '\'':
|
1897
1897
|
case '`':
|
1898
|
-
|
1898
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_ATTR_UNQUOTED_EQUALS);
|
1899
1899
|
// Fall through.
|
1900
1900
|
default:
|
1901
1901
|
append_char_to_tag_buffer(parser, c, true);
|
@@ -1965,13 +1965,13 @@ static StateResult handle_after_attr_value_quoted_state(
|
|
1965
1965
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
1966
1966
|
return emit_current_tag(parser, output);
|
1967
1967
|
case -1:
|
1968
|
-
|
1968
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_ATTR_AFTER_EOF);
|
1969
1969
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
1970
1970
|
abandon_current_tag(parser);
|
1971
1971
|
tokenizer->_reconsume_current_input = true;
|
1972
1972
|
return NEXT_CHAR;
|
1973
1973
|
default:
|
1974
|
-
|
1974
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_ATTR_AFTER_INVALID);
|
1975
1975
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_BEFORE_ATTR_NAME);
|
1976
1976
|
tokenizer->_reconsume_current_input = true;
|
1977
1977
|
return NEXT_CHAR;
|
@@ -1988,12 +1988,12 @@ static StateResult handle_self_closing_start_tag_state(
|
|
1988
1988
|
tokenizer->_tag_state._is_self_closing = true;
|
1989
1989
|
return emit_current_tag(parser, output);
|
1990
1990
|
case -1:
|
1991
|
-
|
1991
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_SOLIDUS_EOF);
|
1992
1992
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
1993
1993
|
abandon_current_tag(parser);
|
1994
1994
|
return NEXT_CHAR;
|
1995
1995
|
default:
|
1996
|
-
|
1996
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_SOLIDUS_INVALID);
|
1997
1997
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_BEFORE_ATTR_NAME);
|
1998
1998
|
tokenizer->_reconsume_current_input = true;
|
1999
1999
|
return NEXT_CHAR;
|
@@ -2043,7 +2043,7 @@ static StateResult handle_markup_declaration_state(
|
|
2043
2043
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_CDATA);
|
2044
2044
|
tokenizer->_reconsume_current_input = true;
|
2045
2045
|
} else {
|
2046
|
-
|
2046
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DASHES_OR_DOCTYPE);
|
2047
2047
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_BOGUS_COMMENT);
|
2048
2048
|
tokenizer->_reconsume_current_input = true;
|
2049
2049
|
clear_temporary_buffer(parser);
|
@@ -2060,17 +2060,17 @@ static StateResult handle_comment_start_state(
|
|
2060
2060
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_COMMENT_START_DASH);
|
2061
2061
|
return NEXT_CHAR;
|
2062
2062
|
case '\0':
|
2063
|
-
|
2063
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_UTF8_NULL);
|
2064
2064
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_COMMENT);
|
2065
2065
|
append_char_to_temporary_buffer(parser, kUtf8ReplacementChar);
|
2066
2066
|
return NEXT_CHAR;
|
2067
2067
|
case '>':
|
2068
|
-
|
2068
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_COMMENT_INVALID);
|
2069
2069
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
2070
2070
|
emit_comment(parser, output);
|
2071
2071
|
return RETURN_ERROR;
|
2072
2072
|
case -1:
|
2073
|
-
|
2073
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_COMMENT_EOF);
|
2074
2074
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
2075
2075
|
emit_comment(parser, output);
|
2076
2076
|
return RETURN_ERROR;
|
@@ -2090,18 +2090,18 @@ static StateResult handle_comment_start_dash_state(
|
|
2090
2090
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_COMMENT_END);
|
2091
2091
|
return NEXT_CHAR;
|
2092
2092
|
case '\0':
|
2093
|
-
|
2093
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_UTF8_NULL);
|
2094
2094
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_COMMENT);
|
2095
2095
|
append_char_to_temporary_buffer(parser, '-');
|
2096
2096
|
append_char_to_temporary_buffer(parser, kUtf8ReplacementChar);
|
2097
2097
|
return NEXT_CHAR;
|
2098
2098
|
case '>':
|
2099
|
-
|
2099
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_COMMENT_INVALID);
|
2100
2100
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
2101
2101
|
emit_comment(parser, output);
|
2102
2102
|
return RETURN_ERROR;
|
2103
2103
|
case -1:
|
2104
|
-
|
2104
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_COMMENT_EOF);
|
2105
2105
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
2106
2106
|
emit_comment(parser, output);
|
2107
2107
|
return RETURN_ERROR;
|
@@ -2122,11 +2122,11 @@ static StateResult handle_comment_state(
|
|
2122
2122
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_COMMENT_END_DASH);
|
2123
2123
|
return NEXT_CHAR;
|
2124
2124
|
case '\0':
|
2125
|
-
|
2125
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_UTF8_NULL);
|
2126
2126
|
append_char_to_temporary_buffer(parser, kUtf8ReplacementChar);
|
2127
2127
|
return NEXT_CHAR;
|
2128
2128
|
case -1:
|
2129
|
-
|
2129
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_COMMENT_EOF);
|
2130
2130
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
2131
2131
|
emit_comment(parser, output);
|
2132
2132
|
return RETURN_ERROR;
|
@@ -2145,13 +2145,13 @@ static StateResult handle_comment_end_dash_state(
|
|
2145
2145
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_COMMENT_END);
|
2146
2146
|
return NEXT_CHAR;
|
2147
2147
|
case '\0':
|
2148
|
-
|
2148
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_UTF8_NULL);
|
2149
2149
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_COMMENT);
|
2150
2150
|
append_char_to_temporary_buffer(parser, '-');
|
2151
2151
|
append_char_to_temporary_buffer(parser, kUtf8ReplacementChar);
|
2152
2152
|
return NEXT_CHAR;
|
2153
2153
|
case -1:
|
2154
|
-
|
2154
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_COMMENT_EOF);
|
2155
2155
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
2156
2156
|
emit_comment(parser, output);
|
2157
2157
|
return RETURN_ERROR;
|
@@ -2172,27 +2172,27 @@ static StateResult handle_comment_end_state(
|
|
2172
2172
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
2173
2173
|
return emit_comment(parser, output);
|
2174
2174
|
case '\0':
|
2175
|
-
|
2175
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_UTF8_NULL);
|
2176
2176
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_COMMENT);
|
2177
2177
|
append_char_to_temporary_buffer(parser, '-');
|
2178
2178
|
append_char_to_temporary_buffer(parser, '-');
|
2179
2179
|
append_char_to_temporary_buffer(parser, kUtf8ReplacementChar);
|
2180
2180
|
return NEXT_CHAR;
|
2181
2181
|
case '!':
|
2182
|
-
|
2182
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_COMMENT_BANG_AFTER_DOUBLE_DASH);
|
2183
2183
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_COMMENT_END_BANG);
|
2184
2184
|
return NEXT_CHAR;
|
2185
2185
|
case '-':
|
2186
|
-
|
2186
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_COMMENT_DASH_AFTER_DOUBLE_DASH);
|
2187
2187
|
append_char_to_temporary_buffer(parser, '-');
|
2188
2188
|
return NEXT_CHAR;
|
2189
2189
|
case -1:
|
2190
|
-
|
2190
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_UTF8_NULL);
|
2191
2191
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
2192
2192
|
emit_comment(parser, output);
|
2193
2193
|
return RETURN_ERROR;
|
2194
2194
|
default:
|
2195
|
-
|
2195
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_COMMENT_INVALID);
|
2196
2196
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_COMMENT);
|
2197
2197
|
append_char_to_temporary_buffer(parser, '-');
|
2198
2198
|
append_char_to_temporary_buffer(parser, '-');
|
@@ -2216,7 +2216,7 @@ static StateResult handle_comment_end_bang_state(
|
|
2216
2216
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
2217
2217
|
return emit_comment(parser, output);
|
2218
2218
|
case '\0':
|
2219
|
-
|
2219
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_UTF8_NULL);
|
2220
2220
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_COMMENT);
|
2221
2221
|
append_char_to_temporary_buffer(parser, '-');
|
2222
2222
|
append_char_to_temporary_buffer(parser, '-');
|
@@ -2224,7 +2224,7 @@ static StateResult handle_comment_end_bang_state(
|
|
2224
2224
|
append_char_to_temporary_buffer(parser, kUtf8ReplacementChar);
|
2225
2225
|
return NEXT_CHAR;
|
2226
2226
|
case -1:
|
2227
|
-
|
2227
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_COMMENT_END_BANG_EOF);
|
2228
2228
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
2229
2229
|
emit_comment(parser, output);
|
2230
2230
|
return RETURN_ERROR;
|
@@ -2251,13 +2251,13 @@ static StateResult handle_doctype_state(
|
|
2251
2251
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_BEFORE_DOCTYPE_NAME);
|
2252
2252
|
return NEXT_CHAR;
|
2253
2253
|
case -1:
|
2254
|
-
|
2254
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_EOF);
|
2255
2255
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
2256
2256
|
tokenizer->_doc_type_state.force_quirks = true;
|
2257
2257
|
emit_doctype(parser, output);
|
2258
2258
|
return RETURN_ERROR;
|
2259
2259
|
default:
|
2260
|
-
|
2260
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_SPACE);
|
2261
2261
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_BEFORE_DOCTYPE_NAME);
|
2262
2262
|
tokenizer->_reconsume_current_input = true;
|
2263
2263
|
tokenizer->_doc_type_state.force_quirks = true;
|
@@ -2276,19 +2276,19 @@ static StateResult handle_before_doctype_name_state(
|
|
2276
2276
|
case ' ':
|
2277
2277
|
return NEXT_CHAR;
|
2278
2278
|
case '\0':
|
2279
|
-
|
2279
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_UTF8_NULL);
|
2280
2280
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DOCTYPE_NAME);
|
2281
2281
|
tokenizer->_doc_type_state.force_quirks = true;
|
2282
2282
|
append_char_to_temporary_buffer(parser, kUtf8ReplacementChar);
|
2283
2283
|
return NEXT_CHAR;
|
2284
2284
|
case '>':
|
2285
|
-
|
2285
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_RIGHT_BRACKET);
|
2286
2286
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
2287
2287
|
tokenizer->_doc_type_state.force_quirks = true;
|
2288
2288
|
emit_doctype(parser, output);
|
2289
2289
|
return RETURN_ERROR;
|
2290
2290
|
case -1:
|
2291
|
-
|
2291
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_EOF);
|
2292
2292
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
2293
2293
|
tokenizer->_doc_type_state.force_quirks = true;
|
2294
2294
|
emit_doctype(parser, output);
|
@@ -2323,11 +2323,11 @@ static StateResult handle_doctype_name_state(
|
|
2323
2323
|
emit_doctype(parser, output);
|
2324
2324
|
return RETURN_SUCCESS;
|
2325
2325
|
case '\0':
|
2326
|
-
|
2326
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_UTF8_NULL);
|
2327
2327
|
append_char_to_temporary_buffer(parser, kUtf8ReplacementChar);
|
2328
2328
|
return NEXT_CHAR;
|
2329
2329
|
case -1:
|
2330
|
-
|
2330
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_EOF);
|
2331
2331
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
2332
2332
|
tokenizer->_doc_type_state.force_quirks = true;
|
2333
2333
|
gumbo_parser_deallocate(parser, (void*) tokenizer->_doc_type_state.name);
|
@@ -2358,7 +2358,7 @@ static StateResult handle_after_doctype_name_state(
|
|
2358
2358
|
emit_doctype(parser, output);
|
2359
2359
|
return RETURN_SUCCESS;
|
2360
2360
|
case -1:
|
2361
|
-
|
2361
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_EOF);
|
2362
2362
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
2363
2363
|
tokenizer->_doc_type_state.force_quirks = true;
|
2364
2364
|
emit_doctype(parser, output);
|
@@ -2375,7 +2375,7 @@ static StateResult handle_after_doctype_name_state(
|
|
2375
2375
|
parser, GUMBO_LEX_AFTER_DOCTYPE_SYSTEM_KEYWORD);
|
2376
2376
|
tokenizer->_reconsume_current_input = true;
|
2377
2377
|
} else {
|
2378
|
-
|
2378
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_SPACE_OR_RIGHT_BRACKET);
|
2379
2379
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_BOGUS_DOCTYPE);
|
2380
2380
|
tokenizer->_doc_type_state.force_quirks = true;
|
2381
2381
|
}
|
@@ -2396,31 +2396,31 @@ static StateResult handle_after_doctype_public_keyword_state(
|
|
2396
2396
|
parser, GUMBO_LEX_BEFORE_DOCTYPE_PUBLIC_ID);
|
2397
2397
|
return NEXT_CHAR;
|
2398
2398
|
case '"':
|
2399
|
-
|
2399
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_INVALID);
|
2400
2400
|
assert(temporary_buffer_equals(parser, ""));
|
2401
2401
|
gumbo_tokenizer_set_state(
|
2402
2402
|
parser, GUMBO_LEX_DOCTYPE_PUBLIC_ID_DOUBLE_QUOTED);
|
2403
2403
|
return NEXT_CHAR;
|
2404
2404
|
case '\'':
|
2405
|
-
|
2405
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_INVALID);
|
2406
2406
|
assert(temporary_buffer_equals(parser, ""));
|
2407
2407
|
gumbo_tokenizer_set_state(
|
2408
2408
|
parser, GUMBO_LEX_DOCTYPE_PUBLIC_ID_SINGLE_QUOTED);
|
2409
2409
|
return NEXT_CHAR;
|
2410
2410
|
case '>':
|
2411
|
-
|
2411
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_RIGHT_BRACKET);
|
2412
2412
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
2413
2413
|
tokenizer->_doc_type_state.force_quirks = true;
|
2414
2414
|
emit_doctype(parser, output);
|
2415
2415
|
return RETURN_ERROR;
|
2416
2416
|
case -1:
|
2417
|
-
|
2417
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_EOF);
|
2418
2418
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
2419
2419
|
tokenizer->_doc_type_state.force_quirks = true;
|
2420
2420
|
emit_doctype(parser, output);
|
2421
2421
|
return RETURN_ERROR;
|
2422
2422
|
default:
|
2423
|
-
|
2423
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_INVALID);
|
2424
2424
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_BOGUS_DOCTYPE);
|
2425
2425
|
tokenizer->_doc_type_state.force_quirks = true;
|
2426
2426
|
emit_doctype(parser, output);
|
@@ -2449,19 +2449,19 @@ static StateResult handle_before_doctype_public_id_state(
|
|
2449
2449
|
parser, GUMBO_LEX_DOCTYPE_PUBLIC_ID_SINGLE_QUOTED);
|
2450
2450
|
return NEXT_CHAR;
|
2451
2451
|
case '>':
|
2452
|
-
|
2452
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_END);
|
2453
2453
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
2454
2454
|
tokenizer->_doc_type_state.force_quirks = true;
|
2455
2455
|
emit_doctype(parser, output);
|
2456
2456
|
return RETURN_ERROR;
|
2457
2457
|
case -1:
|
2458
|
-
|
2458
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_EOF);
|
2459
2459
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
2460
2460
|
tokenizer->_doc_type_state.force_quirks = true;
|
2461
2461
|
emit_doctype(parser, output);
|
2462
2462
|
return RETURN_ERROR;
|
2463
2463
|
default:
|
2464
|
-
|
2464
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_INVALID);
|
2465
2465
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_BOGUS_DOCTYPE);
|
2466
2466
|
tokenizer->_doc_type_state.force_quirks = true;
|
2467
2467
|
emit_doctype(parser, output);
|
@@ -2479,18 +2479,18 @@ static StateResult handle_doctype_public_id_double_quoted_state(
|
|
2479
2479
|
finish_doctype_public_id(parser);
|
2480
2480
|
return NEXT_CHAR;
|
2481
2481
|
case '\0':
|
2482
|
-
|
2482
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_UTF8_NULL);
|
2483
2483
|
append_char_to_temporary_buffer(parser, kUtf8ReplacementChar);
|
2484
2484
|
return NEXT_CHAR;
|
2485
2485
|
case '>':
|
2486
|
-
|
2486
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_END);
|
2487
2487
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
2488
2488
|
tokenizer->_doc_type_state.force_quirks = true;
|
2489
2489
|
finish_doctype_public_id(parser);
|
2490
2490
|
emit_doctype(parser, output);
|
2491
2491
|
return RETURN_ERROR;
|
2492
2492
|
case -1:
|
2493
|
-
|
2493
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_EOF);
|
2494
2494
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
2495
2495
|
tokenizer->_doc_type_state.force_quirks = true;
|
2496
2496
|
finish_doctype_public_id(parser);
|
@@ -2512,18 +2512,18 @@ static StateResult handle_doctype_public_id_single_quoted_state(
|
|
2512
2512
|
finish_doctype_public_id(parser);
|
2513
2513
|
return NEXT_CHAR;
|
2514
2514
|
case '\0':
|
2515
|
-
|
2515
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_UTF8_NULL);
|
2516
2516
|
append_char_to_temporary_buffer(parser, kUtf8ReplacementChar);
|
2517
2517
|
return NEXT_CHAR;
|
2518
2518
|
case '>':
|
2519
|
-
|
2519
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_END);
|
2520
2520
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
2521
2521
|
tokenizer->_doc_type_state.force_quirks = true;
|
2522
2522
|
finish_doctype_public_id(parser);
|
2523
2523
|
emit_doctype(parser, output);
|
2524
2524
|
return RETURN_ERROR;
|
2525
2525
|
case -1:
|
2526
|
-
|
2526
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_EOF);
|
2527
2527
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
2528
2528
|
tokenizer->_doc_type_state.force_quirks = true;
|
2529
2529
|
finish_doctype_public_id(parser);
|
@@ -2552,25 +2552,25 @@ static StateResult handle_after_doctype_public_id_state(
|
|
2552
2552
|
emit_doctype(parser, output);
|
2553
2553
|
return RETURN_SUCCESS;
|
2554
2554
|
case '"':
|
2555
|
-
|
2555
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_INVALID);
|
2556
2556
|
assert(temporary_buffer_equals(parser, ""));
|
2557
2557
|
gumbo_tokenizer_set_state(
|
2558
2558
|
parser, GUMBO_LEX_DOCTYPE_SYSTEM_ID_DOUBLE_QUOTED);
|
2559
2559
|
return NEXT_CHAR;
|
2560
2560
|
case '\'':
|
2561
|
-
|
2561
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_INVALID);
|
2562
2562
|
assert(temporary_buffer_equals(parser, ""));
|
2563
2563
|
gumbo_tokenizer_set_state(
|
2564
2564
|
parser, GUMBO_LEX_DOCTYPE_SYSTEM_ID_SINGLE_QUOTED);
|
2565
2565
|
return NEXT_CHAR;
|
2566
2566
|
case -1:
|
2567
|
-
|
2567
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_EOF);
|
2568
2568
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
2569
2569
|
tokenizer->_reconsume_current_input = true;
|
2570
2570
|
tokenizer->_doc_type_state.force_quirks = true;
|
2571
2571
|
return NEXT_CHAR;
|
2572
2572
|
default:
|
2573
|
-
|
2573
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_INVALID);
|
2574
2574
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_BOGUS_DOCTYPE);
|
2575
2575
|
tokenizer->_doc_type_state.force_quirks = true;
|
2576
2576
|
return NEXT_CHAR;
|
@@ -2602,13 +2602,13 @@ static StateResult handle_between_doctype_public_system_id_state(
|
|
2602
2602
|
parser, GUMBO_LEX_DOCTYPE_SYSTEM_ID_SINGLE_QUOTED);
|
2603
2603
|
return NEXT_CHAR;
|
2604
2604
|
case -1:
|
2605
|
-
|
2605
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_EOF);
|
2606
2606
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
2607
2607
|
tokenizer->_doc_type_state.force_quirks = true;
|
2608
2608
|
emit_doctype(parser, output);
|
2609
2609
|
return RETURN_ERROR;
|
2610
2610
|
default:
|
2611
|
-
|
2611
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_INVALID);
|
2612
2612
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_BOGUS_DOCTYPE);
|
2613
2613
|
tokenizer->_doc_type_state.force_quirks = true;
|
2614
2614
|
emit_doctype(parser, output);
|
@@ -2628,31 +2628,31 @@ static StateResult handle_after_doctype_system_keyword_state(
|
|
2628
2628
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_BEFORE_DOCTYPE_SYSTEM_ID);
|
2629
2629
|
return NEXT_CHAR;
|
2630
2630
|
case '"':
|
2631
|
-
|
2631
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_INVALID);
|
2632
2632
|
assert(temporary_buffer_equals(parser, ""));
|
2633
2633
|
gumbo_tokenizer_set_state(
|
2634
2634
|
parser, GUMBO_LEX_DOCTYPE_SYSTEM_ID_DOUBLE_QUOTED);
|
2635
2635
|
return NEXT_CHAR;
|
2636
2636
|
case '\'':
|
2637
|
-
|
2637
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_INVALID);
|
2638
2638
|
assert(temporary_buffer_equals(parser, ""));
|
2639
2639
|
gumbo_tokenizer_set_state(
|
2640
2640
|
parser, GUMBO_LEX_DOCTYPE_SYSTEM_ID_SINGLE_QUOTED);
|
2641
2641
|
return NEXT_CHAR;
|
2642
2642
|
case '>':
|
2643
|
-
|
2643
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_END);
|
2644
2644
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
2645
2645
|
tokenizer->_doc_type_state.force_quirks = true;
|
2646
2646
|
emit_doctype(parser, output);
|
2647
2647
|
return RETURN_ERROR;
|
2648
2648
|
case -1:
|
2649
|
-
|
2649
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_EOF);
|
2650
2650
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
2651
2651
|
tokenizer->_doc_type_state.force_quirks = true;
|
2652
2652
|
emit_doctype(parser, output);
|
2653
2653
|
return RETURN_ERROR;
|
2654
2654
|
default:
|
2655
|
-
|
2655
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_INVALID);
|
2656
2656
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_BOGUS_DOCTYPE);
|
2657
2657
|
tokenizer->_doc_type_state.force_quirks = true;
|
2658
2658
|
return NEXT_CHAR;
|
@@ -2680,19 +2680,19 @@ static StateResult handle_before_doctype_system_id_state(
|
|
2680
2680
|
parser, GUMBO_LEX_DOCTYPE_SYSTEM_ID_SINGLE_QUOTED);
|
2681
2681
|
return NEXT_CHAR;
|
2682
2682
|
case '>':
|
2683
|
-
|
2683
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_END);
|
2684
2684
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
2685
2685
|
tokenizer->_doc_type_state.force_quirks = true;
|
2686
2686
|
emit_doctype(parser, output);
|
2687
2687
|
return RETURN_ERROR;
|
2688
2688
|
case -1:
|
2689
|
-
|
2689
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_EOF);
|
2690
2690
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
2691
2691
|
tokenizer->_doc_type_state.force_quirks = true;
|
2692
2692
|
emit_doctype(parser, output);
|
2693
2693
|
return RETURN_ERROR;
|
2694
2694
|
default:
|
2695
|
-
|
2695
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_INVALID);
|
2696
2696
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_BOGUS_DOCTYPE);
|
2697
2697
|
tokenizer->_doc_type_state.force_quirks = true;
|
2698
2698
|
return NEXT_CHAR;
|
@@ -2709,18 +2709,18 @@ static StateResult handle_doctype_system_id_double_quoted_state(
|
|
2709
2709
|
finish_doctype_system_id(parser);
|
2710
2710
|
return NEXT_CHAR;
|
2711
2711
|
case '\0':
|
2712
|
-
|
2712
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_UTF8_NULL);
|
2713
2713
|
append_char_to_temporary_buffer(parser, kUtf8ReplacementChar);
|
2714
2714
|
return NEXT_CHAR;
|
2715
2715
|
case '>':
|
2716
|
-
|
2716
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_END);
|
2717
2717
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
2718
2718
|
tokenizer->_doc_type_state.force_quirks = true;
|
2719
2719
|
finish_doctype_system_id(parser);
|
2720
2720
|
emit_doctype(parser, output);
|
2721
2721
|
return RETURN_ERROR;
|
2722
2722
|
case -1:
|
2723
|
-
|
2723
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_EOF);
|
2724
2724
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
2725
2725
|
tokenizer->_doc_type_state.force_quirks = true;
|
2726
2726
|
finish_doctype_system_id(parser);
|
@@ -2742,18 +2742,18 @@ static StateResult handle_doctype_system_id_single_quoted_state(
|
|
2742
2742
|
finish_doctype_system_id(parser);
|
2743
2743
|
return NEXT_CHAR;
|
2744
2744
|
case '\0':
|
2745
|
-
|
2745
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_UTF8_NULL);
|
2746
2746
|
append_char_to_temporary_buffer(parser, kUtf8ReplacementChar);
|
2747
2747
|
return NEXT_CHAR;
|
2748
2748
|
case '>':
|
2749
|
-
|
2749
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_END);
|
2750
2750
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
2751
2751
|
tokenizer->_doc_type_state.force_quirks = true;
|
2752
2752
|
finish_doctype_system_id(parser);
|
2753
2753
|
emit_doctype(parser, output);
|
2754
2754
|
return RETURN_ERROR;
|
2755
2755
|
case -1:
|
2756
|
-
|
2756
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_INVALID);
|
2757
2757
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
2758
2758
|
tokenizer->_doc_type_state.force_quirks = true;
|
2759
2759
|
finish_doctype_system_id(parser);
|
@@ -2780,13 +2780,13 @@ static StateResult handle_after_doctype_system_id_state(
|
|
2780
2780
|
emit_doctype(parser, output);
|
2781
2781
|
return RETURN_SUCCESS;
|
2782
2782
|
case -1:
|
2783
|
-
|
2783
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_EOF);
|
2784
2784
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
2785
2785
|
tokenizer->_doc_type_state.force_quirks = true;
|
2786
2786
|
emit_doctype(parser, output);
|
2787
2787
|
return RETURN_ERROR;
|
2788
2788
|
default:
|
2789
|
-
|
2789
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_INVALID);
|
2790
2790
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_BOGUS_DOCTYPE);
|
2791
2791
|
return NEXT_CHAR;
|
2792
2792
|
}
|