nokogumbo 1.1.12 → 1.1.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ext/nokogumboc/extconf.rb +1 -1
- data/gumbo-parser/src/char_ref.c +22828 -2291
- data/gumbo-parser/src/char_ref.rl +2548 -0
- data/gumbo-parser/src/error.c +21 -0
- data/gumbo-parser/src/parser.c +109 -105
- data/gumbo-parser/src/tokenizer.c +103 -103
- data/gumbo-parser/src/utf8.c +114 -120
- data/gumbo-parser/src/utf8.h +6 -0
- metadata +3 -2
@@ -196,7 +196,7 @@ typedef struct GumboInternalTokenizerState {
|
|
196
196
|
} GumboTokenizerState;
|
197
197
|
|
198
198
|
// Adds an ERR_UNEXPECTED_CODE_POINT parse error to the parser's error struct.
|
199
|
-
static void
|
199
|
+
static void tokenizer_add_parse_error(GumboParser* parser, GumboErrorType type) {
|
200
200
|
GumboError* error = gumbo_add_error(parser);
|
201
201
|
if (!error) {
|
202
202
|
return;
|
@@ -485,7 +485,7 @@ static void emit_char(GumboParser* parser, int c, GumboToken* output) {
|
|
485
485
|
static StateResult emit_replacement_char(
|
486
486
|
GumboParser* parser, GumboToken* output) {
|
487
487
|
// In all cases, this is because of a null byte in the input stream.
|
488
|
-
|
488
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_UTF8_NULL);
|
489
489
|
emit_char(parser, kUtf8ReplacementChar, output);
|
490
490
|
return RETURN_ERROR;
|
491
491
|
}
|
@@ -906,7 +906,7 @@ static StateResult handle_data_state(
|
|
906
906
|
append_char_to_temporary_buffer(parser, '<');
|
907
907
|
return NEXT_CHAR;
|
908
908
|
case '\0':
|
909
|
-
|
909
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_UTF8_NULL);
|
910
910
|
emit_char(parser, c, output);
|
911
911
|
return RETURN_ERROR;
|
912
912
|
default:
|
@@ -1023,7 +1023,7 @@ static StateResult handle_tag_open_state(
|
|
1023
1023
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_BOGUS_COMMENT);
|
1024
1024
|
clear_temporary_buffer(parser);
|
1025
1025
|
append_char_to_temporary_buffer(parser, '?');
|
1026
|
-
|
1026
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_TAG_STARTS_WITH_QUESTION);
|
1027
1027
|
return NEXT_CHAR;
|
1028
1028
|
default:
|
1029
1029
|
if (is_alpha(c)) {
|
@@ -1031,7 +1031,7 @@ static StateResult handle_tag_open_state(
|
|
1031
1031
|
start_new_tag(parser, true);
|
1032
1032
|
return NEXT_CHAR;
|
1033
1033
|
} else {
|
1034
|
-
|
1034
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_TAG_INVALID);
|
1035
1035
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
1036
1036
|
emit_temporary_buffer(parser, output);
|
1037
1037
|
return RETURN_ERROR;
|
@@ -1046,11 +1046,11 @@ static StateResult handle_end_tag_open_state(
|
|
1046
1046
|
assert(temporary_buffer_equals(parser, "</"));
|
1047
1047
|
switch (c) {
|
1048
1048
|
case '>':
|
1049
|
-
|
1049
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_CLOSE_TAG_EMPTY);
|
1050
1050
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
1051
1051
|
return NEXT_CHAR;
|
1052
1052
|
case -1:
|
1053
|
-
|
1053
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_CLOSE_TAG_EOF);
|
1054
1054
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
1055
1055
|
return emit_temporary_buffer(parser, output);
|
1056
1056
|
default:
|
@@ -1058,7 +1058,7 @@ static StateResult handle_end_tag_open_state(
|
|
1058
1058
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_TAG_NAME);
|
1059
1059
|
start_new_tag(parser, false);
|
1060
1060
|
} else {
|
1061
|
-
|
1061
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_CLOSE_TAG_INVALID);
|
1062
1062
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_BOGUS_COMMENT);
|
1063
1063
|
clear_temporary_buffer(parser);
|
1064
1064
|
append_char_to_temporary_buffer(parser, c);
|
@@ -1088,11 +1088,11 @@ static StateResult handle_tag_name_state(
|
|
1088
1088
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
1089
1089
|
return emit_current_tag(parser, output);
|
1090
1090
|
case '\0':
|
1091
|
-
|
1091
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_UTF8_NULL);
|
1092
1092
|
append_char_to_tag_buffer(parser, kUtf8ReplacementChar, true);
|
1093
1093
|
return NEXT_CHAR;
|
1094
1094
|
case -1:
|
1095
|
-
|
1095
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_TAG_EOF);
|
1096
1096
|
abandon_current_tag(parser);
|
1097
1097
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
1098
1098
|
return NEXT_CHAR;
|
@@ -1349,7 +1349,7 @@ static StateResult handle_script_escaped_state(
|
|
1349
1349
|
case '\0':
|
1350
1350
|
return emit_replacement_char(parser, output);
|
1351
1351
|
case -1:
|
1352
|
-
|
1352
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_SCRIPT_EOF);
|
1353
1353
|
return emit_eof(parser, output);
|
1354
1354
|
default:
|
1355
1355
|
return emit_current_char(parser, output);
|
@@ -1373,7 +1373,7 @@ static StateResult handle_script_escaped_dash_state(
|
|
1373
1373
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT_ESCAPED);
|
1374
1374
|
return emit_replacement_char(parser, output);
|
1375
1375
|
case -1:
|
1376
|
-
|
1376
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_SCRIPT_EOF);
|
1377
1377
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
1378
1378
|
return NEXT_CHAR;
|
1379
1379
|
default:
|
@@ -1401,7 +1401,7 @@ static StateResult handle_script_escaped_dash_dash_state(
|
|
1401
1401
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT_ESCAPED);
|
1402
1402
|
return emit_replacement_char(parser, output);
|
1403
1403
|
case -1:
|
1404
|
-
|
1404
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_SCRIPT_EOF);
|
1405
1405
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
1406
1406
|
return NEXT_CHAR;
|
1407
1407
|
default:
|
@@ -1523,7 +1523,7 @@ static StateResult handle_script_double_escaped_state(
|
|
1523
1523
|
case '\0':
|
1524
1524
|
return emit_replacement_char(parser, output);
|
1525
1525
|
case -1:
|
1526
|
-
|
1526
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_SCRIPT_EOF);
|
1527
1527
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
1528
1528
|
return NEXT_CHAR;
|
1529
1529
|
default:
|
@@ -1547,7 +1547,7 @@ static StateResult handle_script_double_escaped_dash_state(
|
|
1547
1547
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT_DOUBLE_ESCAPED);
|
1548
1548
|
return emit_replacement_char(parser, output);
|
1549
1549
|
case -1:
|
1550
|
-
|
1550
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_SCRIPT_EOF);
|
1551
1551
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
1552
1552
|
return NEXT_CHAR;
|
1553
1553
|
default:
|
@@ -1573,7 +1573,7 @@ static StateResult handle_script_double_escaped_dash_dash_state(
|
|
1573
1573
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT_DOUBLE_ESCAPED);
|
1574
1574
|
return emit_replacement_char(parser, output);
|
1575
1575
|
case -1:
|
1576
|
-
|
1576
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_SCRIPT_EOF);
|
1577
1577
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
1578
1578
|
return NEXT_CHAR;
|
1579
1579
|
default:
|
@@ -1644,12 +1644,12 @@ static StateResult handle_before_attr_name_state(
|
|
1644
1644
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
1645
1645
|
return emit_current_tag(parser, output);
|
1646
1646
|
case '\0':
|
1647
|
-
|
1647
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_UTF8_NULL);
|
1648
1648
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_ATTR_NAME);
|
1649
1649
|
append_char_to_temporary_buffer(parser, 0xfffd);
|
1650
1650
|
return NEXT_CHAR;
|
1651
1651
|
case -1:
|
1652
|
-
|
1652
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_ATTR_NAME_EOF);
|
1653
1653
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
1654
1654
|
abandon_current_tag(parser);
|
1655
1655
|
return NEXT_CHAR;
|
@@ -1657,7 +1657,7 @@ static StateResult handle_before_attr_name_state(
|
|
1657
1657
|
case '\'':
|
1658
1658
|
case '<':
|
1659
1659
|
case '=':
|
1660
|
-
|
1660
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_ATTR_NAME_INVALID);
|
1661
1661
|
// Fall through.
|
1662
1662
|
default:
|
1663
1663
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_ATTR_NAME);
|
@@ -1691,18 +1691,18 @@ static StateResult handle_attr_name_state(
|
|
1691
1691
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
1692
1692
|
return emit_current_tag(parser, output);
|
1693
1693
|
case '\0':
|
1694
|
-
|
1694
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_UTF8_NULL);
|
1695
1695
|
append_char_to_tag_buffer(parser, kUtf8ReplacementChar, true);
|
1696
1696
|
return NEXT_CHAR;
|
1697
1697
|
case -1:
|
1698
1698
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
1699
1699
|
abandon_current_tag(parser);
|
1700
|
-
|
1700
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_ATTR_NAME_EOF);
|
1701
1701
|
return NEXT_CHAR;
|
1702
1702
|
case '"':
|
1703
1703
|
case '\'':
|
1704
1704
|
case '<':
|
1705
|
-
|
1705
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_ATTR_NAME_INVALID);
|
1706
1706
|
// Fall through.
|
1707
1707
|
default:
|
1708
1708
|
append_char_to_tag_buffer(parser, ensure_lowercase(c), true);
|
@@ -1730,19 +1730,19 @@ static StateResult handle_after_attr_name_state(
|
|
1730
1730
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
1731
1731
|
return emit_current_tag(parser, output);
|
1732
1732
|
case '\0':
|
1733
|
-
|
1733
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_UTF8_NULL);
|
1734
1734
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_ATTR_NAME);
|
1735
1735
|
append_char_to_temporary_buffer(parser, 0xfffd);
|
1736
1736
|
return NEXT_CHAR;
|
1737
1737
|
case -1:
|
1738
|
-
|
1738
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_ATTR_NAME_EOF);
|
1739
1739
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
1740
1740
|
abandon_current_tag(parser);
|
1741
1741
|
return NEXT_CHAR;
|
1742
1742
|
case '"':
|
1743
1743
|
case '\'':
|
1744
1744
|
case '<':
|
1745
|
-
|
1745
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_ATTR_NAME_INVALID);
|
1746
1746
|
// Fall through.
|
1747
1747
|
default:
|
1748
1748
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_ATTR_NAME);
|
@@ -1774,25 +1774,25 @@ static StateResult handle_before_attr_value_state(
|
|
1774
1774
|
reset_tag_buffer_start_point(parser);
|
1775
1775
|
return NEXT_CHAR;
|
1776
1776
|
case '\0':
|
1777
|
-
|
1777
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_UTF8_NULL);
|
1778
1778
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_ATTR_VALUE_UNQUOTED);
|
1779
1779
|
append_char_to_tag_buffer(parser, kUtf8ReplacementChar, true);
|
1780
1780
|
return NEXT_CHAR;
|
1781
1781
|
case -1:
|
1782
|
-
|
1782
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_ATTR_UNQUOTED_EOF);
|
1783
1783
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
1784
1784
|
abandon_current_tag(parser);
|
1785
1785
|
tokenizer->_reconsume_current_input = true;
|
1786
1786
|
return NEXT_CHAR;
|
1787
1787
|
case '>':
|
1788
|
-
|
1788
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_ATTR_UNQUOTED_RIGHT_BRACKET);
|
1789
1789
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
1790
1790
|
emit_current_tag(parser, output);
|
1791
1791
|
return RETURN_ERROR;
|
1792
1792
|
case '<':
|
1793
1793
|
case '=':
|
1794
1794
|
case '`':
|
1795
|
-
|
1795
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_ATTR_UNQUOTED_EQUALS);
|
1796
1796
|
// Fall through.
|
1797
1797
|
default:
|
1798
1798
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_ATTR_VALUE_UNQUOTED);
|
@@ -1815,11 +1815,11 @@ static StateResult handle_attr_value_double_quoted_state(
|
|
1815
1815
|
tokenizer->_reconsume_current_input = true;
|
1816
1816
|
return NEXT_CHAR;
|
1817
1817
|
case '\0':
|
1818
|
-
|
1818
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_UTF8_NULL);
|
1819
1819
|
append_char_to_tag_buffer(parser, kUtf8ReplacementChar, false);
|
1820
1820
|
return NEXT_CHAR;
|
1821
1821
|
case -1:
|
1822
|
-
|
1822
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_ATTR_DOUBLE_QUOTE_EOF);
|
1823
1823
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
1824
1824
|
abandon_current_tag(parser);
|
1825
1825
|
tokenizer->_reconsume_current_input = true;
|
@@ -1844,11 +1844,11 @@ static StateResult handle_attr_value_single_quoted_state(
|
|
1844
1844
|
tokenizer->_reconsume_current_input = true;
|
1845
1845
|
return NEXT_CHAR;
|
1846
1846
|
case '\0':
|
1847
|
-
|
1847
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_UTF8_NULL);
|
1848
1848
|
append_char_to_tag_buffer(parser, kUtf8ReplacementChar, false);
|
1849
1849
|
return NEXT_CHAR;
|
1850
1850
|
case -1:
|
1851
|
-
|
1851
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_ATTR_SINGLE_QUOTE_EOF);
|
1852
1852
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
1853
1853
|
abandon_current_tag(parser);
|
1854
1854
|
tokenizer->_reconsume_current_input = true;
|
@@ -1881,11 +1881,11 @@ static StateResult handle_attr_value_unquoted_state(
|
|
1881
1881
|
finish_attribute_value(parser);
|
1882
1882
|
return emit_current_tag(parser, output);
|
1883
1883
|
case '\0':
|
1884
|
-
|
1884
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_UTF8_NULL);
|
1885
1885
|
append_char_to_tag_buffer(parser, kUtf8ReplacementChar, true);
|
1886
1886
|
return NEXT_CHAR;
|
1887
1887
|
case -1:
|
1888
|
-
|
1888
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_ATTR_UNQUOTED_EOF);
|
1889
1889
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
1890
1890
|
tokenizer->_reconsume_current_input = true;
|
1891
1891
|
abandon_current_tag(parser);
|
@@ -1895,7 +1895,7 @@ static StateResult handle_attr_value_unquoted_state(
|
|
1895
1895
|
case '"':
|
1896
1896
|
case '\'':
|
1897
1897
|
case '`':
|
1898
|
-
|
1898
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_ATTR_UNQUOTED_EQUALS);
|
1899
1899
|
// Fall through.
|
1900
1900
|
default:
|
1901
1901
|
append_char_to_tag_buffer(parser, c, true);
|
@@ -1965,13 +1965,13 @@ static StateResult handle_after_attr_value_quoted_state(
|
|
1965
1965
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
1966
1966
|
return emit_current_tag(parser, output);
|
1967
1967
|
case -1:
|
1968
|
-
|
1968
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_ATTR_AFTER_EOF);
|
1969
1969
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
1970
1970
|
abandon_current_tag(parser);
|
1971
1971
|
tokenizer->_reconsume_current_input = true;
|
1972
1972
|
return NEXT_CHAR;
|
1973
1973
|
default:
|
1974
|
-
|
1974
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_ATTR_AFTER_INVALID);
|
1975
1975
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_BEFORE_ATTR_NAME);
|
1976
1976
|
tokenizer->_reconsume_current_input = true;
|
1977
1977
|
return NEXT_CHAR;
|
@@ -1988,12 +1988,12 @@ static StateResult handle_self_closing_start_tag_state(
|
|
1988
1988
|
tokenizer->_tag_state._is_self_closing = true;
|
1989
1989
|
return emit_current_tag(parser, output);
|
1990
1990
|
case -1:
|
1991
|
-
|
1991
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_SOLIDUS_EOF);
|
1992
1992
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
1993
1993
|
abandon_current_tag(parser);
|
1994
1994
|
return NEXT_CHAR;
|
1995
1995
|
default:
|
1996
|
-
|
1996
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_SOLIDUS_INVALID);
|
1997
1997
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_BEFORE_ATTR_NAME);
|
1998
1998
|
tokenizer->_reconsume_current_input = true;
|
1999
1999
|
return NEXT_CHAR;
|
@@ -2043,7 +2043,7 @@ static StateResult handle_markup_declaration_state(
|
|
2043
2043
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_CDATA);
|
2044
2044
|
tokenizer->_reconsume_current_input = true;
|
2045
2045
|
} else {
|
2046
|
-
|
2046
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DASHES_OR_DOCTYPE);
|
2047
2047
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_BOGUS_COMMENT);
|
2048
2048
|
tokenizer->_reconsume_current_input = true;
|
2049
2049
|
clear_temporary_buffer(parser);
|
@@ -2060,17 +2060,17 @@ static StateResult handle_comment_start_state(
|
|
2060
2060
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_COMMENT_START_DASH);
|
2061
2061
|
return NEXT_CHAR;
|
2062
2062
|
case '\0':
|
2063
|
-
|
2063
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_UTF8_NULL);
|
2064
2064
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_COMMENT);
|
2065
2065
|
append_char_to_temporary_buffer(parser, kUtf8ReplacementChar);
|
2066
2066
|
return NEXT_CHAR;
|
2067
2067
|
case '>':
|
2068
|
-
|
2068
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_COMMENT_INVALID);
|
2069
2069
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
2070
2070
|
emit_comment(parser, output);
|
2071
2071
|
return RETURN_ERROR;
|
2072
2072
|
case -1:
|
2073
|
-
|
2073
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_COMMENT_EOF);
|
2074
2074
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
2075
2075
|
emit_comment(parser, output);
|
2076
2076
|
return RETURN_ERROR;
|
@@ -2090,18 +2090,18 @@ static StateResult handle_comment_start_dash_state(
|
|
2090
2090
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_COMMENT_END);
|
2091
2091
|
return NEXT_CHAR;
|
2092
2092
|
case '\0':
|
2093
|
-
|
2093
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_UTF8_NULL);
|
2094
2094
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_COMMENT);
|
2095
2095
|
append_char_to_temporary_buffer(parser, '-');
|
2096
2096
|
append_char_to_temporary_buffer(parser, kUtf8ReplacementChar);
|
2097
2097
|
return NEXT_CHAR;
|
2098
2098
|
case '>':
|
2099
|
-
|
2099
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_COMMENT_INVALID);
|
2100
2100
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
2101
2101
|
emit_comment(parser, output);
|
2102
2102
|
return RETURN_ERROR;
|
2103
2103
|
case -1:
|
2104
|
-
|
2104
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_COMMENT_EOF);
|
2105
2105
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
2106
2106
|
emit_comment(parser, output);
|
2107
2107
|
return RETURN_ERROR;
|
@@ -2122,11 +2122,11 @@ static StateResult handle_comment_state(
|
|
2122
2122
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_COMMENT_END_DASH);
|
2123
2123
|
return NEXT_CHAR;
|
2124
2124
|
case '\0':
|
2125
|
-
|
2125
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_UTF8_NULL);
|
2126
2126
|
append_char_to_temporary_buffer(parser, kUtf8ReplacementChar);
|
2127
2127
|
return NEXT_CHAR;
|
2128
2128
|
case -1:
|
2129
|
-
|
2129
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_COMMENT_EOF);
|
2130
2130
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
2131
2131
|
emit_comment(parser, output);
|
2132
2132
|
return RETURN_ERROR;
|
@@ -2145,13 +2145,13 @@ static StateResult handle_comment_end_dash_state(
|
|
2145
2145
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_COMMENT_END);
|
2146
2146
|
return NEXT_CHAR;
|
2147
2147
|
case '\0':
|
2148
|
-
|
2148
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_UTF8_NULL);
|
2149
2149
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_COMMENT);
|
2150
2150
|
append_char_to_temporary_buffer(parser, '-');
|
2151
2151
|
append_char_to_temporary_buffer(parser, kUtf8ReplacementChar);
|
2152
2152
|
return NEXT_CHAR;
|
2153
2153
|
case -1:
|
2154
|
-
|
2154
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_COMMENT_EOF);
|
2155
2155
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
2156
2156
|
emit_comment(parser, output);
|
2157
2157
|
return RETURN_ERROR;
|
@@ -2172,27 +2172,27 @@ static StateResult handle_comment_end_state(
|
|
2172
2172
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
2173
2173
|
return emit_comment(parser, output);
|
2174
2174
|
case '\0':
|
2175
|
-
|
2175
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_UTF8_NULL);
|
2176
2176
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_COMMENT);
|
2177
2177
|
append_char_to_temporary_buffer(parser, '-');
|
2178
2178
|
append_char_to_temporary_buffer(parser, '-');
|
2179
2179
|
append_char_to_temporary_buffer(parser, kUtf8ReplacementChar);
|
2180
2180
|
return NEXT_CHAR;
|
2181
2181
|
case '!':
|
2182
|
-
|
2182
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_COMMENT_BANG_AFTER_DOUBLE_DASH);
|
2183
2183
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_COMMENT_END_BANG);
|
2184
2184
|
return NEXT_CHAR;
|
2185
2185
|
case '-':
|
2186
|
-
|
2186
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_COMMENT_DASH_AFTER_DOUBLE_DASH);
|
2187
2187
|
append_char_to_temporary_buffer(parser, '-');
|
2188
2188
|
return NEXT_CHAR;
|
2189
2189
|
case -1:
|
2190
|
-
|
2190
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_UTF8_NULL);
|
2191
2191
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
2192
2192
|
emit_comment(parser, output);
|
2193
2193
|
return RETURN_ERROR;
|
2194
2194
|
default:
|
2195
|
-
|
2195
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_COMMENT_INVALID);
|
2196
2196
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_COMMENT);
|
2197
2197
|
append_char_to_temporary_buffer(parser, '-');
|
2198
2198
|
append_char_to_temporary_buffer(parser, '-');
|
@@ -2216,7 +2216,7 @@ static StateResult handle_comment_end_bang_state(
|
|
2216
2216
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
2217
2217
|
return emit_comment(parser, output);
|
2218
2218
|
case '\0':
|
2219
|
-
|
2219
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_UTF8_NULL);
|
2220
2220
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_COMMENT);
|
2221
2221
|
append_char_to_temporary_buffer(parser, '-');
|
2222
2222
|
append_char_to_temporary_buffer(parser, '-');
|
@@ -2224,7 +2224,7 @@ static StateResult handle_comment_end_bang_state(
|
|
2224
2224
|
append_char_to_temporary_buffer(parser, kUtf8ReplacementChar);
|
2225
2225
|
return NEXT_CHAR;
|
2226
2226
|
case -1:
|
2227
|
-
|
2227
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_COMMENT_END_BANG_EOF);
|
2228
2228
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
2229
2229
|
emit_comment(parser, output);
|
2230
2230
|
return RETURN_ERROR;
|
@@ -2251,13 +2251,13 @@ static StateResult handle_doctype_state(
|
|
2251
2251
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_BEFORE_DOCTYPE_NAME);
|
2252
2252
|
return NEXT_CHAR;
|
2253
2253
|
case -1:
|
2254
|
-
|
2254
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_EOF);
|
2255
2255
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
2256
2256
|
tokenizer->_doc_type_state.force_quirks = true;
|
2257
2257
|
emit_doctype(parser, output);
|
2258
2258
|
return RETURN_ERROR;
|
2259
2259
|
default:
|
2260
|
-
|
2260
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_SPACE);
|
2261
2261
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_BEFORE_DOCTYPE_NAME);
|
2262
2262
|
tokenizer->_reconsume_current_input = true;
|
2263
2263
|
tokenizer->_doc_type_state.force_quirks = true;
|
@@ -2276,19 +2276,19 @@ static StateResult handle_before_doctype_name_state(
|
|
2276
2276
|
case ' ':
|
2277
2277
|
return NEXT_CHAR;
|
2278
2278
|
case '\0':
|
2279
|
-
|
2279
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_UTF8_NULL);
|
2280
2280
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DOCTYPE_NAME);
|
2281
2281
|
tokenizer->_doc_type_state.force_quirks = true;
|
2282
2282
|
append_char_to_temporary_buffer(parser, kUtf8ReplacementChar);
|
2283
2283
|
return NEXT_CHAR;
|
2284
2284
|
case '>':
|
2285
|
-
|
2285
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_RIGHT_BRACKET);
|
2286
2286
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
2287
2287
|
tokenizer->_doc_type_state.force_quirks = true;
|
2288
2288
|
emit_doctype(parser, output);
|
2289
2289
|
return RETURN_ERROR;
|
2290
2290
|
case -1:
|
2291
|
-
|
2291
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_EOF);
|
2292
2292
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
2293
2293
|
tokenizer->_doc_type_state.force_quirks = true;
|
2294
2294
|
emit_doctype(parser, output);
|
@@ -2323,11 +2323,11 @@ static StateResult handle_doctype_name_state(
|
|
2323
2323
|
emit_doctype(parser, output);
|
2324
2324
|
return RETURN_SUCCESS;
|
2325
2325
|
case '\0':
|
2326
|
-
|
2326
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_UTF8_NULL);
|
2327
2327
|
append_char_to_temporary_buffer(parser, kUtf8ReplacementChar);
|
2328
2328
|
return NEXT_CHAR;
|
2329
2329
|
case -1:
|
2330
|
-
|
2330
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_EOF);
|
2331
2331
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
2332
2332
|
tokenizer->_doc_type_state.force_quirks = true;
|
2333
2333
|
gumbo_parser_deallocate(parser, (void*) tokenizer->_doc_type_state.name);
|
@@ -2358,7 +2358,7 @@ static StateResult handle_after_doctype_name_state(
|
|
2358
2358
|
emit_doctype(parser, output);
|
2359
2359
|
return RETURN_SUCCESS;
|
2360
2360
|
case -1:
|
2361
|
-
|
2361
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_EOF);
|
2362
2362
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
2363
2363
|
tokenizer->_doc_type_state.force_quirks = true;
|
2364
2364
|
emit_doctype(parser, output);
|
@@ -2375,7 +2375,7 @@ static StateResult handle_after_doctype_name_state(
|
|
2375
2375
|
parser, GUMBO_LEX_AFTER_DOCTYPE_SYSTEM_KEYWORD);
|
2376
2376
|
tokenizer->_reconsume_current_input = true;
|
2377
2377
|
} else {
|
2378
|
-
|
2378
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_SPACE_OR_RIGHT_BRACKET);
|
2379
2379
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_BOGUS_DOCTYPE);
|
2380
2380
|
tokenizer->_doc_type_state.force_quirks = true;
|
2381
2381
|
}
|
@@ -2396,31 +2396,31 @@ static StateResult handle_after_doctype_public_keyword_state(
|
|
2396
2396
|
parser, GUMBO_LEX_BEFORE_DOCTYPE_PUBLIC_ID);
|
2397
2397
|
return NEXT_CHAR;
|
2398
2398
|
case '"':
|
2399
|
-
|
2399
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_INVALID);
|
2400
2400
|
assert(temporary_buffer_equals(parser, ""));
|
2401
2401
|
gumbo_tokenizer_set_state(
|
2402
2402
|
parser, GUMBO_LEX_DOCTYPE_PUBLIC_ID_DOUBLE_QUOTED);
|
2403
2403
|
return NEXT_CHAR;
|
2404
2404
|
case '\'':
|
2405
|
-
|
2405
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_INVALID);
|
2406
2406
|
assert(temporary_buffer_equals(parser, ""));
|
2407
2407
|
gumbo_tokenizer_set_state(
|
2408
2408
|
parser, GUMBO_LEX_DOCTYPE_PUBLIC_ID_SINGLE_QUOTED);
|
2409
2409
|
return NEXT_CHAR;
|
2410
2410
|
case '>':
|
2411
|
-
|
2411
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_RIGHT_BRACKET);
|
2412
2412
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
2413
2413
|
tokenizer->_doc_type_state.force_quirks = true;
|
2414
2414
|
emit_doctype(parser, output);
|
2415
2415
|
return RETURN_ERROR;
|
2416
2416
|
case -1:
|
2417
|
-
|
2417
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_EOF);
|
2418
2418
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
2419
2419
|
tokenizer->_doc_type_state.force_quirks = true;
|
2420
2420
|
emit_doctype(parser, output);
|
2421
2421
|
return RETURN_ERROR;
|
2422
2422
|
default:
|
2423
|
-
|
2423
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_INVALID);
|
2424
2424
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_BOGUS_DOCTYPE);
|
2425
2425
|
tokenizer->_doc_type_state.force_quirks = true;
|
2426
2426
|
emit_doctype(parser, output);
|
@@ -2449,19 +2449,19 @@ static StateResult handle_before_doctype_public_id_state(
|
|
2449
2449
|
parser, GUMBO_LEX_DOCTYPE_PUBLIC_ID_SINGLE_QUOTED);
|
2450
2450
|
return NEXT_CHAR;
|
2451
2451
|
case '>':
|
2452
|
-
|
2452
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_END);
|
2453
2453
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
2454
2454
|
tokenizer->_doc_type_state.force_quirks = true;
|
2455
2455
|
emit_doctype(parser, output);
|
2456
2456
|
return RETURN_ERROR;
|
2457
2457
|
case -1:
|
2458
|
-
|
2458
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_EOF);
|
2459
2459
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
2460
2460
|
tokenizer->_doc_type_state.force_quirks = true;
|
2461
2461
|
emit_doctype(parser, output);
|
2462
2462
|
return RETURN_ERROR;
|
2463
2463
|
default:
|
2464
|
-
|
2464
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_INVALID);
|
2465
2465
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_BOGUS_DOCTYPE);
|
2466
2466
|
tokenizer->_doc_type_state.force_quirks = true;
|
2467
2467
|
emit_doctype(parser, output);
|
@@ -2479,18 +2479,18 @@ static StateResult handle_doctype_public_id_double_quoted_state(
|
|
2479
2479
|
finish_doctype_public_id(parser);
|
2480
2480
|
return NEXT_CHAR;
|
2481
2481
|
case '\0':
|
2482
|
-
|
2482
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_UTF8_NULL);
|
2483
2483
|
append_char_to_temporary_buffer(parser, kUtf8ReplacementChar);
|
2484
2484
|
return NEXT_CHAR;
|
2485
2485
|
case '>':
|
2486
|
-
|
2486
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_END);
|
2487
2487
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
2488
2488
|
tokenizer->_doc_type_state.force_quirks = true;
|
2489
2489
|
finish_doctype_public_id(parser);
|
2490
2490
|
emit_doctype(parser, output);
|
2491
2491
|
return RETURN_ERROR;
|
2492
2492
|
case -1:
|
2493
|
-
|
2493
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_EOF);
|
2494
2494
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
2495
2495
|
tokenizer->_doc_type_state.force_quirks = true;
|
2496
2496
|
finish_doctype_public_id(parser);
|
@@ -2512,18 +2512,18 @@ static StateResult handle_doctype_public_id_single_quoted_state(
|
|
2512
2512
|
finish_doctype_public_id(parser);
|
2513
2513
|
return NEXT_CHAR;
|
2514
2514
|
case '\0':
|
2515
|
-
|
2515
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_UTF8_NULL);
|
2516
2516
|
append_char_to_temporary_buffer(parser, kUtf8ReplacementChar);
|
2517
2517
|
return NEXT_CHAR;
|
2518
2518
|
case '>':
|
2519
|
-
|
2519
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_END);
|
2520
2520
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
2521
2521
|
tokenizer->_doc_type_state.force_quirks = true;
|
2522
2522
|
finish_doctype_public_id(parser);
|
2523
2523
|
emit_doctype(parser, output);
|
2524
2524
|
return RETURN_ERROR;
|
2525
2525
|
case -1:
|
2526
|
-
|
2526
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_EOF);
|
2527
2527
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
2528
2528
|
tokenizer->_doc_type_state.force_quirks = true;
|
2529
2529
|
finish_doctype_public_id(parser);
|
@@ -2552,25 +2552,25 @@ static StateResult handle_after_doctype_public_id_state(
|
|
2552
2552
|
emit_doctype(parser, output);
|
2553
2553
|
return RETURN_SUCCESS;
|
2554
2554
|
case '"':
|
2555
|
-
|
2555
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_INVALID);
|
2556
2556
|
assert(temporary_buffer_equals(parser, ""));
|
2557
2557
|
gumbo_tokenizer_set_state(
|
2558
2558
|
parser, GUMBO_LEX_DOCTYPE_SYSTEM_ID_DOUBLE_QUOTED);
|
2559
2559
|
return NEXT_CHAR;
|
2560
2560
|
case '\'':
|
2561
|
-
|
2561
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_INVALID);
|
2562
2562
|
assert(temporary_buffer_equals(parser, ""));
|
2563
2563
|
gumbo_tokenizer_set_state(
|
2564
2564
|
parser, GUMBO_LEX_DOCTYPE_SYSTEM_ID_SINGLE_QUOTED);
|
2565
2565
|
return NEXT_CHAR;
|
2566
2566
|
case -1:
|
2567
|
-
|
2567
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_EOF);
|
2568
2568
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
2569
2569
|
tokenizer->_reconsume_current_input = true;
|
2570
2570
|
tokenizer->_doc_type_state.force_quirks = true;
|
2571
2571
|
return NEXT_CHAR;
|
2572
2572
|
default:
|
2573
|
-
|
2573
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_INVALID);
|
2574
2574
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_BOGUS_DOCTYPE);
|
2575
2575
|
tokenizer->_doc_type_state.force_quirks = true;
|
2576
2576
|
return NEXT_CHAR;
|
@@ -2602,13 +2602,13 @@ static StateResult handle_between_doctype_public_system_id_state(
|
|
2602
2602
|
parser, GUMBO_LEX_DOCTYPE_SYSTEM_ID_SINGLE_QUOTED);
|
2603
2603
|
return NEXT_CHAR;
|
2604
2604
|
case -1:
|
2605
|
-
|
2605
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_EOF);
|
2606
2606
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
2607
2607
|
tokenizer->_doc_type_state.force_quirks = true;
|
2608
2608
|
emit_doctype(parser, output);
|
2609
2609
|
return RETURN_ERROR;
|
2610
2610
|
default:
|
2611
|
-
|
2611
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_INVALID);
|
2612
2612
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_BOGUS_DOCTYPE);
|
2613
2613
|
tokenizer->_doc_type_state.force_quirks = true;
|
2614
2614
|
emit_doctype(parser, output);
|
@@ -2628,31 +2628,31 @@ static StateResult handle_after_doctype_system_keyword_state(
|
|
2628
2628
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_BEFORE_DOCTYPE_SYSTEM_ID);
|
2629
2629
|
return NEXT_CHAR;
|
2630
2630
|
case '"':
|
2631
|
-
|
2631
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_INVALID);
|
2632
2632
|
assert(temporary_buffer_equals(parser, ""));
|
2633
2633
|
gumbo_tokenizer_set_state(
|
2634
2634
|
parser, GUMBO_LEX_DOCTYPE_SYSTEM_ID_DOUBLE_QUOTED);
|
2635
2635
|
return NEXT_CHAR;
|
2636
2636
|
case '\'':
|
2637
|
-
|
2637
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_INVALID);
|
2638
2638
|
assert(temporary_buffer_equals(parser, ""));
|
2639
2639
|
gumbo_tokenizer_set_state(
|
2640
2640
|
parser, GUMBO_LEX_DOCTYPE_SYSTEM_ID_SINGLE_QUOTED);
|
2641
2641
|
return NEXT_CHAR;
|
2642
2642
|
case '>':
|
2643
|
-
|
2643
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_END);
|
2644
2644
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
2645
2645
|
tokenizer->_doc_type_state.force_quirks = true;
|
2646
2646
|
emit_doctype(parser, output);
|
2647
2647
|
return RETURN_ERROR;
|
2648
2648
|
case -1:
|
2649
|
-
|
2649
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_EOF);
|
2650
2650
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
2651
2651
|
tokenizer->_doc_type_state.force_quirks = true;
|
2652
2652
|
emit_doctype(parser, output);
|
2653
2653
|
return RETURN_ERROR;
|
2654
2654
|
default:
|
2655
|
-
|
2655
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_INVALID);
|
2656
2656
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_BOGUS_DOCTYPE);
|
2657
2657
|
tokenizer->_doc_type_state.force_quirks = true;
|
2658
2658
|
return NEXT_CHAR;
|
@@ -2680,19 +2680,19 @@ static StateResult handle_before_doctype_system_id_state(
|
|
2680
2680
|
parser, GUMBO_LEX_DOCTYPE_SYSTEM_ID_SINGLE_QUOTED);
|
2681
2681
|
return NEXT_CHAR;
|
2682
2682
|
case '>':
|
2683
|
-
|
2683
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_END);
|
2684
2684
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
2685
2685
|
tokenizer->_doc_type_state.force_quirks = true;
|
2686
2686
|
emit_doctype(parser, output);
|
2687
2687
|
return RETURN_ERROR;
|
2688
2688
|
case -1:
|
2689
|
-
|
2689
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_EOF);
|
2690
2690
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
2691
2691
|
tokenizer->_doc_type_state.force_quirks = true;
|
2692
2692
|
emit_doctype(parser, output);
|
2693
2693
|
return RETURN_ERROR;
|
2694
2694
|
default:
|
2695
|
-
|
2695
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_INVALID);
|
2696
2696
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_BOGUS_DOCTYPE);
|
2697
2697
|
tokenizer->_doc_type_state.force_quirks = true;
|
2698
2698
|
return NEXT_CHAR;
|
@@ -2709,18 +2709,18 @@ static StateResult handle_doctype_system_id_double_quoted_state(
|
|
2709
2709
|
finish_doctype_system_id(parser);
|
2710
2710
|
return NEXT_CHAR;
|
2711
2711
|
case '\0':
|
2712
|
-
|
2712
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_UTF8_NULL);
|
2713
2713
|
append_char_to_temporary_buffer(parser, kUtf8ReplacementChar);
|
2714
2714
|
return NEXT_CHAR;
|
2715
2715
|
case '>':
|
2716
|
-
|
2716
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_END);
|
2717
2717
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
2718
2718
|
tokenizer->_doc_type_state.force_quirks = true;
|
2719
2719
|
finish_doctype_system_id(parser);
|
2720
2720
|
emit_doctype(parser, output);
|
2721
2721
|
return RETURN_ERROR;
|
2722
2722
|
case -1:
|
2723
|
-
|
2723
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_EOF);
|
2724
2724
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
2725
2725
|
tokenizer->_doc_type_state.force_quirks = true;
|
2726
2726
|
finish_doctype_system_id(parser);
|
@@ -2742,18 +2742,18 @@ static StateResult handle_doctype_system_id_single_quoted_state(
|
|
2742
2742
|
finish_doctype_system_id(parser);
|
2743
2743
|
return NEXT_CHAR;
|
2744
2744
|
case '\0':
|
2745
|
-
|
2745
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_UTF8_NULL);
|
2746
2746
|
append_char_to_temporary_buffer(parser, kUtf8ReplacementChar);
|
2747
2747
|
return NEXT_CHAR;
|
2748
2748
|
case '>':
|
2749
|
-
|
2749
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_END);
|
2750
2750
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
2751
2751
|
tokenizer->_doc_type_state.force_quirks = true;
|
2752
2752
|
finish_doctype_system_id(parser);
|
2753
2753
|
emit_doctype(parser, output);
|
2754
2754
|
return RETURN_ERROR;
|
2755
2755
|
case -1:
|
2756
|
-
|
2756
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_EOF);
|
2757
2757
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
2758
2758
|
tokenizer->_doc_type_state.force_quirks = true;
|
2759
2759
|
finish_doctype_system_id(parser);
|
@@ -2780,13 +2780,13 @@ static StateResult handle_after_doctype_system_id_state(
|
|
2780
2780
|
emit_doctype(parser, output);
|
2781
2781
|
return RETURN_SUCCESS;
|
2782
2782
|
case -1:
|
2783
|
-
|
2783
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_EOF);
|
2784
2784
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
2785
2785
|
tokenizer->_doc_type_state.force_quirks = true;
|
2786
2786
|
emit_doctype(parser, output);
|
2787
2787
|
return RETURN_ERROR;
|
2788
2788
|
default:
|
2789
|
-
|
2789
|
+
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_INVALID);
|
2790
2790
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_BOGUS_DOCTYPE);
|
2791
2791
|
return NEXT_CHAR;
|
2792
2792
|
}
|