nokogumbo 1.4.2 → 1.4.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/gumbo-parser/src/attribute.c +1 -1
- data/gumbo-parser/src/char_ref.c +37 -67
- data/gumbo-parser/src/char_ref.h +3 -4
- data/gumbo-parser/src/char_ref.rl +6 -1
- data/gumbo-parser/src/error.c +50 -51
- data/gumbo-parser/src/error.h +7 -9
- data/gumbo-parser/src/gumbo.h +45 -181
- data/gumbo-parser/src/parser.c +1397 -989
- data/gumbo-parser/src/string_buffer.c +14 -10
- data/gumbo-parser/src/string_buffer.h +9 -6
- data/gumbo-parser/src/string_piece.c +5 -6
- data/gumbo-parser/src/string_piece.h +2 -3
- data/gumbo-parser/src/tag.c +36 -166
- data/gumbo-parser/src/tag.in +150 -0
- data/gumbo-parser/src/tag_enum.h +153 -0
- data/gumbo-parser/src/tag_gperf.h +105 -0
- data/gumbo-parser/src/tag_sizes.h +4 -0
- data/gumbo-parser/src/tag_strings.h +153 -0
- data/gumbo-parser/src/tokenizer.c +264 -360
- data/gumbo-parser/src/tokenizer.h +2 -2
- data/gumbo-parser/src/utf8.c +44 -44
- data/gumbo-parser/src/utf8.h +1 -2
- data/gumbo-parser/src/util.c +1 -1
- data/gumbo-parser/src/util.h +0 -2
- data/gumbo-parser/src/vector.c +17 -17
- data/gumbo-parser/src/vector.h +6 -8
- metadata +8 -3
@@ -0,0 +1,153 @@
|
|
1
|
+
// Generated via `gentags.py src/tag.in`.
|
2
|
+
// Do not edit; edit src/tag.in instead.
|
3
|
+
// clang-format off
|
4
|
+
GUMBO_TAG_HTML,
|
5
|
+
GUMBO_TAG_HEAD,
|
6
|
+
GUMBO_TAG_TITLE,
|
7
|
+
GUMBO_TAG_BASE,
|
8
|
+
GUMBO_TAG_LINK,
|
9
|
+
GUMBO_TAG_META,
|
10
|
+
GUMBO_TAG_STYLE,
|
11
|
+
GUMBO_TAG_SCRIPT,
|
12
|
+
GUMBO_TAG_NOSCRIPT,
|
13
|
+
GUMBO_TAG_TEMPLATE,
|
14
|
+
GUMBO_TAG_BODY,
|
15
|
+
GUMBO_TAG_ARTICLE,
|
16
|
+
GUMBO_TAG_SECTION,
|
17
|
+
GUMBO_TAG_NAV,
|
18
|
+
GUMBO_TAG_ASIDE,
|
19
|
+
GUMBO_TAG_H1,
|
20
|
+
GUMBO_TAG_H2,
|
21
|
+
GUMBO_TAG_H3,
|
22
|
+
GUMBO_TAG_H4,
|
23
|
+
GUMBO_TAG_H5,
|
24
|
+
GUMBO_TAG_H6,
|
25
|
+
GUMBO_TAG_HGROUP,
|
26
|
+
GUMBO_TAG_HEADER,
|
27
|
+
GUMBO_TAG_FOOTER,
|
28
|
+
GUMBO_TAG_ADDRESS,
|
29
|
+
GUMBO_TAG_P,
|
30
|
+
GUMBO_TAG_HR,
|
31
|
+
GUMBO_TAG_PRE,
|
32
|
+
GUMBO_TAG_BLOCKQUOTE,
|
33
|
+
GUMBO_TAG_OL,
|
34
|
+
GUMBO_TAG_UL,
|
35
|
+
GUMBO_TAG_LI,
|
36
|
+
GUMBO_TAG_DL,
|
37
|
+
GUMBO_TAG_DT,
|
38
|
+
GUMBO_TAG_DD,
|
39
|
+
GUMBO_TAG_FIGURE,
|
40
|
+
GUMBO_TAG_FIGCAPTION,
|
41
|
+
GUMBO_TAG_MAIN,
|
42
|
+
GUMBO_TAG_DIV,
|
43
|
+
GUMBO_TAG_A,
|
44
|
+
GUMBO_TAG_EM,
|
45
|
+
GUMBO_TAG_STRONG,
|
46
|
+
GUMBO_TAG_SMALL,
|
47
|
+
GUMBO_TAG_S,
|
48
|
+
GUMBO_TAG_CITE,
|
49
|
+
GUMBO_TAG_Q,
|
50
|
+
GUMBO_TAG_DFN,
|
51
|
+
GUMBO_TAG_ABBR,
|
52
|
+
GUMBO_TAG_DATA,
|
53
|
+
GUMBO_TAG_TIME,
|
54
|
+
GUMBO_TAG_CODE,
|
55
|
+
GUMBO_TAG_VAR,
|
56
|
+
GUMBO_TAG_SAMP,
|
57
|
+
GUMBO_TAG_KBD,
|
58
|
+
GUMBO_TAG_SUB,
|
59
|
+
GUMBO_TAG_SUP,
|
60
|
+
GUMBO_TAG_I,
|
61
|
+
GUMBO_TAG_B,
|
62
|
+
GUMBO_TAG_U,
|
63
|
+
GUMBO_TAG_MARK,
|
64
|
+
GUMBO_TAG_RUBY,
|
65
|
+
GUMBO_TAG_RT,
|
66
|
+
GUMBO_TAG_RP,
|
67
|
+
GUMBO_TAG_BDI,
|
68
|
+
GUMBO_TAG_BDO,
|
69
|
+
GUMBO_TAG_SPAN,
|
70
|
+
GUMBO_TAG_BR,
|
71
|
+
GUMBO_TAG_WBR,
|
72
|
+
GUMBO_TAG_INS,
|
73
|
+
GUMBO_TAG_DEL,
|
74
|
+
GUMBO_TAG_IMAGE,
|
75
|
+
GUMBO_TAG_IMG,
|
76
|
+
GUMBO_TAG_IFRAME,
|
77
|
+
GUMBO_TAG_EMBED,
|
78
|
+
GUMBO_TAG_OBJECT,
|
79
|
+
GUMBO_TAG_PARAM,
|
80
|
+
GUMBO_TAG_VIDEO,
|
81
|
+
GUMBO_TAG_AUDIO,
|
82
|
+
GUMBO_TAG_SOURCE,
|
83
|
+
GUMBO_TAG_TRACK,
|
84
|
+
GUMBO_TAG_CANVAS,
|
85
|
+
GUMBO_TAG_MAP,
|
86
|
+
GUMBO_TAG_AREA,
|
87
|
+
GUMBO_TAG_MATH,
|
88
|
+
GUMBO_TAG_MI,
|
89
|
+
GUMBO_TAG_MO,
|
90
|
+
GUMBO_TAG_MN,
|
91
|
+
GUMBO_TAG_MS,
|
92
|
+
GUMBO_TAG_MTEXT,
|
93
|
+
GUMBO_TAG_MGLYPH,
|
94
|
+
GUMBO_TAG_MALIGNMARK,
|
95
|
+
GUMBO_TAG_ANNOTATION_XML,
|
96
|
+
GUMBO_TAG_SVG,
|
97
|
+
GUMBO_TAG_FOREIGNOBJECT,
|
98
|
+
GUMBO_TAG_DESC,
|
99
|
+
GUMBO_TAG_TABLE,
|
100
|
+
GUMBO_TAG_CAPTION,
|
101
|
+
GUMBO_TAG_COLGROUP,
|
102
|
+
GUMBO_TAG_COL,
|
103
|
+
GUMBO_TAG_TBODY,
|
104
|
+
GUMBO_TAG_THEAD,
|
105
|
+
GUMBO_TAG_TFOOT,
|
106
|
+
GUMBO_TAG_TR,
|
107
|
+
GUMBO_TAG_TD,
|
108
|
+
GUMBO_TAG_TH,
|
109
|
+
GUMBO_TAG_FORM,
|
110
|
+
GUMBO_TAG_FIELDSET,
|
111
|
+
GUMBO_TAG_LEGEND,
|
112
|
+
GUMBO_TAG_LABEL,
|
113
|
+
GUMBO_TAG_INPUT,
|
114
|
+
GUMBO_TAG_BUTTON,
|
115
|
+
GUMBO_TAG_SELECT,
|
116
|
+
GUMBO_TAG_DATALIST,
|
117
|
+
GUMBO_TAG_OPTGROUP,
|
118
|
+
GUMBO_TAG_OPTION,
|
119
|
+
GUMBO_TAG_TEXTAREA,
|
120
|
+
GUMBO_TAG_KEYGEN,
|
121
|
+
GUMBO_TAG_OUTPUT,
|
122
|
+
GUMBO_TAG_PROGRESS,
|
123
|
+
GUMBO_TAG_METER,
|
124
|
+
GUMBO_TAG_DETAILS,
|
125
|
+
GUMBO_TAG_SUMMARY,
|
126
|
+
GUMBO_TAG_MENU,
|
127
|
+
GUMBO_TAG_MENUITEM,
|
128
|
+
GUMBO_TAG_APPLET,
|
129
|
+
GUMBO_TAG_ACRONYM,
|
130
|
+
GUMBO_TAG_BGSOUND,
|
131
|
+
GUMBO_TAG_DIR,
|
132
|
+
GUMBO_TAG_FRAME,
|
133
|
+
GUMBO_TAG_FRAMESET,
|
134
|
+
GUMBO_TAG_NOFRAMES,
|
135
|
+
GUMBO_TAG_ISINDEX,
|
136
|
+
GUMBO_TAG_LISTING,
|
137
|
+
GUMBO_TAG_XMP,
|
138
|
+
GUMBO_TAG_NEXTID,
|
139
|
+
GUMBO_TAG_NOEMBED,
|
140
|
+
GUMBO_TAG_PLAINTEXT,
|
141
|
+
GUMBO_TAG_RB,
|
142
|
+
GUMBO_TAG_STRIKE,
|
143
|
+
GUMBO_TAG_BASEFONT,
|
144
|
+
GUMBO_TAG_BIG,
|
145
|
+
GUMBO_TAG_BLINK,
|
146
|
+
GUMBO_TAG_CENTER,
|
147
|
+
GUMBO_TAG_FONT,
|
148
|
+
GUMBO_TAG_MARQUEE,
|
149
|
+
GUMBO_TAG_MULTICOL,
|
150
|
+
GUMBO_TAG_NOBR,
|
151
|
+
GUMBO_TAG_SPACER,
|
152
|
+
GUMBO_TAG_TT,
|
153
|
+
GUMBO_TAG_RTC,
|
@@ -0,0 +1,105 @@
|
|
1
|
+
// Perfect-hash function for HTML tag names (gperf-style output).
//
// The key is the name length plus table values looked up for the first byte,
// the second byte (index offset by 3, only when the name has at least two
// bytes) and the last byte of the name. Upper- and lower-case ASCII letters
// carry identical table values, so the result is case-insensitive.
//
//   str: tag name bytes. Only the first `len` bytes are read, so the name
//        does not need to be NUL-terminated.
//   len: number of bytes in `str`.
//
// Returns the hash key for the name. An empty name (`len == 0`) yields 0,
// a key no non-empty name can produce because the length alone contributes
// at least 1; previously an empty name caused out-of-bounds reads.
static unsigned int tag_hash(const char *str, unsigned int len) {
  // Indexed by byte value (plus 3 for the second byte). Entries of 296 mark
  // bytes that are not expected in a hashed position of a known tag name
  // (presumably one past the largest valid key -- confirm against the
  // generator output).
  static const unsigned short asso_values[] = {296, 296, 296, 296, 296, 296,
      296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296,
      296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296,
      296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 6, 4, 3,
      1, 1, 0, 1, 0, 0, 296, 296, 296, 296, 296, 296, 296, 22, 73, 151, 4, 13,
      59, 65, 2, 69, 0, 134, 9, 16, 52, 55, 28, 101, 0, 1, 6, 63, 126, 104, 93,
      124, 296, 296, 296, 296, 296, 296, 296, 22, 73, 151, 4, 13, 59, 65, 2, 69,
      0, 134, 9, 16, 52, 55, 28, 101, 0, 1, 6, 63, 126, 104, 93, 124, 296, 296,
      296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296,
      296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296,
      296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296,
      296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296,
      296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296,
      296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296,
      296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296,
      296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296,
      296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296,
      296, 296, 296, 296};

  // An empty name has no first or last byte; bail out before indexing so that
  // `str[len - 1]` cannot wrap around to a wild address.
  if (len == 0) {
    return 0;
  }

  unsigned int hval = len;

  // The second byte only participates for names of two or more bytes.
  if (len > 1) {
    hval += asso_values[(unsigned char) str[1] + 3];
  }
  hval += asso_values[(unsigned char) str[0]];

  return hval + asso_values[(unsigned char) str[len - 1]];
}
|
32
|
+
|
33
|
+
static const unsigned char kGumboTagMap[] = {GUMBO_TAG_LAST, GUMBO_TAG_LAST,
|
34
|
+
GUMBO_TAG_LAST, GUMBO_TAG_S, GUMBO_TAG_H6, GUMBO_TAG_H5, GUMBO_TAG_H4,
|
35
|
+
GUMBO_TAG_H3, GUMBO_TAG_SPACER, GUMBO_TAG_H2, GUMBO_TAG_HEADER,
|
36
|
+
GUMBO_TAG_H1, GUMBO_TAG_HEAD, GUMBO_TAG_LAST, GUMBO_TAG_DETAILS,
|
37
|
+
GUMBO_TAG_SELECT, GUMBO_TAG_DIR, GUMBO_TAG_LAST, GUMBO_TAG_DEL,
|
38
|
+
GUMBO_TAG_LAST, GUMBO_TAG_SOURCE, GUMBO_TAG_LEGEND, GUMBO_TAG_DATALIST,
|
39
|
+
GUMBO_TAG_METER, GUMBO_TAG_MGLYPH, GUMBO_TAG_LAST, GUMBO_TAG_MATH,
|
40
|
+
GUMBO_TAG_LABEL, GUMBO_TAG_TABLE, GUMBO_TAG_TEMPLATE, GUMBO_TAG_LAST,
|
41
|
+
GUMBO_TAG_RP, GUMBO_TAG_TIME, GUMBO_TAG_TITLE, GUMBO_TAG_DATA,
|
42
|
+
GUMBO_TAG_APPLET, GUMBO_TAG_HGROUP, GUMBO_TAG_SAMP, GUMBO_TAG_TEXTAREA,
|
43
|
+
GUMBO_TAG_ABBR, GUMBO_TAG_MARQUEE, GUMBO_TAG_LAST, GUMBO_TAG_MENUITEM,
|
44
|
+
GUMBO_TAG_SMALL, GUMBO_TAG_META, GUMBO_TAG_A, GUMBO_TAG_LAST,
|
45
|
+
GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_EMBED,
|
46
|
+
GUMBO_TAG_MAP, GUMBO_TAG_LAST, GUMBO_TAG_PARAM, GUMBO_TAG_LAST,
|
47
|
+
GUMBO_TAG_LAST, GUMBO_TAG_NOBR, GUMBO_TAG_P, GUMBO_TAG_SPAN, GUMBO_TAG_EM,
|
48
|
+
GUMBO_TAG_LAST, GUMBO_TAG_NOFRAMES, GUMBO_TAG_SECTION, GUMBO_TAG_NOEMBED,
|
49
|
+
GUMBO_TAG_NEXTID, GUMBO_TAG_FOOTER, GUMBO_TAG_NOSCRIPT, GUMBO_TAG_HR,
|
50
|
+
GUMBO_TAG_LAST, GUMBO_TAG_FONT, GUMBO_TAG_DL, GUMBO_TAG_TR,
|
51
|
+
GUMBO_TAG_SCRIPT, GUMBO_TAG_MO, GUMBO_TAG_LAST, GUMBO_TAG_DD,
|
52
|
+
GUMBO_TAG_MAIN, GUMBO_TAG_TD, GUMBO_TAG_FOREIGNOBJECT, GUMBO_TAG_FORM,
|
53
|
+
GUMBO_TAG_OBJECT, GUMBO_TAG_LAST, GUMBO_TAG_FIELDSET, GUMBO_TAG_LAST,
|
54
|
+
GUMBO_TAG_BGSOUND, GUMBO_TAG_MENU, GUMBO_TAG_TFOOT, GUMBO_TAG_FIGURE,
|
55
|
+
GUMBO_TAG_RB, GUMBO_TAG_LI, GUMBO_TAG_LISTING, GUMBO_TAG_BASEFONT,
|
56
|
+
GUMBO_TAG_OPTGROUP, GUMBO_TAG_LAST, GUMBO_TAG_BASE, GUMBO_TAG_ADDRESS,
|
57
|
+
GUMBO_TAG_MI, GUMBO_TAG_LAST, GUMBO_TAG_PLAINTEXT, GUMBO_TAG_LAST,
|
58
|
+
GUMBO_TAG_PROGRESS, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
|
59
|
+
GUMBO_TAG_ACRONYM, GUMBO_TAG_ARTICLE, GUMBO_TAG_LAST, GUMBO_TAG_PRE,
|
60
|
+
GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_AREA,
|
61
|
+
GUMBO_TAG_RT, GUMBO_TAG_LAST, GUMBO_TAG_OPTION, GUMBO_TAG_IMAGE,
|
62
|
+
GUMBO_TAG_DT, GUMBO_TAG_LAST, GUMBO_TAG_TT, GUMBO_TAG_HTML, GUMBO_TAG_WBR,
|
63
|
+
GUMBO_TAG_OL, GUMBO_TAG_LAST, GUMBO_TAG_STYLE, GUMBO_TAG_STRIKE,
|
64
|
+
GUMBO_TAG_SUP, GUMBO_TAG_MULTICOL, GUMBO_TAG_U, GUMBO_TAG_DFN, GUMBO_TAG_UL,
|
65
|
+
GUMBO_TAG_FIGCAPTION, GUMBO_TAG_MTEXT, GUMBO_TAG_LAST, GUMBO_TAG_VAR,
|
66
|
+
GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_FRAMESET, GUMBO_TAG_LAST,
|
67
|
+
GUMBO_TAG_BR, GUMBO_TAG_I, GUMBO_TAG_FRAME, GUMBO_TAG_LAST, GUMBO_TAG_DIV,
|
68
|
+
GUMBO_TAG_LAST, GUMBO_TAG_TH, GUMBO_TAG_MS, GUMBO_TAG_ANNOTATION_XML,
|
69
|
+
GUMBO_TAG_B, GUMBO_TAG_TBODY, GUMBO_TAG_THEAD, GUMBO_TAG_BIG,
|
70
|
+
GUMBO_TAG_BLOCKQUOTE, GUMBO_TAG_XMP, GUMBO_TAG_LAST, GUMBO_TAG_KBD,
|
71
|
+
GUMBO_TAG_LAST, GUMBO_TAG_LINK, GUMBO_TAG_IFRAME, GUMBO_TAG_MARK,
|
72
|
+
GUMBO_TAG_CENTER, GUMBO_TAG_OUTPUT, GUMBO_TAG_DESC, GUMBO_TAG_CANVAS,
|
73
|
+
GUMBO_TAG_COL, GUMBO_TAG_MALIGNMARK, GUMBO_TAG_IMG, GUMBO_TAG_ASIDE,
|
74
|
+
GUMBO_TAG_LAST, GUMBO_TAG_CODE, GUMBO_TAG_LAST, GUMBO_TAG_SUB, GUMBO_TAG_MN,
|
75
|
+
GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_INS, GUMBO_TAG_AUDIO,
|
76
|
+
GUMBO_TAG_STRONG, GUMBO_TAG_CITE, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
|
77
|
+
GUMBO_TAG_LAST, GUMBO_TAG_INPUT, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
|
78
|
+
GUMBO_TAG_LAST, GUMBO_TAG_NAV, GUMBO_TAG_LAST, GUMBO_TAG_COLGROUP,
|
79
|
+
GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
|
80
|
+
GUMBO_TAG_LAST, GUMBO_TAG_SVG, GUMBO_TAG_KEYGEN, GUMBO_TAG_VIDEO,
|
81
|
+
GUMBO_TAG_BDO, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
|
82
|
+
GUMBO_TAG_LAST, GUMBO_TAG_BODY, GUMBO_TAG_LAST, GUMBO_TAG_Q, GUMBO_TAG_LAST,
|
83
|
+
GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_TRACK,
|
84
|
+
GUMBO_TAG_LAST, GUMBO_TAG_BDI, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
|
85
|
+
GUMBO_TAG_LAST, GUMBO_TAG_CAPTION, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
|
86
|
+
GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
|
87
|
+
GUMBO_TAG_RUBY, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_BUTTON,
|
88
|
+
GUMBO_TAG_SUMMARY, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
|
89
|
+
GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
|
90
|
+
GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
|
91
|
+
GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
|
92
|
+
GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
|
93
|
+
GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
|
94
|
+
GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
|
95
|
+
GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
|
96
|
+
GUMBO_TAG_LAST, GUMBO_TAG_RTC, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
|
97
|
+
GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
|
98
|
+
GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_BLINK, GUMBO_TAG_LAST,
|
99
|
+
GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
|
100
|
+
GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
|
101
|
+
GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
|
102
|
+
GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
|
103
|
+
GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
|
104
|
+
GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
|
105
|
+
GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_ISINDEX};
|
@@ -0,0 +1,4 @@
|
|
1
|
+
// Generated via `gentags.py src/tag.in`.
|
2
|
+
// Do not edit; edit src/tag.in instead.
|
3
|
+
// clang-format off
|
4
|
+
4, 4, 5, 4, 4, 4, 5, 6, 8, 8, 4, 7, 7, 3, 5, 2, 2, 2, 2, 2, 2, 6, 6, 6, 7, 1, 2, 3, 10, 2, 2, 2, 2, 2, 2, 6, 10, 4, 3, 1, 2, 6, 5, 1, 4, 1, 3, 4, 4, 4, 4, 3, 4, 3, 3, 3, 1, 1, 1, 4, 4, 2, 2, 3, 3, 4, 2, 3, 3, 3, 5, 3, 6, 5, 6, 5, 5, 5, 6, 5, 6, 3, 4, 4, 2, 2, 2, 2, 5, 6, 10, 14, 3, 13, 4, 5, 7, 8, 3, 5, 5, 5, 2, 2, 2, 4, 8, 6, 5, 5, 6, 6, 8, 8, 6, 8, 6, 6, 8, 5, 7, 7, 4, 8, 6, 7, 7, 3, 5, 8, 8, 7, 7, 3, 6, 7, 9, 2, 6, 8, 3, 5, 6, 4, 7, 8, 4, 6, 2, 3,
|
@@ -0,0 +1,153 @@
|
|
1
|
+
// Generated via `gentags.py src/tag.in`.
|
2
|
+
// Do not edit; edit src/tag.in instead.
|
3
|
+
// clang-format off
|
4
|
+
"html",
|
5
|
+
"head",
|
6
|
+
"title",
|
7
|
+
"base",
|
8
|
+
"link",
|
9
|
+
"meta",
|
10
|
+
"style",
|
11
|
+
"script",
|
12
|
+
"noscript",
|
13
|
+
"template",
|
14
|
+
"body",
|
15
|
+
"article",
|
16
|
+
"section",
|
17
|
+
"nav",
|
18
|
+
"aside",
|
19
|
+
"h1",
|
20
|
+
"h2",
|
21
|
+
"h3",
|
22
|
+
"h4",
|
23
|
+
"h5",
|
24
|
+
"h6",
|
25
|
+
"hgroup",
|
26
|
+
"header",
|
27
|
+
"footer",
|
28
|
+
"address",
|
29
|
+
"p",
|
30
|
+
"hr",
|
31
|
+
"pre",
|
32
|
+
"blockquote",
|
33
|
+
"ol",
|
34
|
+
"ul",
|
35
|
+
"li",
|
36
|
+
"dl",
|
37
|
+
"dt",
|
38
|
+
"dd",
|
39
|
+
"figure",
|
40
|
+
"figcaption",
|
41
|
+
"main",
|
42
|
+
"div",
|
43
|
+
"a",
|
44
|
+
"em",
|
45
|
+
"strong",
|
46
|
+
"small",
|
47
|
+
"s",
|
48
|
+
"cite",
|
49
|
+
"q",
|
50
|
+
"dfn",
|
51
|
+
"abbr",
|
52
|
+
"data",
|
53
|
+
"time",
|
54
|
+
"code",
|
55
|
+
"var",
|
56
|
+
"samp",
|
57
|
+
"kbd",
|
58
|
+
"sub",
|
59
|
+
"sup",
|
60
|
+
"i",
|
61
|
+
"b",
|
62
|
+
"u",
|
63
|
+
"mark",
|
64
|
+
"ruby",
|
65
|
+
"rt",
|
66
|
+
"rp",
|
67
|
+
"bdi",
|
68
|
+
"bdo",
|
69
|
+
"span",
|
70
|
+
"br",
|
71
|
+
"wbr",
|
72
|
+
"ins",
|
73
|
+
"del",
|
74
|
+
"image",
|
75
|
+
"img",
|
76
|
+
"iframe",
|
77
|
+
"embed",
|
78
|
+
"object",
|
79
|
+
"param",
|
80
|
+
"video",
|
81
|
+
"audio",
|
82
|
+
"source",
|
83
|
+
"track",
|
84
|
+
"canvas",
|
85
|
+
"map",
|
86
|
+
"area",
|
87
|
+
"math",
|
88
|
+
"mi",
|
89
|
+
"mo",
|
90
|
+
"mn",
|
91
|
+
"ms",
|
92
|
+
"mtext",
|
93
|
+
"mglyph",
|
94
|
+
"malignmark",
|
95
|
+
"annotation-xml",
|
96
|
+
"svg",
|
97
|
+
"foreignobject",
|
98
|
+
"desc",
|
99
|
+
"table",
|
100
|
+
"caption",
|
101
|
+
"colgroup",
|
102
|
+
"col",
|
103
|
+
"tbody",
|
104
|
+
"thead",
|
105
|
+
"tfoot",
|
106
|
+
"tr",
|
107
|
+
"td",
|
108
|
+
"th",
|
109
|
+
"form",
|
110
|
+
"fieldset",
|
111
|
+
"legend",
|
112
|
+
"label",
|
113
|
+
"input",
|
114
|
+
"button",
|
115
|
+
"select",
|
116
|
+
"datalist",
|
117
|
+
"optgroup",
|
118
|
+
"option",
|
119
|
+
"textarea",
|
120
|
+
"keygen",
|
121
|
+
"output",
|
122
|
+
"progress",
|
123
|
+
"meter",
|
124
|
+
"details",
|
125
|
+
"summary",
|
126
|
+
"menu",
|
127
|
+
"menuitem",
|
128
|
+
"applet",
|
129
|
+
"acronym",
|
130
|
+
"bgsound",
|
131
|
+
"dir",
|
132
|
+
"frame",
|
133
|
+
"frameset",
|
134
|
+
"noframes",
|
135
|
+
"isindex",
|
136
|
+
"listing",
|
137
|
+
"xmp",
|
138
|
+
"nextid",
|
139
|
+
"noembed",
|
140
|
+
"plaintext",
|
141
|
+
"rb",
|
142
|
+
"strike",
|
143
|
+
"basefont",
|
144
|
+
"big",
|
145
|
+
"blink",
|
146
|
+
"center",
|
147
|
+
"font",
|
148
|
+
"marquee",
|
149
|
+
"multicol",
|
150
|
+
"nobr",
|
151
|
+
"spacer",
|
152
|
+
"tt",
|
153
|
+
"rtc",
|
@@ -42,7 +42,6 @@
|
|
42
42
|
// prevents parse error position from being messed up by possible mark/resets in
|
43
43
|
// temporary buffer manipulation.
|
44
44
|
|
45
|
-
|
46
45
|
#include "tokenizer.h"
|
47
46
|
|
48
47
|
#include <assert.h>
|
@@ -64,13 +63,13 @@
|
|
64
63
|
|
65
64
|
// Compared against _script_data_buffer to determine if we're in double-escaped
|
66
65
|
// script mode.
|
67
|
-
const GumboStringPiece kScriptTag = {
|
66
|
+
const GumboStringPiece kScriptTag = {"script", 6};
|
68
67
|
|
69
68
|
// An enum for the return value of each individual state.
|
70
69
|
typedef enum {
|
71
|
-
RETURN_ERROR,
|
72
|
-
RETURN_SUCCESS,
|
73
|
-
NEXT_CHAR
|
70
|
+
RETURN_ERROR, // Return false (error) from the tokenizer.
|
71
|
+
RETURN_SUCCESS, // Return true (success) from the tokenizer.
|
72
|
+
NEXT_CHAR // Proceed to the next character and continue lexing.
|
74
73
|
} StateResult;
|
75
74
|
|
76
75
|
// This is a struct containing state necessary to build up a tag token,
|
@@ -200,7 +199,8 @@ typedef struct GumboInternalTokenizerState {
|
|
200
199
|
} GumboTokenizerState;
|
201
200
|
|
202
201
|
// Adds an ERR_UNEXPECTED_CODE_POINT parse error to the parser's error struct.
|
203
|
-
static void tokenizer_add_parse_error(
|
202
|
+
static void tokenizer_add_parse_error(
|
203
|
+
GumboParser* parser, GumboErrorType type) {
|
204
204
|
GumboError* error = gumbo_add_error(parser);
|
205
205
|
if (!error) {
|
206
206
|
return;
|
@@ -356,12 +356,10 @@ static void clear_temporary_buffer(GumboParser* parser) {
|
|
356
356
|
GumboTokenizerState* tokenizer = parser->_tokenizer_state;
|
357
357
|
assert(!tokenizer->_temporary_buffer_emit);
|
358
358
|
utf8iterator_mark(&tokenizer->_input);
|
359
|
-
|
360
|
-
gumbo_string_buffer_init(parser, &tokenizer->_temporary_buffer);
|
359
|
+
gumbo_string_buffer_clear(parser, &tokenizer->_temporary_buffer);
|
361
360
|
// The temporary buffer and script data buffer are the same object in the
|
362
361
|
// spec, so the script data buffer should be cleared as well.
|
363
|
-
|
364
|
-
gumbo_string_buffer_init(parser, &tokenizer->_script_data_buffer);
|
362
|
+
gumbo_string_buffer_clear(parser, &tokenizer->_script_data_buffer);
|
365
363
|
}
|
366
364
|
|
367
365
|
// Appends a codepoint to the temporary buffer.
|
@@ -374,15 +372,14 @@ static void append_char_to_temporary_buffer(
|
|
374
372
|
// Checks to see if the temporary buffer equals a certain string.
|
375
373
|
// Make sure this remains side-effect free; it's used in assertions.
|
376
374
|
#ifndef NDEBUG
|
377
|
-
static bool temporary_buffer_equals(
|
378
|
-
GumboParser* parser, const char* text) {
|
375
|
+
static bool temporary_buffer_equals(GumboParser* parser, const char* text) {
|
379
376
|
GumboStringBuffer* buffer = &parser->_tokenizer_state->_temporary_buffer;
|
380
377
|
// TODO(jdtang): See if the extra strlen is a performance problem, and replace
|
381
378
|
// it with an explicit sizeof(literal) if necessary. I don't think it will
|
382
379
|
// be, as this is only used in a couple of rare states.
|
383
380
|
int text_len = strlen(text);
|
384
381
|
return text_len == buffer->length &&
|
385
|
-
|
382
|
+
memcmp(buffer->data, text, text_len) == 0;
|
386
383
|
}
|
387
384
|
#endif
|
388
385
|
|
@@ -539,8 +536,8 @@ static StateResult emit_current_tag(GumboParser* parser, GumboToken* output) {
|
|
539
536
|
output->v.start_tag.is_self_closing = tag_state->_is_self_closing;
|
540
537
|
tag_state->_last_start_tag = tag_state->_tag;
|
541
538
|
mark_tag_state_as_empty(tag_state);
|
542
|
-
gumbo_debug(
|
543
|
-
|
539
|
+
gumbo_debug(
|
540
|
+
"Emitted start tag %s.\n", gumbo_normalized_tagname(tag_state->_tag));
|
544
541
|
} else {
|
545
542
|
output->type = GUMBO_TOKEN_END_TAG;
|
546
543
|
output->v.end_tag = tag_state->_tag;
|
@@ -548,17 +545,18 @@ static StateResult emit_current_tag(GumboParser* parser, GumboToken* output) {
|
|
548
545
|
// token, but it's still initialized as normal, so it must be manually
|
549
546
|
// deallocated. There may also be attributes to destroy, in certain broken
|
550
547
|
// cases like </div</th> (the "th" is an attribute there).
|
551
|
-
for (int i = 0; i < tag_state->_attributes.length; ++i) {
|
548
|
+
for (unsigned int i = 0; i < tag_state->_attributes.length; ++i) {
|
552
549
|
gumbo_destroy_attribute(parser, tag_state->_attributes.data[i]);
|
553
550
|
}
|
554
551
|
gumbo_parser_deallocate(parser, tag_state->_attributes.data);
|
555
552
|
mark_tag_state_as_empty(tag_state);
|
556
|
-
gumbo_debug(
|
557
|
-
|
553
|
+
gumbo_debug(
|
554
|
+
"Emitted end tag %s.\n", gumbo_normalized_tagname(tag_state->_tag));
|
558
555
|
}
|
559
556
|
gumbo_string_buffer_destroy(parser, &tag_state->_buffer);
|
560
557
|
finish_token(parser, output);
|
561
|
-
gumbo_debug("Original text = %.*s.\n", output->original_text.length,
|
558
|
+
gumbo_debug("Original text = %.*s.\n", output->original_text.length,
|
559
|
+
output->original_text.data);
|
562
560
|
assert(output->original_text.length >= 2);
|
563
561
|
assert(output->original_text.data[0] == '<');
|
564
562
|
assert(output->original_text.data[output->original_text.length - 1] == '>');
|
@@ -571,7 +569,7 @@ static StateResult emit_current_tag(GumboParser* parser, GumboToken* output) {
|
|
571
569
|
// avoid a memory leak.
|
572
570
|
static void abandon_current_tag(GumboParser* parser) {
|
573
571
|
GumboTagState* tag_state = &parser->_tokenizer_state->_tag_state;
|
574
|
-
for (int i = 0; i < tag_state->_attributes.length; ++i) {
|
572
|
+
for (unsigned int i = 0; i < tag_state->_attributes.length; ++i) {
|
575
573
|
gumbo_destroy_attribute(parser, tag_state->_attributes.data[i]);
|
576
574
|
}
|
577
575
|
gumbo_parser_deallocate(parser, tag_state->_attributes.data);
|
@@ -583,9 +581,8 @@ static void abandon_current_tag(GumboParser* parser) {
|
|
583
581
|
// Wraps the consume_char_ref function to handle its output and make the
|
584
582
|
// appropriate TokenizerState modifications. Returns RETURN_ERROR if a parse
|
585
583
|
// error occurred, RETURN_SUCCESS otherwise.
|
586
|
-
static StateResult emit_char_ref(
|
587
|
-
|
588
|
-
bool is_in_attribute, GumboToken* output) {
|
584
|
+
static StateResult emit_char_ref(GumboParser* parser,
|
585
|
+
int additional_allowed_char, bool is_in_attribute, GumboToken* output) {
|
589
586
|
GumboTokenizerState* tokenizer = parser->_tokenizer_state;
|
590
587
|
OneOrTwoCodepoints char_ref;
|
591
588
|
bool status = consume_char_ref(
|
@@ -649,8 +646,7 @@ static bool maybe_emit_from_temporary_buffer(
|
|
649
646
|
// _temporary_buffer_emit, and then (if the temporary buffer is non-empty) emits
|
650
647
|
// the first character in it. It returns true if a character was emitted, false
|
651
648
|
// otherwise.
|
652
|
-
static bool emit_temporary_buffer(
|
653
|
-
GumboParser* parser, GumboToken* output) {
|
649
|
+
static bool emit_temporary_buffer(GumboParser* parser, GumboToken* output) {
|
654
650
|
GumboTokenizerState* tokenizer = parser->_tokenizer_state;
|
655
651
|
assert(tokenizer->_temporary_buffer.data);
|
656
652
|
utf8iterator_reset(&tokenizer->_input);
|
@@ -663,8 +659,8 @@ static bool emit_temporary_buffer(
|
|
663
659
|
// start point; the only time you would *not* want to pass true for this
|
664
660
|
// parameter is if you want the original_text to include character (like an
|
665
661
|
// opening quote) that doesn't appear in the value.
|
666
|
-
static void append_char_to_tag_buffer(
|
667
|
-
|
662
|
+
static void append_char_to_tag_buffer(
|
663
|
+
GumboParser* parser, int codepoint, bool reinitilize_position_on_first) {
|
668
664
|
GumboStringBuffer* buffer = &parser->_tokenizer_state->_tag_state._buffer;
|
669
665
|
if (buffer->length == 0 && reinitilize_position_on_first) {
|
670
666
|
reset_tag_buffer_start_point(parser);
|
@@ -697,7 +693,11 @@ static void start_new_tag(GumboParser* parser, bool is_start_tag) {
|
|
697
693
|
gumbo_string_buffer_append_codepoint(parser, c, &tag_state->_buffer);
|
698
694
|
|
699
695
|
assert(tag_state->_attributes.data == NULL);
|
700
|
-
|
696
|
+
// Initial size chosen by statistical analysis of a corpus of 60k webpages.
|
697
|
+
// 99.5% of elements have 0 attributes, 93% of the remainder have 1. These
|
698
|
+
// numbers are a bit higher for more modern websites (eg. ~45% = 0, ~40% = 1
|
699
|
+
// for the HTML5 Spec), but still have basically 99% of nodes with <= 2 attrs.
|
700
|
+
gumbo_vector_init(parser, 1, &tag_state->_attributes);
|
701
701
|
tag_state->_drop_next_attr_value = false;
|
702
702
|
tag_state->_is_start_tag = is_start_tag;
|
703
703
|
tag_state->_is_self_closing = false;
|
@@ -717,16 +717,15 @@ static void copy_over_tag_buffer(GumboParser* parser, const char** output) {
|
|
717
717
|
// * The start_pos GumboSourcePosition with the start position of the tag
|
718
718
|
// buffer.
|
719
719
|
// * The end_pos GumboSourcePosition with the current source position.
|
720
|
-
static void copy_over_original_tag_text(
|
721
|
-
|
722
|
-
GumboSourcePosition*
|
720
|
+
static void copy_over_original_tag_text(GumboParser* parser,
|
721
|
+
GumboStringPiece* original_text, GumboSourcePosition* start_pos,
|
722
|
+
GumboSourcePosition* end_pos) {
|
723
723
|
GumboTokenizerState* tokenizer = parser->_tokenizer_state;
|
724
724
|
GumboTagState* tag_state = &tokenizer->_tag_state;
|
725
725
|
|
726
726
|
original_text->data = tag_state->_original_text;
|
727
|
-
original_text->length =
|
728
|
-
|
729
|
-
tag_state->_original_text;
|
727
|
+
original_text->length = utf8iterator_get_char_pointer(&tokenizer->_input) -
|
728
|
+
tag_state->_original_text;
|
730
729
|
if (original_text->data[original_text->length - 1] == '\r') {
|
731
730
|
// Since \r is skipped by the UTF-8 iterator, it can sometimes end up
|
732
731
|
// appended to the end of original text even when it's really the first part
|
@@ -751,16 +750,14 @@ static void finish_tag_name(GumboParser* parser) {
|
|
751
750
|
GumboTokenizerState* tokenizer = parser->_tokenizer_state;
|
752
751
|
GumboTagState* tag_state = &tokenizer->_tag_state;
|
753
752
|
|
754
|
-
|
755
|
-
|
756
|
-
tag_state->_tag = gumbo_tag_enum(temp);
|
753
|
+
tag_state->_tag =
|
754
|
+
gumbo_tagn_enum(tag_state->_buffer.data, tag_state->_buffer.length);
|
757
755
|
reinitialize_tag_buffer(parser);
|
758
|
-
gumbo_parser_deallocate(parser, (void*) temp);
|
759
756
|
}
|
760
757
|
|
761
758
|
// Adds an ERR_DUPLICATE_ATTR parse error to the parser's error struct.
|
762
759
|
static void add_duplicate_attr_error(GumboParser* parser, const char* attr_name,
|
763
|
-
|
760
|
+
int original_index, int new_index) {
|
764
761
|
GumboError* error = gumbo_add_error(parser);
|
765
762
|
if (!error) {
|
766
763
|
return;
|
@@ -790,14 +787,13 @@ static bool finish_attribute_name(GumboParser* parser) {
|
|
790
787
|
assert(tag_state->_attributes.capacity);
|
791
788
|
|
792
789
|
GumboVector* /* GumboAttribute* */ attributes = &tag_state->_attributes;
|
793
|
-
for (int i = 0; i < attributes->length; ++i) {
|
790
|
+
for (unsigned int i = 0; i < attributes->length; ++i) {
|
794
791
|
GumboAttribute* attr = attributes->data[i];
|
795
792
|
if (strlen(attr->name) == tag_state->_buffer.length &&
|
796
793
|
memcmp(attr->name, tag_state->_buffer.data,
|
797
|
-
|
794
|
+
tag_state->_buffer.length) == 0) {
|
798
795
|
// Identical attribute; bail.
|
799
|
-
add_duplicate_attr_error(
|
800
|
-
parser, attr->name, i, attributes->length);
|
796
|
+
add_duplicate_attr_error(parser, attr->name, i, attributes->length);
|
801
797
|
tag_state->_drop_next_attr_value = true;
|
802
798
|
return false;
|
803
799
|
}
|
@@ -806,11 +802,11 @@ static bool finish_attribute_name(GumboParser* parser) {
|
|
806
802
|
GumboAttribute* attr = gumbo_parser_allocate(parser, sizeof(GumboAttribute));
|
807
803
|
attr->attr_namespace = GUMBO_ATTR_NAMESPACE_NONE;
|
808
804
|
copy_over_tag_buffer(parser, &attr->name);
|
809
|
-
copy_over_original_tag_text(
|
810
|
-
|
805
|
+
copy_over_original_tag_text(
|
806
|
+
parser, &attr->original_name, &attr->name_start, &attr->name_end);
|
811
807
|
attr->value = gumbo_copy_stringz(parser, "");
|
812
|
-
copy_over_original_tag_text(
|
813
|
-
|
808
|
+
copy_over_original_tag_text(
|
809
|
+
parser, &attr->original_value, &attr->name_start, &attr->name_end);
|
814
810
|
gumbo_vector_add(parser, attr, attributes);
|
815
811
|
reinitialize_tag_buffer(parser);
|
816
812
|
return true;
|
@@ -832,8 +828,8 @@ static void finish_attribute_value(GumboParser* parser) {
|
|
832
828
|
tag_state->_attributes.data[tag_state->_attributes.length - 1];
|
833
829
|
gumbo_parser_deallocate(parser, (void*) attr->value);
|
834
830
|
copy_over_tag_buffer(parser, &attr->value);
|
835
|
-
copy_over_original_tag_text(
|
836
|
-
|
831
|
+
copy_over_original_tag_text(
|
832
|
+
parser, &attr->original_value, &attr->value_start, &attr->value_end);
|
837
833
|
reinitialize_tag_buffer(parser);
|
838
834
|
}
|
839
835
|
|
@@ -841,13 +837,9 @@ static void finish_attribute_value(GumboParser* parser) {
|
|
841
837
|
static bool is_appropriate_end_tag(GumboParser* parser) {
|
842
838
|
GumboTagState* tag_state = &parser->_tokenizer_state->_tag_state;
|
843
839
|
assert(!tag_state->_is_start_tag);
|
844
|
-
// Null terminate the current string buffer, so it can be passed to
|
845
|
-
// gumbo_tag_enum, but don't increment the length in case we need to dump the
|
846
|
-
// buffer as character tokens.
|
847
|
-
gumbo_string_buffer_append_codepoint(parser, '\0', &tag_state->_buffer);
|
848
|
-
--tag_state->_buffer.length;
|
849
840
|
return tag_state->_last_start_tag != GUMBO_TAG_LAST &&
|
850
|
-
|
841
|
+
tag_state->_last_start_tag == gumbo_tagn_enum(tag_state->_buffer.data,
|
842
|
+
tag_state->_buffer.length);
|
851
843
|
}
|
852
844
|
|
853
845
|
void gumbo_tokenizer_state_init(
|
@@ -892,15 +884,14 @@ void gumbo_tokenizer_set_is_current_node_foreign(
|
|
892
884
|
GumboParser* parser, bool is_foreign) {
|
893
885
|
if (is_foreign != parser->_tokenizer_state->_is_current_node_foreign) {
|
894
886
|
gumbo_debug("Toggling is_current_node_foreign to %s.\n",
|
895
|
-
|
887
|
+
is_foreign ? "true" : "false");
|
896
888
|
}
|
897
889
|
parser->_tokenizer_state->_is_current_node_foreign = is_foreign;
|
898
890
|
}
|
899
891
|
|
900
892
|
// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#data-state
|
901
|
-
static StateResult handle_data_state(
|
902
|
-
|
903
|
-
int c, GumboToken* output) {
|
893
|
+
static StateResult handle_data_state(GumboParser* parser,
|
894
|
+
GumboTokenizerState* tokenizer, int c, GumboToken* output) {
|
904
895
|
switch (c) {
|
905
896
|
case '&':
|
906
897
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_CHAR_REF_IN_DATA);
|
@@ -924,17 +915,15 @@ static StateResult handle_data_state(
|
|
924
915
|
}
|
925
916
|
|
926
917
|
// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#character-reference-in-data-state
|
927
|
-
static StateResult handle_char_ref_in_data_state(
|
928
|
-
|
929
|
-
int c, GumboToken* output) {
|
918
|
+
static StateResult handle_char_ref_in_data_state(GumboParser* parser,
|
919
|
+
GumboTokenizerState* tokenizer, int c, GumboToken* output) {
|
930
920
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
931
921
|
return emit_char_ref(parser, ' ', false, output);
|
932
922
|
}
|
933
923
|
|
934
924
|
// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#rcdata-state
|
935
|
-
static StateResult handle_rcdata_state(
|
936
|
-
|
937
|
-
int c, GumboToken* output) {
|
925
|
+
static StateResult handle_rcdata_state(GumboParser* parser,
|
926
|
+
GumboTokenizerState* tokenizer, int c, GumboToken* output) {
|
938
927
|
switch (c) {
|
939
928
|
case '&':
|
940
929
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_CHAR_REF_IN_RCDATA);
|
@@ -955,17 +944,15 @@ static StateResult handle_rcdata_state(
|
|
955
944
|
}
|
956
945
|
|
957
946
|
// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#character-reference-in-rcdata-state
|
958
|
-
static StateResult handle_char_ref_in_rcdata_state(
|
959
|
-
|
960
|
-
int c, GumboToken* output) {
|
947
|
+
static StateResult handle_char_ref_in_rcdata_state(GumboParser* parser,
|
948
|
+
GumboTokenizerState* tokenizer, int c, GumboToken* output) {
|
961
949
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_RCDATA);
|
962
950
|
return emit_char_ref(parser, ' ', false, output);
|
963
951
|
}
|
964
952
|
|
965
953
|
// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#rawtext-state
|
966
|
-
static StateResult handle_rawtext_state(
|
967
|
-
|
968
|
-
int c, GumboToken* output) {
|
954
|
+
static StateResult handle_rawtext_state(GumboParser* parser,
|
955
|
+
GumboTokenizerState* tokenizer, int c, GumboToken* output) {
|
969
956
|
switch (c) {
|
970
957
|
case '<':
|
971
958
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_RAWTEXT_LT);
|
@@ -982,9 +969,8 @@ static StateResult handle_rawtext_state(
|
|
982
969
|
}
|
983
970
|
|
984
971
|
// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-state
|
985
|
-
static StateResult handle_script_state(
|
986
|
-
|
987
|
-
int c, GumboToken* output) {
|
972
|
+
static StateResult handle_script_state(GumboParser* parser,
|
973
|
+
GumboTokenizerState* tokenizer, int c, GumboToken* output) {
|
988
974
|
switch (c) {
|
989
975
|
case '<':
|
990
976
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT_LT);
|
@@ -1001,9 +987,8 @@ static StateResult handle_script_state(
|
|
1001
987
|
}
|
1002
988
|
|
1003
989
|
// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#plaintext-state
|
1004
|
-
static StateResult handle_plaintext_state(
|
1005
|
-
|
1006
|
-
int c, GumboToken* output) {
|
990
|
+
static StateResult handle_plaintext_state(GumboParser* parser,
|
991
|
+
GumboTokenizerState* tokenizer, int c, GumboToken* output) {
|
1007
992
|
switch (c) {
|
1008
993
|
case '\0':
|
1009
994
|
return emit_replacement_char(parser, output);
|
@@ -1015,9 +1000,8 @@ static StateResult handle_plaintext_state(
|
|
1015
1000
|
}
|
1016
1001
|
|
1017
1002
|
// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#tag-open-state
|
1018
|
-
static StateResult handle_tag_open_state(
|
1019
|
-
|
1020
|
-
int c, GumboToken* output) {
|
1003
|
+
static StateResult handle_tag_open_state(GumboParser* parser,
|
1004
|
+
GumboTokenizerState* tokenizer, int c, GumboToken* output) {
|
1021
1005
|
assert(temporary_buffer_equals(parser, "<"));
|
1022
1006
|
switch (c) {
|
1023
1007
|
case '!':
|
@@ -1049,9 +1033,8 @@ static StateResult handle_tag_open_state(
|
|
1049
1033
|
}
|
1050
1034
|
|
1051
1035
|
// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#end-tag-open-state
|
1052
|
-
static StateResult handle_end_tag_open_state(
|
1053
|
-
|
1054
|
-
int c, GumboToken* output) {
|
1036
|
+
static StateResult handle_end_tag_open_state(GumboParser* parser,
|
1037
|
+
GumboTokenizerState* tokenizer, int c, GumboToken* output) {
|
1055
1038
|
assert(temporary_buffer_equals(parser, "</"));
|
1056
1039
|
switch (c) {
|
1057
1040
|
case '>':
|
@@ -1077,9 +1060,8 @@ static StateResult handle_end_tag_open_state(
|
|
1077
1060
|
}
|
1078
1061
|
|
1079
1062
|
// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#tag-name-state
|
1080
|
-
static StateResult handle_tag_name_state(
|
1081
|
-
|
1082
|
-
int c, GumboToken* output) {
|
1063
|
+
static StateResult handle_tag_name_state(GumboParser* parser,
|
1064
|
+
GumboTokenizerState* tokenizer, int c, GumboToken* output) {
|
1083
1065
|
switch (c) {
|
1084
1066
|
case '\t':
|
1085
1067
|
case '\n':
|
@@ -1112,9 +1094,8 @@ static StateResult handle_tag_name_state(
|
|
1112
1094
|
}
|
1113
1095
|
|
1114
1096
|
// http://www.whatwg.org/specs/web-apps/current-work/complete.html#rcdata-less-than-sign-state
|
1115
|
-
static StateResult handle_rcdata_lt_state(
|
1116
|
-
|
1117
|
-
int c, GumboToken* output) {
|
1097
|
+
static StateResult handle_rcdata_lt_state(GumboParser* parser,
|
1098
|
+
GumboTokenizerState* tokenizer, int c, GumboToken* output) {
|
1118
1099
|
assert(temporary_buffer_equals(parser, "<"));
|
1119
1100
|
if (c == '/') {
|
1120
1101
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_RCDATA_END_TAG_OPEN);
|
@@ -1128,9 +1109,8 @@ static StateResult handle_rcdata_lt_state(
|
|
1128
1109
|
}
|
1129
1110
|
|
1130
1111
|
// http://www.whatwg.org/specs/web-apps/current-work/complete.html#rcdata-end-tag-open-state
|
1131
|
-
static StateResult handle_rcdata_end_tag_open_state(
|
1132
|
-
|
1133
|
-
int c, GumboToken* output) {
|
1112
|
+
static StateResult handle_rcdata_end_tag_open_state(GumboParser* parser,
|
1113
|
+
GumboTokenizerState* tokenizer, int c, GumboToken* output) {
|
1134
1114
|
assert(temporary_buffer_equals(parser, "</"));
|
1135
1115
|
if (is_alpha(c)) {
|
1136
1116
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_RCDATA_END_TAG_NAME);
|
@@ -1145,9 +1125,8 @@ static StateResult handle_rcdata_end_tag_open_state(
|
|
1145
1125
|
}
|
1146
1126
|
|
1147
1127
|
// http://www.whatwg.org/specs/web-apps/current-work/complete.html#rcdata-end-tag-name-state
|
1148
|
-
static StateResult handle_rcdata_end_tag_name_state(
|
1149
|
-
|
1150
|
-
int c, GumboToken* output) {
|
1128
|
+
static StateResult handle_rcdata_end_tag_name_state(GumboParser* parser,
|
1129
|
+
GumboTokenizerState* tokenizer, int c, GumboToken* output) {
|
1151
1130
|
assert(tokenizer->_temporary_buffer.length >= 2);
|
1152
1131
|
if (is_alpha(c)) {
|
1153
1132
|
append_char_to_tag_buffer(parser, ensure_lowercase(c), true);
|
@@ -1178,9 +1157,8 @@ static StateResult handle_rcdata_end_tag_name_state(
|
|
1178
1157
|
}
|
1179
1158
|
|
1180
1159
|
// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#rawtext-less-than-sign-state
|
1181
|
-
static StateResult handle_rawtext_lt_state(
|
1182
|
-
|
1183
|
-
int c, GumboToken* output) {
|
1160
|
+
static StateResult handle_rawtext_lt_state(GumboParser* parser,
|
1161
|
+
GumboTokenizerState* tokenizer, int c, GumboToken* output) {
|
1184
1162
|
assert(temporary_buffer_equals(parser, "<"));
|
1185
1163
|
if (c == '/') {
|
1186
1164
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_RAWTEXT_END_TAG_OPEN);
|
@@ -1194,9 +1172,8 @@ static StateResult handle_rawtext_lt_state(
|
|
1194
1172
|
}
|
1195
1173
|
|
1196
1174
|
// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#rawtext-end-tag-open-state
|
1197
|
-
static StateResult handle_rawtext_end_tag_open_state(
|
1198
|
-
|
1199
|
-
int c, GumboToken* output) {
|
1175
|
+
static StateResult handle_rawtext_end_tag_open_state(GumboParser* parser,
|
1176
|
+
GumboTokenizerState* tokenizer, int c, GumboToken* output) {
|
1200
1177
|
assert(temporary_buffer_equals(parser, "</"));
|
1201
1178
|
if (is_alpha(c)) {
|
1202
1179
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_RAWTEXT_END_TAG_NAME);
|
@@ -1210,12 +1187,11 @@ static StateResult handle_rawtext_end_tag_open_state(
|
|
1210
1187
|
}
|
1211
1188
|
|
1212
1189
|
// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#rawtext-end-tag-name-state
|
1213
|
-
static StateResult handle_rawtext_end_tag_name_state(
|
1214
|
-
|
1215
|
-
int c, GumboToken* output) {
|
1190
|
+
static StateResult handle_rawtext_end_tag_name_state(GumboParser* parser,
|
1191
|
+
GumboTokenizerState* tokenizer, int c, GumboToken* output) {
|
1216
1192
|
assert(tokenizer->_temporary_buffer.length >= 2);
|
1217
1193
|
gumbo_debug("Last end tag: %*s\n", (int) tokenizer->_tag_state._buffer.length,
|
1218
|
-
|
1194
|
+
tokenizer->_tag_state._buffer.data);
|
1219
1195
|
if (is_alpha(c)) {
|
1220
1196
|
append_char_to_tag_buffer(parser, ensure_lowercase(c), true);
|
1221
1197
|
append_char_to_temporary_buffer(parser, c);
|
@@ -1246,9 +1222,8 @@ static StateResult handle_rawtext_end_tag_name_state(
|
|
1246
1222
|
}
|
1247
1223
|
|
1248
1224
|
// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-less-than-sign-state
|
1249
|
-
static StateResult handle_script_lt_state(
|
1250
|
-
|
1251
|
-
int c, GumboToken* output) {
|
1225
|
+
static StateResult handle_script_lt_state(GumboParser* parser,
|
1226
|
+
GumboTokenizerState* tokenizer, int c, GumboToken* output) {
|
1252
1227
|
assert(temporary_buffer_equals(parser, "<"));
|
1253
1228
|
if (c == '/') {
|
1254
1229
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT_END_TAG_OPEN);
|
@@ -1266,9 +1241,8 @@ static StateResult handle_script_lt_state(
|
|
1266
1241
|
}
|
1267
1242
|
|
1268
1243
|
// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-end-tag-open-state
|
1269
|
-
static StateResult handle_script_end_tag_open_state(
|
1270
|
-
|
1271
|
-
int c, GumboToken* output) {
|
1244
|
+
static StateResult handle_script_end_tag_open_state(GumboParser* parser,
|
1245
|
+
GumboTokenizerState* tokenizer, int c, GumboToken* output) {
|
1272
1246
|
assert(temporary_buffer_equals(parser, "</"));
|
1273
1247
|
if (is_alpha(c)) {
|
1274
1248
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT_END_TAG_NAME);
|
@@ -1282,9 +1256,8 @@ static StateResult handle_script_end_tag_open_state(
|
|
1282
1256
|
}
|
1283
1257
|
|
1284
1258
|
// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-end-tag-name-state
|
1285
|
-
static StateResult handle_script_end_tag_name_state(
|
1286
|
-
|
1287
|
-
int c, GumboToken* output) {
|
1259
|
+
static StateResult handle_script_end_tag_name_state(GumboParser* parser,
|
1260
|
+
GumboTokenizerState* tokenizer, int c, GumboToken* output) {
|
1288
1261
|
assert(tokenizer->_temporary_buffer.length >= 2);
|
1289
1262
|
if (is_alpha(c)) {
|
1290
1263
|
append_char_to_tag_buffer(parser, ensure_lowercase(c), true);
|
@@ -1315,9 +1288,8 @@ static StateResult handle_script_end_tag_name_state(
|
|
1315
1288
|
}
|
1316
1289
|
|
1317
1290
|
// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-escape-start-state
|
1318
|
-
static StateResult handle_script_escaped_start_state(
|
1319
|
-
|
1320
|
-
int c, GumboToken* output) {
|
1291
|
+
static StateResult handle_script_escaped_start_state(GumboParser* parser,
|
1292
|
+
GumboTokenizerState* tokenizer, int c, GumboToken* output) {
|
1321
1293
|
if (c == '-') {
|
1322
1294
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT_ESCAPED_START_DASH);
|
1323
1295
|
return emit_current_char(parser, output);
|
@@ -1329,9 +1301,8 @@ static StateResult handle_script_escaped_start_state(
|
|
1329
1301
|
}
|
1330
1302
|
|
1331
1303
|
// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-escape-start-dash-state
|
1332
|
-
static StateResult handle_script_escaped_start_dash_state(
|
1333
|
-
|
1334
|
-
int c, GumboToken* output) {
|
1304
|
+
static StateResult handle_script_escaped_start_dash_state(GumboParser* parser,
|
1305
|
+
GumboTokenizerState* tokenizer, int c, GumboToken* output) {
|
1335
1306
|
if (c == '-') {
|
1336
1307
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT_ESCAPED_DASH_DASH);
|
1337
1308
|
return emit_current_char(parser, output);
|
@@ -1343,9 +1314,8 @@ static StateResult handle_script_escaped_start_dash_state(
|
|
1343
1314
|
}
|
1344
1315
|
|
1345
1316
|
// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-escaped-state
|
1346
|
-
static StateResult handle_script_escaped_state(
|
1347
|
-
|
1348
|
-
int c, GumboToken* output) {
|
1317
|
+
static StateResult handle_script_escaped_state(GumboParser* parser,
|
1318
|
+
GumboTokenizerState* tokenizer, int c, GumboToken* output) {
|
1349
1319
|
switch (c) {
|
1350
1320
|
case '-':
|
1351
1321
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT_ESCAPED_DASH);
|
@@ -1366,9 +1336,8 @@ static StateResult handle_script_escaped_state(
|
|
1366
1336
|
}
|
1367
1337
|
|
1368
1338
|
// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-escaped-dash-state
|
1369
|
-
static StateResult handle_script_escaped_dash_state(
|
1370
|
-
|
1371
|
-
int c, GumboToken* output) {
|
1339
|
+
static StateResult handle_script_escaped_dash_state(GumboParser* parser,
|
1340
|
+
GumboTokenizerState* tokenizer, int c, GumboToken* output) {
|
1372
1341
|
switch (c) {
|
1373
1342
|
case '-':
|
1374
1343
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT_ESCAPED_DASH_DASH);
|
@@ -1392,9 +1361,8 @@ static StateResult handle_script_escaped_dash_state(
|
|
1392
1361
|
}
|
1393
1362
|
|
1394
1363
|
// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-escaped-dash-dash-state
|
1395
|
-
static StateResult handle_script_escaped_dash_dash_state(
|
1396
|
-
|
1397
|
-
int c, GumboToken* output) {
|
1364
|
+
static StateResult handle_script_escaped_dash_dash_state(GumboParser* parser,
|
1365
|
+
GumboTokenizerState* tokenizer, int c, GumboToken* output) {
|
1398
1366
|
switch (c) {
|
1399
1367
|
case '-':
|
1400
1368
|
return emit_current_char(parser, output);
|
@@ -1420,9 +1388,8 @@ static StateResult handle_script_escaped_dash_dash_state(
|
|
1420
1388
|
}
|
1421
1389
|
|
1422
1390
|
// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-escaped-less-than-sign-state
|
1423
|
-
static StateResult handle_script_escaped_lt_state(
|
1424
|
-
|
1425
|
-
int c, GumboToken* output) {
|
1391
|
+
static StateResult handle_script_escaped_lt_state(GumboParser* parser,
|
1392
|
+
GumboTokenizerState* tokenizer, int c, GumboToken* output) {
|
1426
1393
|
assert(temporary_buffer_equals(parser, "<"));
|
1427
1394
|
assert(!tokenizer->_script_data_buffer.length);
|
1428
1395
|
if (c == '/') {
|
@@ -1442,9 +1409,8 @@ static StateResult handle_script_escaped_lt_state(
|
|
1442
1409
|
}
|
1443
1410
|
|
1444
1411
|
// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-escaped-end-tag-open-state
|
1445
|
-
static StateResult handle_script_escaped_end_tag_open_state(
|
1446
|
-
|
1447
|
-
int c, GumboToken* output) {
|
1412
|
+
static StateResult handle_script_escaped_end_tag_open_state(GumboParser* parser,
|
1413
|
+
GumboTokenizerState* tokenizer, int c, GumboToken* output) {
|
1448
1414
|
assert(temporary_buffer_equals(parser, "</"));
|
1449
1415
|
if (is_alpha(c)) {
|
1450
1416
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT_ESCAPED_END_TAG_NAME);
|
@@ -1458,9 +1424,8 @@ static StateResult handle_script_escaped_end_tag_open_state(
|
|
1458
1424
|
}
|
1459
1425
|
|
1460
1426
|
// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-escaped-end-tag-name-state
|
1461
|
-
static StateResult handle_script_escaped_end_tag_name_state(
|
1462
|
-
|
1463
|
-
int c, GumboToken* output) {
|
1427
|
+
static StateResult handle_script_escaped_end_tag_name_state(GumboParser* parser,
|
1428
|
+
GumboTokenizerState* tokenizer, int c, GumboToken* output) {
|
1464
1429
|
assert(tokenizer->_temporary_buffer.length >= 2);
|
1465
1430
|
if (is_alpha(c)) {
|
1466
1431
|
append_char_to_tag_buffer(parser, ensure_lowercase(c), true);
|
@@ -1491,9 +1456,8 @@ static StateResult handle_script_escaped_end_tag_name_state(
|
|
1491
1456
|
}
|
1492
1457
|
|
1493
1458
|
// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-double-escape-start-state
|
1494
|
-
static StateResult handle_script_double_escaped_start_state(
|
1495
|
-
|
1496
|
-
int c, GumboToken* output) {
|
1459
|
+
static StateResult handle_script_double_escaped_start_state(GumboParser* parser,
|
1460
|
+
GumboTokenizerState* tokenizer, int c, GumboToken* output) {
|
1497
1461
|
switch (c) {
|
1498
1462
|
case '\t':
|
1499
1463
|
case '\n':
|
@@ -1501,9 +1465,11 @@ static StateResult handle_script_double_escaped_start_state(
|
|
1501
1465
|
case ' ':
|
1502
1466
|
case '/':
|
1503
1467
|
case '>':
|
1504
|
-
gumbo_tokenizer_set_state(
|
1505
|
-
|
1506
|
-
|
1468
|
+
gumbo_tokenizer_set_state(
|
1469
|
+
parser, gumbo_string_equals(&kScriptTag,
|
1470
|
+
(GumboStringPiece*) &tokenizer->_script_data_buffer)
|
1471
|
+
? GUMBO_LEX_SCRIPT_DOUBLE_ESCAPED
|
1472
|
+
: GUMBO_LEX_SCRIPT_ESCAPED);
|
1507
1473
|
return emit_current_char(parser, output);
|
1508
1474
|
default:
|
1509
1475
|
if (is_alpha(c)) {
|
@@ -1519,9 +1485,8 @@ static StateResult handle_script_double_escaped_start_state(
|
|
1519
1485
|
}
|
1520
1486
|
|
1521
1487
|
// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-double-escaped-state
|
1522
|
-
static StateResult handle_script_double_escaped_state(
|
1523
|
-
|
1524
|
-
int c, GumboToken* output) {
|
1488
|
+
static StateResult handle_script_double_escaped_state(GumboParser* parser,
|
1489
|
+
GumboTokenizerState* tokenizer, int c, GumboToken* output) {
|
1525
1490
|
switch (c) {
|
1526
1491
|
case '-':
|
1527
1492
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT_DOUBLE_ESCAPED_DASH);
|
@@ -1541,9 +1506,8 @@ static StateResult handle_script_double_escaped_state(
|
|
1541
1506
|
}
|
1542
1507
|
|
1543
1508
|
// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-double-escaped-dash-state
|
1544
|
-
static StateResult handle_script_double_escaped_dash_state(
|
1545
|
-
|
1546
|
-
int c, GumboToken* output) {
|
1509
|
+
static StateResult handle_script_double_escaped_dash_state(GumboParser* parser,
|
1510
|
+
GumboTokenizerState* tokenizer, int c, GumboToken* output) {
|
1547
1511
|
switch (c) {
|
1548
1512
|
case '-':
|
1549
1513
|
gumbo_tokenizer_set_state(
|
@@ -1567,8 +1531,8 @@ static StateResult handle_script_double_escaped_dash_state(
|
|
1567
1531
|
|
1568
1532
|
// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-double-escaped-dash-dash-state
|
1569
1533
|
static StateResult handle_script_double_escaped_dash_dash_state(
|
1570
|
-
GumboParser* parser, GumboTokenizerState* tokenizer,
|
1571
|
-
|
1534
|
+
GumboParser* parser, GumboTokenizerState* tokenizer, int c,
|
1535
|
+
GumboToken* output) {
|
1572
1536
|
switch (c) {
|
1573
1537
|
case '-':
|
1574
1538
|
return emit_current_char(parser, output);
|
@@ -1592,26 +1556,22 @@ static StateResult handle_script_double_escaped_dash_dash_state(
|
|
1592
1556
|
}
|
1593
1557
|
|
1594
1558
|
// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-double-escaped-less-than-sign-state
|
1595
|
-
static StateResult handle_script_double_escaped_lt_state(
|
1596
|
-
|
1597
|
-
int c, GumboToken* output) {
|
1559
|
+
static StateResult handle_script_double_escaped_lt_state(GumboParser* parser,
|
1560
|
+
GumboTokenizerState* tokenizer, int c, GumboToken* output) {
|
1598
1561
|
if (c == '/') {
|
1599
1562
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT_DOUBLE_ESCAPED_END);
|
1600
|
-
|
1601
|
-
gumbo_string_buffer_init(parser, &tokenizer->_script_data_buffer);
|
1563
|
+
gumbo_string_buffer_clear(parser, &tokenizer->_script_data_buffer);
|
1602
1564
|
return emit_current_char(parser, output);
|
1603
1565
|
} else {
|
1604
1566
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT_DOUBLE_ESCAPED);
|
1605
1567
|
tokenizer->_reconsume_current_input = true;
|
1606
1568
|
return NEXT_CHAR;
|
1607
1569
|
}
|
1608
|
-
|
1609
1570
|
}
|
1610
1571
|
|
1611
1572
|
// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-double-escape-end-state
|
1612
|
-
static StateResult handle_script_double_escaped_end_state(
|
1613
|
-
|
1614
|
-
int c, GumboToken* output) {
|
1573
|
+
static StateResult handle_script_double_escaped_end_state(GumboParser* parser,
|
1574
|
+
GumboTokenizerState* tokenizer, int c, GumboToken* output) {
|
1615
1575
|
switch (c) {
|
1616
1576
|
case '\t':
|
1617
1577
|
case '\n':
|
@@ -1619,9 +1579,11 @@ static StateResult handle_script_double_escaped_end_state(
|
|
1619
1579
|
case ' ':
|
1620
1580
|
case '/':
|
1621
1581
|
case '>':
|
1622
|
-
gumbo_tokenizer_set_state(
|
1623
|
-
|
1624
|
-
|
1582
|
+
gumbo_tokenizer_set_state(
|
1583
|
+
parser, gumbo_string_equals(&kScriptTag,
|
1584
|
+
(GumboStringPiece*) &tokenizer->_script_data_buffer)
|
1585
|
+
? GUMBO_LEX_SCRIPT_ESCAPED
|
1586
|
+
: GUMBO_LEX_SCRIPT_DOUBLE_ESCAPED);
|
1625
1587
|
return emit_current_char(parser, output);
|
1626
1588
|
default:
|
1627
1589
|
if (is_alpha(c)) {
|
@@ -1637,9 +1599,8 @@ static StateResult handle_script_double_escaped_end_state(
|
|
1637
1599
|
}
|
1638
1600
|
|
1639
1601
|
// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#before-attribute-name-state
|
1640
|
-
static StateResult handle_before_attr_name_state(
|
1641
|
-
|
1642
|
-
int c, GumboToken* output) {
|
1602
|
+
static StateResult handle_before_attr_name_state(GumboParser* parser,
|
1603
|
+
GumboTokenizerState* tokenizer, int c, GumboToken* output) {
|
1643
1604
|
switch (c) {
|
1644
1605
|
case '\t':
|
1645
1606
|
case '\n':
|
@@ -1667,7 +1628,7 @@ static StateResult handle_before_attr_name_state(
|
|
1667
1628
|
case '<':
|
1668
1629
|
case '=':
|
1669
1630
|
tokenizer_add_parse_error(parser, GUMBO_ERR_ATTR_NAME_INVALID);
|
1670
|
-
|
1631
|
+
// Fall through.
|
1671
1632
|
default:
|
1672
1633
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_ATTR_NAME);
|
1673
1634
|
append_char_to_tag_buffer(parser, ensure_lowercase(c), true);
|
@@ -1676,9 +1637,8 @@ static StateResult handle_before_attr_name_state(
|
|
1676
1637
|
}
|
1677
1638
|
|
1678
1639
|
// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#attribute-name-state
|
1679
|
-
static StateResult handle_attr_name_state(
|
1680
|
-
|
1681
|
-
int c, GumboToken* output) {
|
1640
|
+
static StateResult handle_attr_name_state(GumboParser* parser,
|
1641
|
+
GumboTokenizerState* tokenizer, int c, GumboToken* output) {
|
1682
1642
|
switch (c) {
|
1683
1643
|
case '\t':
|
1684
1644
|
case '\n':
|
@@ -1712,7 +1672,7 @@ static StateResult handle_attr_name_state(
|
|
1712
1672
|
case '\'':
|
1713
1673
|
case '<':
|
1714
1674
|
tokenizer_add_parse_error(parser, GUMBO_ERR_ATTR_NAME_INVALID);
|
1715
|
-
|
1675
|
+
// Fall through.
|
1716
1676
|
default:
|
1717
1677
|
append_char_to_tag_buffer(parser, ensure_lowercase(c), true);
|
1718
1678
|
return NEXT_CHAR;
|
@@ -1720,9 +1680,8 @@ static StateResult handle_attr_name_state(
|
|
1720
1680
|
}
|
1721
1681
|
|
1722
1682
|
// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#after-attribute-name-state
|
1723
|
-
static StateResult handle_after_attr_name_state(
|
1724
|
-
|
1725
|
-
int c, GumboToken* output) {
|
1683
|
+
static StateResult handle_after_attr_name_state(GumboParser* parser,
|
1684
|
+
GumboTokenizerState* tokenizer, int c, GumboToken* output) {
|
1726
1685
|
switch (c) {
|
1727
1686
|
case '\t':
|
1728
1687
|
case '\n':
|
@@ -1752,7 +1711,7 @@ static StateResult handle_after_attr_name_state(
|
|
1752
1711
|
case '\'':
|
1753
1712
|
case '<':
|
1754
1713
|
tokenizer_add_parse_error(parser, GUMBO_ERR_ATTR_NAME_INVALID);
|
1755
|
-
|
1714
|
+
// Fall through.
|
1756
1715
|
default:
|
1757
1716
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_ATTR_NAME);
|
1758
1717
|
append_char_to_tag_buffer(parser, ensure_lowercase(c), true);
|
@@ -1761,9 +1720,8 @@ static StateResult handle_after_attr_name_state(
|
|
1761
1720
|
}
|
1762
1721
|
|
1763
1722
|
// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#before-attribute-value-state
|
1764
|
-
static StateResult handle_before_attr_value_state(
|
1765
|
-
|
1766
|
-
int c, GumboToken* output) {
|
1723
|
+
static StateResult handle_before_attr_value_state(GumboParser* parser,
|
1724
|
+
GumboTokenizerState* tokenizer, int c, GumboToken* output) {
|
1767
1725
|
switch (c) {
|
1768
1726
|
case '\t':
|
1769
1727
|
case '\n':
|
@@ -1802,7 +1760,7 @@ static StateResult handle_before_attr_value_state(
|
|
1802
1760
|
case '=':
|
1803
1761
|
case '`':
|
1804
1762
|
tokenizer_add_parse_error(parser, GUMBO_ERR_ATTR_UNQUOTED_EQUALS);
|
1805
|
-
|
1763
|
+
// Fall through.
|
1806
1764
|
default:
|
1807
1765
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_ATTR_VALUE_UNQUOTED);
|
1808
1766
|
append_char_to_tag_buffer(parser, c, true);
|
@@ -1811,9 +1769,8 @@ static StateResult handle_before_attr_value_state(
|
|
1811
1769
|
}
|
1812
1770
|
|
1813
1771
|
// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#attribute-value-double-quoted-state
|
1814
|
-
static StateResult handle_attr_value_double_quoted_state(
|
1815
|
-
|
1816
|
-
int c, GumboToken* output) {
|
1772
|
+
static StateResult handle_attr_value_double_quoted_state(GumboParser* parser,
|
1773
|
+
GumboTokenizerState* tokenizer, int c, GumboToken* output) {
|
1817
1774
|
switch (c) {
|
1818
1775
|
case '"':
|
1819
1776
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_AFTER_ATTR_VALUE_QUOTED);
|
@@ -1840,9 +1797,8 @@ static StateResult handle_attr_value_double_quoted_state(
|
|
1840
1797
|
}
|
1841
1798
|
|
1842
1799
|
// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#attribute-value-single-quoted-state
|
1843
|
-
static StateResult handle_attr_value_single_quoted_state(
|
1844
|
-
|
1845
|
-
int c, GumboToken* output) {
|
1800
|
+
static StateResult handle_attr_value_single_quoted_state(GumboParser* parser,
|
1801
|
+
GumboTokenizerState* tokenizer, int c, GumboToken* output) {
|
1846
1802
|
switch (c) {
|
1847
1803
|
case '\'':
|
1848
1804
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_AFTER_ATTR_VALUE_QUOTED);
|
@@ -1869,9 +1825,8 @@ static StateResult handle_attr_value_single_quoted_state(
|
|
1869
1825
|
}
|
1870
1826
|
|
1871
1827
|
// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#attribute-value-unquoted-state
|
1872
|
-
static StateResult handle_attr_value_unquoted_state(
|
1873
|
-
|
1874
|
-
int c, GumboToken* output) {
|
1828
|
+
static StateResult handle_attr_value_unquoted_state(GumboParser* parser,
|
1829
|
+
GumboTokenizerState* tokenizer, int c, GumboToken* output) {
|
1875
1830
|
switch (c) {
|
1876
1831
|
case '\t':
|
1877
1832
|
case '\n':
|
@@ -1905,7 +1860,7 @@ static StateResult handle_attr_value_unquoted_state(
|
|
1905
1860
|
case '\'':
|
1906
1861
|
case '`':
|
1907
1862
|
tokenizer_add_parse_error(parser, GUMBO_ERR_ATTR_UNQUOTED_EQUALS);
|
1908
|
-
|
1863
|
+
// Fall through.
|
1909
1864
|
default:
|
1910
1865
|
append_char_to_tag_buffer(parser, c, true);
|
1911
1866
|
return NEXT_CHAR;
|
@@ -1913,9 +1868,8 @@ static StateResult handle_attr_value_unquoted_state(
|
|
1913
1868
|
}
|
1914
1869
|
|
1915
1870
|
// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#character-reference-in-attribute-value-state
|
1916
|
-
static StateResult handle_char_ref_in_attr_value_state(
|
1917
|
-
|
1918
|
-
int c, GumboToken* output) {
|
1871
|
+
static StateResult handle_char_ref_in_attr_value_state(GumboParser* parser,
|
1872
|
+
GumboTokenizerState* tokenizer, int c, GumboToken* output) {
|
1919
1873
|
OneOrTwoCodepoints char_ref;
|
1920
1874
|
int allowed_char;
|
1921
1875
|
bool is_unquoted = false;
|
@@ -1956,9 +1910,8 @@ static StateResult handle_char_ref_in_attr_value_state(
|
|
1956
1910
|
}
|
1957
1911
|
|
1958
1912
|
// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#after-attribute-value-quoted-state
|
1959
|
-
static StateResult handle_after_attr_value_quoted_state(
|
1960
|
-
|
1961
|
-
int c, GumboToken* output) {
|
1913
|
+
static StateResult handle_after_attr_value_quoted_state(GumboParser* parser,
|
1914
|
+
GumboTokenizerState* tokenizer, int c, GumboToken* output) {
|
1962
1915
|
finish_attribute_value(parser);
|
1963
1916
|
switch (c) {
|
1964
1917
|
case '\t':
|
@@ -1988,9 +1941,8 @@ static StateResult handle_after_attr_value_quoted_state(
|
|
1988
1941
|
}
|
1989
1942
|
|
1990
1943
|
// http://www.whatwg.org/specs/web-apps/current-work/complete.html#self-closing-start-tag-state
|
1991
|
-
static StateResult handle_self_closing_start_tag_state(
|
1992
|
-
|
1993
|
-
int c, GumboToken* output) {
|
1944
|
+
static StateResult handle_self_closing_start_tag_state(GumboParser* parser,
|
1945
|
+
GumboTokenizerState* tokenizer, int c, GumboToken* output) {
|
1994
1946
|
switch (c) {
|
1995
1947
|
case '>':
|
1996
1948
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
@@ -2010,9 +1962,8 @@ static StateResult handle_self_closing_start_tag_state(
|
|
2010
1962
|
}
|
2011
1963
|
|
2012
1964
|
// http://www.whatwg.org/specs/web-apps/current-work/complete.html#bogus-comment-state
|
2013
|
-
static StateResult handle_bogus_comment_state(
|
2014
|
-
|
2015
|
-
int c, GumboToken* output) {
|
1965
|
+
static StateResult handle_bogus_comment_state(GumboParser* parser,
|
1966
|
+
GumboTokenizerState* tokenizer, int c, GumboToken* output) {
|
2016
1967
|
while (c != '>' && c != -1) {
|
2017
1968
|
if (c == '\0') {
|
2018
1969
|
c = 0xFFFD;
|
@@ -2026,15 +1977,14 @@ static StateResult handle_bogus_comment_state(
|
|
2026
1977
|
}
|
2027
1978
|
|
2028
1979
|
// http://www.whatwg.org/specs/web-apps/current-work/complete.html#markup-declaration-open-state
|
2029
|
-
static StateResult handle_markup_declaration_state(
|
2030
|
-
|
2031
|
-
int c, GumboToken* output) {
|
1980
|
+
static StateResult handle_markup_declaration_state(GumboParser* parser,
|
1981
|
+
GumboTokenizerState* tokenizer, int c, GumboToken* output) {
|
2032
1982
|
if (utf8iterator_maybe_consume_match(
|
2033
|
-
|
1983
|
+
&tokenizer->_input, "--", sizeof("--") - 1, true)) {
|
2034
1984
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_COMMENT_START);
|
2035
1985
|
tokenizer->_reconsume_current_input = true;
|
2036
1986
|
} else if (utf8iterator_maybe_consume_match(
|
2037
|
-
|
1987
|
+
&tokenizer->_input, "DOCTYPE", sizeof("DOCTYPE") - 1, false)) {
|
2038
1988
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DOCTYPE);
|
2039
1989
|
tokenizer->_reconsume_current_input = true;
|
2040
1990
|
// If we get here, we know we'll eventually emit a doctype token, so now is
|
@@ -2048,7 +1998,7 @@ static StateResult handle_markup_declaration_state(
|
|
2048
1998
|
gumbo_copy_stringz(parser, "");
|
2049
1999
|
} else if (tokenizer->_is_current_node_foreign &&
|
2050
2000
|
utf8iterator_maybe_consume_match(
|
2051
|
-
|
2001
|
+
&tokenizer->_input, "[CDATA[", sizeof("[CDATA[") - 1, true)) {
|
2052
2002
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_CDATA);
|
2053
2003
|
tokenizer->_is_in_cdata = true;
|
2054
2004
|
tokenizer->_reconsume_current_input = true;
|
@@ -2062,9 +2012,8 @@ static StateResult handle_markup_declaration_state(
|
|
2062
2012
|
}
|
2063
2013
|
|
2064
2014
|
// http://www.whatwg.org/specs/web-apps/current-work/complete.html#comment-start-state
|
2065
|
-
static StateResult handle_comment_start_state(
|
2066
|
-
|
2067
|
-
int c, GumboToken* output) {
|
2015
|
+
static StateResult handle_comment_start_state(GumboParser* parser,
|
2016
|
+
GumboTokenizerState* tokenizer, int c, GumboToken* output) {
|
2068
2017
|
switch (c) {
|
2069
2018
|
case '-':
|
2070
2019
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_COMMENT_START_DASH);
|
@@ -2092,9 +2041,8 @@ static StateResult handle_comment_start_state(
|
|
2092
2041
|
}
|
2093
2042
|
|
2094
2043
|
// http://www.whatwg.org/specs/web-apps/current-work/complete.html#comment-start-dash-state
|
2095
|
-
static StateResult handle_comment_start_dash_state(
|
2096
|
-
|
2097
|
-
int c, GumboToken* output) {
|
2044
|
+
static StateResult handle_comment_start_dash_state(GumboParser* parser,
|
2045
|
+
GumboTokenizerState* tokenizer, int c, GumboToken* output) {
|
2098
2046
|
switch (c) {
|
2099
2047
|
case '-':
|
2100
2048
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_COMMENT_END);
|
@@ -2124,9 +2072,8 @@ static StateResult handle_comment_start_dash_state(
|
|
2124
2072
|
}
|
2125
2073
|
|
2126
2074
|
// http://www.whatwg.org/specs/web-apps/current-work/complete.html#comment-state
|
2127
|
-
static StateResult handle_comment_state(
|
2128
|
-
|
2129
|
-
int c, GumboToken* output) {
|
2075
|
+
static StateResult handle_comment_state(GumboParser* parser,
|
2076
|
+
GumboTokenizerState* tokenizer, int c, GumboToken* output) {
|
2130
2077
|
switch (c) {
|
2131
2078
|
case '-':
|
2132
2079
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_COMMENT_END_DASH);
|
@@ -2147,9 +2094,8 @@ static StateResult handle_comment_state(
|
|
2147
2094
|
}
|
2148
2095
|
|
2149
2096
|
// http://www.whatwg.org/specs/web-apps/current-work/complete.html#comment-end-dash-state
|
2150
|
-
static StateResult handle_comment_end_dash_state(
|
2151
|
-
|
2152
|
-
int c, GumboToken* output) {
|
2097
|
+
static StateResult handle_comment_end_dash_state(GumboParser* parser,
|
2098
|
+
GumboTokenizerState* tokenizer, int c, GumboToken* output) {
|
2153
2099
|
switch (c) {
|
2154
2100
|
case '-':
|
2155
2101
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_COMMENT_END);
|
@@ -2174,9 +2120,8 @@ static StateResult handle_comment_end_dash_state(
|
|
2174
2120
|
}
|
2175
2121
|
|
2176
2122
|
// http://www.whatwg.org/specs/web-apps/current-work/complete.html#comment-end-state
|
2177
|
-
static StateResult handle_comment_end_state(
|
2178
|
-
|
2179
|
-
int c, GumboToken* output) {
|
2123
|
+
static StateResult handle_comment_end_state(GumboParser* parser,
|
2124
|
+
GumboTokenizerState* tokenizer, int c, GumboToken* output) {
|
2180
2125
|
switch (c) {
|
2181
2126
|
case '>':
|
2182
2127
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
@@ -2189,11 +2134,13 @@ static StateResult handle_comment_end_state(
|
|
2189
2134
|
append_char_to_temporary_buffer(parser, kUtf8ReplacementChar);
|
2190
2135
|
return NEXT_CHAR;
|
2191
2136
|
case '!':
|
2192
|
-
tokenizer_add_parse_error(
|
2137
|
+
tokenizer_add_parse_error(
|
2138
|
+
parser, GUMBO_ERR_COMMENT_BANG_AFTER_DOUBLE_DASH);
|
2193
2139
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_COMMENT_END_BANG);
|
2194
2140
|
return NEXT_CHAR;
|
2195
2141
|
case '-':
|
2196
|
-
tokenizer_add_parse_error(
|
2142
|
+
tokenizer_add_parse_error(
|
2143
|
+
parser, GUMBO_ERR_COMMENT_DASH_AFTER_DOUBLE_DASH);
|
2197
2144
|
append_char_to_temporary_buffer(parser, '-');
|
2198
2145
|
return NEXT_CHAR;
|
2199
2146
|
case -1:
|
@@ -2212,9 +2159,8 @@ static StateResult handle_comment_end_state(
|
|
2212
2159
|
}
|
2213
2160
|
|
2214
2161
|
// http://www.whatwg.org/specs/web-apps/current-work/complete.html#comment-end-bang-state
|
2215
|
-
static StateResult handle_comment_end_bang_state(
|
2216
|
-
|
2217
|
-
int c, GumboToken* output) {
|
2162
|
+
static StateResult handle_comment_end_bang_state(GumboParser* parser,
|
2163
|
+
GumboTokenizerState* tokenizer, int c, GumboToken* output) {
|
2218
2164
|
switch (c) {
|
2219
2165
|
case '-':
|
2220
2166
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_COMMENT_END_DASH);
|
@@ -2249,9 +2195,8 @@ static StateResult handle_comment_end_bang_state(
|
|
2249
2195
|
}
|
2250
2196
|
|
2251
2197
|
// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#doctype-state
|
2252
|
-
static StateResult handle_doctype_state(
|
2253
|
-
|
2254
|
-
int c, GumboToken* output) {
|
2198
|
+
static StateResult handle_doctype_state(GumboParser* parser,
|
2199
|
+
GumboTokenizerState* tokenizer, int c, GumboToken* output) {
|
2255
2200
|
assert(!tokenizer->_temporary_buffer.length);
|
2256
2201
|
switch (c) {
|
2257
2202
|
case '\t':
|
@@ -2276,9 +2221,8 @@ static StateResult handle_doctype_state(
|
|
2276
2221
|
}
|
2277
2222
|
|
2278
2223
|
// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#before-doctype-name-state
|
2279
|
-
static StateResult handle_before_doctype_name_state(
|
2280
|
-
|
2281
|
-
int c, GumboToken* output) {
|
2224
|
+
static StateResult handle_before_doctype_name_state(GumboParser* parser,
|
2225
|
+
GumboTokenizerState* tokenizer, int c, GumboToken* output) {
|
2282
2226
|
switch (c) {
|
2283
2227
|
case '\t':
|
2284
2228
|
case '\n':
|
@@ -2312,9 +2256,8 @@ static StateResult handle_before_doctype_name_state(
|
|
2312
2256
|
}
|
2313
2257
|
|
2314
2258
|
// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#doctype-name-state
|
2315
|
-
static StateResult handle_doctype_name_state(
|
2316
|
-
|
2317
|
-
int c, GumboToken* output) {
|
2259
|
+
static StateResult handle_doctype_name_state(GumboParser* parser,
|
2260
|
+
GumboTokenizerState* tokenizer, int c, GumboToken* output) {
|
2318
2261
|
switch (c) {
|
2319
2262
|
case '\t':
|
2320
2263
|
case '\n':
|
@@ -2322,14 +2265,12 @@ static StateResult handle_doctype_name_state(
|
|
2322
2265
|
case ' ':
|
2323
2266
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_AFTER_DOCTYPE_NAME);
|
2324
2267
|
gumbo_parser_deallocate(parser, (void*) tokenizer->_doc_type_state.name);
|
2325
|
-
finish_temporary_buffer(
|
2326
|
-
parser, &tokenizer->_doc_type_state.name);
|
2268
|
+
finish_temporary_buffer(parser, &tokenizer->_doc_type_state.name);
|
2327
2269
|
return NEXT_CHAR;
|
2328
2270
|
case '>':
|
2329
2271
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
2330
2272
|
gumbo_parser_deallocate(parser, (void*) tokenizer->_doc_type_state.name);
|
2331
|
-
finish_temporary_buffer(
|
2332
|
-
parser, &tokenizer->_doc_type_state.name);
|
2273
|
+
finish_temporary_buffer(parser, &tokenizer->_doc_type_state.name);
|
2333
2274
|
emit_doctype(parser, output);
|
2334
2275
|
return RETURN_SUCCESS;
|
2335
2276
|
case '\0':
|
@@ -2341,8 +2282,7 @@ static StateResult handle_doctype_name_state(
|
|
2341
2282
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
2342
2283
|
tokenizer->_doc_type_state.force_quirks = true;
|
2343
2284
|
gumbo_parser_deallocate(parser, (void*) tokenizer->_doc_type_state.name);
|
2344
|
-
finish_temporary_buffer(
|
2345
|
-
parser, &tokenizer->_doc_type_state.name);
|
2285
|
+
finish_temporary_buffer(parser, &tokenizer->_doc_type_state.name);
|
2346
2286
|
emit_doctype(parser, output);
|
2347
2287
|
return RETURN_ERROR;
|
2348
2288
|
default:
|
@@ -2354,9 +2294,8 @@ static StateResult handle_doctype_name_state(
|
|
2354
2294
|
}
|
2355
2295
|
|
2356
2296
|
// http://www.whatwg.org/specs/web-apps/current-work/complete.html#after-doctype-name-state
|
2357
|
-
static StateResult handle_after_doctype_name_state(
|
2358
|
-
|
2359
|
-
int c, GumboToken* output) {
|
2297
|
+
static StateResult handle_after_doctype_name_state(GumboParser* parser,
|
2298
|
+
GumboTokenizerState* tokenizer, int c, GumboToken* output) {
|
2360
2299
|
switch (c) {
|
2361
2300
|
case '\t':
|
2362
2301
|
case '\n':
|
@@ -2375,17 +2314,18 @@ static StateResult handle_after_doctype_name_state(
|
|
2375
2314
|
return RETURN_ERROR;
|
2376
2315
|
default:
|
2377
2316
|
if (utf8iterator_maybe_consume_match(
|
2378
|
-
|
2317
|
+
&tokenizer->_input, "PUBLIC", sizeof("PUBLIC") - 1, false)) {
|
2379
2318
|
gumbo_tokenizer_set_state(
|
2380
2319
|
parser, GUMBO_LEX_AFTER_DOCTYPE_PUBLIC_KEYWORD);
|
2381
2320
|
tokenizer->_reconsume_current_input = true;
|
2382
|
-
} else if (utf8iterator_maybe_consume_match(
|
2383
|
-
|
2321
|
+
} else if (utf8iterator_maybe_consume_match(&tokenizer->_input, "SYSTEM",
|
2322
|
+
sizeof("SYSTEM") - 1, false)) {
|
2384
2323
|
gumbo_tokenizer_set_state(
|
2385
2324
|
parser, GUMBO_LEX_AFTER_DOCTYPE_SYSTEM_KEYWORD);
|
2386
2325
|
tokenizer->_reconsume_current_input = true;
|
2387
2326
|
} else {
|
2388
|
-
tokenizer_add_parse_error(
|
2327
|
+
tokenizer_add_parse_error(
|
2328
|
+
parser, GUMBO_ERR_DOCTYPE_SPACE_OR_RIGHT_BRACKET);
|
2389
2329
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_BOGUS_DOCTYPE);
|
2390
2330
|
tokenizer->_doc_type_state.force_quirks = true;
|
2391
2331
|
}
|
@@ -2395,15 +2335,14 @@ static StateResult handle_after_doctype_name_state(
|
|
2395
2335
|
|
2396
2336
|
// http://www.whatwg.org/specs/web-apps/current-work/complete.html#after-doctype-public-keyword-state
|
2397
2337
|
static StateResult handle_after_doctype_public_keyword_state(
|
2398
|
-
GumboParser* parser, GumboTokenizerState* tokenizer,
|
2399
|
-
|
2338
|
+
GumboParser* parser, GumboTokenizerState* tokenizer, int c,
|
2339
|
+
GumboToken* output) {
|
2400
2340
|
switch (c) {
|
2401
2341
|
case '\t':
|
2402
2342
|
case '\n':
|
2403
2343
|
case '\f':
|
2404
2344
|
case ' ':
|
2405
|
-
gumbo_tokenizer_set_state(
|
2406
|
-
parser, GUMBO_LEX_BEFORE_DOCTYPE_PUBLIC_ID);
|
2345
|
+
gumbo_tokenizer_set_state(parser, GUMBO_LEX_BEFORE_DOCTYPE_PUBLIC_ID);
|
2407
2346
|
return NEXT_CHAR;
|
2408
2347
|
case '"':
|
2409
2348
|
tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_INVALID);
|
@@ -2439,9 +2378,8 @@ static StateResult handle_after_doctype_public_keyword_state(
|
|
2439
2378
|
}
|
2440
2379
|
|
2441
2380
|
// http://www.whatwg.org/specs/web-apps/current-work/complete.html#before-doctype-public-identifier-state
|
2442
|
-
static StateResult handle_before_doctype_public_id_state(
|
2443
|
-
|
2444
|
-
int c, GumboToken* output) {
|
2381
|
+
static StateResult handle_before_doctype_public_id_state(GumboParser* parser,
|
2382
|
+
GumboTokenizerState* tokenizer, int c, GumboToken* output) {
|
2445
2383
|
switch (c) {
|
2446
2384
|
case '\t':
|
2447
2385
|
case '\n':
|
@@ -2481,8 +2419,8 @@ static StateResult handle_before_doctype_public_id_state(
|
|
2481
2419
|
|
2482
2420
|
// http://www.whatwg.org/specs/web-apps/current-work/complete.html#doctype-public-identifier-(double-quoted)-state
|
2483
2421
|
static StateResult handle_doctype_public_id_double_quoted_state(
|
2484
|
-
GumboParser* parser, GumboTokenizerState* tokenizer,
|
2485
|
-
|
2422
|
+
GumboParser* parser, GumboTokenizerState* tokenizer, int c,
|
2423
|
+
GumboToken* output) {
|
2486
2424
|
switch (c) {
|
2487
2425
|
case '"':
|
2488
2426
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_AFTER_DOCTYPE_PUBLIC_ID);
|
@@ -2514,8 +2452,8 @@ static StateResult handle_doctype_public_id_double_quoted_state(
|
|
2514
2452
|
|
2515
2453
|
// http://www.whatwg.org/specs/web-apps/current-work/complete.html#doctype-public-identifier-(single-quoted)-state
|
2516
2454
|
static StateResult handle_doctype_public_id_single_quoted_state(
|
2517
|
-
GumboParser* parser, GumboTokenizerState* tokenizer,
|
2518
|
-
|
2455
|
+
GumboParser* parser, GumboTokenizerState* tokenizer, int c,
|
2456
|
+
GumboToken* output) {
|
2519
2457
|
switch (c) {
|
2520
2458
|
case '\'':
|
2521
2459
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_AFTER_DOCTYPE_PUBLIC_ID);
|
@@ -2546,9 +2484,8 @@ static StateResult handle_doctype_public_id_single_quoted_state(
|
|
2546
2484
|
}
|
2547
2485
|
|
2548
2486
|
// http://www.whatwg.org/specs/web-apps/current-work/complete.html#after-doctype-public-identifier-state
|
2549
|
-
static StateResult handle_after_doctype_public_id_state(
|
2550
|
-
|
2551
|
-
int c, GumboToken* output) {
|
2487
|
+
static StateResult handle_after_doctype_public_id_state(GumboParser* parser,
|
2488
|
+
GumboTokenizerState* tokenizer, int c, GumboToken* output) {
|
2552
2489
|
switch (c) {
|
2553
2490
|
case '\t':
|
2554
2491
|
case '\n':
|
@@ -2590,8 +2527,8 @@ static StateResult handle_after_doctype_public_id_state(
|
|
2590
2527
|
|
2591
2528
|
// http://www.whatwg.org/specs/web-apps/current-work/complete.html#between-doctype-public-and-system-identifiers-state
|
2592
2529
|
static StateResult handle_between_doctype_public_system_id_state(
|
2593
|
-
GumboParser* parser, GumboTokenizerState* tokenizer,
|
2594
|
-
|
2530
|
+
GumboParser* parser, GumboTokenizerState* tokenizer, int c,
|
2531
|
+
GumboToken* output) {
|
2595
2532
|
switch (c) {
|
2596
2533
|
case '\t':
|
2597
2534
|
case '\n':
|
@@ -2629,8 +2566,8 @@ static StateResult handle_between_doctype_public_system_id_state(
|
|
2629
2566
|
|
2630
2567
|
// http://www.whatwg.org/specs/web-apps/current-work/complete.html#after-doctype-system-keyword-state
|
2631
2568
|
static StateResult handle_after_doctype_system_keyword_state(
|
2632
|
-
GumboParser* parser, GumboTokenizerState* tokenizer,
|
2633
|
-
|
2569
|
+
GumboParser* parser, GumboTokenizerState* tokenizer, int c,
|
2570
|
+
GumboToken* output) {
|
2634
2571
|
switch (c) {
|
2635
2572
|
case '\t':
|
2636
2573
|
case '\n':
|
@@ -2671,9 +2608,8 @@ static StateResult handle_after_doctype_system_keyword_state(
|
|
2671
2608
|
}
|
2672
2609
|
|
2673
2610
|
// http://www.whatwg.org/specs/web-apps/current-work/complete.html#before-doctype-system-identifier-state
|
2674
|
-
static StateResult handle_before_doctype_system_id_state(
|
2675
|
-
|
2676
|
-
int c, GumboToken* output) {
|
2611
|
+
static StateResult handle_before_doctype_system_id_state(GumboParser* parser,
|
2612
|
+
GumboTokenizerState* tokenizer, int c, GumboToken* output) {
|
2677
2613
|
switch (c) {
|
2678
2614
|
case '\t':
|
2679
2615
|
case '\n':
|
@@ -2712,8 +2648,8 @@ static StateResult handle_before_doctype_system_id_state(
|
|
2712
2648
|
|
2713
2649
|
// http://www.whatwg.org/specs/web-apps/current-work/complete.html#doctype-system-identifier-(double-quoted)-state
|
2714
2650
|
static StateResult handle_doctype_system_id_double_quoted_state(
|
2715
|
-
GumboParser* parser, GumboTokenizerState* tokenizer,
|
2716
|
-
|
2651
|
+
GumboParser* parser, GumboTokenizerState* tokenizer, int c,
|
2652
|
+
GumboToken* output) {
|
2717
2653
|
switch (c) {
|
2718
2654
|
case '"':
|
2719
2655
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_AFTER_DOCTYPE_SYSTEM_ID);
|
@@ -2745,8 +2681,8 @@ static StateResult handle_doctype_system_id_double_quoted_state(
|
|
2745
2681
|
|
2746
2682
|
// http://www.whatwg.org/specs/web-apps/current-work/complete.html#doctype-system-identifier-(single-quoted)-state
|
2747
2683
|
static StateResult handle_doctype_system_id_single_quoted_state(
|
2748
|
-
GumboParser* parser, GumboTokenizerState* tokenizer,
|
2749
|
-
|
2684
|
+
GumboParser* parser, GumboTokenizerState* tokenizer, int c,
|
2685
|
+
GumboToken* output) {
|
2750
2686
|
switch (c) {
|
2751
2687
|
case '\'':
|
2752
2688
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_AFTER_DOCTYPE_SYSTEM_ID);
|
@@ -2777,9 +2713,8 @@ static StateResult handle_doctype_system_id_single_quoted_state(
|
|
2777
2713
|
}
|
2778
2714
|
|
2779
2715
|
// http://www.whatwg.org/specs/web-apps/current-work/complete.html#after-doctype-system-identifier-state
|
2780
|
-
static StateResult handle_after_doctype_system_id_state(
|
2781
|
-
|
2782
|
-
int c, GumboToken* output) {
|
2716
|
+
static StateResult handle_after_doctype_system_id_state(GumboParser* parser,
|
2717
|
+
GumboTokenizerState* tokenizer, int c, GumboToken* output) {
|
2783
2718
|
switch (c) {
|
2784
2719
|
case '\t':
|
2785
2720
|
case '\n':
|
@@ -2804,9 +2739,8 @@ static StateResult handle_after_doctype_system_id_state(
|
|
2804
2739
|
}
|
2805
2740
|
|
2806
2741
|
// http://www.whatwg.org/specs/web-apps/current-work/complete.html#bogus-doctype-state
|
2807
|
-
static StateResult handle_bogus_doctype_state(
|
2808
|
-
|
2809
|
-
int c, GumboToken* output) {
|
2742
|
+
static StateResult handle_bogus_doctype_state(GumboParser* parser,
|
2743
|
+
GumboTokenizerState* tokenizer, int c, GumboToken* output) {
|
2810
2744
|
if (c == '>' || c == -1) {
|
2811
2745
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
2812
2746
|
emit_doctype(parser, output);
|
@@ -2816,15 +2750,14 @@ static StateResult handle_bogus_doctype_state(
|
|
2816
2750
|
}
|
2817
2751
|
|
2818
2752
|
// http://www.whatwg.org/specs/web-apps/current-work/complete.html#cdata-section-state
|
2819
|
-
static StateResult handle_cdata_state(
|
2820
|
-
|
2821
|
-
int c, GumboToken* output) {
|
2753
|
+
static StateResult handle_cdata_state(GumboParser* parser,
|
2754
|
+
GumboTokenizerState* tokenizer, int c, GumboToken* output) {
|
2822
2755
|
if (c == -1 || utf8iterator_maybe_consume_match(
|
2823
|
-
|
2756
|
+
&tokenizer->_input, "]]>", sizeof("]]>") - 1, true)) {
|
2824
2757
|
tokenizer->_reconsume_current_input = true;
|
2825
2758
|
reset_token_start_point(tokenizer);
|
2826
2759
|
gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
|
2827
|
-
tokenizer->_is_in_cdata =
|
2760
|
+
tokenizer->_is_in_cdata = false;
|
2828
2761
|
return NEXT_CHAR;
|
2829
2762
|
} else {
|
2830
2763
|
return emit_current_char(parser, output);
|
@@ -2834,76 +2767,47 @@ static StateResult handle_cdata_state(
|
|
2834
2767
|
typedef StateResult (*GumboLexerStateFunction)(
|
2835
2768
|
GumboParser*, GumboTokenizerState*, int, GumboToken*);
|
2836
2769
|
|
2837
|
-
static GumboLexerStateFunction dispatch_table[] = {
|
2838
|
-
|
2839
|
-
|
2840
|
-
|
2841
|
-
|
2842
|
-
|
2843
|
-
|
2844
|
-
|
2845
|
-
|
2846
|
-
|
2847
|
-
|
2848
|
-
|
2849
|
-
|
2850
|
-
|
2851
|
-
|
2852
|
-
|
2853
|
-
|
2854
|
-
|
2855
|
-
|
2856
|
-
|
2857
|
-
|
2858
|
-
|
2859
|
-
|
2860
|
-
|
2861
|
-
|
2862
|
-
|
2863
|
-
|
2864
|
-
|
2865
|
-
|
2866
|
-
|
2867
|
-
|
2868
|
-
|
2869
|
-
|
2870
|
-
|
2871
|
-
|
2872
|
-
|
2873
|
-
|
2874
|
-
|
2875
|
-
|
2876
|
-
|
2877
|
-
|
2878
|
-
handle_char_ref_in_attr_value_state,
|
2879
|
-
handle_after_attr_value_quoted_state,
|
2880
|
-
handle_self_closing_start_tag_state,
|
2881
|
-
handle_bogus_comment_state,
|
2882
|
-
handle_markup_declaration_state,
|
2883
|
-
handle_comment_start_state,
|
2884
|
-
handle_comment_start_dash_state,
|
2885
|
-
handle_comment_state,
|
2886
|
-
handle_comment_end_dash_state,
|
2887
|
-
handle_comment_end_state,
|
2888
|
-
handle_comment_end_bang_state,
|
2889
|
-
handle_doctype_state,
|
2890
|
-
handle_before_doctype_name_state,
|
2891
|
-
handle_doctype_name_state,
|
2892
|
-
handle_after_doctype_name_state,
|
2893
|
-
handle_after_doctype_public_keyword_state,
|
2894
|
-
handle_before_doctype_public_id_state,
|
2895
|
-
handle_doctype_public_id_double_quoted_state,
|
2896
|
-
handle_doctype_public_id_single_quoted_state,
|
2897
|
-
handle_after_doctype_public_id_state,
|
2898
|
-
handle_between_doctype_public_system_id_state,
|
2899
|
-
handle_after_doctype_system_keyword_state,
|
2900
|
-
handle_before_doctype_system_id_state,
|
2901
|
-
handle_doctype_system_id_double_quoted_state,
|
2902
|
-
handle_doctype_system_id_single_quoted_state,
|
2903
|
-
handle_after_doctype_system_id_state,
|
2904
|
-
handle_bogus_doctype_state,
|
2905
|
-
handle_cdata_state
|
2906
|
-
};
|
2770
|
+
static GumboLexerStateFunction dispatch_table[] = {handle_data_state,
|
2771
|
+
handle_char_ref_in_data_state, handle_rcdata_state,
|
2772
|
+
handle_char_ref_in_rcdata_state, handle_rawtext_state, handle_script_state,
|
2773
|
+
handle_plaintext_state, handle_tag_open_state, handle_end_tag_open_state,
|
2774
|
+
handle_tag_name_state, handle_rcdata_lt_state,
|
2775
|
+
handle_rcdata_end_tag_open_state, handle_rcdata_end_tag_name_state,
|
2776
|
+
handle_rawtext_lt_state, handle_rawtext_end_tag_open_state,
|
2777
|
+
handle_rawtext_end_tag_name_state, handle_script_lt_state,
|
2778
|
+
handle_script_end_tag_open_state, handle_script_end_tag_name_state,
|
2779
|
+
handle_script_escaped_start_state, handle_script_escaped_start_dash_state,
|
2780
|
+
handle_script_escaped_state, handle_script_escaped_dash_state,
|
2781
|
+
handle_script_escaped_dash_dash_state, handle_script_escaped_lt_state,
|
2782
|
+
handle_script_escaped_end_tag_open_state,
|
2783
|
+
handle_script_escaped_end_tag_name_state,
|
2784
|
+
handle_script_double_escaped_start_state,
|
2785
|
+
handle_script_double_escaped_state, handle_script_double_escaped_dash_state,
|
2786
|
+
handle_script_double_escaped_dash_dash_state,
|
2787
|
+
handle_script_double_escaped_lt_state,
|
2788
|
+
handle_script_double_escaped_end_state, handle_before_attr_name_state,
|
2789
|
+
handle_attr_name_state, handle_after_attr_name_state,
|
2790
|
+
handle_before_attr_value_state, handle_attr_value_double_quoted_state,
|
2791
|
+
handle_attr_value_single_quoted_state, handle_attr_value_unquoted_state,
|
2792
|
+
handle_char_ref_in_attr_value_state, handle_after_attr_value_quoted_state,
|
2793
|
+
handle_self_closing_start_tag_state, handle_bogus_comment_state,
|
2794
|
+
handle_markup_declaration_state, handle_comment_start_state,
|
2795
|
+
handle_comment_start_dash_state, handle_comment_state,
|
2796
|
+
handle_comment_end_dash_state, handle_comment_end_state,
|
2797
|
+
handle_comment_end_bang_state, handle_doctype_state,
|
2798
|
+
handle_before_doctype_name_state, handle_doctype_name_state,
|
2799
|
+
handle_after_doctype_name_state, handle_after_doctype_public_keyword_state,
|
2800
|
+
handle_before_doctype_public_id_state,
|
2801
|
+
handle_doctype_public_id_double_quoted_state,
|
2802
|
+
handle_doctype_public_id_single_quoted_state,
|
2803
|
+
handle_after_doctype_public_id_state,
|
2804
|
+
handle_between_doctype_public_system_id_state,
|
2805
|
+
handle_after_doctype_system_keyword_state,
|
2806
|
+
handle_before_doctype_system_id_state,
|
2807
|
+
handle_doctype_system_id_double_quoted_state,
|
2808
|
+
handle_doctype_system_id_single_quoted_state,
|
2809
|
+
handle_after_doctype_system_id_state, handle_bogus_doctype_state,
|
2810
|
+
handle_cdata_state};
|
2907
2811
|
|
2908
2812
|
bool gumbo_lex(GumboParser* parser, GumboToken* output) {
|
2909
2813
|
// Because of the spec requirements that...
|
@@ -2941,8 +2845,8 @@ bool gumbo_lex(GumboParser* parser, GumboToken* output) {
|
|
2941
2845
|
assert(!tokenizer->_temporary_buffer_emit);
|
2942
2846
|
assert(tokenizer->_buffered_emit_char == kGumboNoChar);
|
2943
2847
|
int c = utf8iterator_current(&tokenizer->_input);
|
2944
|
-
gumbo_debug(
|
2945
|
-
c, c, tokenizer->_state);
|
2848
|
+
gumbo_debug(
|
2849
|
+
"Lexing character '%c' (%d) in state %d.\n", c, c, tokenizer->_state);
|
2946
2850
|
StateResult result =
|
2947
2851
|
dispatch_table[tokenizer->_state](parser, tokenizer, c, output);
|
2948
2852
|
// We need to clear reconsume_current_input before returning to prevent
|
@@ -2952,7 +2856,7 @@ bool gumbo_lex(GumboParser* parser, GumboToken* output) {
|
|
2952
2856
|
|
2953
2857
|
if (result == RETURN_SUCCESS) {
|
2954
2858
|
return true;
|
2955
|
-
} else if(result == RETURN_ERROR) {
|
2859
|
+
} else if (result == RETURN_ERROR) {
|
2956
2860
|
return false;
|
2957
2861
|
}
|
2958
2862
|
|
@@ -2974,7 +2878,7 @@ void gumbo_token_destroy(GumboParser* parser, GumboToken* token) {
|
|
2974
2878
|
parser, (void*) token->v.doc_type.system_identifier);
|
2975
2879
|
return;
|
2976
2880
|
case GUMBO_TOKEN_START_TAG:
|
2977
|
-
for (int i = 0; i < token->v.start_tag.attributes.length; ++i) {
|
2881
|
+
for (unsigned int i = 0; i < token->v.start_tag.attributes.length; ++i) {
|
2978
2882
|
GumboAttribute* attr = token->v.start_tag.attributes.data[i];
|
2979
2883
|
if (attr) {
|
2980
2884
|
// May have been nulled out if this token was merged with another.
|