nokogumbo 1.4.2 → 1.4.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,153 @@
1
+ // Generated via `gentags.py src/tag.in`.
2
+ // Do not edit; edit src/tag.in instead.
3
+ // clang-format off
4
+ GUMBO_TAG_HTML,
5
+ GUMBO_TAG_HEAD,
6
+ GUMBO_TAG_TITLE,
7
+ GUMBO_TAG_BASE,
8
+ GUMBO_TAG_LINK,
9
+ GUMBO_TAG_META,
10
+ GUMBO_TAG_STYLE,
11
+ GUMBO_TAG_SCRIPT,
12
+ GUMBO_TAG_NOSCRIPT,
13
+ GUMBO_TAG_TEMPLATE,
14
+ GUMBO_TAG_BODY,
15
+ GUMBO_TAG_ARTICLE,
16
+ GUMBO_TAG_SECTION,
17
+ GUMBO_TAG_NAV,
18
+ GUMBO_TAG_ASIDE,
19
+ GUMBO_TAG_H1,
20
+ GUMBO_TAG_H2,
21
+ GUMBO_TAG_H3,
22
+ GUMBO_TAG_H4,
23
+ GUMBO_TAG_H5,
24
+ GUMBO_TAG_H6,
25
+ GUMBO_TAG_HGROUP,
26
+ GUMBO_TAG_HEADER,
27
+ GUMBO_TAG_FOOTER,
28
+ GUMBO_TAG_ADDRESS,
29
+ GUMBO_TAG_P,
30
+ GUMBO_TAG_HR,
31
+ GUMBO_TAG_PRE,
32
+ GUMBO_TAG_BLOCKQUOTE,
33
+ GUMBO_TAG_OL,
34
+ GUMBO_TAG_UL,
35
+ GUMBO_TAG_LI,
36
+ GUMBO_TAG_DL,
37
+ GUMBO_TAG_DT,
38
+ GUMBO_TAG_DD,
39
+ GUMBO_TAG_FIGURE,
40
+ GUMBO_TAG_FIGCAPTION,
41
+ GUMBO_TAG_MAIN,
42
+ GUMBO_TAG_DIV,
43
+ GUMBO_TAG_A,
44
+ GUMBO_TAG_EM,
45
+ GUMBO_TAG_STRONG,
46
+ GUMBO_TAG_SMALL,
47
+ GUMBO_TAG_S,
48
+ GUMBO_TAG_CITE,
49
+ GUMBO_TAG_Q,
50
+ GUMBO_TAG_DFN,
51
+ GUMBO_TAG_ABBR,
52
+ GUMBO_TAG_DATA,
53
+ GUMBO_TAG_TIME,
54
+ GUMBO_TAG_CODE,
55
+ GUMBO_TAG_VAR,
56
+ GUMBO_TAG_SAMP,
57
+ GUMBO_TAG_KBD,
58
+ GUMBO_TAG_SUB,
59
+ GUMBO_TAG_SUP,
60
+ GUMBO_TAG_I,
61
+ GUMBO_TAG_B,
62
+ GUMBO_TAG_U,
63
+ GUMBO_TAG_MARK,
64
+ GUMBO_TAG_RUBY,
65
+ GUMBO_TAG_RT,
66
+ GUMBO_TAG_RP,
67
+ GUMBO_TAG_BDI,
68
+ GUMBO_TAG_BDO,
69
+ GUMBO_TAG_SPAN,
70
+ GUMBO_TAG_BR,
71
+ GUMBO_TAG_WBR,
72
+ GUMBO_TAG_INS,
73
+ GUMBO_TAG_DEL,
74
+ GUMBO_TAG_IMAGE,
75
+ GUMBO_TAG_IMG,
76
+ GUMBO_TAG_IFRAME,
77
+ GUMBO_TAG_EMBED,
78
+ GUMBO_TAG_OBJECT,
79
+ GUMBO_TAG_PARAM,
80
+ GUMBO_TAG_VIDEO,
81
+ GUMBO_TAG_AUDIO,
82
+ GUMBO_TAG_SOURCE,
83
+ GUMBO_TAG_TRACK,
84
+ GUMBO_TAG_CANVAS,
85
+ GUMBO_TAG_MAP,
86
+ GUMBO_TAG_AREA,
87
+ GUMBO_TAG_MATH,
88
+ GUMBO_TAG_MI,
89
+ GUMBO_TAG_MO,
90
+ GUMBO_TAG_MN,
91
+ GUMBO_TAG_MS,
92
+ GUMBO_TAG_MTEXT,
93
+ GUMBO_TAG_MGLYPH,
94
+ GUMBO_TAG_MALIGNMARK,
95
+ GUMBO_TAG_ANNOTATION_XML,
96
+ GUMBO_TAG_SVG,
97
+ GUMBO_TAG_FOREIGNOBJECT,
98
+ GUMBO_TAG_DESC,
99
+ GUMBO_TAG_TABLE,
100
+ GUMBO_TAG_CAPTION,
101
+ GUMBO_TAG_COLGROUP,
102
+ GUMBO_TAG_COL,
103
+ GUMBO_TAG_TBODY,
104
+ GUMBO_TAG_THEAD,
105
+ GUMBO_TAG_TFOOT,
106
+ GUMBO_TAG_TR,
107
+ GUMBO_TAG_TD,
108
+ GUMBO_TAG_TH,
109
+ GUMBO_TAG_FORM,
110
+ GUMBO_TAG_FIELDSET,
111
+ GUMBO_TAG_LEGEND,
112
+ GUMBO_TAG_LABEL,
113
+ GUMBO_TAG_INPUT,
114
+ GUMBO_TAG_BUTTON,
115
+ GUMBO_TAG_SELECT,
116
+ GUMBO_TAG_DATALIST,
117
+ GUMBO_TAG_OPTGROUP,
118
+ GUMBO_TAG_OPTION,
119
+ GUMBO_TAG_TEXTAREA,
120
+ GUMBO_TAG_KEYGEN,
121
+ GUMBO_TAG_OUTPUT,
122
+ GUMBO_TAG_PROGRESS,
123
+ GUMBO_TAG_METER,
124
+ GUMBO_TAG_DETAILS,
125
+ GUMBO_TAG_SUMMARY,
126
+ GUMBO_TAG_MENU,
127
+ GUMBO_TAG_MENUITEM,
128
+ GUMBO_TAG_APPLET,
129
+ GUMBO_TAG_ACRONYM,
130
+ GUMBO_TAG_BGSOUND,
131
+ GUMBO_TAG_DIR,
132
+ GUMBO_TAG_FRAME,
133
+ GUMBO_TAG_FRAMESET,
134
+ GUMBO_TAG_NOFRAMES,
135
+ GUMBO_TAG_ISINDEX,
136
+ GUMBO_TAG_LISTING,
137
+ GUMBO_TAG_XMP,
138
+ GUMBO_TAG_NEXTID,
139
+ GUMBO_TAG_NOEMBED,
140
+ GUMBO_TAG_PLAINTEXT,
141
+ GUMBO_TAG_RB,
142
+ GUMBO_TAG_STRIKE,
143
+ GUMBO_TAG_BASEFONT,
144
+ GUMBO_TAG_BIG,
145
+ GUMBO_TAG_BLINK,
146
+ GUMBO_TAG_CENTER,
147
+ GUMBO_TAG_FONT,
148
+ GUMBO_TAG_MARQUEE,
149
+ GUMBO_TAG_MULTICOL,
150
+ GUMBO_TAG_NOBR,
151
+ GUMBO_TAG_SPACER,
152
+ GUMBO_TAG_TT,
153
+ GUMBO_TAG_RTC,
@@ -0,0 +1,105 @@
1
+ static unsigned int tag_hash(
2
+ register const char *str, register unsigned int len) {
3
+ static unsigned short asso_values[] = {296, 296, 296, 296, 296, 296, 296, 296,
4
+ 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296,
5
+ 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296,
6
+ 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 6, 4, 3, 1, 1, 0,
7
+ 1, 0, 0, 296, 296, 296, 296, 296, 296, 296, 22, 73, 151, 4, 13, 59, 65, 2,
8
+ 69, 0, 134, 9, 16, 52, 55, 28, 101, 0, 1, 6, 63, 126, 104, 93, 124, 296,
9
+ 296, 296, 296, 296, 296, 296, 22, 73, 151, 4, 13, 59, 65, 2, 69, 0, 134,
10
+ 9, 16, 52, 55, 28, 101, 0, 1, 6, 63, 126, 104, 93, 124, 296, 296, 296,
11
+ 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296,
12
+ 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296,
13
+ 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296,
14
+ 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296,
15
+ 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296,
16
+ 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296,
17
+ 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296,
18
+ 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296,
19
+ 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296};
20
+ register unsigned int hval = len;
21
+
22
+ switch (hval) {
23
+ default:
24
+ hval += asso_values[(unsigned char) str[1] + 3];
25
+ /*FALLTHROUGH*/
26
+ case 1:
27
+ hval += asso_values[(unsigned char) str[0]];
28
+ break;
29
+ }
30
+ return hval + asso_values[(unsigned char) str[len - 1]];
31
+ }
32
+
33
+ static const unsigned char kGumboTagMap[] = {GUMBO_TAG_LAST, GUMBO_TAG_LAST,
34
+ GUMBO_TAG_LAST, GUMBO_TAG_S, GUMBO_TAG_H6, GUMBO_TAG_H5, GUMBO_TAG_H4,
35
+ GUMBO_TAG_H3, GUMBO_TAG_SPACER, GUMBO_TAG_H2, GUMBO_TAG_HEADER,
36
+ GUMBO_TAG_H1, GUMBO_TAG_HEAD, GUMBO_TAG_LAST, GUMBO_TAG_DETAILS,
37
+ GUMBO_TAG_SELECT, GUMBO_TAG_DIR, GUMBO_TAG_LAST, GUMBO_TAG_DEL,
38
+ GUMBO_TAG_LAST, GUMBO_TAG_SOURCE, GUMBO_TAG_LEGEND, GUMBO_TAG_DATALIST,
39
+ GUMBO_TAG_METER, GUMBO_TAG_MGLYPH, GUMBO_TAG_LAST, GUMBO_TAG_MATH,
40
+ GUMBO_TAG_LABEL, GUMBO_TAG_TABLE, GUMBO_TAG_TEMPLATE, GUMBO_TAG_LAST,
41
+ GUMBO_TAG_RP, GUMBO_TAG_TIME, GUMBO_TAG_TITLE, GUMBO_TAG_DATA,
42
+ GUMBO_TAG_APPLET, GUMBO_TAG_HGROUP, GUMBO_TAG_SAMP, GUMBO_TAG_TEXTAREA,
43
+ GUMBO_TAG_ABBR, GUMBO_TAG_MARQUEE, GUMBO_TAG_LAST, GUMBO_TAG_MENUITEM,
44
+ GUMBO_TAG_SMALL, GUMBO_TAG_META, GUMBO_TAG_A, GUMBO_TAG_LAST,
45
+ GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_EMBED,
46
+ GUMBO_TAG_MAP, GUMBO_TAG_LAST, GUMBO_TAG_PARAM, GUMBO_TAG_LAST,
47
+ GUMBO_TAG_LAST, GUMBO_TAG_NOBR, GUMBO_TAG_P, GUMBO_TAG_SPAN, GUMBO_TAG_EM,
48
+ GUMBO_TAG_LAST, GUMBO_TAG_NOFRAMES, GUMBO_TAG_SECTION, GUMBO_TAG_NOEMBED,
49
+ GUMBO_TAG_NEXTID, GUMBO_TAG_FOOTER, GUMBO_TAG_NOSCRIPT, GUMBO_TAG_HR,
50
+ GUMBO_TAG_LAST, GUMBO_TAG_FONT, GUMBO_TAG_DL, GUMBO_TAG_TR,
51
+ GUMBO_TAG_SCRIPT, GUMBO_TAG_MO, GUMBO_TAG_LAST, GUMBO_TAG_DD,
52
+ GUMBO_TAG_MAIN, GUMBO_TAG_TD, GUMBO_TAG_FOREIGNOBJECT, GUMBO_TAG_FORM,
53
+ GUMBO_TAG_OBJECT, GUMBO_TAG_LAST, GUMBO_TAG_FIELDSET, GUMBO_TAG_LAST,
54
+ GUMBO_TAG_BGSOUND, GUMBO_TAG_MENU, GUMBO_TAG_TFOOT, GUMBO_TAG_FIGURE,
55
+ GUMBO_TAG_RB, GUMBO_TAG_LI, GUMBO_TAG_LISTING, GUMBO_TAG_BASEFONT,
56
+ GUMBO_TAG_OPTGROUP, GUMBO_TAG_LAST, GUMBO_TAG_BASE, GUMBO_TAG_ADDRESS,
57
+ GUMBO_TAG_MI, GUMBO_TAG_LAST, GUMBO_TAG_PLAINTEXT, GUMBO_TAG_LAST,
58
+ GUMBO_TAG_PROGRESS, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
59
+ GUMBO_TAG_ACRONYM, GUMBO_TAG_ARTICLE, GUMBO_TAG_LAST, GUMBO_TAG_PRE,
60
+ GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_AREA,
61
+ GUMBO_TAG_RT, GUMBO_TAG_LAST, GUMBO_TAG_OPTION, GUMBO_TAG_IMAGE,
62
+ GUMBO_TAG_DT, GUMBO_TAG_LAST, GUMBO_TAG_TT, GUMBO_TAG_HTML, GUMBO_TAG_WBR,
63
+ GUMBO_TAG_OL, GUMBO_TAG_LAST, GUMBO_TAG_STYLE, GUMBO_TAG_STRIKE,
64
+ GUMBO_TAG_SUP, GUMBO_TAG_MULTICOL, GUMBO_TAG_U, GUMBO_TAG_DFN, GUMBO_TAG_UL,
65
+ GUMBO_TAG_FIGCAPTION, GUMBO_TAG_MTEXT, GUMBO_TAG_LAST, GUMBO_TAG_VAR,
66
+ GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_FRAMESET, GUMBO_TAG_LAST,
67
+ GUMBO_TAG_BR, GUMBO_TAG_I, GUMBO_TAG_FRAME, GUMBO_TAG_LAST, GUMBO_TAG_DIV,
68
+ GUMBO_TAG_LAST, GUMBO_TAG_TH, GUMBO_TAG_MS, GUMBO_TAG_ANNOTATION_XML,
69
+ GUMBO_TAG_B, GUMBO_TAG_TBODY, GUMBO_TAG_THEAD, GUMBO_TAG_BIG,
70
+ GUMBO_TAG_BLOCKQUOTE, GUMBO_TAG_XMP, GUMBO_TAG_LAST, GUMBO_TAG_KBD,
71
+ GUMBO_TAG_LAST, GUMBO_TAG_LINK, GUMBO_TAG_IFRAME, GUMBO_TAG_MARK,
72
+ GUMBO_TAG_CENTER, GUMBO_TAG_OUTPUT, GUMBO_TAG_DESC, GUMBO_TAG_CANVAS,
73
+ GUMBO_TAG_COL, GUMBO_TAG_MALIGNMARK, GUMBO_TAG_IMG, GUMBO_TAG_ASIDE,
74
+ GUMBO_TAG_LAST, GUMBO_TAG_CODE, GUMBO_TAG_LAST, GUMBO_TAG_SUB, GUMBO_TAG_MN,
75
+ GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_INS, GUMBO_TAG_AUDIO,
76
+ GUMBO_TAG_STRONG, GUMBO_TAG_CITE, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
77
+ GUMBO_TAG_LAST, GUMBO_TAG_INPUT, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
78
+ GUMBO_TAG_LAST, GUMBO_TAG_NAV, GUMBO_TAG_LAST, GUMBO_TAG_COLGROUP,
79
+ GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
80
+ GUMBO_TAG_LAST, GUMBO_TAG_SVG, GUMBO_TAG_KEYGEN, GUMBO_TAG_VIDEO,
81
+ GUMBO_TAG_BDO, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
82
+ GUMBO_TAG_LAST, GUMBO_TAG_BODY, GUMBO_TAG_LAST, GUMBO_TAG_Q, GUMBO_TAG_LAST,
83
+ GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_TRACK,
84
+ GUMBO_TAG_LAST, GUMBO_TAG_BDI, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
85
+ GUMBO_TAG_LAST, GUMBO_TAG_CAPTION, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
86
+ GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
87
+ GUMBO_TAG_RUBY, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_BUTTON,
88
+ GUMBO_TAG_SUMMARY, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
89
+ GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
90
+ GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
91
+ GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
92
+ GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
93
+ GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
94
+ GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
95
+ GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
96
+ GUMBO_TAG_LAST, GUMBO_TAG_RTC, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
97
+ GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
98
+ GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_BLINK, GUMBO_TAG_LAST,
99
+ GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
100
+ GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
101
+ GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
102
+ GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
103
+ GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
104
+ GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
105
+ GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_ISINDEX};
@@ -0,0 +1,4 @@
1
+ // Generated via `gentags.py src/tag.in`.
2
+ // Do not edit; edit src/tag.in instead.
3
+ // clang-format off
4
+ 4, 4, 5, 4, 4, 4, 5, 6, 8, 8, 4, 7, 7, 3, 5, 2, 2, 2, 2, 2, 2, 6, 6, 6, 7, 1, 2, 3, 10, 2, 2, 2, 2, 2, 2, 6, 10, 4, 3, 1, 2, 6, 5, 1, 4, 1, 3, 4, 4, 4, 4, 3, 4, 3, 3, 3, 1, 1, 1, 4, 4, 2, 2, 3, 3, 4, 2, 3, 3, 3, 5, 3, 6, 5, 6, 5, 5, 5, 6, 5, 6, 3, 4, 4, 2, 2, 2, 2, 5, 6, 10, 14, 3, 13, 4, 5, 7, 8, 3, 5, 5, 5, 2, 2, 2, 4, 8, 6, 5, 5, 6, 6, 8, 8, 6, 8, 6, 6, 8, 5, 7, 7, 4, 8, 6, 7, 7, 3, 5, 8, 8, 7, 7, 3, 6, 7, 9, 2, 6, 8, 3, 5, 6, 4, 7, 8, 4, 6, 2, 3,
@@ -0,0 +1,153 @@
1
+ // Generated via `gentags.py src/tag.in`.
2
+ // Do not edit; edit src/tag.in instead.
3
+ // clang-format off
4
+ "html",
5
+ "head",
6
+ "title",
7
+ "base",
8
+ "link",
9
+ "meta",
10
+ "style",
11
+ "script",
12
+ "noscript",
13
+ "template",
14
+ "body",
15
+ "article",
16
+ "section",
17
+ "nav",
18
+ "aside",
19
+ "h1",
20
+ "h2",
21
+ "h3",
22
+ "h4",
23
+ "h5",
24
+ "h6",
25
+ "hgroup",
26
+ "header",
27
+ "footer",
28
+ "address",
29
+ "p",
30
+ "hr",
31
+ "pre",
32
+ "blockquote",
33
+ "ol",
34
+ "ul",
35
+ "li",
36
+ "dl",
37
+ "dt",
38
+ "dd",
39
+ "figure",
40
+ "figcaption",
41
+ "main",
42
+ "div",
43
+ "a",
44
+ "em",
45
+ "strong",
46
+ "small",
47
+ "s",
48
+ "cite",
49
+ "q",
50
+ "dfn",
51
+ "abbr",
52
+ "data",
53
+ "time",
54
+ "code",
55
+ "var",
56
+ "samp",
57
+ "kbd",
58
+ "sub",
59
+ "sup",
60
+ "i",
61
+ "b",
62
+ "u",
63
+ "mark",
64
+ "ruby",
65
+ "rt",
66
+ "rp",
67
+ "bdi",
68
+ "bdo",
69
+ "span",
70
+ "br",
71
+ "wbr",
72
+ "ins",
73
+ "del",
74
+ "image",
75
+ "img",
76
+ "iframe",
77
+ "embed",
78
+ "object",
79
+ "param",
80
+ "video",
81
+ "audio",
82
+ "source",
83
+ "track",
84
+ "canvas",
85
+ "map",
86
+ "area",
87
+ "math",
88
+ "mi",
89
+ "mo",
90
+ "mn",
91
+ "ms",
92
+ "mtext",
93
+ "mglyph",
94
+ "malignmark",
95
+ "annotation-xml",
96
+ "svg",
97
+ "foreignobject",
98
+ "desc",
99
+ "table",
100
+ "caption",
101
+ "colgroup",
102
+ "col",
103
+ "tbody",
104
+ "thead",
105
+ "tfoot",
106
+ "tr",
107
+ "td",
108
+ "th",
109
+ "form",
110
+ "fieldset",
111
+ "legend",
112
+ "label",
113
+ "input",
114
+ "button",
115
+ "select",
116
+ "datalist",
117
+ "optgroup",
118
+ "option",
119
+ "textarea",
120
+ "keygen",
121
+ "output",
122
+ "progress",
123
+ "meter",
124
+ "details",
125
+ "summary",
126
+ "menu",
127
+ "menuitem",
128
+ "applet",
129
+ "acronym",
130
+ "bgsound",
131
+ "dir",
132
+ "frame",
133
+ "frameset",
134
+ "noframes",
135
+ "isindex",
136
+ "listing",
137
+ "xmp",
138
+ "nextid",
139
+ "noembed",
140
+ "plaintext",
141
+ "rb",
142
+ "strike",
143
+ "basefont",
144
+ "big",
145
+ "blink",
146
+ "center",
147
+ "font",
148
+ "marquee",
149
+ "multicol",
150
+ "nobr",
151
+ "spacer",
152
+ "tt",
153
+ "rtc",
@@ -42,7 +42,6 @@
42
42
  // prevents parse error position from being messed up by possible mark/resets in
43
43
  // temporary buffer manipulation.
44
44
 
45
-
46
45
  #include "tokenizer.h"
47
46
 
48
47
  #include <assert.h>
@@ -64,13 +63,13 @@
64
63
 
65
64
  // Compared against _script_data_buffer to determine if we're in double-escaped
66
65
  // script mode.
67
- const GumboStringPiece kScriptTag = { "script", 6 };
66
+ const GumboStringPiece kScriptTag = {"script", 6};
68
67
 
69
68
  // An enum for the return value of each individual state.
70
69
  typedef enum {
71
- RETURN_ERROR, // Return false (error) from the tokenizer.
72
- RETURN_SUCCESS, // Return true (success) from the tokenizer.
73
- NEXT_CHAR // Proceed to the next character and continue lexing.
70
+ RETURN_ERROR, // Return false (error) from the tokenizer.
71
+ RETURN_SUCCESS, // Return true (success) from the tokenizer.
72
+ NEXT_CHAR // Proceed to the next character and continue lexing.
74
73
  } StateResult;
75
74
 
76
75
  // This is a struct containing state necessary to build up a tag token,
@@ -200,7 +199,8 @@ typedef struct GumboInternalTokenizerState {
200
199
  } GumboTokenizerState;
201
200
 
202
201
  // Adds a parse error of the given type to the parser's error struct.
203
- static void tokenizer_add_parse_error(GumboParser* parser, GumboErrorType type) {
202
+ static void tokenizer_add_parse_error(
203
+ GumboParser* parser, GumboErrorType type) {
204
204
  GumboError* error = gumbo_add_error(parser);
205
205
  if (!error) {
206
206
  return;
@@ -356,12 +356,10 @@ static void clear_temporary_buffer(GumboParser* parser) {
356
356
  GumboTokenizerState* tokenizer = parser->_tokenizer_state;
357
357
  assert(!tokenizer->_temporary_buffer_emit);
358
358
  utf8iterator_mark(&tokenizer->_input);
359
- gumbo_string_buffer_destroy(parser, &tokenizer->_temporary_buffer);
360
- gumbo_string_buffer_init(parser, &tokenizer->_temporary_buffer);
359
+ gumbo_string_buffer_clear(parser, &tokenizer->_temporary_buffer);
361
360
  // The temporary buffer and script data buffer are the same object in the
362
361
  // spec, so the script data buffer should be cleared as well.
363
- gumbo_string_buffer_destroy(parser, &tokenizer->_script_data_buffer);
364
- gumbo_string_buffer_init(parser, &tokenizer->_script_data_buffer);
362
+ gumbo_string_buffer_clear(parser, &tokenizer->_script_data_buffer);
365
363
  }
366
364
 
367
365
  // Appends a codepoint to the temporary buffer.
@@ -374,15 +372,14 @@ static void append_char_to_temporary_buffer(
374
372
  // Checks to see if the temporary buffer equals a certain string.
375
373
  // Make sure this remains side-effect free; it's used in assertions.
376
374
  #ifndef NDEBUG
377
- static bool temporary_buffer_equals(
378
- GumboParser* parser, const char* text) {
375
+ static bool temporary_buffer_equals(GumboParser* parser, const char* text) {
379
376
  GumboStringBuffer* buffer = &parser->_tokenizer_state->_temporary_buffer;
380
377
  // TODO(jdtang): See if the extra strlen is a performance problem, and replace
381
378
  // it with an explicit sizeof(literal) if necessary. I don't think it will
382
379
  // be, as this is only used in a couple of rare states.
383
380
  int text_len = strlen(text);
384
381
  return text_len == buffer->length &&
385
- memcmp(buffer->data, text, text_len) == 0;
382
+ memcmp(buffer->data, text, text_len) == 0;
386
383
  }
387
384
  #endif
388
385
 
@@ -539,8 +536,8 @@ static StateResult emit_current_tag(GumboParser* parser, GumboToken* output) {
539
536
  output->v.start_tag.is_self_closing = tag_state->_is_self_closing;
540
537
  tag_state->_last_start_tag = tag_state->_tag;
541
538
  mark_tag_state_as_empty(tag_state);
542
- gumbo_debug("Emitted start tag %s.\n",
543
- gumbo_normalized_tagname(tag_state->_tag));
539
+ gumbo_debug(
540
+ "Emitted start tag %s.\n", gumbo_normalized_tagname(tag_state->_tag));
544
541
  } else {
545
542
  output->type = GUMBO_TOKEN_END_TAG;
546
543
  output->v.end_tag = tag_state->_tag;
@@ -548,17 +545,18 @@ static StateResult emit_current_tag(GumboParser* parser, GumboToken* output) {
548
545
  // token, but it's still initialized as normal, so it must be manually
549
546
  // deallocated. There may also be attributes to destroy, in certain broken
550
547
  // cases like </div</th> (the "th" is an attribute there).
551
- for (int i = 0; i < tag_state->_attributes.length; ++i) {
548
+ for (unsigned int i = 0; i < tag_state->_attributes.length; ++i) {
552
549
  gumbo_destroy_attribute(parser, tag_state->_attributes.data[i]);
553
550
  }
554
551
  gumbo_parser_deallocate(parser, tag_state->_attributes.data);
555
552
  mark_tag_state_as_empty(tag_state);
556
- gumbo_debug("Emitted end tag %s.\n",
557
- gumbo_normalized_tagname(tag_state->_tag));
553
+ gumbo_debug(
554
+ "Emitted end tag %s.\n", gumbo_normalized_tagname(tag_state->_tag));
558
555
  }
559
556
  gumbo_string_buffer_destroy(parser, &tag_state->_buffer);
560
557
  finish_token(parser, output);
561
- gumbo_debug("Original text = %.*s.\n", output->original_text.length, output->original_text.data);
558
+ gumbo_debug("Original text = %.*s.\n", output->original_text.length,
559
+ output->original_text.data);
562
560
  assert(output->original_text.length >= 2);
563
561
  assert(output->original_text.data[0] == '<');
564
562
  assert(output->original_text.data[output->original_text.length - 1] == '>');
@@ -571,7 +569,7 @@ static StateResult emit_current_tag(GumboParser* parser, GumboToken* output) {
571
569
  // avoid a memory leak.
572
570
  static void abandon_current_tag(GumboParser* parser) {
573
571
  GumboTagState* tag_state = &parser->_tokenizer_state->_tag_state;
574
- for (int i = 0; i < tag_state->_attributes.length; ++i) {
572
+ for (unsigned int i = 0; i < tag_state->_attributes.length; ++i) {
575
573
  gumbo_destroy_attribute(parser, tag_state->_attributes.data[i]);
576
574
  }
577
575
  gumbo_parser_deallocate(parser, tag_state->_attributes.data);
@@ -583,9 +581,8 @@ static void abandon_current_tag(GumboParser* parser) {
583
581
  // Wraps the consume_char_ref function to handle its output and make the
584
582
  // appropriate TokenizerState modifications. Returns RETURN_ERROR if a parse
585
583
  // error occurred, RETURN_SUCCESS otherwise.
586
- static StateResult emit_char_ref(
587
- GumboParser* parser, int additional_allowed_char,
588
- bool is_in_attribute, GumboToken* output) {
584
+ static StateResult emit_char_ref(GumboParser* parser,
585
+ int additional_allowed_char, bool is_in_attribute, GumboToken* output) {
589
586
  GumboTokenizerState* tokenizer = parser->_tokenizer_state;
590
587
  OneOrTwoCodepoints char_ref;
591
588
  bool status = consume_char_ref(
@@ -649,8 +646,7 @@ static bool maybe_emit_from_temporary_buffer(
649
646
  // _temporary_buffer_emit, and then (if the temporary buffer is non-empty) emits
650
647
  // the first character in it. It returns true if a character was emitted, false
651
648
  // otherwise.
652
- static bool emit_temporary_buffer(
653
- GumboParser* parser, GumboToken* output) {
649
+ static bool emit_temporary_buffer(GumboParser* parser, GumboToken* output) {
654
650
  GumboTokenizerState* tokenizer = parser->_tokenizer_state;
655
651
  assert(tokenizer->_temporary_buffer.data);
656
652
  utf8iterator_reset(&tokenizer->_input);
@@ -663,8 +659,8 @@ static bool emit_temporary_buffer(
663
659
  // start point; the only time you would *not* want to pass true for this
664
660
  // parameter is if you want the original_text to include a character (like an
665
661
  // opening quote) that doesn't appear in the value.
666
- static void append_char_to_tag_buffer(GumboParser* parser, int codepoint,
667
- bool reinitilize_position_on_first) {
662
+ static void append_char_to_tag_buffer(
663
+ GumboParser* parser, int codepoint, bool reinitilize_position_on_first) {
668
664
  GumboStringBuffer* buffer = &parser->_tokenizer_state->_tag_state._buffer;
669
665
  if (buffer->length == 0 && reinitilize_position_on_first) {
670
666
  reset_tag_buffer_start_point(parser);
@@ -697,7 +693,11 @@ static void start_new_tag(GumboParser* parser, bool is_start_tag) {
697
693
  gumbo_string_buffer_append_codepoint(parser, c, &tag_state->_buffer);
698
694
 
699
695
  assert(tag_state->_attributes.data == NULL);
700
- gumbo_vector_init(parser, 4, &tag_state->_attributes);
696
+ // Initial size chosen by statistical analysis of a corpus of 60k webpages.
697
+ // 99.5% of elements have 0 attributes, 93% of the remainder have 1. These
698
+ // numbers are a bit higher for more modern websites (e.g. ~45% = 0, ~40% = 1
699
+ // for the HTML5 Spec), but still have basically 99% of nodes with <= 2 attrs.
700
+ gumbo_vector_init(parser, 1, &tag_state->_attributes);
701
701
  tag_state->_drop_next_attr_value = false;
702
702
  tag_state->_is_start_tag = is_start_tag;
703
703
  tag_state->_is_self_closing = false;
@@ -717,16 +717,15 @@ static void copy_over_tag_buffer(GumboParser* parser, const char** output) {
717
717
  // * The start_pos GumboSourcePosition with the start position of the tag
718
718
  // buffer.
719
719
  // * The end_pos GumboSourcePosition with the current source position.
720
- static void copy_over_original_tag_text(
721
- GumboParser* parser, GumboStringPiece* original_text,
722
- GumboSourcePosition* start_pos, GumboSourcePosition* end_pos) {
720
+ static void copy_over_original_tag_text(GumboParser* parser,
721
+ GumboStringPiece* original_text, GumboSourcePosition* start_pos,
722
+ GumboSourcePosition* end_pos) {
723
723
  GumboTokenizerState* tokenizer = parser->_tokenizer_state;
724
724
  GumboTagState* tag_state = &tokenizer->_tag_state;
725
725
 
726
726
  original_text->data = tag_state->_original_text;
727
- original_text->length =
728
- utf8iterator_get_char_pointer(&tokenizer->_input) -
729
- tag_state->_original_text;
727
+ original_text->length = utf8iterator_get_char_pointer(&tokenizer->_input) -
728
+ tag_state->_original_text;
730
729
  if (original_text->data[original_text->length - 1] == '\r') {
731
730
  // Since \r is skipped by the UTF-8 iterator, it can sometimes end up
732
731
  // appended to the end of original text even when it's really the first part
@@ -751,16 +750,14 @@ static void finish_tag_name(GumboParser* parser) {
751
750
  GumboTokenizerState* tokenizer = parser->_tokenizer_state;
752
751
  GumboTagState* tag_state = &tokenizer->_tag_state;
753
752
 
754
- const char* temp;
755
- copy_over_tag_buffer(parser, &temp);
756
- tag_state->_tag = gumbo_tag_enum(temp);
753
+ tag_state->_tag =
754
+ gumbo_tagn_enum(tag_state->_buffer.data, tag_state->_buffer.length);
757
755
  reinitialize_tag_buffer(parser);
758
- gumbo_parser_deallocate(parser, (void*) temp);
759
756
  }
760
757
 
761
758
  // Adds an ERR_DUPLICATE_ATTR parse error to the parser's error struct.
762
759
  static void add_duplicate_attr_error(GumboParser* parser, const char* attr_name,
763
- int original_index, int new_index) {
760
+ int original_index, int new_index) {
764
761
  GumboError* error = gumbo_add_error(parser);
765
762
  if (!error) {
766
763
  return;
@@ -790,14 +787,13 @@ static bool finish_attribute_name(GumboParser* parser) {
790
787
  assert(tag_state->_attributes.capacity);
791
788
 
792
789
  GumboVector* /* GumboAttribute* */ attributes = &tag_state->_attributes;
793
- for (int i = 0; i < attributes->length; ++i) {
790
+ for (unsigned int i = 0; i < attributes->length; ++i) {
794
791
  GumboAttribute* attr = attributes->data[i];
795
792
  if (strlen(attr->name) == tag_state->_buffer.length &&
796
793
  memcmp(attr->name, tag_state->_buffer.data,
797
- tag_state->_buffer.length) == 0) {
794
+ tag_state->_buffer.length) == 0) {
798
795
  // Identical attribute; bail.
799
- add_duplicate_attr_error(
800
- parser, attr->name, i, attributes->length);
796
+ add_duplicate_attr_error(parser, attr->name, i, attributes->length);
801
797
  tag_state->_drop_next_attr_value = true;
802
798
  return false;
803
799
  }
@@ -806,11 +802,11 @@ static bool finish_attribute_name(GumboParser* parser) {
806
802
  GumboAttribute* attr = gumbo_parser_allocate(parser, sizeof(GumboAttribute));
807
803
  attr->attr_namespace = GUMBO_ATTR_NAMESPACE_NONE;
808
804
  copy_over_tag_buffer(parser, &attr->name);
809
- copy_over_original_tag_text(parser, &attr->original_name,
810
- &attr->name_start, &attr->name_end);
805
+ copy_over_original_tag_text(
806
+ parser, &attr->original_name, &attr->name_start, &attr->name_end);
811
807
  attr->value = gumbo_copy_stringz(parser, "");
812
- copy_over_original_tag_text(parser, &attr->original_value,
813
- &attr->name_start, &attr->name_end);
808
+ copy_over_original_tag_text(
809
+ parser, &attr->original_value, &attr->name_start, &attr->name_end);
814
810
  gumbo_vector_add(parser, attr, attributes);
815
811
  reinitialize_tag_buffer(parser);
816
812
  return true;
@@ -832,8 +828,8 @@ static void finish_attribute_value(GumboParser* parser) {
832
828
  tag_state->_attributes.data[tag_state->_attributes.length - 1];
833
829
  gumbo_parser_deallocate(parser, (void*) attr->value);
834
830
  copy_over_tag_buffer(parser, &attr->value);
835
- copy_over_original_tag_text(parser, &attr->original_value,
836
- &attr->value_start, &attr->value_end);
831
+ copy_over_original_tag_text(
832
+ parser, &attr->original_value, &attr->value_start, &attr->value_end);
837
833
  reinitialize_tag_buffer(parser);
838
834
  }
839
835
 
@@ -841,13 +837,9 @@ static void finish_attribute_value(GumboParser* parser) {
841
837
  static bool is_appropriate_end_tag(GumboParser* parser) {
842
838
  GumboTagState* tag_state = &parser->_tokenizer_state->_tag_state;
843
839
  assert(!tag_state->_is_start_tag);
844
- // Null terminate the current string buffer, so it can be passed to
845
- // gumbo_tag_enum, but don't increment the length in case we need to dump the
846
- // buffer as character tokens.
847
- gumbo_string_buffer_append_codepoint(parser, '\0', &tag_state->_buffer);
848
- --tag_state->_buffer.length;
849
840
  return tag_state->_last_start_tag != GUMBO_TAG_LAST &&
850
- tag_state->_last_start_tag == gumbo_tag_enum(tag_state->_buffer.data);
841
+ tag_state->_last_start_tag == gumbo_tagn_enum(tag_state->_buffer.data,
842
+ tag_state->_buffer.length);
851
843
  }
852
844
 
853
845
  void gumbo_tokenizer_state_init(
@@ -892,15 +884,14 @@ void gumbo_tokenizer_set_is_current_node_foreign(
892
884
  GumboParser* parser, bool is_foreign) {
893
885
  if (is_foreign != parser->_tokenizer_state->_is_current_node_foreign) {
894
886
  gumbo_debug("Toggling is_current_node_foreign to %s.\n",
895
- is_foreign ? "true" : "false");
887
+ is_foreign ? "true" : "false");
896
888
  }
897
889
  parser->_tokenizer_state->_is_current_node_foreign = is_foreign;
898
890
  }
899
891
 
900
892
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#data-state
901
- static StateResult handle_data_state(
902
- GumboParser* parser, GumboTokenizerState* tokenizer,
903
- int c, GumboToken* output) {
893
+ static StateResult handle_data_state(GumboParser* parser,
894
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
904
895
  switch (c) {
905
896
  case '&':
906
897
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_CHAR_REF_IN_DATA);
@@ -924,17 +915,15 @@ static StateResult handle_data_state(
924
915
  }
925
916
 
926
917
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#character-reference-in-data-state
927
- static StateResult handle_char_ref_in_data_state(
928
- GumboParser* parser, GumboTokenizerState* tokenizer,
929
- int c, GumboToken* output) {
918
+ static StateResult handle_char_ref_in_data_state(GumboParser* parser,
919
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
930
920
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
931
921
  return emit_char_ref(parser, ' ', false, output);
932
922
  }
933
923
 
934
924
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#rcdata-state
935
- static StateResult handle_rcdata_state(
936
- GumboParser* parser, GumboTokenizerState* tokenizer,
937
- int c, GumboToken* output) {
925
+ static StateResult handle_rcdata_state(GumboParser* parser,
926
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
938
927
  switch (c) {
939
928
  case '&':
940
929
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_CHAR_REF_IN_RCDATA);
@@ -955,17 +944,15 @@ static StateResult handle_rcdata_state(
955
944
  }
956
945
 
957
946
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#character-reference-in-rcdata-state
958
- static StateResult handle_char_ref_in_rcdata_state(
959
- GumboParser* parser, GumboTokenizerState* tokenizer,
960
- int c, GumboToken* output) {
947
+ static StateResult handle_char_ref_in_rcdata_state(GumboParser* parser,
948
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
961
949
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_RCDATA);
962
950
  return emit_char_ref(parser, ' ', false, output);
963
951
  }
964
952
 
965
953
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#rawtext-state
966
- static StateResult handle_rawtext_state(
967
- GumboParser* parser, GumboTokenizerState* tokenizer,
968
- int c, GumboToken* output) {
954
+ static StateResult handle_rawtext_state(GumboParser* parser,
955
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
969
956
  switch (c) {
970
957
  case '<':
971
958
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_RAWTEXT_LT);
@@ -982,9 +969,8 @@ static StateResult handle_rawtext_state(
982
969
  }
983
970
 
984
971
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-state
985
- static StateResult handle_script_state(
986
- GumboParser* parser, GumboTokenizerState* tokenizer,
987
- int c, GumboToken* output) {
972
+ static StateResult handle_script_state(GumboParser* parser,
973
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
988
974
  switch (c) {
989
975
  case '<':
990
976
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT_LT);
@@ -1001,9 +987,8 @@ static StateResult handle_script_state(
1001
987
  }
1002
988
 
1003
989
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#plaintext-state
1004
- static StateResult handle_plaintext_state(
1005
- GumboParser* parser, GumboTokenizerState* tokenizer,
1006
- int c, GumboToken* output) {
990
+ static StateResult handle_plaintext_state(GumboParser* parser,
991
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
1007
992
  switch (c) {
1008
993
  case '\0':
1009
994
  return emit_replacement_char(parser, output);
@@ -1015,9 +1000,8 @@ static StateResult handle_plaintext_state(
1015
1000
  }
1016
1001
 
1017
1002
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#tag-open-state
1018
- static StateResult handle_tag_open_state(
1019
- GumboParser* parser, GumboTokenizerState* tokenizer,
1020
- int c, GumboToken* output) {
1003
+ static StateResult handle_tag_open_state(GumboParser* parser,
1004
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
1021
1005
  assert(temporary_buffer_equals(parser, "<"));
1022
1006
  switch (c) {
1023
1007
  case '!':
@@ -1049,9 +1033,8 @@ static StateResult handle_tag_open_state(
1049
1033
  }
1050
1034
 
1051
1035
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#end-tag-open-state
1052
- static StateResult handle_end_tag_open_state(
1053
- GumboParser* parser, GumboTokenizerState* tokenizer,
1054
- int c, GumboToken* output) {
1036
+ static StateResult handle_end_tag_open_state(GumboParser* parser,
1037
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
1055
1038
  assert(temporary_buffer_equals(parser, "</"));
1056
1039
  switch (c) {
1057
1040
  case '>':
@@ -1077,9 +1060,8 @@ static StateResult handle_end_tag_open_state(
1077
1060
  }
1078
1061
 
1079
1062
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#tag-name-state
1080
- static StateResult handle_tag_name_state(
1081
- GumboParser* parser, GumboTokenizerState* tokenizer,
1082
- int c, GumboToken* output) {
1063
+ static StateResult handle_tag_name_state(GumboParser* parser,
1064
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
1083
1065
  switch (c) {
1084
1066
  case '\t':
1085
1067
  case '\n':
@@ -1112,9 +1094,8 @@ static StateResult handle_tag_name_state(
1112
1094
  }
1113
1095
 
1114
1096
  // http://www.whatwg.org/specs/web-apps/current-work/complete.html#rcdata-less-than-sign-state
1115
- static StateResult handle_rcdata_lt_state(
1116
- GumboParser* parser, GumboTokenizerState* tokenizer,
1117
- int c, GumboToken* output) {
1097
+ static StateResult handle_rcdata_lt_state(GumboParser* parser,
1098
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
1118
1099
  assert(temporary_buffer_equals(parser, "<"));
1119
1100
  if (c == '/') {
1120
1101
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_RCDATA_END_TAG_OPEN);
@@ -1128,9 +1109,8 @@ static StateResult handle_rcdata_lt_state(
1128
1109
  }
1129
1110
 
1130
1111
  // http://www.whatwg.org/specs/web-apps/current-work/complete.html#rcdata-end-tag-open-state
1131
- static StateResult handle_rcdata_end_tag_open_state(
1132
- GumboParser* parser, GumboTokenizerState* tokenizer,
1133
- int c, GumboToken* output) {
1112
+ static StateResult handle_rcdata_end_tag_open_state(GumboParser* parser,
1113
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
1134
1114
  assert(temporary_buffer_equals(parser, "</"));
1135
1115
  if (is_alpha(c)) {
1136
1116
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_RCDATA_END_TAG_NAME);
@@ -1145,9 +1125,8 @@ static StateResult handle_rcdata_end_tag_open_state(
1145
1125
  }
1146
1126
 
1147
1127
  // http://www.whatwg.org/specs/web-apps/current-work/complete.html#rcdata-end-tag-name-state
1148
- static StateResult handle_rcdata_end_tag_name_state(
1149
- GumboParser* parser, GumboTokenizerState* tokenizer,
1150
- int c, GumboToken* output) {
1128
+ static StateResult handle_rcdata_end_tag_name_state(GumboParser* parser,
1129
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
1151
1130
  assert(tokenizer->_temporary_buffer.length >= 2);
1152
1131
  if (is_alpha(c)) {
1153
1132
  append_char_to_tag_buffer(parser, ensure_lowercase(c), true);
@@ -1178,9 +1157,8 @@ static StateResult handle_rcdata_end_tag_name_state(
1178
1157
  }
1179
1158
 
1180
1159
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#rawtext-less-than-sign-state
1181
- static StateResult handle_rawtext_lt_state(
1182
- GumboParser* parser, GumboTokenizerState* tokenizer,
1183
- int c, GumboToken* output) {
1160
+ static StateResult handle_rawtext_lt_state(GumboParser* parser,
1161
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
1184
1162
  assert(temporary_buffer_equals(parser, "<"));
1185
1163
  if (c == '/') {
1186
1164
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_RAWTEXT_END_TAG_OPEN);
@@ -1194,9 +1172,8 @@ static StateResult handle_rawtext_lt_state(
1194
1172
  }
1195
1173
 
1196
1174
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#rawtext-end-tag-open-state
1197
- static StateResult handle_rawtext_end_tag_open_state(
1198
- GumboParser* parser, GumboTokenizerState* tokenizer,
1199
- int c, GumboToken* output) {
1175
+ static StateResult handle_rawtext_end_tag_open_state(GumboParser* parser,
1176
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
1200
1177
  assert(temporary_buffer_equals(parser, "</"));
1201
1178
  if (is_alpha(c)) {
1202
1179
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_RAWTEXT_END_TAG_NAME);
@@ -1210,12 +1187,11 @@ static StateResult handle_rawtext_end_tag_open_state(
1210
1187
  }
1211
1188
 
1212
1189
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#rawtext-end-tag-name-state
1213
- static StateResult handle_rawtext_end_tag_name_state(
1214
- GumboParser* parser, GumboTokenizerState* tokenizer,
1215
- int c, GumboToken* output) {
1190
+ static StateResult handle_rawtext_end_tag_name_state(GumboParser* parser,
1191
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
1216
1192
  assert(tokenizer->_temporary_buffer.length >= 2);
1217
1193
  gumbo_debug("Last end tag: %*s\n", (int) tokenizer->_tag_state._buffer.length,
1218
- tokenizer->_tag_state._buffer.data);
1194
+ tokenizer->_tag_state._buffer.data);
1219
1195
  if (is_alpha(c)) {
1220
1196
  append_char_to_tag_buffer(parser, ensure_lowercase(c), true);
1221
1197
  append_char_to_temporary_buffer(parser, c);
@@ -1246,9 +1222,8 @@ static StateResult handle_rawtext_end_tag_name_state(
1246
1222
  }
1247
1223
 
1248
1224
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-less-than-sign-state
1249
- static StateResult handle_script_lt_state(
1250
- GumboParser* parser, GumboTokenizerState* tokenizer,
1251
- int c, GumboToken* output) {
1225
+ static StateResult handle_script_lt_state(GumboParser* parser,
1226
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
1252
1227
  assert(temporary_buffer_equals(parser, "<"));
1253
1228
  if (c == '/') {
1254
1229
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT_END_TAG_OPEN);
@@ -1266,9 +1241,8 @@ static StateResult handle_script_lt_state(
1266
1241
  }
1267
1242
 
1268
1243
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-end-tag-open-state
1269
- static StateResult handle_script_end_tag_open_state(
1270
- GumboParser* parser, GumboTokenizerState* tokenizer,
1271
- int c, GumboToken* output) {
1244
+ static StateResult handle_script_end_tag_open_state(GumboParser* parser,
1245
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
1272
1246
  assert(temporary_buffer_equals(parser, "</"));
1273
1247
  if (is_alpha(c)) {
1274
1248
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT_END_TAG_NAME);
@@ -1282,9 +1256,8 @@ static StateResult handle_script_end_tag_open_state(
1282
1256
  }
1283
1257
 
1284
1258
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-end-tag-name-state
1285
- static StateResult handle_script_end_tag_name_state(
1286
- GumboParser* parser, GumboTokenizerState* tokenizer,
1287
- int c, GumboToken* output) {
1259
+ static StateResult handle_script_end_tag_name_state(GumboParser* parser,
1260
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
1288
1261
  assert(tokenizer->_temporary_buffer.length >= 2);
1289
1262
  if (is_alpha(c)) {
1290
1263
  append_char_to_tag_buffer(parser, ensure_lowercase(c), true);
@@ -1315,9 +1288,8 @@ static StateResult handle_script_end_tag_name_state(
1315
1288
  }
1316
1289
 
1317
1290
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-escape-start-state
1318
- static StateResult handle_script_escaped_start_state(
1319
- GumboParser* parser, GumboTokenizerState* tokenizer,
1320
- int c, GumboToken* output) {
1291
+ static StateResult handle_script_escaped_start_state(GumboParser* parser,
1292
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
1321
1293
  if (c == '-') {
1322
1294
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT_ESCAPED_START_DASH);
1323
1295
  return emit_current_char(parser, output);
@@ -1329,9 +1301,8 @@ static StateResult handle_script_escaped_start_state(
1329
1301
  }
1330
1302
 
1331
1303
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-escape-start-dash-state
1332
- static StateResult handle_script_escaped_start_dash_state(
1333
- GumboParser* parser, GumboTokenizerState* tokenizer,
1334
- int c, GumboToken* output) {
1304
+ static StateResult handle_script_escaped_start_dash_state(GumboParser* parser,
1305
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
1335
1306
  if (c == '-') {
1336
1307
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT_ESCAPED_DASH_DASH);
1337
1308
  return emit_current_char(parser, output);
@@ -1343,9 +1314,8 @@ static StateResult handle_script_escaped_start_dash_state(
1343
1314
  }
1344
1315
 
1345
1316
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-escaped-state
1346
- static StateResult handle_script_escaped_state(
1347
- GumboParser* parser, GumboTokenizerState* tokenizer,
1348
- int c, GumboToken* output) {
1317
+ static StateResult handle_script_escaped_state(GumboParser* parser,
1318
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
1349
1319
  switch (c) {
1350
1320
  case '-':
1351
1321
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT_ESCAPED_DASH);
@@ -1366,9 +1336,8 @@ static StateResult handle_script_escaped_state(
1366
1336
  }
1367
1337
 
1368
1338
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-escaped-dash-state
1369
- static StateResult handle_script_escaped_dash_state(
1370
- GumboParser* parser, GumboTokenizerState* tokenizer,
1371
- int c, GumboToken* output) {
1339
+ static StateResult handle_script_escaped_dash_state(GumboParser* parser,
1340
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
1372
1341
  switch (c) {
1373
1342
  case '-':
1374
1343
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT_ESCAPED_DASH_DASH);
@@ -1392,9 +1361,8 @@ static StateResult handle_script_escaped_dash_state(
1392
1361
  }
1393
1362
 
1394
1363
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-escaped-dash-dash-state
1395
- static StateResult handle_script_escaped_dash_dash_state(
1396
- GumboParser* parser, GumboTokenizerState* tokenizer,
1397
- int c, GumboToken* output) {
1364
+ static StateResult handle_script_escaped_dash_dash_state(GumboParser* parser,
1365
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
1398
1366
  switch (c) {
1399
1367
  case '-':
1400
1368
  return emit_current_char(parser, output);
@@ -1420,9 +1388,8 @@ static StateResult handle_script_escaped_dash_dash_state(
1420
1388
  }
1421
1389
 
1422
1390
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-escaped-less-than-sign-state
1423
- static StateResult handle_script_escaped_lt_state(
1424
- GumboParser* parser, GumboTokenizerState* tokenizer,
1425
- int c, GumboToken* output) {
1391
+ static StateResult handle_script_escaped_lt_state(GumboParser* parser,
1392
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
1426
1393
  assert(temporary_buffer_equals(parser, "<"));
1427
1394
  assert(!tokenizer->_script_data_buffer.length);
1428
1395
  if (c == '/') {
@@ -1442,9 +1409,8 @@ static StateResult handle_script_escaped_lt_state(
1442
1409
  }
1443
1410
 
1444
1411
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-escaped-end-tag-open-state
1445
- static StateResult handle_script_escaped_end_tag_open_state(
1446
- GumboParser* parser, GumboTokenizerState* tokenizer,
1447
- int c, GumboToken* output) {
1412
+ static StateResult handle_script_escaped_end_tag_open_state(GumboParser* parser,
1413
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
1448
1414
  assert(temporary_buffer_equals(parser, "</"));
1449
1415
  if (is_alpha(c)) {
1450
1416
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT_ESCAPED_END_TAG_NAME);
@@ -1458,9 +1424,8 @@ static StateResult handle_script_escaped_end_tag_open_state(
1458
1424
  }
1459
1425
 
1460
1426
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-escaped-end-tag-name-state
1461
- static StateResult handle_script_escaped_end_tag_name_state(
1462
- GumboParser* parser, GumboTokenizerState* tokenizer,
1463
- int c, GumboToken* output) {
1427
+ static StateResult handle_script_escaped_end_tag_name_state(GumboParser* parser,
1428
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
1464
1429
  assert(tokenizer->_temporary_buffer.length >= 2);
1465
1430
  if (is_alpha(c)) {
1466
1431
  append_char_to_tag_buffer(parser, ensure_lowercase(c), true);
@@ -1491,9 +1456,8 @@ static StateResult handle_script_escaped_end_tag_name_state(
1491
1456
  }
1492
1457
 
1493
1458
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-double-escape-start-state
1494
- static StateResult handle_script_double_escaped_start_state(
1495
- GumboParser* parser, GumboTokenizerState* tokenizer,
1496
- int c, GumboToken* output) {
1459
+ static StateResult handle_script_double_escaped_start_state(GumboParser* parser,
1460
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
1497
1461
  switch (c) {
1498
1462
  case '\t':
1499
1463
  case '\n':
@@ -1501,9 +1465,11 @@ static StateResult handle_script_double_escaped_start_state(
1501
1465
  case ' ':
1502
1466
  case '/':
1503
1467
  case '>':
1504
- gumbo_tokenizer_set_state(parser, gumbo_string_equals(
1505
- &kScriptTag, (GumboStringPiece*) &tokenizer->_script_data_buffer)
1506
- ? GUMBO_LEX_SCRIPT_DOUBLE_ESCAPED : GUMBO_LEX_SCRIPT_ESCAPED);
1468
+ gumbo_tokenizer_set_state(
1469
+ parser, gumbo_string_equals(&kScriptTag,
1470
+ (GumboStringPiece*) &tokenizer->_script_data_buffer)
1471
+ ? GUMBO_LEX_SCRIPT_DOUBLE_ESCAPED
1472
+ : GUMBO_LEX_SCRIPT_ESCAPED);
1507
1473
  return emit_current_char(parser, output);
1508
1474
  default:
1509
1475
  if (is_alpha(c)) {
@@ -1519,9 +1485,8 @@ static StateResult handle_script_double_escaped_start_state(
1519
1485
  }
1520
1486
 
1521
1487
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-double-escaped-state
1522
- static StateResult handle_script_double_escaped_state(
1523
- GumboParser* parser, GumboTokenizerState* tokenizer,
1524
- int c, GumboToken* output) {
1488
+ static StateResult handle_script_double_escaped_state(GumboParser* parser,
1489
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
1525
1490
  switch (c) {
1526
1491
  case '-':
1527
1492
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT_DOUBLE_ESCAPED_DASH);
@@ -1541,9 +1506,8 @@ static StateResult handle_script_double_escaped_state(
1541
1506
  }
1542
1507
 
1543
1508
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-double-escaped-dash-state
1544
- static StateResult handle_script_double_escaped_dash_state(
1545
- GumboParser* parser, GumboTokenizerState* tokenizer,
1546
- int c, GumboToken* output) {
1509
+ static StateResult handle_script_double_escaped_dash_state(GumboParser* parser,
1510
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
1547
1511
  switch (c) {
1548
1512
  case '-':
1549
1513
  gumbo_tokenizer_set_state(
@@ -1567,8 +1531,8 @@ static StateResult handle_script_double_escaped_dash_state(
1567
1531
 
1568
1532
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-double-escaped-dash-dash-state
1569
1533
  static StateResult handle_script_double_escaped_dash_dash_state(
1570
- GumboParser* parser, GumboTokenizerState* tokenizer,
1571
- int c, GumboToken* output) {
1534
+ GumboParser* parser, GumboTokenizerState* tokenizer, int c,
1535
+ GumboToken* output) {
1572
1536
  switch (c) {
1573
1537
  case '-':
1574
1538
  return emit_current_char(parser, output);
@@ -1592,26 +1556,22 @@ static StateResult handle_script_double_escaped_dash_dash_state(
1592
1556
  }
1593
1557
 
1594
1558
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-double-escaped-less-than-sign-state
1595
- static StateResult handle_script_double_escaped_lt_state(
1596
- GumboParser* parser, GumboTokenizerState* tokenizer,
1597
- int c, GumboToken* output) {
1559
+ static StateResult handle_script_double_escaped_lt_state(GumboParser* parser,
1560
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
1598
1561
  if (c == '/') {
1599
1562
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT_DOUBLE_ESCAPED_END);
1600
- gumbo_string_buffer_destroy(parser, &tokenizer->_script_data_buffer);
1601
- gumbo_string_buffer_init(parser, &tokenizer->_script_data_buffer);
1563
+ gumbo_string_buffer_clear(parser, &tokenizer->_script_data_buffer);
1602
1564
  return emit_current_char(parser, output);
1603
1565
  } else {
1604
1566
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT_DOUBLE_ESCAPED);
1605
1567
  tokenizer->_reconsume_current_input = true;
1606
1568
  return NEXT_CHAR;
1607
1569
  }
1608
-
1609
1570
  }
1610
1571
 
1611
1572
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-double-escape-end-state
1612
- static StateResult handle_script_double_escaped_end_state(
1613
- GumboParser* parser, GumboTokenizerState* tokenizer,
1614
- int c, GumboToken* output) {
1573
+ static StateResult handle_script_double_escaped_end_state(GumboParser* parser,
1574
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
1615
1575
  switch (c) {
1616
1576
  case '\t':
1617
1577
  case '\n':
@@ -1619,9 +1579,11 @@ static StateResult handle_script_double_escaped_end_state(
1619
1579
  case ' ':
1620
1580
  case '/':
1621
1581
  case '>':
1622
- gumbo_tokenizer_set_state(parser, gumbo_string_equals(
1623
- &kScriptTag, (GumboStringPiece*) &tokenizer->_script_data_buffer)
1624
- ? GUMBO_LEX_SCRIPT_ESCAPED : GUMBO_LEX_SCRIPT_DOUBLE_ESCAPED);
1582
+ gumbo_tokenizer_set_state(
1583
+ parser, gumbo_string_equals(&kScriptTag,
1584
+ (GumboStringPiece*) &tokenizer->_script_data_buffer)
1585
+ ? GUMBO_LEX_SCRIPT_ESCAPED
1586
+ : GUMBO_LEX_SCRIPT_DOUBLE_ESCAPED);
1625
1587
  return emit_current_char(parser, output);
1626
1588
  default:
1627
1589
  if (is_alpha(c)) {
@@ -1637,9 +1599,8 @@ static StateResult handle_script_double_escaped_end_state(
1637
1599
  }
1638
1600
 
1639
1601
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#before-attribute-name-state
1640
- static StateResult handle_before_attr_name_state(
1641
- GumboParser* parser, GumboTokenizerState* tokenizer,
1642
- int c, GumboToken* output) {
1602
+ static StateResult handle_before_attr_name_state(GumboParser* parser,
1603
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
1643
1604
  switch (c) {
1644
1605
  case '\t':
1645
1606
  case '\n':
@@ -1667,7 +1628,7 @@ static StateResult handle_before_attr_name_state(
1667
1628
  case '<':
1668
1629
  case '=':
1669
1630
  tokenizer_add_parse_error(parser, GUMBO_ERR_ATTR_NAME_INVALID);
1670
- // Fall through.
1631
+ // Fall through.
1671
1632
  default:
1672
1633
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_ATTR_NAME);
1673
1634
  append_char_to_tag_buffer(parser, ensure_lowercase(c), true);
@@ -1676,9 +1637,8 @@ static StateResult handle_before_attr_name_state(
1676
1637
  }
1677
1638
 
1678
1639
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#attribute-name-state
1679
- static StateResult handle_attr_name_state(
1680
- GumboParser* parser, GumboTokenizerState* tokenizer,
1681
- int c, GumboToken* output) {
1640
+ static StateResult handle_attr_name_state(GumboParser* parser,
1641
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
1682
1642
  switch (c) {
1683
1643
  case '\t':
1684
1644
  case '\n':
@@ -1712,7 +1672,7 @@ static StateResult handle_attr_name_state(
1712
1672
  case '\'':
1713
1673
  case '<':
1714
1674
  tokenizer_add_parse_error(parser, GUMBO_ERR_ATTR_NAME_INVALID);
1715
- // Fall through.
1675
+ // Fall through.
1716
1676
  default:
1717
1677
  append_char_to_tag_buffer(parser, ensure_lowercase(c), true);
1718
1678
  return NEXT_CHAR;
@@ -1720,9 +1680,8 @@ static StateResult handle_attr_name_state(
1720
1680
  }
1721
1681
 
1722
1682
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#after-attribute-name-state
1723
- static StateResult handle_after_attr_name_state(
1724
- GumboParser* parser, GumboTokenizerState* tokenizer,
1725
- int c, GumboToken* output) {
1683
+ static StateResult handle_after_attr_name_state(GumboParser* parser,
1684
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
1726
1685
  switch (c) {
1727
1686
  case '\t':
1728
1687
  case '\n':
@@ -1752,7 +1711,7 @@ static StateResult handle_after_attr_name_state(
1752
1711
  case '\'':
1753
1712
  case '<':
1754
1713
  tokenizer_add_parse_error(parser, GUMBO_ERR_ATTR_NAME_INVALID);
1755
- // Fall through.
1714
+ // Fall through.
1756
1715
  default:
1757
1716
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_ATTR_NAME);
1758
1717
  append_char_to_tag_buffer(parser, ensure_lowercase(c), true);
@@ -1761,9 +1720,8 @@ static StateResult handle_after_attr_name_state(
1761
1720
  }
1762
1721
 
1763
1722
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#before-attribute-value-state
1764
- static StateResult handle_before_attr_value_state(
1765
- GumboParser* parser, GumboTokenizerState* tokenizer,
1766
- int c, GumboToken* output) {
1723
+ static StateResult handle_before_attr_value_state(GumboParser* parser,
1724
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
1767
1725
  switch (c) {
1768
1726
  case '\t':
1769
1727
  case '\n':
@@ -1802,7 +1760,7 @@ static StateResult handle_before_attr_value_state(
1802
1760
  case '=':
1803
1761
  case '`':
1804
1762
  tokenizer_add_parse_error(parser, GUMBO_ERR_ATTR_UNQUOTED_EQUALS);
1805
- // Fall through.
1763
+ // Fall through.
1806
1764
  default:
1807
1765
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_ATTR_VALUE_UNQUOTED);
1808
1766
  append_char_to_tag_buffer(parser, c, true);
@@ -1811,9 +1769,8 @@ static StateResult handle_before_attr_value_state(
1811
1769
  }
1812
1770
 
1813
1771
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#attribute-value-double-quoted-state
1814
- static StateResult handle_attr_value_double_quoted_state(
1815
- GumboParser* parser, GumboTokenizerState* tokenizer,
1816
- int c, GumboToken* output) {
1772
+ static StateResult handle_attr_value_double_quoted_state(GumboParser* parser,
1773
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
1817
1774
  switch (c) {
1818
1775
  case '"':
1819
1776
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_AFTER_ATTR_VALUE_QUOTED);
@@ -1840,9 +1797,8 @@ static StateResult handle_attr_value_double_quoted_state(
1840
1797
  }
1841
1798
 
1842
1799
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#attribute-value-single-quoted-state
1843
- static StateResult handle_attr_value_single_quoted_state(
1844
- GumboParser* parser, GumboTokenizerState* tokenizer,
1845
- int c, GumboToken* output) {
1800
+ static StateResult handle_attr_value_single_quoted_state(GumboParser* parser,
1801
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
1846
1802
  switch (c) {
1847
1803
  case '\'':
1848
1804
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_AFTER_ATTR_VALUE_QUOTED);
@@ -1869,9 +1825,8 @@ static StateResult handle_attr_value_single_quoted_state(
1869
1825
  }
1870
1826
 
1871
1827
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#attribute-value-unquoted-state
1872
- static StateResult handle_attr_value_unquoted_state(
1873
- GumboParser* parser, GumboTokenizerState* tokenizer,
1874
- int c, GumboToken* output) {
1828
+ static StateResult handle_attr_value_unquoted_state(GumboParser* parser,
1829
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
1875
1830
  switch (c) {
1876
1831
  case '\t':
1877
1832
  case '\n':
@@ -1905,7 +1860,7 @@ static StateResult handle_attr_value_unquoted_state(
1905
1860
  case '\'':
1906
1861
  case '`':
1907
1862
  tokenizer_add_parse_error(parser, GUMBO_ERR_ATTR_UNQUOTED_EQUALS);
1908
- // Fall through.
1863
+ // Fall through.
1909
1864
  default:
1910
1865
  append_char_to_tag_buffer(parser, c, true);
1911
1866
  return NEXT_CHAR;
@@ -1913,9 +1868,8 @@ static StateResult handle_attr_value_unquoted_state(
1913
1868
  }
1914
1869
 
1915
1870
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#character-reference-in-attribute-value-state
1916
- static StateResult handle_char_ref_in_attr_value_state(
1917
- GumboParser* parser, GumboTokenizerState* tokenizer,
1918
- int c, GumboToken* output) {
1871
+ static StateResult handle_char_ref_in_attr_value_state(GumboParser* parser,
1872
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
1919
1873
  OneOrTwoCodepoints char_ref;
1920
1874
  int allowed_char;
1921
1875
  bool is_unquoted = false;
@@ -1956,9 +1910,8 @@ static StateResult handle_char_ref_in_attr_value_state(
1956
1910
  }
1957
1911
 
1958
1912
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#after-attribute-value-quoted-state
1959
- static StateResult handle_after_attr_value_quoted_state(
1960
- GumboParser* parser, GumboTokenizerState* tokenizer,
1961
- int c, GumboToken* output) {
1913
+ static StateResult handle_after_attr_value_quoted_state(GumboParser* parser,
1914
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
1962
1915
  finish_attribute_value(parser);
1963
1916
  switch (c) {
1964
1917
  case '\t':
@@ -1988,9 +1941,8 @@ static StateResult handle_after_attr_value_quoted_state(
1988
1941
  }
1989
1942
 
1990
1943
  // http://www.whatwg.org/specs/web-apps/current-work/complete.html#self-closing-start-tag-state
1991
- static StateResult handle_self_closing_start_tag_state(
1992
- GumboParser* parser, GumboTokenizerState* tokenizer,
1993
- int c, GumboToken* output) {
1944
+ static StateResult handle_self_closing_start_tag_state(GumboParser* parser,
1945
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
1994
1946
  switch (c) {
1995
1947
  case '>':
1996
1948
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
@@ -2010,9 +1962,8 @@ static StateResult handle_self_closing_start_tag_state(
2010
1962
  }
2011
1963
 
2012
1964
  // http://www.whatwg.org/specs/web-apps/current-work/complete.html#bogus-comment-state
2013
- static StateResult handle_bogus_comment_state(
2014
- GumboParser* parser, GumboTokenizerState* tokenizer,
2015
- int c, GumboToken* output) {
1965
+ static StateResult handle_bogus_comment_state(GumboParser* parser,
1966
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
2016
1967
  while (c != '>' && c != -1) {
2017
1968
  if (c == '\0') {
2018
1969
  c = 0xFFFD;
@@ -2026,15 +1977,14 @@ static StateResult handle_bogus_comment_state(
2026
1977
  }
2027
1978
 
2028
1979
  // http://www.whatwg.org/specs/web-apps/current-work/complete.html#markup-declaration-open-state
2029
- static StateResult handle_markup_declaration_state(
2030
- GumboParser* parser, GumboTokenizerState* tokenizer,
2031
- int c, GumboToken* output) {
1980
+ static StateResult handle_markup_declaration_state(GumboParser* parser,
1981
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
2032
1982
  if (utf8iterator_maybe_consume_match(
2033
- &tokenizer->_input, "--", sizeof("--") - 1, true)) {
1983
+ &tokenizer->_input, "--", sizeof("--") - 1, true)) {
2034
1984
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_COMMENT_START);
2035
1985
  tokenizer->_reconsume_current_input = true;
2036
1986
  } else if (utf8iterator_maybe_consume_match(
2037
- &tokenizer->_input, "DOCTYPE", sizeof("DOCTYPE") - 1, false)) {
1987
+ &tokenizer->_input, "DOCTYPE", sizeof("DOCTYPE") - 1, false)) {
2038
1988
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_DOCTYPE);
2039
1989
  tokenizer->_reconsume_current_input = true;
2040
1990
  // If we get here, we know we'll eventually emit a doctype token, so now is
@@ -2048,7 +1998,7 @@ static StateResult handle_markup_declaration_state(
2048
1998
  gumbo_copy_stringz(parser, "");
2049
1999
  } else if (tokenizer->_is_current_node_foreign &&
2050
2000
  utf8iterator_maybe_consume_match(
2051
- &tokenizer->_input, "[CDATA[", sizeof("[CDATA[") - 1, true)) {
2001
+ &tokenizer->_input, "[CDATA[", sizeof("[CDATA[") - 1, true)) {
2052
2002
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_CDATA);
2053
2003
  tokenizer->_is_in_cdata = true;
2054
2004
  tokenizer->_reconsume_current_input = true;
@@ -2062,9 +2012,8 @@ static StateResult handle_markup_declaration_state(
2062
2012
  }
2063
2013
 
2064
2014
  // http://www.whatwg.org/specs/web-apps/current-work/complete.html#comment-start-state
2065
- static StateResult handle_comment_start_state(
2066
- GumboParser* parser, GumboTokenizerState* tokenizer,
2067
- int c, GumboToken* output) {
2015
+ static StateResult handle_comment_start_state(GumboParser* parser,
2016
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
2068
2017
  switch (c) {
2069
2018
  case '-':
2070
2019
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_COMMENT_START_DASH);
@@ -2092,9 +2041,8 @@ static StateResult handle_comment_start_state(
2092
2041
  }
2093
2042
 
2094
2043
  // http://www.whatwg.org/specs/web-apps/current-work/complete.html#comment-start-dash-state
2095
- static StateResult handle_comment_start_dash_state(
2096
- GumboParser* parser, GumboTokenizerState* tokenizer,
2097
- int c, GumboToken* output) {
2044
+ static StateResult handle_comment_start_dash_state(GumboParser* parser,
2045
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
2098
2046
  switch (c) {
2099
2047
  case '-':
2100
2048
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_COMMENT_END);
@@ -2124,9 +2072,8 @@ static StateResult handle_comment_start_dash_state(
2124
2072
  }
2125
2073
 
2126
2074
  // http://www.whatwg.org/specs/web-apps/current-work/complete.html#comment-state
2127
- static StateResult handle_comment_state(
2128
- GumboParser* parser, GumboTokenizerState* tokenizer,
2129
- int c, GumboToken* output) {
2075
+ static StateResult handle_comment_state(GumboParser* parser,
2076
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
2130
2077
  switch (c) {
2131
2078
  case '-':
2132
2079
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_COMMENT_END_DASH);
@@ -2147,9 +2094,8 @@ static StateResult handle_comment_state(
2147
2094
  }
2148
2095
 
2149
2096
  // http://www.whatwg.org/specs/web-apps/current-work/complete.html#comment-end-dash-state
2150
- static StateResult handle_comment_end_dash_state(
2151
- GumboParser* parser, GumboTokenizerState* tokenizer,
2152
- int c, GumboToken* output) {
2097
+ static StateResult handle_comment_end_dash_state(GumboParser* parser,
2098
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
2153
2099
  switch (c) {
2154
2100
  case '-':
2155
2101
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_COMMENT_END);
@@ -2174,9 +2120,8 @@ static StateResult handle_comment_end_dash_state(
2174
2120
  }
2175
2121
 
2176
2122
  // http://www.whatwg.org/specs/web-apps/current-work/complete.html#comment-end-state
2177
- static StateResult handle_comment_end_state(
2178
- GumboParser* parser, GumboTokenizerState* tokenizer,
2179
- int c, GumboToken* output) {
2123
+ static StateResult handle_comment_end_state(GumboParser* parser,
2124
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
2180
2125
  switch (c) {
2181
2126
  case '>':
2182
2127
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
@@ -2189,11 +2134,13 @@ static StateResult handle_comment_end_state(
2189
2134
  append_char_to_temporary_buffer(parser, kUtf8ReplacementChar);
2190
2135
  return NEXT_CHAR;
2191
2136
  case '!':
2192
- tokenizer_add_parse_error(parser, GUMBO_ERR_COMMENT_BANG_AFTER_DOUBLE_DASH);
2137
+ tokenizer_add_parse_error(
2138
+ parser, GUMBO_ERR_COMMENT_BANG_AFTER_DOUBLE_DASH);
2193
2139
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_COMMENT_END_BANG);
2194
2140
  return NEXT_CHAR;
2195
2141
  case '-':
2196
- tokenizer_add_parse_error(parser, GUMBO_ERR_COMMENT_DASH_AFTER_DOUBLE_DASH);
2142
+ tokenizer_add_parse_error(
2143
+ parser, GUMBO_ERR_COMMENT_DASH_AFTER_DOUBLE_DASH);
2197
2144
  append_char_to_temporary_buffer(parser, '-');
2198
2145
  return NEXT_CHAR;
2199
2146
  case -1:
@@ -2212,9 +2159,8 @@ static StateResult handle_comment_end_state(
2212
2159
  }
2213
2160
 
2214
2161
  // http://www.whatwg.org/specs/web-apps/current-work/complete.html#comment-end-bang-state
2215
- static StateResult handle_comment_end_bang_state(
2216
- GumboParser* parser, GumboTokenizerState* tokenizer,
2217
- int c, GumboToken* output) {
2162
+ static StateResult handle_comment_end_bang_state(GumboParser* parser,
2163
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
2218
2164
  switch (c) {
2219
2165
  case '-':
2220
2166
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_COMMENT_END_DASH);
@@ -2249,9 +2195,8 @@ static StateResult handle_comment_end_bang_state(
2249
2195
  }
2250
2196
 
2251
2197
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#doctype-state
2252
- static StateResult handle_doctype_state(
2253
- GumboParser* parser, GumboTokenizerState* tokenizer,
2254
- int c, GumboToken* output) {
2198
+ static StateResult handle_doctype_state(GumboParser* parser,
2199
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
2255
2200
  assert(!tokenizer->_temporary_buffer.length);
2256
2201
  switch (c) {
2257
2202
  case '\t':
@@ -2276,9 +2221,8 @@ static StateResult handle_doctype_state(
2276
2221
  }
2277
2222
 
2278
2223
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#before-doctype-name-state
2279
- static StateResult handle_before_doctype_name_state(
2280
- GumboParser* parser, GumboTokenizerState* tokenizer,
2281
- int c, GumboToken* output) {
2224
+ static StateResult handle_before_doctype_name_state(GumboParser* parser,
2225
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
2282
2226
  switch (c) {
2283
2227
  case '\t':
2284
2228
  case '\n':
@@ -2312,9 +2256,8 @@ static StateResult handle_before_doctype_name_state(
2312
2256
  }
2313
2257
 
2314
2258
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#doctype-name-state
2315
- static StateResult handle_doctype_name_state(
2316
- GumboParser* parser, GumboTokenizerState* tokenizer,
2317
- int c, GumboToken* output) {
2259
+ static StateResult handle_doctype_name_state(GumboParser* parser,
2260
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
2318
2261
  switch (c) {
2319
2262
  case '\t':
2320
2263
  case '\n':
@@ -2322,14 +2265,12 @@ static StateResult handle_doctype_name_state(
2322
2265
  case ' ':
2323
2266
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_AFTER_DOCTYPE_NAME);
2324
2267
  gumbo_parser_deallocate(parser, (void*) tokenizer->_doc_type_state.name);
2325
- finish_temporary_buffer(
2326
- parser, &tokenizer->_doc_type_state.name);
2268
+ finish_temporary_buffer(parser, &tokenizer->_doc_type_state.name);
2327
2269
  return NEXT_CHAR;
2328
2270
  case '>':
2329
2271
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
2330
2272
  gumbo_parser_deallocate(parser, (void*) tokenizer->_doc_type_state.name);
2331
- finish_temporary_buffer(
2332
- parser, &tokenizer->_doc_type_state.name);
2273
+ finish_temporary_buffer(parser, &tokenizer->_doc_type_state.name);
2333
2274
  emit_doctype(parser, output);
2334
2275
  return RETURN_SUCCESS;
2335
2276
  case '\0':
@@ -2341,8 +2282,7 @@ static StateResult handle_doctype_name_state(
2341
2282
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
2342
2283
  tokenizer->_doc_type_state.force_quirks = true;
2343
2284
  gumbo_parser_deallocate(parser, (void*) tokenizer->_doc_type_state.name);
2344
- finish_temporary_buffer(
2345
- parser, &tokenizer->_doc_type_state.name);
2285
+ finish_temporary_buffer(parser, &tokenizer->_doc_type_state.name);
2346
2286
  emit_doctype(parser, output);
2347
2287
  return RETURN_ERROR;
2348
2288
  default:
@@ -2354,9 +2294,8 @@ static StateResult handle_doctype_name_state(
2354
2294
  }
2355
2295
 
2356
2296
  // http://www.whatwg.org/specs/web-apps/current-work/complete.html#after-doctype-name-state
2357
- static StateResult handle_after_doctype_name_state(
2358
- GumboParser* parser, GumboTokenizerState* tokenizer,
2359
- int c, GumboToken* output) {
2297
+ static StateResult handle_after_doctype_name_state(GumboParser* parser,
2298
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
2360
2299
  switch (c) {
2361
2300
  case '\t':
2362
2301
  case '\n':
@@ -2375,17 +2314,18 @@ static StateResult handle_after_doctype_name_state(
2375
2314
  return RETURN_ERROR;
2376
2315
  default:
2377
2316
  if (utf8iterator_maybe_consume_match(
2378
- &tokenizer->_input, "PUBLIC", sizeof("PUBLIC") - 1, false)) {
2317
+ &tokenizer->_input, "PUBLIC", sizeof("PUBLIC") - 1, false)) {
2379
2318
  gumbo_tokenizer_set_state(
2380
2319
  parser, GUMBO_LEX_AFTER_DOCTYPE_PUBLIC_KEYWORD);
2381
2320
  tokenizer->_reconsume_current_input = true;
2382
- } else if (utf8iterator_maybe_consume_match(
2383
- &tokenizer->_input, "SYSTEM", sizeof("SYSTEM") - 1, false)) {
2321
+ } else if (utf8iterator_maybe_consume_match(&tokenizer->_input, "SYSTEM",
2322
+ sizeof("SYSTEM") - 1, false)) {
2384
2323
  gumbo_tokenizer_set_state(
2385
2324
  parser, GUMBO_LEX_AFTER_DOCTYPE_SYSTEM_KEYWORD);
2386
2325
  tokenizer->_reconsume_current_input = true;
2387
2326
  } else {
2388
- tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_SPACE_OR_RIGHT_BRACKET);
2327
+ tokenizer_add_parse_error(
2328
+ parser, GUMBO_ERR_DOCTYPE_SPACE_OR_RIGHT_BRACKET);
2389
2329
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_BOGUS_DOCTYPE);
2390
2330
  tokenizer->_doc_type_state.force_quirks = true;
2391
2331
  }
@@ -2395,15 +2335,14 @@ static StateResult handle_after_doctype_name_state(
2395
2335
 
2396
2336
  // http://www.whatwg.org/specs/web-apps/current-work/complete.html#after-doctype-public-keyword-state
2397
2337
  static StateResult handle_after_doctype_public_keyword_state(
2398
- GumboParser* parser, GumboTokenizerState* tokenizer,
2399
- int c, GumboToken* output) {
2338
+ GumboParser* parser, GumboTokenizerState* tokenizer, int c,
2339
+ GumboToken* output) {
2400
2340
  switch (c) {
2401
2341
  case '\t':
2402
2342
  case '\n':
2403
2343
  case '\f':
2404
2344
  case ' ':
2405
- gumbo_tokenizer_set_state(
2406
- parser, GUMBO_LEX_BEFORE_DOCTYPE_PUBLIC_ID);
2345
+ gumbo_tokenizer_set_state(parser, GUMBO_LEX_BEFORE_DOCTYPE_PUBLIC_ID);
2407
2346
  return NEXT_CHAR;
2408
2347
  case '"':
2409
2348
  tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_INVALID);
@@ -2439,9 +2378,8 @@ static StateResult handle_after_doctype_public_keyword_state(
2439
2378
  }
2440
2379
 
2441
2380
  // http://www.whatwg.org/specs/web-apps/current-work/complete.html#before-doctype-public-identifier-state
2442
- static StateResult handle_before_doctype_public_id_state(
2443
- GumboParser* parser, GumboTokenizerState* tokenizer,
2444
- int c, GumboToken* output) {
2381
+ static StateResult handle_before_doctype_public_id_state(GumboParser* parser,
2382
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
2445
2383
  switch (c) {
2446
2384
  case '\t':
2447
2385
  case '\n':
@@ -2481,8 +2419,8 @@ static StateResult handle_before_doctype_public_id_state(
2481
2419
 
2482
2420
  // http://www.whatwg.org/specs/web-apps/current-work/complete.html#doctype-public-identifier-(double-quoted)-state
2483
2421
  static StateResult handle_doctype_public_id_double_quoted_state(
2484
- GumboParser* parser, GumboTokenizerState* tokenizer,
2485
- int c, GumboToken* output) {
2422
+ GumboParser* parser, GumboTokenizerState* tokenizer, int c,
2423
+ GumboToken* output) {
2486
2424
  switch (c) {
2487
2425
  case '"':
2488
2426
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_AFTER_DOCTYPE_PUBLIC_ID);
@@ -2514,8 +2452,8 @@ static StateResult handle_doctype_public_id_double_quoted_state(
2514
2452
 
2515
2453
  // http://www.whatwg.org/specs/web-apps/current-work/complete.html#doctype-public-identifier-(single-quoted)-state
2516
2454
  static StateResult handle_doctype_public_id_single_quoted_state(
2517
- GumboParser* parser, GumboTokenizerState* tokenizer,
2518
- int c, GumboToken* output) {
2455
+ GumboParser* parser, GumboTokenizerState* tokenizer, int c,
2456
+ GumboToken* output) {
2519
2457
  switch (c) {
2520
2458
  case '\'':
2521
2459
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_AFTER_DOCTYPE_PUBLIC_ID);
@@ -2546,9 +2484,8 @@ static StateResult handle_doctype_public_id_single_quoted_state(
2546
2484
  }
2547
2485
 
2548
2486
  // http://www.whatwg.org/specs/web-apps/current-work/complete.html#after-doctype-public-identifier-state
2549
- static StateResult handle_after_doctype_public_id_state(
2550
- GumboParser* parser, GumboTokenizerState* tokenizer,
2551
- int c, GumboToken* output) {
2487
+ static StateResult handle_after_doctype_public_id_state(GumboParser* parser,
2488
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
2552
2489
  switch (c) {
2553
2490
  case '\t':
2554
2491
  case '\n':
@@ -2590,8 +2527,8 @@ static StateResult handle_after_doctype_public_id_state(
2590
2527
 
2591
2528
  // http://www.whatwg.org/specs/web-apps/current-work/complete.html#between-doctype-public-and-system-identifiers-state
2592
2529
  static StateResult handle_between_doctype_public_system_id_state(
2593
- GumboParser* parser, GumboTokenizerState* tokenizer,
2594
- int c, GumboToken* output) {
2530
+ GumboParser* parser, GumboTokenizerState* tokenizer, int c,
2531
+ GumboToken* output) {
2595
2532
  switch (c) {
2596
2533
  case '\t':
2597
2534
  case '\n':
@@ -2629,8 +2566,8 @@ static StateResult handle_between_doctype_public_system_id_state(
2629
2566
 
2630
2567
  // http://www.whatwg.org/specs/web-apps/current-work/complete.html#after-doctype-system-keyword-state
2631
2568
  static StateResult handle_after_doctype_system_keyword_state(
2632
- GumboParser* parser, GumboTokenizerState* tokenizer,
2633
- int c, GumboToken* output) {
2569
+ GumboParser* parser, GumboTokenizerState* tokenizer, int c,
2570
+ GumboToken* output) {
2634
2571
  switch (c) {
2635
2572
  case '\t':
2636
2573
  case '\n':
@@ -2671,9 +2608,8 @@ static StateResult handle_after_doctype_system_keyword_state(
2671
2608
  }
2672
2609
 
2673
2610
  // http://www.whatwg.org/specs/web-apps/current-work/complete.html#before-doctype-system-identifier-state
2674
- static StateResult handle_before_doctype_system_id_state(
2675
- GumboParser* parser, GumboTokenizerState* tokenizer,
2676
- int c, GumboToken* output) {
2611
+ static StateResult handle_before_doctype_system_id_state(GumboParser* parser,
2612
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
2677
2613
  switch (c) {
2678
2614
  case '\t':
2679
2615
  case '\n':
@@ -2712,8 +2648,8 @@ static StateResult handle_before_doctype_system_id_state(
2712
2648
 
2713
2649
  // http://www.whatwg.org/specs/web-apps/current-work/complete.html#doctype-system-identifier-(double-quoted)-state
2714
2650
  static StateResult handle_doctype_system_id_double_quoted_state(
2715
- GumboParser* parser, GumboTokenizerState* tokenizer,
2716
- int c, GumboToken* output) {
2651
+ GumboParser* parser, GumboTokenizerState* tokenizer, int c,
2652
+ GumboToken* output) {
2717
2653
  switch (c) {
2718
2654
  case '"':
2719
2655
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_AFTER_DOCTYPE_SYSTEM_ID);
@@ -2745,8 +2681,8 @@ static StateResult handle_doctype_system_id_double_quoted_state(
2745
2681
 
2746
2682
  // http://www.whatwg.org/specs/web-apps/current-work/complete.html#doctype-system-identifier-(single-quoted)-state
2747
2683
  static StateResult handle_doctype_system_id_single_quoted_state(
2748
- GumboParser* parser, GumboTokenizerState* tokenizer,
2749
- int c, GumboToken* output) {
2684
+ GumboParser* parser, GumboTokenizerState* tokenizer, int c,
2685
+ GumboToken* output) {
2750
2686
  switch (c) {
2751
2687
  case '\'':
2752
2688
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_AFTER_DOCTYPE_SYSTEM_ID);
@@ -2777,9 +2713,8 @@ static StateResult handle_doctype_system_id_single_quoted_state(
2777
2713
  }
2778
2714
 
2779
2715
  // http://www.whatwg.org/specs/web-apps/current-work/complete.html#after-doctype-system-identifier-state
2780
- static StateResult handle_after_doctype_system_id_state(
2781
- GumboParser* parser, GumboTokenizerState* tokenizer,
2782
- int c, GumboToken* output) {
2716
+ static StateResult handle_after_doctype_system_id_state(GumboParser* parser,
2717
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
2783
2718
  switch (c) {
2784
2719
  case '\t':
2785
2720
  case '\n':
@@ -2804,9 +2739,8 @@ static StateResult handle_after_doctype_system_id_state(
2804
2739
  }
2805
2740
 
2806
2741
  // http://www.whatwg.org/specs/web-apps/current-work/complete.html#bogus-doctype-state
2807
- static StateResult handle_bogus_doctype_state(
2808
- GumboParser* parser, GumboTokenizerState* tokenizer,
2809
- int c, GumboToken* output) {
2742
+ static StateResult handle_bogus_doctype_state(GumboParser* parser,
2743
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
2810
2744
  if (c == '>' || c == -1) {
2811
2745
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
2812
2746
  emit_doctype(parser, output);
@@ -2816,15 +2750,14 @@ static StateResult handle_bogus_doctype_state(
2816
2750
  }
2817
2751
 
2818
2752
  // http://www.whatwg.org/specs/web-apps/current-work/complete.html#cdata-section-state
2819
- static StateResult handle_cdata_state(
2820
- GumboParser* parser, GumboTokenizerState* tokenizer,
2821
- int c, GumboToken* output) {
2753
+ static StateResult handle_cdata_state(GumboParser* parser,
2754
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
2822
2755
  if (c == -1 || utf8iterator_maybe_consume_match(
2823
- &tokenizer->_input, "]]>", sizeof("]]>") - 1, true)) {
2756
+ &tokenizer->_input, "]]>", sizeof("]]>") - 1, true)) {
2824
2757
  tokenizer->_reconsume_current_input = true;
2825
2758
  reset_token_start_point(tokenizer);
2826
2759
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
2827
- tokenizer->_is_in_cdata = true;
2760
+ tokenizer->_is_in_cdata = false;
2828
2761
  return NEXT_CHAR;
2829
2762
  } else {
2830
2763
  return emit_current_char(parser, output);
@@ -2834,76 +2767,47 @@ static StateResult handle_cdata_state(
2834
2767
  typedef StateResult (*GumboLexerStateFunction)(
2835
2768
  GumboParser*, GumboTokenizerState*, int, GumboToken*);
2836
2769
 
2837
- static GumboLexerStateFunction dispatch_table[] = {
2838
- handle_data_state,
2839
- handle_char_ref_in_data_state,
2840
- handle_rcdata_state,
2841
- handle_char_ref_in_rcdata_state,
2842
- handle_rawtext_state,
2843
- handle_script_state,
2844
- handle_plaintext_state,
2845
- handle_tag_open_state,
2846
- handle_end_tag_open_state,
2847
- handle_tag_name_state,
2848
- handle_rcdata_lt_state,
2849
- handle_rcdata_end_tag_open_state,
2850
- handle_rcdata_end_tag_name_state,
2851
- handle_rawtext_lt_state,
2852
- handle_rawtext_end_tag_open_state,
2853
- handle_rawtext_end_tag_name_state,
2854
- handle_script_lt_state,
2855
- handle_script_end_tag_open_state,
2856
- handle_script_end_tag_name_state,
2857
- handle_script_escaped_start_state,
2858
- handle_script_escaped_start_dash_state,
2859
- handle_script_escaped_state,
2860
- handle_script_escaped_dash_state,
2861
- handle_script_escaped_dash_dash_state,
2862
- handle_script_escaped_lt_state,
2863
- handle_script_escaped_end_tag_open_state,
2864
- handle_script_escaped_end_tag_name_state,
2865
- handle_script_double_escaped_start_state,
2866
- handle_script_double_escaped_state,
2867
- handle_script_double_escaped_dash_state,
2868
- handle_script_double_escaped_dash_dash_state,
2869
- handle_script_double_escaped_lt_state,
2870
- handle_script_double_escaped_end_state,
2871
- handle_before_attr_name_state,
2872
- handle_attr_name_state,
2873
- handle_after_attr_name_state,
2874
- handle_before_attr_value_state,
2875
- handle_attr_value_double_quoted_state,
2876
- handle_attr_value_single_quoted_state,
2877
- handle_attr_value_unquoted_state,
2878
- handle_char_ref_in_attr_value_state,
2879
- handle_after_attr_value_quoted_state,
2880
- handle_self_closing_start_tag_state,
2881
- handle_bogus_comment_state,
2882
- handle_markup_declaration_state,
2883
- handle_comment_start_state,
2884
- handle_comment_start_dash_state,
2885
- handle_comment_state,
2886
- handle_comment_end_dash_state,
2887
- handle_comment_end_state,
2888
- handle_comment_end_bang_state,
2889
- handle_doctype_state,
2890
- handle_before_doctype_name_state,
2891
- handle_doctype_name_state,
2892
- handle_after_doctype_name_state,
2893
- handle_after_doctype_public_keyword_state,
2894
- handle_before_doctype_public_id_state,
2895
- handle_doctype_public_id_double_quoted_state,
2896
- handle_doctype_public_id_single_quoted_state,
2897
- handle_after_doctype_public_id_state,
2898
- handle_between_doctype_public_system_id_state,
2899
- handle_after_doctype_system_keyword_state,
2900
- handle_before_doctype_system_id_state,
2901
- handle_doctype_system_id_double_quoted_state,
2902
- handle_doctype_system_id_single_quoted_state,
2903
- handle_after_doctype_system_id_state,
2904
- handle_bogus_doctype_state,
2905
- handle_cdata_state
2906
- };
2770
+ static GumboLexerStateFunction dispatch_table[] = {handle_data_state,
2771
+ handle_char_ref_in_data_state, handle_rcdata_state,
2772
+ handle_char_ref_in_rcdata_state, handle_rawtext_state, handle_script_state,
2773
+ handle_plaintext_state, handle_tag_open_state, handle_end_tag_open_state,
2774
+ handle_tag_name_state, handle_rcdata_lt_state,
2775
+ handle_rcdata_end_tag_open_state, handle_rcdata_end_tag_name_state,
2776
+ handle_rawtext_lt_state, handle_rawtext_end_tag_open_state,
2777
+ handle_rawtext_end_tag_name_state, handle_script_lt_state,
2778
+ handle_script_end_tag_open_state, handle_script_end_tag_name_state,
2779
+ handle_script_escaped_start_state, handle_script_escaped_start_dash_state,
2780
+ handle_script_escaped_state, handle_script_escaped_dash_state,
2781
+ handle_script_escaped_dash_dash_state, handle_script_escaped_lt_state,
2782
+ handle_script_escaped_end_tag_open_state,
2783
+ handle_script_escaped_end_tag_name_state,
2784
+ handle_script_double_escaped_start_state,
2785
+ handle_script_double_escaped_state, handle_script_double_escaped_dash_state,
2786
+ handle_script_double_escaped_dash_dash_state,
2787
+ handle_script_double_escaped_lt_state,
2788
+ handle_script_double_escaped_end_state, handle_before_attr_name_state,
2789
+ handle_attr_name_state, handle_after_attr_name_state,
2790
+ handle_before_attr_value_state, handle_attr_value_double_quoted_state,
2791
+ handle_attr_value_single_quoted_state, handle_attr_value_unquoted_state,
2792
+ handle_char_ref_in_attr_value_state, handle_after_attr_value_quoted_state,
2793
+ handle_self_closing_start_tag_state, handle_bogus_comment_state,
2794
+ handle_markup_declaration_state, handle_comment_start_state,
2795
+ handle_comment_start_dash_state, handle_comment_state,
2796
+ handle_comment_end_dash_state, handle_comment_end_state,
2797
+ handle_comment_end_bang_state, handle_doctype_state,
2798
+ handle_before_doctype_name_state, handle_doctype_name_state,
2799
+ handle_after_doctype_name_state, handle_after_doctype_public_keyword_state,
2800
+ handle_before_doctype_public_id_state,
2801
+ handle_doctype_public_id_double_quoted_state,
2802
+ handle_doctype_public_id_single_quoted_state,
2803
+ handle_after_doctype_public_id_state,
2804
+ handle_between_doctype_public_system_id_state,
2805
+ handle_after_doctype_system_keyword_state,
2806
+ handle_before_doctype_system_id_state,
2807
+ handle_doctype_system_id_double_quoted_state,
2808
+ handle_doctype_system_id_single_quoted_state,
2809
+ handle_after_doctype_system_id_state, handle_bogus_doctype_state,
2810
+ handle_cdata_state};
2907
2811
 
2908
2812
  bool gumbo_lex(GumboParser* parser, GumboToken* output) {
2909
2813
  // Because of the spec requirements that...
@@ -2941,8 +2845,8 @@ bool gumbo_lex(GumboParser* parser, GumboToken* output) {
2941
2845
  assert(!tokenizer->_temporary_buffer_emit);
2942
2846
  assert(tokenizer->_buffered_emit_char == kGumboNoChar);
2943
2847
  int c = utf8iterator_current(&tokenizer->_input);
2944
- gumbo_debug("Lexing character '%c' (%d) in state %d.\n",
2945
- c, c, tokenizer->_state);
2848
+ gumbo_debug(
2849
+ "Lexing character '%c' (%d) in state %d.\n", c, c, tokenizer->_state);
2946
2850
  StateResult result =
2947
2851
  dispatch_table[tokenizer->_state](parser, tokenizer, c, output);
2948
2852
  // We need to clear reconsume_current_input before returning to prevent
@@ -2952,7 +2856,7 @@ bool gumbo_lex(GumboParser* parser, GumboToken* output) {
2952
2856
 
2953
2857
  if (result == RETURN_SUCCESS) {
2954
2858
  return true;
2955
- } else if(result == RETURN_ERROR) {
2859
+ } else if (result == RETURN_ERROR) {
2956
2860
  return false;
2957
2861
  }
2958
2862
 
@@ -2974,7 +2878,7 @@ void gumbo_token_destroy(GumboParser* parser, GumboToken* token) {
2974
2878
  parser, (void*) token->v.doc_type.system_identifier);
2975
2879
  return;
2976
2880
  case GUMBO_TOKEN_START_TAG:
2977
- for (int i = 0; i < token->v.start_tag.attributes.length; ++i) {
2881
+ for (unsigned int i = 0; i < token->v.start_tag.attributes.length; ++i) {
2978
2882
  GumboAttribute* attr = token->v.start_tag.attributes.data[i];
2979
2883
  if (attr) {
2980
2884
  // May have been nulled out if this token was merged with another.