nokogumbo 1.4.2 → 1.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,153 @@
1
+ // Generated via `gentags.py src/tag.in`.
2
+ // Do not edit; edit src/tag.in instead.
3
+ // clang-format off
4
+ GUMBO_TAG_HTML,
5
+ GUMBO_TAG_HEAD,
6
+ GUMBO_TAG_TITLE,
7
+ GUMBO_TAG_BASE,
8
+ GUMBO_TAG_LINK,
9
+ GUMBO_TAG_META,
10
+ GUMBO_TAG_STYLE,
11
+ GUMBO_TAG_SCRIPT,
12
+ GUMBO_TAG_NOSCRIPT,
13
+ GUMBO_TAG_TEMPLATE,
14
+ GUMBO_TAG_BODY,
15
+ GUMBO_TAG_ARTICLE,
16
+ GUMBO_TAG_SECTION,
17
+ GUMBO_TAG_NAV,
18
+ GUMBO_TAG_ASIDE,
19
+ GUMBO_TAG_H1,
20
+ GUMBO_TAG_H2,
21
+ GUMBO_TAG_H3,
22
+ GUMBO_TAG_H4,
23
+ GUMBO_TAG_H5,
24
+ GUMBO_TAG_H6,
25
+ GUMBO_TAG_HGROUP,
26
+ GUMBO_TAG_HEADER,
27
+ GUMBO_TAG_FOOTER,
28
+ GUMBO_TAG_ADDRESS,
29
+ GUMBO_TAG_P,
30
+ GUMBO_TAG_HR,
31
+ GUMBO_TAG_PRE,
32
+ GUMBO_TAG_BLOCKQUOTE,
33
+ GUMBO_TAG_OL,
34
+ GUMBO_TAG_UL,
35
+ GUMBO_TAG_LI,
36
+ GUMBO_TAG_DL,
37
+ GUMBO_TAG_DT,
38
+ GUMBO_TAG_DD,
39
+ GUMBO_TAG_FIGURE,
40
+ GUMBO_TAG_FIGCAPTION,
41
+ GUMBO_TAG_MAIN,
42
+ GUMBO_TAG_DIV,
43
+ GUMBO_TAG_A,
44
+ GUMBO_TAG_EM,
45
+ GUMBO_TAG_STRONG,
46
+ GUMBO_TAG_SMALL,
47
+ GUMBO_TAG_S,
48
+ GUMBO_TAG_CITE,
49
+ GUMBO_TAG_Q,
50
+ GUMBO_TAG_DFN,
51
+ GUMBO_TAG_ABBR,
52
+ GUMBO_TAG_DATA,
53
+ GUMBO_TAG_TIME,
54
+ GUMBO_TAG_CODE,
55
+ GUMBO_TAG_VAR,
56
+ GUMBO_TAG_SAMP,
57
+ GUMBO_TAG_KBD,
58
+ GUMBO_TAG_SUB,
59
+ GUMBO_TAG_SUP,
60
+ GUMBO_TAG_I,
61
+ GUMBO_TAG_B,
62
+ GUMBO_TAG_U,
63
+ GUMBO_TAG_MARK,
64
+ GUMBO_TAG_RUBY,
65
+ GUMBO_TAG_RT,
66
+ GUMBO_TAG_RP,
67
+ GUMBO_TAG_BDI,
68
+ GUMBO_TAG_BDO,
69
+ GUMBO_TAG_SPAN,
70
+ GUMBO_TAG_BR,
71
+ GUMBO_TAG_WBR,
72
+ GUMBO_TAG_INS,
73
+ GUMBO_TAG_DEL,
74
+ GUMBO_TAG_IMAGE,
75
+ GUMBO_TAG_IMG,
76
+ GUMBO_TAG_IFRAME,
77
+ GUMBO_TAG_EMBED,
78
+ GUMBO_TAG_OBJECT,
79
+ GUMBO_TAG_PARAM,
80
+ GUMBO_TAG_VIDEO,
81
+ GUMBO_TAG_AUDIO,
82
+ GUMBO_TAG_SOURCE,
83
+ GUMBO_TAG_TRACK,
84
+ GUMBO_TAG_CANVAS,
85
+ GUMBO_TAG_MAP,
86
+ GUMBO_TAG_AREA,
87
+ GUMBO_TAG_MATH,
88
+ GUMBO_TAG_MI,
89
+ GUMBO_TAG_MO,
90
+ GUMBO_TAG_MN,
91
+ GUMBO_TAG_MS,
92
+ GUMBO_TAG_MTEXT,
93
+ GUMBO_TAG_MGLYPH,
94
+ GUMBO_TAG_MALIGNMARK,
95
+ GUMBO_TAG_ANNOTATION_XML,
96
+ GUMBO_TAG_SVG,
97
+ GUMBO_TAG_FOREIGNOBJECT,
98
+ GUMBO_TAG_DESC,
99
+ GUMBO_TAG_TABLE,
100
+ GUMBO_TAG_CAPTION,
101
+ GUMBO_TAG_COLGROUP,
102
+ GUMBO_TAG_COL,
103
+ GUMBO_TAG_TBODY,
104
+ GUMBO_TAG_THEAD,
105
+ GUMBO_TAG_TFOOT,
106
+ GUMBO_TAG_TR,
107
+ GUMBO_TAG_TD,
108
+ GUMBO_TAG_TH,
109
+ GUMBO_TAG_FORM,
110
+ GUMBO_TAG_FIELDSET,
111
+ GUMBO_TAG_LEGEND,
112
+ GUMBO_TAG_LABEL,
113
+ GUMBO_TAG_INPUT,
114
+ GUMBO_TAG_BUTTON,
115
+ GUMBO_TAG_SELECT,
116
+ GUMBO_TAG_DATALIST,
117
+ GUMBO_TAG_OPTGROUP,
118
+ GUMBO_TAG_OPTION,
119
+ GUMBO_TAG_TEXTAREA,
120
+ GUMBO_TAG_KEYGEN,
121
+ GUMBO_TAG_OUTPUT,
122
+ GUMBO_TAG_PROGRESS,
123
+ GUMBO_TAG_METER,
124
+ GUMBO_TAG_DETAILS,
125
+ GUMBO_TAG_SUMMARY,
126
+ GUMBO_TAG_MENU,
127
+ GUMBO_TAG_MENUITEM,
128
+ GUMBO_TAG_APPLET,
129
+ GUMBO_TAG_ACRONYM,
130
+ GUMBO_TAG_BGSOUND,
131
+ GUMBO_TAG_DIR,
132
+ GUMBO_TAG_FRAME,
133
+ GUMBO_TAG_FRAMESET,
134
+ GUMBO_TAG_NOFRAMES,
135
+ GUMBO_TAG_ISINDEX,
136
+ GUMBO_TAG_LISTING,
137
+ GUMBO_TAG_XMP,
138
+ GUMBO_TAG_NEXTID,
139
+ GUMBO_TAG_NOEMBED,
140
+ GUMBO_TAG_PLAINTEXT,
141
+ GUMBO_TAG_RB,
142
+ GUMBO_TAG_STRIKE,
143
+ GUMBO_TAG_BASEFONT,
144
+ GUMBO_TAG_BIG,
145
+ GUMBO_TAG_BLINK,
146
+ GUMBO_TAG_CENTER,
147
+ GUMBO_TAG_FONT,
148
+ GUMBO_TAG_MARQUEE,
149
+ GUMBO_TAG_MULTICOL,
150
+ GUMBO_TAG_NOBR,
151
+ GUMBO_TAG_SPACER,
152
+ GUMBO_TAG_TT,
153
+ GUMBO_TAG_RTC,
@@ -0,0 +1,105 @@
1
+ static unsigned int tag_hash(
2
+ register const char *str, register unsigned int len) {
3
+ static unsigned short asso_values[] = {296, 296, 296, 296, 296, 296, 296, 296,
4
+ 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296,
5
+ 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296,
6
+ 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 6, 4, 3, 1, 1, 0,
7
+ 1, 0, 0, 296, 296, 296, 296, 296, 296, 296, 22, 73, 151, 4, 13, 59, 65, 2,
8
+ 69, 0, 134, 9, 16, 52, 55, 28, 101, 0, 1, 6, 63, 126, 104, 93, 124, 296,
9
+ 296, 296, 296, 296, 296, 296, 22, 73, 151, 4, 13, 59, 65, 2, 69, 0, 134,
10
+ 9, 16, 52, 55, 28, 101, 0, 1, 6, 63, 126, 104, 93, 124, 296, 296, 296,
11
+ 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296,
12
+ 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296,
13
+ 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296,
14
+ 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296,
15
+ 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296,
16
+ 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296,
17
+ 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296,
18
+ 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296,
19
+ 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296, 296};
20
+ register unsigned int hval = len;
21
+
22
+ switch (hval) {
23
+ default:
24
+ hval += asso_values[(unsigned char) str[1] + 3];
25
+ /*FALLTHROUGH*/
26
+ case 1:
27
+ hval += asso_values[(unsigned char) str[0]];
28
+ break;
29
+ }
30
+ return hval + asso_values[(unsigned char) str[len - 1]];
31
+ }
32
+
33
+ static const unsigned char kGumboTagMap[] = {GUMBO_TAG_LAST, GUMBO_TAG_LAST,
34
+ GUMBO_TAG_LAST, GUMBO_TAG_S, GUMBO_TAG_H6, GUMBO_TAG_H5, GUMBO_TAG_H4,
35
+ GUMBO_TAG_H3, GUMBO_TAG_SPACER, GUMBO_TAG_H2, GUMBO_TAG_HEADER,
36
+ GUMBO_TAG_H1, GUMBO_TAG_HEAD, GUMBO_TAG_LAST, GUMBO_TAG_DETAILS,
37
+ GUMBO_TAG_SELECT, GUMBO_TAG_DIR, GUMBO_TAG_LAST, GUMBO_TAG_DEL,
38
+ GUMBO_TAG_LAST, GUMBO_TAG_SOURCE, GUMBO_TAG_LEGEND, GUMBO_TAG_DATALIST,
39
+ GUMBO_TAG_METER, GUMBO_TAG_MGLYPH, GUMBO_TAG_LAST, GUMBO_TAG_MATH,
40
+ GUMBO_TAG_LABEL, GUMBO_TAG_TABLE, GUMBO_TAG_TEMPLATE, GUMBO_TAG_LAST,
41
+ GUMBO_TAG_RP, GUMBO_TAG_TIME, GUMBO_TAG_TITLE, GUMBO_TAG_DATA,
42
+ GUMBO_TAG_APPLET, GUMBO_TAG_HGROUP, GUMBO_TAG_SAMP, GUMBO_TAG_TEXTAREA,
43
+ GUMBO_TAG_ABBR, GUMBO_TAG_MARQUEE, GUMBO_TAG_LAST, GUMBO_TAG_MENUITEM,
44
+ GUMBO_TAG_SMALL, GUMBO_TAG_META, GUMBO_TAG_A, GUMBO_TAG_LAST,
45
+ GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_EMBED,
46
+ GUMBO_TAG_MAP, GUMBO_TAG_LAST, GUMBO_TAG_PARAM, GUMBO_TAG_LAST,
47
+ GUMBO_TAG_LAST, GUMBO_TAG_NOBR, GUMBO_TAG_P, GUMBO_TAG_SPAN, GUMBO_TAG_EM,
48
+ GUMBO_TAG_LAST, GUMBO_TAG_NOFRAMES, GUMBO_TAG_SECTION, GUMBO_TAG_NOEMBED,
49
+ GUMBO_TAG_NEXTID, GUMBO_TAG_FOOTER, GUMBO_TAG_NOSCRIPT, GUMBO_TAG_HR,
50
+ GUMBO_TAG_LAST, GUMBO_TAG_FONT, GUMBO_TAG_DL, GUMBO_TAG_TR,
51
+ GUMBO_TAG_SCRIPT, GUMBO_TAG_MO, GUMBO_TAG_LAST, GUMBO_TAG_DD,
52
+ GUMBO_TAG_MAIN, GUMBO_TAG_TD, GUMBO_TAG_FOREIGNOBJECT, GUMBO_TAG_FORM,
53
+ GUMBO_TAG_OBJECT, GUMBO_TAG_LAST, GUMBO_TAG_FIELDSET, GUMBO_TAG_LAST,
54
+ GUMBO_TAG_BGSOUND, GUMBO_TAG_MENU, GUMBO_TAG_TFOOT, GUMBO_TAG_FIGURE,
55
+ GUMBO_TAG_RB, GUMBO_TAG_LI, GUMBO_TAG_LISTING, GUMBO_TAG_BASEFONT,
56
+ GUMBO_TAG_OPTGROUP, GUMBO_TAG_LAST, GUMBO_TAG_BASE, GUMBO_TAG_ADDRESS,
57
+ GUMBO_TAG_MI, GUMBO_TAG_LAST, GUMBO_TAG_PLAINTEXT, GUMBO_TAG_LAST,
58
+ GUMBO_TAG_PROGRESS, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
59
+ GUMBO_TAG_ACRONYM, GUMBO_TAG_ARTICLE, GUMBO_TAG_LAST, GUMBO_TAG_PRE,
60
+ GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_AREA,
61
+ GUMBO_TAG_RT, GUMBO_TAG_LAST, GUMBO_TAG_OPTION, GUMBO_TAG_IMAGE,
62
+ GUMBO_TAG_DT, GUMBO_TAG_LAST, GUMBO_TAG_TT, GUMBO_TAG_HTML, GUMBO_TAG_WBR,
63
+ GUMBO_TAG_OL, GUMBO_TAG_LAST, GUMBO_TAG_STYLE, GUMBO_TAG_STRIKE,
64
+ GUMBO_TAG_SUP, GUMBO_TAG_MULTICOL, GUMBO_TAG_U, GUMBO_TAG_DFN, GUMBO_TAG_UL,
65
+ GUMBO_TAG_FIGCAPTION, GUMBO_TAG_MTEXT, GUMBO_TAG_LAST, GUMBO_TAG_VAR,
66
+ GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_FRAMESET, GUMBO_TAG_LAST,
67
+ GUMBO_TAG_BR, GUMBO_TAG_I, GUMBO_TAG_FRAME, GUMBO_TAG_LAST, GUMBO_TAG_DIV,
68
+ GUMBO_TAG_LAST, GUMBO_TAG_TH, GUMBO_TAG_MS, GUMBO_TAG_ANNOTATION_XML,
69
+ GUMBO_TAG_B, GUMBO_TAG_TBODY, GUMBO_TAG_THEAD, GUMBO_TAG_BIG,
70
+ GUMBO_TAG_BLOCKQUOTE, GUMBO_TAG_XMP, GUMBO_TAG_LAST, GUMBO_TAG_KBD,
71
+ GUMBO_TAG_LAST, GUMBO_TAG_LINK, GUMBO_TAG_IFRAME, GUMBO_TAG_MARK,
72
+ GUMBO_TAG_CENTER, GUMBO_TAG_OUTPUT, GUMBO_TAG_DESC, GUMBO_TAG_CANVAS,
73
+ GUMBO_TAG_COL, GUMBO_TAG_MALIGNMARK, GUMBO_TAG_IMG, GUMBO_TAG_ASIDE,
74
+ GUMBO_TAG_LAST, GUMBO_TAG_CODE, GUMBO_TAG_LAST, GUMBO_TAG_SUB, GUMBO_TAG_MN,
75
+ GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_INS, GUMBO_TAG_AUDIO,
76
+ GUMBO_TAG_STRONG, GUMBO_TAG_CITE, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
77
+ GUMBO_TAG_LAST, GUMBO_TAG_INPUT, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
78
+ GUMBO_TAG_LAST, GUMBO_TAG_NAV, GUMBO_TAG_LAST, GUMBO_TAG_COLGROUP,
79
+ GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
80
+ GUMBO_TAG_LAST, GUMBO_TAG_SVG, GUMBO_TAG_KEYGEN, GUMBO_TAG_VIDEO,
81
+ GUMBO_TAG_BDO, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
82
+ GUMBO_TAG_LAST, GUMBO_TAG_BODY, GUMBO_TAG_LAST, GUMBO_TAG_Q, GUMBO_TAG_LAST,
83
+ GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_TRACK,
84
+ GUMBO_TAG_LAST, GUMBO_TAG_BDI, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
85
+ GUMBO_TAG_LAST, GUMBO_TAG_CAPTION, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
86
+ GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
87
+ GUMBO_TAG_RUBY, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_BUTTON,
88
+ GUMBO_TAG_SUMMARY, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
89
+ GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
90
+ GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
91
+ GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
92
+ GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
93
+ GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
94
+ GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
95
+ GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
96
+ GUMBO_TAG_LAST, GUMBO_TAG_RTC, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
97
+ GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
98
+ GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_BLINK, GUMBO_TAG_LAST,
99
+ GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
100
+ GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
101
+ GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
102
+ GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
103
+ GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
104
+ GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_LAST,
105
+ GUMBO_TAG_LAST, GUMBO_TAG_LAST, GUMBO_TAG_ISINDEX};
@@ -0,0 +1,4 @@
1
+ // Generated via `gentags.py src/tag.in`.
2
+ // Do not edit; edit src/tag.in instead.
3
+ // clang-format off
4
+ 4, 4, 5, 4, 4, 4, 5, 6, 8, 8, 4, 7, 7, 3, 5, 2, 2, 2, 2, 2, 2, 6, 6, 6, 7, 1, 2, 3, 10, 2, 2, 2, 2, 2, 2, 6, 10, 4, 3, 1, 2, 6, 5, 1, 4, 1, 3, 4, 4, 4, 4, 3, 4, 3, 3, 3, 1, 1, 1, 4, 4, 2, 2, 3, 3, 4, 2, 3, 3, 3, 5, 3, 6, 5, 6, 5, 5, 5, 6, 5, 6, 3, 4, 4, 2, 2, 2, 2, 5, 6, 10, 14, 3, 13, 4, 5, 7, 8, 3, 5, 5, 5, 2, 2, 2, 4, 8, 6, 5, 5, 6, 6, 8, 8, 6, 8, 6, 6, 8, 5, 7, 7, 4, 8, 6, 7, 7, 3, 5, 8, 8, 7, 7, 3, 6, 7, 9, 2, 6, 8, 3, 5, 6, 4, 7, 8, 4, 6, 2, 3,
@@ -0,0 +1,153 @@
1
+ // Generated via `gentags.py src/tag.in`.
2
+ // Do not edit; edit src/tag.in instead.
3
+ // clang-format off
4
+ "html",
5
+ "head",
6
+ "title",
7
+ "base",
8
+ "link",
9
+ "meta",
10
+ "style",
11
+ "script",
12
+ "noscript",
13
+ "template",
14
+ "body",
15
+ "article",
16
+ "section",
17
+ "nav",
18
+ "aside",
19
+ "h1",
20
+ "h2",
21
+ "h3",
22
+ "h4",
23
+ "h5",
24
+ "h6",
25
+ "hgroup",
26
+ "header",
27
+ "footer",
28
+ "address",
29
+ "p",
30
+ "hr",
31
+ "pre",
32
+ "blockquote",
33
+ "ol",
34
+ "ul",
35
+ "li",
36
+ "dl",
37
+ "dt",
38
+ "dd",
39
+ "figure",
40
+ "figcaption",
41
+ "main",
42
+ "div",
43
+ "a",
44
+ "em",
45
+ "strong",
46
+ "small",
47
+ "s",
48
+ "cite",
49
+ "q",
50
+ "dfn",
51
+ "abbr",
52
+ "data",
53
+ "time",
54
+ "code",
55
+ "var",
56
+ "samp",
57
+ "kbd",
58
+ "sub",
59
+ "sup",
60
+ "i",
61
+ "b",
62
+ "u",
63
+ "mark",
64
+ "ruby",
65
+ "rt",
66
+ "rp",
67
+ "bdi",
68
+ "bdo",
69
+ "span",
70
+ "br",
71
+ "wbr",
72
+ "ins",
73
+ "del",
74
+ "image",
75
+ "img",
76
+ "iframe",
77
+ "embed",
78
+ "object",
79
+ "param",
80
+ "video",
81
+ "audio",
82
+ "source",
83
+ "track",
84
+ "canvas",
85
+ "map",
86
+ "area",
87
+ "math",
88
+ "mi",
89
+ "mo",
90
+ "mn",
91
+ "ms",
92
+ "mtext",
93
+ "mglyph",
94
+ "malignmark",
95
+ "annotation-xml",
96
+ "svg",
97
+ "foreignobject",
98
+ "desc",
99
+ "table",
100
+ "caption",
101
+ "colgroup",
102
+ "col",
103
+ "tbody",
104
+ "thead",
105
+ "tfoot",
106
+ "tr",
107
+ "td",
108
+ "th",
109
+ "form",
110
+ "fieldset",
111
+ "legend",
112
+ "label",
113
+ "input",
114
+ "button",
115
+ "select",
116
+ "datalist",
117
+ "optgroup",
118
+ "option",
119
+ "textarea",
120
+ "keygen",
121
+ "output",
122
+ "progress",
123
+ "meter",
124
+ "details",
125
+ "summary",
126
+ "menu",
127
+ "menuitem",
128
+ "applet",
129
+ "acronym",
130
+ "bgsound",
131
+ "dir",
132
+ "frame",
133
+ "frameset",
134
+ "noframes",
135
+ "isindex",
136
+ "listing",
137
+ "xmp",
138
+ "nextid",
139
+ "noembed",
140
+ "plaintext",
141
+ "rb",
142
+ "strike",
143
+ "basefont",
144
+ "big",
145
+ "blink",
146
+ "center",
147
+ "font",
148
+ "marquee",
149
+ "multicol",
150
+ "nobr",
151
+ "spacer",
152
+ "tt",
153
+ "rtc",
@@ -42,7 +42,6 @@
42
42
  // prevents parse error position from being messed up by possible mark/resets in
43
43
  // temporary buffer manipulation.
44
44
 
45
-
46
45
  #include "tokenizer.h"
47
46
 
48
47
  #include <assert.h>
@@ -64,13 +63,13 @@
64
63
 
65
64
  // Compared against _script_data_buffer to determine if we're in double-escaped
66
65
  // script mode.
67
- const GumboStringPiece kScriptTag = { "script", 6 };
66
+ const GumboStringPiece kScriptTag = {"script", 6};
68
67
 
69
68
  // An enum for the return value of each individual state.
70
69
  typedef enum {
71
- RETURN_ERROR, // Return false (error) from the tokenizer.
72
- RETURN_SUCCESS, // Return true (success) from the tokenizer.
73
- NEXT_CHAR // Proceed to the next character and continue lexing.
70
+ RETURN_ERROR, // Return false (error) from the tokenizer.
71
+ RETURN_SUCCESS, // Return true (success) from the tokenizer.
72
+ NEXT_CHAR // Proceed to the next character and continue lexing.
74
73
  } StateResult;
75
74
 
76
75
  // This is a struct containing state necessary to build up a tag token,
@@ -200,7 +199,8 @@ typedef struct GumboInternalTokenizerState {
200
199
  } GumboTokenizerState;
201
200
 
202
201
  // Adds an ERR_UNEXPECTED_CODE_POINT parse error to the parser's error struct.
203
- static void tokenizer_add_parse_error(GumboParser* parser, GumboErrorType type) {
202
+ static void tokenizer_add_parse_error(
203
+ GumboParser* parser, GumboErrorType type) {
204
204
  GumboError* error = gumbo_add_error(parser);
205
205
  if (!error) {
206
206
  return;
@@ -356,12 +356,10 @@ static void clear_temporary_buffer(GumboParser* parser) {
356
356
  GumboTokenizerState* tokenizer = parser->_tokenizer_state;
357
357
  assert(!tokenizer->_temporary_buffer_emit);
358
358
  utf8iterator_mark(&tokenizer->_input);
359
- gumbo_string_buffer_destroy(parser, &tokenizer->_temporary_buffer);
360
- gumbo_string_buffer_init(parser, &tokenizer->_temporary_buffer);
359
+ gumbo_string_buffer_clear(parser, &tokenizer->_temporary_buffer);
361
360
  // The temporary buffer and script data buffer are the same object in the
362
361
  // spec, so the script data buffer should be cleared as well.
363
- gumbo_string_buffer_destroy(parser, &tokenizer->_script_data_buffer);
364
- gumbo_string_buffer_init(parser, &tokenizer->_script_data_buffer);
362
+ gumbo_string_buffer_clear(parser, &tokenizer->_script_data_buffer);
365
363
  }
366
364
 
367
365
  // Appends a codepoint to the temporary buffer.
@@ -374,15 +372,14 @@ static void append_char_to_temporary_buffer(
374
372
  // Checks to see if the temporary buffer equals a certain string.
375
373
  // Make sure this remains side-effect free; it's used in assertions.
376
374
  #ifndef NDEBUG
377
- static bool temporary_buffer_equals(
378
- GumboParser* parser, const char* text) {
375
+ static bool temporary_buffer_equals(GumboParser* parser, const char* text) {
379
376
  GumboStringBuffer* buffer = &parser->_tokenizer_state->_temporary_buffer;
380
377
  // TODO(jdtang): See if the extra strlen is a performance problem, and replace
381
378
  // it with an explicit sizeof(literal) if necessary. I don't think it will
382
379
  // be, as this is only used in a couple of rare states.
383
380
  int text_len = strlen(text);
384
381
  return text_len == buffer->length &&
385
- memcmp(buffer->data, text, text_len) == 0;
382
+ memcmp(buffer->data, text, text_len) == 0;
386
383
  }
387
384
  #endif
388
385
 
@@ -539,8 +536,8 @@ static StateResult emit_current_tag(GumboParser* parser, GumboToken* output) {
539
536
  output->v.start_tag.is_self_closing = tag_state->_is_self_closing;
540
537
  tag_state->_last_start_tag = tag_state->_tag;
541
538
  mark_tag_state_as_empty(tag_state);
542
- gumbo_debug("Emitted start tag %s.\n",
543
- gumbo_normalized_tagname(tag_state->_tag));
539
+ gumbo_debug(
540
+ "Emitted start tag %s.\n", gumbo_normalized_tagname(tag_state->_tag));
544
541
  } else {
545
542
  output->type = GUMBO_TOKEN_END_TAG;
546
543
  output->v.end_tag = tag_state->_tag;
@@ -548,17 +545,18 @@ static StateResult emit_current_tag(GumboParser* parser, GumboToken* output) {
548
545
  // token, but it's still initialized as normal, so it must be manually
549
546
  // deallocated. There may also be attributes to destroy, in certain broken
550
547
  // cases like </div</th> (the "th" is an attribute there).
551
- for (int i = 0; i < tag_state->_attributes.length; ++i) {
548
+ for (unsigned int i = 0; i < tag_state->_attributes.length; ++i) {
552
549
  gumbo_destroy_attribute(parser, tag_state->_attributes.data[i]);
553
550
  }
554
551
  gumbo_parser_deallocate(parser, tag_state->_attributes.data);
555
552
  mark_tag_state_as_empty(tag_state);
556
- gumbo_debug("Emitted end tag %s.\n",
557
- gumbo_normalized_tagname(tag_state->_tag));
553
+ gumbo_debug(
554
+ "Emitted end tag %s.\n", gumbo_normalized_tagname(tag_state->_tag));
558
555
  }
559
556
  gumbo_string_buffer_destroy(parser, &tag_state->_buffer);
560
557
  finish_token(parser, output);
561
- gumbo_debug("Original text = %.*s.\n", output->original_text.length, output->original_text.data);
558
+ gumbo_debug("Original text = %.*s.\n", output->original_text.length,
559
+ output->original_text.data);
562
560
  assert(output->original_text.length >= 2);
563
561
  assert(output->original_text.data[0] == '<');
564
562
  assert(output->original_text.data[output->original_text.length - 1] == '>');
@@ -571,7 +569,7 @@ static StateResult emit_current_tag(GumboParser* parser, GumboToken* output) {
571
569
  // avoid a memory leak.
572
570
  static void abandon_current_tag(GumboParser* parser) {
573
571
  GumboTagState* tag_state = &parser->_tokenizer_state->_tag_state;
574
- for (int i = 0; i < tag_state->_attributes.length; ++i) {
572
+ for (unsigned int i = 0; i < tag_state->_attributes.length; ++i) {
575
573
  gumbo_destroy_attribute(parser, tag_state->_attributes.data[i]);
576
574
  }
577
575
  gumbo_parser_deallocate(parser, tag_state->_attributes.data);
@@ -583,9 +581,8 @@ static void abandon_current_tag(GumboParser* parser) {
583
581
  // Wraps the consume_char_ref function to handle its output and make the
584
582
  // appropriate TokenizerState modifications. Returns RETURN_ERROR if a parse
585
583
  // error occurred, RETURN_SUCCESS otherwise.
586
- static StateResult emit_char_ref(
587
- GumboParser* parser, int additional_allowed_char,
588
- bool is_in_attribute, GumboToken* output) {
584
+ static StateResult emit_char_ref(GumboParser* parser,
585
+ int additional_allowed_char, bool is_in_attribute, GumboToken* output) {
589
586
  GumboTokenizerState* tokenizer = parser->_tokenizer_state;
590
587
  OneOrTwoCodepoints char_ref;
591
588
  bool status = consume_char_ref(
@@ -649,8 +646,7 @@ static bool maybe_emit_from_temporary_buffer(
649
646
  // _temporary_buffer_emit, and then (if the temporary buffer is non-empty) emits
650
647
  // the first character in it. It returns true if a character was emitted, false
651
648
  // otherwise.
652
- static bool emit_temporary_buffer(
653
- GumboParser* parser, GumboToken* output) {
649
+ static bool emit_temporary_buffer(GumboParser* parser, GumboToken* output) {
654
650
  GumboTokenizerState* tokenizer = parser->_tokenizer_state;
655
651
  assert(tokenizer->_temporary_buffer.data);
656
652
  utf8iterator_reset(&tokenizer->_input);
@@ -663,8 +659,8 @@ static bool emit_temporary_buffer(
663
659
  // start point; the only time you would *not* want to pass true for this
664
660
  // parameter is if you want the original_text to include character (like an
665
661
  // opening quote) that doesn't appear in the value.
666
- static void append_char_to_tag_buffer(GumboParser* parser, int codepoint,
667
- bool reinitilize_position_on_first) {
662
+ static void append_char_to_tag_buffer(
663
+ GumboParser* parser, int codepoint, bool reinitilize_position_on_first) {
668
664
  GumboStringBuffer* buffer = &parser->_tokenizer_state->_tag_state._buffer;
669
665
  if (buffer->length == 0 && reinitilize_position_on_first) {
670
666
  reset_tag_buffer_start_point(parser);
@@ -697,7 +693,11 @@ static void start_new_tag(GumboParser* parser, bool is_start_tag) {
697
693
  gumbo_string_buffer_append_codepoint(parser, c, &tag_state->_buffer);
698
694
 
699
695
  assert(tag_state->_attributes.data == NULL);
700
- gumbo_vector_init(parser, 4, &tag_state->_attributes);
696
+ // Initial size chosen by statistical analysis of a corpus of 60k webpages.
697
+ // 99.5% of elements have 0 attributes, 93% of the remainder have 1. These
698
+ // numbers are a bit higher for more modern websites (eg. ~45% = 0, ~40% = 1
699
+ // for the HTML5 Spec), but still have basically 99% of nodes with <= 2 attrs.
700
+ gumbo_vector_init(parser, 1, &tag_state->_attributes);
701
701
  tag_state->_drop_next_attr_value = false;
702
702
  tag_state->_is_start_tag = is_start_tag;
703
703
  tag_state->_is_self_closing = false;
@@ -717,16 +717,15 @@ static void copy_over_tag_buffer(GumboParser* parser, const char** output) {
717
717
  // * The start_pos GumboSourcePosition with the start position of the tag
718
718
  // buffer.
719
719
  // * The end_pos GumboSourcePosition with the current source position.
720
- static void copy_over_original_tag_text(
721
- GumboParser* parser, GumboStringPiece* original_text,
722
- GumboSourcePosition* start_pos, GumboSourcePosition* end_pos) {
720
+ static void copy_over_original_tag_text(GumboParser* parser,
721
+ GumboStringPiece* original_text, GumboSourcePosition* start_pos,
722
+ GumboSourcePosition* end_pos) {
723
723
  GumboTokenizerState* tokenizer = parser->_tokenizer_state;
724
724
  GumboTagState* tag_state = &tokenizer->_tag_state;
725
725
 
726
726
  original_text->data = tag_state->_original_text;
727
- original_text->length =
728
- utf8iterator_get_char_pointer(&tokenizer->_input) -
729
- tag_state->_original_text;
727
+ original_text->length = utf8iterator_get_char_pointer(&tokenizer->_input) -
728
+ tag_state->_original_text;
730
729
  if (original_text->data[original_text->length - 1] == '\r') {
731
730
  // Since \r is skipped by the UTF-8 iterator, it can sometimes end up
732
731
  // appended to the end of original text even when it's really the first part
@@ -751,16 +750,14 @@ static void finish_tag_name(GumboParser* parser) {
751
750
  GumboTokenizerState* tokenizer = parser->_tokenizer_state;
752
751
  GumboTagState* tag_state = &tokenizer->_tag_state;
753
752
 
754
- const char* temp;
755
- copy_over_tag_buffer(parser, &temp);
756
- tag_state->_tag = gumbo_tag_enum(temp);
753
+ tag_state->_tag =
754
+ gumbo_tagn_enum(tag_state->_buffer.data, tag_state->_buffer.length);
757
755
  reinitialize_tag_buffer(parser);
758
- gumbo_parser_deallocate(parser, (void*) temp);
759
756
  }
760
757
 
761
758
  // Adds an ERR_DUPLICATE_ATTR parse error to the parser's error struct.
762
759
  static void add_duplicate_attr_error(GumboParser* parser, const char* attr_name,
763
- int original_index, int new_index) {
760
+ int original_index, int new_index) {
764
761
  GumboError* error = gumbo_add_error(parser);
765
762
  if (!error) {
766
763
  return;
@@ -790,14 +787,13 @@ static bool finish_attribute_name(GumboParser* parser) {
790
787
  assert(tag_state->_attributes.capacity);
791
788
 
792
789
  GumboVector* /* GumboAttribute* */ attributes = &tag_state->_attributes;
793
- for (int i = 0; i < attributes->length; ++i) {
790
+ for (unsigned int i = 0; i < attributes->length; ++i) {
794
791
  GumboAttribute* attr = attributes->data[i];
795
792
  if (strlen(attr->name) == tag_state->_buffer.length &&
796
793
  memcmp(attr->name, tag_state->_buffer.data,
797
- tag_state->_buffer.length) == 0) {
794
+ tag_state->_buffer.length) == 0) {
798
795
  // Identical attribute; bail.
799
- add_duplicate_attr_error(
800
- parser, attr->name, i, attributes->length);
796
+ add_duplicate_attr_error(parser, attr->name, i, attributes->length);
801
797
  tag_state->_drop_next_attr_value = true;
802
798
  return false;
803
799
  }
@@ -806,11 +802,11 @@ static bool finish_attribute_name(GumboParser* parser) {
806
802
  GumboAttribute* attr = gumbo_parser_allocate(parser, sizeof(GumboAttribute));
807
803
  attr->attr_namespace = GUMBO_ATTR_NAMESPACE_NONE;
808
804
  copy_over_tag_buffer(parser, &attr->name);
809
- copy_over_original_tag_text(parser, &attr->original_name,
810
- &attr->name_start, &attr->name_end);
805
+ copy_over_original_tag_text(
806
+ parser, &attr->original_name, &attr->name_start, &attr->name_end);
811
807
  attr->value = gumbo_copy_stringz(parser, "");
812
- copy_over_original_tag_text(parser, &attr->original_value,
813
- &attr->name_start, &attr->name_end);
808
+ copy_over_original_tag_text(
809
+ parser, &attr->original_value, &attr->name_start, &attr->name_end);
814
810
  gumbo_vector_add(parser, attr, attributes);
815
811
  reinitialize_tag_buffer(parser);
816
812
  return true;
@@ -832,8 +828,8 @@ static void finish_attribute_value(GumboParser* parser) {
832
828
  tag_state->_attributes.data[tag_state->_attributes.length - 1];
833
829
  gumbo_parser_deallocate(parser, (void*) attr->value);
834
830
  copy_over_tag_buffer(parser, &attr->value);
835
- copy_over_original_tag_text(parser, &attr->original_value,
836
- &attr->value_start, &attr->value_end);
831
+ copy_over_original_tag_text(
832
+ parser, &attr->original_value, &attr->value_start, &attr->value_end);
837
833
  reinitialize_tag_buffer(parser);
838
834
  }
839
835
 
@@ -841,13 +837,9 @@ static void finish_attribute_value(GumboParser* parser) {
841
837
  static bool is_appropriate_end_tag(GumboParser* parser) {
842
838
  GumboTagState* tag_state = &parser->_tokenizer_state->_tag_state;
843
839
  assert(!tag_state->_is_start_tag);
844
- // Null terminate the current string buffer, so it can be passed to
845
- // gumbo_tag_enum, but don't increment the length in case we need to dump the
846
- // buffer as character tokens.
847
- gumbo_string_buffer_append_codepoint(parser, '\0', &tag_state->_buffer);
848
- --tag_state->_buffer.length;
849
840
  return tag_state->_last_start_tag != GUMBO_TAG_LAST &&
850
- tag_state->_last_start_tag == gumbo_tag_enum(tag_state->_buffer.data);
841
+ tag_state->_last_start_tag == gumbo_tagn_enum(tag_state->_buffer.data,
842
+ tag_state->_buffer.length);
851
843
  }
852
844
 
853
845
  void gumbo_tokenizer_state_init(
@@ -892,15 +884,14 @@ void gumbo_tokenizer_set_is_current_node_foreign(
892
884
  GumboParser* parser, bool is_foreign) {
893
885
  if (is_foreign != parser->_tokenizer_state->_is_current_node_foreign) {
894
886
  gumbo_debug("Toggling is_current_node_foreign to %s.\n",
895
- is_foreign ? "true" : "false");
887
+ is_foreign ? "true" : "false");
896
888
  }
897
889
  parser->_tokenizer_state->_is_current_node_foreign = is_foreign;
898
890
  }
899
891
 
900
892
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#data-state
901
- static StateResult handle_data_state(
902
- GumboParser* parser, GumboTokenizerState* tokenizer,
903
- int c, GumboToken* output) {
893
+ static StateResult handle_data_state(GumboParser* parser,
894
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
904
895
  switch (c) {
905
896
  case '&':
906
897
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_CHAR_REF_IN_DATA);
@@ -924,17 +915,15 @@ static StateResult handle_data_state(
924
915
  }
925
916
 
926
917
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#character-reference-in-data-state
927
- static StateResult handle_char_ref_in_data_state(
928
- GumboParser* parser, GumboTokenizerState* tokenizer,
929
- int c, GumboToken* output) {
918
+ static StateResult handle_char_ref_in_data_state(GumboParser* parser,
919
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
930
920
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
931
921
  return emit_char_ref(parser, ' ', false, output);
932
922
  }
933
923
 
934
924
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#rcdata-state
935
- static StateResult handle_rcdata_state(
936
- GumboParser* parser, GumboTokenizerState* tokenizer,
937
- int c, GumboToken* output) {
925
+ static StateResult handle_rcdata_state(GumboParser* parser,
926
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
938
927
  switch (c) {
939
928
  case '&':
940
929
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_CHAR_REF_IN_RCDATA);
@@ -955,17 +944,15 @@ static StateResult handle_rcdata_state(
955
944
  }
956
945
 
957
946
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#character-reference-in-rcdata-state
958
- static StateResult handle_char_ref_in_rcdata_state(
959
- GumboParser* parser, GumboTokenizerState* tokenizer,
960
- int c, GumboToken* output) {
947
+ static StateResult handle_char_ref_in_rcdata_state(GumboParser* parser,
948
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
961
949
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_RCDATA);
962
950
  return emit_char_ref(parser, ' ', false, output);
963
951
  }
964
952
 
965
953
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#rawtext-state
966
- static StateResult handle_rawtext_state(
967
- GumboParser* parser, GumboTokenizerState* tokenizer,
968
- int c, GumboToken* output) {
954
+ static StateResult handle_rawtext_state(GumboParser* parser,
955
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
969
956
  switch (c) {
970
957
  case '<':
971
958
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_RAWTEXT_LT);
@@ -982,9 +969,8 @@ static StateResult handle_rawtext_state(
982
969
  }
983
970
 
984
971
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-state
985
- static StateResult handle_script_state(
986
- GumboParser* parser, GumboTokenizerState* tokenizer,
987
- int c, GumboToken* output) {
972
+ static StateResult handle_script_state(GumboParser* parser,
973
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
988
974
  switch (c) {
989
975
  case '<':
990
976
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT_LT);
@@ -1001,9 +987,8 @@ static StateResult handle_script_state(
1001
987
  }
1002
988
 
1003
989
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#plaintext-state
1004
- static StateResult handle_plaintext_state(
1005
- GumboParser* parser, GumboTokenizerState* tokenizer,
1006
- int c, GumboToken* output) {
990
+ static StateResult handle_plaintext_state(GumboParser* parser,
991
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
1007
992
  switch (c) {
1008
993
  case '\0':
1009
994
  return emit_replacement_char(parser, output);
@@ -1015,9 +1000,8 @@ static StateResult handle_plaintext_state(
1015
1000
  }
1016
1001
 
1017
1002
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#tag-open-state
1018
- static StateResult handle_tag_open_state(
1019
- GumboParser* parser, GumboTokenizerState* tokenizer,
1020
- int c, GumboToken* output) {
1003
+ static StateResult handle_tag_open_state(GumboParser* parser,
1004
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
1021
1005
  assert(temporary_buffer_equals(parser, "<"));
1022
1006
  switch (c) {
1023
1007
  case '!':
@@ -1049,9 +1033,8 @@ static StateResult handle_tag_open_state(
1049
1033
  }
1050
1034
 
1051
1035
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#end-tag-open-state
1052
- static StateResult handle_end_tag_open_state(
1053
- GumboParser* parser, GumboTokenizerState* tokenizer,
1054
- int c, GumboToken* output) {
1036
+ static StateResult handle_end_tag_open_state(GumboParser* parser,
1037
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
1055
1038
  assert(temporary_buffer_equals(parser, "</"));
1056
1039
  switch (c) {
1057
1040
  case '>':
@@ -1077,9 +1060,8 @@ static StateResult handle_end_tag_open_state(
1077
1060
  }
1078
1061
 
1079
1062
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#tag-name-state
1080
- static StateResult handle_tag_name_state(
1081
- GumboParser* parser, GumboTokenizerState* tokenizer,
1082
- int c, GumboToken* output) {
1063
+ static StateResult handle_tag_name_state(GumboParser* parser,
1064
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
1083
1065
  switch (c) {
1084
1066
  case '\t':
1085
1067
  case '\n':
@@ -1112,9 +1094,8 @@ static StateResult handle_tag_name_state(
1112
1094
  }
1113
1095
 
1114
1096
  // http://www.whatwg.org/specs/web-apps/current-work/complete.html#rcdata-less-than-sign-state
1115
- static StateResult handle_rcdata_lt_state(
1116
- GumboParser* parser, GumboTokenizerState* tokenizer,
1117
- int c, GumboToken* output) {
1097
+ static StateResult handle_rcdata_lt_state(GumboParser* parser,
1098
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
1118
1099
  assert(temporary_buffer_equals(parser, "<"));
1119
1100
  if (c == '/') {
1120
1101
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_RCDATA_END_TAG_OPEN);
@@ -1128,9 +1109,8 @@ static StateResult handle_rcdata_lt_state(
1128
1109
  }
1129
1110
 
1130
1111
  // http://www.whatwg.org/specs/web-apps/current-work/complete.html#rcdata-end-tag-open-state
1131
- static StateResult handle_rcdata_end_tag_open_state(
1132
- GumboParser* parser, GumboTokenizerState* tokenizer,
1133
- int c, GumboToken* output) {
1112
+ static StateResult handle_rcdata_end_tag_open_state(GumboParser* parser,
1113
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
1134
1114
  assert(temporary_buffer_equals(parser, "</"));
1135
1115
  if (is_alpha(c)) {
1136
1116
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_RCDATA_END_TAG_NAME);
@@ -1145,9 +1125,8 @@ static StateResult handle_rcdata_end_tag_open_state(
1145
1125
  }
1146
1126
 
1147
1127
  // http://www.whatwg.org/specs/web-apps/current-work/complete.html#rcdata-end-tag-name-state
1148
- static StateResult handle_rcdata_end_tag_name_state(
1149
- GumboParser* parser, GumboTokenizerState* tokenizer,
1150
- int c, GumboToken* output) {
1128
+ static StateResult handle_rcdata_end_tag_name_state(GumboParser* parser,
1129
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
1151
1130
  assert(tokenizer->_temporary_buffer.length >= 2);
1152
1131
  if (is_alpha(c)) {
1153
1132
  append_char_to_tag_buffer(parser, ensure_lowercase(c), true);
@@ -1178,9 +1157,8 @@ static StateResult handle_rcdata_end_tag_name_state(
1178
1157
  }
1179
1158
 
1180
1159
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#rawtext-less-than-sign-state
1181
- static StateResult handle_rawtext_lt_state(
1182
- GumboParser* parser, GumboTokenizerState* tokenizer,
1183
- int c, GumboToken* output) {
1160
+ static StateResult handle_rawtext_lt_state(GumboParser* parser,
1161
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
1184
1162
  assert(temporary_buffer_equals(parser, "<"));
1185
1163
  if (c == '/') {
1186
1164
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_RAWTEXT_END_TAG_OPEN);
@@ -1194,9 +1172,8 @@ static StateResult handle_rawtext_lt_state(
1194
1172
  }
1195
1173
 
1196
1174
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#rawtext-end-tag-open-state
1197
- static StateResult handle_rawtext_end_tag_open_state(
1198
- GumboParser* parser, GumboTokenizerState* tokenizer,
1199
- int c, GumboToken* output) {
1175
+ static StateResult handle_rawtext_end_tag_open_state(GumboParser* parser,
1176
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
1200
1177
  assert(temporary_buffer_equals(parser, "</"));
1201
1178
  if (is_alpha(c)) {
1202
1179
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_RAWTEXT_END_TAG_NAME);
@@ -1210,12 +1187,11 @@ static StateResult handle_rawtext_end_tag_open_state(
1210
1187
  }
1211
1188
 
1212
1189
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#rawtext-end-tag-name-state
1213
- static StateResult handle_rawtext_end_tag_name_state(
1214
- GumboParser* parser, GumboTokenizerState* tokenizer,
1215
- int c, GumboToken* output) {
1190
+ static StateResult handle_rawtext_end_tag_name_state(GumboParser* parser,
1191
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
1216
1192
  assert(tokenizer->_temporary_buffer.length >= 2);
1217
1193
  gumbo_debug("Last end tag: %*s\n", (int) tokenizer->_tag_state._buffer.length,
1218
- tokenizer->_tag_state._buffer.data);
1194
+ tokenizer->_tag_state._buffer.data);
1219
1195
  if (is_alpha(c)) {
1220
1196
  append_char_to_tag_buffer(parser, ensure_lowercase(c), true);
1221
1197
  append_char_to_temporary_buffer(parser, c);
@@ -1246,9 +1222,8 @@ static StateResult handle_rawtext_end_tag_name_state(
1246
1222
  }
1247
1223
 
1248
1224
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-less-than-sign-state
1249
- static StateResult handle_script_lt_state(
1250
- GumboParser* parser, GumboTokenizerState* tokenizer,
1251
- int c, GumboToken* output) {
1225
+ static StateResult handle_script_lt_state(GumboParser* parser,
1226
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
1252
1227
  assert(temporary_buffer_equals(parser, "<"));
1253
1228
  if (c == '/') {
1254
1229
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT_END_TAG_OPEN);
@@ -1266,9 +1241,8 @@ static StateResult handle_script_lt_state(
1266
1241
  }
1267
1242
 
1268
1243
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-end-tag-open-state
1269
- static StateResult handle_script_end_tag_open_state(
1270
- GumboParser* parser, GumboTokenizerState* tokenizer,
1271
- int c, GumboToken* output) {
1244
+ static StateResult handle_script_end_tag_open_state(GumboParser* parser,
1245
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
1272
1246
  assert(temporary_buffer_equals(parser, "</"));
1273
1247
  if (is_alpha(c)) {
1274
1248
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT_END_TAG_NAME);
@@ -1282,9 +1256,8 @@ static StateResult handle_script_end_tag_open_state(
1282
1256
  }
1283
1257
 
1284
1258
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-end-tag-name-state
1285
- static StateResult handle_script_end_tag_name_state(
1286
- GumboParser* parser, GumboTokenizerState* tokenizer,
1287
- int c, GumboToken* output) {
1259
+ static StateResult handle_script_end_tag_name_state(GumboParser* parser,
1260
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
1288
1261
  assert(tokenizer->_temporary_buffer.length >= 2);
1289
1262
  if (is_alpha(c)) {
1290
1263
  append_char_to_tag_buffer(parser, ensure_lowercase(c), true);
@@ -1315,9 +1288,8 @@ static StateResult handle_script_end_tag_name_state(
1315
1288
  }
1316
1289
 
1317
1290
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-escape-start-state
1318
- static StateResult handle_script_escaped_start_state(
1319
- GumboParser* parser, GumboTokenizerState* tokenizer,
1320
- int c, GumboToken* output) {
1291
+ static StateResult handle_script_escaped_start_state(GumboParser* parser,
1292
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
1321
1293
  if (c == '-') {
1322
1294
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT_ESCAPED_START_DASH);
1323
1295
  return emit_current_char(parser, output);
@@ -1329,9 +1301,8 @@ static StateResult handle_script_escaped_start_state(
1329
1301
  }
1330
1302
 
1331
1303
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-escape-start-dash-state
1332
- static StateResult handle_script_escaped_start_dash_state(
1333
- GumboParser* parser, GumboTokenizerState* tokenizer,
1334
- int c, GumboToken* output) {
1304
+ static StateResult handle_script_escaped_start_dash_state(GumboParser* parser,
1305
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
1335
1306
  if (c == '-') {
1336
1307
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT_ESCAPED_DASH_DASH);
1337
1308
  return emit_current_char(parser, output);
@@ -1343,9 +1314,8 @@ static StateResult handle_script_escaped_start_dash_state(
1343
1314
  }
1344
1315
 
1345
1316
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-escaped-state
1346
- static StateResult handle_script_escaped_state(
1347
- GumboParser* parser, GumboTokenizerState* tokenizer,
1348
- int c, GumboToken* output) {
1317
+ static StateResult handle_script_escaped_state(GumboParser* parser,
1318
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
1349
1319
  switch (c) {
1350
1320
  case '-':
1351
1321
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT_ESCAPED_DASH);
@@ -1366,9 +1336,8 @@ static StateResult handle_script_escaped_state(
1366
1336
  }
1367
1337
 
1368
1338
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-escaped-dash-state
1369
- static StateResult handle_script_escaped_dash_state(
1370
- GumboParser* parser, GumboTokenizerState* tokenizer,
1371
- int c, GumboToken* output) {
1339
+ static StateResult handle_script_escaped_dash_state(GumboParser* parser,
1340
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
1372
1341
  switch (c) {
1373
1342
  case '-':
1374
1343
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT_ESCAPED_DASH_DASH);
@@ -1392,9 +1361,8 @@ static StateResult handle_script_escaped_dash_state(
1392
1361
  }
1393
1362
 
1394
1363
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-escaped-dash-dash-state
1395
- static StateResult handle_script_escaped_dash_dash_state(
1396
- GumboParser* parser, GumboTokenizerState* tokenizer,
1397
- int c, GumboToken* output) {
1364
+ static StateResult handle_script_escaped_dash_dash_state(GumboParser* parser,
1365
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
1398
1366
  switch (c) {
1399
1367
  case '-':
1400
1368
  return emit_current_char(parser, output);
@@ -1420,9 +1388,8 @@ static StateResult handle_script_escaped_dash_dash_state(
1420
1388
  }
1421
1389
 
1422
1390
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-escaped-less-than-sign-state
1423
- static StateResult handle_script_escaped_lt_state(
1424
- GumboParser* parser, GumboTokenizerState* tokenizer,
1425
- int c, GumboToken* output) {
1391
+ static StateResult handle_script_escaped_lt_state(GumboParser* parser,
1392
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
1426
1393
  assert(temporary_buffer_equals(parser, "<"));
1427
1394
  assert(!tokenizer->_script_data_buffer.length);
1428
1395
  if (c == '/') {
@@ -1442,9 +1409,8 @@ static StateResult handle_script_escaped_lt_state(
1442
1409
  }
1443
1410
 
1444
1411
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-escaped-end-tag-open-state
1445
- static StateResult handle_script_escaped_end_tag_open_state(
1446
- GumboParser* parser, GumboTokenizerState* tokenizer,
1447
- int c, GumboToken* output) {
1412
+ static StateResult handle_script_escaped_end_tag_open_state(GumboParser* parser,
1413
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
1448
1414
  assert(temporary_buffer_equals(parser, "</"));
1449
1415
  if (is_alpha(c)) {
1450
1416
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT_ESCAPED_END_TAG_NAME);
@@ -1458,9 +1424,8 @@ static StateResult handle_script_escaped_end_tag_open_state(
1458
1424
  }
1459
1425
 
1460
1426
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-escaped-end-tag-name-state
1461
- static StateResult handle_script_escaped_end_tag_name_state(
1462
- GumboParser* parser, GumboTokenizerState* tokenizer,
1463
- int c, GumboToken* output) {
1427
+ static StateResult handle_script_escaped_end_tag_name_state(GumboParser* parser,
1428
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
1464
1429
  assert(tokenizer->_temporary_buffer.length >= 2);
1465
1430
  if (is_alpha(c)) {
1466
1431
  append_char_to_tag_buffer(parser, ensure_lowercase(c), true);
@@ -1491,9 +1456,8 @@ static StateResult handle_script_escaped_end_tag_name_state(
1491
1456
  }
1492
1457
 
1493
1458
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-double-escape-start-state
1494
- static StateResult handle_script_double_escaped_start_state(
1495
- GumboParser* parser, GumboTokenizerState* tokenizer,
1496
- int c, GumboToken* output) {
1459
+ static StateResult handle_script_double_escaped_start_state(GumboParser* parser,
1460
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
1497
1461
  switch (c) {
1498
1462
  case '\t':
1499
1463
  case '\n':
@@ -1501,9 +1465,11 @@ static StateResult handle_script_double_escaped_start_state(
1501
1465
  case ' ':
1502
1466
  case '/':
1503
1467
  case '>':
1504
- gumbo_tokenizer_set_state(parser, gumbo_string_equals(
1505
- &kScriptTag, (GumboStringPiece*) &tokenizer->_script_data_buffer)
1506
- ? GUMBO_LEX_SCRIPT_DOUBLE_ESCAPED : GUMBO_LEX_SCRIPT_ESCAPED);
1468
+ gumbo_tokenizer_set_state(
1469
+ parser, gumbo_string_equals(&kScriptTag,
1470
+ (GumboStringPiece*) &tokenizer->_script_data_buffer)
1471
+ ? GUMBO_LEX_SCRIPT_DOUBLE_ESCAPED
1472
+ : GUMBO_LEX_SCRIPT_ESCAPED);
1507
1473
  return emit_current_char(parser, output);
1508
1474
  default:
1509
1475
  if (is_alpha(c)) {
@@ -1519,9 +1485,8 @@ static StateResult handle_script_double_escaped_start_state(
1519
1485
  }
1520
1486
 
1521
1487
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-double-escaped-state
1522
- static StateResult handle_script_double_escaped_state(
1523
- GumboParser* parser, GumboTokenizerState* tokenizer,
1524
- int c, GumboToken* output) {
1488
+ static StateResult handle_script_double_escaped_state(GumboParser* parser,
1489
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
1525
1490
  switch (c) {
1526
1491
  case '-':
1527
1492
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT_DOUBLE_ESCAPED_DASH);
@@ -1541,9 +1506,8 @@ static StateResult handle_script_double_escaped_state(
1541
1506
  }
1542
1507
 
1543
1508
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-double-escaped-dash-state
1544
- static StateResult handle_script_double_escaped_dash_state(
1545
- GumboParser* parser, GumboTokenizerState* tokenizer,
1546
- int c, GumboToken* output) {
1509
+ static StateResult handle_script_double_escaped_dash_state(GumboParser* parser,
1510
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
1547
1511
  switch (c) {
1548
1512
  case '-':
1549
1513
  gumbo_tokenizer_set_state(
@@ -1567,8 +1531,8 @@ static StateResult handle_script_double_escaped_dash_state(
1567
1531
 
1568
1532
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-double-escaped-dash-dash-state
1569
1533
  static StateResult handle_script_double_escaped_dash_dash_state(
1570
- GumboParser* parser, GumboTokenizerState* tokenizer,
1571
- int c, GumboToken* output) {
1534
+ GumboParser* parser, GumboTokenizerState* tokenizer, int c,
1535
+ GumboToken* output) {
1572
1536
  switch (c) {
1573
1537
  case '-':
1574
1538
  return emit_current_char(parser, output);
@@ -1592,26 +1556,22 @@ static StateResult handle_script_double_escaped_dash_dash_state(
1592
1556
  }
1593
1557
 
1594
1558
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-double-escaped-less-than-sign-state
1595
- static StateResult handle_script_double_escaped_lt_state(
1596
- GumboParser* parser, GumboTokenizerState* tokenizer,
1597
- int c, GumboToken* output) {
1559
+ static StateResult handle_script_double_escaped_lt_state(GumboParser* parser,
1560
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
1598
1561
  if (c == '/') {
1599
1562
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT_DOUBLE_ESCAPED_END);
1600
- gumbo_string_buffer_destroy(parser, &tokenizer->_script_data_buffer);
1601
- gumbo_string_buffer_init(parser, &tokenizer->_script_data_buffer);
1563
+ gumbo_string_buffer_clear(parser, &tokenizer->_script_data_buffer);
1602
1564
  return emit_current_char(parser, output);
1603
1565
  } else {
1604
1566
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT_DOUBLE_ESCAPED);
1605
1567
  tokenizer->_reconsume_current_input = true;
1606
1568
  return NEXT_CHAR;
1607
1569
  }
1608
-
1609
1570
  }
1610
1571
 
1611
1572
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-double-escape-end-state
1612
- static StateResult handle_script_double_escaped_end_state(
1613
- GumboParser* parser, GumboTokenizerState* tokenizer,
1614
- int c, GumboToken* output) {
1573
+ static StateResult handle_script_double_escaped_end_state(GumboParser* parser,
1574
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
1615
1575
  switch (c) {
1616
1576
  case '\t':
1617
1577
  case '\n':
@@ -1619,9 +1579,11 @@ static StateResult handle_script_double_escaped_end_state(
1619
1579
  case ' ':
1620
1580
  case '/':
1621
1581
  case '>':
1622
- gumbo_tokenizer_set_state(parser, gumbo_string_equals(
1623
- &kScriptTag, (GumboStringPiece*) &tokenizer->_script_data_buffer)
1624
- ? GUMBO_LEX_SCRIPT_ESCAPED : GUMBO_LEX_SCRIPT_DOUBLE_ESCAPED);
1582
+ gumbo_tokenizer_set_state(
1583
+ parser, gumbo_string_equals(&kScriptTag,
1584
+ (GumboStringPiece*) &tokenizer->_script_data_buffer)
1585
+ ? GUMBO_LEX_SCRIPT_ESCAPED
1586
+ : GUMBO_LEX_SCRIPT_DOUBLE_ESCAPED);
1625
1587
  return emit_current_char(parser, output);
1626
1588
  default:
1627
1589
  if (is_alpha(c)) {
@@ -1637,9 +1599,8 @@ static StateResult handle_script_double_escaped_end_state(
1637
1599
  }
1638
1600
 
1639
1601
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#before-attribute-name-state
1640
- static StateResult handle_before_attr_name_state(
1641
- GumboParser* parser, GumboTokenizerState* tokenizer,
1642
- int c, GumboToken* output) {
1602
+ static StateResult handle_before_attr_name_state(GumboParser* parser,
1603
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
1643
1604
  switch (c) {
1644
1605
  case '\t':
1645
1606
  case '\n':
@@ -1667,7 +1628,7 @@ static StateResult handle_before_attr_name_state(
1667
1628
  case '<':
1668
1629
  case '=':
1669
1630
  tokenizer_add_parse_error(parser, GUMBO_ERR_ATTR_NAME_INVALID);
1670
- // Fall through.
1631
+ // Fall through.
1671
1632
  default:
1672
1633
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_ATTR_NAME);
1673
1634
  append_char_to_tag_buffer(parser, ensure_lowercase(c), true);
@@ -1676,9 +1637,8 @@ static StateResult handle_before_attr_name_state(
1676
1637
  }
1677
1638
 
1678
1639
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#attribute-name-state
1679
- static StateResult handle_attr_name_state(
1680
- GumboParser* parser, GumboTokenizerState* tokenizer,
1681
- int c, GumboToken* output) {
1640
+ static StateResult handle_attr_name_state(GumboParser* parser,
1641
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
1682
1642
  switch (c) {
1683
1643
  case '\t':
1684
1644
  case '\n':
@@ -1712,7 +1672,7 @@ static StateResult handle_attr_name_state(
1712
1672
  case '\'':
1713
1673
  case '<':
1714
1674
  tokenizer_add_parse_error(parser, GUMBO_ERR_ATTR_NAME_INVALID);
1715
- // Fall through.
1675
+ // Fall through.
1716
1676
  default:
1717
1677
  append_char_to_tag_buffer(parser, ensure_lowercase(c), true);
1718
1678
  return NEXT_CHAR;
@@ -1720,9 +1680,8 @@ static StateResult handle_attr_name_state(
1720
1680
  }
1721
1681
 
1722
1682
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#after-attribute-name-state
1723
- static StateResult handle_after_attr_name_state(
1724
- GumboParser* parser, GumboTokenizerState* tokenizer,
1725
- int c, GumboToken* output) {
1683
+ static StateResult handle_after_attr_name_state(GumboParser* parser,
1684
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
1726
1685
  switch (c) {
1727
1686
  case '\t':
1728
1687
  case '\n':
@@ -1752,7 +1711,7 @@ static StateResult handle_after_attr_name_state(
1752
1711
  case '\'':
1753
1712
  case '<':
1754
1713
  tokenizer_add_parse_error(parser, GUMBO_ERR_ATTR_NAME_INVALID);
1755
- // Fall through.
1714
+ // Fall through.
1756
1715
  default:
1757
1716
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_ATTR_NAME);
1758
1717
  append_char_to_tag_buffer(parser, ensure_lowercase(c), true);
@@ -1761,9 +1720,8 @@ static StateResult handle_after_attr_name_state(
1761
1720
  }
1762
1721
 
1763
1722
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#before-attribute-value-state
1764
- static StateResult handle_before_attr_value_state(
1765
- GumboParser* parser, GumboTokenizerState* tokenizer,
1766
- int c, GumboToken* output) {
1723
+ static StateResult handle_before_attr_value_state(GumboParser* parser,
1724
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
1767
1725
  switch (c) {
1768
1726
  case '\t':
1769
1727
  case '\n':
@@ -1802,7 +1760,7 @@ static StateResult handle_before_attr_value_state(
1802
1760
  case '=':
1803
1761
  case '`':
1804
1762
  tokenizer_add_parse_error(parser, GUMBO_ERR_ATTR_UNQUOTED_EQUALS);
1805
- // Fall through.
1763
+ // Fall through.
1806
1764
  default:
1807
1765
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_ATTR_VALUE_UNQUOTED);
1808
1766
  append_char_to_tag_buffer(parser, c, true);
@@ -1811,9 +1769,8 @@ static StateResult handle_before_attr_value_state(
1811
1769
  }
1812
1770
 
1813
1771
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#attribute-value-double-quoted-state
1814
- static StateResult handle_attr_value_double_quoted_state(
1815
- GumboParser* parser, GumboTokenizerState* tokenizer,
1816
- int c, GumboToken* output) {
1772
+ static StateResult handle_attr_value_double_quoted_state(GumboParser* parser,
1773
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
1817
1774
  switch (c) {
1818
1775
  case '"':
1819
1776
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_AFTER_ATTR_VALUE_QUOTED);
@@ -1840,9 +1797,8 @@ static StateResult handle_attr_value_double_quoted_state(
1840
1797
  }
1841
1798
 
1842
1799
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#attribute-value-single-quoted-state
1843
- static StateResult handle_attr_value_single_quoted_state(
1844
- GumboParser* parser, GumboTokenizerState* tokenizer,
1845
- int c, GumboToken* output) {
1800
+ static StateResult handle_attr_value_single_quoted_state(GumboParser* parser,
1801
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
1846
1802
  switch (c) {
1847
1803
  case '\'':
1848
1804
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_AFTER_ATTR_VALUE_QUOTED);
@@ -1869,9 +1825,8 @@ static StateResult handle_attr_value_single_quoted_state(
1869
1825
  }
1870
1826
 
1871
1827
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#attribute-value-unquoted-state
1872
- static StateResult handle_attr_value_unquoted_state(
1873
- GumboParser* parser, GumboTokenizerState* tokenizer,
1874
- int c, GumboToken* output) {
1828
+ static StateResult handle_attr_value_unquoted_state(GumboParser* parser,
1829
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
1875
1830
  switch (c) {
1876
1831
  case '\t':
1877
1832
  case '\n':
@@ -1905,7 +1860,7 @@ static StateResult handle_attr_value_unquoted_state(
1905
1860
  case '\'':
1906
1861
  case '`':
1907
1862
  tokenizer_add_parse_error(parser, GUMBO_ERR_ATTR_UNQUOTED_EQUALS);
1908
- // Fall through.
1863
+ // Fall through.
1909
1864
  default:
1910
1865
  append_char_to_tag_buffer(parser, c, true);
1911
1866
  return NEXT_CHAR;
@@ -1913,9 +1868,8 @@ static StateResult handle_attr_value_unquoted_state(
1913
1868
  }
1914
1869
 
1915
1870
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#character-reference-in-attribute-value-state
1916
- static StateResult handle_char_ref_in_attr_value_state(
1917
- GumboParser* parser, GumboTokenizerState* tokenizer,
1918
- int c, GumboToken* output) {
1871
+ static StateResult handle_char_ref_in_attr_value_state(GumboParser* parser,
1872
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
1919
1873
  OneOrTwoCodepoints char_ref;
1920
1874
  int allowed_char;
1921
1875
  bool is_unquoted = false;
@@ -1956,9 +1910,8 @@ static StateResult handle_char_ref_in_attr_value_state(
1956
1910
  }
1957
1911
 
1958
1912
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#after-attribute-value-quoted-state
1959
- static StateResult handle_after_attr_value_quoted_state(
1960
- GumboParser* parser, GumboTokenizerState* tokenizer,
1961
- int c, GumboToken* output) {
1913
+ static StateResult handle_after_attr_value_quoted_state(GumboParser* parser,
1914
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
1962
1915
  finish_attribute_value(parser);
1963
1916
  switch (c) {
1964
1917
  case '\t':
@@ -1988,9 +1941,8 @@ static StateResult handle_after_attr_value_quoted_state(
1988
1941
  }
1989
1942
 
1990
1943
  // http://www.whatwg.org/specs/web-apps/current-work/complete.html#self-closing-start-tag-state
1991
- static StateResult handle_self_closing_start_tag_state(
1992
- GumboParser* parser, GumboTokenizerState* tokenizer,
1993
- int c, GumboToken* output) {
1944
+ static StateResult handle_self_closing_start_tag_state(GumboParser* parser,
1945
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
1994
1946
  switch (c) {
1995
1947
  case '>':
1996
1948
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
@@ -2010,9 +1962,8 @@ static StateResult handle_self_closing_start_tag_state(
2010
1962
  }
2011
1963
 
2012
1964
  // http://www.whatwg.org/specs/web-apps/current-work/complete.html#bogus-comment-state
2013
- static StateResult handle_bogus_comment_state(
2014
- GumboParser* parser, GumboTokenizerState* tokenizer,
2015
- int c, GumboToken* output) {
1965
+ static StateResult handle_bogus_comment_state(GumboParser* parser,
1966
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
2016
1967
  while (c != '>' && c != -1) {
2017
1968
  if (c == '\0') {
2018
1969
  c = 0xFFFD;
@@ -2026,15 +1977,14 @@ static StateResult handle_bogus_comment_state(
2026
1977
  }
2027
1978
 
2028
1979
  // http://www.whatwg.org/specs/web-apps/current-work/complete.html#markup-declaration-open-state
2029
- static StateResult handle_markup_declaration_state(
2030
- GumboParser* parser, GumboTokenizerState* tokenizer,
2031
- int c, GumboToken* output) {
1980
+ static StateResult handle_markup_declaration_state(GumboParser* parser,
1981
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
2032
1982
  if (utf8iterator_maybe_consume_match(
2033
- &tokenizer->_input, "--", sizeof("--") - 1, true)) {
1983
+ &tokenizer->_input, "--", sizeof("--") - 1, true)) {
2034
1984
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_COMMENT_START);
2035
1985
  tokenizer->_reconsume_current_input = true;
2036
1986
  } else if (utf8iterator_maybe_consume_match(
2037
- &tokenizer->_input, "DOCTYPE", sizeof("DOCTYPE") - 1, false)) {
1987
+ &tokenizer->_input, "DOCTYPE", sizeof("DOCTYPE") - 1, false)) {
2038
1988
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_DOCTYPE);
2039
1989
  tokenizer->_reconsume_current_input = true;
2040
1990
  // If we get here, we know we'll eventually emit a doctype token, so now is
@@ -2048,7 +1998,7 @@ static StateResult handle_markup_declaration_state(
2048
1998
  gumbo_copy_stringz(parser, "");
2049
1999
  } else if (tokenizer->_is_current_node_foreign &&
2050
2000
  utf8iterator_maybe_consume_match(
2051
- &tokenizer->_input, "[CDATA[", sizeof("[CDATA[") - 1, true)) {
2001
+ &tokenizer->_input, "[CDATA[", sizeof("[CDATA[") - 1, true)) {
2052
2002
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_CDATA);
2053
2003
  tokenizer->_is_in_cdata = true;
2054
2004
  tokenizer->_reconsume_current_input = true;
@@ -2062,9 +2012,8 @@ static StateResult handle_markup_declaration_state(
2062
2012
  }
2063
2013
 
2064
2014
  // http://www.whatwg.org/specs/web-apps/current-work/complete.html#comment-start-state
2065
- static StateResult handle_comment_start_state(
2066
- GumboParser* parser, GumboTokenizerState* tokenizer,
2067
- int c, GumboToken* output) {
2015
+ static StateResult handle_comment_start_state(GumboParser* parser,
2016
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
2068
2017
  switch (c) {
2069
2018
  case '-':
2070
2019
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_COMMENT_START_DASH);
@@ -2092,9 +2041,8 @@ static StateResult handle_comment_start_state(
2092
2041
  }
2093
2042
 
2094
2043
  // http://www.whatwg.org/specs/web-apps/current-work/complete.html#comment-start-dash-state
2095
- static StateResult handle_comment_start_dash_state(
2096
- GumboParser* parser, GumboTokenizerState* tokenizer,
2097
- int c, GumboToken* output) {
2044
+ static StateResult handle_comment_start_dash_state(GumboParser* parser,
2045
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
2098
2046
  switch (c) {
2099
2047
  case '-':
2100
2048
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_COMMENT_END);
@@ -2124,9 +2072,8 @@ static StateResult handle_comment_start_dash_state(
2124
2072
  }
2125
2073
 
2126
2074
  // http://www.whatwg.org/specs/web-apps/current-work/complete.html#comment-state
2127
- static StateResult handle_comment_state(
2128
- GumboParser* parser, GumboTokenizerState* tokenizer,
2129
- int c, GumboToken* output) {
2075
+ static StateResult handle_comment_state(GumboParser* parser,
2076
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
2130
2077
  switch (c) {
2131
2078
  case '-':
2132
2079
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_COMMENT_END_DASH);
@@ -2147,9 +2094,8 @@ static StateResult handle_comment_state(
2147
2094
  }
2148
2095
 
2149
2096
  // http://www.whatwg.org/specs/web-apps/current-work/complete.html#comment-end-dash-state
2150
- static StateResult handle_comment_end_dash_state(
2151
- GumboParser* parser, GumboTokenizerState* tokenizer,
2152
- int c, GumboToken* output) {
2097
+ static StateResult handle_comment_end_dash_state(GumboParser* parser,
2098
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
2153
2099
  switch (c) {
2154
2100
  case '-':
2155
2101
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_COMMENT_END);
@@ -2174,9 +2120,8 @@ static StateResult handle_comment_end_dash_state(
2174
2120
  }
2175
2121
 
2176
2122
  // http://www.whatwg.org/specs/web-apps/current-work/complete.html#comment-end-state
2177
- static StateResult handle_comment_end_state(
2178
- GumboParser* parser, GumboTokenizerState* tokenizer,
2179
- int c, GumboToken* output) {
2123
+ static StateResult handle_comment_end_state(GumboParser* parser,
2124
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
2180
2125
  switch (c) {
2181
2126
  case '>':
2182
2127
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
@@ -2189,11 +2134,13 @@ static StateResult handle_comment_end_state(
2189
2134
  append_char_to_temporary_buffer(parser, kUtf8ReplacementChar);
2190
2135
  return NEXT_CHAR;
2191
2136
  case '!':
2192
- tokenizer_add_parse_error(parser, GUMBO_ERR_COMMENT_BANG_AFTER_DOUBLE_DASH);
2137
+ tokenizer_add_parse_error(
2138
+ parser, GUMBO_ERR_COMMENT_BANG_AFTER_DOUBLE_DASH);
2193
2139
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_COMMENT_END_BANG);
2194
2140
  return NEXT_CHAR;
2195
2141
  case '-':
2196
- tokenizer_add_parse_error(parser, GUMBO_ERR_COMMENT_DASH_AFTER_DOUBLE_DASH);
2142
+ tokenizer_add_parse_error(
2143
+ parser, GUMBO_ERR_COMMENT_DASH_AFTER_DOUBLE_DASH);
2197
2144
  append_char_to_temporary_buffer(parser, '-');
2198
2145
  return NEXT_CHAR;
2199
2146
  case -1:
@@ -2212,9 +2159,8 @@ static StateResult handle_comment_end_state(
2212
2159
  }
2213
2160
 
2214
2161
  // http://www.whatwg.org/specs/web-apps/current-work/complete.html#comment-end-bang-state
2215
- static StateResult handle_comment_end_bang_state(
2216
- GumboParser* parser, GumboTokenizerState* tokenizer,
2217
- int c, GumboToken* output) {
2162
+ static StateResult handle_comment_end_bang_state(GumboParser* parser,
2163
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
2218
2164
  switch (c) {
2219
2165
  case '-':
2220
2166
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_COMMENT_END_DASH);
@@ -2249,9 +2195,8 @@ static StateResult handle_comment_end_bang_state(
2249
2195
  }
2250
2196
 
2251
2197
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#doctype-state
2252
- static StateResult handle_doctype_state(
2253
- GumboParser* parser, GumboTokenizerState* tokenizer,
2254
- int c, GumboToken* output) {
2198
+ static StateResult handle_doctype_state(GumboParser* parser,
2199
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
2255
2200
  assert(!tokenizer->_temporary_buffer.length);
2256
2201
  switch (c) {
2257
2202
  case '\t':
@@ -2276,9 +2221,8 @@ static StateResult handle_doctype_state(
2276
2221
  }
2277
2222
 
2278
2223
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#before-doctype-name-state
2279
- static StateResult handle_before_doctype_name_state(
2280
- GumboParser* parser, GumboTokenizerState* tokenizer,
2281
- int c, GumboToken* output) {
2224
+ static StateResult handle_before_doctype_name_state(GumboParser* parser,
2225
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
2282
2226
  switch (c) {
2283
2227
  case '\t':
2284
2228
  case '\n':
@@ -2312,9 +2256,8 @@ static StateResult handle_before_doctype_name_state(
2312
2256
  }
2313
2257
 
2314
2258
  // http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#doctype-name-state
2315
- static StateResult handle_doctype_name_state(
2316
- GumboParser* parser, GumboTokenizerState* tokenizer,
2317
- int c, GumboToken* output) {
2259
+ static StateResult handle_doctype_name_state(GumboParser* parser,
2260
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
2318
2261
  switch (c) {
2319
2262
  case '\t':
2320
2263
  case '\n':
@@ -2322,14 +2265,12 @@ static StateResult handle_doctype_name_state(
2322
2265
  case ' ':
2323
2266
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_AFTER_DOCTYPE_NAME);
2324
2267
  gumbo_parser_deallocate(parser, (void*) tokenizer->_doc_type_state.name);
2325
- finish_temporary_buffer(
2326
- parser, &tokenizer->_doc_type_state.name);
2268
+ finish_temporary_buffer(parser, &tokenizer->_doc_type_state.name);
2327
2269
  return NEXT_CHAR;
2328
2270
  case '>':
2329
2271
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
2330
2272
  gumbo_parser_deallocate(parser, (void*) tokenizer->_doc_type_state.name);
2331
- finish_temporary_buffer(
2332
- parser, &tokenizer->_doc_type_state.name);
2273
+ finish_temporary_buffer(parser, &tokenizer->_doc_type_state.name);
2333
2274
  emit_doctype(parser, output);
2334
2275
  return RETURN_SUCCESS;
2335
2276
  case '\0':
@@ -2341,8 +2282,7 @@ static StateResult handle_doctype_name_state(
2341
2282
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
2342
2283
  tokenizer->_doc_type_state.force_quirks = true;
2343
2284
  gumbo_parser_deallocate(parser, (void*) tokenizer->_doc_type_state.name);
2344
- finish_temporary_buffer(
2345
- parser, &tokenizer->_doc_type_state.name);
2285
+ finish_temporary_buffer(parser, &tokenizer->_doc_type_state.name);
2346
2286
  emit_doctype(parser, output);
2347
2287
  return RETURN_ERROR;
2348
2288
  default:
@@ -2354,9 +2294,8 @@ static StateResult handle_doctype_name_state(
2354
2294
  }
2355
2295
 
2356
2296
  // http://www.whatwg.org/specs/web-apps/current-work/complete.html#after-doctype-name-state
2357
- static StateResult handle_after_doctype_name_state(
2358
- GumboParser* parser, GumboTokenizerState* tokenizer,
2359
- int c, GumboToken* output) {
2297
+ static StateResult handle_after_doctype_name_state(GumboParser* parser,
2298
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
2360
2299
  switch (c) {
2361
2300
  case '\t':
2362
2301
  case '\n':
@@ -2375,17 +2314,18 @@ static StateResult handle_after_doctype_name_state(
2375
2314
  return RETURN_ERROR;
2376
2315
  default:
2377
2316
  if (utf8iterator_maybe_consume_match(
2378
- &tokenizer->_input, "PUBLIC", sizeof("PUBLIC") - 1, false)) {
2317
+ &tokenizer->_input, "PUBLIC", sizeof("PUBLIC") - 1, false)) {
2379
2318
  gumbo_tokenizer_set_state(
2380
2319
  parser, GUMBO_LEX_AFTER_DOCTYPE_PUBLIC_KEYWORD);
2381
2320
  tokenizer->_reconsume_current_input = true;
2382
- } else if (utf8iterator_maybe_consume_match(
2383
- &tokenizer->_input, "SYSTEM", sizeof("SYSTEM") - 1, false)) {
2321
+ } else if (utf8iterator_maybe_consume_match(&tokenizer->_input, "SYSTEM",
2322
+ sizeof("SYSTEM") - 1, false)) {
2384
2323
  gumbo_tokenizer_set_state(
2385
2324
  parser, GUMBO_LEX_AFTER_DOCTYPE_SYSTEM_KEYWORD);
2386
2325
  tokenizer->_reconsume_current_input = true;
2387
2326
  } else {
2388
- tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_SPACE_OR_RIGHT_BRACKET);
2327
+ tokenizer_add_parse_error(
2328
+ parser, GUMBO_ERR_DOCTYPE_SPACE_OR_RIGHT_BRACKET);
2389
2329
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_BOGUS_DOCTYPE);
2390
2330
  tokenizer->_doc_type_state.force_quirks = true;
2391
2331
  }
@@ -2395,15 +2335,14 @@ static StateResult handle_after_doctype_name_state(
2395
2335
 
2396
2336
  // http://www.whatwg.org/specs/web-apps/current-work/complete.html#after-doctype-public-keyword-state
2397
2337
  static StateResult handle_after_doctype_public_keyword_state(
2398
- GumboParser* parser, GumboTokenizerState* tokenizer,
2399
- int c, GumboToken* output) {
2338
+ GumboParser* parser, GumboTokenizerState* tokenizer, int c,
2339
+ GumboToken* output) {
2400
2340
  switch (c) {
2401
2341
  case '\t':
2402
2342
  case '\n':
2403
2343
  case '\f':
2404
2344
  case ' ':
2405
- gumbo_tokenizer_set_state(
2406
- parser, GUMBO_LEX_BEFORE_DOCTYPE_PUBLIC_ID);
2345
+ gumbo_tokenizer_set_state(parser, GUMBO_LEX_BEFORE_DOCTYPE_PUBLIC_ID);
2407
2346
  return NEXT_CHAR;
2408
2347
  case '"':
2409
2348
  tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_INVALID);
@@ -2439,9 +2378,8 @@ static StateResult handle_after_doctype_public_keyword_state(
2439
2378
  }
2440
2379
 
2441
2380
  // http://www.whatwg.org/specs/web-apps/current-work/complete.html#before-doctype-public-identifier-state
2442
- static StateResult handle_before_doctype_public_id_state(
2443
- GumboParser* parser, GumboTokenizerState* tokenizer,
2444
- int c, GumboToken* output) {
2381
+ static StateResult handle_before_doctype_public_id_state(GumboParser* parser,
2382
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
2445
2383
  switch (c) {
2446
2384
  case '\t':
2447
2385
  case '\n':
@@ -2481,8 +2419,8 @@ static StateResult handle_before_doctype_public_id_state(
2481
2419
 
2482
2420
  // http://www.whatwg.org/specs/web-apps/current-work/complete.html#doctype-public-identifier-(double-quoted)-state
2483
2421
  static StateResult handle_doctype_public_id_double_quoted_state(
2484
- GumboParser* parser, GumboTokenizerState* tokenizer,
2485
- int c, GumboToken* output) {
2422
+ GumboParser* parser, GumboTokenizerState* tokenizer, int c,
2423
+ GumboToken* output) {
2486
2424
  switch (c) {
2487
2425
  case '"':
2488
2426
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_AFTER_DOCTYPE_PUBLIC_ID);
@@ -2514,8 +2452,8 @@ static StateResult handle_doctype_public_id_double_quoted_state(
2514
2452
 
2515
2453
  // http://www.whatwg.org/specs/web-apps/current-work/complete.html#doctype-public-identifier-(single-quoted)-state
2516
2454
  static StateResult handle_doctype_public_id_single_quoted_state(
2517
- GumboParser* parser, GumboTokenizerState* tokenizer,
2518
- int c, GumboToken* output) {
2455
+ GumboParser* parser, GumboTokenizerState* tokenizer, int c,
2456
+ GumboToken* output) {
2519
2457
  switch (c) {
2520
2458
  case '\'':
2521
2459
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_AFTER_DOCTYPE_PUBLIC_ID);
@@ -2546,9 +2484,8 @@ static StateResult handle_doctype_public_id_single_quoted_state(
2546
2484
  }
2547
2485
 
2548
2486
  // http://www.whatwg.org/specs/web-apps/current-work/complete.html#after-doctype-public-identifier-state
2549
- static StateResult handle_after_doctype_public_id_state(
2550
- GumboParser* parser, GumboTokenizerState* tokenizer,
2551
- int c, GumboToken* output) {
2487
+ static StateResult handle_after_doctype_public_id_state(GumboParser* parser,
2488
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
2552
2489
  switch (c) {
2553
2490
  case '\t':
2554
2491
  case '\n':
@@ -2590,8 +2527,8 @@ static StateResult handle_after_doctype_public_id_state(
2590
2527
 
2591
2528
  // http://www.whatwg.org/specs/web-apps/current-work/complete.html#between-doctype-public-and-system-identifiers-state
2592
2529
  static StateResult handle_between_doctype_public_system_id_state(
2593
- GumboParser* parser, GumboTokenizerState* tokenizer,
2594
- int c, GumboToken* output) {
2530
+ GumboParser* parser, GumboTokenizerState* tokenizer, int c,
2531
+ GumboToken* output) {
2595
2532
  switch (c) {
2596
2533
  case '\t':
2597
2534
  case '\n':
@@ -2629,8 +2566,8 @@ static StateResult handle_between_doctype_public_system_id_state(
2629
2566
 
2630
2567
  // http://www.whatwg.org/specs/web-apps/current-work/complete.html#after-doctype-system-keyword-state
2631
2568
  static StateResult handle_after_doctype_system_keyword_state(
2632
- GumboParser* parser, GumboTokenizerState* tokenizer,
2633
- int c, GumboToken* output) {
2569
+ GumboParser* parser, GumboTokenizerState* tokenizer, int c,
2570
+ GumboToken* output) {
2634
2571
  switch (c) {
2635
2572
  case '\t':
2636
2573
  case '\n':
@@ -2671,9 +2608,8 @@ static StateResult handle_after_doctype_system_keyword_state(
2671
2608
  }
2672
2609
 
2673
2610
  // http://www.whatwg.org/specs/web-apps/current-work/complete.html#before-doctype-system-identifier-state
2674
- static StateResult handle_before_doctype_system_id_state(
2675
- GumboParser* parser, GumboTokenizerState* tokenizer,
2676
- int c, GumboToken* output) {
2611
+ static StateResult handle_before_doctype_system_id_state(GumboParser* parser,
2612
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
2677
2613
  switch (c) {
2678
2614
  case '\t':
2679
2615
  case '\n':
@@ -2712,8 +2648,8 @@ static StateResult handle_before_doctype_system_id_state(
2712
2648
 
2713
2649
  // http://www.whatwg.org/specs/web-apps/current-work/complete.html#doctype-system-identifier-(double-quoted)-state
2714
2650
  static StateResult handle_doctype_system_id_double_quoted_state(
2715
- GumboParser* parser, GumboTokenizerState* tokenizer,
2716
- int c, GumboToken* output) {
2651
+ GumboParser* parser, GumboTokenizerState* tokenizer, int c,
2652
+ GumboToken* output) {
2717
2653
  switch (c) {
2718
2654
  case '"':
2719
2655
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_AFTER_DOCTYPE_SYSTEM_ID);
@@ -2745,8 +2681,8 @@ static StateResult handle_doctype_system_id_double_quoted_state(
2745
2681
 
2746
2682
  // http://www.whatwg.org/specs/web-apps/current-work/complete.html#doctype-system-identifier-(single-quoted)-state
2747
2683
  static StateResult handle_doctype_system_id_single_quoted_state(
2748
- GumboParser* parser, GumboTokenizerState* tokenizer,
2749
- int c, GumboToken* output) {
2684
+ GumboParser* parser, GumboTokenizerState* tokenizer, int c,
2685
+ GumboToken* output) {
2750
2686
  switch (c) {
2751
2687
  case '\'':
2752
2688
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_AFTER_DOCTYPE_SYSTEM_ID);
@@ -2777,9 +2713,8 @@ static StateResult handle_doctype_system_id_single_quoted_state(
2777
2713
  }
2778
2714
 
2779
2715
  // http://www.whatwg.org/specs/web-apps/current-work/complete.html#after-doctype-system-identifier-state
2780
- static StateResult handle_after_doctype_system_id_state(
2781
- GumboParser* parser, GumboTokenizerState* tokenizer,
2782
- int c, GumboToken* output) {
2716
+ static StateResult handle_after_doctype_system_id_state(GumboParser* parser,
2717
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
2783
2718
  switch (c) {
2784
2719
  case '\t':
2785
2720
  case '\n':
@@ -2804,9 +2739,8 @@ static StateResult handle_after_doctype_system_id_state(
2804
2739
  }
2805
2740
 
2806
2741
  // http://www.whatwg.org/specs/web-apps/current-work/complete.html#bogus-doctype-state
2807
- static StateResult handle_bogus_doctype_state(
2808
- GumboParser* parser, GumboTokenizerState* tokenizer,
2809
- int c, GumboToken* output) {
2742
+ static StateResult handle_bogus_doctype_state(GumboParser* parser,
2743
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
2810
2744
  if (c == '>' || c == -1) {
2811
2745
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
2812
2746
  emit_doctype(parser, output);
@@ -2816,15 +2750,14 @@ static StateResult handle_bogus_doctype_state(
2816
2750
  }
2817
2751
 
2818
2752
  // http://www.whatwg.org/specs/web-apps/current-work/complete.html#cdata-section-state
2819
- static StateResult handle_cdata_state(
2820
- GumboParser* parser, GumboTokenizerState* tokenizer,
2821
- int c, GumboToken* output) {
2753
+ static StateResult handle_cdata_state(GumboParser* parser,
2754
+ GumboTokenizerState* tokenizer, int c, GumboToken* output) {
2822
2755
  if (c == -1 || utf8iterator_maybe_consume_match(
2823
- &tokenizer->_input, "]]>", sizeof("]]>") - 1, true)) {
2756
+ &tokenizer->_input, "]]>", sizeof("]]>") - 1, true)) {
2824
2757
  tokenizer->_reconsume_current_input = true;
2825
2758
  reset_token_start_point(tokenizer);
2826
2759
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
2827
- tokenizer->_is_in_cdata = true;
2760
+ tokenizer->_is_in_cdata = false;
2828
2761
  return NEXT_CHAR;
2829
2762
  } else {
2830
2763
  return emit_current_char(parser, output);
@@ -2834,76 +2767,47 @@ static StateResult handle_cdata_state(
2834
2767
  typedef StateResult (*GumboLexerStateFunction)(
2835
2768
  GumboParser*, GumboTokenizerState*, int, GumboToken*);
2836
2769
 
2837
- static GumboLexerStateFunction dispatch_table[] = {
2838
- handle_data_state,
2839
- handle_char_ref_in_data_state,
2840
- handle_rcdata_state,
2841
- handle_char_ref_in_rcdata_state,
2842
- handle_rawtext_state,
2843
- handle_script_state,
2844
- handle_plaintext_state,
2845
- handle_tag_open_state,
2846
- handle_end_tag_open_state,
2847
- handle_tag_name_state,
2848
- handle_rcdata_lt_state,
2849
- handle_rcdata_end_tag_open_state,
2850
- handle_rcdata_end_tag_name_state,
2851
- handle_rawtext_lt_state,
2852
- handle_rawtext_end_tag_open_state,
2853
- handle_rawtext_end_tag_name_state,
2854
- handle_script_lt_state,
2855
- handle_script_end_tag_open_state,
2856
- handle_script_end_tag_name_state,
2857
- handle_script_escaped_start_state,
2858
- handle_script_escaped_start_dash_state,
2859
- handle_script_escaped_state,
2860
- handle_script_escaped_dash_state,
2861
- handle_script_escaped_dash_dash_state,
2862
- handle_script_escaped_lt_state,
2863
- handle_script_escaped_end_tag_open_state,
2864
- handle_script_escaped_end_tag_name_state,
2865
- handle_script_double_escaped_start_state,
2866
- handle_script_double_escaped_state,
2867
- handle_script_double_escaped_dash_state,
2868
- handle_script_double_escaped_dash_dash_state,
2869
- handle_script_double_escaped_lt_state,
2870
- handle_script_double_escaped_end_state,
2871
- handle_before_attr_name_state,
2872
- handle_attr_name_state,
2873
- handle_after_attr_name_state,
2874
- handle_before_attr_value_state,
2875
- handle_attr_value_double_quoted_state,
2876
- handle_attr_value_single_quoted_state,
2877
- handle_attr_value_unquoted_state,
2878
- handle_char_ref_in_attr_value_state,
2879
- handle_after_attr_value_quoted_state,
2880
- handle_self_closing_start_tag_state,
2881
- handle_bogus_comment_state,
2882
- handle_markup_declaration_state,
2883
- handle_comment_start_state,
2884
- handle_comment_start_dash_state,
2885
- handle_comment_state,
2886
- handle_comment_end_dash_state,
2887
- handle_comment_end_state,
2888
- handle_comment_end_bang_state,
2889
- handle_doctype_state,
2890
- handle_before_doctype_name_state,
2891
- handle_doctype_name_state,
2892
- handle_after_doctype_name_state,
2893
- handle_after_doctype_public_keyword_state,
2894
- handle_before_doctype_public_id_state,
2895
- handle_doctype_public_id_double_quoted_state,
2896
- handle_doctype_public_id_single_quoted_state,
2897
- handle_after_doctype_public_id_state,
2898
- handle_between_doctype_public_system_id_state,
2899
- handle_after_doctype_system_keyword_state,
2900
- handle_before_doctype_system_id_state,
2901
- handle_doctype_system_id_double_quoted_state,
2902
- handle_doctype_system_id_single_quoted_state,
2903
- handle_after_doctype_system_id_state,
2904
- handle_bogus_doctype_state,
2905
- handle_cdata_state
2906
- };
2770
+ static GumboLexerStateFunction dispatch_table[] = {handle_data_state,
2771
+ handle_char_ref_in_data_state, handle_rcdata_state,
2772
+ handle_char_ref_in_rcdata_state, handle_rawtext_state, handle_script_state,
2773
+ handle_plaintext_state, handle_tag_open_state, handle_end_tag_open_state,
2774
+ handle_tag_name_state, handle_rcdata_lt_state,
2775
+ handle_rcdata_end_tag_open_state, handle_rcdata_end_tag_name_state,
2776
+ handle_rawtext_lt_state, handle_rawtext_end_tag_open_state,
2777
+ handle_rawtext_end_tag_name_state, handle_script_lt_state,
2778
+ handle_script_end_tag_open_state, handle_script_end_tag_name_state,
2779
+ handle_script_escaped_start_state, handle_script_escaped_start_dash_state,
2780
+ handle_script_escaped_state, handle_script_escaped_dash_state,
2781
+ handle_script_escaped_dash_dash_state, handle_script_escaped_lt_state,
2782
+ handle_script_escaped_end_tag_open_state,
2783
+ handle_script_escaped_end_tag_name_state,
2784
+ handle_script_double_escaped_start_state,
2785
+ handle_script_double_escaped_state, handle_script_double_escaped_dash_state,
2786
+ handle_script_double_escaped_dash_dash_state,
2787
+ handle_script_double_escaped_lt_state,
2788
+ handle_script_double_escaped_end_state, handle_before_attr_name_state,
2789
+ handle_attr_name_state, handle_after_attr_name_state,
2790
+ handle_before_attr_value_state, handle_attr_value_double_quoted_state,
2791
+ handle_attr_value_single_quoted_state, handle_attr_value_unquoted_state,
2792
+ handle_char_ref_in_attr_value_state, handle_after_attr_value_quoted_state,
2793
+ handle_self_closing_start_tag_state, handle_bogus_comment_state,
2794
+ handle_markup_declaration_state, handle_comment_start_state,
2795
+ handle_comment_start_dash_state, handle_comment_state,
2796
+ handle_comment_end_dash_state, handle_comment_end_state,
2797
+ handle_comment_end_bang_state, handle_doctype_state,
2798
+ handle_before_doctype_name_state, handle_doctype_name_state,
2799
+ handle_after_doctype_name_state, handle_after_doctype_public_keyword_state,
2800
+ handle_before_doctype_public_id_state,
2801
+ handle_doctype_public_id_double_quoted_state,
2802
+ handle_doctype_public_id_single_quoted_state,
2803
+ handle_after_doctype_public_id_state,
2804
+ handle_between_doctype_public_system_id_state,
2805
+ handle_after_doctype_system_keyword_state,
2806
+ handle_before_doctype_system_id_state,
2807
+ handle_doctype_system_id_double_quoted_state,
2808
+ handle_doctype_system_id_single_quoted_state,
2809
+ handle_after_doctype_system_id_state, handle_bogus_doctype_state,
2810
+ handle_cdata_state};
2907
2811
 
2908
2812
  bool gumbo_lex(GumboParser* parser, GumboToken* output) {
2909
2813
  // Because of the spec requirements that...
@@ -2941,8 +2845,8 @@ bool gumbo_lex(GumboParser* parser, GumboToken* output) {
2941
2845
  assert(!tokenizer->_temporary_buffer_emit);
2942
2846
  assert(tokenizer->_buffered_emit_char == kGumboNoChar);
2943
2847
  int c = utf8iterator_current(&tokenizer->_input);
2944
- gumbo_debug("Lexing character '%c' (%d) in state %d.\n",
2945
- c, c, tokenizer->_state);
2848
+ gumbo_debug(
2849
+ "Lexing character '%c' (%d) in state %d.\n", c, c, tokenizer->_state);
2946
2850
  StateResult result =
2947
2851
  dispatch_table[tokenizer->_state](parser, tokenizer, c, output);
2948
2852
  // We need to clear reconsume_current_input before returning to prevent
@@ -2952,7 +2856,7 @@ bool gumbo_lex(GumboParser* parser, GumboToken* output) {
2952
2856
 
2953
2857
  if (result == RETURN_SUCCESS) {
2954
2858
  return true;
2955
- } else if(result == RETURN_ERROR) {
2859
+ } else if (result == RETURN_ERROR) {
2956
2860
  return false;
2957
2861
  }
2958
2862
 
@@ -2974,7 +2878,7 @@ void gumbo_token_destroy(GumboParser* parser, GumboToken* token) {
2974
2878
  parser, (void*) token->v.doc_type.system_identifier);
2975
2879
  return;
2976
2880
  case GUMBO_TOKEN_START_TAG:
2977
- for (int i = 0; i < token->v.start_tag.attributes.length; ++i) {
2881
+ for (unsigned int i = 0; i < token->v.start_tag.attributes.length; ++i) {
2978
2882
  GumboAttribute* attr = token->v.start_tag.attributes.data[i];
2979
2883
  if (attr) {
2980
2884
  // May have been nulled out if this token was merged with another.