nokogumbo 1.3.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -32,48 +32,55 @@
32
32
  #include "util.h"
33
33
  #include "vector.h"
34
34
 
35
-
36
35
  #define AVOID_UNUSED_VARIABLE_WARNING(i) (void)(i)
37
36
 
38
- #define GUMBO_STRING(literal) { literal, sizeof(literal) - 1 }
39
- #define TERMINATOR { "", 0 }
37
+ #define GUMBO_STRING(literal) \
38
+ { literal, sizeof(literal) - 1 }
39
+ #define TERMINATOR \
40
+ { "", 0 }
40
41
 
41
- static void* malloc_wrapper(void* unused, size_t size) {
42
- return malloc(size);
43
- }
42
+ typedef char gumbo_tagset[GUMBO_TAG_LAST];
43
+ #define TAG(tag) [GUMBO_TAG_##tag] = (1 << GUMBO_NAMESPACE_HTML)
44
+ #define TAG_SVG(tag) [GUMBO_TAG_##tag] = (1 << GUMBO_NAMESPACE_SVG)
45
+ #define TAG_MATHML(tag) [GUMBO_TAG_##tag] = (1 << GUMBO_NAMESPACE_MATHML)
44
46
 
45
- static void free_wrapper(void* unused, void* ptr) {
46
- free(ptr);
47
- }
47
+ #define TAGSET_INCLUDES(tagset, namespace, tag) \
48
+ ((tag) < GUMBO_TAG_LAST && (tagset)[(int) (tag)] == (1 << (int) (namespace)))  // True when tag is below GUMBO_TAG_LAST and its tagset entry equals the bit for namespace. Arguments are parenthesized so expression arguments expand safely; note that tag is evaluated twice.
48
49
 
49
- const GumboOptions kGumboDefaultOptions = {
50
- &malloc_wrapper,
51
- &free_wrapper,
52
- NULL,
53
- 8,
54
- false,
55
- -1,
56
- };
50
+ // selected forward declarations as it is getting hard to find
51
+ // an appropriate order
52
+ static bool node_html_tag_is(const GumboNode*, GumboTag);
53
+ static GumboInsertionMode get_current_template_insertion_mode(
54
+ const GumboParser*);
55
+ static bool handle_in_template(GumboParser*, GumboToken*);
56
+ static void destroy_node(GumboParser*, GumboNode*);
57
+
58
+ static void* malloc_wrapper(void* unused, size_t size) { return malloc(size); }  // Allocation hook stored in kGumboDefaultOptions: ignores its first argument and returns malloc(size).
59
+
60
+ static void free_wrapper(void* unused, void* ptr) { free(ptr); }  // Deallocation hook stored in kGumboDefaultOptions: ignores its first argument and passes ptr to free().
61
+
62
+ const GumboOptions kGumboDefaultOptions = {&malloc_wrapper, &free_wrapper, NULL,  // Default options: the malloc/free wrappers defined just above, followed by a NULL third member.
63
+ 8, false, -1, GUMBO_TAG_LAST, GUMBO_NAMESPACE_HTML};  // NOTE(review): positional initializer; values bind to GumboOptions members in declaration order (struct declared elsewhere), so confirm the order whenever members are added.
57
64
 
58
65
  static const GumboStringPiece kDoctypeHtml = GUMBO_STRING("html");
59
- static const GumboStringPiece kPublicIdHtml4_0 = GUMBO_STRING(
60
- "-//W3C//DTD HTML 4.0//EN");
61
- static const GumboStringPiece kPublicIdHtml4_01 = GUMBO_STRING(
62
- "-//W3C//DTD HTML 4.01//EN");
63
- static const GumboStringPiece kPublicIdXhtml1_0 = GUMBO_STRING(
64
- "-//W3C//DTD XHTML 1.0 Strict//EN");
65
- static const GumboStringPiece kPublicIdXhtml1_1 = GUMBO_STRING(
66
- "-//W3C//DTD XHTML 1.1//EN");
67
- static const GumboStringPiece kSystemIdRecHtml4_0 = GUMBO_STRING(
68
- "http://www.w3.org/TR/REC-html40/strict.dtd");
69
- static const GumboStringPiece kSystemIdHtml4 = GUMBO_STRING(
70
- "http://www.w3.org/TR/html4/strict.dtd");
71
- static const GumboStringPiece kSystemIdXhtmlStrict1_1 = GUMBO_STRING(
72
- "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd");
73
- static const GumboStringPiece kSystemIdXhtml1_1 = GUMBO_STRING(
74
- "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd");
75
- static const GumboStringPiece kSystemIdLegacyCompat = GUMBO_STRING(
76
- "about:legacy-compat");
66
+ static const GumboStringPiece kPublicIdHtml4_0 =
67
+ GUMBO_STRING("-//W3C//DTD HTML 4.0//EN");
68
+ static const GumboStringPiece kPublicIdHtml4_01 =
69
+ GUMBO_STRING("-//W3C//DTD HTML 4.01//EN");
70
+ static const GumboStringPiece kPublicIdXhtml1_0 =
71
+ GUMBO_STRING("-//W3C//DTD XHTML 1.0 Strict//EN");
72
+ static const GumboStringPiece kPublicIdXhtml1_1 =
73
+ GUMBO_STRING("-//W3C//DTD XHTML 1.1//EN");
74
+ static const GumboStringPiece kSystemIdRecHtml4_0 =
75
+ GUMBO_STRING("http://www.w3.org/TR/REC-html40/strict.dtd");
76
+ static const GumboStringPiece kSystemIdHtml4 =
77
+ GUMBO_STRING("http://www.w3.org/TR/html4/strict.dtd");
78
+ static const GumboStringPiece kSystemIdXhtmlStrict1_1 =
79
+ GUMBO_STRING("http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd");
80
+ static const GumboStringPiece kSystemIdXhtml1_1 =
81
+ GUMBO_STRING("http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd");
82
+ static const GumboStringPiece kSystemIdLegacyCompat =
83
+ GUMBO_STRING("about:legacy-compat");
77
84
 
78
85
  // The doctype arrays have an explicit terminator because we want to pass them
79
86
  // to a helper function, and passing them as a pointer discards sizeof
@@ -81,96 +88,86 @@ static const GumboStringPiece kSystemIdLegacyCompat = GUMBO_STRING(
81
88
  // over them use sizeof directly instead of a terminator.
82
89
 
83
90
  static const GumboStringPiece kQuirksModePublicIdPrefixes[] = {
84
- GUMBO_STRING("+//Silmaril//dtd html Pro v0r11 19970101//"),
85
- GUMBO_STRING("-//AdvaSoft Ltd//DTD HTML 3.0 asWedit + extensions//"),
86
- GUMBO_STRING("-//AS//DTD HTML 3.0 asWedit + extensions//"),
87
- GUMBO_STRING("-//IETF//DTD HTML 2.0 Level 1//"),
88
- GUMBO_STRING("-//IETF//DTD HTML 2.0 Level 2//"),
89
- GUMBO_STRING("-//IETF//DTD HTML 2.0 Strict Level 1//"),
90
- GUMBO_STRING("-//IETF//DTD HTML 2.0 Strict Level 2//"),
91
- GUMBO_STRING("-//IETF//DTD HTML 2.0 Strict//"),
92
- GUMBO_STRING("-//IETF//DTD HTML 2.0//"),
93
- GUMBO_STRING("-//IETF//DTD HTML 2.1E//"),
94
- GUMBO_STRING("-//IETF//DTD HTML 3.0//"),
95
- GUMBO_STRING("-//IETF//DTD HTML 3.2 Final//"),
96
- GUMBO_STRING("-//IETF//DTD HTML 3.2//"),
97
- GUMBO_STRING("-//IETF//DTD HTML 3//"),
98
- GUMBO_STRING("-//IETF//DTD HTML Level 0//"),
99
- GUMBO_STRING("-//IETF//DTD HTML Level 1//"),
100
- GUMBO_STRING("-//IETF//DTD HTML Level 2//"),
101
- GUMBO_STRING("-//IETF//DTD HTML Level 3//"),
102
- GUMBO_STRING("-//IETF//DTD HTML Strict Level 0//"),
103
- GUMBO_STRING("-//IETF//DTD HTML Strict Level 1//"),
104
- GUMBO_STRING("-//IETF//DTD HTML Strict Level 2//"),
105
- GUMBO_STRING("-//IETF//DTD HTML Strict Level 3//"),
106
- GUMBO_STRING("-//IETF//DTD HTML Strict//"),
107
- GUMBO_STRING("-//IETF//DTD HTML//"),
108
- GUMBO_STRING("-//Metrius//DTD Metrius Presentational//"),
109
- GUMBO_STRING("-//Microsoft//DTD Internet Explorer 2.0 HTML Strict//"),
110
- GUMBO_STRING("-//Microsoft//DTD Internet Explorer 2.0 HTML//"),
111
- GUMBO_STRING("-//Microsoft//DTD Internet Explorer 2.0 Tables//"),
112
- GUMBO_STRING("-//Microsoft//DTD Internet Explorer 3.0 HTML Strict//"),
113
- GUMBO_STRING("-//Microsoft//DTD Internet Explorer 3.0 HTML//"),
114
- GUMBO_STRING("-//Microsoft//DTD Internet Explorer 3.0 Tables//"),
115
- GUMBO_STRING("-//Netscape Comm. Corp.//DTD HTML//"),
116
- GUMBO_STRING("-//Netscape Comm. Corp.//DTD Strict HTML//"),
117
- GUMBO_STRING("-//O'Reilly and Associates//DTD HTML 2.0//"),
118
- GUMBO_STRING("-//O'Reilly and Associates//DTD HTML Extended 1.0//"),
119
- GUMBO_STRING("-//O'Reilly and Associates//DTD HTML Extended Relaxed 1.0//"),
120
- GUMBO_STRING("-//SoftQuad Software//DTD HoTMetaL PRO 6.0::19990601::)"
121
- "extensions to HTML 4.0//"),
122
- GUMBO_STRING("-//SoftQuad//DTD HoTMetaL PRO 4.0::19971010::"
123
- "extensions to HTML 4.0//"),
124
- GUMBO_STRING("-//Spyglass//DTD HTML 2.0 Extended//"),
125
- GUMBO_STRING("-//SQ//DTD HTML 2.0 HoTMetaL + extensions//"),
126
- GUMBO_STRING("-//Sun Microsystems Corp.//DTD HotJava HTML//"),
127
- GUMBO_STRING("-//Sun Microsystems Corp.//DTD HotJava Strict HTML//"),
128
- GUMBO_STRING("-//W3C//DTD HTML 3 1995-03-24//"),
129
- GUMBO_STRING("-//W3C//DTD HTML 3.2 Draft//"),
130
- GUMBO_STRING("-//W3C//DTD HTML 3.2 Final//"),
131
- GUMBO_STRING("-//W3C//DTD HTML 3.2//"),
132
- GUMBO_STRING("-//W3C//DTD HTML 3.2S Draft//"),
133
- GUMBO_STRING("-//W3C//DTD HTML 4.0 Frameset//"),
134
- GUMBO_STRING("-//W3C//DTD HTML 4.0 Transitional//"),
135
- GUMBO_STRING("-//W3C//DTD HTML Experimental 19960712//"),
136
- GUMBO_STRING("-//W3C//DTD HTML Experimental 970421//"),
137
- GUMBO_STRING("-//W3C//DTD W3 HTML//"),
138
- GUMBO_STRING("-//W3O//DTD W3 HTML 3.0//"),
139
- GUMBO_STRING("-//WebTechs//DTD Mozilla HTML 2.0//"),
140
- GUMBO_STRING("-//WebTechs//DTD Mozilla HTML//"),
141
- TERMINATOR
142
- };
91
+ GUMBO_STRING("+//Silmaril//dtd html Pro v0r11 19970101//"),
92
+ GUMBO_STRING("-//AdvaSoft Ltd//DTD HTML 3.0 asWedit + extensions//"),
93
+ GUMBO_STRING("-//AS//DTD HTML 3.0 asWedit + extensions//"),
94
+ GUMBO_STRING("-//IETF//DTD HTML 2.0 Level 1//"),
95
+ GUMBO_STRING("-//IETF//DTD HTML 2.0 Level 2//"),
96
+ GUMBO_STRING("-//IETF//DTD HTML 2.0 Strict Level 1//"),
97
+ GUMBO_STRING("-//IETF//DTD HTML 2.0 Strict Level 2//"),
98
+ GUMBO_STRING("-//IETF//DTD HTML 2.0 Strict//"),
99
+ GUMBO_STRING("-//IETF//DTD HTML 2.0//"),
100
+ GUMBO_STRING("-//IETF//DTD HTML 2.1E//"),
101
+ GUMBO_STRING("-//IETF//DTD HTML 3.0//"),
102
+ GUMBO_STRING("-//IETF//DTD HTML 3.2 Final//"),
103
+ GUMBO_STRING("-//IETF//DTD HTML 3.2//"),
104
+ GUMBO_STRING("-//IETF//DTD HTML 3//"),
105
+ GUMBO_STRING("-//IETF//DTD HTML Level 0//"),
106
+ GUMBO_STRING("-//IETF//DTD HTML Level 1//"),
107
+ GUMBO_STRING("-//IETF//DTD HTML Level 2//"),
108
+ GUMBO_STRING("-//IETF//DTD HTML Level 3//"),
109
+ GUMBO_STRING("-//IETF//DTD HTML Strict Level 0//"),
110
+ GUMBO_STRING("-//IETF//DTD HTML Strict Level 1//"),
111
+ GUMBO_STRING("-//IETF//DTD HTML Strict Level 2//"),
112
+ GUMBO_STRING("-//IETF//DTD HTML Strict Level 3//"),
113
+ GUMBO_STRING("-//IETF//DTD HTML Strict//"),
114
+ GUMBO_STRING("-//IETF//DTD HTML//"),
115
+ GUMBO_STRING("-//Metrius//DTD Metrius Presentational//"),
116
+ GUMBO_STRING("-//Microsoft//DTD Internet Explorer 2.0 HTML Strict//"),
117
+ GUMBO_STRING("-//Microsoft//DTD Internet Explorer 2.0 HTML//"),
118
+ GUMBO_STRING("-//Microsoft//DTD Internet Explorer 2.0 Tables//"),
119
+ GUMBO_STRING("-//Microsoft//DTD Internet Explorer 3.0 HTML Strict//"),
120
+ GUMBO_STRING("-//Microsoft//DTD Internet Explorer 3.0 HTML//"),
121
+ GUMBO_STRING("-//Microsoft//DTD Internet Explorer 3.0 Tables//"),
122
+ GUMBO_STRING("-//Netscape Comm. Corp.//DTD HTML//"),
123
+ GUMBO_STRING("-//Netscape Comm. Corp.//DTD Strict HTML//"),
124
+ GUMBO_STRING("-//O'Reilly and Associates//DTD HTML 2.0//"),
125
+ GUMBO_STRING("-//O'Reilly and Associates//DTD HTML Extended 1.0//"),
126
+ GUMBO_STRING("-//O'Reilly and Associates//DTD HTML Extended Relaxed 1.0//"),
127
+ GUMBO_STRING(
128
+ "-//SoftQuad Software//DTD HoTMetaL PRO 6.0::19990601::)"
129
+ "extensions to HTML 4.0//"),
130
+ GUMBO_STRING(
131
+ "-//SoftQuad//DTD HoTMetaL PRO 4.0::19971010::"
132
+ "extensions to HTML 4.0//"),
133
+ GUMBO_STRING("-//Spyglass//DTD HTML 2.0 Extended//"),
134
+ GUMBO_STRING("-//SQ//DTD HTML 2.0 HoTMetaL + extensions//"),
135
+ GUMBO_STRING("-//Sun Microsystems Corp.//DTD HotJava HTML//"),
136
+ GUMBO_STRING("-//Sun Microsystems Corp.//DTD HotJava Strict HTML//"),
137
+ GUMBO_STRING("-//W3C//DTD HTML 3 1995-03-24//"),
138
+ GUMBO_STRING("-//W3C//DTD HTML 3.2 Draft//"),
139
+ GUMBO_STRING("-//W3C//DTD HTML 3.2 Final//"),
140
+ GUMBO_STRING("-//W3C//DTD HTML 3.2//"),
141
+ GUMBO_STRING("-//W3C//DTD HTML 3.2S Draft//"),
142
+ GUMBO_STRING("-//W3C//DTD HTML 4.0 Frameset//"),
143
+ GUMBO_STRING("-//W3C//DTD HTML 4.0 Transitional//"),
144
+ GUMBO_STRING("-//W3C//DTD HTML Experimental 19960712//"),
145
+ GUMBO_STRING("-//W3C//DTD HTML Experimental 970421//"),
146
+ GUMBO_STRING("-//W3C//DTD W3 HTML//"),
147
+ GUMBO_STRING("-//W3O//DTD W3 HTML 3.0//"),
148
+ GUMBO_STRING("-//WebTechs//DTD Mozilla HTML 2.0//"),
149
+ GUMBO_STRING("-//WebTechs//DTD Mozilla HTML//"), TERMINATOR};
143
150
 
144
151
  static const GumboStringPiece kQuirksModePublicIdExactMatches[] = {
145
- GUMBO_STRING("-//W3O//DTD W3 HTML Strict 3.0//EN//"),
146
- GUMBO_STRING("-/W3C/DTD HTML 4.0 Transitional/EN"),
147
- GUMBO_STRING("HTML"),
148
- TERMINATOR
149
- };
152
+ GUMBO_STRING("-//W3O//DTD W3 HTML Strict 3.0//EN//"),
153
+ GUMBO_STRING("-/W3C/DTD HTML 4.0 Transitional/EN"), GUMBO_STRING("HTML"),
154
+ TERMINATOR};
150
155
 
151
156
  static const GumboStringPiece kQuirksModeSystemIdExactMatches[] = {
152
- GUMBO_STRING("http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"),
153
- TERMINATOR
154
- };
157
+ GUMBO_STRING("http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"),
158
+ TERMINATOR};
155
159
 
156
160
  static const GumboStringPiece kLimitedQuirksPublicIdPrefixes[] = {
157
- GUMBO_STRING("-//W3C//DTD XHTML 1.0 Frameset//"),
158
- GUMBO_STRING("-//W3C//DTD XHTML 1.0 Transitional//"),
159
- TERMINATOR
160
- };
161
+ GUMBO_STRING("-//W3C//DTD XHTML 1.0 Frameset//"),
162
+ GUMBO_STRING("-//W3C//DTD XHTML 1.0 Transitional//"), TERMINATOR};
161
163
 
162
- static const GumboStringPiece kLimitedQuirksRequiresSystemIdPublicIdPrefixes[] = {
163
- GUMBO_STRING("-//W3C//DTD HTML 4.01 Frameset//"),
164
- GUMBO_STRING("-//W3C//DTD HTML 4.01 Transitional//"),
165
- TERMINATOR
166
- };
164
+ static const GumboStringPiece kLimitedQuirksRequiresSystemIdPublicIdPrefixes[] =
165
+ {GUMBO_STRING("-//W3C//DTD HTML 4.01 Frameset//"),
166
+ GUMBO_STRING("-//W3C//DTD HTML 4.01 Transitional//"), TERMINATOR};
167
167
 
168
168
  // Indexed by GumboNamespaceEnum; keep in sync with that.
169
- static const char* kLegalXmlns[] = {
170
- "http://www.w3.org/1999/xhtml",
171
- "http://www.w3.org/2000/svg",
172
- "http://www.w3.org/1998/Math/MathML"
173
- };
169
+ static const char* kLegalXmlns[] = {"http://www.w3.org/1999/xhtml",
170
+ "http://www.w3.org/2000/svg", "http://www.w3.org/1998/Math/MathML"};
174
171
 
175
172
  typedef struct _ReplacementEntry {
176
173
  const GumboStringPiece from;
@@ -178,112 +175,112 @@ typedef struct _ReplacementEntry {
178
175
  } ReplacementEntry;
179
176
 
180
177
  #define REPLACEMENT_ENTRY(from, to) \
181
- { GUMBO_STRING(from), GUMBO_STRING(to) }
178
+ { GUMBO_STRING(from), GUMBO_STRING(to) }
182
179
 
183
180
  // Static data for SVG attribute replacements.
184
- // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#adjust-svg-attributes
181
+ // https://html.spec.whatwg.org/multipage/syntax.html#creating-and-inserting-nodes
185
182
  static const ReplacementEntry kSvgAttributeReplacements[] = {
186
- REPLACEMENT_ENTRY("attributename", "attributeName"),
187
- REPLACEMENT_ENTRY("attributetype", "attributeType"),
188
- REPLACEMENT_ENTRY("basefrequency", "baseFrequency"),
189
- REPLACEMENT_ENTRY("baseprofile", "baseProfile"),
190
- REPLACEMENT_ENTRY("calcmode", "calcMode"),
191
- REPLACEMENT_ENTRY("clippathunits", "clipPathUnits"),
192
- REPLACEMENT_ENTRY("contentscripttype", "contentScriptType"),
193
- REPLACEMENT_ENTRY("contentstyletype", "contentStyleType"),
194
- REPLACEMENT_ENTRY("diffuseconstant", "diffuseConstant"),
195
- REPLACEMENT_ENTRY("edgemode", "edgeMode"),
196
- REPLACEMENT_ENTRY("externalresourcesrequired", "externalResourcesRequired"),
197
- REPLACEMENT_ENTRY("filterres", "filterRes"),
198
- REPLACEMENT_ENTRY("filterunits", "filterUnits"),
199
- REPLACEMENT_ENTRY("glyphref", "glyphRef"),
200
- REPLACEMENT_ENTRY("gradienttransform", "gradientTransform"),
201
- REPLACEMENT_ENTRY("gradientunits", "gradientUnits"),
202
- REPLACEMENT_ENTRY("kernelmatrix", "kernelMatrix"),
203
- REPLACEMENT_ENTRY("kernelunitlength", "kernelUnitLength"),
204
- REPLACEMENT_ENTRY("keypoints", "keyPoints"),
205
- REPLACEMENT_ENTRY("keysplines", "keySplines"),
206
- REPLACEMENT_ENTRY("keytimes", "keyTimes"),
207
- REPLACEMENT_ENTRY("lengthadjust", "lengthAdjust"),
208
- REPLACEMENT_ENTRY("limitingconeangle", "limitingConeAngle"),
209
- REPLACEMENT_ENTRY("markerheight", "markerHeight"),
210
- REPLACEMENT_ENTRY("markerunits", "markerUnits"),
211
- REPLACEMENT_ENTRY("markerwidth", "markerWidth"),
212
- REPLACEMENT_ENTRY("maskcontentunits", "maskContentUnits"),
213
- REPLACEMENT_ENTRY("maskunits", "maskUnits"),
214
- REPLACEMENT_ENTRY("numoctaves", "numOctaves"),
215
- REPLACEMENT_ENTRY("pathlength", "pathLength"),
216
- REPLACEMENT_ENTRY("patterncontentunits", "patternContentUnits"),
217
- REPLACEMENT_ENTRY("patterntransform", "patternTransform"),
218
- REPLACEMENT_ENTRY("patternunits", "patternUnits"),
219
- REPLACEMENT_ENTRY("pointsatx", "pointsAtX"),
220
- REPLACEMENT_ENTRY("pointsaty", "pointsAtY"),
221
- REPLACEMENT_ENTRY("pointsatz", "pointsAtZ"),
222
- REPLACEMENT_ENTRY("preservealpha", "preserveAlpha"),
223
- REPLACEMENT_ENTRY("preserveaspectratio", "preserveAspectRatio"),
224
- REPLACEMENT_ENTRY("primitiveunits", "primitiveUnits"),
225
- REPLACEMENT_ENTRY("refx", "refX"),
226
- REPLACEMENT_ENTRY("refy", "refY"),
227
- REPLACEMENT_ENTRY("repeatcount", "repeatCount"),
228
- REPLACEMENT_ENTRY("repeatdur", "repeatDur"),
229
- REPLACEMENT_ENTRY("requiredextensions", "requiredExtensions"),
230
- REPLACEMENT_ENTRY("requiredfeatures", "requiredFeatures"),
231
- REPLACEMENT_ENTRY("specularconstant", "specularConstant"),
232
- REPLACEMENT_ENTRY("specularexponent", "specularExponent"),
233
- REPLACEMENT_ENTRY("spreadmethod", "spreadMethod"),
234
- REPLACEMENT_ENTRY("startoffset", "startOffset"),
235
- REPLACEMENT_ENTRY("stddeviation", "stdDeviation"),
236
- REPLACEMENT_ENTRY("stitchtiles", "stitchTiles"),
237
- REPLACEMENT_ENTRY("surfacescale", "surfaceScale"),
238
- REPLACEMENT_ENTRY("systemlanguage", "systemLanguage"),
239
- REPLACEMENT_ENTRY("tablevalues", "tableValues"),
240
- REPLACEMENT_ENTRY("targetx", "targetX"),
241
- REPLACEMENT_ENTRY("targety", "targetY"),
242
- REPLACEMENT_ENTRY("textlength", "textLength"),
243
- REPLACEMENT_ENTRY("viewbox", "viewBox"),
244
- REPLACEMENT_ENTRY("viewtarget", "viewTarget"),
245
- REPLACEMENT_ENTRY("xchannelselector", "xChannelSelector"),
246
- REPLACEMENT_ENTRY("ychannelselector", "yChannelSelector"),
247
- REPLACEMENT_ENTRY("zoomandpan", "zoomAndPan"),
183
+ REPLACEMENT_ENTRY("attributename", "attributeName"),
184
+ REPLACEMENT_ENTRY("attributetype", "attributeType"),
185
+ REPLACEMENT_ENTRY("basefrequency", "baseFrequency"),
186
+ REPLACEMENT_ENTRY("baseprofile", "baseProfile"),
187
+ REPLACEMENT_ENTRY("calcmode", "calcMode"),
188
+ REPLACEMENT_ENTRY("clippathunits", "clipPathUnits"),
189
+ // REPLACEMENT_ENTRY("contentscripttype", "contentScriptType"),
190
+ // REPLACEMENT_ENTRY("contentstyletype", "contentStyleType"),
191
+ REPLACEMENT_ENTRY("diffuseconstant", "diffuseConstant"),
192
+ REPLACEMENT_ENTRY("edgemode", "edgeMode"),
193
+ // REPLACEMENT_ENTRY("externalresourcesrequired",
194
+ // "externalResourcesRequired"),
195
+ // REPLACEMENT_ENTRY("filterres", "filterRes"),
196
+ REPLACEMENT_ENTRY("filterunits", "filterUnits"),
197
+ REPLACEMENT_ENTRY("glyphref", "glyphRef"),
198
+ REPLACEMENT_ENTRY("gradienttransform", "gradientTransform"),
199
+ REPLACEMENT_ENTRY("gradientunits", "gradientUnits"),
200
+ REPLACEMENT_ENTRY("kernelmatrix", "kernelMatrix"),
201
+ REPLACEMENT_ENTRY("kernelunitlength", "kernelUnitLength"),
202
+ REPLACEMENT_ENTRY("keypoints", "keyPoints"),
203
+ REPLACEMENT_ENTRY("keysplines", "keySplines"),
204
+ REPLACEMENT_ENTRY("keytimes", "keyTimes"),
205
+ REPLACEMENT_ENTRY("lengthadjust", "lengthAdjust"),
206
+ REPLACEMENT_ENTRY("limitingconeangle", "limitingConeAngle"),
207
+ REPLACEMENT_ENTRY("markerheight", "markerHeight"),
208
+ REPLACEMENT_ENTRY("markerunits", "markerUnits"),
209
+ REPLACEMENT_ENTRY("markerwidth", "markerWidth"),
210
+ REPLACEMENT_ENTRY("maskcontentunits", "maskContentUnits"),
211
+ REPLACEMENT_ENTRY("maskunits", "maskUnits"),
212
+ REPLACEMENT_ENTRY("numoctaves", "numOctaves"),
213
+ REPLACEMENT_ENTRY("pathlength", "pathLength"),
214
+ REPLACEMENT_ENTRY("patterncontentunits", "patternContentUnits"),
215
+ REPLACEMENT_ENTRY("patterntransform", "patternTransform"),
216
+ REPLACEMENT_ENTRY("patternunits", "patternUnits"),
217
+ REPLACEMENT_ENTRY("pointsatx", "pointsAtX"),
218
+ REPLACEMENT_ENTRY("pointsaty", "pointsAtY"),
219
+ REPLACEMENT_ENTRY("pointsatz", "pointsAtZ"),
220
+ REPLACEMENT_ENTRY("preservealpha", "preserveAlpha"),
221
+ REPLACEMENT_ENTRY("preserveaspectratio", "preserveAspectRatio"),
222
+ REPLACEMENT_ENTRY("primitiveunits", "primitiveUnits"),
223
+ REPLACEMENT_ENTRY("refx", "refX"), REPLACEMENT_ENTRY("refy", "refY"),
224
+ REPLACEMENT_ENTRY("repeatcount", "repeatCount"),
225
+ REPLACEMENT_ENTRY("repeatdur", "repeatDur"),
226
+ REPLACEMENT_ENTRY("requiredextensions", "requiredExtensions"),
227
+ REPLACEMENT_ENTRY("requiredfeatures", "requiredFeatures"),
228
+ REPLACEMENT_ENTRY("specularconstant", "specularConstant"),
229
+ REPLACEMENT_ENTRY("specularexponent", "specularExponent"),
230
+ REPLACEMENT_ENTRY("spreadmethod", "spreadMethod"),
231
+ REPLACEMENT_ENTRY("startoffset", "startOffset"),
232
+ REPLACEMENT_ENTRY("stddeviation", "stdDeviation"),
233
+ REPLACEMENT_ENTRY("stitchtiles", "stitchTiles"),
234
+ REPLACEMENT_ENTRY("surfacescale", "surfaceScale"),
235
+ REPLACEMENT_ENTRY("systemlanguage", "systemLanguage"),
236
+ REPLACEMENT_ENTRY("tablevalues", "tableValues"),
237
+ REPLACEMENT_ENTRY("targetx", "targetX"),
238
+ REPLACEMENT_ENTRY("targety", "targetY"),
239
+ REPLACEMENT_ENTRY("textlength", "textLength"),
240
+ REPLACEMENT_ENTRY("viewbox", "viewBox"),
241
+ REPLACEMENT_ENTRY("viewtarget", "viewTarget"),
242
+ REPLACEMENT_ENTRY("xchannelselector", "xChannelSelector"),
243
+ REPLACEMENT_ENTRY("ychannelselector", "yChannelSelector"),
244
+ REPLACEMENT_ENTRY("zoomandpan", "zoomAndPan"),
248
245
  };
249
246
 
250
247
  static const ReplacementEntry kSvgTagReplacements[] = {
251
- REPLACEMENT_ENTRY("altglyph", "altGlyph"),
252
- REPLACEMENT_ENTRY("altglyphdef", "altGlyphDef"),
253
- REPLACEMENT_ENTRY("altglyphitem", "altGlyphItem"),
254
- REPLACEMENT_ENTRY("animatecolor", "animateColor"),
255
- REPLACEMENT_ENTRY("animatemotion", "animateMotion"),
256
- REPLACEMENT_ENTRY("animatetransform", "animateTransform"),
257
- REPLACEMENT_ENTRY("clippath", "clipPath"),
258
- REPLACEMENT_ENTRY("feblend", "feBlend"),
259
- REPLACEMENT_ENTRY("fecolormatrix", "feColorMatrix"),
260
- REPLACEMENT_ENTRY("fecomponenttransfer", "feComponentTransfer"),
261
- REPLACEMENT_ENTRY("fecomposite", "feComposite"),
262
- REPLACEMENT_ENTRY("feconvolvematrix", "feConvolveMatrix"),
263
- REPLACEMENT_ENTRY("fediffuselighting", "feDiffuseLighting"),
264
- REPLACEMENT_ENTRY("fedisplacementmap", "feDisplacementMap"),
265
- REPLACEMENT_ENTRY("fedistantlight", "feDistantLight"),
266
- REPLACEMENT_ENTRY("feflood", "feFlood"),
267
- REPLACEMENT_ENTRY("fefunca", "feFuncA"),
268
- REPLACEMENT_ENTRY("fefuncb", "feFuncB"),
269
- REPLACEMENT_ENTRY("fefuncg", "feFuncG"),
270
- REPLACEMENT_ENTRY("fefuncr", "feFuncR"),
271
- REPLACEMENT_ENTRY("fegaussianblur", "feGaussianBlur"),
272
- REPLACEMENT_ENTRY("feimage", "feImage"),
273
- REPLACEMENT_ENTRY("femerge", "feMerge"),
274
- REPLACEMENT_ENTRY("femergenode", "feMergeNode"),
275
- REPLACEMENT_ENTRY("femorphology", "feMorphology"),
276
- REPLACEMENT_ENTRY("feoffset", "feOffset"),
277
- REPLACEMENT_ENTRY("fepointlight", "fePointLight"),
278
- REPLACEMENT_ENTRY("fespecularlighting", "feSpecularLighting"),
279
- REPLACEMENT_ENTRY("fespotlight", "feSpotLight"),
280
- REPLACEMENT_ENTRY("fetile", "feTile"),
281
- REPLACEMENT_ENTRY("feturbulence", "feTurbulence"),
282
- REPLACEMENT_ENTRY("foreignobject", "foreignObject"),
283
- REPLACEMENT_ENTRY("glyphref", "glyphRef"),
284
- REPLACEMENT_ENTRY("lineargradient", "linearGradient"),
285
- REPLACEMENT_ENTRY("radialgradient", "radialGradient"),
286
- REPLACEMENT_ENTRY("textpath", "textPath"),
248
+ REPLACEMENT_ENTRY("altglyph", "altGlyph"),
249
+ REPLACEMENT_ENTRY("altglyphdef", "altGlyphDef"),
250
+ REPLACEMENT_ENTRY("altglyphitem", "altGlyphItem"),
251
+ REPLACEMENT_ENTRY("animatecolor", "animateColor"),
252
+ REPLACEMENT_ENTRY("animatemotion", "animateMotion"),
253
+ REPLACEMENT_ENTRY("animatetransform", "animateTransform"),
254
+ REPLACEMENT_ENTRY("clippath", "clipPath"),
255
+ REPLACEMENT_ENTRY("feblend", "feBlend"),
256
+ REPLACEMENT_ENTRY("fecolormatrix", "feColorMatrix"),
257
+ REPLACEMENT_ENTRY("fecomponenttransfer", "feComponentTransfer"),
258
+ REPLACEMENT_ENTRY("fecomposite", "feComposite"),
259
+ REPLACEMENT_ENTRY("feconvolvematrix", "feConvolveMatrix"),
260
+ REPLACEMENT_ENTRY("fediffuselighting", "feDiffuseLighting"),
261
+ REPLACEMENT_ENTRY("fedisplacementmap", "feDisplacementMap"),
262
+ REPLACEMENT_ENTRY("fedistantlight", "feDistantLight"),
263
+ REPLACEMENT_ENTRY("feflood", "feFlood"),
264
+ REPLACEMENT_ENTRY("fefunca", "feFuncA"),
265
+ REPLACEMENT_ENTRY("fefuncb", "feFuncB"),
266
+ REPLACEMENT_ENTRY("fefuncg", "feFuncG"),
267
+ REPLACEMENT_ENTRY("fefuncr", "feFuncR"),
268
+ REPLACEMENT_ENTRY("fegaussianblur", "feGaussianBlur"),
269
+ REPLACEMENT_ENTRY("feimage", "feImage"),
270
+ REPLACEMENT_ENTRY("femerge", "feMerge"),
271
+ REPLACEMENT_ENTRY("femergenode", "feMergeNode"),
272
+ REPLACEMENT_ENTRY("femorphology", "feMorphology"),
273
+ REPLACEMENT_ENTRY("feoffset", "feOffset"),
274
+ REPLACEMENT_ENTRY("fepointlight", "fePointLight"),
275
+ REPLACEMENT_ENTRY("fespecularlighting", "feSpecularLighting"),
276
+ REPLACEMENT_ENTRY("fespotlight", "feSpotLight"),
277
+ REPLACEMENT_ENTRY("fetile", "feTile"),
278
+ REPLACEMENT_ENTRY("feturbulence", "feTurbulence"),
279
+ REPLACEMENT_ENTRY("foreignobject", "foreignObject"),
280
+ REPLACEMENT_ENTRY("glyphref", "glyphRef"),
281
+ REPLACEMENT_ENTRY("lineargradient", "linearGradient"),
282
+ REPLACEMENT_ENTRY("radialgradient", "radialGradient"),
283
+ REPLACEMENT_ENTRY("textpath", "textPath"),
287
284
  };
288
285
 
289
286
  typedef struct _NamespacedAttributeReplacement {
@@ -293,18 +290,18 @@ typedef struct _NamespacedAttributeReplacement {
293
290
  } NamespacedAttributeReplacement;
294
291
 
295
292
  static const NamespacedAttributeReplacement kForeignAttributeReplacements[] = {
296
- { "xlink:actuate", "actuate", GUMBO_ATTR_NAMESPACE_XLINK },
297
- { "xlink:actuate", "actuate", GUMBO_ATTR_NAMESPACE_XLINK },
298
- { "xlink:href", "href", GUMBO_ATTR_NAMESPACE_XLINK },
299
- { "xlink:role", "role", GUMBO_ATTR_NAMESPACE_XLINK },
300
- { "xlink:show", "show", GUMBO_ATTR_NAMESPACE_XLINK },
301
- { "xlink:title", "title", GUMBO_ATTR_NAMESPACE_XLINK },
302
- { "xlink:type", "type", GUMBO_ATTR_NAMESPACE_XLINK },
303
- { "xml:base", "base", GUMBO_ATTR_NAMESPACE_XML },
304
- { "xml:lang", "lang", GUMBO_ATTR_NAMESPACE_XML },
305
- { "xml:space", "space", GUMBO_ATTR_NAMESPACE_XML },
306
- { "xmlns", "xmlns", GUMBO_ATTR_NAMESPACE_XMLNS },
307
- { "xmlns:xlink", "xlink", GUMBO_ATTR_NAMESPACE_XMLNS },
293
+ {"xlink:actuate", "actuate", GUMBO_ATTR_NAMESPACE_XLINK},
294
+ {"xlink:actuate", "actuate", GUMBO_ATTR_NAMESPACE_XLINK},
295
+ {"xlink:href", "href", GUMBO_ATTR_NAMESPACE_XLINK},
296
+ {"xlink:role", "role", GUMBO_ATTR_NAMESPACE_XLINK},
297
+ {"xlink:show", "show", GUMBO_ATTR_NAMESPACE_XLINK},
298
+ {"xlink:title", "title", GUMBO_ATTR_NAMESPACE_XLINK},
299
+ {"xlink:type", "type", GUMBO_ATTR_NAMESPACE_XLINK},
300
+ {"xml:base", "base", GUMBO_ATTR_NAMESPACE_XML},
301
+ {"xml:lang", "lang", GUMBO_ATTR_NAMESPACE_XML},
302
+ {"xml:space", "space", GUMBO_ATTR_NAMESPACE_XML},
303
+ {"xmlns", "xmlns", GUMBO_ATTR_NAMESPACE_XMLNS},
304
+ {"xmlns:xlink", "xlink", GUMBO_ATTR_NAMESPACE_XMLNS},
308
305
  };
309
306
 
310
307
  // The "scope marker" for the list of active formatting elements. We use a
@@ -336,7 +333,7 @@ typedef struct _TextNodeBufferState {
336
333
  // The source position of the start of this text node.
337
334
  GumboSourcePosition _start_position;
338
335
 
339
- // The type of node that will be inserted (TEXT or WHITESPACE).
336
+ // The type of node that will be inserted (TEXT, CDATA, or WHITESPACE).
340
337
  GumboNodeType _type;
341
338
  } TextNodeBufferState;
342
339
 
@@ -362,6 +359,9 @@ typedef struct GumboInternalParserState {
362
359
  GumboNode* _head_element;
363
360
  GumboNode* _form_element;
364
361
 
362
+ // The element used as fragment context when parsing in fragment mode
363
+ GumboNode* _fragment_ctx;
364
+
365
365
  // The flag for when the spec says "Reprocess the current token in..."
366
366
  bool _reprocess_current_token;
367
367
 
@@ -418,14 +418,14 @@ static bool attribute_matches(
418
418
  static bool attribute_matches_case_sensitive(
419
419
  const GumboVector* attributes, const char* name, const char* value) {
420
420
  const GumboAttribute* attr = gumbo_get_attribute(attributes, name);  // Look the attribute up by name; a null result is handled by the return below.
421
- return attr ? strcmp(value, attr->value) == 0 : false;
421
+ return attr ? strcmp(value, attr->value) == 0 : false;  // True only when the attribute was found and strcmp reports its value equal to `value` (exact, case-sensitive comparison).
422
422
  }
423
423
 
424
424
  // Checks if the specified attribute vectors are identical.
425
425
  static bool all_attributes_match(
426
426
  const GumboVector* attr1, const GumboVector* attr2) {
427
- int num_unmatched_attr2_elements = attr2->length;
428
- for (int i = 0; i < attr1->length; ++i) {
427
+ unsigned int num_unmatched_attr2_elements = attr2->length;
428
+ for (unsigned int i = 0; i < attr1->length; ++i) {
429
429
  const GumboAttribute* attr = attr1->data[i];
430
430
  if (attribute_matches_case_sensitive(attr2, attr->name, attr->value)) {
431
431
  --num_unmatched_attr2_elements;
@@ -453,8 +453,7 @@ static GumboNode* create_node(GumboParser* parser, GumboNodeType type) {
453
453
  static GumboNode* new_document_node(GumboParser* parser) {
454
454
  GumboNode* document_node = create_node(parser, GUMBO_NODE_DOCUMENT);
455
455
  document_node->parse_flags = GUMBO_INSERTION_BY_PARSER;
456
- gumbo_vector_init(
457
- parser, 1, &document_node->v.document.children);
456
+ gumbo_vector_init(parser, 1, &document_node->v.document.children);
458
457
 
459
458
  // Must be initialized explicitly, as there's no guarantee that we'll see a
460
459
  // doc type token.
@@ -489,6 +488,7 @@ static void parser_state_init(GumboParser* parser) {
489
488
  gumbo_vector_init(parser, 5, &parser_state->_template_insertion_modes);
490
489
  parser_state->_head_element = NULL;
491
490
  parser_state->_form_element = NULL;
491
+ parser_state->_fragment_ctx = NULL;
492
492
  parser_state->_current_token = NULL;
493
493
  parser_state->_closed_body_tag = false;
494
494
  parser_state->_closed_html_tag = false;
@@ -497,6 +497,9 @@ static void parser_state_init(GumboParser* parser) {
497
497
 
498
498
  static void parser_state_destroy(GumboParser* parser) {
499
499
  GumboParserState* state = parser->_parser_state;
500
+ if (state->_fragment_ctx) {
501
+ destroy_node(parser, state->_fragment_ctx);
502
+ }
500
503
  gumbo_vector_destroy(parser, &state->_active_formatting_elements);
501
504
  gumbo_vector_destroy(parser, &state->_open_elements);
502
505
  gumbo_vector_destroy(parser, &state->_template_insertion_modes);
@@ -508,6 +511,10 @@ static GumboNode* get_document_node(GumboParser* parser) {
508
511
  return parser->_output->document;
509
512
  }
510
513
 
514
+ static bool is_fragment_parser(const GumboParser* parser) {  // Reports whether the parser state has a fragment context node set.
515
+ return !!parser->_parser_state->_fragment_ctx;  // Double negation collapses the pointer to a bool: non-NULL -> true.
516
+ }
517
+
511
518
  // Returns the node at the bottom of the stack of open elements, or NULL if no
512
519
  // elements have been added yet.
513
520
  static GumboNode* get_current_node(GumboParser* parser) {
@@ -521,6 +528,14 @@ static GumboNode* get_current_node(GumboParser* parser) {
521
528
  return open_elements->data[open_elements->length - 1];
522
529
  }
523
530
 
531
+ static GumboNode* get_adjusted_current_node(GumboParser* parser) {  // Returns the fragment context node in the one special case below, otherwise whatever get_current_node() returns.
532
+ GumboParserState* state = parser->_parser_state;
533
+ if (state->_open_elements.length == 1 && state->_fragment_ctx) {  // Special case: exactly one open element and a fragment context is set.
534
+ return state->_fragment_ctx;
535
+ }
536
+ return get_current_node(parser);
537
+ }
538
+
524
539
  // Returns true if the given needle is in the given array of literal
525
540
  // GumboStringPieces. If exact_match is true, this requires that they match
526
541
  // exactly; otherwise, this performs a prefix match to check if any of the
@@ -528,7 +543,7 @@ static GumboNode* get_current_node(GumboParser* parser) {
528
543
  // case-insensitive match.
529
544
  static bool is_in_static_list(
530
545
  const char* needle, const GumboStringPiece* haystack, bool exact_match) {
531
- for (int i = 0; haystack[i].length > 0; ++i) {
546
+ for (unsigned int i = 0; haystack[i].length > 0; ++i) {
532
547
  if ((exact_match && !strcmp(needle, haystack[i].data)) ||
533
548
  (!exact_match && !strcasecmp(needle, haystack[i].data))) {
534
549
  return true;
@@ -547,15 +562,36 @@ static void set_insertion_mode(GumboParser* parser, GumboInsertionMode mode) {
547
562
  // indicate that there is no appropriate insertion mode, and the loop should
548
563
  // continue.
549
564
  static GumboInsertionMode get_appropriate_insertion_mode(
550
- const GumboNode* node, bool is_last) {
551
- assert(node->type == GUMBO_NODE_ELEMENT);
565
+ const GumboParser* parser, int index) {
566
+ const GumboVector* open_elements = &parser->_parser_state->_open_elements;
567
+ const GumboNode* node = open_elements->data[index];
568
+ const bool is_last = index == 0;
569
+
570
+ if (is_last && is_fragment_parser(parser)) {
571
+ node = parser->_parser_state->_fragment_ctx;
572
+ }
573
+
574
+ assert(node->type == GUMBO_NODE_ELEMENT || node->type == GUMBO_NODE_TEMPLATE);
552
575
  switch (node->v.element.tag) {
553
- case GUMBO_TAG_SELECT:
576
+ case GUMBO_TAG_SELECT: {
577
+ if (is_last) {
578
+ return GUMBO_INSERTION_MODE_IN_SELECT;
579
+ }
580
+ for (int i = index; i > 0; --i) {
581
+ const GumboNode* ancestor = open_elements->data[i];
582
+ if (node_html_tag_is(ancestor, GUMBO_TAG_TEMPLATE)) {
583
+ return GUMBO_INSERTION_MODE_IN_SELECT;
584
+ }
585
+ if (node_html_tag_is(ancestor, GUMBO_TAG_TABLE)) {
586
+ return GUMBO_INSERTION_MODE_IN_SELECT_IN_TABLE;
587
+ }
588
+ }
554
589
  return GUMBO_INSERTION_MODE_IN_SELECT;
590
+ }
555
591
  case GUMBO_TAG_TD:
556
592
  case GUMBO_TAG_TH:
557
- return is_last ?
558
- GUMBO_INSERTION_MODE_IN_BODY : GUMBO_INSERTION_MODE_IN_CELL;
593
+ if (!is_last) return GUMBO_INSERTION_MODE_IN_CELL;
594
+ break;
559
595
  case GUMBO_TAG_TR:
560
596
  return GUMBO_INSERTION_MODE_IN_ROW;
561
597
  case GUMBO_TAG_TBODY:
@@ -568,25 +604,30 @@ static GumboInsertionMode get_appropriate_insertion_mode(
568
604
  return GUMBO_INSERTION_MODE_IN_COLUMN_GROUP;
569
605
  case GUMBO_TAG_TABLE:
570
606
  return GUMBO_INSERTION_MODE_IN_TABLE;
607
+ case GUMBO_TAG_TEMPLATE:
608
+ return get_current_template_insertion_mode(parser);
571
609
  case GUMBO_TAG_HEAD:
610
+ if (!is_last) return GUMBO_INSERTION_MODE_IN_HEAD;
611
+ break;
572
612
  case GUMBO_TAG_BODY:
573
613
  return GUMBO_INSERTION_MODE_IN_BODY;
574
614
  case GUMBO_TAG_FRAMESET:
575
615
  return GUMBO_INSERTION_MODE_IN_FRAMESET;
576
616
  case GUMBO_TAG_HTML:
577
- return GUMBO_INSERTION_MODE_BEFORE_HEAD;
617
+ return parser->_parser_state->_head_element
618
+ ? GUMBO_INSERTION_MODE_AFTER_HEAD
619
+ : GUMBO_INSERTION_MODE_BEFORE_HEAD;
578
620
  default:
579
- return is_last ?
580
- GUMBO_INSERTION_MODE_IN_BODY : GUMBO_INSERTION_MODE_INITIAL;
621
+ break;
581
622
  }
623
+ return is_last ? GUMBO_INSERTION_MODE_IN_BODY : GUMBO_INSERTION_MODE_INITIAL;
582
624
  }
583
625
 
584
626
  // This performs the actual "reset the insertion mode" loop.
585
627
  static void reset_insertion_mode_appropriately(GumboParser* parser) {
586
628
  const GumboVector* open_elements = &parser->_parser_state->_open_elements;
587
- for (int i = open_elements->length; --i >= 0; ) {
588
- GumboInsertionMode mode =
589
- get_appropriate_insertion_mode(open_elements->data[i], i == 0);
629
+ for (int i = open_elements->length; --i >= 0;) {
630
+ GumboInsertionMode mode = get_appropriate_insertion_mode(parser, i);
590
631
  if (mode != GUMBO_INSERTION_MODE_INITIAL) {
591
632
  set_insertion_mode(parser, mode);
592
633
  return;
@@ -597,7 +638,8 @@ static void reset_insertion_mode_appropriately(GumboParser* parser) {
597
638
  assert(0);
598
639
  }
599
640
 
600
- static GumboError* parser_add_parse_error(GumboParser* parser, const GumboToken* token) {
641
+ static GumboError* parser_add_parse_error(
642
+ GumboParser* parser, const GumboToken* token) {
601
643
  gumbo_debug("Adding parse error.\n");
602
644
  GumboError* error = gumbo_add_error(parser);
603
645
  if (!error) {
@@ -616,13 +658,14 @@ static GumboError* parser_add_parse_error(GumboParser* parser, const GumboToken*
616
658
  }
617
659
  GumboParserState* state = parser->_parser_state;
618
660
  extra_data->parser_state = state->_insertion_mode;
619
- gumbo_vector_init(parser, state->_open_elements.length,
620
- &extra_data->tag_stack);
621
- for (int i = 0; i < state->_open_elements.length; ++i) {
661
+ gumbo_vector_init(
662
+ parser, state->_open_elements.length, &extra_data->tag_stack);
663
+ for (unsigned int i = 0; i < state->_open_elements.length; ++i) {
622
664
  const GumboNode* node = state->_open_elements.data[i];
623
- assert(node->type == GUMBO_NODE_ELEMENT);
624
- gumbo_vector_add(parser, (void*) node->v.element.tag,
625
- &extra_data->tag_stack);
665
+ assert(
666
+ node->type == GUMBO_NODE_ELEMENT || node->type == GUMBO_NODE_TEMPLATE);
667
+ gumbo_vector_add(
668
+ parser, (void*) node->v.element.tag, &extra_data->tag_stack);
626
669
  }
627
670
  return error;
628
671
  }
@@ -631,13 +674,8 @@ static GumboError* parser_add_parse_error(GumboParser* parser, const GumboToken*
631
674
  // by is_start) with one of the tags present in the given tagset. A token
632
675
  // whose tag is not below GUMBO_TAG_LAST never matches, and the namespace is
633
676
  // not consulted here: any non-zero tagset entry for the tag counts as a match.
634
- // TODO(jdtang): A lot of the tag lists for this function are repeated in many
635
- // places in the code. This is how it's written in the spec (and it's done this
636
- // way so it's easy to verify the code against the spec), but it may be worth
637
- // coming up with a notion of a "tag set" that includes a list of tags, and
638
- // using that in many places. It'd probably also help performance, but I want
639
- // to profile before optimizing.
640
- static bool tag_in(const GumboToken* token, bool is_start, ...) {
677
+ static bool tag_in(
678
+ const GumboToken* token, bool is_start, const gumbo_tagset tags) {
641
679
  GumboTag token_tag;
642
680
  if (is_start && token->type == GUMBO_TOKEN_START_TAG) {
643
681
  token_tag = token->v.start_tag.tag;
@@ -646,19 +684,7 @@ static bool tag_in(const GumboToken* token, bool is_start, ...) {
646
684
  } else {
647
685
  return false;
648
686
  }
649
-
650
- va_list tags;
651
- va_start(tags, is_start);
652
- bool result = false;
653
- for (GumboTag tag = va_arg(tags, GumboTag); tag != GUMBO_TAG_LAST;
654
- tag = va_arg(tags, GumboTag)) {
655
- if (tag == token_tag) {
656
- result = true;
657
- break;
658
- }
659
- }
660
- va_end(tags);
661
- return result;
687
+ return (token_tag < GUMBO_TAG_LAST && tags[(int) token_tag] != 0);
662
688
  }
663
689
 
664
690
  // Like tag_in, but for the single-tag case.
@@ -673,50 +699,125 @@ static bool tag_is(const GumboToken* token, bool is_start, GumboTag tag) {
673
699
  }
674
700
 
675
701
  // Like tag_in, but checks for the tag of a node, rather than a token.
676
- static bool node_tag_in(const GumboNode* node, ...) {
702
+ static bool node_tag_in_set(const GumboNode* node, const gumbo_tagset tags) {
677
703
  assert(node != NULL);
678
- if (node->type != GUMBO_NODE_ELEMENT) {
704
+ if (node->type != GUMBO_NODE_ELEMENT && node->type != GUMBO_NODE_TEMPLATE) {
679
705
  return false;
680
706
  }
681
- GumboTag node_tag = node->v.element.tag;
682
-
683
- va_list tags;
684
- va_start(tags, node);
685
- bool result = false;
686
- for (GumboTag tag = va_arg(tags, GumboTag); tag != GUMBO_TAG_LAST;
687
- tag = va_arg(tags, GumboTag)) {
688
- assert(tag <= GUMBO_TAG_LAST);
689
- if (tag == node_tag) {
690
- result = true;
691
- break;
692
- }
693
- }
694
- va_end(tags);
695
- return result;
707
+ return TAGSET_INCLUDES(
708
+ tags, node->v.element.tag_namespace, node->v.element.tag);
696
709
  }
697
710
 
698
711
  // Like node_tag_in, but for the single-tag case.
699
- static bool node_tag_is(const GumboNode* node, GumboTag tag) {
700
- return node->type == GUMBO_NODE_ELEMENT && node->v.element.tag == tag;
712
+ static bool node_qualified_tag_is(
713
+ const GumboNode* node, GumboNamespaceEnum ns, GumboTag tag) {
714
+ assert(node);
715
+ return (node->type == GUMBO_NODE_ELEMENT ||
716
+ node->type == GUMBO_NODE_TEMPLATE) &&
717
+ node->v.element.tag == tag && node->v.element.tag_namespace == ns;
718
+ }
719
+
720
+ // Like node_tag_in, but for the single-tag case in the HTML namespace
721
+ static bool node_html_tag_is(const GumboNode* node, GumboTag tag) {
722
+ return node_qualified_tag_is(node, GUMBO_NAMESPACE_HTML, tag);
723
+ }
724
+
725
+ static void push_template_insertion_mode(
726
+ GumboParser* parser, GumboInsertionMode mode) {
727
+ gumbo_vector_add(
728
+ parser, (void*) mode, &parser->_parser_state->_template_insertion_modes);
729
+ }
730
+
731
+ static void pop_template_insertion_mode(GumboParser* parser) {
732
+ gumbo_vector_pop(parser, &parser->_parser_state->_template_insertion_modes);
733
+ }
734
+
735
+ // Returns the current template insertion mode. If the stack of template
736
+ // insertion modes is empty, this returns GUMBO_INSERTION_MODE_INITIAL.
737
+ static GumboInsertionMode get_current_template_insertion_mode(
738
+ const GumboParser* parser) {
739
+ GumboVector* template_insertion_modes =
740
+ &parser->_parser_state->_template_insertion_modes;
741
+ if (template_insertion_modes->length == 0) {
742
+ return GUMBO_INSERTION_MODE_INITIAL;
743
+ }
744
+ return (GumboInsertionMode)
745
+ template_insertion_modes->data[(template_insertion_modes->length - 1)];
701
746
  }
702
747
 
703
748
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#mathml-text-integration-point
704
749
  static bool is_mathml_integration_point(const GumboNode* node) {
705
- return node_tag_in(node, GUMBO_TAG_MI, GUMBO_TAG_MO, GUMBO_TAG_MN,
706
- GUMBO_TAG_MS, GUMBO_TAG_MTEXT, GUMBO_TAG_LAST) &&
707
- node->v.element.tag_namespace == GUMBO_NAMESPACE_MATHML;
750
+ return node_tag_in_set(
751
+ node, (gumbo_tagset){TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN),
752
+ TAG_MATHML(MS), TAG_MATHML(MTEXT)});
708
753
  }
709
754
 
710
755
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#html-integration-point
711
756
  static bool is_html_integration_point(const GumboNode* node) {
712
- return (node_tag_in(node, GUMBO_TAG_FOREIGNOBJECT, GUMBO_TAG_DESC,
713
- GUMBO_TAG_TITLE, GUMBO_TAG_LAST) &&
714
- node->v.element.tag_namespace == GUMBO_NAMESPACE_SVG) ||
715
- (node_tag_is(node, GUMBO_TAG_ANNOTATION_XML) && (
716
- attribute_matches(&node->v.element.attributes,
717
- "encoding", "text/html") ||
718
- attribute_matches(&node->v.element.attributes,
719
- "encoding", "application/xhtml+xml")));
757
+ return node_tag_in_set(node, (gumbo_tagset){TAG_SVG(FOREIGNOBJECT),
758
+ TAG_SVG(DESC), TAG_SVG(TITLE)}) ||
759
+ (node_qualified_tag_is(
760
+ node, GUMBO_NAMESPACE_MATHML, GUMBO_TAG_ANNOTATION_XML) &&
761
+ (attribute_matches(
762
+ &node->v.element.attributes, "encoding", "text/html") ||
763
+ attribute_matches(&node->v.element.attributes, "encoding",
764
+ "application/xhtml+xml")));
765
+ }
766
+
767
+ // This represents a place to insert a node, consisting of a target parent and a
768
+ // child index within that parent. If the node should be inserted at the end of
769
+ // the parent's child, index will be -1.
770
+ typedef struct {
771
+ GumboNode* target;
772
+ int index;
773
+ } InsertionLocation;
774
+
775
+ InsertionLocation get_appropriate_insertion_location(
776
+ GumboParser* parser, GumboNode* override_target) {
777
+ InsertionLocation retval = {override_target, -1};
778
+ if (retval.target == NULL) {
779
+ // No override target; default to the current node, but special-case the
780
+ // root node since get_current_node() assumes the stack of open elements is
781
+ // non-empty.
782
+ retval.target = parser->_output->root != NULL ? get_current_node(parser)
783
+ : get_document_node(parser);
784
+ }
785
+ if (!parser->_parser_state->_foster_parent_insertions ||
786
+ !node_tag_in_set(retval.target, (gumbo_tagset){TAG(TABLE), TAG(TBODY),
787
+ TAG(TFOOT), TAG(THEAD), TAG(TR)})) {
788
+ return retval;
789
+ }
790
+
791
+ // Foster-parenting case.
792
+ int last_template_index = -1;
793
+ int last_table_index = -1;
794
+ GumboVector* open_elements = &parser->_parser_state->_open_elements;
795
+ for (unsigned int i = 0; i < open_elements->length; ++i) {
796
+ if (node_html_tag_is(open_elements->data[i], GUMBO_TAG_TEMPLATE)) {
797
+ last_template_index = i;
798
+ }
799
+ if (node_html_tag_is(open_elements->data[i], GUMBO_TAG_TABLE)) {
800
+ last_table_index = i;
801
+ }
802
+ }
803
+ if (last_template_index != -1 &&
804
+ (last_table_index == -1 || last_template_index > last_table_index)) {
805
+ retval.target = open_elements->data[last_template_index];
806
+ return retval;
807
+ }
808
+ if (last_table_index == -1) {
809
+ retval.target = open_elements->data[0];
810
+ return retval;
811
+ }
812
+ GumboNode* last_table = open_elements->data[last_table_index];
813
+ if (last_table->parent != NULL) {
814
+ retval.target = last_table->parent;
815
+ retval.index = last_table->index_within_parent;
816
+ return retval;
817
+ }
818
+
819
+ retval.target = open_elements->data[last_table_index - 1];
820
+ return retval;
720
821
  }
721
822
 
722
823
  // Appends a node to the end of its parent, setting the "parent" and
@@ -726,7 +827,8 @@ static void append_node(
726
827
  assert(node->parent == NULL);
727
828
  assert(node->index_within_parent == -1);
728
829
  GumboVector* children;
729
- if (parent->type == GUMBO_NODE_ELEMENT) {
830
+ if (parent->type == GUMBO_NODE_ELEMENT ||
831
+ parent->type == GUMBO_NODE_TEMPLATE) {
730
832
  children = &parent->v.element.children;
731
833
  } else {
732
834
  assert(parent->type == GUMBO_NODE_DOCUMENT);
@@ -738,64 +840,41 @@ static void append_node(
738
840
  assert(node->index_within_parent < children->length);
739
841
  }
740
842
 
741
- // Inserts a node at the specified index within its parent, updating the
843
+ // Inserts a node at the specified InsertionLocation, updating the
742
844
  // "parent" and "index_within_parent" fields of it and all its siblings.
845
+ // If the index of the location is -1, this calls append_node.
743
846
  static void insert_node(
744
- GumboParser* parser, GumboNode* parent, int index, GumboNode* node) {
847
+ GumboParser* parser, GumboNode* node, InsertionLocation location) {
745
848
  assert(node->parent == NULL);
746
849
  assert(node->index_within_parent == -1);
747
- assert(parent->type == GUMBO_NODE_ELEMENT);
748
- GumboVector* children = &parent->v.element.children;
749
- assert(index >= 0);
750
- assert(index < children->length);
751
- node->parent = parent;
752
- node->index_within_parent = index;
753
- gumbo_vector_insert_at(parser, (void*) node, index, children);
754
- assert(node->index_within_parent < children->length);
755
- for (int i = index + 1; i < children->length; ++i) {
756
- GumboNode* sibling = children->data[i];
757
- sibling->index_within_parent = i;
758
- assert(sibling->index_within_parent < children->length);
759
- }
760
- }
850
+ GumboNode* parent = location.target;
851
+ int index = location.index;
852
+ if (index != -1) {
853
+ GumboVector* children = NULL;
854
+ if (parent->type == GUMBO_NODE_ELEMENT ||
855
+ parent->type == GUMBO_NODE_TEMPLATE) {
856
+ children = &parent->v.element.children;
857
+ } else if (parent->type == GUMBO_NODE_DOCUMENT) {
858
+ children = &parent->v.document.children;
859
+ assert(children->length == 0);
860
+ } else {
861
+ assert(0);
862
+ }
761
863
 
762
- // http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#foster-parenting
763
- static void foster_parent_element(GumboParser* parser, GumboNode* node) {
764
- GumboVector* open_elements = &parser->_parser_state->_open_elements;
765
- assert(open_elements->length > 2);
766
-
767
- node->parse_flags |= GUMBO_INSERTION_FOSTER_PARENTED;
768
- GumboNode* foster_parent_element = open_elements->data[0];
769
- assert(foster_parent_element->type == GUMBO_NODE_ELEMENT);
770
- assert(node_tag_is(foster_parent_element, GUMBO_TAG_HTML));
771
- for (int i = open_elements->length; --i > 1; ) {
772
- GumboNode* table_element = open_elements->data[i];
773
- if (node_tag_is(table_element, GUMBO_TAG_TABLE)) {
774
- foster_parent_element = table_element->parent;
775
- if (!foster_parent_element ||
776
- foster_parent_element->type != GUMBO_NODE_ELEMENT) {
777
- // Table has no parent; spec says it's possible if a script manipulated
778
- // the DOM, although I don't think we have to worry about this case.
779
- gumbo_debug("Table has no parent.\n");
780
- foster_parent_element = open_elements->data[i - 1];
781
- break;
782
- }
783
- assert(foster_parent_element->type == GUMBO_NODE_ELEMENT);
784
- gumbo_debug("Found enclosing table (%x) at %d; parent=%s, index=%d.\n",
785
- table_element, i, gumbo_normalized_tagname(
786
- foster_parent_element->v.element.tag),
787
- table_element->index_within_parent);
788
- assert(foster_parent_element->v.element.children.data[
789
- table_element->index_within_parent] == table_element);
790
- insert_node(parser, foster_parent_element,
791
- table_element->index_within_parent, node);
792
- return;
864
+ assert(index >= 0);
865
+ assert((unsigned int) index < children->length);
866
+ node->parent = parent;
867
+ node->index_within_parent = index;
868
+ gumbo_vector_insert_at(parser, (void*) node, index, children);
869
+ assert(node->index_within_parent < children->length);
870
+ for (unsigned int i = index + 1; i < children->length; ++i) {
871
+ GumboNode* sibling = children->data[i];
872
+ sibling->index_within_parent = i;
873
+ assert(sibling->index_within_parent < children->length);
793
874
  }
875
+ } else {
876
+ append_node(parser, parent, node);
794
877
  }
795
- if (node->type == GUMBO_NODE_ELEMENT) {
796
- gumbo_vector_add(parser, (void*) node, open_elements);
797
- }
798
- append_node(parser, foster_parent_element, node);
799
878
  }
800
879
 
801
880
  static void maybe_flush_text_node_buffer(GumboParser* parser) {
@@ -806,30 +885,31 @@ static void maybe_flush_text_node_buffer(GumboParser* parser) {
806
885
  }
807
886
 
808
887
  assert(buffer_state->_type == GUMBO_NODE_WHITESPACE ||
809
- buffer_state->_type == GUMBO_NODE_TEXT);
888
+ buffer_state->_type == GUMBO_NODE_TEXT ||
889
+ buffer_state->_type == GUMBO_NODE_CDATA);
810
890
  GumboNode* text_node = create_node(parser, buffer_state->_type);
811
891
  GumboText* text_node_data = &text_node->v.text;
812
- text_node_data->text = gumbo_string_buffer_to_string(
813
- parser, &buffer_state->_buffer);
892
+ text_node_data->text =
893
+ gumbo_string_buffer_to_string(parser, &buffer_state->_buffer);
814
894
  text_node_data->original_text.data = buffer_state->_start_original_text;
815
895
  text_node_data->original_text.length =
816
896
  state->_current_token->original_text.data -
817
897
  buffer_state->_start_original_text;
818
898
  text_node_data->start_pos = buffer_state->_start_position;
819
- if (state->_foster_parent_insertions && node_tag_in(
820
- get_current_node(parser), GUMBO_TAG_TABLE, GUMBO_TAG_TBODY,
821
- GUMBO_TAG_TFOOT, GUMBO_TAG_THEAD, GUMBO_TAG_TR, GUMBO_TAG_LAST)) {
822
- foster_parent_element(parser, text_node);
899
+
900
+ gumbo_debug("Flushing text node buffer of %.*s.\n",
901
+ (int) buffer_state->_buffer.length, buffer_state->_buffer.data);
902
+
903
+ InsertionLocation location = get_appropriate_insertion_location(parser, NULL);
904
+ if (location.target->type == GUMBO_NODE_DOCUMENT) {
905
+ // The DOM does not allow Document nodes to have Text children, so per the
906
+ // spec, they are dropped on the floor.
907
+ destroy_node(parser, text_node);
823
908
  } else {
824
- append_node(
825
- parser, parser->_output->root ?
826
- get_current_node(parser) : parser->_output->document, text_node);
909
+ insert_node(parser, text_node, location);
827
910
  }
828
- gumbo_debug("Flushing text node buffer of %.*s.\n",
829
- (int) buffer_state->_buffer.length, buffer_state->_buffer.data);
830
911
 
831
- gumbo_string_buffer_destroy(parser, &buffer_state->_buffer);
832
- gumbo_string_buffer_init(parser, &buffer_state->_buffer);
912
+ gumbo_string_buffer_clear(parser, &buffer_state->_buffer);
833
913
  buffer_state->_type = GUMBO_NODE_WHITESPACE;
834
914
  assert(buffer_state->_buffer.length == 0);
835
915
  }
@@ -837,18 +917,17 @@ static void maybe_flush_text_node_buffer(GumboParser* parser) {
837
917
  static void record_end_of_element(
838
918
  GumboToken* current_token, GumboElement* element) {
839
919
  element->end_pos = current_token->position;
840
- element->original_end_tag =
841
- current_token->type == GUMBO_TOKEN_END_TAG ?
842
- current_token->original_text : kGumboEmptyString;
920
+ element->original_end_tag = current_token->type == GUMBO_TOKEN_END_TAG
921
+ ? current_token->original_text
922
+ : kGumboEmptyString;
843
923
  }
844
924
 
845
925
  static GumboNode* pop_current_node(GumboParser* parser) {
846
926
  GumboParserState* state = parser->_parser_state;
847
927
  maybe_flush_text_node_buffer(parser);
848
928
  if (state->_open_elements.length > 0) {
849
- assert(node_tag_is(state->_open_elements.data[0], GUMBO_TAG_HTML));
850
- gumbo_debug(
851
- "Popping %s node.\n",
929
+ assert(node_html_tag_is(state->_open_elements.data[0], GUMBO_TAG_HTML));
930
+ gumbo_debug("Popping %s node.\n",
852
931
  gumbo_normalized_tagname(get_current_node(parser)->v.element.tag));
853
932
  }
854
933
  GumboNode* current_node = gumbo_vector_pop(parser, &state->_open_elements);
@@ -856,13 +935,16 @@ static GumboNode* pop_current_node(GumboParser* parser) {
856
935
  assert(state->_open_elements.length == 0);
857
936
  return NULL;
858
937
  }
859
- assert(current_node->type == GUMBO_NODE_ELEMENT);
938
+ assert(current_node->type == GUMBO_NODE_ELEMENT ||
939
+ current_node->type == GUMBO_NODE_TEMPLATE);
860
940
  bool is_closed_body_or_html_tag =
861
- (node_tag_is(current_node, GUMBO_TAG_BODY) && state->_closed_body_tag) ||
862
- (node_tag_is(current_node, GUMBO_TAG_HTML) && state->_closed_html_tag);
941
+ (node_html_tag_is(current_node, GUMBO_TAG_BODY) &&
942
+ state->_closed_body_tag) ||
943
+ (node_html_tag_is(current_node, GUMBO_TAG_HTML) &&
944
+ state->_closed_html_tag);
863
945
  if ((state->_current_token->type != GUMBO_TOKEN_END_TAG ||
864
- !node_tag_is(current_node, state->_current_token->v.end_tag)) &&
865
- !is_closed_body_or_html_tag) {
946
+ !node_html_tag_is(current_node, state->_current_token->v.end_tag)) &&
947
+ !is_closed_body_or_html_tag) {
866
948
  current_node->parse_flags |= GUMBO_INSERTION_IMPLICIT_END_TAG;
867
949
  }
868
950
  if (!is_closed_body_or_html_tag) {
@@ -885,25 +967,25 @@ static void append_comment_node(
885
967
 
886
968
  // http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#clear-the-stack-back-to-a-table-row-context
887
969
  static void clear_stack_to_table_row_context(GumboParser* parser) {
888
- while (!node_tag_in(get_current_node(parser),
889
- GUMBO_TAG_HTML, GUMBO_TAG_TR, GUMBO_TAG_LAST)) {
970
+ while (!node_tag_in_set(get_current_node(parser),
971
+ (gumbo_tagset){TAG(HTML), TAG(TR), TAG(TEMPLATE)})) {
890
972
  pop_current_node(parser);
891
973
  }
892
974
  }
893
975
 
894
976
  // http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#clear-the-stack-back-to-a-table-context
895
977
  static void clear_stack_to_table_context(GumboParser* parser) {
896
- while (!node_tag_in(get_current_node(parser),
897
- GUMBO_TAG_HTML, GUMBO_TAG_TABLE, GUMBO_TAG_LAST)) {
978
+ while (!node_tag_in_set(get_current_node(parser),
979
+ (gumbo_tagset){TAG(HTML), TAG(TABLE), TAG(TEMPLATE)})) {
898
980
  pop_current_node(parser);
899
981
  }
900
982
  }
901
983
 
902
984
  // http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#clear-the-stack-back-to-a-table-body-context
903
985
  void clear_stack_to_table_body_context(GumboParser* parser) {
904
- while (!node_tag_in(get_current_node(parser), GUMBO_TAG_HTML,
905
- GUMBO_TAG_TBODY, GUMBO_TAG_TFOOT, GUMBO_TAG_THEAD,
906
- GUMBO_TAG_LAST)) {
986
+ while (!node_tag_in_set(get_current_node(parser),
987
+ (gumbo_tagset){TAG(HTML), TAG(TBODY), TAG(TFOOT), TAG(THEAD),
988
+ TAG(TEMPLATE)})) {
907
989
  pop_current_node(parser);
908
990
  }
909
991
  }
@@ -918,7 +1000,9 @@ static GumboNode* create_element(GumboParser* parser, GumboTag tag) {
918
1000
  element->tag_namespace = GUMBO_NAMESPACE_HTML;
919
1001
  element->original_tag = kGumboEmptyString;
920
1002
  element->original_end_tag = kGumboEmptyString;
921
- element->start_pos = parser->_parser_state->_current_token->position;
1003
+ element->start_pos = (parser->_parser_state->_current_token)
1004
+ ? parser->_parser_state->_current_token->position
1005
+ : kGumboEmptySourcePosition;
922
1006
  element->end_pos = kGumboEmptySourcePosition;
923
1007
  return node;
924
1008
  }
@@ -929,7 +1013,12 @@ static GumboNode* create_element_from_token(
929
1013
  assert(token->type == GUMBO_TOKEN_START_TAG);
930
1014
  GumboTokenStartTag* start_tag = &token->v.start_tag;
931
1015
 
932
- GumboNode* node = create_node(parser, GUMBO_NODE_ELEMENT);
1016
+ GumboNodeType type = (tag_namespace == GUMBO_NAMESPACE_HTML &&
1017
+ start_tag->tag == GUMBO_TAG_TEMPLATE)
1018
+ ? GUMBO_NODE_TEMPLATE
1019
+ : GUMBO_NODE_ELEMENT;
1020
+
1021
+ GumboNode* node = create_node(parser, type);
933
1022
  GumboElement* element = &node->v.element;
934
1023
  gumbo_vector_init(parser, 1, &element->children);
935
1024
  element->attributes = start_tag->attributes;
@@ -952,7 +1041,7 @@ static GumboNode* create_element_from_token(
952
1041
 
953
1042
  // http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#insert-an-html-element
954
1043
  static void insert_element(GumboParser* parser, GumboNode* node,
955
- bool is_reconstructing_formatting_elements) {
1044
+ bool is_reconstructing_formatting_elements) {
956
1045
  GumboParserState* state = parser->_parser_state;
957
1046
  // NOTE(jdtang): The text node buffer must always be flushed before inserting
958
1047
  // a node, otherwise we're handling nodes in a different order than the spec
@@ -966,20 +1055,8 @@ static void insert_element(GumboParser* parser, GumboNode* node,
966
1055
  if (!is_reconstructing_formatting_elements) {
967
1056
  maybe_flush_text_node_buffer(parser);
968
1057
  }
969
- if (state->_foster_parent_insertions && node_tag_in(
970
- get_current_node(parser), GUMBO_TAG_TABLE, GUMBO_TAG_TBODY,
971
- GUMBO_TAG_TFOOT, GUMBO_TAG_THEAD, GUMBO_TAG_TR, GUMBO_TAG_LAST)) {
972
- foster_parent_element(parser, node);
973
- gumbo_vector_add(parser, (void*) node, &state->_open_elements);
974
- return;
975
- }
976
-
977
- // This is called to insert the root HTML element, but get_current_node
978
- // assumes the stack of open elements is non-empty, so we need special
979
- // handling for this case.
980
- append_node(
981
- parser, parser->_output->root ?
982
- get_current_node(parser) : parser->_output->document, node);
1058
+ InsertionLocation location = get_appropriate_insertion_location(parser, NULL);
1059
+ insert_node(parser, node, location);
983
1060
  gumbo_vector_add(parser, (void*) node, &state->_open_elements);
984
1061
  }
985
1062
 
@@ -992,7 +1069,7 @@ static GumboNode* insert_element_from_token(
992
1069
  create_element_from_token(parser, token, GUMBO_NAMESPACE_HTML);
993
1070
  insert_element(parser, element, false);
994
1071
  gumbo_debug("Inserting <%s> element (@%x) from token.\n",
995
- gumbo_normalized_tagname(element->v.element.tag), element);
1072
+ gumbo_normalized_tagname(element->v.element.tag), element);
996
1073
  return element;
997
1074
  }
998
1075
 
@@ -1005,7 +1082,7 @@ static GumboNode* insert_element_of_tag_type(
1005
1082
  element->parse_flags |= GUMBO_INSERTION_BY_PARSER | reason;
1006
1083
  insert_element(parser, element, false);
1007
1084
  gumbo_debug("Inserting %s element (@%x) from tag type.\n",
1008
- gumbo_normalized_tagname(tag), element);
1085
+ gumbo_normalized_tagname(tag), element);
1009
1086
  return element;
1010
1087
  }
1011
1088
 
@@ -1017,16 +1094,14 @@ static GumboNode* insert_foreign_element(
1017
1094
  GumboNode* element = create_element_from_token(parser, token, tag_namespace);
1018
1095
  insert_element(parser, element, false);
1019
1096
  if (token_has_attribute(token, "xmlns") &&
1020
- !attribute_matches_case_sensitive(
1021
- &token->v.start_tag.attributes, "xmlns",
1097
+ !attribute_matches_case_sensitive(&token->v.start_tag.attributes, "xmlns",
1022
1098
  kLegalXmlns[tag_namespace])) {
1023
1099
  // TODO(jdtang): Since there're multiple possible error codes here, we
1024
1100
  // eventually need reason codes to differentiate them.
1025
1101
  parser_add_parse_error(parser, token);
1026
1102
  }
1027
1103
  if (token_has_attribute(token, "xmlns:xlink") &&
1028
- !attribute_matches_case_sensitive(
1029
- &token->v.start_tag.attributes,
1104
+ !attribute_matches_case_sensitive(&token->v.start_tag.attributes,
1030
1105
  "xmlns:xlink", "http://www.w3.org/1999/xlink")) {
1031
1106
  parser_add_parse_error(parser, token);
1032
1107
  }
@@ -1035,7 +1110,8 @@ static GumboNode* insert_foreign_element(
1035
1110
 
1036
1111
  static void insert_text_token(GumboParser* parser, GumboToken* token) {
1037
1112
  assert(token->type == GUMBO_TOKEN_WHITESPACE ||
1038
- token->type == GUMBO_TOKEN_CHARACTER);
1113
+ token->type == GUMBO_TOKEN_CHARACTER ||
1114
+ token->type == GUMBO_TOKEN_NULL || token->type == GUMBO_TOKEN_CDATA);
1039
1115
  TextNodeBufferState* buffer_state = &parser->_parser_state->_text_node;
1040
1116
  if (buffer_state->_buffer.length == 0) {
1041
1117
  // Initialize position fields.
@@ -1046,6 +1122,8 @@ static void insert_text_token(GumboParser* parser, GumboToken* token) {
1046
1122
  parser, token->v.character, &buffer_state->_buffer);
1047
1123
  if (token->type == GUMBO_TOKEN_CHARACTER) {
1048
1124
  buffer_state->_type = GUMBO_NODE_TEXT;
1125
+ } else if (token->type == GUMBO_TOKEN_CDATA) {
1126
+ buffer_state->_type = GUMBO_NODE_CDATA;
1049
1127
  }
1050
1128
  gumbo_debug("Inserting text token '%c'.\n", token->v.character);
1051
1129
  }
@@ -1068,12 +1146,12 @@ static void acknowledge_self_closing_tag(GumboParser* parser) {
1068
1146
  // elements, and fills in its index if so.
1069
1147
  static bool find_last_anchor_index(GumboParser* parser, int* anchor_index) {
1070
1148
  GumboVector* elements = &parser->_parser_state->_active_formatting_elements;
1071
- for (int i = elements->length; --i >= 0; ) {
1149
+ for (int i = elements->length; --i >= 0;) {
1072
1150
  GumboNode* node = elements->data[i];
1073
1151
  if (node == &kActiveFormattingScopeMarker) {
1074
1152
  return false;
1075
1153
  }
1076
- if (node_tag_is(node, GUMBO_TAG_A)) {
1154
+ if (node_html_tag_is(node, GUMBO_TAG_A)) {
1077
1155
  *anchor_index = i;
1078
1156
  return true;
1079
1157
  }
@@ -1085,23 +1163,21 @@ static bool find_last_anchor_index(GumboParser* parser, int* anchor_index) {
1085
1163
  // formatting elements (after the last active scope marker) that have a specific
1086
1164
  // tag. If this is > 0, then earliest_matching_index will be filled in with the
1087
1165
  // index of the first such element.
1088
- static int count_formatting_elements_of_tag(
1089
- GumboParser* parser, const GumboNode* desired_node,
1090
- int* earliest_matching_index) {
1166
+ static int count_formatting_elements_of_tag(GumboParser* parser,
1167
+ const GumboNode* desired_node, int* earliest_matching_index) {
1091
1168
  const GumboElement* desired_element = &desired_node->v.element;
1092
1169
  GumboVector* elements = &parser->_parser_state->_active_formatting_elements;
1093
1170
  int num_identical_elements = 0;
1094
- for (int i = elements->length; --i >= 0; ) {
1171
+ for (int i = elements->length; --i >= 0;) {
1095
1172
  GumboNode* node = elements->data[i];
1096
1173
  if (node == &kActiveFormattingScopeMarker) {
1097
1174
  break;
1098
1175
  }
1099
1176
  assert(node->type == GUMBO_NODE_ELEMENT);
1100
- GumboElement* element = &node->v.element;
1101
- if (node_tag_is(node, desired_element->tag) &&
1102
- element->tag_namespace == desired_element->tag_namespace &&
1103
- all_attributes_match(&element->attributes,
1104
- &desired_element->attributes)) {
1177
+ if (node_qualified_tag_is(
1178
+ node, desired_element->tag_namespace, desired_element->tag) &&
1179
+ all_attributes_match(
1180
+ &node->v.element.attributes, &desired_element->attributes)) {
1105
1181
  num_identical_elements++;
1106
1182
  *earliest_matching_index = i;
1107
1183
  }
@@ -1128,7 +1204,7 @@ static void add_formatting_element(GumboParser* parser, const GumboNode* node) {
1128
1204
  // Noah's Ark clause: if there're at least 3, remove the earliest.
1129
1205
  if (num_identical_elements >= 3) {
1130
1206
  gumbo_debug("Noah's ark clause: removing element at %d.\n",
1131
- earliest_identical_element);
1207
+ earliest_identical_element);
1132
1208
  gumbo_vector_remove_at(parser, earliest_identical_element, elements);
1133
1209
  }
1134
1210
 
@@ -1137,7 +1213,7 @@ static void add_formatting_element(GumboParser* parser, const GumboNode* node) {
1137
1213
 
1138
1214
  static bool is_open_element(GumboParser* parser, const GumboNode* node) {
1139
1215
  GumboVector* open_elements = &parser->_parser_state->_open_elements;
1140
- for (int i = 0; i < open_elements->length; ++i) {
1216
+ for (unsigned int i = 0; i < open_elements->length; ++i) {
1141
1217
  if (open_elements->data[i] == node) {
1142
1218
  return true;
1143
1219
  }
@@ -1149,8 +1225,8 @@ static bool is_open_element(GumboParser* parser, const GumboNode* node) {
1149
1225
  // clone shares no structure with the original node: all owned strings and
1150
1226
  // values are fresh copies.
1151
1227
  GumboNode* clone_node(
1152
- GumboParser* parser, const GumboNode* node, GumboParseFlags reason) {
1153
- assert(node->type == GUMBO_NODE_ELEMENT);
1228
+ GumboParser* parser, GumboNode* node, GumboParseFlags reason) {
1229
+ assert(node->type == GUMBO_NODE_ELEMENT || node->type == GUMBO_NODE_TEMPLATE);
1154
1230
  GumboNode* new_node = gumbo_parser_allocate(parser, sizeof(GumboNode));
1155
1231
  *new_node = *node;
1156
1232
  new_node->parent = NULL;
@@ -1164,7 +1240,7 @@ GumboNode* clone_node(
1164
1240
 
1165
1241
  const GumboVector* old_attributes = &node->v.element.attributes;
1166
1242
  gumbo_vector_init(parser, old_attributes->length, &element->attributes);
1167
- for (int i = 0; i < old_attributes->length; ++i) {
1243
+ for (unsigned int i = 0; i < old_attributes->length; ++i) {
1168
1244
  const GumboAttribute* old_attr = old_attributes->data[i];
1169
1245
  GumboAttribute* attr =
1170
1246
  gumbo_parser_allocate(parser, sizeof(GumboAttribute));
@@ -1188,8 +1264,8 @@ static void reconstruct_active_formatting_elements(GumboParser* parser) {
1188
1264
  }
1189
1265
 
1190
1266
  // Step 2 & 3
1191
- int i = elements->length - 1;
1192
- const GumboNode* element = elements->data[i];
1267
+ unsigned int i = elements->length - 1;
1268
+ GumboNode* element = elements->data[i];
1193
1269
  if (element == &kActiveFormattingScopeMarker ||
1194
1270
  is_open_element(parser, element)) {
1195
1271
  return;
@@ -1199,7 +1275,7 @@ static void reconstruct_active_formatting_elements(GumboParser* parser) {
1199
1275
  do {
1200
1276
  if (i == 0) {
1201
1277
  // Step 4
1202
- i = -1; // Incremented to 0 below.
1278
+ i = -1; // Incremented to 0 below.
1203
1279
  break;
1204
1280
  }
1205
1281
  // Step 5
@@ -1209,9 +1285,8 @@ static void reconstruct_active_formatting_elements(GumboParser* parser) {
1209
1285
 
1210
1286
  ++i;
1211
1287
  gumbo_debug("Reconstructing elements from %d on %s parent.\n", i,
1212
- gumbo_normalized_tagname(
1213
- get_current_node(parser)->v.element.tag));
1214
- for(; i < elements->length; ++i) {
1288
+ gumbo_normalized_tagname(get_current_node(parser)->v.element.tag));
1289
+ for (; i < elements->length; ++i) {
1215
1290
  // Step 7 & 8.
1216
1291
  assert(elements->length > 0);
1217
1292
  assert(i < elements->length);
@@ -1220,11 +1295,16 @@ static void reconstruct_active_formatting_elements(GumboParser* parser) {
1220
1295
  GumboNode* clone = clone_node(
1221
1296
  parser, element, GUMBO_INSERTION_RECONSTRUCTED_FORMATTING_ELEMENT);
1222
1297
  // Step 9.
1223
- insert_element(parser, clone, true);
1298
+ InsertionLocation location =
1299
+ get_appropriate_insertion_location(parser, NULL);
1300
+ insert_node(parser, clone, location);
1301
+ gumbo_vector_add(
1302
+ parser, (void*) clone, &parser->_parser_state->_open_elements);
1303
+
1224
1304
  // Step 10.
1225
1305
  elements->data[i] = clone;
1226
1306
  gumbo_debug("Reconstructed %s element at %d.\n",
1227
- gumbo_normalized_tagname(clone->v.element.tag), i);
1307
+ gumbo_normalized_tagname(clone->v.element.tag), i);
1228
1308
  }
1229
1309
  }
1230
1310
 
@@ -1235,32 +1315,30 @@ static void clear_active_formatting_elements(GumboParser* parser) {
1235
1315
  do {
1236
1316
  node = gumbo_vector_pop(parser, elements);
1237
1317
  ++num_elements_cleared;
1238
- } while(node && node != &kActiveFormattingScopeMarker);
1318
+ } while (node && node != &kActiveFormattingScopeMarker);
1239
1319
  gumbo_debug("Cleared %d elements from active formatting list.\n",
1240
- num_elements_cleared);
1320
+ num_elements_cleared);
1241
1321
  }
1242
1322
 
1243
1323
  // http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#the-initial-insertion-mode
1244
1324
  static GumboQuirksModeEnum compute_quirks_mode(
1245
1325
  const GumboTokenDocType* doctype) {
1246
- if (doctype->force_quirks ||
1247
- strcmp(doctype->name, kDoctypeHtml.data) ||
1248
- is_in_static_list(doctype->public_identifier,
1249
- kQuirksModePublicIdPrefixes, false) ||
1250
- is_in_static_list(doctype->public_identifier,
1251
- kQuirksModePublicIdExactMatches, true) ||
1252
- is_in_static_list(doctype->system_identifier,
1253
- kQuirksModeSystemIdExactMatches, true) ||
1326
+ if (doctype->force_quirks || strcmp(doctype->name, kDoctypeHtml.data) ||
1327
+ is_in_static_list(
1328
+ doctype->public_identifier, kQuirksModePublicIdPrefixes, false) ||
1329
+ is_in_static_list(
1330
+ doctype->public_identifier, kQuirksModePublicIdExactMatches, true) ||
1331
+ is_in_static_list(
1332
+ doctype->system_identifier, kQuirksModeSystemIdExactMatches, true) ||
1254
1333
  (is_in_static_list(doctype->public_identifier,
1255
- kLimitedQuirksRequiresSystemIdPublicIdPrefixes, false)
1256
- && !doctype->has_system_identifier)) {
1334
+ kLimitedQuirksRequiresSystemIdPublicIdPrefixes, false) &&
1335
+ !doctype->has_system_identifier)) {
1257
1336
  return GUMBO_DOCTYPE_QUIRKS;
1258
- } else if (
1259
- is_in_static_list(doctype->public_identifier,
1260
- kLimitedQuirksPublicIdPrefixes, false) ||
1261
- (is_in_static_list(doctype->public_identifier,
1262
- kLimitedQuirksRequiresSystemIdPublicIdPrefixes, false)
1263
- && doctype->has_system_identifier)) {
1337
+ } else if (is_in_static_list(doctype->public_identifier,
1338
+ kLimitedQuirksPublicIdPrefixes, false) ||
1339
+ (is_in_static_list(doctype->public_identifier,
1340
+ kLimitedQuirksRequiresSystemIdPublicIdPrefixes, false) &&
1341
+ doctype->has_system_identifier)) {
1264
1342
  return GUMBO_DOCTYPE_LIMITED_QUIRKS;
1265
1343
  }
1266
1344
  return GUMBO_DOCTYPE_NO_QUIRKS;
@@ -1269,83 +1347,50 @@ static GumboQuirksModeEnum compute_quirks_mode(
1269
1347
  // The following functions are all defined by the "has an element in __ scope"
1270
1348
  // sections of the HTML5 spec:
1271
1349
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-the-specific-scope
1272
- // The basic idea behind them is that they check for an element of the given tag
1273
- // name, contained within a scope formed by a set of other tag names. For
1274
- // example, "has an element in list scope" looks for an element of the given tag
1275
- // within the nearest enclosing <ol> or <ul>, along with a bunch of generic
1276
- // element types that serve to "firewall" their content from the rest of the
1277
- // document.
1278
- static bool has_an_element_in_specific_scope(
1279
- GumboParser* parser, GumboVector* /* GumboTag */ expected, bool negate, ...) {
1350
+ // The basic idea behind them is that they check for an element of the given
1351
+ // qualified name, contained within a scope formed by a set of other qualified
1352
+ // names. For example, "has an element in list scope" looks for an element of
1353
+ // the given qualified name within the nearest enclosing <ol> or <ul>, along
1354
+ // with a bunch of generic element types that serve to "firewall" their content
1355
+ // from the rest of the document. Note that because of the way the spec is
1356
+ // written,
1357
+ // all elements are expected to be in the HTML namespace
1358
+ static bool has_an_element_in_specific_scope(GumboParser* parser,
1359
+ int expected_size, const GumboTag* expected, bool negate,
1360
+ const gumbo_tagset tags) {
1280
1361
  GumboVector* open_elements = &parser->_parser_state->_open_elements;
1281
- va_list args;
1282
- va_start(args, negate);
1283
- // va_arg can only run through the list once, so we copy it to an GumboVector
1284
- // here. I wonder if it'd make more sense to make tags the GumboVector*
1285
- // parameter and 'expected' a vararg list, but that'd require changing a lot
1286
- // of code for unknown benefit. We may want to change the representation of
1287
- // these tag sets anyway, to something more efficient.
1288
- GumboVector tags;
1289
- gumbo_vector_init(parser, 10, &tags);
1290
- for (GumboTag tag = va_arg(args, GumboTag); tag != GUMBO_TAG_LAST;
1291
- tag = va_arg(args, GumboTag)) {
1292
- // We store the tags inline instead of storing pointers to them.
1293
- gumbo_vector_add(parser, (void*) tag, &tags);
1294
- }
1295
- va_end(args);
1296
-
1297
- bool result = false;
1298
- for (int i = open_elements->length; --i >= 0; ) {
1362
+ for (int i = open_elements->length; --i >= 0;) {
1299
1363
  const GumboNode* node = open_elements->data[i];
1300
- if (node->type != GUMBO_NODE_ELEMENT) {
1364
+ if (node->type != GUMBO_NODE_ELEMENT && node->type != GUMBO_NODE_TEMPLATE)
1301
1365
  continue;
1302
- }
1366
+
1303
1367
  GumboTag node_tag = node->v.element.tag;
1304
- for (int j = 0; j < expected->length; ++j) {
1305
- GumboTag expected_tag = (GumboTag) expected->data[j];
1306
- if (node_tag == expected_tag) {
1307
- result = true;
1308
- goto cleanup;
1309
- }
1368
+ GumboNamespaceEnum node_ns = node->v.element.tag_namespace;
1369
+ for (int j = 0; j < expected_size; ++j) {
1370
+ if (node_tag == expected[j] && node_ns == GUMBO_NAMESPACE_HTML)
1371
+ return true;
1310
1372
  }
1311
1373
 
1312
- bool found_tag = false;
1313
- for (int j = 0; j < tags.length; ++j) {
1314
- GumboTag tag = (GumboTag) tags.data[j];
1315
- if (tag == node_tag) {
1316
- found_tag = true;
1317
- break;
1318
- }
1319
- }
1320
- if (negate != found_tag) {
1321
- result = false;
1322
- goto cleanup;
1323
- }
1374
+ bool found = TAGSET_INCLUDES(tags, node_ns, node_tag);
1375
+ if (negate != found) return false;
1324
1376
  }
1325
- cleanup:
1326
- gumbo_vector_destroy(parser, &tags);
1327
- return result;
1377
+ return false;
1328
1378
  }
1329
1379
 
1330
- // This is a bit of a hack to stack-allocate a one-element GumboVector name
1331
- // 'varname' containing the 'from_var' variable, since it's used in nearly all
1332
- // the subsequent helper functions. Note the use of void* and casts instead of
1333
- // GumboTag; this is so the alignment requirements are the same as GumboVector
1334
- // and the data inside it can be freely accessed as if it were a normal
1335
- // GumboVector.
1336
- #define DECLARE_ONE_ELEMENT_GUMBO_VECTOR(varname, from_var) \
1337
- void* varname ## _tmp_array[1] = { (void*) from_var }; \
1338
- GumboVector varname = { varname ## _tmp_array, 1, 1 }
1380
+ // Checks for the presence of an open element of the specified tag type.
1381
+ static bool has_open_element(GumboParser* parser, GumboTag tag) {
1382
+ return has_an_element_in_specific_scope(
1383
+ parser, 1, &tag, false, (gumbo_tagset){TAG(HTML)});
1384
+ }
1339
1385
 
1340
1386
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-scope
1341
1387
  static bool has_an_element_in_scope(GumboParser* parser, GumboTag tag) {
1342
- DECLARE_ONE_ELEMENT_GUMBO_VECTOR(tags, tag);
1343
- return has_an_element_in_specific_scope(
1344
- parser, &tags, false, GUMBO_TAG_APPLET, GUMBO_TAG_CAPTION, GUMBO_TAG_HTML,
1345
- GUMBO_TAG_TABLE, GUMBO_TAG_TD, GUMBO_TAG_TH, GUMBO_TAG_MARQUEE,
1346
- GUMBO_TAG_OBJECT, GUMBO_TAG_MI, GUMBO_TAG_MO, GUMBO_TAG_MN, GUMBO_TAG_MS,
1347
- GUMBO_TAG_MTEXT, GUMBO_TAG_ANNOTATION_XML, GUMBO_TAG_FOREIGNOBJECT,
1348
- GUMBO_TAG_DESC, GUMBO_TAG_TITLE, GUMBO_TAG_LAST);
1388
+ return has_an_element_in_specific_scope(parser, 1, &tag, false,
1389
+ (gumbo_tagset){TAG(APPLET), TAG(CAPTION), TAG(HTML), TAG(TABLE), TAG(TD),
1390
+ TAG(TH), TAG(MARQUEE), TAG(OBJECT), TAG(TEMPLATE), TAG_MATHML(MI),
1391
+ TAG_MATHML(MO), TAG_MATHML(MN), TAG_MATHML(MS), TAG_MATHML(MTEXT),
1392
+ TAG_MATHML(ANNOTATION_XML), TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC),
1393
+ TAG_SVG(TITLE)});
1349
1394
  }
1350
1395
 
1351
1396
  // Like "has an element in scope", but for the specific case of looking for a
@@ -1356,21 +1401,21 @@ static bool has_an_element_in_scope(GumboParser* parser, GumboTag tag) {
1356
1401
  // parameterize it.
1357
1402
  static bool has_node_in_scope(GumboParser* parser, const GumboNode* node) {
1358
1403
  GumboVector* open_elements = &parser->_parser_state->_open_elements;
1359
- for (int i = open_elements->length; --i >= 0; ) {
1404
+ for (int i = open_elements->length; --i >= 0;) {
1360
1405
  const GumboNode* current = open_elements->data[i];
1361
1406
  if (current == node) {
1362
1407
  return true;
1363
1408
  }
1364
- if (current->type != GUMBO_NODE_ELEMENT) {
1409
+ if (current->type != GUMBO_NODE_ELEMENT &&
1410
+ current->type != GUMBO_NODE_TEMPLATE) {
1365
1411
  continue;
1366
1412
  }
1367
- if (node_tag_in(
1368
- current, GUMBO_TAG_APPLET, GUMBO_TAG_CAPTION, GUMBO_TAG_HTML,
1369
- GUMBO_TAG_TABLE, GUMBO_TAG_TD, GUMBO_TAG_TH, GUMBO_TAG_MARQUEE,
1370
- GUMBO_TAG_OBJECT, GUMBO_TAG_MI, GUMBO_TAG_MO, GUMBO_TAG_MN,
1371
- GUMBO_TAG_MS, GUMBO_TAG_MTEXT, GUMBO_TAG_ANNOTATION_XML,
1372
- GUMBO_TAG_FOREIGNOBJECT, GUMBO_TAG_DESC, GUMBO_TAG_TITLE,
1373
- GUMBO_TAG_LAST)) {
1413
+ if (node_tag_in_set(current,
1414
+ (gumbo_tagset){TAG(APPLET), TAG(CAPTION), TAG(HTML), TAG(TABLE),
1415
+ TAG(TD), TAG(TH), TAG(MARQUEE), TAG(OBJECT), TAG(TEMPLATE),
1416
+ TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN), TAG_MATHML(MS),
1417
+ TAG_MATHML(MTEXT), TAG_MATHML(ANNOTATION_XML),
1418
+ TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC), TAG_SVG(TITLE)})) {
1374
1419
  return false;
1375
1420
  }
1376
1421
  }
@@ -1378,79 +1423,72 @@ static bool has_node_in_scope(GumboParser* parser, const GumboNode* node) {
1378
1423
  return false;
1379
1424
  }
1380
1425
 
1381
- // Like has_an_element_in_scope, but restricts the expected tag to a range of
1382
- // possible tag names instead of just a single one.
1383
- static bool has_an_element_in_scope_with_tagname(GumboParser* parser, ...) {
1384
- GumboVector tags;
1385
- // 6 = arbitrary initial size for vector, chosen because the major use-case
1386
- // for this method is heading tags, of which there are 6.
1387
- gumbo_vector_init(parser, 6, &tags);
1388
- va_list args;
1389
- va_start(args, parser);
1390
- for (GumboTag tag = va_arg(args, GumboTag); tag != GUMBO_TAG_LAST;
1391
- tag = va_arg(args, GumboTag)) {
1392
- gumbo_vector_add(parser, (void*) tag, &tags);
1393
- }
1394
- bool found = has_an_element_in_specific_scope(
1395
- parser, &tags, false, GUMBO_TAG_APPLET, GUMBO_TAG_CAPTION, GUMBO_TAG_HTML,
1396
- GUMBO_TAG_TABLE, GUMBO_TAG_TD, GUMBO_TAG_TH, GUMBO_TAG_MARQUEE,
1397
- GUMBO_TAG_OBJECT, GUMBO_TAG_MI, GUMBO_TAG_MO, GUMBO_TAG_MN, GUMBO_TAG_MS,
1398
- GUMBO_TAG_MTEXT, GUMBO_TAG_ANNOTATION_XML, GUMBO_TAG_FOREIGNOBJECT,
1399
- GUMBO_TAG_DESC, GUMBO_TAG_TITLE, GUMBO_TAG_LAST);
1400
- gumbo_vector_destroy(parser, &tags);
1401
- va_end(args);
1402
- return found;
1426
+ // Like has_an_element_in_scope, but restricts the expected qualified name to a
1427
+ // range of possible qualified names instead of just a single one.
1428
+ static bool has_an_element_in_scope_with_tagname(
1429
+ GumboParser* parser, int expected_len, const GumboTag expected[]) {
1430
+ return has_an_element_in_specific_scope(parser, expected_len, expected, false,
1431
+ (gumbo_tagset){TAG(APPLET), TAG(CAPTION), TAG(HTML), TAG(TABLE), TAG(TD),
1432
+ TAG(TH), TAG(MARQUEE), TAG(OBJECT), TAG(TEMPLATE), TAG_MATHML(MI),
1433
+ TAG_MATHML(MO), TAG_MATHML(MN), TAG_MATHML(MS), TAG_MATHML(MTEXT),
1434
+ TAG_MATHML(ANNOTATION_XML), TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC),
1435
+ TAG_SVG(TITLE)});
1403
1436
  }
1404
1437
 
1405
1438
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-list-item-scope
1406
1439
  static bool has_an_element_in_list_scope(GumboParser* parser, GumboTag tag) {
1407
- DECLARE_ONE_ELEMENT_GUMBO_VECTOR(tags, tag);
1408
- return has_an_element_in_specific_scope(
1409
- parser, &tags, false, GUMBO_TAG_APPLET, GUMBO_TAG_CAPTION, GUMBO_TAG_HTML,
1410
- GUMBO_TAG_TABLE, GUMBO_TAG_TD, GUMBO_TAG_TH, GUMBO_TAG_MARQUEE,
1411
- GUMBO_TAG_OBJECT, GUMBO_TAG_MI, GUMBO_TAG_MO, GUMBO_TAG_MN, GUMBO_TAG_MS,
1412
- GUMBO_TAG_MTEXT, GUMBO_TAG_ANNOTATION_XML, GUMBO_TAG_FOREIGNOBJECT,
1413
- GUMBO_TAG_DESC, GUMBO_TAG_TITLE, GUMBO_TAG_OL, GUMBO_TAG_UL,
1414
- GUMBO_TAG_LAST);
1440
+ return has_an_element_in_specific_scope(parser, 1, &tag, false,
1441
+ (gumbo_tagset){TAG(APPLET), TAG(CAPTION), TAG(HTML), TAG(TABLE), TAG(TD),
1442
+ TAG(TH), TAG(MARQUEE), TAG(OBJECT), TAG(TEMPLATE), TAG_MATHML(MI),
1443
+ TAG_MATHML(MO), TAG_MATHML(MN), TAG_MATHML(MS), TAG_MATHML(MTEXT),
1444
+ TAG_MATHML(ANNOTATION_XML), TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC),
1445
+ TAG_SVG(TITLE), TAG(OL), TAG(UL)});
1415
1446
  }
1416
1447
 
1417
1448
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-button-scope
1418
1449
  static bool has_an_element_in_button_scope(GumboParser* parser, GumboTag tag) {
1419
- DECLARE_ONE_ELEMENT_GUMBO_VECTOR(tags, tag);
1420
- return has_an_element_in_specific_scope(
1421
- parser, &tags, false, GUMBO_TAG_APPLET, GUMBO_TAG_CAPTION, GUMBO_TAG_HTML,
1422
- GUMBO_TAG_TABLE, GUMBO_TAG_TD, GUMBO_TAG_TH, GUMBO_TAG_MARQUEE,
1423
- GUMBO_TAG_OBJECT, GUMBO_TAG_MI, GUMBO_TAG_MO, GUMBO_TAG_MN, GUMBO_TAG_MS,
1424
- GUMBO_TAG_MTEXT, GUMBO_TAG_ANNOTATION_XML, GUMBO_TAG_FOREIGNOBJECT,
1425
- GUMBO_TAG_DESC, GUMBO_TAG_TITLE, GUMBO_TAG_BUTTON, GUMBO_TAG_LAST);
1450
+ return has_an_element_in_specific_scope(parser, 1, &tag, false,
1451
+ (gumbo_tagset){TAG(APPLET), TAG(CAPTION), TAG(HTML), TAG(TABLE), TAG(TD),
1452
+ TAG(TH), TAG(MARQUEE), TAG(OBJECT), TAG(TEMPLATE), TAG_MATHML(MI),
1453
+ TAG_MATHML(MO), TAG_MATHML(MN), TAG_MATHML(MS), TAG_MATHML(MTEXT),
1454
+ TAG_MATHML(ANNOTATION_XML), TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC),
1455
+ TAG_SVG(TITLE), TAG(BUTTON)});
1426
1456
  }
1427
1457
 
1428
1458
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-table-scope
1429
1459
  static bool has_an_element_in_table_scope(GumboParser* parser, GumboTag tag) {
1430
- DECLARE_ONE_ELEMENT_GUMBO_VECTOR(tags, tag);
1431
- return has_an_element_in_specific_scope(
1432
- parser, &tags, false, GUMBO_TAG_HTML, GUMBO_TAG_TABLE, GUMBO_TAG_LAST);
1460
+ return has_an_element_in_specific_scope(parser, 1, &tag, false,
1461
+ (gumbo_tagset){TAG(HTML), TAG(TABLE), TAG(TEMPLATE)});
1433
1462
  }
1434
1463
 
1435
1464
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-select-scope
1436
1465
  static bool has_an_element_in_select_scope(GumboParser* parser, GumboTag tag) {
1437
- DECLARE_ONE_ELEMENT_GUMBO_VECTOR(tags, tag);
1438
1466
  return has_an_element_in_specific_scope(
1439
- parser, &tags, true, GUMBO_TAG_OPTGROUP, GUMBO_TAG_OPTION,
1440
- GUMBO_TAG_LAST);
1467
+ parser, 1, &tag, true, (gumbo_tagset){TAG(OPTGROUP), TAG(OPTION)});
1441
1468
  }
1442
1469
 
1443
-
1444
1470
  // http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#generate-implied-end-tags
1445
1471
  // "exception" is the "element to exclude from the process" listed in the spec.
1446
1472
  // Pass GUMBO_TAG_LAST to not exclude any of them.
1447
1473
  static void generate_implied_end_tags(GumboParser* parser, GumboTag exception) {
1448
- for (;
1449
- node_tag_in(get_current_node(parser), GUMBO_TAG_DD, GUMBO_TAG_DT,
1450
- GUMBO_TAG_LI, GUMBO_TAG_OPTION, GUMBO_TAG_OPTGROUP,
1451
- GUMBO_TAG_P, GUMBO_TAG_RP, GUMBO_TAG_RT, GUMBO_TAG_LAST) &&
1452
- !node_tag_is(get_current_node(parser), exception);
1453
- pop_current_node(parser));
1474
+ for (; node_tag_in_set(get_current_node(parser),
1475
+ (gumbo_tagset){TAG(DD), TAG(DT), TAG(LI), TAG(OPTION),
1476
+ TAG(OPTGROUP), TAG(P), TAG(RP), TAG(RB), TAG(RT), TAG(RTC)}) &&
1477
+ !node_html_tag_is(get_current_node(parser), exception);
1478
+ pop_current_node(parser))
1479
+ ;
1480
+ }
1481
+
1482
+ // This is the "generate all implied end tags thoroughly" clause of the spec.
1483
+ // https://html.spec.whatwg.org/multipage/syntax.html#closing-elements-that-have-implied-end-tags
1484
+ static void generate_all_implied_end_tags_thoroughly(GumboParser* parser) {
1485
+ for (
1486
+ ; node_tag_in_set(get_current_node(parser),
1487
+ (gumbo_tagset){TAG(CAPTION), TAG(COLGROUP), TAG(DD), TAG(DT), TAG(LI),
1488
+ TAG(OPTION), TAG(OPTGROUP), TAG(P), TAG(RP), TAG(RT), TAG(RTC),
1489
+ TAG(TBODY), TAG(TD), TAG(TFOOT), TAG(TH), TAG(HEAD), TAG(TR)});
1490
+ pop_current_node(parser))
1491
+ ;
1454
1492
  }
1455
1493
 
1456
1494
  // This factors out the clauses relating to "act as if an end tag token with tag
@@ -1463,7 +1501,7 @@ static bool close_table(GumboParser* parser) {
1463
1501
  }
1464
1502
 
1465
1503
  GumboNode* node = pop_current_node(parser);
1466
- while (!node_tag_is(node, GUMBO_TAG_TABLE)) {
1504
+ while (!node_html_tag_is(node, GUMBO_TAG_TABLE)) {
1467
1505
  node = pop_current_node(parser);
1468
1506
  }
1469
1507
  reset_insertion_mode_appropriately(parser);
@@ -1472,18 +1510,18 @@ static bool close_table(GumboParser* parser) {
1472
1510
 
1473
1511
  // This factors out the clauses relating to "act as if an end tag token with tag
1474
1512
  // name `cell_tag` had been seen".
1475
- static bool close_table_cell(GumboParser* parser, const GumboToken* token,
1476
- GumboTag cell_tag) {
1513
+ static bool close_table_cell(
1514
+ GumboParser* parser, const GumboToken* token, GumboTag cell_tag) {
1477
1515
  bool result = true;
1478
1516
  generate_implied_end_tags(parser, GUMBO_TAG_LAST);
1479
1517
  const GumboNode* node = get_current_node(parser);
1480
- if (!node_tag_is(node, cell_tag)) {
1518
+ if (!node_html_tag_is(node, cell_tag)) {
1481
1519
  parser_add_parse_error(parser, token);
1482
1520
  result = false;
1483
1521
  }
1484
1522
  do {
1485
1523
  node = pop_current_node(parser);
1486
- } while (!node_tag_is(node, cell_tag));
1524
+ } while (!node_html_tag_is(node, cell_tag));
1487
1525
 
1488
1526
  clear_active_formatting_elements(parser);
1489
1527
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_ROW);
@@ -1508,7 +1546,7 @@ static bool close_current_cell(GumboParser* parser, const GumboToken* token) {
1508
1546
  // resets the insertion mode appropriately.
1509
1547
  static void close_current_select(GumboParser* parser) {
1510
1548
  GumboNode* node = pop_current_node(parser);
1511
- while (!node_tag_is(node, GUMBO_TAG_SELECT)) {
1549
+ while (!node_html_tag_is(node, GUMBO_TAG_SELECT)) {
1512
1550
  node = pop_current_node(parser);
1513
1551
  }
1514
1552
  reset_insertion_mode_appropriately(parser);
@@ -1517,60 +1555,48 @@ static void close_current_select(GumboParser* parser) {
1517
1555
  // The list of nodes in the "special" category:
1518
1556
  // http://www.whatwg.org/specs/web-apps/current-work/complete/parsing.html#special
1519
1557
  static bool is_special_node(const GumboNode* node) {
1520
- assert(node->type == GUMBO_NODE_ELEMENT);
1521
- switch (node->v.element.tag_namespace) {
1522
- case GUMBO_NAMESPACE_HTML:
1523
- return node_tag_in(node,
1524
- GUMBO_TAG_ADDRESS, GUMBO_TAG_APPLET, GUMBO_TAG_AREA,
1525
- GUMBO_TAG_ARTICLE, GUMBO_TAG_ASIDE, GUMBO_TAG_BASE,
1526
- GUMBO_TAG_BASEFONT, GUMBO_TAG_BGSOUND, GUMBO_TAG_BLOCKQUOTE,
1527
- GUMBO_TAG_BODY, GUMBO_TAG_BR, GUMBO_TAG_BUTTON, GUMBO_TAG_CAPTION,
1528
- GUMBO_TAG_CENTER, GUMBO_TAG_COL, GUMBO_TAG_COLGROUP,
1529
- GUMBO_TAG_MENUITEM, GUMBO_TAG_DD, GUMBO_TAG_DETAILS, GUMBO_TAG_DIR,
1530
- GUMBO_TAG_DIV, GUMBO_TAG_DL, GUMBO_TAG_DT, GUMBO_TAG_EMBED,
1531
- GUMBO_TAG_FIELDSET, GUMBO_TAG_FIGCAPTION, GUMBO_TAG_FIGURE,
1532
- GUMBO_TAG_FOOTER, GUMBO_TAG_FORM, GUMBO_TAG_FRAME,
1533
- GUMBO_TAG_FRAMESET, GUMBO_TAG_H1, GUMBO_TAG_H2, GUMBO_TAG_H3,
1534
- GUMBO_TAG_H4, GUMBO_TAG_H5, GUMBO_TAG_H6, GUMBO_TAG_HEAD,
1535
- GUMBO_TAG_HEADER, GUMBO_TAG_HGROUP, GUMBO_TAG_HR, GUMBO_TAG_HTML,
1536
- GUMBO_TAG_IFRAME, GUMBO_TAG_IMG, GUMBO_TAG_INPUT, GUMBO_TAG_ISINDEX,
1537
- GUMBO_TAG_LI, GUMBO_TAG_LINK, GUMBO_TAG_LISTING, GUMBO_TAG_MARQUEE,
1538
- GUMBO_TAG_MENU, GUMBO_TAG_META, GUMBO_TAG_NAV, GUMBO_TAG_NOEMBED,
1539
- GUMBO_TAG_NOFRAMES, GUMBO_TAG_NOSCRIPT, GUMBO_TAG_OBJECT,
1540
- GUMBO_TAG_OL, GUMBO_TAG_P, GUMBO_TAG_PARAM, GUMBO_TAG_PLAINTEXT,
1541
- GUMBO_TAG_PRE, GUMBO_TAG_SCRIPT, GUMBO_TAG_SECTION, GUMBO_TAG_SELECT,
1542
- GUMBO_TAG_STYLE, GUMBO_TAG_SUMMARY, GUMBO_TAG_TABLE, GUMBO_TAG_TBODY,
1543
- GUMBO_TAG_TD, GUMBO_TAG_TEXTAREA, GUMBO_TAG_TFOOT, GUMBO_TAG_TH,
1544
- GUMBO_TAG_THEAD, GUMBO_TAG_TITLE, GUMBO_TAG_TR, GUMBO_TAG_UL,
1545
- GUMBO_TAG_WBR, GUMBO_TAG_XMP, GUMBO_TAG_LAST);
1546
- case GUMBO_NAMESPACE_MATHML:
1547
- return node_tag_in(node,
1548
- GUMBO_TAG_MI, GUMBO_TAG_MO, GUMBO_TAG_MN, GUMBO_TAG_MS,
1549
- GUMBO_TAG_MTEXT, GUMBO_TAG_ANNOTATION_XML, GUMBO_TAG_LAST);
1550
- case GUMBO_NAMESPACE_SVG:
1551
- return node_tag_in(node,
1552
- GUMBO_TAG_FOREIGNOBJECT, GUMBO_TAG_DESC, GUMBO_TAG_LAST);
1553
- }
1554
- abort();
1555
- return false; // Pacify compiler.
1556
- }
1557
-
1558
- // Implicitly closes currently open tags until it reaches an element with the
1559
- // specified tag name. If the elements closed are in the set handled by
1558
+ assert(node->type == GUMBO_NODE_ELEMENT || node->type == GUMBO_NODE_TEMPLATE);
1559
+ return node_tag_in_set(node,
1560
+ (gumbo_tagset){TAG(ADDRESS), TAG(APPLET), TAG(AREA), TAG(ARTICLE),
1561
+ TAG(ASIDE), TAG(BASE), TAG(BASEFONT), TAG(BGSOUND), TAG(BLOCKQUOTE),
1562
+ TAG(BODY), TAG(BR), TAG(BUTTON), TAG(CAPTION), TAG(CENTER), TAG(COL),
1563
+ TAG(COLGROUP), TAG(MENUITEM), TAG(DD), TAG(DETAILS), TAG(DIR),
1564
+ TAG(DIV), TAG(DL), TAG(DT), TAG(EMBED), TAG(FIELDSET),
1565
+ TAG(FIGCAPTION), TAG(FIGURE), TAG(FOOTER), TAG(FORM), TAG(FRAME),
1566
+ TAG(FRAMESET), TAG(H1), TAG(H2), TAG(H3), TAG(H4), TAG(H5), TAG(H6),
1567
+ TAG(HEAD), TAG(HEADER), TAG(HGROUP), TAG(HR), TAG(HTML), TAG(IFRAME),
1568
+ TAG(IMG), TAG(INPUT), TAG(ISINDEX), TAG(LI), TAG(LINK), TAG(LISTING),
1569
+ TAG(MARQUEE), TAG(MENU), TAG(META), TAG(NAV), TAG(NOEMBED),
1570
+ TAG(NOFRAMES), TAG(NOSCRIPT), TAG(OBJECT), TAG(OL), TAG(P),
1571
+ TAG(PARAM), TAG(PLAINTEXT), TAG(PRE), TAG(SCRIPT), TAG(SECTION),
1572
+ TAG(SELECT), TAG(STYLE), TAG(SUMMARY), TAG(TABLE), TAG(TBODY),
1573
+ TAG(TD), TAG(TEMPLATE), TAG(TEXTAREA), TAG(TFOOT), TAG(TH),
1574
+ TAG(THEAD), TAG(TITLE), TAG(TR), TAG(UL), TAG(WBR), TAG(XMP),
1575
+
1576
+ TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN), TAG_MATHML(MS),
1577
+ TAG_MATHML(MTEXT), TAG_MATHML(ANNOTATION_XML),
1578
+
1579
+ TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC)});
1580
+ }
1581
+
1582
+ // Implicitly closes currently open elements until it reaches an element with
1583
+ // the
1584
+ // specified qualified name. If the elements closed are in the set handled by
1560
1585
  // generate_implied_end_tags, this is normal operation and this function returns
1561
1586
  // true. Otherwise, a parse error is recorded and this function returns false.
1562
- static bool implicitly_close_tags(
1563
- GumboParser* parser, GumboToken* token, GumboTag target) {
1587
+ static bool implicitly_close_tags(GumboParser* parser, GumboToken* token,
1588
+ GumboNamespaceEnum target_ns, GumboTag target) {
1564
1589
  bool result = true;
1565
1590
  generate_implied_end_tags(parser, target);
1566
- if (!node_tag_is(get_current_node(parser), target)) {
1591
+ if (!node_qualified_tag_is(get_current_node(parser), target_ns, target)) {
1567
1592
  parser_add_parse_error(parser, token);
1568
- while (!node_tag_is(get_current_node(parser), target)) {
1593
+ while (
1594
+ !node_qualified_tag_is(get_current_node(parser), target_ns, target)) {
1569
1595
  pop_current_node(parser);
1570
1596
  }
1571
1597
  result = false;
1572
1598
  }
1573
- assert(node_tag_is(get_current_node(parser), target));
1599
+ assert(node_qualified_tag_is(get_current_node(parser), target_ns, target));
1574
1600
  pop_current_node(parser);
1575
1601
  return result;
1576
1602
  }
@@ -1579,9 +1605,11 @@ static bool implicitly_close_tags(
1579
1605
  // a </p> tag was encountered, implicitly closing tags. Returns false if a
1580
1606
  // parse error occurs. This is a convenience function because this particular
1581
1607
  // clause appears several times in the spec.
1582
- static bool maybe_implicitly_close_p_tag(GumboParser* parser, GumboToken* token) {
1608
+ static bool maybe_implicitly_close_p_tag(
1609
+ GumboParser* parser, GumboToken* token) {
1583
1610
  if (has_an_element_in_button_scope(parser, GUMBO_TAG_P)) {
1584
- return implicitly_close_tags(parser, token, GUMBO_TAG_P);
1611
+ return implicitly_close_tags(
1612
+ parser, token, GUMBO_NAMESPACE_HTML, GUMBO_TAG_P);
1585
1613
  }
1586
1614
  return true;
1587
1615
  }
@@ -1592,18 +1620,19 @@ static void maybe_implicitly_close_list_tag(
1592
1620
  GumboParser* parser, GumboToken* token, bool is_li) {
1593
1621
  GumboParserState* state = parser->_parser_state;
1594
1622
  state->_frameset_ok = false;
1595
- for (int i = state->_open_elements.length; --i >= 0; ) {
1623
+ for (int i = state->_open_elements.length; --i >= 0;) {
1596
1624
  const GumboNode* node = state->_open_elements.data[i];
1597
- bool is_list_tag = is_li ?
1598
- node_tag_is(node, GUMBO_TAG_LI) :
1599
- node_tag_in(node, GUMBO_TAG_DD, GUMBO_TAG_DT, GUMBO_TAG_LAST);
1625
+ bool is_list_tag =
1626
+ is_li ? node_html_tag_is(node, GUMBO_TAG_LI)
1627
+ : node_tag_in_set(node, (gumbo_tagset){TAG(DD), TAG(DT)});
1600
1628
  if (is_list_tag) {
1601
- implicitly_close_tags(parser, token, node->v.element.tag);
1629
+ implicitly_close_tags(
1630
+ parser, token, node->v.element.tag_namespace, node->v.element.tag);
1602
1631
  return;
1603
1632
  }
1604
1633
  if (is_special_node(node) &&
1605
- !node_tag_in(node, GUMBO_TAG_ADDRESS, GUMBO_TAG_DIV, GUMBO_TAG_P,
1606
- GUMBO_TAG_LAST)) {
1634
+ !node_tag_in_set(
1635
+ node, (gumbo_tagset){TAG(ADDRESS), TAG(DIV), TAG(P)})) {
1607
1636
  return;
1608
1637
  }
1609
1638
  }
@@ -1616,7 +1645,7 @@ static void merge_attributes(
1616
1645
  const GumboVector* token_attr = &token->v.start_tag.attributes;
1617
1646
  GumboVector* node_attr = &node->v.element.attributes;
1618
1647
 
1619
- for (int i = 0; i < token_attr->length; ++i) {
1648
+ for (unsigned int i = 0; i < token_attr->length; ++i) {
1620
1649
  GumboAttribute* attr = token_attr->data[i];
1621
1650
  if (!gumbo_get_attribute(node_attr, attr->name)) {
1622
1651
  // Ownership of the attribute is transferred by this gumbo_vector_add,
@@ -1640,8 +1669,8 @@ static void merge_attributes(
1640
1669
  }
1641
1670
 
1642
1671
  const char* gumbo_normalize_svg_tagname(const GumboStringPiece* tag) {
1643
- for (int i = 0;
1644
- i < sizeof(kSvgTagReplacements) / sizeof(ReplacementEntry); ++i) {
1672
+ for (size_t i = 0; i < sizeof(kSvgTagReplacements) / sizeof(ReplacementEntry);
1673
+ ++i) {
1645
1674
  const ReplacementEntry* entry = &kSvgTagReplacements[i];
1646
1675
  if (gumbo_string_equals_ignore_case(tag, &entry->from)) {
1647
1676
  return entry->to.data;
@@ -1656,9 +1685,9 @@ const char* gumbo_normalize_svg_tagname(const GumboStringPiece* tag) {
1656
1685
  static void adjust_foreign_attributes(GumboParser* parser, GumboToken* token) {
1657
1686
  assert(token->type == GUMBO_TOKEN_START_TAG);
1658
1687
  const GumboVector* attributes = &token->v.start_tag.attributes;
1659
- for (int i = 0;
1660
- i < sizeof(kForeignAttributeReplacements) /
1661
- sizeof(NamespacedAttributeReplacement); ++i) {
1688
+ for (size_t i = 0; i < sizeof(kForeignAttributeReplacements) /
1689
+ sizeof(NamespacedAttributeReplacement);
1690
+ ++i) {
1662
1691
  const NamespacedAttributeReplacement* entry =
1663
1692
  &kForeignAttributeReplacements[i];
1664
1693
  GumboAttribute* attr = gumbo_get_attribute(attributes, entry->from);
@@ -1676,7 +1705,7 @@ static void adjust_foreign_attributes(GumboParser* parser, GumboToken* token) {
1676
1705
  static void adjust_svg_attributes(GumboParser* parser, GumboToken* token) {
1677
1706
  assert(token->type == GUMBO_TOKEN_START_TAG);
1678
1707
  const GumboVector* attributes = &token->v.start_tag.attributes;
1679
- for (int i = 0;
1708
+ for (size_t i = 0;
1680
1709
  i < sizeof(kSvgAttributeReplacements) / sizeof(ReplacementEntry); ++i) {
1681
1710
  const ReplacementEntry* entry = &kSvgAttributeReplacements[i];
1682
1711
  GumboAttribute* attr = gumbo_get_attribute(attributes, entry->from.data);
@@ -1693,8 +1722,8 @@ static void adjust_svg_attributes(GumboParser* parser, GumboToken* token) {
1693
1722
  // value.
1694
1723
  static void adjust_mathml_attributes(GumboParser* parser, GumboToken* token) {
1695
1724
  assert(token->type == GUMBO_TOKEN_START_TAG);
1696
- GumboAttribute* attr = gumbo_get_attribute(
1697
- &token->v.start_tag.attributes, "definitionurl");
1725
+ GumboAttribute* attr =
1726
+ gumbo_get_attribute(&token->v.start_tag.attributes, "definitionurl");
1698
1727
  if (!attr) {
1699
1728
  return;
1700
1729
  }
@@ -1702,32 +1731,30 @@ static void adjust_mathml_attributes(GumboParser* parser, GumboToken* token) {
1702
1731
  attr->name = gumbo_copy_stringz(parser, "definitionURL");
1703
1732
  }
1704
1733
 
1705
- static bool doctype_matches(
1706
- const GumboTokenDocType* doctype,
1707
- const GumboStringPiece* public_id,
1708
- const GumboStringPiece* system_id,
1734
+ static bool doctype_matches(const GumboTokenDocType* doctype,
1735
+ const GumboStringPiece* public_id, const GumboStringPiece* system_id,
1709
1736
  bool allow_missing_system_id) {
1710
1737
  return !strcmp(doctype->public_identifier, public_id->data) &&
1711
- (allow_missing_system_id || doctype->has_system_identifier) &&
1712
- !strcmp(doctype->system_identifier, system_id->data);
1738
+ (allow_missing_system_id || doctype->has_system_identifier) &&
1739
+ !strcmp(doctype->system_identifier, system_id->data);
1713
1740
  }
1714
1741
 
1715
1742
  static bool maybe_add_doctype_error(
1716
1743
  GumboParser* parser, const GumboToken* token) {
1717
1744
  const GumboTokenDocType* doctype = &token->v.doc_type;
1718
1745
  bool html_doctype = !strcmp(doctype->name, kDoctypeHtml.data);
1719
- if ((!html_doctype ||
1720
- doctype->has_public_identifier ||
1721
- (doctype->has_system_identifier && !strcmp(
1722
- doctype->system_identifier, kSystemIdLegacyCompat.data))) &&
1723
- !(html_doctype && (
1724
- doctype_matches(doctype, &kPublicIdHtml4_0,
1725
- &kSystemIdRecHtml4_0, true) ||
1726
- doctype_matches(doctype, &kPublicIdHtml4_01, &kSystemIdHtml4, true) ||
1727
- doctype_matches(doctype, &kPublicIdXhtml1_0,
1728
- &kSystemIdXhtmlStrict1_1, false) ||
1729
- doctype_matches(doctype, &kPublicIdXhtml1_1,
1730
- &kSystemIdXhtml1_1, false)))) {
1746
+ if ((!html_doctype || doctype->has_public_identifier ||
1747
+ (doctype->has_system_identifier &&
1748
+ !strcmp(
1749
+ doctype->system_identifier, kSystemIdLegacyCompat.data))) &&
1750
+ !(html_doctype && (doctype_matches(doctype, &kPublicIdHtml4_0,
1751
+ &kSystemIdRecHtml4_0, true) ||
1752
+ doctype_matches(doctype, &kPublicIdHtml4_01,
1753
+ &kSystemIdHtml4, true) ||
1754
+ doctype_matches(doctype, &kPublicIdXhtml1_0,
1755
+ &kSystemIdXhtmlStrict1_1, false) ||
1756
+ doctype_matches(doctype, &kPublicIdXhtml1_1,
1757
+ &kSystemIdXhtml1_1, false)))) {
1731
1758
  parser_add_parse_error(parser, token);
1732
1759
  return false;
1733
1760
  }
@@ -1750,7 +1777,7 @@ static void remove_from_parent(GumboParser* parser, GumboNode* node) {
1750
1777
  gumbo_vector_remove_at(parser, index, children);
1751
1778
  node->parent = NULL;
1752
1779
  node->index_within_parent = -1;
1753
- for (int i = index; i < children->length; ++i) {
1780
+ for (unsigned int i = index; i < children->length; ++i) {
1754
1781
  GumboNode* child = children->data[i];
1755
1782
  child->index_within_parent = i;
1756
1783
  }
@@ -1759,29 +1786,38 @@ static void remove_from_parent(GumboParser* parser, GumboNode* node) {
1759
1786
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#an-introduction-to-error-handling-and-strange-cases-in-the-parser
1760
1787
  // Also described in the "in body" handling for end formatting tags.
1761
1788
  static bool adoption_agency_algorithm(
1762
- GumboParser* parser, GumboToken* token, GumboTag closing_tag) {
1789
+ GumboParser* parser, GumboToken* token, GumboTag subject) {
1763
1790
  GumboParserState* state = parser->_parser_state;
1764
1791
  gumbo_debug("Entering adoption agency algorithm.\n");
1765
- // Steps 1-3 & 16:
1766
- for (int i = 0; i < 8; ++i) {
1767
- // Step 4.
1792
+ // Step 1.
1793
+ GumboNode* current_node = get_current_node(parser);
1794
+ if (current_node->v.element.tag_namespace == GUMBO_NAMESPACE_HTML &&
1795
+ current_node->v.element.tag == subject &&
1796
+ gumbo_vector_index_of(
1797
+ &state->_active_formatting_elements, current_node) == -1) {
1798
+ pop_current_node(parser);
1799
+ return false;
1800
+ }
1801
+ // Steps 2-4 & 20:
1802
+ for (unsigned int i = 0; i < 8; ++i) {
1803
+ // Step 5.
1768
1804
  GumboNode* formatting_node = NULL;
1769
1805
  int formatting_node_in_open_elements = -1;
1770
- for (int j = state->_active_formatting_elements.length; --j >= 0; ) {
1806
+ for (int j = state->_active_formatting_elements.length; --j >= 0;) {
1771
1807
  GumboNode* current_node = state->_active_formatting_elements.data[j];
1772
1808
  if (current_node == &kActiveFormattingScopeMarker) {
1773
1809
  gumbo_debug("Broke on scope marker; aborting.\n");
1774
1810
  // Last scope marker; abort the algorithm.
1775
1811
  return false;
1776
1812
  }
1777
- if (node_tag_is(current_node, closing_tag)) {
1813
+ if (node_html_tag_is(current_node, subject)) {
1778
1814
  // Found it.
1779
1815
  formatting_node = current_node;
1780
- formatting_node_in_open_elements = gumbo_vector_index_of(
1781
- &state->_open_elements, formatting_node);
1816
+ formatting_node_in_open_elements =
1817
+ gumbo_vector_index_of(&state->_open_elements, formatting_node);
1782
1818
  gumbo_debug("Formatting element of tag %s at %d.\n",
1783
- gumbo_normalized_tagname(closing_tag),
1784
- formatting_node_in_open_elements);
1819
+ gumbo_normalized_tagname(subject),
1820
+ formatting_node_in_open_elements);
1785
1821
  break;
1786
1822
  }
1787
1823
  }
@@ -1793,74 +1829,84 @@ static bool adoption_agency_algorithm(
1793
1829
  return false;
1794
1830
  }
1795
1831
 
1832
+ // Step 6
1796
1833
  if (formatting_node_in_open_elements == -1) {
1797
1834
  gumbo_debug("Formatting node not on stack of open elements.\n");
1798
- gumbo_vector_remove(parser, formatting_node,
1799
- &state->_active_formatting_elements);
1835
+ parser_add_parse_error(parser, token);
1836
+ gumbo_vector_remove(
1837
+ parser, formatting_node, &state->_active_formatting_elements);
1800
1838
  return false;
1801
1839
  }
1802
1840
 
1841
+ // Step 7
1803
1842
  if (!has_an_element_in_scope(parser, formatting_node->v.element.tag)) {
1804
1843
  parser_add_parse_error(parser, token);
1805
1844
  gumbo_debug("Element not in scope.\n");
1806
1845
  return false;
1807
1846
  }
1847
+
1848
+ // Step 8
1808
1849
  if (formatting_node != get_current_node(parser)) {
1809
1850
  parser_add_parse_error(parser, token); // But continue onwards.
1810
1851
  }
1811
1852
  assert(formatting_node);
1812
- assert(!node_tag_is(formatting_node, GUMBO_TAG_HTML));
1813
- assert(!node_tag_is(formatting_node, GUMBO_TAG_BODY));
1853
+ assert(!node_html_tag_is(formatting_node, GUMBO_TAG_HTML));
1854
+ assert(!node_html_tag_is(formatting_node, GUMBO_TAG_BODY));
1814
1855
 
1815
- // Step 5 & 6.
1856
+ // Step 9 & 10
1816
1857
  GumboNode* furthest_block = NULL;
1817
- for (int j = formatting_node_in_open_elements;
1858
+ for (unsigned int j = formatting_node_in_open_elements;
1818
1859
  j < state->_open_elements.length; ++j) {
1819
1860
  assert(j > 0);
1820
1861
  GumboNode* current = state->_open_elements.data[j];
1821
1862
  if (is_special_node(current)) {
1822
- // Step 5.
1863
+ // Step 9.
1823
1864
  furthest_block = current;
1824
1865
  break;
1825
1866
  }
1826
1867
  }
1827
1868
  if (!furthest_block) {
1828
- // Step 6.
1869
+ // Step 10.
1829
1870
  while (get_current_node(parser) != formatting_node) {
1830
1871
  pop_current_node(parser);
1831
1872
  }
1832
1873
  // And the formatting element itself.
1833
1874
  pop_current_node(parser);
1834
- gumbo_vector_remove(parser, formatting_node,
1835
- &state->_active_formatting_elements);
1875
+ gumbo_vector_remove(
1876
+ parser, formatting_node, &state->_active_formatting_elements);
1836
1877
  return false;
1837
1878
  }
1838
- assert(!node_tag_is(furthest_block, GUMBO_TAG_HTML));
1879
+ assert(!node_html_tag_is(furthest_block, GUMBO_TAG_HTML));
1839
1880
  assert(furthest_block);
1840
1881
 
1841
- // Step 7.
1882
+ // Step 11.
1842
1883
  // Elements may be moved and reparented by this algorithm, so
1843
1884
  // common_ancestor is not necessarily the same as formatting_node->parent.
1844
1885
  GumboNode* common_ancestor =
1845
- state->_open_elements.data[gumbo_vector_index_of(
1846
- &state->_open_elements, formatting_node) - 1];
1886
+ state->_open_elements.data[gumbo_vector_index_of(&state->_open_elements,
1887
+ formatting_node) -
1888
+ 1];
1847
1889
  gumbo_debug("Common ancestor tag = %s, furthest block tag = %s.\n",
1848
- gumbo_normalized_tagname(common_ancestor->v.element.tag),
1849
- gumbo_normalized_tagname(furthest_block->v.element.tag));
1890
+ gumbo_normalized_tagname(common_ancestor->v.element.tag),
1891
+ gumbo_normalized_tagname(furthest_block->v.element.tag));
1850
1892
 
1851
- // Step 8.
1893
+ // Step 12.
1852
1894
  int bookmark = gumbo_vector_index_of(
1853
- &state->_active_formatting_elements, formatting_node);;
1854
- // Step 9.
1895
+ &state->_active_formatting_elements, formatting_node) +
1896
+ 1;
1897
+ gumbo_debug("Bookmark at %d.\n", bookmark);
1898
+ // Step 13.
1855
1899
  GumboNode* node = furthest_block;
1856
1900
  GumboNode* last_node = furthest_block;
1857
1901
  // Must be stored explicitly, in case node is removed from the stack of open
1858
1902
  // elements, to handle step 9.4.
1859
1903
  int saved_node_index = gumbo_vector_index_of(&state->_open_elements, node);
1860
1904
  assert(saved_node_index > 0);
1861
- // Step 9.1-9.3 & 9.11.
1862
- for (int j = 0; j < 3; ++j) {
1863
- // Step 9.4.
1905
+ // Step 13.1.
1906
+ for (int j = 0;;) {
1907
+ // Step 13.2.
1908
+ ++j;
1909
+ // Step 13.3.
1864
1910
  int node_index = gumbo_vector_index_of(&state->_open_elements, node);
1865
1911
  gumbo_debug(
1866
1912
  "Current index: %d, last index: %d.\n", node_index, saved_node_index);
@@ -1869,59 +1915,72 @@ static bool adoption_agency_algorithm(
1869
1915
  }
1870
1916
  saved_node_index = --node_index;
1871
1917
  assert(node_index > 0);
1872
- assert(node_index < state->_open_elements.capacity);
1918
+ assert((unsigned int) node_index < state->_open_elements.capacity);
1873
1919
  node = state->_open_elements.data[node_index];
1874
1920
  assert(node->parent);
1875
- // Step 9.5.
1876
- if (gumbo_vector_index_of(
1877
- &state->_active_formatting_elements, node) == -1) {
1921
+ if (node == formatting_node) {
1922
+ // Step 13.4.
1923
+ break;
1924
+ }
1925
+ int formatting_index =
1926
+ gumbo_vector_index_of(&state->_active_formatting_elements, node);
1927
+ if (j > 3 && formatting_index != -1) {
1928
+ // Step 13.5.
1929
+ gumbo_debug("Removing formatting element at %d.\n", formatting_index);
1930
+ gumbo_vector_remove_at(
1931
+ parser, formatting_index, &state->_active_formatting_elements);
1932
+ // Removing the element shifts all indices over by one, so we may need
1933
+ // to move the bookmark.
1934
+ if (formatting_index < bookmark) {
1935
+ --bookmark;
1936
+ gumbo_debug("Moving bookmark to %d.\n", bookmark);
1937
+ }
1938
+ continue;
1939
+ }
1940
+ if (formatting_index == -1) {
1941
+ // Step 13.6.
1878
1942
  gumbo_vector_remove_at(parser, node_index, &state->_open_elements);
1879
1943
  continue;
1880
- } else if (node == formatting_node) {
1881
- // Step 9.6.
1882
- break;
1883
1944
  }
1884
- // Step 9.7.
1885
- int formatting_index = gumbo_vector_index_of(
1886
- &state->_active_formatting_elements, node);
1945
+ // Step 13.7.
1946
+ // "common ancestor as the intended parent" doesn't actually mean insert
1947
+ // it into the common ancestor; that happens below.
1887
1948
  node = clone_node(parser, node, GUMBO_INSERTION_ADOPTION_AGENCY_CLONED);
1949
+ assert(formatting_index >= 0);
1888
1950
  state->_active_formatting_elements.data[formatting_index] = node;
1951
+ assert(node_index >= 0);
1889
1952
  state->_open_elements.data[node_index] = node;
1890
- // Step 9.8.
1953
+ // Step 13.8.
1891
1954
  if (last_node == furthest_block) {
1892
1955
  bookmark = formatting_index + 1;
1893
- assert(bookmark <= state->_active_formatting_elements.length);
1956
+ gumbo_debug("Bookmark moved to %d.\n", bookmark);
1957
+ assert((unsigned int) bookmark <= state->_active_formatting_elements.length);
1894
1958
  }
1895
- // Step 9.9.
1959
+ // Step 13.9.
1896
1960
  last_node->parse_flags |= GUMBO_INSERTION_ADOPTION_AGENCY_MOVED;
1897
1961
  remove_from_parent(parser, last_node);
1898
1962
  append_node(parser, node, last_node);
1899
- // Step 9.10.
1963
+ // Step 13.10.
1900
1964
  last_node = node;
1901
- }
1965
+ } // Step 13.11.
1902
1966
 
1903
- // Step 10.
1967
+ // Step 14.
1904
1968
  gumbo_debug("Removing %s node from parent ",
1905
- gumbo_normalized_tagname(last_node->v.element.tag));
1969
+ gumbo_normalized_tagname(last_node->v.element.tag));
1906
1970
  remove_from_parent(parser, last_node);
1907
1971
  last_node->parse_flags |= GUMBO_INSERTION_ADOPTION_AGENCY_MOVED;
1908
- if (node_tag_in(common_ancestor, GUMBO_TAG_TABLE, GUMBO_TAG_TBODY,
1909
- GUMBO_TAG_TFOOT, GUMBO_TAG_THEAD, GUMBO_TAG_TR,
1910
- GUMBO_TAG_LAST)) {
1911
- gumbo_debug("and foster-parenting it.\n");
1912
- foster_parent_element(parser, last_node);
1913
- } else {
1914
- gumbo_debug("and inserting it into %s.\n",
1915
- gumbo_normalized_tagname(common_ancestor->v.element.tag));
1916
- append_node(parser, common_ancestor, last_node);
1917
- }
1972
+ InsertionLocation location =
1973
+ get_appropriate_insertion_location(parser, common_ancestor);
1974
+ gumbo_debug("and inserting it into %s.\n",
1975
+ gumbo_normalized_tagname(location.target->v.element.tag));
1976
+ insert_node(parser, last_node, location);
1918
1977
 
1919
- // Step 11.
1978
+ // Step 15.
1920
1979
  GumboNode* new_formatting_node = clone_node(
1921
1980
  parser, formatting_node, GUMBO_INSERTION_ADOPTION_AGENCY_CLONED);
1922
1981
  formatting_node->parse_flags |= GUMBO_INSERTION_IMPLICIT_END_TAG;
1923
1982
 
1924
- // Step 12. Instead of appending nodes one-by-one, we swap the children
1983
+ // Step 16. Instead of appending nodes one-by-one, we swap the children
1925
1984
  // vector of furthest_block with the empty children of new_formatting_node,
1926
1985
  // reducing memory traffic and allocations. We still have to reset their
1927
1986
  // parent pointers, though.
@@ -1931,15 +1990,15 @@ static bool adoption_agency_algorithm(
1931
1990
  furthest_block->v.element.children = temp;
1932
1991
 
1933
1992
  temp = new_formatting_node->v.element.children;
1934
- for (int i = 0; i < temp.length; ++i) {
1993
+ for (unsigned int i = 0; i < temp.length; ++i) {
1935
1994
  GumboNode* child = temp.data[i];
1936
1995
  child->parent = new_formatting_node;
1937
1996
  }
1938
1997
 
1939
- // Step 13.
1998
+ // Step 17.
1940
1999
  append_node(parser, furthest_block, new_formatting_node);
1941
2000
 
1942
- // Step 14.
2001
+ // Step 18.
1943
2002
  // If the formatting node was before the bookmark, it may shift over all
1944
2003
  // indices after it, so we need to explicitly find the index and possibly
1945
2004
  // adjust the bookmark.
@@ -1947,25 +2006,27 @@ static bool adoption_agency_algorithm(
1947
2006
  &state->_active_formatting_elements, formatting_node);
1948
2007
  assert(formatting_node_index != -1);
1949
2008
  if (formatting_node_index < bookmark) {
2009
+ gumbo_debug(
2010
+ "Formatting node at %d is before bookmark at %d; decrementing.\n",
2011
+ formatting_node_index, bookmark);
1950
2012
  --bookmark;
1951
2013
  }
1952
2014
  gumbo_vector_remove_at(
1953
2015
  parser, formatting_node_index, &state->_active_formatting_elements);
1954
2016
  assert(bookmark >= 0);
1955
- assert(bookmark <= state->_active_formatting_elements.length);
2017
+ assert((unsigned int) bookmark <= state->_active_formatting_elements.length);
1956
2018
  gumbo_vector_insert_at(parser, new_formatting_node, bookmark,
1957
- &state->_active_formatting_elements);
2019
+ &state->_active_formatting_elements);
1958
2020
 
1959
- // Step 15.
1960
- gumbo_vector_remove(
1961
- parser, formatting_node, &state->_open_elements);
1962
- int insert_at = gumbo_vector_index_of(
1963
- &state->_open_elements, furthest_block) + 1;
2021
+ // Step 19.
2022
+ gumbo_vector_remove(parser, formatting_node, &state->_open_elements);
2023
+ int insert_at =
2024
+ gumbo_vector_index_of(&state->_open_elements, furthest_block) + 1;
1964
2025
  assert(insert_at >= 0);
1965
- assert(insert_at <= state->_open_elements.length);
2026
+ assert((unsigned int) insert_at <= state->_open_elements.length);
1966
2027
  gumbo_vector_insert_at(
1967
2028
  parser, new_formatting_node, insert_at, &state->_open_elements);
1968
- }
2029
+ } // Step 20.
1969
2030
  return true;
1970
2031
  }
1971
2032
 
@@ -1988,17 +2049,19 @@ static void ignore_token(GumboParser* parser) {
1988
2049
 
1989
2050
  // http://www.whatwg.org/specs/web-apps/current-work/complete/the-end.html
1990
2051
  static void finish_parsing(GumboParser* parser) {
2052
+ gumbo_debug("Finishing parsing");
1991
2053
  maybe_flush_text_node_buffer(parser);
1992
2054
  GumboParserState* state = parser->_parser_state;
1993
2055
  for (GumboNode* node = pop_current_node(parser); node;
1994
2056
  node = pop_current_node(parser)) {
1995
- if ((node_tag_is(node, GUMBO_TAG_BODY) && state->_closed_body_tag) ||
1996
- (node_tag_is(node, GUMBO_TAG_HTML) && state->_closed_html_tag)) {
2057
+ if ((node_html_tag_is(node, GUMBO_TAG_BODY) && state->_closed_body_tag) ||
2058
+ (node_html_tag_is(node, GUMBO_TAG_HTML) && state->_closed_html_tag)) {
1997
2059
  continue;
1998
2060
  }
1999
2061
  node->parse_flags |= GUMBO_INSERTION_IMPLICIT_END_TAG;
2000
2062
  }
2001
- while (pop_current_node(parser)); // Pop them all.
2063
+ while (pop_current_node(parser))
2064
+ ; // Pop them all.
2002
2065
  }
2003
2066
 
2004
2067
  static bool handle_initial(GumboParser* parser, GumboToken* token) {
@@ -2042,9 +2105,9 @@ static bool handle_before_html(GumboParser* parser, GumboToken* token) {
2042
2105
  parser->_output->root = html_node;
2043
2106
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_BEFORE_HEAD);
2044
2107
  return true;
2045
- } else if (token->type == GUMBO_TOKEN_END_TAG && !tag_in(
2046
- token, false, GUMBO_TAG_HEAD, GUMBO_TAG_BODY, GUMBO_TAG_HTML,
2047
- GUMBO_TAG_BR, GUMBO_TAG_LAST)) {
2108
+ } else if (token->type == GUMBO_TOKEN_END_TAG &&
2109
+ !tag_in(token, false,
2110
+ (gumbo_tagset){TAG(HEAD), TAG(BODY), TAG(HTML), TAG(BR)})) {
2048
2111
  parser_add_parse_error(parser, token);
2049
2112
  ignore_token(parser);
2050
2113
  return false;
@@ -2076,9 +2139,9 @@ static bool handle_before_head(GumboParser* parser, GumboToken* token) {
2076
2139
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_HEAD);
2077
2140
  parser->_parser_state->_head_element = node;
2078
2141
  return true;
2079
- } else if (token->type == GUMBO_TOKEN_END_TAG && !tag_in(
2080
- token, false, GUMBO_TAG_HEAD, GUMBO_TAG_BODY, GUMBO_TAG_HTML,
2081
- GUMBO_TAG_BR, GUMBO_TAG_LAST)) {
2142
+ } else if (token->type == GUMBO_TOKEN_END_TAG &&
2143
+ !tag_in(token, false,
2144
+ (gumbo_tagset){TAG(HEAD), TAG(BODY), TAG(HTML), TAG(BR)})) {
2082
2145
  parser_add_parse_error(parser, token);
2083
2146
  ignore_token(parser);
2084
2147
  return false;
@@ -2110,9 +2173,9 @@ static bool handle_in_head(GumboParser* parser, GumboToken* token) {
2110
2173
  return true;
2111
2174
  } else if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
2112
2175
  return handle_in_body(parser, token);
2113
- } else if (tag_in(token, kStartTag, GUMBO_TAG_BASE, GUMBO_TAG_BASEFONT,
2114
- GUMBO_TAG_BGSOUND, GUMBO_TAG_MENUITEM, GUMBO_TAG_LINK,
2115
- GUMBO_TAG_LAST)) {
2176
+ } else if (tag_in(token, kStartTag,
2177
+ (gumbo_tagset){TAG(BASE), TAG(BASEFONT), TAG(BGSOUND),
2178
+ TAG(MENUITEM), TAG(LINK)})) {
2116
2179
  insert_element_from_token(parser, token);
2117
2180
  pop_current_node(parser);
2118
2181
  acknowledge_self_closing_tag(parser);
@@ -2129,8 +2192,8 @@ static bool handle_in_head(GumboParser* parser, GumboToken* token) {
2129
2192
  } else if (tag_is(token, kStartTag, GUMBO_TAG_TITLE)) {
2130
2193
  run_generic_parsing_algorithm(parser, token, GUMBO_LEX_RCDATA);
2131
2194
  return true;
2132
- } else if (tag_in(token, kStartTag, GUMBO_TAG_NOFRAMES, GUMBO_TAG_STYLE,
2133
- GUMBO_TAG_LAST)) {
2195
+ } else if (tag_in(
2196
+ token, kStartTag, (gumbo_tagset){TAG(NOFRAMES), TAG(STYLE)})) {
2134
2197
  run_generic_parsing_algorithm(parser, token, GUMBO_LEX_RAWTEXT);
2135
2198
  return true;
2136
2199
  } else if (tag_is(token, kStartTag, GUMBO_TAG_NOSCRIPT)) {
@@ -2143,32 +2206,51 @@ static bool handle_in_head(GumboParser* parser, GumboToken* token) {
2143
2206
  } else if (tag_is(token, kEndTag, GUMBO_TAG_HEAD)) {
2144
2207
  GumboNode* head = pop_current_node(parser);
2145
2208
  AVOID_UNUSED_VARIABLE_WARNING(head);
2146
- assert(node_tag_is(head, GUMBO_TAG_HEAD));
2209
+ assert(node_html_tag_is(head, GUMBO_TAG_HEAD));
2147
2210
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_HEAD);
2148
2211
  return true;
2149
- } else if (tag_is(token, kStartTag, GUMBO_TAG_HEAD)) {
2150
- parser_add_parse_error(parser, token);
2151
- ignore_token(parser);
2152
- return false;
2212
+ } else if (tag_in(token, kEndTag,
2213
+ (gumbo_tagset){TAG(BODY), TAG(HTML), TAG(BR)})) {
2214
+ pop_current_node(parser);
2215
+ set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_HEAD);
2216
+ parser->_parser_state->_reprocess_current_token = true;
2217
+ return true;
2218
+ } else if (tag_is(token, kStartTag, GUMBO_TAG_TEMPLATE)) {
2219
+ insert_element_from_token(parser, token);
2220
+ add_formatting_element(parser, &kActiveFormattingScopeMarker);
2221
+ parser->_parser_state->_frameset_ok = false;
2222
+ set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TEMPLATE);
2223
+ push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TEMPLATE);
2224
+ return true;
2225
+ } else if (tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
2226
+ if (!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
2227
+ parser_add_parse_error(parser, token);
2228
+ ignore_token(parser);
2229
+ return false;
2230
+ }
2231
+ generate_all_implied_end_tags_thoroughly(parser);
2232
+ bool success = true;
2233
+ if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_TEMPLATE)) {
2234
+ parser_add_parse_error(parser, token);
2235
+ success = false;
2236
+ }
2237
+ while (!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_TEMPLATE))
2238
+ ;
2239
+ clear_active_formatting_elements(parser);
2240
+ pop_template_insertion_mode(parser);
2241
+ reset_insertion_mode_appropriately(parser);
2242
+ return success;
2153
2243
  } else if (tag_is(token, kStartTag, GUMBO_TAG_HEAD) ||
2154
- (token->type == GUMBO_TOKEN_END_TAG &&
2155
- !tag_in(token, kEndTag, GUMBO_TAG_BODY, GUMBO_TAG_HTML,
2156
- GUMBO_TAG_BR, GUMBO_TAG_LAST))) {
2157
- parser_add_parse_error(parser, token);
2158
- return false;
2159
- } else if (tag_is(token, kStartTag, GUMBO_TAG_UNKNOWN) && token->v.start_tag.is_self_closing) {
2244
+ (token->type == GUMBO_TOKEN_END_TAG)) {
2160
2245
  parser_add_parse_error(parser, token);
2161
2246
  ignore_token(parser);
2162
2247
  return false;
2163
2248
  } else {
2164
- const GumboNode* node = pop_current_node(parser);
2165
- assert(node_tag_is(node, GUMBO_TAG_HEAD));
2166
- AVOID_UNUSED_VARIABLE_WARNING(node);
2249
+ pop_current_node(parser);
2167
2250
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_HEAD);
2168
2251
  parser->_parser_state->_reprocess_current_token = true;
2169
2252
  return true;
2170
2253
  }
2171
-
2172
2254
  return true;
2173
2255
  }
2174
2256
 
@@ -2181,27 +2263,27 @@ static bool handle_in_head_noscript(GumboParser* parser, GumboToken* token) {
2181
2263
  return handle_in_body(parser, token);
2182
2264
  } else if (tag_is(token, kEndTag, GUMBO_TAG_NOSCRIPT)) {
2183
2265
  const GumboNode* node = pop_current_node(parser);
2184
- assert(node_tag_is(node, GUMBO_TAG_NOSCRIPT));
2266
+ assert(node_html_tag_is(node, GUMBO_TAG_NOSCRIPT));
2185
2267
  AVOID_UNUSED_VARIABLE_WARNING(node);
2186
2268
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_HEAD);
2187
2269
  return true;
2188
2270
  } else if (token->type == GUMBO_TOKEN_WHITESPACE ||
2189
2271
  token->type == GUMBO_TOKEN_COMMENT ||
2190
- tag_in(token, kStartTag, GUMBO_TAG_BASEFONT, GUMBO_TAG_BGSOUND,
2191
- GUMBO_TAG_LINK, GUMBO_TAG_META, GUMBO_TAG_NOFRAMES,
2192
- GUMBO_TAG_STYLE, GUMBO_TAG_LAST)) {
2272
+ tag_in(token, kStartTag,
2273
+ (gumbo_tagset){TAG(BASEFONT), TAG(BGSOUND), TAG(LINK),
2274
+ TAG(META), TAG(NOFRAMES), TAG(STYLE)})) {
2193
2275
  return handle_in_head(parser, token);
2194
- } else if (tag_in(token, kStartTag, GUMBO_TAG_HEAD, GUMBO_TAG_NOSCRIPT,
2195
- GUMBO_TAG_LAST) ||
2196
- (token->type == GUMBO_TOKEN_END_TAG &&
2197
- !tag_is(token, kEndTag, GUMBO_TAG_BR))) {
2276
+ } else if (tag_in(
2277
+ token, kStartTag, (gumbo_tagset){TAG(HEAD), TAG(NOSCRIPT)}) ||
2278
+ (token->type == GUMBO_TOKEN_END_TAG &&
2279
+ !tag_is(token, kEndTag, GUMBO_TAG_BR))) {
2198
2280
  parser_add_parse_error(parser, token);
2199
2281
  ignore_token(parser);
2200
2282
  return false;
2201
2283
  } else {
2202
2284
  parser_add_parse_error(parser, token);
2203
2285
  const GumboNode* node = pop_current_node(parser);
2204
- assert(node_tag_is(node, GUMBO_TAG_NOSCRIPT));
2286
+ assert(node_html_tag_is(node, GUMBO_TAG_NOSCRIPT));
2205
2287
  AVOID_UNUSED_VARIABLE_WARNING(node);
2206
2288
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_HEAD);
2207
2289
  parser->_parser_state->_reprocess_current_token = true;
@@ -2233,10 +2315,10 @@ static bool handle_after_head(GumboParser* parser, GumboToken* token) {
2233
2315
  insert_element_from_token(parser, token);
2234
2316
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_FRAMESET);
2235
2317
  return true;
2236
- } else if (tag_in(token, kStartTag, GUMBO_TAG_BASE, GUMBO_TAG_BASEFONT,
2237
- GUMBO_TAG_BGSOUND, GUMBO_TAG_LINK, GUMBO_TAG_META,
2238
- GUMBO_TAG_NOFRAMES, GUMBO_TAG_SCRIPT, GUMBO_TAG_STYLE,
2239
- GUMBO_TAG_TITLE, GUMBO_TAG_LAST)) {
2318
+ } else if (tag_in(token, kStartTag,
2319
+ (gumbo_tagset){TAG(BASE), TAG(BASEFONT), TAG(BGSOUND),
2320
+ TAG(LINK), TAG(META), TAG(NOFRAMES), TAG(SCRIPT),
2321
+ TAG(STYLE), TAG(TEMPLATE), TAG(TITLE)})) {
2240
2322
  parser_add_parse_error(parser, token);
2241
2323
  assert(state->_head_element != NULL);
2242
2324
  // This must be flushed before we push the head element on, as there may be
@@ -2246,10 +2328,12 @@ static bool handle_after_head(GumboParser* parser, GumboToken* token) {
2246
2328
  bool result = handle_in_head(parser, token);
2247
2329
  gumbo_vector_remove(parser, state->_head_element, &state->_open_elements);
2248
2330
  return result;
2331
+ } else if (tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
2332
+ return handle_in_head(parser, token);
2249
2333
  } else if (tag_is(token, kStartTag, GUMBO_TAG_HEAD) ||
2250
- (token->type == GUMBO_TOKEN_END_TAG &&
2251
- !tag_in(token, kEndTag, GUMBO_TAG_BODY, GUMBO_TAG_HTML,
2252
- GUMBO_TAG_BR, GUMBO_TAG_LAST))) {
2334
+ (token->type == GUMBO_TOKEN_END_TAG &&
2335
+ !tag_in(token, kEndTag,
2336
+ (gumbo_tagset){TAG(BODY), TAG(HTML), TAG(BR)}))) {
2253
2337
  parser_add_parse_error(parser, token);
2254
2338
  ignore_token(parser);
2255
2339
  return false;
@@ -2263,24 +2347,23 @@ static bool handle_after_head(GumboParser* parser, GumboToken* token) {
2263
2347
 
2264
2348
  static void destroy_node(GumboParser* parser, GumboNode* node) {
2265
2349
  switch (node->type) {
2266
- case GUMBO_NODE_DOCUMENT:
2267
- {
2268
- GumboDocument* doc = &node->v.document;
2269
- for (int i = 0; i < doc->children.length; ++i) {
2270
- destroy_node(parser, doc->children.data[i]);
2271
- }
2272
- gumbo_parser_deallocate(parser, (void*) doc->children.data);
2273
- gumbo_parser_deallocate(parser, (void*) doc->name);
2274
- gumbo_parser_deallocate(parser, (void*) doc->public_identifier);
2275
- gumbo_parser_deallocate(parser, (void*) doc->system_identifier);
2350
+ case GUMBO_NODE_DOCUMENT: {
2351
+ GumboDocument* doc = &node->v.document;
2352
+ for (unsigned int i = 0; i < doc->children.length; ++i) {
2353
+ destroy_node(parser, doc->children.data[i]);
2276
2354
  }
2277
- break;
2355
+ gumbo_parser_deallocate(parser, (void*) doc->children.data);
2356
+ gumbo_parser_deallocate(parser, (void*) doc->name);
2357
+ gumbo_parser_deallocate(parser, (void*) doc->public_identifier);
2358
+ gumbo_parser_deallocate(parser, (void*) doc->system_identifier);
2359
+ } break;
2360
+ case GUMBO_NODE_TEMPLATE:
2278
2361
  case GUMBO_NODE_ELEMENT:
2279
- for (int i = 0; i < node->v.element.attributes.length; ++i) {
2362
+ for (unsigned int i = 0; i < node->v.element.attributes.length; ++i) {
2280
2363
  gumbo_destroy_attribute(parser, node->v.element.attributes.data[i]);
2281
2364
  }
2282
2365
  gumbo_parser_deallocate(parser, node->v.element.attributes.data);
2283
- for (int i = 0; i < node->v.element.children.length; ++i) {
2366
+ for (unsigned int i = 0; i < node->v.element.children.length; ++i) {
2284
2367
  destroy_node(parser, node->v.element.children.data[i]);
2285
2368
  }
2286
2369
  gumbo_parser_deallocate(parser, node->v.element.children.data);
@@ -2307,7 +2390,8 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2307
2390
  reconstruct_active_formatting_elements(parser);
2308
2391
  insert_text_token(parser, token);
2309
2392
  return true;
2310
- } else if (token->type == GUMBO_TOKEN_CHARACTER) {
2393
+ } else if (token->type == GUMBO_TOKEN_CHARACTER ||
2394
+ token->type == GUMBO_TOKEN_CDATA) {
2311
2395
  reconstruct_active_formatting_elements(parser);
2312
2396
  insert_text_token(parser, token);
2313
2397
  set_frameset_not_ok(parser);
@@ -2320,20 +2404,26 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2320
2404
  ignore_token(parser);
2321
2405
  return false;
2322
2406
  } else if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
2407
+ parser_add_parse_error(parser, token);
2408
+ if (has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
2409
+ ignore_token(parser);
2410
+ return false;
2411
+ }
2323
2412
  assert(parser->_output->root != NULL);
2324
2413
  assert(parser->_output->root->type == GUMBO_NODE_ELEMENT);
2325
- parser_add_parse_error(parser, token);
2326
2414
  merge_attributes(parser, token, parser->_output->root);
2327
2415
  return false;
2328
- } else if (tag_in(token, kStartTag, GUMBO_TAG_BASE, GUMBO_TAG_BASEFONT,
2329
- GUMBO_TAG_BGSOUND, GUMBO_TAG_MENUITEM, GUMBO_TAG_LINK,
2330
- GUMBO_TAG_META, GUMBO_TAG_NOFRAMES, GUMBO_TAG_SCRIPT,
2331
- GUMBO_TAG_STYLE, GUMBO_TAG_TITLE, GUMBO_TAG_LAST)) {
2416
+ } else if (tag_in(token, kStartTag,
2417
+ (gumbo_tagset){TAG(BASE), TAG(BASEFONT), TAG(BGSOUND),
2418
+ TAG(MENUITEM), TAG(LINK), TAG(META), TAG(NOFRAMES),
2419
+ TAG(SCRIPT), TAG(STYLE), TAG(TEMPLATE), TAG(TITLE)}) ||
2420
+ tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
2332
2421
  return handle_in_head(parser, token);
2333
2422
  } else if (tag_is(token, kStartTag, GUMBO_TAG_BODY)) {
2334
2423
  parser_add_parse_error(parser, token);
2335
2424
  if (state->_open_elements.length < 2 ||
2336
- !node_tag_is(state->_open_elements.data[1], GUMBO_TAG_BODY)) {
2425
+ !node_html_tag_is(state->_open_elements.data[1], GUMBO_TAG_BODY) ||
2426
+ has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
2337
2427
  ignore_token(parser);
2338
2428
  return false;
2339
2429
  }
@@ -2343,7 +2433,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2343
2433
  } else if (tag_is(token, kStartTag, GUMBO_TAG_FRAMESET)) {
2344
2434
  parser_add_parse_error(parser, token);
2345
2435
  if (state->_open_elements.length < 2 ||
2346
- !node_tag_is(state->_open_elements.data[1], GUMBO_TAG_BODY) ||
2436
+ !node_html_tag_is(state->_open_elements.data[1], GUMBO_TAG_BODY) ||
2347
2437
  !state->_frameset_ok) {
2348
2438
  ignore_token(parser);
2349
2439
  return false;
@@ -2367,7 +2457,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2367
2457
  // Remove the body node. We may want to factor this out into a generic
2368
2458
  // helper, but right now this is the only code that needs to do this.
2369
2459
  GumboVector* children = &parser->_output->root->v.element.children;
2370
- for (int i = 0; i < children->length; ++i) {
2460
+ for (unsigned int i = 0; i < children->length; ++i) {
2371
2461
  if (children->data[i] == body_node) {
2372
2462
  gumbo_vector_remove_at(parser, i, children);
2373
2463
  break;
@@ -2380,33 +2470,32 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2380
2470
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_FRAMESET);
2381
2471
  return true;
2382
2472
  } else if (token->type == GUMBO_TOKEN_EOF) {
2383
- for (int i = 0; i < state->_open_elements.length; ++i) {
2384
- if (!node_tag_in(state->_open_elements.data[i], GUMBO_TAG_DD,
2385
- GUMBO_TAG_DT, GUMBO_TAG_LI, GUMBO_TAG_P, GUMBO_TAG_TBODY,
2386
- GUMBO_TAG_TD, GUMBO_TAG_TFOOT, GUMBO_TAG_TH,
2387
- GUMBO_TAG_THEAD, GUMBO_TAG_TR, GUMBO_TAG_BODY,
2388
- GUMBO_TAG_HTML, GUMBO_TAG_LAST)) {
2473
+ for (unsigned int i = 0; i < state->_open_elements.length; ++i) {
2474
+ if (!node_tag_in_set(state->_open_elements.data[i],
2475
+ (gumbo_tagset){TAG(DD), TAG(DT), TAG(LI), TAG(P), TAG(TBODY),
2476
+ TAG(TD), TAG(TFOOT), TAG(TH), TAG(THEAD), TAG(TR), TAG(BODY),
2477
+ TAG(HTML)})) {
2389
2478
  parser_add_parse_error(parser, token);
2390
- return false;
2391
2479
  }
2392
2480
  }
2481
+ if (get_current_template_insertion_mode(parser) !=
2482
+ GUMBO_INSERTION_MODE_INITIAL) {
2483
+ return handle_in_template(parser, token);
2484
+ }
2393
2485
  return true;
2394
- } else if (tag_in(token, kEndTag, GUMBO_TAG_BODY, GUMBO_TAG_HTML,
2395
- GUMBO_TAG_LAST)) {
2486
+ } else if (tag_in(token, kEndTag, (gumbo_tagset){TAG(BODY), TAG(HTML)})) {
2396
2487
  if (!has_an_element_in_scope(parser, GUMBO_TAG_BODY)) {
2397
2488
  parser_add_parse_error(parser, token);
2398
2489
  ignore_token(parser);
2399
2490
  return false;
2400
2491
  }
2401
2492
  bool success = true;
2402
- for (int i = 0; i < state->_open_elements.length; ++i) {
2403
- if (!node_tag_in(state->_open_elements.data[i], GUMBO_TAG_DD,
2404
- GUMBO_TAG_DT, GUMBO_TAG_LI, GUMBO_TAG_OPTGROUP,
2405
- GUMBO_TAG_OPTION, GUMBO_TAG_P, GUMBO_TAG_RP,
2406
- GUMBO_TAG_RT, GUMBO_TAG_TBODY, GUMBO_TAG_TD,
2407
- GUMBO_TAG_TFOOT, GUMBO_TAG_TH, GUMBO_TAG_THEAD,
2408
- GUMBO_TAG_TR, GUMBO_TAG_BODY, GUMBO_TAG_HTML,
2409
- GUMBO_TAG_LAST)) {
2493
+ for (unsigned int i = 0; i < state->_open_elements.length; ++i) {
2494
+ if (!node_tag_in_set(state->_open_elements.data[i],
2495
+ (gumbo_tagset){TAG(DD), TAG(DT), TAG(LI), TAG(OPTGROUP),
2496
+ TAG(OPTION), TAG(P), TAG(RB), TAG(RP), TAG(RT), TAG(RTC),
2497
+ TAG(TBODY), TAG(TD), TAG(TFOOT), TAG(TH), TAG(THEAD), TAG(TR),
2498
+ TAG(BODY), TAG(HTML)})) {
2410
2499
  parser_add_parse_error(parser, token);
2411
2500
  success = false;
2412
2501
  break;
@@ -2417,58 +2506,58 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2417
2506
  parser->_parser_state->_reprocess_current_token = true;
2418
2507
  } else {
2419
2508
  GumboNode* body = state->_open_elements.data[1];
2420
- assert(node_tag_is(body, GUMBO_TAG_BODY));
2509
+ assert(node_html_tag_is(body, GUMBO_TAG_BODY));
2421
2510
  record_end_of_element(state->_current_token, &body->v.element);
2422
2511
  }
2423
2512
  return success;
2424
- } else if (tag_in(token, kStartTag, GUMBO_TAG_ADDRESS, GUMBO_TAG_ARTICLE,
2425
- GUMBO_TAG_ASIDE, GUMBO_TAG_BLOCKQUOTE, GUMBO_TAG_CENTER,
2426
- GUMBO_TAG_DETAILS, GUMBO_TAG_DIR, GUMBO_TAG_DIV,
2427
- GUMBO_TAG_DL, GUMBO_TAG_FIELDSET, GUMBO_TAG_FIGCAPTION,
2428
- GUMBO_TAG_FIGURE, GUMBO_TAG_FOOTER, GUMBO_TAG_HEADER,
2429
- GUMBO_TAG_HGROUP, GUMBO_TAG_MENU, GUMBO_TAG_NAV,
2430
- GUMBO_TAG_OL, GUMBO_TAG_P, GUMBO_TAG_SECTION,
2431
- GUMBO_TAG_SUMMARY, GUMBO_TAG_UL, GUMBO_TAG_LAST)) {
2513
+ } else if (tag_in(token, kStartTag,
2514
+ (gumbo_tagset){TAG(ADDRESS), TAG(ARTICLE), TAG(ASIDE),
2515
+ TAG(BLOCKQUOTE), TAG(CENTER), TAG(DETAILS), TAG(DIR),
2516
+ TAG(DIV), TAG(DL), TAG(FIELDSET), TAG(FIGCAPTION),
2517
+ TAG(FIGURE), TAG(FOOTER), TAG(HEADER), TAG(HGROUP),
2518
+ TAG(MENU), TAG(MAIN), TAG(NAV), TAG(OL), TAG(P),
2519
+ TAG(SECTION), TAG(SUMMARY), TAG(UL)})) {
2432
2520
  bool result = maybe_implicitly_close_p_tag(parser, token);
2433
2521
  insert_element_from_token(parser, token);
2434
2522
  return result;
2435
- } else if (tag_in(token, kStartTag, GUMBO_TAG_H1, GUMBO_TAG_H2, GUMBO_TAG_H3,
2436
- GUMBO_TAG_H4, GUMBO_TAG_H5, GUMBO_TAG_H6, GUMBO_TAG_LAST)) {
2523
+ } else if (tag_in(token, kStartTag, (gumbo_tagset){TAG(H1), TAG(H2), TAG(H3),
2524
+ TAG(H4), TAG(H5), TAG(H6)})) {
2437
2525
  bool result = maybe_implicitly_close_p_tag(parser, token);
2438
- if (node_tag_in(get_current_node(parser), GUMBO_TAG_H1, GUMBO_TAG_H2,
2439
- GUMBO_TAG_H3, GUMBO_TAG_H4, GUMBO_TAG_H5, GUMBO_TAG_H6,
2440
- GUMBO_TAG_LAST)) {
2526
+ if (node_tag_in_set(
2527
+ get_current_node(parser), (gumbo_tagset){TAG(H1), TAG(H2), TAG(H3),
2528
+ TAG(H4), TAG(H5), TAG(H6)})) {
2441
2529
  parser_add_parse_error(parser, token);
2442
2530
  pop_current_node(parser);
2443
2531
  result = false;
2444
2532
  }
2445
2533
  insert_element_from_token(parser, token);
2446
2534
  return result;
2447
- } else if (tag_in(token, kStartTag, GUMBO_TAG_PRE, GUMBO_TAG_LISTING,
2448
- GUMBO_TAG_LAST)) {
2535
+ } else if (tag_in(token, kStartTag, (gumbo_tagset){TAG(PRE), TAG(LISTING)})) {
2449
2536
  bool result = maybe_implicitly_close_p_tag(parser, token);
2450
2537
  insert_element_from_token(parser, token);
2451
2538
  state->_ignore_next_linefeed = true;
2452
2539
  state->_frameset_ok = false;
2453
2540
  return result;
2454
2541
  } else if (tag_is(token, kStartTag, GUMBO_TAG_FORM)) {
2455
- if (state->_form_element != NULL) {
2542
+ if (state->_form_element != NULL &&
2543
+ !has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
2456
2544
  gumbo_debug("Ignoring nested form.\n");
2457
2545
  parser_add_parse_error(parser, token);
2458
2546
  ignore_token(parser);
2459
2547
  return false;
2460
2548
  }
2461
2549
  bool result = maybe_implicitly_close_p_tag(parser, token);
2462
- state->_form_element =
2463
- insert_element_from_token(parser, token);
2550
+ GumboNode* form_element = insert_element_from_token(parser, token);
2551
+ if (!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
2552
+ state->_form_element = form_element;
2553
+ }
2464
2554
  return result;
2465
2555
  } else if (tag_is(token, kStartTag, GUMBO_TAG_LI)) {
2466
2556
  maybe_implicitly_close_list_tag(parser, token, true);
2467
2557
  bool result = maybe_implicitly_close_p_tag(parser, token);
2468
2558
  insert_element_from_token(parser, token);
2469
2559
  return result;
2470
- } else if (tag_in(token, kStartTag, GUMBO_TAG_DD, GUMBO_TAG_DT,
2471
- GUMBO_TAG_LAST)) {
2560
+ } else if (tag_in(token, kStartTag, (gumbo_tagset){TAG(DD), TAG(DT)})) {
2472
2561
  maybe_implicitly_close_list_tag(parser, token, false);
2473
2562
  bool result = maybe_implicitly_close_p_tag(parser, token);
2474
2563
  insert_element_from_token(parser, token);
@@ -2481,7 +2570,8 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2481
2570
  } else if (tag_is(token, kStartTag, GUMBO_TAG_BUTTON)) {
2482
2571
  if (has_an_element_in_scope(parser, GUMBO_TAG_BUTTON)) {
2483
2572
  parser_add_parse_error(parser, token);
2484
- implicitly_close_tags(parser, token, GUMBO_TAG_BUTTON);
2573
+ implicitly_close_tags(
2574
+ parser, token, GUMBO_NAMESPACE_HTML, GUMBO_TAG_BUTTON);
2485
2575
  state->_reprocess_current_token = true;
2486
2576
  return false;
2487
2577
  }
@@ -2489,67 +2579,83 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2489
2579
  insert_element_from_token(parser, token);
2490
2580
  state->_frameset_ok = false;
2491
2581
  return true;
2492
- } else if (tag_in(token, kEndTag, GUMBO_TAG_ADDRESS, GUMBO_TAG_ARTICLE,
2493
- GUMBO_TAG_ASIDE, GUMBO_TAG_BLOCKQUOTE, GUMBO_TAG_BUTTON,
2494
- GUMBO_TAG_CENTER, GUMBO_TAG_DETAILS, GUMBO_TAG_DIR,
2495
- GUMBO_TAG_DIV, GUMBO_TAG_DL, GUMBO_TAG_FIELDSET,
2496
- GUMBO_TAG_FIGCAPTION, GUMBO_TAG_FIGURE, GUMBO_TAG_FOOTER,
2497
- GUMBO_TAG_HEADER, GUMBO_TAG_HGROUP, GUMBO_TAG_LISTING,
2498
- GUMBO_TAG_MENU, GUMBO_TAG_NAV, GUMBO_TAG_OL, GUMBO_TAG_PRE,
2499
- GUMBO_TAG_SECTION, GUMBO_TAG_SUMMARY, GUMBO_TAG_UL,
2500
- GUMBO_TAG_LAST)) {
2582
+ } else if (tag_in(token, kEndTag,
2583
+ (gumbo_tagset){TAG(ADDRESS), TAG(ARTICLE), TAG(ASIDE),
2584
+ TAG(BLOCKQUOTE), TAG(BUTTON), TAG(CENTER), TAG(DETAILS),
2585
+ TAG(DIR), TAG(DIV), TAG(DL), TAG(FIELDSET),
2586
+ TAG(FIGCAPTION), TAG(FIGURE), TAG(FOOTER), TAG(HEADER),
2587
+ TAG(HGROUP), TAG(LISTING), TAG(MAIN), TAG(MENU), TAG(NAV),
2588
+ TAG(OL), TAG(PRE), TAG(SECTION), TAG(SUMMARY), TAG(UL)})) {
2501
2589
  GumboTag tag = token->v.end_tag;
2502
2590
  if (!has_an_element_in_scope(parser, tag)) {
2503
2591
  parser_add_parse_error(parser, token);
2504
2592
  ignore_token(parser);
2505
2593
  return false;
2506
2594
  }
2507
- implicitly_close_tags(parser, token, token->v.end_tag);
2595
+ implicitly_close_tags(
2596
+ parser, token, GUMBO_NAMESPACE_HTML, token->v.end_tag);
2508
2597
  return true;
2509
2598
  } else if (tag_is(token, kEndTag, GUMBO_TAG_FORM)) {
2510
- bool result = true;
2511
- const GumboNode* node = state->_form_element;
2512
- assert(!node || node->type == GUMBO_NODE_ELEMENT);
2513
- state->_form_element = NULL;
2514
- if (!node || !has_node_in_scope(parser, node)) {
2515
- gumbo_debug("Closing an unopened form.\n");
2516
- parser_add_parse_error(parser, token);
2517
- ignore_token(parser);
2518
- return false;
2519
- }
2520
- // This differs from implicitly_close_tags because we remove *only* the
2521
- // <form> element; other nodes are left in scope.
2522
- generate_implied_end_tags(parser, GUMBO_TAG_LAST);
2523
- if (get_current_node(parser) != node) {
2524
- parser_add_parse_error(parser, token);
2525
- result = false;
2526
- }
2599
+ if (has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
2600
+ if (!has_an_element_in_scope(parser, GUMBO_TAG_FORM)) {
2601
+ parser_add_parse_error(parser, token);
2602
+ ignore_token(parser);
2603
+ return false;
2604
+ }
2605
+ bool success = true;
2606
+ generate_implied_end_tags(parser, GUMBO_TAG_LAST);
2607
+ if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_FORM)) {
2608
+ parser_add_parse_error(parser, token);
2609
+ return false;
2610
+ }
2611
+ while (!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_FORM))
2612
+ ;
2613
+ return success;
2614
+ } else {
2615
+ bool result = true;
2616
+ const GumboNode* node = state->_form_element;
2617
+ assert(!node || node->type == GUMBO_NODE_ELEMENT);
2618
+ state->_form_element = NULL;
2619
+ if (!node || !has_node_in_scope(parser, node)) {
2620
+ gumbo_debug("Closing an unopened form.\n");
2621
+ parser_add_parse_error(parser, token);
2622
+ ignore_token(parser);
2623
+ return false;
2624
+ }
2625
+ // This differs from implicitly_close_tags because we remove *only* the
2626
+ // <form> element; other nodes are left in scope.
2627
+ generate_implied_end_tags(parser, GUMBO_TAG_LAST);
2628
+ if (get_current_node(parser) != node) {
2629
+ parser_add_parse_error(parser, token);
2630
+ result = false;
2631
+ }
2527
2632
 
2528
- GumboVector* open_elements = &state->_open_elements;
2529
- int index = open_elements->length - 1;
2530
- for (; index >= 0 && open_elements->data[index] != node; --index);
2531
- assert(index >= 0);
2532
- gumbo_vector_remove_at(parser, index, open_elements);
2533
- return result;
2633
+ GumboVector* open_elements = &state->_open_elements;
2634
+ int index = gumbo_vector_index_of(open_elements, node);
2635
+ assert(index >= 0);
2636
+ gumbo_vector_remove_at(parser, index, open_elements);
2637
+ return result;
2638
+ }
2534
2639
  } else if (tag_is(token, kEndTag, GUMBO_TAG_P)) {
2535
2640
  if (!has_an_element_in_button_scope(parser, GUMBO_TAG_P)) {
2536
2641
  parser_add_parse_error(parser, token);
2537
- reconstruct_active_formatting_elements(parser);
2642
+ // reconstruct_active_formatting_elements(parser);
2538
2643
  insert_element_of_tag_type(
2539
2644
  parser, GUMBO_TAG_P, GUMBO_INSERTION_CONVERTED_FROM_END_TAG);
2540
2645
  state->_reprocess_current_token = true;
2541
2646
  return false;
2542
2647
  }
2543
- return implicitly_close_tags(parser, token, GUMBO_TAG_P);
2648
+ return implicitly_close_tags(
2649
+ parser, token, GUMBO_NAMESPACE_HTML, GUMBO_TAG_P);
2544
2650
  } else if (tag_is(token, kEndTag, GUMBO_TAG_LI)) {
2545
2651
  if (!has_an_element_in_list_scope(parser, GUMBO_TAG_LI)) {
2546
2652
  parser_add_parse_error(parser, token);
2547
2653
  ignore_token(parser);
2548
2654
  return false;
2549
2655
  }
2550
- return implicitly_close_tags(parser, token, GUMBO_TAG_LI);
2551
- } else if (tag_in(token, kEndTag, GUMBO_TAG_DD, GUMBO_TAG_DT,
2552
- GUMBO_TAG_LAST)) {
2656
+ return implicitly_close_tags(
2657
+ parser, token, GUMBO_NAMESPACE_HTML, GUMBO_TAG_LI);
2658
+ } else if (tag_in(token, kEndTag, (gumbo_tagset){TAG(DD), TAG(DT)})) {
2553
2659
  assert(token->type == GUMBO_TOKEN_END_TAG);
2554
2660
  GumboTag token_tag = token->v.end_tag;
2555
2661
  if (!has_an_element_in_scope(parser, token_tag)) {
@@ -2557,12 +2663,13 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2557
2663
  ignore_token(parser);
2558
2664
  return false;
2559
2665
  }
2560
- return implicitly_close_tags(parser, token, token_tag);
2561
- } else if (tag_in(token, kEndTag, GUMBO_TAG_H1, GUMBO_TAG_H2, GUMBO_TAG_H3,
2562
- GUMBO_TAG_H4, GUMBO_TAG_H5, GUMBO_TAG_H6, GUMBO_TAG_LAST)) {
2666
+ return implicitly_close_tags(
2667
+ parser, token, GUMBO_NAMESPACE_HTML, token_tag);
2668
+ } else if (tag_in(token, kEndTag, (gumbo_tagset){TAG(H1), TAG(H2), TAG(H3),
2669
+ TAG(H4), TAG(H5), TAG(H6)})) {
2563
2670
  if (!has_an_element_in_scope_with_tagname(
2564
- parser, GUMBO_TAG_H1, GUMBO_TAG_H2, GUMBO_TAG_H3, GUMBO_TAG_H4,
2565
- GUMBO_TAG_H5, GUMBO_TAG_H6, GUMBO_TAG_LAST)) {
2671
+ parser, 6, (GumboTag[]){GUMBO_TAG_H1, GUMBO_TAG_H2, GUMBO_TAG_H3,
2672
+ GUMBO_TAG_H4, GUMBO_TAG_H5, GUMBO_TAG_H6})) {
2566
2673
  // No heading open; ignore the token entirely.
2567
2674
  parser_add_parse_error(parser, token);
2568
2675
  ignore_token(parser);
@@ -2570,7 +2677,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2570
2677
  } else {
2571
2678
  generate_implied_end_tags(parser, GUMBO_TAG_LAST);
2572
2679
  const GumboNode* current_node = get_current_node(parser);
2573
- bool success = node_tag_is(current_node, token->v.end_tag);
2680
+ bool success = node_html_tag_is(current_node, token->v.end_tag);
2574
2681
  if (!success) {
2575
2682
  // There're children of the heading currently open; close them below and
2576
2683
  // record a parse error.
@@ -2580,9 +2687,9 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2580
2687
  }
2581
2688
  do {
2582
2689
  current_node = pop_current_node(parser);
2583
- } while (!node_tag_in(current_node, GUMBO_TAG_H1, GUMBO_TAG_H2,
2584
- GUMBO_TAG_H3, GUMBO_TAG_H4, GUMBO_TAG_H5,
2585
- GUMBO_TAG_H6, GUMBO_TAG_LAST));
2690
+ } while (!node_tag_in_set(
2691
+ current_node, (gumbo_tagset){TAG(H1), TAG(H2), TAG(H3),
2692
+ TAG(H4), TAG(H5), TAG(H6)}));
2586
2693
  return success;
2587
2694
  }
2588
2695
  } else if (tag_is(token, kStartTag, GUMBO_TAG_A)) {
@@ -2600,19 +2707,17 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2600
2707
  if (find_last_anchor_index(parser, &last_a)) {
2601
2708
  void* last_element = gumbo_vector_remove_at(
2602
2709
  parser, last_a, &state->_active_formatting_elements);
2603
- gumbo_vector_remove(
2604
- parser, last_element, &state->_open_elements);
2710
+ gumbo_vector_remove(parser, last_element, &state->_open_elements);
2605
2711
  }
2606
2712
  success = false;
2607
2713
  }
2608
2714
  reconstruct_active_formatting_elements(parser);
2609
2715
  add_formatting_element(parser, insert_element_from_token(parser, token));
2610
2716
  return success;
2611
- } else if (tag_in(token, kStartTag, GUMBO_TAG_B, GUMBO_TAG_BIG,
2612
- GUMBO_TAG_CODE, GUMBO_TAG_EM, GUMBO_TAG_FONT, GUMBO_TAG_I,
2613
- GUMBO_TAG_S, GUMBO_TAG_SMALL, GUMBO_TAG_STRIKE,
2614
- GUMBO_TAG_STRONG, GUMBO_TAG_TT, GUMBO_TAG_U,
2615
- GUMBO_TAG_LAST)) {
2717
+ } else if (tag_in(token, kStartTag,
2718
+ (gumbo_tagset){TAG(B), TAG(BIG), TAG(CODE), TAG(EM), TAG(FONT),
2719
+ TAG(I), TAG(S), TAG(SMALL), TAG(STRIKE), TAG(STRONG),
2720
+ TAG(TT), TAG(U)})) {
2616
2721
  reconstruct_active_formatting_elements(parser);
2617
2722
  add_formatting_element(parser, insert_element_from_token(parser, token));
2618
2723
  return true;
@@ -2628,28 +2733,27 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2628
2733
  insert_element_from_token(parser, token);
2629
2734
  add_formatting_element(parser, get_current_node(parser));
2630
2735
  return result;
2631
- } else if (tag_in(token, kEndTag, GUMBO_TAG_A, GUMBO_TAG_B, GUMBO_TAG_BIG,
2632
- GUMBO_TAG_CODE, GUMBO_TAG_EM, GUMBO_TAG_FONT, GUMBO_TAG_I,
2633
- GUMBO_TAG_NOBR, GUMBO_TAG_S, GUMBO_TAG_SMALL,
2634
- GUMBO_TAG_STRIKE, GUMBO_TAG_STRONG, GUMBO_TAG_TT,
2635
- GUMBO_TAG_U, GUMBO_TAG_LAST)) {
2736
+ } else if (tag_in(token, kEndTag,
2737
+ (gumbo_tagset){TAG(A), TAG(B), TAG(BIG), TAG(CODE), TAG(EM),
2738
+ TAG(FONT), TAG(I), TAG(NOBR), TAG(S), TAG(SMALL),
2739
+ TAG(STRIKE), TAG(STRONG), TAG(TT), TAG(U)})) {
2636
2740
  return adoption_agency_algorithm(parser, token, token->v.end_tag);
2637
- } else if (tag_in(token, kStartTag, GUMBO_TAG_APPLET, GUMBO_TAG_MARQUEE,
2638
- GUMBO_TAG_OBJECT, GUMBO_TAG_LAST)) {
2741
+ } else if (tag_in(token, kStartTag,
2742
+ (gumbo_tagset){TAG(APPLET), TAG(MARQUEE), TAG(OBJECT)})) {
2639
2743
  reconstruct_active_formatting_elements(parser);
2640
2744
  insert_element_from_token(parser, token);
2641
2745
  add_formatting_element(parser, &kActiveFormattingScopeMarker);
2642
2746
  set_frameset_not_ok(parser);
2643
2747
  return true;
2644
- } else if (tag_in(token, kEndTag, GUMBO_TAG_APPLET, GUMBO_TAG_MARQUEE,
2645
- GUMBO_TAG_OBJECT, GUMBO_TAG_LAST)) {
2748
+ } else if (tag_in(token, kEndTag,
2749
+ (gumbo_tagset){TAG(APPLET), TAG(MARQUEE), TAG(OBJECT)})) {
2646
2750
  GumboTag token_tag = token->v.end_tag;
2647
2751
  if (!has_an_element_in_table_scope(parser, token_tag)) {
2648
2752
  parser_add_parse_error(parser, token);
2649
2753
  ignore_token(parser);
2650
2754
  return false;
2651
2755
  }
2652
- implicitly_close_tags(parser, token, token_tag);
2756
+ implicitly_close_tags(parser, token, GUMBO_NAMESPACE_HTML, token_tag);
2653
2757
  clear_active_formatting_elements(parser);
2654
2758
  return true;
2655
2759
  } else if (tag_is(token, kStartTag, GUMBO_TAG_TABLE)) {
@@ -2661,9 +2765,9 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2661
2765
  set_frameset_not_ok(parser);
2662
2766
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
2663
2767
  return true;
2664
- } else if (tag_in(token, kStartTag, GUMBO_TAG_AREA, GUMBO_TAG_BR,
2665
- GUMBO_TAG_EMBED, GUMBO_TAG_IMG, GUMBO_TAG_IMAGE,
2666
- GUMBO_TAG_KEYGEN, GUMBO_TAG_WBR, GUMBO_TAG_LAST)) {
2768
+ } else if (tag_in(token, kStartTag,
2769
+ (gumbo_tagset){TAG(AREA), TAG(BR), TAG(EMBED), TAG(IMG),
2770
+ TAG(IMAGE), TAG(KEYGEN), TAG(WBR)})) {
2667
2771
  bool success = true;
2668
2772
  if (tag_is(token, kStartTag, GUMBO_TAG_IMAGE)) {
2669
2773
  success = false;
@@ -2693,8 +2797,8 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2693
2797
  pop_current_node(parser);
2694
2798
  acknowledge_self_closing_tag(parser);
2695
2799
  return true;
2696
- } else if (tag_in(token, kStartTag, GUMBO_TAG_PARAM, GUMBO_TAG_SOURCE,
2697
- GUMBO_TAG_TRACK, GUMBO_TAG_LAST)) {
2800
+ } else if (tag_in(token, kStartTag,
2801
+ (gumbo_tagset){TAG(PARAM), TAG(SOURCE), TAG(TRACK)})) {
2698
2802
  insert_element_from_token(parser, token);
2699
2803
  pop_current_node(parser);
2700
2804
  acknowledge_self_closing_tag(parser);
@@ -2708,7 +2812,8 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2708
2812
  return result;
2709
2813
  } else if (tag_is(token, kStartTag, GUMBO_TAG_ISINDEX)) {
2710
2814
  parser_add_parse_error(parser, token);
2711
- if (parser->_parser_state->_form_element != NULL) {
2815
+ if (parser->_parser_state->_form_element != NULL &&
2816
+ !has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
2712
2817
  ignore_token(parser);
2713
2818
  return false;
2714
2819
  }
@@ -2723,15 +2828,18 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2723
2828
 
2724
2829
  GumboNode* form = insert_element_of_tag_type(
2725
2830
  parser, GUMBO_TAG_FORM, GUMBO_INSERTION_FROM_ISINDEX);
2831
+ if (!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
2832
+ parser->_parser_state->_form_element = form;
2833
+ }
2726
2834
  if (action_attr) {
2727
2835
  gumbo_vector_add(parser, action_attr, &form->v.element.attributes);
2728
2836
  }
2729
- insert_element_of_tag_type(parser, GUMBO_TAG_HR,
2730
- GUMBO_INSERTION_FROM_ISINDEX);
2731
- pop_current_node(parser); // <hr>
2837
+ insert_element_of_tag_type(
2838
+ parser, GUMBO_TAG_HR, GUMBO_INSERTION_FROM_ISINDEX);
2839
+ pop_current_node(parser); // <hr>
2732
2840
 
2733
- insert_element_of_tag_type(parser, GUMBO_TAG_LABEL,
2734
- GUMBO_INSERTION_FROM_ISINDEX);
2841
+ insert_element_of_tag_type(
2842
+ parser, GUMBO_TAG_LABEL, GUMBO_INSERTION_FROM_ISINDEX);
2735
2843
  TextNodeBufferState* text_state = &parser->_parser_state->_text_node;
2736
2844
  text_state->_start_original_text = token->original_text.data;
2737
2845
  text_state->_start_position = token->position;
@@ -2744,15 +2852,15 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2744
2852
  text_state->_buffer.capacity = prompt_attr_length + 1;
2745
2853
  gumbo_destroy_attribute(parser, prompt_attr);
2746
2854
  } else {
2747
- GumboStringPiece prompt_text = GUMBO_STRING(
2748
- "This is a searchable index. Enter search keywords: ");
2855
+ GumboStringPiece prompt_text =
2856
+ GUMBO_STRING("This is a searchable index. Enter search keywords: ");
2749
2857
  gumbo_string_buffer_append_string(
2750
2858
  parser, &prompt_text, &text_state->_buffer);
2751
2859
  }
2752
2860
 
2753
2861
  GumboNode* input = insert_element_of_tag_type(
2754
2862
  parser, GUMBO_TAG_INPUT, GUMBO_INSERTION_FROM_ISINDEX);
2755
- for (int i = 0; i < token_attrs->length; ++i) {
2863
+ for (unsigned int i = 0; i < token_attrs->length; ++i) {
2756
2864
  GumboAttribute* attr = token_attrs->data[i];
2757
2865
  if (attr != prompt_attr && attr != action_attr && attr != name_attr) {
2758
2866
  gumbo_vector_add(parser, attr, &input->v.element.attributes);
@@ -2765,6 +2873,13 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2765
2873
  // touching the attributes.
2766
2874
  ignore_token(parser);
2767
2875
 
2876
+ // The name attribute, if present, should be destroyed since it's ignored
2877
+ // when copying over. The action attribute should be kept since it's moved
2878
+ // to the form.
2879
+ if (name_attr) {
2880
+ gumbo_destroy_attribute(parser, name_attr);
2881
+ }
2882
+
2768
2883
  GumboAttribute* name =
2769
2884
  gumbo_parser_allocate(parser, sizeof(GumboAttribute));
2770
2885
  GumboStringPiece name_str = GUMBO_STRING("name");
@@ -2780,12 +2895,15 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2780
2895
  name->value_end = kGumboEmptySourcePosition;
2781
2896
  gumbo_vector_add(parser, name, &input->v.element.attributes);
2782
2897
 
2783
- pop_current_node(parser); // <input>
2784
- pop_current_node(parser); // <label>
2898
+ pop_current_node(parser); // <input>
2899
+ pop_current_node(parser); // <label>
2785
2900
  insert_element_of_tag_type(
2786
2901
  parser, GUMBO_TAG_HR, GUMBO_INSERTION_FROM_ISINDEX);
2787
- pop_current_node(parser); // <hr>
2788
- pop_current_node(parser); // <form>
2902
+ pop_current_node(parser); // <hr>
2903
+ pop_current_node(parser); // <form>
2904
+ if (!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
2905
+ parser->_parser_state->_form_element = NULL;
2906
+ }
2789
2907
  return false;
2790
2908
  } else if (tag_is(token, kStartTag, GUMBO_TAG_TEXTAREA)) {
2791
2909
  run_generic_parsing_algorithm(parser, token, GUMBO_LEX_RCDATA);
@@ -2820,21 +2938,27 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2820
2938
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_SELECT);
2821
2939
  }
2822
2940
  return true;
2823
- } else if (tag_in(token, kStartTag, GUMBO_TAG_OPTION, GUMBO_TAG_OPTGROUP,
2824
- GUMBO_TAG_LAST)) {
2825
- if (node_tag_is(get_current_node(parser), GUMBO_TAG_OPTION)) {
2941
+ } else if (tag_in(token, kStartTag,
2942
+ (gumbo_tagset){TAG(OPTION), TAG(OPTGROUP)})) {
2943
+ if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTION)) {
2826
2944
  pop_current_node(parser);
2827
2945
  }
2828
2946
  reconstruct_active_formatting_elements(parser);
2829
2947
  insert_element_from_token(parser, token);
2830
2948
  return true;
2831
- } else if (tag_in(token, kStartTag, GUMBO_TAG_RP, GUMBO_TAG_RT,
2832
- GUMBO_TAG_LAST)) {
2949
+ } else if (tag_in(token, kStartTag,
2950
+ (gumbo_tagset){TAG(RB), TAG(RP), TAG(RT), TAG(RTC)})) {
2833
2951
  bool success = true;
2952
+ GumboTag exception =
2953
+ tag_in(token, kStartTag, (gumbo_tagset){TAG(RT), TAG(RP)})
2954
+ ? GUMBO_TAG_RTC
2955
+ : GUMBO_TAG_LAST;
2834
2956
  if (has_an_element_in_scope(parser, GUMBO_TAG_RUBY)) {
2835
- generate_implied_end_tags(parser, GUMBO_TAG_LAST);
2957
+ generate_implied_end_tags(parser, exception);
2836
2958
  }
2837
- if (!node_tag_is(get_current_node(parser), GUMBO_TAG_RUBY)) {
2959
+ if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_RUBY) &&
2960
+ !(exception == GUMBO_TAG_LAST ||
2961
+ node_html_tag_is(get_current_node(parser), GUMBO_TAG_RTC))) {
2838
2962
  parser_add_parse_error(parser, token);
2839
2963
  success = false;
2840
2964
  }
@@ -2867,11 +2991,10 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2867
2991
  acknowledge_self_closing_tag(parser);
2868
2992
  }
2869
2993
  return true;
2870
- } else if (tag_in(token, kStartTag, GUMBO_TAG_CAPTION, GUMBO_TAG_COL,
2871
- GUMBO_TAG_COLGROUP, GUMBO_TAG_FRAME, GUMBO_TAG_HEAD,
2872
- GUMBO_TAG_TBODY, GUMBO_TAG_TD, GUMBO_TAG_TFOOT,
2873
- GUMBO_TAG_TH, GUMBO_TAG_THEAD, GUMBO_TAG_TR,
2874
- GUMBO_TAG_LAST)) {
2994
+ } else if (tag_in(token, kStartTag,
2995
+ (gumbo_tagset){TAG(CAPTION), TAG(COL), TAG(COLGROUP),
2996
+ TAG(FRAME), TAG(HEAD), TAG(TBODY), TAG(TD), TAG(TFOOT),
2997
+ TAG(TH), TAG(THEAD), TAG(TR)})) {
2875
2998
  parser_add_parse_error(parser, token);
2876
2999
  ignore_token(parser);
2877
3000
  return false;
@@ -2883,22 +3006,22 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2883
3006
  assert(token->type == GUMBO_TOKEN_END_TAG);
2884
3007
  GumboTag end_tag = token->v.end_tag;
2885
3008
  assert(state->_open_elements.length > 0);
2886
- assert(node_tag_is(state->_open_elements.data[0], GUMBO_TAG_HTML));
3009
+ assert(node_html_tag_is(state->_open_elements.data[0], GUMBO_TAG_HTML));
2887
3010
  // Walk up the stack of open elements until we find one that either:
2888
3011
  // a) Matches the tag name we saw
2889
3012
  // b) Is in the "special" category.
2890
3013
  // If we see a), implicitly close everything up to and including it. If we
2891
3014
  // see b), then record a parse error, don't close anything (except the
2892
3015
  // implied end tags) and ignore the end tag token.
2893
- for (int i = state->_open_elements.length; --i >= 0; ) {
3016
+ for (int i = state->_open_elements.length; --i >= 0;) {
2894
3017
  const GumboNode* node = state->_open_elements.data[i];
2895
- if (node->v.element.tag_namespace == GUMBO_NAMESPACE_HTML &&
2896
- node_tag_is(node, end_tag)) {
3018
+ if (node_html_tag_is(node, end_tag)) {
2897
3019
  generate_implied_end_tags(parser, end_tag);
2898
3020
  // TODO(jdtang): Do I need to add a parse error here? The condition in
2899
3021
  // the spec seems like it's the inverse of the loop condition above, and
2900
3022
  // so would never fire.
2901
- while (node != pop_current_node(parser)); // Pop everything.
3023
+ while (node != pop_current_node(parser))
3024
+ ; // Pop everything.
2902
3025
  return true;
2903
3026
  } else if (is_special_node(node)) {
2904
3027
  parser_add_parse_error(parser, token);
@@ -2914,7 +3037,8 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2914
3037
 
2915
3038
  // http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-incdata
2916
3039
  static bool handle_text(GumboParser* parser, GumboToken* token) {
2917
- if (token->type == GUMBO_TOKEN_CHARACTER || token->type == GUMBO_TOKEN_WHITESPACE) {
3040
+ if (token->type == GUMBO_TOKEN_CHARACTER ||
3041
+ token->type == GUMBO_TOKEN_WHITESPACE) {
2918
3042
  insert_text_token(parser, token);
2919
3043
  } else {
2920
3044
  // We provide only bare-bones script handling that doesn't involve any of
@@ -2974,13 +3098,12 @@ static bool handle_in_table(GumboParser* parser, GumboToken* token) {
2974
3098
  parser->_parser_state->_reprocess_current_token = true;
2975
3099
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_COLUMN_GROUP);
2976
3100
  return true;
2977
- } else if (tag_in(token, kStartTag, GUMBO_TAG_TBODY, GUMBO_TAG_TFOOT,
2978
- GUMBO_TAG_THEAD, GUMBO_TAG_TD, GUMBO_TAG_TH, GUMBO_TAG_TR,
2979
- GUMBO_TAG_LAST)) {
3101
+ } else if (tag_in(token, kStartTag,
3102
+ (gumbo_tagset){TAG(TBODY), TAG(TFOOT), TAG(THEAD), TAG(TD),
3103
+ TAG(TH), TAG(TR)})) {
2980
3104
  clear_stack_to_table_context(parser);
2981
3105
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
2982
- if (tag_in(token, kStartTag, GUMBO_TAG_TD, GUMBO_TAG_TH, GUMBO_TAG_TR,
2983
- GUMBO_TAG_LAST)) {
3106
+ if (tag_in(token, kStartTag, (gumbo_tagset){TAG(TD), TAG(TH), TAG(TR)})) {
2984
3107
  insert_element_of_tag_type(
2985
3108
  parser, GUMBO_TAG_TBODY, GUMBO_INSERTION_IMPLIED);
2986
3109
  state->_reprocess_current_token = true;
@@ -3002,27 +3125,27 @@ static bool handle_in_table(GumboParser* parser, GumboToken* token) {
3002
3125
  return false;
3003
3126
  }
3004
3127
  return true;
3005
- } else if (tag_in(token, kEndTag, GUMBO_TAG_BODY, GUMBO_TAG_CAPTION,
3006
- GUMBO_TAG_COL, GUMBO_TAG_COLGROUP, GUMBO_TAG_HTML,
3007
- GUMBO_TAG_TBODY, GUMBO_TAG_TD, GUMBO_TAG_TFOOT,
3008
- GUMBO_TAG_TH, GUMBO_TAG_THEAD, GUMBO_TAG_TR,
3009
- GUMBO_TAG_LAST)) {
3128
+ } else if (tag_in(token, kEndTag,
3129
+ (gumbo_tagset){TAG(BODY), TAG(CAPTION), TAG(COL),
3130
+ TAG(COLGROUP), TAG(HTML), TAG(TBODY), TAG(TD), TAG(TFOOT),
3131
+ TAG(TH), TAG(THEAD), TAG(TR)})) {
3010
3132
  parser_add_parse_error(parser, token);
3011
3133
  ignore_token(parser);
3012
3134
  return false;
3013
- } else if (tag_in(token, kStartTag, GUMBO_TAG_STYLE, GUMBO_TAG_SCRIPT,
3014
- GUMBO_TAG_LAST)) {
3135
+ } else if (tag_in(token, kStartTag,
3136
+ (gumbo_tagset){TAG(STYLE), TAG(SCRIPT), TAG(TEMPLATE)}) ||
3137
+ (tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE))) {
3015
3138
  return handle_in_head(parser, token);
3016
3139
  } else if (tag_is(token, kStartTag, GUMBO_TAG_INPUT) &&
3017
- attribute_matches(&token->v.start_tag.attributes,
3018
- "type", "hidden")) {
3140
+ attribute_matches(
3141
+ &token->v.start_tag.attributes, "type", "hidden")) {
3019
3142
  parser_add_parse_error(parser, token);
3020
3143
  insert_element_from_token(parser, token);
3021
3144
  pop_current_node(parser);
3022
3145
  return false;
3023
3146
  } else if (tag_is(token, kStartTag, GUMBO_TAG_FORM)) {
3024
3147
  parser_add_parse_error(parser, token);
3025
- if (state->_form_element) {
3148
+ if (state->_form_element || has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
3026
3149
  ignore_token(parser);
3027
3150
  return false;
3028
3151
  }
@@ -3030,11 +3153,7 @@ static bool handle_in_table(GumboParser* parser, GumboToken* token) {
3030
3153
  pop_current_node(parser);
3031
3154
  return false;
3032
3155
  } else if (token->type == GUMBO_TOKEN_EOF) {
3033
- if (!node_tag_is(get_current_node(parser), GUMBO_TAG_HTML)) {
3034
- parser_add_parse_error(parser, token);
3035
- return false;
3036
- }
3037
- return true;
3156
+ return handle_in_body(parser, token);
3038
3157
  } else {
3039
3158
  parser_add_parse_error(parser, token);
3040
3159
  state->_foster_parent_insertions = true;
@@ -3062,8 +3181,9 @@ static bool handle_in_table_text(GumboParser* parser, GumboToken* token) {
3062
3181
  // Note that TextNodeBuffer may contain UTF-8 characters, but the presence
3063
3182
  // of any one byte that is not whitespace means we flip the flag, so this
3064
3183
  // loop is still valid.
3065
- for (int i = 0; i < buffer->length; ++i) {
3066
- if (!isspace(buffer->data[i]) || buffer->data[i] == '\v') {
3184
+ for (unsigned int i = 0; i < buffer->length; ++i) {
3185
+ if (!isspace((unsigned char) buffer->data[i]) ||
3186
+ buffer->data[i] == '\v') {
3067
3187
  state->_foster_parent_insertions = true;
3068
3188
  reconstruct_active_formatting_elements(parser);
3069
3189
  break;
@@ -3079,38 +3199,43 @@ static bool handle_in_table_text(GumboParser* parser, GumboToken* token) {
3079
3199
 
3080
3200
  // http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-incaption
3081
3201
  static bool handle_in_caption(GumboParser* parser, GumboToken* token) {
3082
- if (tag_in(token, kStartTag, GUMBO_TAG_CAPTION, GUMBO_TAG_COL,
3083
- GUMBO_TAG_COLGROUP, GUMBO_TAG_TBODY, GUMBO_TAG_TD,
3084
- GUMBO_TAG_TFOOT, GUMBO_TAG_TH, GUMBO_TAG_THEAD,
3085
- GUMBO_TAG_TR, GUMBO_TAG_LAST) ||
3086
- tag_in(token, kEndTag, GUMBO_TAG_CAPTION, GUMBO_TAG_TABLE,
3087
- GUMBO_TAG_LAST)) {
3202
+ if (tag_is(token, kEndTag, GUMBO_TAG_CAPTION)) {
3088
3203
  if (!has_an_element_in_table_scope(parser, GUMBO_TAG_CAPTION)) {
3089
3204
  parser_add_parse_error(parser, token);
3090
3205
  ignore_token(parser);
3091
3206
  return false;
3207
+ } else {
3208
+ generate_implied_end_tags(parser, GUMBO_TAG_LAST);
3209
+ bool result = true;
3210
+ if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_CAPTION)) {
3211
+ parser_add_parse_error(parser, token);
3212
+ }
3213
+ while (!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_CAPTION))
3214
+ ;
3215
+ clear_active_formatting_elements(parser);
3216
+ set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
3217
+ return result;
3092
3218
  }
3093
- if (!tag_is(token, kEndTag, GUMBO_TAG_CAPTION)) {
3094
- parser_add_parse_error(parser, token);
3095
- parser->_parser_state->_reprocess_current_token = true;
3096
- }
3097
- generate_implied_end_tags(parser, GUMBO_TAG_LAST);
3098
- bool result = true;
3099
- if (!node_tag_is(get_current_node(parser), GUMBO_TAG_CAPTION)) {
3219
+ } else if (tag_in(token, kStartTag,
3220
+ (gumbo_tagset){TAG(CAPTION), TAG(COL), TAG(COLGROUP),
3221
+ TAG(TBODY), TAG(TD), TAG(TFOOT), TAG(TH), TAG(THEAD),
3222
+ TAG(TR)}) ||
3223
+ (tag_is(token, kEndTag, GUMBO_TAG_TABLE))) {
3224
+ if (!has_an_element_in_table_scope(parser, GUMBO_TAG_CAPTION)) {
3100
3225
  parser_add_parse_error(parser, token);
3101
- while (!node_tag_is(get_current_node(parser), GUMBO_TAG_CAPTION)) {
3102
- pop_current_node(parser);
3103
- }
3104
- result = false;
3226
+ ignore_token(parser);
3227
+ return false;
3105
3228
  }
3106
- pop_current_node(parser); // The <caption> itself.
3229
+ while (!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_CAPTION))
3230
+ ;
3107
3231
  clear_active_formatting_elements(parser);
3108
3232
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
3109
- return result;
3110
- } else if (tag_in(token, kEndTag, GUMBO_TAG_BODY, GUMBO_TAG_COL,
3111
- GUMBO_TAG_COLGROUP, GUMBO_TAG_HTML, GUMBO_TAG_TBODY,
3112
- GUMBO_TAG_TD, GUMBO_TAG_TFOOT, GUMBO_TAG_TH,
3113
- GUMBO_TAG_THEAD, GUMBO_TAG_TR, GUMBO_TAG_LAST)) {
3233
+ parser->_parser_state->_reprocess_current_token = true;
3234
+ return true;
3235
+ } else if (tag_in(token, kEndTag,
3236
+ (gumbo_tagset){TAG(BODY), TAG(COL), TAG(COLGROUP), TAG(HTML),
3237
+ TAG(TBODY), TAG(TD), TAG(TFOOT), TAG(TH), TAG(THEAD),
3238
+ TAG(TR)})) {
3114
3239
  parser_add_parse_error(parser, token);
3115
3240
  ignore_token(parser);
3116
3241
  return false;
@@ -3138,24 +3263,33 @@ static bool handle_in_column_group(GumboParser* parser, GumboToken* token) {
3138
3263
  pop_current_node(parser);
3139
3264
  acknowledge_self_closing_tag(parser);
3140
3265
  return true;
3266
+ } else if (tag_is(token, kEndTag, GUMBO_TAG_COLGROUP)) {
3267
+ if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_COLGROUP)) {
3268
+ parser_add_parse_error(parser, token);
3269
+ ignore_token(parser);
3270
+ return false;
3271
+ }
3272
+ pop_current_node(parser);
3273
+ set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
3274
+ return false;
3141
3275
  } else if (tag_is(token, kEndTag, GUMBO_TAG_COL)) {
3142
3276
  parser_add_parse_error(parser, token);
3143
3277
  ignore_token(parser);
3144
3278
  return false;
3145
- } else if (token->type == GUMBO_TOKEN_EOF &&
3146
- get_current_node(parser) == parser->_output->root) {
3147
- return true;
3279
+ } else if (tag_is(token, kStartTag, GUMBO_TAG_TEMPLATE) ||
3280
+ tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
3281
+ return handle_in_head(parser, token);
3282
+ } else if (token->type == GUMBO_TOKEN_EOF) {
3283
+ return handle_in_body(parser, token);
3148
3284
  } else {
3149
- if (get_current_node(parser) == parser->_output->root) {
3285
+ if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_COLGROUP)) {
3150
3286
  parser_add_parse_error(parser, token);
3287
+ ignore_token(parser);
3151
3288
  return false;
3152
3289
  }
3153
- assert(node_tag_is(get_current_node(parser), GUMBO_TAG_COLGROUP));
3154
3290
  pop_current_node(parser);
3155
3291
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
3156
- if (!tag_is(token, kEndTag, GUMBO_TAG_COLGROUP)) {
3157
- parser->_parser_state->_reprocess_current_token = true;
3158
- }
3292
+ parser->_parser_state->_reprocess_current_token = true;
3159
3293
  return true;
3160
3294
  }
3161
3295
  }
@@ -3167,16 +3301,15 @@ static bool handle_in_table_body(GumboParser* parser, GumboToken* token) {
3167
3301
  insert_element_from_token(parser, token);
3168
3302
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_ROW);
3169
3303
  return true;
3170
- } else if (tag_in(token, kStartTag, GUMBO_TAG_TD, GUMBO_TAG_TH,
3171
- GUMBO_TAG_LAST)) {
3304
+ } else if (tag_in(token, kStartTag, (gumbo_tagset){TAG(TD), TAG(TH)})) {
3172
3305
  parser_add_parse_error(parser, token);
3173
3306
  clear_stack_to_table_body_context(parser);
3174
3307
  insert_element_of_tag_type(parser, GUMBO_TAG_TR, GUMBO_INSERTION_IMPLIED);
3175
3308
  parser->_parser_state->_reprocess_current_token = true;
3176
3309
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_ROW);
3177
3310
  return false;
3178
- } else if (tag_in(token, kEndTag, GUMBO_TAG_TBODY, GUMBO_TAG_TFOOT,
3179
- GUMBO_TAG_THEAD, GUMBO_TAG_LAST)) {
3311
+ } else if (tag_in(token, kEndTag,
3312
+ (gumbo_tagset){TAG(TBODY), TAG(TFOOT), TAG(THEAD)})) {
3180
3313
  if (!has_an_element_in_table_scope(parser, token->v.end_tag)) {
3181
3314
  parser_add_parse_error(parser, token);
3182
3315
  ignore_token(parser);
@@ -3186,13 +3319,13 @@ static bool handle_in_table_body(GumboParser* parser, GumboToken* token) {
3186
3319
  pop_current_node(parser);
3187
3320
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
3188
3321
  return true;
3189
- } else if (tag_in(token, kStartTag, GUMBO_TAG_CAPTION, GUMBO_TAG_COL,
3190
- GUMBO_TAG_COLGROUP, GUMBO_TAG_TBODY, GUMBO_TAG_TFOOT,
3191
- GUMBO_TAG_THEAD, GUMBO_TAG_LAST) ||
3322
+ } else if (tag_in(token, kStartTag,
3323
+ (gumbo_tagset){TAG(CAPTION), TAG(COL), TAG(COLGROUP),
3324
+ TAG(TBODY), TAG(TFOOT), TAG(THEAD)}) ||
3192
3325
  tag_is(token, kEndTag, GUMBO_TAG_TABLE)) {
3193
3326
  if (!(has_an_element_in_table_scope(parser, GUMBO_TAG_TBODY) ||
3194
- has_an_element_in_table_scope(parser, GUMBO_TAG_THEAD) ||
3195
- has_an_element_in_table_scope(parser, GUMBO_TAG_TFOOT))) {
3327
+ has_an_element_in_table_scope(parser, GUMBO_TAG_THEAD) ||
3328
+ has_an_element_in_table_scope(parser, GUMBO_TAG_TFOOT))) {
3196
3329
  parser_add_parse_error(parser, token);
3197
3330
  ignore_token(parser);
3198
3331
  return false;
@@ -3202,10 +3335,9 @@ static bool handle_in_table_body(GumboParser* parser, GumboToken* token) {
3202
3335
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
3203
3336
  parser->_parser_state->_reprocess_current_token = true;
3204
3337
  return true;
3205
- } else if (tag_in(token, kEndTag, GUMBO_TAG_BODY, GUMBO_TAG_CAPTION,
3206
- GUMBO_TAG_COL, GUMBO_TAG_TR, GUMBO_TAG_COLGROUP,
3207
- GUMBO_TAG_HTML, GUMBO_TAG_TD, GUMBO_TAG_TH, GUMBO_TAG_LAST))
3208
- {
3338
+ } else if (tag_in(token, kEndTag,
3339
+ (gumbo_tagset){TAG(BODY), TAG(CAPTION), TAG(COL), TAG(TR),
3340
+ TAG(COLGROUP), TAG(HTML), TAG(TD), TAG(TH)})) {
3209
3341
  parser_add_parse_error(parser, token);
3210
3342
  ignore_token(parser);
3211
3343
  return false;
@@ -3216,48 +3348,55 @@ static bool handle_in_table_body(GumboParser* parser, GumboToken* token) {
3216
3348
 
3217
3349
  // http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-intr
3218
3350
  static bool handle_in_row(GumboParser* parser, GumboToken* token) {
3219
- if (tag_in(token, kStartTag, GUMBO_TAG_TH, GUMBO_TAG_TD, GUMBO_TAG_LAST)) {
3351
+ if (tag_in(token, kStartTag, (gumbo_tagset){TAG(TH), TAG(TD)})) {
3220
3352
  clear_stack_to_table_row_context(parser);
3221
3353
  insert_element_from_token(parser, token);
3222
3354
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_CELL);
3223
3355
  add_formatting_element(parser, &kActiveFormattingScopeMarker);
3224
3356
  return true;
3225
- } else if (tag_in(token, kStartTag, GUMBO_TAG_CAPTION, GUMBO_TAG_COLGROUP,
3226
- GUMBO_TAG_TBODY, GUMBO_TAG_TFOOT, GUMBO_TAG_THEAD,
3227
- GUMBO_TAG_TR, GUMBO_TAG_LAST) ||
3228
- tag_in(token, kEndTag, GUMBO_TAG_TR, GUMBO_TAG_TABLE,
3229
- GUMBO_TAG_TBODY, GUMBO_TAG_TFOOT, GUMBO_TAG_THEAD,
3230
- GUMBO_TAG_LAST)) {
3231
- // This case covers 4 clauses of the spec, each of which say "Otherwise, act
3232
- // as if an end tag with the tag name "tr" had been seen." The differences
3233
- // are in error handling and whether the current token is reprocessed.
3234
- GumboTag desired_tag =
3235
- tag_in(token, kEndTag, GUMBO_TAG_TBODY, GUMBO_TAG_TFOOT,
3236
- GUMBO_TAG_THEAD, GUMBO_TAG_LAST)
3237
- ? token->v.end_tag : GUMBO_TAG_TR;
3238
- if (!has_an_element_in_table_scope(parser, desired_tag)) {
3239
- gumbo_debug("Bailing because there is no tag %s in table scope.\nOpen elements:",
3240
- gumbo_normalized_tagname(desired_tag));
3241
- for (int i = 0; i < parser->_parser_state->_open_elements.length; ++i) {
3242
- const GumboNode* node = parser->_parser_state->_open_elements.data[i];
3243
- gumbo_debug("%s\n", gumbo_normalized_tagname(node->v.element.tag));
3244
- }
3357
+ } else if (tag_is(token, kEndTag, GUMBO_TAG_TR)) {
3358
+ if (!has_an_element_in_table_scope(parser, GUMBO_TAG_TR)) {
3359
+ parser_add_parse_error(parser, token);
3360
+ ignore_token(parser);
3361
+ return false;
3362
+ } else {
3363
+ clear_stack_to_table_row_context(parser);
3364
+ pop_current_node(parser);
3365
+ set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
3366
+ return true;
3367
+ }
3368
+ } else if (tag_in(token, kStartTag,
3369
+ (gumbo_tagset){TAG(CAPTION), TAG(COL), TAG(COLGROUP),
3370
+ TAG(TBODY), TAG(TFOOT), TAG(THEAD), TAG(TR)}) ||
3371
+ tag_is(token, kEndTag, GUMBO_TAG_TABLE)) {
3372
+ if (!has_an_element_in_table_scope(parser, GUMBO_TAG_TR)) {
3245
3373
  parser_add_parse_error(parser, token);
3246
3374
  ignore_token(parser);
3247
3375
  return false;
3376
+ } else {
3377
+ clear_stack_to_table_row_context(parser);
3378
+ pop_current_node(parser);
3379
+ set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
3380
+ parser->_parser_state->_reprocess_current_token = true;
3381
+ return true;
3248
3382
  }
3249
- clear_stack_to_table_row_context(parser);
3250
- GumboNode* last_element = pop_current_node(parser);
3251
- assert(node_tag_is(last_element, GUMBO_TAG_TR));
3252
- AVOID_UNUSED_VARIABLE_WARNING(last_element);
3253
- set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
3254
- if (!tag_is(token, kEndTag, GUMBO_TAG_TR)) {
3383
+ } else if (tag_in(token, kEndTag,
3384
+ (gumbo_tagset){TAG(TBODY), TAG(TFOOT), TAG(THEAD)})) {
3385
+ if (!has_an_element_in_table_scope(parser, token->v.end_tag) ||
3386
+ (!has_an_element_in_table_scope(parser, GUMBO_TAG_TR))) {
3387
+ parser_add_parse_error(parser, token);
3388
+ ignore_token(parser);
3389
+ return false;
3390
+ } else {
3391
+ clear_stack_to_table_row_context(parser);
3392
+ pop_current_node(parser);
3393
+ set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
3255
3394
  parser->_parser_state->_reprocess_current_token = true;
3395
+ return true;
3256
3396
  }
3257
- return true;
3258
- } else if (tag_in(token, kEndTag, GUMBO_TAG_BODY, GUMBO_TAG_CAPTION,
3259
- GUMBO_TAG_COL, GUMBO_TAG_COLGROUP, GUMBO_TAG_HTML,
3260
- GUMBO_TAG_TD, GUMBO_TAG_TH, GUMBO_TAG_LAST)) {
3397
+ } else if (tag_in(token, kEndTag,
3398
+ (gumbo_tagset){TAG(BODY), TAG(CAPTION), TAG(COL),
3399
+ TAG(COLGROUP), TAG(HTML), TAG(TD), TAG(TH)})) {
3261
3400
  parser_add_parse_error(parser, token);
3262
3401
  ignore_token(parser);
3263
3402
  return false;
@@ -3268,17 +3407,18 @@ static bool handle_in_row(GumboParser* parser, GumboToken* token) {
3268
3407
 
3269
3408
  // http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-intd
3270
3409
  static bool handle_in_cell(GumboParser* parser, GumboToken* token) {
3271
- if (tag_in(token, kEndTag, GUMBO_TAG_TD, GUMBO_TAG_TH, GUMBO_TAG_LAST)) {
3410
+ if (tag_in(token, kEndTag, (gumbo_tagset){TAG(TD), TAG(TH)})) {
3272
3411
  GumboTag token_tag = token->v.end_tag;
3273
3412
  if (!has_an_element_in_table_scope(parser, token_tag)) {
3274
3413
  parser_add_parse_error(parser, token);
3414
+ ignore_token(parser);
3275
3415
  return false;
3276
3416
  }
3277
3417
  return close_table_cell(parser, token, token_tag);
3278
- } else if (tag_in(token, kStartTag, GUMBO_TAG_CAPTION, GUMBO_TAG_COL,
3279
- GUMBO_TAG_COLGROUP, GUMBO_TAG_TBODY, GUMBO_TAG_TD,
3280
- GUMBO_TAG_TFOOT, GUMBO_TAG_TH, GUMBO_TAG_THEAD,
3281
- GUMBO_TAG_TR, GUMBO_TAG_LAST)) {
3418
+ } else if (tag_in(token, kStartTag,
3419
+ (gumbo_tagset){TAG(CAPTION), TAG(COL), TAG(COLGROUP),
3420
+ TAG(TBODY), TAG(TD), TAG(TFOOT), TAG(TH), TAG(THEAD),
3421
+ TAG(TR)})) {
3282
3422
  gumbo_debug("Handling <td> in cell.\n");
3283
3423
  if (!has_an_element_in_table_scope(parser, GUMBO_TAG_TH) &&
3284
3424
  !has_an_element_in_table_scope(parser, GUMBO_TAG_TD)) {
@@ -3289,15 +3429,13 @@ static bool handle_in_cell(GumboParser* parser, GumboToken* token) {
3289
3429
  }
3290
3430
  parser->_parser_state->_reprocess_current_token = true;
3291
3431
  return close_current_cell(parser, token);
3292
- } else if (tag_in(token, kEndTag, GUMBO_TAG_BODY, GUMBO_TAG_CAPTION,
3293
- GUMBO_TAG_COL, GUMBO_TAG_COLGROUP, GUMBO_TAG_HTML,
3294
- GUMBO_TAG_LAST)) {
3432
+ } else if (tag_in(token, kEndTag, (gumbo_tagset){TAG(BODY), TAG(CAPTION),
3433
+ TAG(COL), TAG(COLGROUP), TAG(HTML)})) {
3295
3434
  parser_add_parse_error(parser, token);
3296
3435
  ignore_token(parser);
3297
3436
  return false;
3298
- } else if (tag_in(token, kEndTag, GUMBO_TAG_TABLE, GUMBO_TAG_TBODY,
3299
- GUMBO_TAG_TFOOT, GUMBO_TAG_THEAD, GUMBO_TAG_TR,
3300
- GUMBO_TAG_LAST)) {
3437
+ } else if (tag_in(token, kEndTag, (gumbo_tagset){TAG(TABLE), TAG(TBODY),
3438
+ TAG(TFOOT), TAG(THEAD), TAG(TR)})) {
3301
3439
  if (!has_an_element_in_table_scope(parser, token->v.end_tag)) {
3302
3440
  parser_add_parse_error(parser, token);
3303
3441
  ignore_token(parser);
@@ -3330,28 +3468,28 @@ static bool handle_in_select(GumboParser* parser, GumboToken* token) {
3330
3468
  } else if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
3331
3469
  return handle_in_body(parser, token);
3332
3470
  } else if (tag_is(token, kStartTag, GUMBO_TAG_OPTION)) {
3333
- if (node_tag_is(get_current_node(parser), GUMBO_TAG_OPTION)) {
3471
+ if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTION)) {
3334
3472
  pop_current_node(parser);
3335
3473
  }
3336
3474
  insert_element_from_token(parser, token);
3337
3475
  return true;
3338
3476
  } else if (tag_is(token, kStartTag, GUMBO_TAG_OPTGROUP)) {
3339
- if (node_tag_is(get_current_node(parser), GUMBO_TAG_OPTION)) {
3477
+ if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTION)) {
3340
3478
  pop_current_node(parser);
3341
3479
  }
3342
- if (node_tag_is(get_current_node(parser), GUMBO_TAG_OPTGROUP)) {
3480
+ if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTGROUP)) {
3343
3481
  pop_current_node(parser);
3344
3482
  }
3345
3483
  insert_element_from_token(parser, token);
3346
3484
  return true;
3347
3485
  } else if (tag_is(token, kEndTag, GUMBO_TAG_OPTGROUP)) {
3348
3486
  GumboVector* open_elements = &parser->_parser_state->_open_elements;
3349
- if (node_tag_is(get_current_node(parser), GUMBO_TAG_OPTION) &&
3350
- node_tag_is(open_elements->data[open_elements->length - 2],
3351
- GUMBO_TAG_OPTGROUP)) {
3487
+ if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTION) &&
3488
+ node_html_tag_is(open_elements->data[open_elements->length - 2],
3489
+ GUMBO_TAG_OPTGROUP)) {
3352
3490
  pop_current_node(parser);
3353
3491
  }
3354
- if (node_tag_is(get_current_node(parser), GUMBO_TAG_OPTGROUP)) {
3492
+ if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTGROUP)) {
3355
3493
  pop_current_node(parser);
3356
3494
  return true;
3357
3495
  } else {
@@ -3360,7 +3498,7 @@ static bool handle_in_select(GumboParser* parser, GumboToken* token) {
3360
3498
  return false;
3361
3499
  }
3362
3500
  } else if (tag_is(token, kEndTag, GUMBO_TAG_OPTION)) {
3363
- if (node_tag_is(get_current_node(parser), GUMBO_TAG_OPTION)) {
3501
+ if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTION)) {
3364
3502
  pop_current_node(parser);
3365
3503
  return true;
3366
3504
  } else {
@@ -3379,10 +3517,12 @@ static bool handle_in_select(GumboParser* parser, GumboToken* token) {
3379
3517
  } else if (tag_is(token, kStartTag, GUMBO_TAG_SELECT)) {
3380
3518
  parser_add_parse_error(parser, token);
3381
3519
  ignore_token(parser);
3382
- close_current_select(parser);
3520
+ if (has_an_element_in_select_scope(parser, GUMBO_TAG_SELECT)) {
3521
+ close_current_select(parser);
3522
+ }
3383
3523
  return false;
3384
- } else if (tag_in(token, kStartTag, GUMBO_TAG_INPUT, GUMBO_TAG_KEYGEN,
3385
- GUMBO_TAG_TEXTAREA, GUMBO_TAG_LAST)) {
3524
+ } else if (tag_in(token, kStartTag,
3525
+ (gumbo_tagset){TAG(INPUT), TAG(KEYGEN), TAG(TEXTAREA)})) {
3386
3526
  parser_add_parse_error(parser, token);
3387
3527
  if (!has_an_element_in_select_scope(parser, GUMBO_TAG_SELECT)) {
3388
3528
  ignore_token(parser);
@@ -3391,14 +3531,12 @@ static bool handle_in_select(GumboParser* parser, GumboToken* token) {
3391
3531
  parser->_parser_state->_reprocess_current_token = true;
3392
3532
  }
3393
3533
  return false;
3394
- } else if (tag_is(token, kStartTag, GUMBO_TAG_SCRIPT)) {
3534
+ } else if (tag_in(token, kStartTag,
3535
+ (gumbo_tagset){TAG(SCRIPT), TAG(TEMPLATE)}) ||
3536
+ tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
3395
3537
  return handle_in_head(parser, token);
3396
3538
  } else if (token->type == GUMBO_TOKEN_EOF) {
3397
- if (get_current_node(parser) != parser->_output->root) {
3398
- parser_add_parse_error(parser, token);
3399
- return false;
3400
- }
3401
- return true;
3539
+ return handle_in_body(parser, token);
3402
3540
  } else {
3403
3541
  parser_add_parse_error(parser, token);
3404
3542
  ignore_token(parser);
@@ -3408,25 +3546,28 @@ static bool handle_in_select(GumboParser* parser, GumboToken* token) {
3408
3546
 
3409
3547
  // http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-inselectintable
3410
3548
  static bool handle_in_select_in_table(GumboParser* parser, GumboToken* token) {
3411
- if (tag_in(token, kStartTag, GUMBO_TAG_CAPTION, GUMBO_TAG_TABLE,
3412
- GUMBO_TAG_TBODY, GUMBO_TAG_TFOOT, GUMBO_TAG_THEAD, GUMBO_TAG_TR,
3413
- GUMBO_TAG_TD, GUMBO_TAG_TH, GUMBO_TAG_LAST)) {
3549
+ if (tag_in(token, kStartTag,
3550
+ (gumbo_tagset){TAG(CAPTION), TAG(TABLE), TAG(TBODY), TAG(TFOOT),
3551
+ TAG(THEAD), TAG(TR), TAG(TD), TAG(TH)})) {
3414
3552
  parser_add_parse_error(parser, token);
3415
3553
  close_current_select(parser);
3416
3554
  parser->_parser_state->_reprocess_current_token = true;
3417
3555
  return false;
3418
- } else if (tag_in(token, kEndTag, GUMBO_TAG_CAPTION, GUMBO_TAG_TABLE,
3419
- GUMBO_TAG_TBODY, GUMBO_TAG_TFOOT, GUMBO_TAG_THEAD,
3420
- GUMBO_TAG_TR, GUMBO_TAG_TD, GUMBO_TAG_TH, GUMBO_TAG_LAST)) {
3556
+ } else if (tag_in(token, kEndTag,
3557
+ (gumbo_tagset){TAG(CAPTION), TAG(TABLE), TAG(TBODY),
3558
+ TAG(TFOOT), TAG(THEAD), TAG(TR), TAG(TD), TAG(TH)})) {
3421
3559
  parser_add_parse_error(parser, token);
3422
- if (has_an_element_in_table_scope(parser, token->v.end_tag)) {
3560
+ if (!has_an_element_in_table_scope(parser, token->v.end_tag)) {
3561
+ ignore_token(parser);
3562
+ return false;
3563
+ } else {
3423
3564
  close_current_select(parser);
3424
- reset_insertion_mode_appropriately(parser);
3565
+ // close_current_select already does the
3566
+ // reset_insertion_mode_appropriately
3567
+ // reset_insertion_mode_appropriately(parser);
3425
3568
  parser->_parser_state->_reprocess_current_token = true;
3426
- } else {
3427
- ignore_token(parser);
3569
+ return false;
3428
3570
  }
3429
- return false;
3430
3571
  } else {
3431
3572
  return handle_in_select(parser, token);
3432
3573
  }
@@ -3434,8 +3575,71 @@ static bool handle_in_select_in_table(GumboParser* parser, GumboToken* token) {
3434
3575
 
3435
3576
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#parsing-main-intemplate
3436
3577
  static bool handle_in_template(GumboParser* parser, GumboToken* token) {
3437
- // TODO(jdtang): Implement this.
3438
- return true;
3578
+ GumboParserState* state = parser->_parser_state;
3579
+ if (token->type == GUMBO_TOKEN_WHITESPACE ||
3580
+ token->type == GUMBO_TOKEN_CHARACTER ||
3581
+ token->type == GUMBO_TOKEN_COMMENT || token->type == GUMBO_TOKEN_NULL ||
3582
+ token->type == GUMBO_TOKEN_DOCTYPE) {
3583
+ return handle_in_body(parser, token);
3584
+ } else if (tag_in(token, kStartTag,
3585
+ (gumbo_tagset){TAG(BASE), TAG(BASEFONT), TAG(BGSOUND),
3586
+ TAG(LINK), TAG(META), TAG(NOFRAMES), TAG(SCRIPT),
3587
+ TAG(STYLE), TAG(TEMPLATE), TAG(TITLE)}) ||
3588
+ tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
3589
+ return handle_in_head(parser, token);
3590
+ } else if (tag_in(
3591
+ token, kStartTag, (gumbo_tagset){TAG(CAPTION), TAG(COLGROUP),
3592
+ TAG(TBODY), TAG(TFOOT), TAG(THEAD)})) {
3593
+ pop_template_insertion_mode(parser);
3594
+ push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
3595
+ set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
3596
+ state->_reprocess_current_token = true;
3597
+ return true;
3598
+ } else if (tag_is(token, kStartTag, GUMBO_TAG_COL)) {
3599
+ pop_template_insertion_mode(parser);
3600
+ push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_COLUMN_GROUP);
3601
+ set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_COLUMN_GROUP);
3602
+ state->_reprocess_current_token = true;
3603
+ return true;
3604
+ } else if (tag_is(token, kStartTag, GUMBO_TAG_TR)) {
3605
+ pop_template_insertion_mode(parser);
3606
+ push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
3607
+ set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
3608
+ state->_reprocess_current_token = true;
3609
+ return true;
3610
+ } else if (tag_in(token, kStartTag, (gumbo_tagset){TAG(TD), TAG(TH)})) {
3611
+ pop_template_insertion_mode(parser);
3612
+ push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_ROW);
3613
+ set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_ROW);
3614
+ state->_reprocess_current_token = true;
3615
+ return true;
3616
+ } else if (token->type == GUMBO_TOKEN_START_TAG) {
3617
+ pop_template_insertion_mode(parser);
3618
+ push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_BODY);
3619
+ set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_BODY);
3620
+ state->_reprocess_current_token = true;
3621
+ return true;
3622
+ } else if (token->type == GUMBO_TOKEN_END_TAG) {
3623
+ parser_add_parse_error(parser, token);
3624
+ ignore_token(parser);
3625
+ return false;
3626
+ } else if (token->type == GUMBO_TOKEN_EOF) {
3627
+ if (!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
3628
+ // Stop parsing.
3629
+ return true;
3630
+ }
3631
+ parser_add_parse_error(parser, token);
3632
+ while (!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_TEMPLATE))
3633
+ ;
3634
+ clear_active_formatting_elements(parser);
3635
+ pop_template_insertion_mode(parser);
3636
+ reset_insertion_mode_appropriately(parser);
3637
+ state->_reprocess_current_token = true;
3638
+ return false;
3639
+ } else {
3640
+ assert(0);
3641
+ return false;
3642
+ }
3439
3643
  }
3440
3644
 
3441
3645
  // http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-afterbody
@@ -3453,10 +3657,15 @@ static bool handle_after_body(GumboParser* parser, GumboToken* token) {
3453
3657
  ignore_token(parser);
3454
3658
  return false;
3455
3659
  } else if (tag_is(token, kEndTag, GUMBO_TAG_HTML)) {
3456
- // TODO(jdtang): Handle fragment parsing algorithm case.
3660
+ /* fragment case: ignore the closing HTML token */
3661
+ if (is_fragment_parser(parser)) {
3662
+ parser_add_parse_error(parser, token);
3663
+ ignore_token(parser);
3664
+ return false;
3665
+ }
3457
3666
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_AFTER_BODY);
3458
3667
  GumboNode* html = parser->_parser_state->_open_elements.data[0];
3459
- assert(node_tag_is(html, GUMBO_TAG_HTML));
3668
+ assert(node_html_tag_is(html, GUMBO_TAG_HTML));
3460
3669
  record_end_of_element(
3461
3670
  parser->_parser_state->_current_token, &html->v.element);
3462
3671
  return true;
@@ -3488,15 +3697,14 @@ static bool handle_in_frameset(GumboParser* parser, GumboToken* token) {
3488
3697
  insert_element_from_token(parser, token);
3489
3698
  return true;
3490
3699
  } else if (tag_is(token, kEndTag, GUMBO_TAG_FRAMESET)) {
3491
- if (node_tag_is(get_current_node(parser), GUMBO_TAG_HTML)) {
3700
+ if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_HTML)) {
3492
3701
  parser_add_parse_error(parser, token);
3493
3702
  ignore_token(parser);
3494
3703
  return false;
3495
3704
  }
3496
3705
  pop_current_node(parser);
3497
- // TODO(jdtang): Add a condition to ignore this for the fragment parsing
3498
- // algorithm.
3499
- if (!node_tag_is(get_current_node(parser), GUMBO_TAG_FRAMESET)) {
3706
+ if (!is_fragment_parser(parser) &&
3707
+ !node_html_tag_is(get_current_node(parser), GUMBO_TAG_FRAMESET)) {
3500
3708
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_FRAMESET);
3501
3709
  }
3502
3710
  return true;
@@ -3508,7 +3716,7 @@ static bool handle_in_frameset(GumboParser* parser, GumboToken* token) {
3508
3716
  } else if (tag_is(token, kStartTag, GUMBO_TAG_NOFRAMES)) {
3509
3717
  return handle_in_head(parser, token);
3510
3718
  } else if (token->type == GUMBO_TOKEN_EOF) {
3511
- if (!node_tag_is(get_current_node(parser), GUMBO_TAG_HTML)) {
3719
+ if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_HTML)) {
3512
3720
  parser_add_parse_error(parser, token);
3513
3721
  return false;
3514
3722
  }
@@ -3536,7 +3744,7 @@ static bool handle_after_frameset(GumboParser* parser, GumboToken* token) {
3536
3744
  return handle_in_body(parser, token);
3537
3745
  } else if (tag_is(token, kEndTag, GUMBO_TAG_HTML)) {
3538
3746
  GumboNode* html = parser->_parser_state->_open_elements.data[0];
3539
- assert(node_tag_is(html, GUMBO_TAG_HTML));
3747
+ assert(node_html_tag_is(html, GUMBO_TAG_HTML));
3540
3748
  record_end_of_element(
3541
3749
  parser->_parser_state->_current_token, &html->v.element);
3542
3750
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_AFTER_FRAMESET);
@@ -3595,31 +3803,14 @@ static bool handle_after_after_frameset(
3595
3803
  // Function pointers for each insertion mode. Keep in sync with
3596
3804
  // insertion_mode.h.
3597
3805
  typedef bool (*TokenHandler)(GumboParser* parser, GumboToken* token);
3598
- static const TokenHandler kTokenHandlers[] = {
3599
- handle_initial,
3600
- handle_before_html,
3601
- handle_before_head,
3602
- handle_in_head,
3603
- handle_in_head_noscript,
3604
- handle_after_head,
3605
- handle_in_body,
3606
- handle_text,
3607
- handle_in_table,
3608
- handle_in_table_text,
3609
- handle_in_caption,
3610
- handle_in_column_group,
3611
- handle_in_table_body,
3612
- handle_in_row,
3613
- handle_in_cell,
3614
- handle_in_select,
3615
- handle_in_select_in_table,
3616
- handle_in_template,
3617
- handle_after_body,
3618
- handle_in_frameset,
3619
- handle_after_frameset,
3620
- handle_after_after_body,
3621
- handle_after_after_frameset
3622
- };
3806
+ static const TokenHandler kTokenHandlers[] = {handle_initial,
3807
+ handle_before_html, handle_before_head, handle_in_head,
3808
+ handle_in_head_noscript, handle_after_head, handle_in_body, handle_text,
3809
+ handle_in_table, handle_in_table_text, handle_in_caption,
3810
+ handle_in_column_group, handle_in_table_body, handle_in_row, handle_in_cell,
3811
+ handle_in_select, handle_in_select_in_table, handle_in_template,
3812
+ handle_after_body, handle_in_frameset, handle_after_frameset,
3813
+ handle_after_after_body, handle_after_after_frameset};
3623
3814
 
3624
3815
  static bool handle_html_content(GumboParser* parser, GumboToken* token) {
3625
3816
  return kTokenHandlers[(unsigned int) parser->_parser_state->_insertion_mode](
@@ -3628,16 +3819,17 @@ static bool handle_html_content(GumboParser* parser, GumboToken* token) {
3628
3819
 
3629
3820
  // http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-inforeign
3630
3821
  static bool handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
3822
+ gumbo_debug("Handling foreign content");
3631
3823
  switch (token->type) {
3632
3824
  case GUMBO_TOKEN_NULL:
3633
3825
  parser_add_parse_error(parser, token);
3634
- token->type = GUMBO_TOKEN_CHARACTER;
3635
3826
  token->v.character = kUtf8ReplacementChar;
3636
3827
  insert_text_token(parser, token);
3637
3828
  return false;
3638
3829
  case GUMBO_TOKEN_WHITESPACE:
3639
3830
  insert_text_token(parser, token);
3640
3831
  return true;
3832
+ case GUMBO_TOKEN_CDATA:
3641
3833
  case GUMBO_TOKEN_CHARACTER:
3642
3834
  insert_text_token(parser, token);
3643
3835
  set_frameset_not_ok(parser);
@@ -3654,35 +3846,44 @@ static bool handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
3654
3846
  break;
3655
3847
  }
3656
3848
  // Order matters for these clauses.
3657
- if (tag_in(token, kStartTag, GUMBO_TAG_B, GUMBO_TAG_BIG,
3658
- GUMBO_TAG_BLOCKQUOTE, GUMBO_TAG_BODY, GUMBO_TAG_BR,
3659
- GUMBO_TAG_CENTER, GUMBO_TAG_CODE, GUMBO_TAG_DD, GUMBO_TAG_DIV,
3660
- GUMBO_TAG_DL, GUMBO_TAG_DT, GUMBO_TAG_EM, GUMBO_TAG_EMBED,
3661
- GUMBO_TAG_H1, GUMBO_TAG_H2, GUMBO_TAG_H3, GUMBO_TAG_H4,
3662
- GUMBO_TAG_H5, GUMBO_TAG_H6, GUMBO_TAG_HEAD, GUMBO_TAG_HR,
3663
- GUMBO_TAG_I, GUMBO_TAG_IMG, GUMBO_TAG_LI, GUMBO_TAG_LISTING,
3664
- GUMBO_TAG_MENU, GUMBO_TAG_META, GUMBO_TAG_NOBR, GUMBO_TAG_OL,
3665
- GUMBO_TAG_P, GUMBO_TAG_PRE, GUMBO_TAG_RUBY, GUMBO_TAG_S,
3666
- GUMBO_TAG_SMALL, GUMBO_TAG_SPAN, GUMBO_TAG_STRONG,
3667
- GUMBO_TAG_STRIKE, GUMBO_TAG_SUB, GUMBO_TAG_SUP,
3668
- GUMBO_TAG_TABLE, GUMBO_TAG_TT, GUMBO_TAG_U, GUMBO_TAG_UL,
3669
- GUMBO_TAG_VAR, GUMBO_TAG_LAST) ||
3670
- (tag_is(token, kStartTag, GUMBO_TAG_FONT) && (
3671
- token_has_attribute(token, "color") ||
3672
- token_has_attribute(token, "face") ||
3673
- token_has_attribute(token, "size")))) {
3849
+ if (tag_in(token, kStartTag,
3850
+ (gumbo_tagset){TAG(B), TAG(BIG), TAG(BLOCKQUOTE), TAG(BODY), TAG(BR),
3851
+ TAG(CENTER), TAG(CODE), TAG(DD), TAG(DIV), TAG(DL), TAG(DT),
3852
+ TAG(EM), TAG(EMBED), TAG(H1), TAG(H2), TAG(H3), TAG(H4), TAG(H5),
3853
+ TAG(H6), TAG(HEAD), TAG(HR), TAG(I), TAG(IMG), TAG(LI),
3854
+ TAG(LISTING), TAG(MENU), TAG(META), TAG(NOBR), TAG(OL), TAG(P),
3855
+ TAG(PRE), TAG(RUBY), TAG(S), TAG(SMALL), TAG(SPAN), TAG(STRONG),
3856
+ TAG(STRIKE), TAG(SUB), TAG(SUP), TAG(TABLE), TAG(TT), TAG(U),
3857
+ TAG(UL), TAG(VAR)}) ||
3858
+ (tag_is(token, kStartTag, GUMBO_TAG_FONT) &&
3859
+ (token_has_attribute(token, "color") ||
3860
+ token_has_attribute(token, "face") ||
3861
+ token_has_attribute(token, "size")))) {
3862
+ /* Parse error */
3674
3863
  parser_add_parse_error(parser, token);
3675
- do {
3676
- pop_current_node(parser);
3677
- } while(!(is_mathml_integration_point(get_current_node(parser)) ||
3678
- is_html_integration_point(get_current_node(parser)) ||
3679
- get_current_node(parser)->v.element.tag_namespace ==
3680
- GUMBO_NAMESPACE_HTML));
3681
- parser->_parser_state->_reprocess_current_token = true;
3682
- return false;
3683
- } else if (token->type == GUMBO_TOKEN_START_TAG) {
3864
+
3865
+ /*
3866
+ * Fragment case: If the parser was originally created for the HTML
3867
+ * fragment parsing algorithm, then act as described in the "any other
3868
+ * start tag" entry below.
3869
+ */
3870
+ if (!is_fragment_parser(parser)) {
3871
+ do {
3872
+ pop_current_node(parser);
3873
+ } while (!(is_mathml_integration_point(get_current_node(parser)) ||
3874
+ is_html_integration_point(get_current_node(parser)) ||
3875
+ get_current_node(parser)->v.element.tag_namespace ==
3876
+ GUMBO_NAMESPACE_HTML));
3877
+ parser->_parser_state->_reprocess_current_token = true;
3878
+ return false;
3879
+ }
3880
+
3881
+ assert(token->type == GUMBO_TOKEN_START_TAG);
3882
+ }
3883
+
3884
+ if (token->type == GUMBO_TOKEN_START_TAG) {
3684
3885
  const GumboNamespaceEnum current_namespace =
3685
- get_current_node(parser)->v.element.tag_namespace;
3886
+ get_adjusted_current_node(parser)->v.element.tag_namespace;
3686
3887
  if (current_namespace == GUMBO_NAMESPACE_MATHML) {
3687
3888
  adjust_mathml_attributes(parser, token);
3688
3889
  }
@@ -3698,8 +3899,8 @@ static bool handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
3698
3899
  acknowledge_self_closing_tag(parser);
3699
3900
  }
3700
3901
  return true;
3701
- // </script> tags are handled like any other end tag, putting the script's
3702
- // text into a text node child and closing the current node.
3902
+ // </script> tags are handled like any other end tag, putting the script's
3903
+ // text into a text node child and closing the current node.
3703
3904
  } else {
3704
3905
  assert(token->type == GUMBO_TOKEN_END_TAG);
3705
3906
  GumboNode* node = get_current_node(parser);
@@ -3715,13 +3916,13 @@ static bool handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
3715
3916
  is_success = false;
3716
3917
  }
3717
3918
  int i = parser->_parser_state->_open_elements.length;
3718
- for( --i; i > 0; ) {
3919
+ for (--i; i > 0;) {
3719
3920
  // Here we move up the stack until we find an HTML element (in which
3720
3921
  // case we do nothing) or we find the element that we're about to
3721
3922
  // close (in which case we pop everything we've seen until that
3722
3923
  // point.)
3723
3924
  gumbo_debug("Foreign %.*s node at %d.\n", node_tagname.length,
3724
- node_tagname.data, i);
3925
+ node_tagname.data, i);
3725
3926
  if (gumbo_string_equals_ignore_case(&node_tagname, &token_tagname)) {
3726
3927
  gumbo_debug("Matches.\n");
3727
3928
  while (pop_current_node(parser) != node) {
@@ -3749,7 +3950,6 @@ static bool handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
3749
3950
  }
3750
3951
  }
3751
3952
 
3752
-
3753
3953
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#tree-construction
3754
3954
  static bool handle_token(GumboParser* parser, GumboToken* token) {
3755
3955
  if (parser->_parser_state->_ignore_next_linefeed &&
@@ -3771,29 +3971,31 @@ static bool handle_token(GumboParser* parser, GumboToken* token) {
3771
3971
  parser->_parser_state->_closed_html_tag = true;
3772
3972
  }
3773
3973
 
3774
- const GumboNode* current_node = get_current_node(parser);
3775
- assert(!current_node || current_node->type == GUMBO_NODE_ELEMENT);
3974
+ const GumboNode* current_node = get_adjusted_current_node(parser);
3975
+ assert(!current_node || current_node->type == GUMBO_NODE_ELEMENT ||
3976
+ current_node->type == GUMBO_NODE_TEMPLATE);
3776
3977
  if (current_node) {
3777
3978
  gumbo_debug("Current node: <%s>.\n",
3778
- gumbo_normalized_tagname(current_node->v.element.tag));
3979
+ gumbo_normalized_tagname(current_node->v.element.tag));
3779
3980
  }
3780
3981
  if (!current_node ||
3781
3982
  current_node->v.element.tag_namespace == GUMBO_NAMESPACE_HTML ||
3782
3983
  (is_mathml_integration_point(current_node) &&
3783
- (token->type == GUMBO_TOKEN_CHARACTER ||
3784
- token->type == GUMBO_TOKEN_WHITESPACE ||
3785
- token->type == GUMBO_TOKEN_NULL ||
3786
- (token->type == GUMBO_TOKEN_START_TAG &&
3787
- !tag_in(token, kStartTag, GUMBO_TAG_MGLYPH, GUMBO_TAG_MALIGNMARK,
3788
- GUMBO_TAG_LAST)))) ||
3984
+ (token->type == GUMBO_TOKEN_CHARACTER ||
3985
+ token->type == GUMBO_TOKEN_WHITESPACE ||
3986
+ token->type == GUMBO_TOKEN_NULL ||
3987
+ (token->type == GUMBO_TOKEN_START_TAG &&
3988
+ !tag_in(token, kStartTag,
3989
+ (gumbo_tagset){TAG(MGLYPH), TAG(MALIGNMARK)})))) ||
3789
3990
  (current_node->v.element.tag_namespace == GUMBO_NAMESPACE_MATHML &&
3790
- node_tag_is(current_node, GUMBO_TAG_ANNOTATION_XML) &&
3791
- tag_is(token, kStartTag, GUMBO_TAG_SVG)) ||
3792
- (is_html_integration_point(current_node) && (
3793
- token->type == GUMBO_TOKEN_START_TAG ||
3794
- token->type == GUMBO_TOKEN_CHARACTER ||
3795
- token->type == GUMBO_TOKEN_NULL ||
3796
- token->type == GUMBO_TOKEN_WHITESPACE)) ||
3991
+ node_qualified_tag_is(
3992
+ current_node, GUMBO_NAMESPACE_MATHML, GUMBO_TAG_ANNOTATION_XML) &&
3993
+ tag_is(token, kStartTag, GUMBO_TAG_SVG)) ||
3994
+ (is_html_integration_point(current_node) &&
3995
+ (token->type == GUMBO_TOKEN_START_TAG ||
3996
+ token->type == GUMBO_TOKEN_CHARACTER ||
3997
+ token->type == GUMBO_TOKEN_NULL ||
3998
+ token->type == GUMBO_TOKEN_WHITESPACE)) ||
3797
3999
  token->type == GUMBO_TOKEN_EOF) {
3798
4000
  return handle_html_content(parser, token);
3799
4001
  } else {
@@ -3801,6 +4003,66 @@ static bool handle_token(GumboParser* parser, GumboToken* token) {
3801
4003
  }
3802
4004
  }
3803
4005
 
4006
+ static void fragment_parser_init(GumboParser* parser, GumboTag fragment_ctx,
4007
+ GumboNamespaceEnum fragment_namespace) {
4008
+ GumboNode* root;
4009
+ assert(fragment_ctx != GUMBO_TAG_LAST);
4010
+
4011
+ // 3
4012
+ parser->_parser_state->_fragment_ctx = create_element(parser, fragment_ctx);
4013
+ parser->_parser_state->_fragment_ctx->v.element.tag_namespace =
4014
+ fragment_namespace;
4015
+
4016
+ // 4
4017
+ if (fragment_namespace == GUMBO_NAMESPACE_HTML) {
4018
+ // Non-HTML namespaces always start in the DATA state.
4019
+ switch (fragment_ctx) {
4020
+ case GUMBO_TAG_TITLE:
4021
+ case GUMBO_TAG_TEXTAREA:
4022
+ gumbo_tokenizer_set_state(parser, GUMBO_LEX_RCDATA);
4023
+ break;
4024
+
4025
+ case GUMBO_TAG_STYLE:
4026
+ case GUMBO_TAG_XMP:
4027
+ case GUMBO_TAG_IFRAME:
4028
+ case GUMBO_TAG_NOEMBED:
4029
+ case GUMBO_TAG_NOFRAMES:
4030
+ gumbo_tokenizer_set_state(parser, GUMBO_LEX_RAWTEXT);
4031
+ break;
4032
+
4033
+ case GUMBO_TAG_SCRIPT:
4034
+ gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT);
4035
+ break;
4036
+
4037
+ case GUMBO_TAG_NOSCRIPT:
4038
+ /* scripting is disabled in Gumbo, so leave the tokenizer
4039
+ * in the default data state */
4040
+ break;
4041
+
4042
+ case GUMBO_TAG_PLAINTEXT:
4043
+ gumbo_tokenizer_set_state(parser, GUMBO_LEX_PLAINTEXT);
4044
+ break;
4045
+
4046
+ default:
4047
+ /* default data state */
4048
+ break;
4049
+ }
4050
+ }
4051
+
4052
+ // 5. 6. 7.
4053
+ root = insert_element_of_tag_type(
4054
+ parser, GUMBO_TAG_HTML, GUMBO_INSERTION_IMPLIED);
4055
+ parser->_output->root = root;
4056
+
4057
+ // 8.
4058
+ if (fragment_ctx == GUMBO_TAG_TEMPLATE) {
4059
+ push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TEMPLATE);
4060
+ }
4061
+
4062
+ // 10.
4063
+ reset_insertion_mode_appropriately(parser);
4064
+ }
4065
+
3804
4066
  GumboOutput* gumbo_parse(const char* buffer) {
3805
4067
  return gumbo_parse_with_options(
3806
4068
  &kGumboDefaultOptions, buffer, strlen(buffer));
@@ -3814,6 +4076,11 @@ GumboOutput* gumbo_parse_with_options(
3814
4076
  gumbo_tokenizer_state_init(&parser, buffer, length);
3815
4077
  parser_state_init(&parser);
3816
4078
 
4079
+ if (options->fragment_context != GUMBO_TAG_LAST) {
4080
+ fragment_parser_init(
4081
+ &parser, options->fragment_context, options->fragment_namespace);
4082
+ }
4083
+
3817
4084
  GumboParserState* state = parser._parser_state;
3818
4085
  gumbo_debug("Parsing %.*s.\n", length, buffer);
3819
4086
 
@@ -3823,14 +4090,15 @@ GumboOutput* gumbo_parse_with_options(
3823
4090
 
3824
4091
  GumboToken token;
3825
4092
  bool has_error = false;
4093
+
3826
4094
  do {
3827
4095
  if (state->_reprocess_current_token) {
3828
4096
  state->_reprocess_current_token = false;
3829
4097
  } else {
3830
4098
  GumboNode* current_node = get_current_node(&parser);
3831
- gumbo_tokenizer_set_is_current_node_foreign(
3832
- &parser, current_node &&
3833
- current_node->v.element.tag_namespace != GUMBO_NAMESPACE_HTML);
4099
+ gumbo_tokenizer_set_is_current_node_foreign(&parser,
4100
+ current_node &&
4101
+ current_node->v.element.tag_namespace != GUMBO_NAMESPACE_HTML);
3834
4102
  has_error = !gumbo_lex(&parser, &token) || has_error;
3835
4103
  }
3836
4104
  const char* token_type = "text";
@@ -3850,14 +4118,13 @@ GumboOutput* gumbo_parse_with_options(
3850
4118
  default:
3851
4119
  break;
3852
4120
  }
3853
- gumbo_debug("Handling %s token @%d:%d in state %d.\n",
3854
- (char*) token_type, token.position.line, token.position.column,
3855
- state->_insertion_mode);
4121
+ gumbo_debug("Handling %s token @%d:%d in state %d.\n", (char*) token_type,
4122
+ token.position.line, token.position.column, state->_insertion_mode);
3856
4123
 
3857
4124
  state->_current_token = &token;
3858
4125
  state->_self_closing_flag_acknowledged =
3859
4126
  !(token.type == GUMBO_TOKEN_START_TAG &&
3860
- token.v.start_tag.is_self_closing);
4127
+ token.v.start_tag.is_self_closing);
3861
4128
 
3862
4129
  has_error = !handle_token(&parser, &token) || has_error;
3863
4130
 
@@ -3913,7 +4180,7 @@ void gumbo_destroy_output(const GumboOptions* options, GumboOutput* output) {
3913
4180
  GumboParser parser;
3914
4181
  parser._options = options;
3915
4182
  destroy_node(&parser, output->document);
3916
- for (int i = 0; i < output->errors.length; ++i) {
4183
+ for (unsigned int i = 0; i < output->errors.length; ++i) {
3917
4184
  gumbo_error_destroy(&parser, output->errors.data[i]);
3918
4185
  }
3919
4186
  gumbo_vector_destroy(&parser, &output->errors);