nokogumbo 1.3.0 → 1.5.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -32,48 +32,55 @@
32
32
  #include "util.h"
33
33
  #include "vector.h"
34
34
 
35
-
36
35
  #define AVOID_UNUSED_VARIABLE_WARNING(i) (void)(i)
37
36
 
38
- #define GUMBO_STRING(literal) { literal, sizeof(literal) - 1 }
39
- #define TERMINATOR { "", 0 }
37
+ #define GUMBO_STRING(literal) \
38
+ { literal, sizeof(literal) - 1 }
39
+ #define TERMINATOR \
40
+ { "", 0 }
40
41
 
41
- static void* malloc_wrapper(void* unused, size_t size) {
42
- return malloc(size);
43
- }
42
+ typedef char gumbo_tagset[GUMBO_TAG_LAST];
43
+ #define TAG(tag) [GUMBO_TAG_##tag] = (1 << GUMBO_NAMESPACE_HTML)
44
+ #define TAG_SVG(tag) [GUMBO_TAG_##tag] = (1 << GUMBO_NAMESPACE_SVG)
45
+ #define TAG_MATHML(tag) [GUMBO_TAG_##tag] = (1 << GUMBO_NAMESPACE_MATHML)
44
46
 
45
- static void free_wrapper(void* unused, void* ptr) {
46
- free(ptr);
47
- }
47
+ #define TAGSET_INCLUDES(tagset, namespace, tag) \
48
+ (tag < GUMBO_TAG_LAST && tagset[(int) tag] == (1 << (int) namespace))
48
49
 
49
- const GumboOptions kGumboDefaultOptions = {
50
- &malloc_wrapper,
51
- &free_wrapper,
52
- NULL,
53
- 8,
54
- false,
55
- -1,
56
- };
50
+ // Selected forward declarations, needed because it is getting hard to find
51
+ // a definition order that avoids them.
52
+ static bool node_html_tag_is(const GumboNode*, GumboTag);
53
+ static GumboInsertionMode get_current_template_insertion_mode(
54
+ const GumboParser*);
55
+ static bool handle_in_template(GumboParser*, GumboToken*);
56
+ static void destroy_node(GumboParser*, GumboNode*);
57
+
58
+ static void* malloc_wrapper(void* unused, size_t size) { return malloc(size); }
59
+
60
+ static void free_wrapper(void* unused, void* ptr) { free(ptr); }
61
+
62
+ const GumboOptions kGumboDefaultOptions = {&malloc_wrapper, &free_wrapper, NULL,
63
+ 8, false, -1, GUMBO_TAG_LAST, GUMBO_NAMESPACE_HTML};
57
64
 
58
65
  static const GumboStringPiece kDoctypeHtml = GUMBO_STRING("html");
59
- static const GumboStringPiece kPublicIdHtml4_0 = GUMBO_STRING(
60
- "-//W3C//DTD HTML 4.0//EN");
61
- static const GumboStringPiece kPublicIdHtml4_01 = GUMBO_STRING(
62
- "-//W3C//DTD HTML 4.01//EN");
63
- static const GumboStringPiece kPublicIdXhtml1_0 = GUMBO_STRING(
64
- "-//W3C//DTD XHTML 1.0 Strict//EN");
65
- static const GumboStringPiece kPublicIdXhtml1_1 = GUMBO_STRING(
66
- "-//W3C//DTD XHTML 1.1//EN");
67
- static const GumboStringPiece kSystemIdRecHtml4_0 = GUMBO_STRING(
68
- "http://www.w3.org/TR/REC-html40/strict.dtd");
69
- static const GumboStringPiece kSystemIdHtml4 = GUMBO_STRING(
70
- "http://www.w3.org/TR/html4/strict.dtd");
71
- static const GumboStringPiece kSystemIdXhtmlStrict1_1 = GUMBO_STRING(
72
- "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd");
73
- static const GumboStringPiece kSystemIdXhtml1_1 = GUMBO_STRING(
74
- "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd");
75
- static const GumboStringPiece kSystemIdLegacyCompat = GUMBO_STRING(
76
- "about:legacy-compat");
66
+ static const GumboStringPiece kPublicIdHtml4_0 =
67
+ GUMBO_STRING("-//W3C//DTD HTML 4.0//EN");
68
+ static const GumboStringPiece kPublicIdHtml4_01 =
69
+ GUMBO_STRING("-//W3C//DTD HTML 4.01//EN");
70
+ static const GumboStringPiece kPublicIdXhtml1_0 =
71
+ GUMBO_STRING("-//W3C//DTD XHTML 1.0 Strict//EN");
72
+ static const GumboStringPiece kPublicIdXhtml1_1 =
73
+ GUMBO_STRING("-//W3C//DTD XHTML 1.1//EN");
74
+ static const GumboStringPiece kSystemIdRecHtml4_0 =
75
+ GUMBO_STRING("http://www.w3.org/TR/REC-html40/strict.dtd");
76
+ static const GumboStringPiece kSystemIdHtml4 =
77
+ GUMBO_STRING("http://www.w3.org/TR/html4/strict.dtd");
78
+ static const GumboStringPiece kSystemIdXhtmlStrict1_1 =
79
+ GUMBO_STRING("http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd");
80
+ static const GumboStringPiece kSystemIdXhtml1_1 =
81
+ GUMBO_STRING("http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd");
82
+ static const GumboStringPiece kSystemIdLegacyCompat =
83
+ GUMBO_STRING("about:legacy-compat");
77
84
 
78
85
  // The doctype arrays have an explicit terminator because we want to pass them
79
86
  // to a helper function, and passing them as a pointer discards sizeof
@@ -81,96 +88,86 @@ static const GumboStringPiece kSystemIdLegacyCompat = GUMBO_STRING(
81
88
  // over them use sizeof directly instead of a terminator.
82
89
 
83
90
  static const GumboStringPiece kQuirksModePublicIdPrefixes[] = {
84
- GUMBO_STRING("+//Silmaril//dtd html Pro v0r11 19970101//"),
85
- GUMBO_STRING("-//AdvaSoft Ltd//DTD HTML 3.0 asWedit + extensions//"),
86
- GUMBO_STRING("-//AS//DTD HTML 3.0 asWedit + extensions//"),
87
- GUMBO_STRING("-//IETF//DTD HTML 2.0 Level 1//"),
88
- GUMBO_STRING("-//IETF//DTD HTML 2.0 Level 2//"),
89
- GUMBO_STRING("-//IETF//DTD HTML 2.0 Strict Level 1//"),
90
- GUMBO_STRING("-//IETF//DTD HTML 2.0 Strict Level 2//"),
91
- GUMBO_STRING("-//IETF//DTD HTML 2.0 Strict//"),
92
- GUMBO_STRING("-//IETF//DTD HTML 2.0//"),
93
- GUMBO_STRING("-//IETF//DTD HTML 2.1E//"),
94
- GUMBO_STRING("-//IETF//DTD HTML 3.0//"),
95
- GUMBO_STRING("-//IETF//DTD HTML 3.2 Final//"),
96
- GUMBO_STRING("-//IETF//DTD HTML 3.2//"),
97
- GUMBO_STRING("-//IETF//DTD HTML 3//"),
98
- GUMBO_STRING("-//IETF//DTD HTML Level 0//"),
99
- GUMBO_STRING("-//IETF//DTD HTML Level 1//"),
100
- GUMBO_STRING("-//IETF//DTD HTML Level 2//"),
101
- GUMBO_STRING("-//IETF//DTD HTML Level 3//"),
102
- GUMBO_STRING("-//IETF//DTD HTML Strict Level 0//"),
103
- GUMBO_STRING("-//IETF//DTD HTML Strict Level 1//"),
104
- GUMBO_STRING("-//IETF//DTD HTML Strict Level 2//"),
105
- GUMBO_STRING("-//IETF//DTD HTML Strict Level 3//"),
106
- GUMBO_STRING("-//IETF//DTD HTML Strict//"),
107
- GUMBO_STRING("-//IETF//DTD HTML//"),
108
- GUMBO_STRING("-//Metrius//DTD Metrius Presentational//"),
109
- GUMBO_STRING("-//Microsoft//DTD Internet Explorer 2.0 HTML Strict//"),
110
- GUMBO_STRING("-//Microsoft//DTD Internet Explorer 2.0 HTML//"),
111
- GUMBO_STRING("-//Microsoft//DTD Internet Explorer 2.0 Tables//"),
112
- GUMBO_STRING("-//Microsoft//DTD Internet Explorer 3.0 HTML Strict//"),
113
- GUMBO_STRING("-//Microsoft//DTD Internet Explorer 3.0 HTML//"),
114
- GUMBO_STRING("-//Microsoft//DTD Internet Explorer 3.0 Tables//"),
115
- GUMBO_STRING("-//Netscape Comm. Corp.//DTD HTML//"),
116
- GUMBO_STRING("-//Netscape Comm. Corp.//DTD Strict HTML//"),
117
- GUMBO_STRING("-//O'Reilly and Associates//DTD HTML 2.0//"),
118
- GUMBO_STRING("-//O'Reilly and Associates//DTD HTML Extended 1.0//"),
119
- GUMBO_STRING("-//O'Reilly and Associates//DTD HTML Extended Relaxed 1.0//"),
120
- GUMBO_STRING("-//SoftQuad Software//DTD HoTMetaL PRO 6.0::19990601::)"
121
- "extensions to HTML 4.0//"),
122
- GUMBO_STRING("-//SoftQuad//DTD HoTMetaL PRO 4.0::19971010::"
123
- "extensions to HTML 4.0//"),
124
- GUMBO_STRING("-//Spyglass//DTD HTML 2.0 Extended//"),
125
- GUMBO_STRING("-//SQ//DTD HTML 2.0 HoTMetaL + extensions//"),
126
- GUMBO_STRING("-//Sun Microsystems Corp.//DTD HotJava HTML//"),
127
- GUMBO_STRING("-//Sun Microsystems Corp.//DTD HotJava Strict HTML//"),
128
- GUMBO_STRING("-//W3C//DTD HTML 3 1995-03-24//"),
129
- GUMBO_STRING("-//W3C//DTD HTML 3.2 Draft//"),
130
- GUMBO_STRING("-//W3C//DTD HTML 3.2 Final//"),
131
- GUMBO_STRING("-//W3C//DTD HTML 3.2//"),
132
- GUMBO_STRING("-//W3C//DTD HTML 3.2S Draft//"),
133
- GUMBO_STRING("-//W3C//DTD HTML 4.0 Frameset//"),
134
- GUMBO_STRING("-//W3C//DTD HTML 4.0 Transitional//"),
135
- GUMBO_STRING("-//W3C//DTD HTML Experimental 19960712//"),
136
- GUMBO_STRING("-//W3C//DTD HTML Experimental 970421//"),
137
- GUMBO_STRING("-//W3C//DTD W3 HTML//"),
138
- GUMBO_STRING("-//W3O//DTD W3 HTML 3.0//"),
139
- GUMBO_STRING("-//WebTechs//DTD Mozilla HTML 2.0//"),
140
- GUMBO_STRING("-//WebTechs//DTD Mozilla HTML//"),
141
- TERMINATOR
142
- };
91
+ GUMBO_STRING("+//Silmaril//dtd html Pro v0r11 19970101//"),
92
+ GUMBO_STRING("-//AdvaSoft Ltd//DTD HTML 3.0 asWedit + extensions//"),
93
+ GUMBO_STRING("-//AS//DTD HTML 3.0 asWedit + extensions//"),
94
+ GUMBO_STRING("-//IETF//DTD HTML 2.0 Level 1//"),
95
+ GUMBO_STRING("-//IETF//DTD HTML 2.0 Level 2//"),
96
+ GUMBO_STRING("-//IETF//DTD HTML 2.0 Strict Level 1//"),
97
+ GUMBO_STRING("-//IETF//DTD HTML 2.0 Strict Level 2//"),
98
+ GUMBO_STRING("-//IETF//DTD HTML 2.0 Strict//"),
99
+ GUMBO_STRING("-//IETF//DTD HTML 2.0//"),
100
+ GUMBO_STRING("-//IETF//DTD HTML 2.1E//"),
101
+ GUMBO_STRING("-//IETF//DTD HTML 3.0//"),
102
+ GUMBO_STRING("-//IETF//DTD HTML 3.2 Final//"),
103
+ GUMBO_STRING("-//IETF//DTD HTML 3.2//"),
104
+ GUMBO_STRING("-//IETF//DTD HTML 3//"),
105
+ GUMBO_STRING("-//IETF//DTD HTML Level 0//"),
106
+ GUMBO_STRING("-//IETF//DTD HTML Level 1//"),
107
+ GUMBO_STRING("-//IETF//DTD HTML Level 2//"),
108
+ GUMBO_STRING("-//IETF//DTD HTML Level 3//"),
109
+ GUMBO_STRING("-//IETF//DTD HTML Strict Level 0//"),
110
+ GUMBO_STRING("-//IETF//DTD HTML Strict Level 1//"),
111
+ GUMBO_STRING("-//IETF//DTD HTML Strict Level 2//"),
112
+ GUMBO_STRING("-//IETF//DTD HTML Strict Level 3//"),
113
+ GUMBO_STRING("-//IETF//DTD HTML Strict//"),
114
+ GUMBO_STRING("-//IETF//DTD HTML//"),
115
+ GUMBO_STRING("-//Metrius//DTD Metrius Presentational//"),
116
+ GUMBO_STRING("-//Microsoft//DTD Internet Explorer 2.0 HTML Strict//"),
117
+ GUMBO_STRING("-//Microsoft//DTD Internet Explorer 2.0 HTML//"),
118
+ GUMBO_STRING("-//Microsoft//DTD Internet Explorer 2.0 Tables//"),
119
+ GUMBO_STRING("-//Microsoft//DTD Internet Explorer 3.0 HTML Strict//"),
120
+ GUMBO_STRING("-//Microsoft//DTD Internet Explorer 3.0 HTML//"),
121
+ GUMBO_STRING("-//Microsoft//DTD Internet Explorer 3.0 Tables//"),
122
+ GUMBO_STRING("-//Netscape Comm. Corp.//DTD HTML//"),
123
+ GUMBO_STRING("-//Netscape Comm. Corp.//DTD Strict HTML//"),
124
+ GUMBO_STRING("-//O'Reilly and Associates//DTD HTML 2.0//"),
125
+ GUMBO_STRING("-//O'Reilly and Associates//DTD HTML Extended 1.0//"),
126
+ GUMBO_STRING("-//O'Reilly and Associates//DTD HTML Extended Relaxed 1.0//"),
127
+ GUMBO_STRING(
128
+ "-//SoftQuad Software//DTD HoTMetaL PRO 6.0::19990601::)"
129
+ "extensions to HTML 4.0//"),
130
+ GUMBO_STRING(
131
+ "-//SoftQuad//DTD HoTMetaL PRO 4.0::19971010::"
132
+ "extensions to HTML 4.0//"),
133
+ GUMBO_STRING("-//Spyglass//DTD HTML 2.0 Extended//"),
134
+ GUMBO_STRING("-//SQ//DTD HTML 2.0 HoTMetaL + extensions//"),
135
+ GUMBO_STRING("-//Sun Microsystems Corp.//DTD HotJava HTML//"),
136
+ GUMBO_STRING("-//Sun Microsystems Corp.//DTD HotJava Strict HTML//"),
137
+ GUMBO_STRING("-//W3C//DTD HTML 3 1995-03-24//"),
138
+ GUMBO_STRING("-//W3C//DTD HTML 3.2 Draft//"),
139
+ GUMBO_STRING("-//W3C//DTD HTML 3.2 Final//"),
140
+ GUMBO_STRING("-//W3C//DTD HTML 3.2//"),
141
+ GUMBO_STRING("-//W3C//DTD HTML 3.2S Draft//"),
142
+ GUMBO_STRING("-//W3C//DTD HTML 4.0 Frameset//"),
143
+ GUMBO_STRING("-//W3C//DTD HTML 4.0 Transitional//"),
144
+ GUMBO_STRING("-//W3C//DTD HTML Experimental 19960712//"),
145
+ GUMBO_STRING("-//W3C//DTD HTML Experimental 970421//"),
146
+ GUMBO_STRING("-//W3C//DTD W3 HTML//"),
147
+ GUMBO_STRING("-//W3O//DTD W3 HTML 3.0//"),
148
+ GUMBO_STRING("-//WebTechs//DTD Mozilla HTML 2.0//"),
149
+ GUMBO_STRING("-//WebTechs//DTD Mozilla HTML//"), TERMINATOR};
143
150
 
144
151
  static const GumboStringPiece kQuirksModePublicIdExactMatches[] = {
145
- GUMBO_STRING("-//W3O//DTD W3 HTML Strict 3.0//EN//"),
146
- GUMBO_STRING("-/W3C/DTD HTML 4.0 Transitional/EN"),
147
- GUMBO_STRING("HTML"),
148
- TERMINATOR
149
- };
152
+ GUMBO_STRING("-//W3O//DTD W3 HTML Strict 3.0//EN//"),
153
+ GUMBO_STRING("-/W3C/DTD HTML 4.0 Transitional/EN"), GUMBO_STRING("HTML"),
154
+ TERMINATOR};
150
155
 
151
156
  static const GumboStringPiece kQuirksModeSystemIdExactMatches[] = {
152
- GUMBO_STRING("http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"),
153
- TERMINATOR
154
- };
157
+ GUMBO_STRING("http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"),
158
+ TERMINATOR};
155
159
 
156
160
  static const GumboStringPiece kLimitedQuirksPublicIdPrefixes[] = {
157
- GUMBO_STRING("-//W3C//DTD XHTML 1.0 Frameset//"),
158
- GUMBO_STRING("-//W3C//DTD XHTML 1.0 Transitional//"),
159
- TERMINATOR
160
- };
161
+ GUMBO_STRING("-//W3C//DTD XHTML 1.0 Frameset//"),
162
+ GUMBO_STRING("-//W3C//DTD XHTML 1.0 Transitional//"), TERMINATOR};
161
163
 
162
- static const GumboStringPiece kLimitedQuirksRequiresSystemIdPublicIdPrefixes[] = {
163
- GUMBO_STRING("-//W3C//DTD HTML 4.01 Frameset//"),
164
- GUMBO_STRING("-//W3C//DTD HTML 4.01 Transitional//"),
165
- TERMINATOR
166
- };
164
+ static const GumboStringPiece kLimitedQuirksRequiresSystemIdPublicIdPrefixes[] =
165
+ {GUMBO_STRING("-//W3C//DTD HTML 4.01 Frameset//"),
166
+ GUMBO_STRING("-//W3C//DTD HTML 4.01 Transitional//"), TERMINATOR};
167
167
 
168
168
  // Indexed by GumboNamespaceEnum; keep in sync with that.
169
- static const char* kLegalXmlns[] = {
170
- "http://www.w3.org/1999/xhtml",
171
- "http://www.w3.org/2000/svg",
172
- "http://www.w3.org/1998/Math/MathML"
173
- };
169
+ static const char* kLegalXmlns[] = {"http://www.w3.org/1999/xhtml",
170
+ "http://www.w3.org/2000/svg", "http://www.w3.org/1998/Math/MathML"};
174
171
 
175
172
  typedef struct _ReplacementEntry {
176
173
  const GumboStringPiece from;
@@ -178,112 +175,112 @@ typedef struct _ReplacementEntry {
178
175
  } ReplacementEntry;
179
176
 
180
177
  #define REPLACEMENT_ENTRY(from, to) \
181
- { GUMBO_STRING(from), GUMBO_STRING(to) }
178
+ { GUMBO_STRING(from), GUMBO_STRING(to) }
182
179
 
183
180
  // Static data for SVG attribute replacements.
184
- // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#adjust-svg-attributes
181
+ // https://html.spec.whatwg.org/multipage/parsing.html#adjust-svg-attributes
185
182
  static const ReplacementEntry kSvgAttributeReplacements[] = {
186
- REPLACEMENT_ENTRY("attributename", "attributeName"),
187
- REPLACEMENT_ENTRY("attributetype", "attributeType"),
188
- REPLACEMENT_ENTRY("basefrequency", "baseFrequency"),
189
- REPLACEMENT_ENTRY("baseprofile", "baseProfile"),
190
- REPLACEMENT_ENTRY("calcmode", "calcMode"),
191
- REPLACEMENT_ENTRY("clippathunits", "clipPathUnits"),
192
- REPLACEMENT_ENTRY("contentscripttype", "contentScriptType"),
193
- REPLACEMENT_ENTRY("contentstyletype", "contentStyleType"),
194
- REPLACEMENT_ENTRY("diffuseconstant", "diffuseConstant"),
195
- REPLACEMENT_ENTRY("edgemode", "edgeMode"),
196
- REPLACEMENT_ENTRY("externalresourcesrequired", "externalResourcesRequired"),
197
- REPLACEMENT_ENTRY("filterres", "filterRes"),
198
- REPLACEMENT_ENTRY("filterunits", "filterUnits"),
199
- REPLACEMENT_ENTRY("glyphref", "glyphRef"),
200
- REPLACEMENT_ENTRY("gradienttransform", "gradientTransform"),
201
- REPLACEMENT_ENTRY("gradientunits", "gradientUnits"),
202
- REPLACEMENT_ENTRY("kernelmatrix", "kernelMatrix"),
203
- REPLACEMENT_ENTRY("kernelunitlength", "kernelUnitLength"),
204
- REPLACEMENT_ENTRY("keypoints", "keyPoints"),
205
- REPLACEMENT_ENTRY("keysplines", "keySplines"),
206
- REPLACEMENT_ENTRY("keytimes", "keyTimes"),
207
- REPLACEMENT_ENTRY("lengthadjust", "lengthAdjust"),
208
- REPLACEMENT_ENTRY("limitingconeangle", "limitingConeAngle"),
209
- REPLACEMENT_ENTRY("markerheight", "markerHeight"),
210
- REPLACEMENT_ENTRY("markerunits", "markerUnits"),
211
- REPLACEMENT_ENTRY("markerwidth", "markerWidth"),
212
- REPLACEMENT_ENTRY("maskcontentunits", "maskContentUnits"),
213
- REPLACEMENT_ENTRY("maskunits", "maskUnits"),
214
- REPLACEMENT_ENTRY("numoctaves", "numOctaves"),
215
- REPLACEMENT_ENTRY("pathlength", "pathLength"),
216
- REPLACEMENT_ENTRY("patterncontentunits", "patternContentUnits"),
217
- REPLACEMENT_ENTRY("patterntransform", "patternTransform"),
218
- REPLACEMENT_ENTRY("patternunits", "patternUnits"),
219
- REPLACEMENT_ENTRY("pointsatx", "pointsAtX"),
220
- REPLACEMENT_ENTRY("pointsaty", "pointsAtY"),
221
- REPLACEMENT_ENTRY("pointsatz", "pointsAtZ"),
222
- REPLACEMENT_ENTRY("preservealpha", "preserveAlpha"),
223
- REPLACEMENT_ENTRY("preserveaspectratio", "preserveAspectRatio"),
224
- REPLACEMENT_ENTRY("primitiveunits", "primitiveUnits"),
225
- REPLACEMENT_ENTRY("refx", "refX"),
226
- REPLACEMENT_ENTRY("refy", "refY"),
227
- REPLACEMENT_ENTRY("repeatcount", "repeatCount"),
228
- REPLACEMENT_ENTRY("repeatdur", "repeatDur"),
229
- REPLACEMENT_ENTRY("requiredextensions", "requiredExtensions"),
230
- REPLACEMENT_ENTRY("requiredfeatures", "requiredFeatures"),
231
- REPLACEMENT_ENTRY("specularconstant", "specularConstant"),
232
- REPLACEMENT_ENTRY("specularexponent", "specularExponent"),
233
- REPLACEMENT_ENTRY("spreadmethod", "spreadMethod"),
234
- REPLACEMENT_ENTRY("startoffset", "startOffset"),
235
- REPLACEMENT_ENTRY("stddeviation", "stdDeviation"),
236
- REPLACEMENT_ENTRY("stitchtiles", "stitchTiles"),
237
- REPLACEMENT_ENTRY("surfacescale", "surfaceScale"),
238
- REPLACEMENT_ENTRY("systemlanguage", "systemLanguage"),
239
- REPLACEMENT_ENTRY("tablevalues", "tableValues"),
240
- REPLACEMENT_ENTRY("targetx", "targetX"),
241
- REPLACEMENT_ENTRY("targety", "targetY"),
242
- REPLACEMENT_ENTRY("textlength", "textLength"),
243
- REPLACEMENT_ENTRY("viewbox", "viewBox"),
244
- REPLACEMENT_ENTRY("viewtarget", "viewTarget"),
245
- REPLACEMENT_ENTRY("xchannelselector", "xChannelSelector"),
246
- REPLACEMENT_ENTRY("ychannelselector", "yChannelSelector"),
247
- REPLACEMENT_ENTRY("zoomandpan", "zoomAndPan"),
183
+ REPLACEMENT_ENTRY("attributename", "attributeName"),
184
+ REPLACEMENT_ENTRY("attributetype", "attributeType"),
185
+ REPLACEMENT_ENTRY("basefrequency", "baseFrequency"),
186
+ REPLACEMENT_ENTRY("baseprofile", "baseProfile"),
187
+ REPLACEMENT_ENTRY("calcmode", "calcMode"),
188
+ REPLACEMENT_ENTRY("clippathunits", "clipPathUnits"),
189
+ // REPLACEMENT_ENTRY("contentscripttype", "contentScriptType"),
190
+ // REPLACEMENT_ENTRY("contentstyletype", "contentStyleType"),
191
+ REPLACEMENT_ENTRY("diffuseconstant", "diffuseConstant"),
192
+ REPLACEMENT_ENTRY("edgemode", "edgeMode"),
193
+ // REPLACEMENT_ENTRY("externalresourcesrequired",
194
+ // "externalResourcesRequired"),
195
+ // REPLACEMENT_ENTRY("filterres", "filterRes"),
196
+ REPLACEMENT_ENTRY("filterunits", "filterUnits"),
197
+ REPLACEMENT_ENTRY("glyphref", "glyphRef"),
198
+ REPLACEMENT_ENTRY("gradienttransform", "gradientTransform"),
199
+ REPLACEMENT_ENTRY("gradientunits", "gradientUnits"),
200
+ REPLACEMENT_ENTRY("kernelmatrix", "kernelMatrix"),
201
+ REPLACEMENT_ENTRY("kernelunitlength", "kernelUnitLength"),
202
+ REPLACEMENT_ENTRY("keypoints", "keyPoints"),
203
+ REPLACEMENT_ENTRY("keysplines", "keySplines"),
204
+ REPLACEMENT_ENTRY("keytimes", "keyTimes"),
205
+ REPLACEMENT_ENTRY("lengthadjust", "lengthAdjust"),
206
+ REPLACEMENT_ENTRY("limitingconeangle", "limitingConeAngle"),
207
+ REPLACEMENT_ENTRY("markerheight", "markerHeight"),
208
+ REPLACEMENT_ENTRY("markerunits", "markerUnits"),
209
+ REPLACEMENT_ENTRY("markerwidth", "markerWidth"),
210
+ REPLACEMENT_ENTRY("maskcontentunits", "maskContentUnits"),
211
+ REPLACEMENT_ENTRY("maskunits", "maskUnits"),
212
+ REPLACEMENT_ENTRY("numoctaves", "numOctaves"),
213
+ REPLACEMENT_ENTRY("pathlength", "pathLength"),
214
+ REPLACEMENT_ENTRY("patterncontentunits", "patternContentUnits"),
215
+ REPLACEMENT_ENTRY("patterntransform", "patternTransform"),
216
+ REPLACEMENT_ENTRY("patternunits", "patternUnits"),
217
+ REPLACEMENT_ENTRY("pointsatx", "pointsAtX"),
218
+ REPLACEMENT_ENTRY("pointsaty", "pointsAtY"),
219
+ REPLACEMENT_ENTRY("pointsatz", "pointsAtZ"),
220
+ REPLACEMENT_ENTRY("preservealpha", "preserveAlpha"),
221
+ REPLACEMENT_ENTRY("preserveaspectratio", "preserveAspectRatio"),
222
+ REPLACEMENT_ENTRY("primitiveunits", "primitiveUnits"),
223
+ REPLACEMENT_ENTRY("refx", "refX"), REPLACEMENT_ENTRY("refy", "refY"),
224
+ REPLACEMENT_ENTRY("repeatcount", "repeatCount"),
225
+ REPLACEMENT_ENTRY("repeatdur", "repeatDur"),
226
+ REPLACEMENT_ENTRY("requiredextensions", "requiredExtensions"),
227
+ REPLACEMENT_ENTRY("requiredfeatures", "requiredFeatures"),
228
+ REPLACEMENT_ENTRY("specularconstant", "specularConstant"),
229
+ REPLACEMENT_ENTRY("specularexponent", "specularExponent"),
230
+ REPLACEMENT_ENTRY("spreadmethod", "spreadMethod"),
231
+ REPLACEMENT_ENTRY("startoffset", "startOffset"),
232
+ REPLACEMENT_ENTRY("stddeviation", "stdDeviation"),
233
+ REPLACEMENT_ENTRY("stitchtiles", "stitchTiles"),
234
+ REPLACEMENT_ENTRY("surfacescale", "surfaceScale"),
235
+ REPLACEMENT_ENTRY("systemlanguage", "systemLanguage"),
236
+ REPLACEMENT_ENTRY("tablevalues", "tableValues"),
237
+ REPLACEMENT_ENTRY("targetx", "targetX"),
238
+ REPLACEMENT_ENTRY("targety", "targetY"),
239
+ REPLACEMENT_ENTRY("textlength", "textLength"),
240
+ REPLACEMENT_ENTRY("viewbox", "viewBox"),
241
+ REPLACEMENT_ENTRY("viewtarget", "viewTarget"),
242
+ REPLACEMENT_ENTRY("xchannelselector", "xChannelSelector"),
243
+ REPLACEMENT_ENTRY("ychannelselector", "yChannelSelector"),
244
+ REPLACEMENT_ENTRY("zoomandpan", "zoomAndPan"),
248
245
  };
249
246
 
250
247
  static const ReplacementEntry kSvgTagReplacements[] = {
251
- REPLACEMENT_ENTRY("altglyph", "altGlyph"),
252
- REPLACEMENT_ENTRY("altglyphdef", "altGlyphDef"),
253
- REPLACEMENT_ENTRY("altglyphitem", "altGlyphItem"),
254
- REPLACEMENT_ENTRY("animatecolor", "animateColor"),
255
- REPLACEMENT_ENTRY("animatemotion", "animateMotion"),
256
- REPLACEMENT_ENTRY("animatetransform", "animateTransform"),
257
- REPLACEMENT_ENTRY("clippath", "clipPath"),
258
- REPLACEMENT_ENTRY("feblend", "feBlend"),
259
- REPLACEMENT_ENTRY("fecolormatrix", "feColorMatrix"),
260
- REPLACEMENT_ENTRY("fecomponenttransfer", "feComponentTransfer"),
261
- REPLACEMENT_ENTRY("fecomposite", "feComposite"),
262
- REPLACEMENT_ENTRY("feconvolvematrix", "feConvolveMatrix"),
263
- REPLACEMENT_ENTRY("fediffuselighting", "feDiffuseLighting"),
264
- REPLACEMENT_ENTRY("fedisplacementmap", "feDisplacementMap"),
265
- REPLACEMENT_ENTRY("fedistantlight", "feDistantLight"),
266
- REPLACEMENT_ENTRY("feflood", "feFlood"),
267
- REPLACEMENT_ENTRY("fefunca", "feFuncA"),
268
- REPLACEMENT_ENTRY("fefuncb", "feFuncB"),
269
- REPLACEMENT_ENTRY("fefuncg", "feFuncG"),
270
- REPLACEMENT_ENTRY("fefuncr", "feFuncR"),
271
- REPLACEMENT_ENTRY("fegaussianblur", "feGaussianBlur"),
272
- REPLACEMENT_ENTRY("feimage", "feImage"),
273
- REPLACEMENT_ENTRY("femerge", "feMerge"),
274
- REPLACEMENT_ENTRY("femergenode", "feMergeNode"),
275
- REPLACEMENT_ENTRY("femorphology", "feMorphology"),
276
- REPLACEMENT_ENTRY("feoffset", "feOffset"),
277
- REPLACEMENT_ENTRY("fepointlight", "fePointLight"),
278
- REPLACEMENT_ENTRY("fespecularlighting", "feSpecularLighting"),
279
- REPLACEMENT_ENTRY("fespotlight", "feSpotLight"),
280
- REPLACEMENT_ENTRY("fetile", "feTile"),
281
- REPLACEMENT_ENTRY("feturbulence", "feTurbulence"),
282
- REPLACEMENT_ENTRY("foreignobject", "foreignObject"),
283
- REPLACEMENT_ENTRY("glyphref", "glyphRef"),
284
- REPLACEMENT_ENTRY("lineargradient", "linearGradient"),
285
- REPLACEMENT_ENTRY("radialgradient", "radialGradient"),
286
- REPLACEMENT_ENTRY("textpath", "textPath"),
248
+ REPLACEMENT_ENTRY("altglyph", "altGlyph"),
249
+ REPLACEMENT_ENTRY("altglyphdef", "altGlyphDef"),
250
+ REPLACEMENT_ENTRY("altglyphitem", "altGlyphItem"),
251
+ REPLACEMENT_ENTRY("animatecolor", "animateColor"),
252
+ REPLACEMENT_ENTRY("animatemotion", "animateMotion"),
253
+ REPLACEMENT_ENTRY("animatetransform", "animateTransform"),
254
+ REPLACEMENT_ENTRY("clippath", "clipPath"),
255
+ REPLACEMENT_ENTRY("feblend", "feBlend"),
256
+ REPLACEMENT_ENTRY("fecolormatrix", "feColorMatrix"),
257
+ REPLACEMENT_ENTRY("fecomponenttransfer", "feComponentTransfer"),
258
+ REPLACEMENT_ENTRY("fecomposite", "feComposite"),
259
+ REPLACEMENT_ENTRY("feconvolvematrix", "feConvolveMatrix"),
260
+ REPLACEMENT_ENTRY("fediffuselighting", "feDiffuseLighting"),
261
+ REPLACEMENT_ENTRY("fedisplacementmap", "feDisplacementMap"),
262
+ REPLACEMENT_ENTRY("fedistantlight", "feDistantLight"),
263
+ REPLACEMENT_ENTRY("feflood", "feFlood"),
264
+ REPLACEMENT_ENTRY("fefunca", "feFuncA"),
265
+ REPLACEMENT_ENTRY("fefuncb", "feFuncB"),
266
+ REPLACEMENT_ENTRY("fefuncg", "feFuncG"),
267
+ REPLACEMENT_ENTRY("fefuncr", "feFuncR"),
268
+ REPLACEMENT_ENTRY("fegaussianblur", "feGaussianBlur"),
269
+ REPLACEMENT_ENTRY("feimage", "feImage"),
270
+ REPLACEMENT_ENTRY("femerge", "feMerge"),
271
+ REPLACEMENT_ENTRY("femergenode", "feMergeNode"),
272
+ REPLACEMENT_ENTRY("femorphology", "feMorphology"),
273
+ REPLACEMENT_ENTRY("feoffset", "feOffset"),
274
+ REPLACEMENT_ENTRY("fepointlight", "fePointLight"),
275
+ REPLACEMENT_ENTRY("fespecularlighting", "feSpecularLighting"),
276
+ REPLACEMENT_ENTRY("fespotlight", "feSpotLight"),
277
+ REPLACEMENT_ENTRY("fetile", "feTile"),
278
+ REPLACEMENT_ENTRY("feturbulence", "feTurbulence"),
279
+ REPLACEMENT_ENTRY("foreignobject", "foreignObject"),
280
+ REPLACEMENT_ENTRY("glyphref", "glyphRef"),
281
+ REPLACEMENT_ENTRY("lineargradient", "linearGradient"),
282
+ REPLACEMENT_ENTRY("radialgradient", "radialGradient"),
283
+ REPLACEMENT_ENTRY("textpath", "textPath"),
287
284
  };
288
285
 
289
286
  typedef struct _NamespacedAttributeReplacement {
@@ -293,18 +290,18 @@ typedef struct _NamespacedAttributeReplacement {
293
290
  } NamespacedAttributeReplacement;
294
291
 
295
292
  static const NamespacedAttributeReplacement kForeignAttributeReplacements[] = {
296
- { "xlink:actuate", "actuate", GUMBO_ATTR_NAMESPACE_XLINK },
297
- { "xlink:actuate", "actuate", GUMBO_ATTR_NAMESPACE_XLINK },
298
- { "xlink:href", "href", GUMBO_ATTR_NAMESPACE_XLINK },
299
- { "xlink:role", "role", GUMBO_ATTR_NAMESPACE_XLINK },
300
- { "xlink:show", "show", GUMBO_ATTR_NAMESPACE_XLINK },
301
- { "xlink:title", "title", GUMBO_ATTR_NAMESPACE_XLINK },
302
- { "xlink:type", "type", GUMBO_ATTR_NAMESPACE_XLINK },
303
- { "xml:base", "base", GUMBO_ATTR_NAMESPACE_XML },
304
- { "xml:lang", "lang", GUMBO_ATTR_NAMESPACE_XML },
305
- { "xml:space", "space", GUMBO_ATTR_NAMESPACE_XML },
306
- { "xmlns", "xmlns", GUMBO_ATTR_NAMESPACE_XMLNS },
307
- { "xmlns:xlink", "xlink", GUMBO_ATTR_NAMESPACE_XMLNS },
293
+ {"xlink:actuate", "actuate", GUMBO_ATTR_NAMESPACE_XLINK},
294
+ {"xlink:arcrole", "arcrole", GUMBO_ATTR_NAMESPACE_XLINK},
295
+ {"xlink:href", "href", GUMBO_ATTR_NAMESPACE_XLINK},
296
+ {"xlink:role", "role", GUMBO_ATTR_NAMESPACE_XLINK},
297
+ {"xlink:show", "show", GUMBO_ATTR_NAMESPACE_XLINK},
298
+ {"xlink:title", "title", GUMBO_ATTR_NAMESPACE_XLINK},
299
+ {"xlink:type", "type", GUMBO_ATTR_NAMESPACE_XLINK},
300
+ {"xml:base", "base", GUMBO_ATTR_NAMESPACE_XML},
301
+ {"xml:lang", "lang", GUMBO_ATTR_NAMESPACE_XML},
302
+ {"xml:space", "space", GUMBO_ATTR_NAMESPACE_XML},
303
+ {"xmlns", "xmlns", GUMBO_ATTR_NAMESPACE_XMLNS},
304
+ {"xmlns:xlink", "xlink", GUMBO_ATTR_NAMESPACE_XMLNS},
308
305
  };
309
306
 
310
307
  // The "scope marker" for the list of active formatting elements. We use a
@@ -336,7 +333,7 @@ typedef struct _TextNodeBufferState {
336
333
  // The source position of the start of this text node.
337
334
  GumboSourcePosition _start_position;
338
335
 
339
- // The type of node that will be inserted (TEXT or WHITESPACE).
336
+ // The type of node that will be inserted (TEXT, CDATA, or WHITESPACE).
340
337
  GumboNodeType _type;
341
338
  } TextNodeBufferState;
342
339
 
@@ -362,6 +359,9 @@ typedef struct GumboInternalParserState {
362
359
  GumboNode* _head_element;
363
360
  GumboNode* _form_element;
364
361
 
362
+ // The element used as the fragment context when parsing in fragment mode.
363
+ GumboNode* _fragment_ctx;
364
+
365
365
  // The flag for when the spec says "Reprocess the current token in..."
366
366
  bool _reprocess_current_token;
367
367
 
@@ -418,14 +418,14 @@ static bool attribute_matches(
418
418
  static bool attribute_matches_case_sensitive(
419
419
  const GumboVector* attributes, const char* name, const char* value) {
420
420
  const GumboAttribute* attr = gumbo_get_attribute(attributes, name);
421
- return attr ? strcmp(value, attr->value) == 0 : false;
421
+ return attr ? strcmp(value, attr->value) == 0 : false;
422
422
  }
423
423
 
424
424
  // Checks if the specified attribute vectors are identical.
425
425
  static bool all_attributes_match(
426
426
  const GumboVector* attr1, const GumboVector* attr2) {
427
- int num_unmatched_attr2_elements = attr2->length;
428
- for (int i = 0; i < attr1->length; ++i) {
427
+ unsigned int num_unmatched_attr2_elements = attr2->length;
428
+ for (unsigned int i = 0; i < attr1->length; ++i) {
429
429
  const GumboAttribute* attr = attr1->data[i];
430
430
  if (attribute_matches_case_sensitive(attr2, attr->name, attr->value)) {
431
431
  --num_unmatched_attr2_elements;
@@ -453,8 +453,7 @@ static GumboNode* create_node(GumboParser* parser, GumboNodeType type) {
453
453
  static GumboNode* new_document_node(GumboParser* parser) {
454
454
  GumboNode* document_node = create_node(parser, GUMBO_NODE_DOCUMENT);
455
455
  document_node->parse_flags = GUMBO_INSERTION_BY_PARSER;
456
- gumbo_vector_init(
457
- parser, 1, &document_node->v.document.children);
456
+ gumbo_vector_init(parser, 1, &document_node->v.document.children);
458
457
 
459
458
  // Must be initialized explicitly, as there's no guarantee that we'll see a
460
459
  // doc type token.
@@ -489,6 +488,7 @@ static void parser_state_init(GumboParser* parser) {
489
488
  gumbo_vector_init(parser, 5, &parser_state->_template_insertion_modes);
490
489
  parser_state->_head_element = NULL;
491
490
  parser_state->_form_element = NULL;
491
+ parser_state->_fragment_ctx = NULL;
492
492
  parser_state->_current_token = NULL;
493
493
  parser_state->_closed_body_tag = false;
494
494
  parser_state->_closed_html_tag = false;
@@ -497,6 +497,9 @@ static void parser_state_init(GumboParser* parser) {
497
497
 
498
498
  static void parser_state_destroy(GumboParser* parser) {
499
499
  GumboParserState* state = parser->_parser_state;
500
+ if (state->_fragment_ctx) {
501
+ destroy_node(parser, state->_fragment_ctx);
502
+ }
500
503
  gumbo_vector_destroy(parser, &state->_active_formatting_elements);
501
504
  gumbo_vector_destroy(parser, &state->_open_elements);
502
505
  gumbo_vector_destroy(parser, &state->_template_insertion_modes);
@@ -508,6 +511,10 @@ static GumboNode* get_document_node(GumboParser* parser) {
508
511
  return parser->_output->document;
509
512
  }
510
513
 
514
+ static bool is_fragment_parser(const GumboParser* parser) {
515
+ return !!parser->_parser_state->_fragment_ctx;
516
+ }
517
+
511
518
  // Returns the node at the bottom of the stack of open elements, or NULL if no
512
519
  // elements have been added yet.
513
520
  static GumboNode* get_current_node(GumboParser* parser) {
@@ -521,6 +528,14 @@ static GumboNode* get_current_node(GumboParser* parser) {
521
528
  return open_elements->data[open_elements->length - 1];
522
529
  }
523
530
 
531
+ static GumboNode* get_adjusted_current_node(GumboParser* parser) {
532
+ GumboParserState* state = parser->_parser_state;
533
+ if (state->_open_elements.length == 1 && state->_fragment_ctx) {
534
+ return state->_fragment_ctx;
535
+ }
536
+ return get_current_node(parser);
537
+ }
538
+
524
539
  // Returns true if the given needle is in the given array of literal
525
540
  // GumboStringPieces. If exact_match is true, this requires that they match
526
541
  // exactly; otherwise, this performs a prefix match to check if any of the
@@ -528,7 +543,7 @@ static GumboNode* get_current_node(GumboParser* parser) {
528
543
  // case-insensitive match.
529
544
  static bool is_in_static_list(
530
545
  const char* needle, const GumboStringPiece* haystack, bool exact_match) {
531
- for (int i = 0; haystack[i].length > 0; ++i) {
546
+ for (unsigned int i = 0; haystack[i].length > 0; ++i) {
532
547
  if ((exact_match && !strcmp(needle, haystack[i].data)) ||
533
548
  (!exact_match && !strcasecmp(needle, haystack[i].data))) {
534
549
  return true;
@@ -547,15 +562,36 @@ static void set_insertion_mode(GumboParser* parser, GumboInsertionMode mode) {
547
562
  // indicate that there is no appropriate insertion mode, and the loop should
548
563
  // continue.
549
564
  static GumboInsertionMode get_appropriate_insertion_mode(
550
- const GumboNode* node, bool is_last) {
551
- assert(node->type == GUMBO_NODE_ELEMENT);
565
+ const GumboParser* parser, int index) {
566
+ const GumboVector* open_elements = &parser->_parser_state->_open_elements;
567
+ const GumboNode* node = open_elements->data[index];
568
+ const bool is_last = index == 0;
569
+
570
+ if (is_last && is_fragment_parser(parser)) {
571
+ node = parser->_parser_state->_fragment_ctx;
572
+ }
573
+
574
+ assert(node->type == GUMBO_NODE_ELEMENT || node->type == GUMBO_NODE_TEMPLATE);
552
575
  switch (node->v.element.tag) {
553
- case GUMBO_TAG_SELECT:
576
+ case GUMBO_TAG_SELECT: {
577
+ if (is_last) {
578
+ return GUMBO_INSERTION_MODE_IN_SELECT;
579
+ }
580
+ for (int i = index; i > 0; --i) {
581
+ const GumboNode* ancestor = open_elements->data[i];
582
+ if (node_html_tag_is(ancestor, GUMBO_TAG_TEMPLATE)) {
583
+ return GUMBO_INSERTION_MODE_IN_SELECT;
584
+ }
585
+ if (node_html_tag_is(ancestor, GUMBO_TAG_TABLE)) {
586
+ return GUMBO_INSERTION_MODE_IN_SELECT_IN_TABLE;
587
+ }
588
+ }
554
589
  return GUMBO_INSERTION_MODE_IN_SELECT;
590
+ }
555
591
  case GUMBO_TAG_TD:
556
592
  case GUMBO_TAG_TH:
557
- return is_last ?
558
- GUMBO_INSERTION_MODE_IN_BODY : GUMBO_INSERTION_MODE_IN_CELL;
593
+ if (!is_last) return GUMBO_INSERTION_MODE_IN_CELL;
594
+ break;
559
595
  case GUMBO_TAG_TR:
560
596
  return GUMBO_INSERTION_MODE_IN_ROW;
561
597
  case GUMBO_TAG_TBODY:
@@ -568,25 +604,30 @@ static GumboInsertionMode get_appropriate_insertion_mode(
568
604
  return GUMBO_INSERTION_MODE_IN_COLUMN_GROUP;
569
605
  case GUMBO_TAG_TABLE:
570
606
  return GUMBO_INSERTION_MODE_IN_TABLE;
607
+ case GUMBO_TAG_TEMPLATE:
608
+ return get_current_template_insertion_mode(parser);
571
609
  case GUMBO_TAG_HEAD:
610
+ if (!is_last) return GUMBO_INSERTION_MODE_IN_HEAD;
611
+ break;
572
612
  case GUMBO_TAG_BODY:
573
613
  return GUMBO_INSERTION_MODE_IN_BODY;
574
614
  case GUMBO_TAG_FRAMESET:
575
615
  return GUMBO_INSERTION_MODE_IN_FRAMESET;
576
616
  case GUMBO_TAG_HTML:
577
- return GUMBO_INSERTION_MODE_BEFORE_HEAD;
617
+ return parser->_parser_state->_head_element
618
+ ? GUMBO_INSERTION_MODE_AFTER_HEAD
619
+ : GUMBO_INSERTION_MODE_BEFORE_HEAD;
578
620
  default:
579
- return is_last ?
580
- GUMBO_INSERTION_MODE_IN_BODY : GUMBO_INSERTION_MODE_INITIAL;
621
+ break;
581
622
  }
623
+ return is_last ? GUMBO_INSERTION_MODE_IN_BODY : GUMBO_INSERTION_MODE_INITIAL;
582
624
  }
583
625
 
584
626
  // This performs the actual "reset the insertion mode" loop.
585
627
  static void reset_insertion_mode_appropriately(GumboParser* parser) {
586
628
  const GumboVector* open_elements = &parser->_parser_state->_open_elements;
587
- for (int i = open_elements->length; --i >= 0; ) {
588
- GumboInsertionMode mode =
589
- get_appropriate_insertion_mode(open_elements->data[i], i == 0);
629
+ for (int i = open_elements->length; --i >= 0;) {
630
+ GumboInsertionMode mode = get_appropriate_insertion_mode(parser, i);
590
631
  if (mode != GUMBO_INSERTION_MODE_INITIAL) {
591
632
  set_insertion_mode(parser, mode);
592
633
  return;
@@ -597,7 +638,8 @@ static void reset_insertion_mode_appropriately(GumboParser* parser) {
597
638
  assert(0);
598
639
  }
599
640
 
600
- static GumboError* parser_add_parse_error(GumboParser* parser, const GumboToken* token) {
641
+ static GumboError* parser_add_parse_error(
642
+ GumboParser* parser, const GumboToken* token) {
601
643
  gumbo_debug("Adding parse error.\n");
602
644
  GumboError* error = gumbo_add_error(parser);
603
645
  if (!error) {
@@ -616,13 +658,14 @@ static GumboError* parser_add_parse_error(GumboParser* parser, const GumboToken*
616
658
  }
617
659
  GumboParserState* state = parser->_parser_state;
618
660
  extra_data->parser_state = state->_insertion_mode;
619
- gumbo_vector_init(parser, state->_open_elements.length,
620
- &extra_data->tag_stack);
621
- for (int i = 0; i < state->_open_elements.length; ++i) {
661
+ gumbo_vector_init(
662
+ parser, state->_open_elements.length, &extra_data->tag_stack);
663
+ for (unsigned int i = 0; i < state->_open_elements.length; ++i) {
622
664
  const GumboNode* node = state->_open_elements.data[i];
623
- assert(node->type == GUMBO_NODE_ELEMENT);
624
- gumbo_vector_add(parser, (void*) node->v.element.tag,
625
- &extra_data->tag_stack);
665
+ assert(
666
+ node->type == GUMBO_NODE_ELEMENT || node->type == GUMBO_NODE_TEMPLATE);
667
+ gumbo_vector_add(
668
+ parser, (void*) node->v.element.tag, &extra_data->tag_stack);
626
669
  }
627
670
  return error;
628
671
  }
@@ -631,13 +674,8 @@ static GumboError* parser_add_parse_error(GumboParser* parser, const GumboToken*
631
674
  // by is_start) with one of the tag types in the varargs list. Terminate the
632
675
  // list with GUMBO_TAG_LAST; this functions as a sentinel since no portion of
633
676
  // the spec references tags that are not in the spec.
634
- // TODO(jdtang): A lot of the tag lists for this function are repeated in many
635
- // places in the code. This is how it's written in the spec (and it's done this
636
- // way so it's easy to verify the code against the spec), but it may be worth
637
- // coming up with a notion of a "tag set" that includes a list of tags, and
638
- // using that in many places. It'd probably also help performance, but I want
639
- // to profile before optimizing.
640
- static bool tag_in(const GumboToken* token, bool is_start, ...) {
677
+ static bool tag_in(
678
+ const GumboToken* token, bool is_start, const gumbo_tagset tags) {
641
679
  GumboTag token_tag;
642
680
  if (is_start && token->type == GUMBO_TOKEN_START_TAG) {
643
681
  token_tag = token->v.start_tag.tag;
@@ -646,19 +684,7 @@ static bool tag_in(const GumboToken* token, bool is_start, ...) {
646
684
  } else {
647
685
  return false;
648
686
  }
649
-
650
- va_list tags;
651
- va_start(tags, is_start);
652
- bool result = false;
653
- for (GumboTag tag = va_arg(tags, GumboTag); tag != GUMBO_TAG_LAST;
654
- tag = va_arg(tags, GumboTag)) {
655
- if (tag == token_tag) {
656
- result = true;
657
- break;
658
- }
659
- }
660
- va_end(tags);
661
- return result;
687
+ return (token_tag < GUMBO_TAG_LAST && tags[(int) token_tag] != 0);
662
688
  }
663
689
 
664
690
  // Like tag_in, but for the single-tag case.
@@ -673,50 +699,125 @@ static bool tag_is(const GumboToken* token, bool is_start, GumboTag tag) {
673
699
  }
674
700
 
675
701
  // Like tag_in, but checks for the tag of a node, rather than a token.
676
- static bool node_tag_in(const GumboNode* node, ...) {
702
+ static bool node_tag_in_set(const GumboNode* node, const gumbo_tagset tags) {
677
703
  assert(node != NULL);
678
- if (node->type != GUMBO_NODE_ELEMENT) {
704
+ if (node->type != GUMBO_NODE_ELEMENT && node->type != GUMBO_NODE_TEMPLATE) {
679
705
  return false;
680
706
  }
681
- GumboTag node_tag = node->v.element.tag;
682
-
683
- va_list tags;
684
- va_start(tags, node);
685
- bool result = false;
686
- for (GumboTag tag = va_arg(tags, GumboTag); tag != GUMBO_TAG_LAST;
687
- tag = va_arg(tags, GumboTag)) {
688
- assert(tag <= GUMBO_TAG_LAST);
689
- if (tag == node_tag) {
690
- result = true;
691
- break;
692
- }
693
- }
694
- va_end(tags);
695
- return result;
707
+ return TAGSET_INCLUDES(
708
+ tags, node->v.element.tag_namespace, node->v.element.tag);
696
709
  }
697
710
 
698
711
  // Like node_tag_in, but for the single-tag case.
699
- static bool node_tag_is(const GumboNode* node, GumboTag tag) {
700
- return node->type == GUMBO_NODE_ELEMENT && node->v.element.tag == tag;
712
+ static bool node_qualified_tag_is(
713
+ const GumboNode* node, GumboNamespaceEnum ns, GumboTag tag) {
714
+ assert(node);
715
+ return (node->type == GUMBO_NODE_ELEMENT ||
716
+ node->type == GUMBO_NODE_TEMPLATE) &&
717
+ node->v.element.tag == tag && node->v.element.tag_namespace == ns;
718
+ }
719
+
720
+ // Like node_tag_in, but for the single-tag case in the HTML namespace
721
+ static bool node_html_tag_is(const GumboNode* node, GumboTag tag) {
722
+ return node_qualified_tag_is(node, GUMBO_NAMESPACE_HTML, tag);
723
+ }
724
+
725
+ static void push_template_insertion_mode(
726
+ GumboParser* parser, GumboInsertionMode mode) {
727
+ gumbo_vector_add(
728
+ parser, (void*) mode, &parser->_parser_state->_template_insertion_modes);
729
+ }
730
+
731
+ static void pop_template_insertion_mode(GumboParser* parser) {
732
+ gumbo_vector_pop(parser, &parser->_parser_state->_template_insertion_modes);
733
+ }
734
+
735
+ // Returns the current template insertion mode. If the stack of template
736
+ // insertion modes is empty, this returns GUMBO_INSERTION_MODE_INITIAL.
737
+ static GumboInsertionMode get_current_template_insertion_mode(
738
+ const GumboParser* parser) {
739
+ GumboVector* template_insertion_modes =
740
+ &parser->_parser_state->_template_insertion_modes;
741
+ if (template_insertion_modes->length == 0) {
742
+ return GUMBO_INSERTION_MODE_INITIAL;
743
+ }
744
+ return (GumboInsertionMode)
745
+ template_insertion_modes->data[(template_insertion_modes->length - 1)];
701
746
  }
702
747
 
703
748
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#mathml-text-integration-point
704
749
  static bool is_mathml_integration_point(const GumboNode* node) {
705
- return node_tag_in(node, GUMBO_TAG_MI, GUMBO_TAG_MO, GUMBO_TAG_MN,
706
- GUMBO_TAG_MS, GUMBO_TAG_MTEXT, GUMBO_TAG_LAST) &&
707
- node->v.element.tag_namespace == GUMBO_NAMESPACE_MATHML;
750
+ return node_tag_in_set(
751
+ node, (gumbo_tagset){TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN),
752
+ TAG_MATHML(MS), TAG_MATHML(MTEXT)});
708
753
  }
709
754
 
710
755
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#html-integration-point
711
756
  static bool is_html_integration_point(const GumboNode* node) {
712
- return (node_tag_in(node, GUMBO_TAG_FOREIGNOBJECT, GUMBO_TAG_DESC,
713
- GUMBO_TAG_TITLE, GUMBO_TAG_LAST) &&
714
- node->v.element.tag_namespace == GUMBO_NAMESPACE_SVG) ||
715
- (node_tag_is(node, GUMBO_TAG_ANNOTATION_XML) && (
716
- attribute_matches(&node->v.element.attributes,
717
- "encoding", "text/html") ||
718
- attribute_matches(&node->v.element.attributes,
719
- "encoding", "application/xhtml+xml")));
757
+ return node_tag_in_set(node, (gumbo_tagset){TAG_SVG(FOREIGNOBJECT),
758
+ TAG_SVG(DESC), TAG_SVG(TITLE)}) ||
759
+ (node_qualified_tag_is(
760
+ node, GUMBO_NAMESPACE_MATHML, GUMBO_TAG_ANNOTATION_XML) &&
761
+ (attribute_matches(
762
+ &node->v.element.attributes, "encoding", "text/html") ||
763
+ attribute_matches(&node->v.element.attributes, "encoding",
764
+ "application/xhtml+xml")));
765
+ }
766
+
767
+ // This represents a place to insert a node, consisting of a target parent and a
768
+ // child index within that parent. If the node should be inserted at the end of
769
+ // the parent's child, index will be -1.
770
+ typedef struct {
771
+ GumboNode* target;
772
+ int index;
773
+ } InsertionLocation;
774
+
775
+ InsertionLocation get_appropriate_insertion_location(
776
+ GumboParser* parser, GumboNode* override_target) {
777
+ InsertionLocation retval = {override_target, -1};
778
+ if (retval.target == NULL) {
779
+ // No override target; default to the current node, but special-case the
780
+ // root node since get_current_node() assumes the stack of open elements is
781
+ // non-empty.
782
+ retval.target = parser->_output->root != NULL ? get_current_node(parser)
783
+ : get_document_node(parser);
784
+ }
785
+ if (!parser->_parser_state->_foster_parent_insertions ||
786
+ !node_tag_in_set(retval.target, (gumbo_tagset){TAG(TABLE), TAG(TBODY),
787
+ TAG(TFOOT), TAG(THEAD), TAG(TR)})) {
788
+ return retval;
789
+ }
790
+
791
+ // Foster-parenting case.
792
+ int last_template_index = -1;
793
+ int last_table_index = -1;
794
+ GumboVector* open_elements = &parser->_parser_state->_open_elements;
795
+ for (unsigned int i = 0; i < open_elements->length; ++i) {
796
+ if (node_html_tag_is(open_elements->data[i], GUMBO_TAG_TEMPLATE)) {
797
+ last_template_index = i;
798
+ }
799
+ if (node_html_tag_is(open_elements->data[i], GUMBO_TAG_TABLE)) {
800
+ last_table_index = i;
801
+ }
802
+ }
803
+ if (last_template_index != -1 &&
804
+ (last_table_index == -1 || last_template_index > last_table_index)) {
805
+ retval.target = open_elements->data[last_template_index];
806
+ return retval;
807
+ }
808
+ if (last_table_index == -1) {
809
+ retval.target = open_elements->data[0];
810
+ return retval;
811
+ }
812
+ GumboNode* last_table = open_elements->data[last_table_index];
813
+ if (last_table->parent != NULL) {
814
+ retval.target = last_table->parent;
815
+ retval.index = last_table->index_within_parent;
816
+ return retval;
817
+ }
818
+
819
+ retval.target = open_elements->data[last_table_index - 1];
820
+ return retval;
720
821
  }
721
822
 
722
823
  // Appends a node to the end of its parent, setting the "parent" and
@@ -726,7 +827,8 @@ static void append_node(
726
827
  assert(node->parent == NULL);
727
828
  assert(node->index_within_parent == -1);
728
829
  GumboVector* children;
729
- if (parent->type == GUMBO_NODE_ELEMENT) {
830
+ if (parent->type == GUMBO_NODE_ELEMENT ||
831
+ parent->type == GUMBO_NODE_TEMPLATE) {
730
832
  children = &parent->v.element.children;
731
833
  } else {
732
834
  assert(parent->type == GUMBO_NODE_DOCUMENT);
@@ -738,64 +840,41 @@ static void append_node(
738
840
  assert(node->index_within_parent < children->length);
739
841
  }
740
842
 
741
- // Inserts a node at the specified index within its parent, updating the
843
+ // Inserts a node at the specified InsertionLocation, updating the
742
844
  // "parent" and "index_within_parent" fields of it and all its siblings.
845
+ // If the index of the location is -1, this calls append_node.
743
846
  static void insert_node(
744
- GumboParser* parser, GumboNode* parent, int index, GumboNode* node) {
847
+ GumboParser* parser, GumboNode* node, InsertionLocation location) {
745
848
  assert(node->parent == NULL);
746
849
  assert(node->index_within_parent == -1);
747
- assert(parent->type == GUMBO_NODE_ELEMENT);
748
- GumboVector* children = &parent->v.element.children;
749
- assert(index >= 0);
750
- assert(index < children->length);
751
- node->parent = parent;
752
- node->index_within_parent = index;
753
- gumbo_vector_insert_at(parser, (void*) node, index, children);
754
- assert(node->index_within_parent < children->length);
755
- for (int i = index + 1; i < children->length; ++i) {
756
- GumboNode* sibling = children->data[i];
757
- sibling->index_within_parent = i;
758
- assert(sibling->index_within_parent < children->length);
759
- }
760
- }
850
+ GumboNode* parent = location.target;
851
+ int index = location.index;
852
+ if (index != -1) {
853
+ GumboVector* children = NULL;
854
+ if (parent->type == GUMBO_NODE_ELEMENT ||
855
+ parent->type == GUMBO_NODE_TEMPLATE) {
856
+ children = &parent->v.element.children;
857
+ } else if (parent->type == GUMBO_NODE_DOCUMENT) {
858
+ children = &parent->v.document.children;
859
+ assert(children->length == 0);
860
+ } else {
861
+ assert(0);
862
+ }
761
863
 
762
- // http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#foster-parenting
763
- static void foster_parent_element(GumboParser* parser, GumboNode* node) {
764
- GumboVector* open_elements = &parser->_parser_state->_open_elements;
765
- assert(open_elements->length > 2);
766
-
767
- node->parse_flags |= GUMBO_INSERTION_FOSTER_PARENTED;
768
- GumboNode* foster_parent_element = open_elements->data[0];
769
- assert(foster_parent_element->type == GUMBO_NODE_ELEMENT);
770
- assert(node_tag_is(foster_parent_element, GUMBO_TAG_HTML));
771
- for (int i = open_elements->length; --i > 1; ) {
772
- GumboNode* table_element = open_elements->data[i];
773
- if (node_tag_is(table_element, GUMBO_TAG_TABLE)) {
774
- foster_parent_element = table_element->parent;
775
- if (!foster_parent_element ||
776
- foster_parent_element->type != GUMBO_NODE_ELEMENT) {
777
- // Table has no parent; spec says it's possible if a script manipulated
778
- // the DOM, although I don't think we have to worry about this case.
779
- gumbo_debug("Table has no parent.\n");
780
- foster_parent_element = open_elements->data[i - 1];
781
- break;
782
- }
783
- assert(foster_parent_element->type == GUMBO_NODE_ELEMENT);
784
- gumbo_debug("Found enclosing table (%x) at %d; parent=%s, index=%d.\n",
785
- table_element, i, gumbo_normalized_tagname(
786
- foster_parent_element->v.element.tag),
787
- table_element->index_within_parent);
788
- assert(foster_parent_element->v.element.children.data[
789
- table_element->index_within_parent] == table_element);
790
- insert_node(parser, foster_parent_element,
791
- table_element->index_within_parent, node);
792
- return;
864
+ assert(index >= 0);
865
+ assert((unsigned int) index < children->length);
866
+ node->parent = parent;
867
+ node->index_within_parent = index;
868
+ gumbo_vector_insert_at(parser, (void*) node, index, children);
869
+ assert(node->index_within_parent < children->length);
870
+ for (unsigned int i = index + 1; i < children->length; ++i) {
871
+ GumboNode* sibling = children->data[i];
872
+ sibling->index_within_parent = i;
873
+ assert(sibling->index_within_parent < children->length);
793
874
  }
875
+ } else {
876
+ append_node(parser, parent, node);
794
877
  }
795
- if (node->type == GUMBO_NODE_ELEMENT) {
796
- gumbo_vector_add(parser, (void*) node, open_elements);
797
- }
798
- append_node(parser, foster_parent_element, node);
799
878
  }
800
879
 
801
880
  static void maybe_flush_text_node_buffer(GumboParser* parser) {
@@ -806,30 +885,31 @@ static void maybe_flush_text_node_buffer(GumboParser* parser) {
806
885
  }
807
886
 
808
887
  assert(buffer_state->_type == GUMBO_NODE_WHITESPACE ||
809
- buffer_state->_type == GUMBO_NODE_TEXT);
888
+ buffer_state->_type == GUMBO_NODE_TEXT ||
889
+ buffer_state->_type == GUMBO_NODE_CDATA);
810
890
  GumboNode* text_node = create_node(parser, buffer_state->_type);
811
891
  GumboText* text_node_data = &text_node->v.text;
812
- text_node_data->text = gumbo_string_buffer_to_string(
813
- parser, &buffer_state->_buffer);
892
+ text_node_data->text =
893
+ gumbo_string_buffer_to_string(parser, &buffer_state->_buffer);
814
894
  text_node_data->original_text.data = buffer_state->_start_original_text;
815
895
  text_node_data->original_text.length =
816
896
  state->_current_token->original_text.data -
817
897
  buffer_state->_start_original_text;
818
898
  text_node_data->start_pos = buffer_state->_start_position;
819
- if (state->_foster_parent_insertions && node_tag_in(
820
- get_current_node(parser), GUMBO_TAG_TABLE, GUMBO_TAG_TBODY,
821
- GUMBO_TAG_TFOOT, GUMBO_TAG_THEAD, GUMBO_TAG_TR, GUMBO_TAG_LAST)) {
822
- foster_parent_element(parser, text_node);
899
+
900
+ gumbo_debug("Flushing text node buffer of %.*s.\n",
901
+ (int) buffer_state->_buffer.length, buffer_state->_buffer.data);
902
+
903
+ InsertionLocation location = get_appropriate_insertion_location(parser, NULL);
904
+ if (location.target->type == GUMBO_NODE_DOCUMENT) {
905
+ // The DOM does not allow Document nodes to have Text children, so per the
906
+ // spec, they are dropped on the floor.
907
+ destroy_node(parser, text_node);
823
908
  } else {
824
- append_node(
825
- parser, parser->_output->root ?
826
- get_current_node(parser) : parser->_output->document, text_node);
909
+ insert_node(parser, text_node, location);
827
910
  }
828
- gumbo_debug("Flushing text node buffer of %.*s.\n",
829
- (int) buffer_state->_buffer.length, buffer_state->_buffer.data);
830
911
 
831
- gumbo_string_buffer_destroy(parser, &buffer_state->_buffer);
832
- gumbo_string_buffer_init(parser, &buffer_state->_buffer);
912
+ gumbo_string_buffer_clear(parser, &buffer_state->_buffer);
833
913
  buffer_state->_type = GUMBO_NODE_WHITESPACE;
834
914
  assert(buffer_state->_buffer.length == 0);
835
915
  }
@@ -837,18 +917,17 @@ static void maybe_flush_text_node_buffer(GumboParser* parser) {
837
917
  static void record_end_of_element(
838
918
  GumboToken* current_token, GumboElement* element) {
839
919
  element->end_pos = current_token->position;
840
- element->original_end_tag =
841
- current_token->type == GUMBO_TOKEN_END_TAG ?
842
- current_token->original_text : kGumboEmptyString;
920
+ element->original_end_tag = current_token->type == GUMBO_TOKEN_END_TAG
921
+ ? current_token->original_text
922
+ : kGumboEmptyString;
843
923
  }
844
924
 
845
925
  static GumboNode* pop_current_node(GumboParser* parser) {
846
926
  GumboParserState* state = parser->_parser_state;
847
927
  maybe_flush_text_node_buffer(parser);
848
928
  if (state->_open_elements.length > 0) {
849
- assert(node_tag_is(state->_open_elements.data[0], GUMBO_TAG_HTML));
850
- gumbo_debug(
851
- "Popping %s node.\n",
929
+ assert(node_html_tag_is(state->_open_elements.data[0], GUMBO_TAG_HTML));
930
+ gumbo_debug("Popping %s node.\n",
852
931
  gumbo_normalized_tagname(get_current_node(parser)->v.element.tag));
853
932
  }
854
933
  GumboNode* current_node = gumbo_vector_pop(parser, &state->_open_elements);
@@ -856,13 +935,16 @@ static GumboNode* pop_current_node(GumboParser* parser) {
856
935
  assert(state->_open_elements.length == 0);
857
936
  return NULL;
858
937
  }
859
- assert(current_node->type == GUMBO_NODE_ELEMENT);
938
+ assert(current_node->type == GUMBO_NODE_ELEMENT ||
939
+ current_node->type == GUMBO_NODE_TEMPLATE);
860
940
  bool is_closed_body_or_html_tag =
861
- (node_tag_is(current_node, GUMBO_TAG_BODY) && state->_closed_body_tag) ||
862
- (node_tag_is(current_node, GUMBO_TAG_HTML) && state->_closed_html_tag);
941
+ (node_html_tag_is(current_node, GUMBO_TAG_BODY) &&
942
+ state->_closed_body_tag) ||
943
+ (node_html_tag_is(current_node, GUMBO_TAG_HTML) &&
944
+ state->_closed_html_tag);
863
945
  if ((state->_current_token->type != GUMBO_TOKEN_END_TAG ||
864
- !node_tag_is(current_node, state->_current_token->v.end_tag)) &&
865
- !is_closed_body_or_html_tag) {
946
+ !node_html_tag_is(current_node, state->_current_token->v.end_tag)) &&
947
+ !is_closed_body_or_html_tag) {
866
948
  current_node->parse_flags |= GUMBO_INSERTION_IMPLICIT_END_TAG;
867
949
  }
868
950
  if (!is_closed_body_or_html_tag) {
@@ -885,25 +967,25 @@ static void append_comment_node(
885
967
 
886
968
  // http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#clear-the-stack-back-to-a-table-row-context
887
969
  static void clear_stack_to_table_row_context(GumboParser* parser) {
888
- while (!node_tag_in(get_current_node(parser),
889
- GUMBO_TAG_HTML, GUMBO_TAG_TR, GUMBO_TAG_LAST)) {
970
+ while (!node_tag_in_set(get_current_node(parser),
971
+ (gumbo_tagset){TAG(HTML), TAG(TR), TAG(TEMPLATE)})) {
890
972
  pop_current_node(parser);
891
973
  }
892
974
  }
893
975
 
894
976
  // http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#clear-the-stack-back-to-a-table-context
895
977
  static void clear_stack_to_table_context(GumboParser* parser) {
896
- while (!node_tag_in(get_current_node(parser),
897
- GUMBO_TAG_HTML, GUMBO_TAG_TABLE, GUMBO_TAG_LAST)) {
978
+ while (!node_tag_in_set(get_current_node(parser),
979
+ (gumbo_tagset){TAG(HTML), TAG(TABLE), TAG(TEMPLATE)})) {
898
980
  pop_current_node(parser);
899
981
  }
900
982
  }
901
983
 
902
984
  // http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#clear-the-stack-back-to-a-table-body-context
903
985
  void clear_stack_to_table_body_context(GumboParser* parser) {
904
- while (!node_tag_in(get_current_node(parser), GUMBO_TAG_HTML,
905
- GUMBO_TAG_TBODY, GUMBO_TAG_TFOOT, GUMBO_TAG_THEAD,
906
- GUMBO_TAG_LAST)) {
986
+ while (!node_tag_in_set(get_current_node(parser),
987
+ (gumbo_tagset){TAG(HTML), TAG(TBODY), TAG(TFOOT), TAG(THEAD),
988
+ TAG(TEMPLATE)})) {
907
989
  pop_current_node(parser);
908
990
  }
909
991
  }
@@ -918,7 +1000,9 @@ static GumboNode* create_element(GumboParser* parser, GumboTag tag) {
918
1000
  element->tag_namespace = GUMBO_NAMESPACE_HTML;
919
1001
  element->original_tag = kGumboEmptyString;
920
1002
  element->original_end_tag = kGumboEmptyString;
921
- element->start_pos = parser->_parser_state->_current_token->position;
1003
+ element->start_pos = (parser->_parser_state->_current_token)
1004
+ ? parser->_parser_state->_current_token->position
1005
+ : kGumboEmptySourcePosition;
922
1006
  element->end_pos = kGumboEmptySourcePosition;
923
1007
  return node;
924
1008
  }
@@ -929,7 +1013,12 @@ static GumboNode* create_element_from_token(
929
1013
  assert(token->type == GUMBO_TOKEN_START_TAG);
930
1014
  GumboTokenStartTag* start_tag = &token->v.start_tag;
931
1015
 
932
- GumboNode* node = create_node(parser, GUMBO_NODE_ELEMENT);
1016
+ GumboNodeType type = (tag_namespace == GUMBO_NAMESPACE_HTML &&
1017
+ start_tag->tag == GUMBO_TAG_TEMPLATE)
1018
+ ? GUMBO_NODE_TEMPLATE
1019
+ : GUMBO_NODE_ELEMENT;
1020
+
1021
+ GumboNode* node = create_node(parser, type);
933
1022
  GumboElement* element = &node->v.element;
934
1023
  gumbo_vector_init(parser, 1, &element->children);
935
1024
  element->attributes = start_tag->attributes;
@@ -952,7 +1041,7 @@ static GumboNode* create_element_from_token(
952
1041
 
953
1042
  // http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#insert-an-html-element
954
1043
  static void insert_element(GumboParser* parser, GumboNode* node,
955
- bool is_reconstructing_formatting_elements) {
1044
+ bool is_reconstructing_formatting_elements) {
956
1045
  GumboParserState* state = parser->_parser_state;
957
1046
  // NOTE(jdtang): The text node buffer must always be flushed before inserting
958
1047
  // a node, otherwise we're handling nodes in a different order than the spec
@@ -966,20 +1055,8 @@ static void insert_element(GumboParser* parser, GumboNode* node,
966
1055
  if (!is_reconstructing_formatting_elements) {
967
1056
  maybe_flush_text_node_buffer(parser);
968
1057
  }
969
- if (state->_foster_parent_insertions && node_tag_in(
970
- get_current_node(parser), GUMBO_TAG_TABLE, GUMBO_TAG_TBODY,
971
- GUMBO_TAG_TFOOT, GUMBO_TAG_THEAD, GUMBO_TAG_TR, GUMBO_TAG_LAST)) {
972
- foster_parent_element(parser, node);
973
- gumbo_vector_add(parser, (void*) node, &state->_open_elements);
974
- return;
975
- }
976
-
977
- // This is called to insert the root HTML element, but get_current_node
978
- // assumes the stack of open elements is non-empty, so we need special
979
- // handling for this case.
980
- append_node(
981
- parser, parser->_output->root ?
982
- get_current_node(parser) : parser->_output->document, node);
1058
+ InsertionLocation location = get_appropriate_insertion_location(parser, NULL);
1059
+ insert_node(parser, node, location);
983
1060
  gumbo_vector_add(parser, (void*) node, &state->_open_elements);
984
1061
  }
985
1062
 
@@ -992,7 +1069,7 @@ static GumboNode* insert_element_from_token(
992
1069
  create_element_from_token(parser, token, GUMBO_NAMESPACE_HTML);
993
1070
  insert_element(parser, element, false);
994
1071
  gumbo_debug("Inserting <%s> element (@%x) from token.\n",
995
- gumbo_normalized_tagname(element->v.element.tag), element);
1072
+ gumbo_normalized_tagname(element->v.element.tag), element);
996
1073
  return element;
997
1074
  }
998
1075
 
@@ -1005,7 +1082,7 @@ static GumboNode* insert_element_of_tag_type(
1005
1082
  element->parse_flags |= GUMBO_INSERTION_BY_PARSER | reason;
1006
1083
  insert_element(parser, element, false);
1007
1084
  gumbo_debug("Inserting %s element (@%x) from tag type.\n",
1008
- gumbo_normalized_tagname(tag), element);
1085
+ gumbo_normalized_tagname(tag), element);
1009
1086
  return element;
1010
1087
  }
1011
1088
 
@@ -1017,16 +1094,14 @@ static GumboNode* insert_foreign_element(
1017
1094
  GumboNode* element = create_element_from_token(parser, token, tag_namespace);
1018
1095
  insert_element(parser, element, false);
1019
1096
  if (token_has_attribute(token, "xmlns") &&
1020
- !attribute_matches_case_sensitive(
1021
- &token->v.start_tag.attributes, "xmlns",
1097
+ !attribute_matches_case_sensitive(&token->v.start_tag.attributes, "xmlns",
1022
1098
  kLegalXmlns[tag_namespace])) {
1023
1099
  // TODO(jdtang): Since there're multiple possible error codes here, we
1024
1100
  // eventually need reason codes to differentiate them.
1025
1101
  parser_add_parse_error(parser, token);
1026
1102
  }
1027
1103
  if (token_has_attribute(token, "xmlns:xlink") &&
1028
- !attribute_matches_case_sensitive(
1029
- &token->v.start_tag.attributes,
1104
+ !attribute_matches_case_sensitive(&token->v.start_tag.attributes,
1030
1105
  "xmlns:xlink", "http://www.w3.org/1999/xlink")) {
1031
1106
  parser_add_parse_error(parser, token);
1032
1107
  }
@@ -1035,7 +1110,8 @@ static GumboNode* insert_foreign_element(
1035
1110
 
1036
1111
  static void insert_text_token(GumboParser* parser, GumboToken* token) {
1037
1112
  assert(token->type == GUMBO_TOKEN_WHITESPACE ||
1038
- token->type == GUMBO_TOKEN_CHARACTER);
1113
+ token->type == GUMBO_TOKEN_CHARACTER ||
1114
+ token->type == GUMBO_TOKEN_NULL || token->type == GUMBO_TOKEN_CDATA);
1039
1115
  TextNodeBufferState* buffer_state = &parser->_parser_state->_text_node;
1040
1116
  if (buffer_state->_buffer.length == 0) {
1041
1117
  // Initialize position fields.
@@ -1046,6 +1122,8 @@ static void insert_text_token(GumboParser* parser, GumboToken* token) {
1046
1122
  parser, token->v.character, &buffer_state->_buffer);
1047
1123
  if (token->type == GUMBO_TOKEN_CHARACTER) {
1048
1124
  buffer_state->_type = GUMBO_NODE_TEXT;
1125
+ } else if (token->type == GUMBO_TOKEN_CDATA) {
1126
+ buffer_state->_type = GUMBO_NODE_CDATA;
1049
1127
  }
1050
1128
  gumbo_debug("Inserting text token '%c'.\n", token->v.character);
1051
1129
  }
@@ -1068,12 +1146,12 @@ static void acknowledge_self_closing_tag(GumboParser* parser) {
1068
1146
  // elements, and fills in its index if so.
1069
1147
  static bool find_last_anchor_index(GumboParser* parser, int* anchor_index) {
1070
1148
  GumboVector* elements = &parser->_parser_state->_active_formatting_elements;
1071
- for (int i = elements->length; --i >= 0; ) {
1149
+ for (int i = elements->length; --i >= 0;) {
1072
1150
  GumboNode* node = elements->data[i];
1073
1151
  if (node == &kActiveFormattingScopeMarker) {
1074
1152
  return false;
1075
1153
  }
1076
- if (node_tag_is(node, GUMBO_TAG_A)) {
1154
+ if (node_html_tag_is(node, GUMBO_TAG_A)) {
1077
1155
  *anchor_index = i;
1078
1156
  return true;
1079
1157
  }
@@ -1085,23 +1163,21 @@ static bool find_last_anchor_index(GumboParser* parser, int* anchor_index) {
1085
1163
  // formatting elements (after the last active scope marker) that have a specific
1086
1164
  // tag. If this is > 0, then earliest_matching_index will be filled in with the
1087
1165
  // index of the first such element.
1088
- static int count_formatting_elements_of_tag(
1089
- GumboParser* parser, const GumboNode* desired_node,
1090
- int* earliest_matching_index) {
1166
+ static int count_formatting_elements_of_tag(GumboParser* parser,
1167
+ const GumboNode* desired_node, int* earliest_matching_index) {
1091
1168
  const GumboElement* desired_element = &desired_node->v.element;
1092
1169
  GumboVector* elements = &parser->_parser_state->_active_formatting_elements;
1093
1170
  int num_identical_elements = 0;
1094
- for (int i = elements->length; --i >= 0; ) {
1171
+ for (int i = elements->length; --i >= 0;) {
1095
1172
  GumboNode* node = elements->data[i];
1096
1173
  if (node == &kActiveFormattingScopeMarker) {
1097
1174
  break;
1098
1175
  }
1099
1176
  assert(node->type == GUMBO_NODE_ELEMENT);
1100
- GumboElement* element = &node->v.element;
1101
- if (node_tag_is(node, desired_element->tag) &&
1102
- element->tag_namespace == desired_element->tag_namespace &&
1103
- all_attributes_match(&element->attributes,
1104
- &desired_element->attributes)) {
1177
+ if (node_qualified_tag_is(
1178
+ node, desired_element->tag_namespace, desired_element->tag) &&
1179
+ all_attributes_match(
1180
+ &node->v.element.attributes, &desired_element->attributes)) {
1105
1181
  num_identical_elements++;
1106
1182
  *earliest_matching_index = i;
1107
1183
  }
@@ -1128,7 +1204,7 @@ static void add_formatting_element(GumboParser* parser, const GumboNode* node) {
1128
1204
  // Noah's Ark clause: if there're at least 3, remove the earliest.
1129
1205
  if (num_identical_elements >= 3) {
1130
1206
  gumbo_debug("Noah's ark clause: removing element at %d.\n",
1131
- earliest_identical_element);
1207
+ earliest_identical_element);
1132
1208
  gumbo_vector_remove_at(parser, earliest_identical_element, elements);
1133
1209
  }
1134
1210
 
@@ -1137,7 +1213,7 @@ static void add_formatting_element(GumboParser* parser, const GumboNode* node) {
1137
1213
 
1138
1214
  static bool is_open_element(GumboParser* parser, const GumboNode* node) {
1139
1215
  GumboVector* open_elements = &parser->_parser_state->_open_elements;
1140
- for (int i = 0; i < open_elements->length; ++i) {
1216
+ for (unsigned int i = 0; i < open_elements->length; ++i) {
1141
1217
  if (open_elements->data[i] == node) {
1142
1218
  return true;
1143
1219
  }
@@ -1149,8 +1225,8 @@ static bool is_open_element(GumboParser* parser, const GumboNode* node) {
1149
1225
  // clone shares no structure with the original node: all owned strings and
1150
1226
  // values are fresh copies.
1151
1227
  GumboNode* clone_node(
1152
- GumboParser* parser, const GumboNode* node, GumboParseFlags reason) {
1153
- assert(node->type == GUMBO_NODE_ELEMENT);
1228
+ GumboParser* parser, GumboNode* node, GumboParseFlags reason) {
1229
+ assert(node->type == GUMBO_NODE_ELEMENT || node->type == GUMBO_NODE_TEMPLATE);
1154
1230
  GumboNode* new_node = gumbo_parser_allocate(parser, sizeof(GumboNode));
1155
1231
  *new_node = *node;
1156
1232
  new_node->parent = NULL;
@@ -1164,7 +1240,7 @@ GumboNode* clone_node(
1164
1240
 
1165
1241
  const GumboVector* old_attributes = &node->v.element.attributes;
1166
1242
  gumbo_vector_init(parser, old_attributes->length, &element->attributes);
1167
- for (int i = 0; i < old_attributes->length; ++i) {
1243
+ for (unsigned int i = 0; i < old_attributes->length; ++i) {
1168
1244
  const GumboAttribute* old_attr = old_attributes->data[i];
1169
1245
  GumboAttribute* attr =
1170
1246
  gumbo_parser_allocate(parser, sizeof(GumboAttribute));
@@ -1188,8 +1264,8 @@ static void reconstruct_active_formatting_elements(GumboParser* parser) {
1188
1264
  }
1189
1265
 
1190
1266
  // Step 2 & 3
1191
- int i = elements->length - 1;
1192
- const GumboNode* element = elements->data[i];
1267
+ unsigned int i = elements->length - 1;
1268
+ GumboNode* element = elements->data[i];
1193
1269
  if (element == &kActiveFormattingScopeMarker ||
1194
1270
  is_open_element(parser, element)) {
1195
1271
  return;
@@ -1199,7 +1275,7 @@ static void reconstruct_active_formatting_elements(GumboParser* parser) {
1199
1275
  do {
1200
1276
  if (i == 0) {
1201
1277
  // Step 4
1202
- i = -1; // Incremented to 0 below.
1278
+ i = -1; // Incremented to 0 below.
1203
1279
  break;
1204
1280
  }
1205
1281
  // Step 5
@@ -1209,9 +1285,8 @@ static void reconstruct_active_formatting_elements(GumboParser* parser) {
1209
1285
 
1210
1286
  ++i;
1211
1287
  gumbo_debug("Reconstructing elements from %d on %s parent.\n", i,
1212
- gumbo_normalized_tagname(
1213
- get_current_node(parser)->v.element.tag));
1214
- for(; i < elements->length; ++i) {
1288
+ gumbo_normalized_tagname(get_current_node(parser)->v.element.tag));
1289
+ for (; i < elements->length; ++i) {
1215
1290
  // Step 7 & 8.
1216
1291
  assert(elements->length > 0);
1217
1292
  assert(i < elements->length);
@@ -1220,11 +1295,16 @@ static void reconstruct_active_formatting_elements(GumboParser* parser) {
1220
1295
  GumboNode* clone = clone_node(
1221
1296
  parser, element, GUMBO_INSERTION_RECONSTRUCTED_FORMATTING_ELEMENT);
1222
1297
  // Step 9.
1223
- insert_element(parser, clone, true);
1298
+ InsertionLocation location =
1299
+ get_appropriate_insertion_location(parser, NULL);
1300
+ insert_node(parser, clone, location);
1301
+ gumbo_vector_add(
1302
+ parser, (void*) clone, &parser->_parser_state->_open_elements);
1303
+
1224
1304
  // Step 10.
1225
1305
  elements->data[i] = clone;
1226
1306
  gumbo_debug("Reconstructed %s element at %d.\n",
1227
- gumbo_normalized_tagname(clone->v.element.tag), i);
1307
+ gumbo_normalized_tagname(clone->v.element.tag), i);
1228
1308
  }
1229
1309
  }
1230
1310
 
@@ -1235,32 +1315,30 @@ static void clear_active_formatting_elements(GumboParser* parser) {
1235
1315
  do {
1236
1316
  node = gumbo_vector_pop(parser, elements);
1237
1317
  ++num_elements_cleared;
1238
- } while(node && node != &kActiveFormattingScopeMarker);
1318
+ } while (node && node != &kActiveFormattingScopeMarker);
1239
1319
  gumbo_debug("Cleared %d elements from active formatting list.\n",
1240
- num_elements_cleared);
1320
+ num_elements_cleared);
1241
1321
  }
1242
1322
 
1243
1323
  // http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#the-initial-insertion-mode
1244
1324
  static GumboQuirksModeEnum compute_quirks_mode(
1245
1325
  const GumboTokenDocType* doctype) {
1246
- if (doctype->force_quirks ||
1247
- strcmp(doctype->name, kDoctypeHtml.data) ||
1248
- is_in_static_list(doctype->public_identifier,
1249
- kQuirksModePublicIdPrefixes, false) ||
1250
- is_in_static_list(doctype->public_identifier,
1251
- kQuirksModePublicIdExactMatches, true) ||
1252
- is_in_static_list(doctype->system_identifier,
1253
- kQuirksModeSystemIdExactMatches, true) ||
1326
+ if (doctype->force_quirks || strcmp(doctype->name, kDoctypeHtml.data) ||
1327
+ is_in_static_list(
1328
+ doctype->public_identifier, kQuirksModePublicIdPrefixes, false) ||
1329
+ is_in_static_list(
1330
+ doctype->public_identifier, kQuirksModePublicIdExactMatches, true) ||
1331
+ is_in_static_list(
1332
+ doctype->system_identifier, kQuirksModeSystemIdExactMatches, true) ||
1254
1333
  (is_in_static_list(doctype->public_identifier,
1255
- kLimitedQuirksRequiresSystemIdPublicIdPrefixes, false)
1256
- && !doctype->has_system_identifier)) {
1334
+ kLimitedQuirksRequiresSystemIdPublicIdPrefixes, false) &&
1335
+ !doctype->has_system_identifier)) {
1257
1336
  return GUMBO_DOCTYPE_QUIRKS;
1258
- } else if (
1259
- is_in_static_list(doctype->public_identifier,
1260
- kLimitedQuirksPublicIdPrefixes, false) ||
1261
- (is_in_static_list(doctype->public_identifier,
1262
- kLimitedQuirksRequiresSystemIdPublicIdPrefixes, false)
1263
- && doctype->has_system_identifier)) {
1337
+ } else if (is_in_static_list(doctype->public_identifier,
1338
+ kLimitedQuirksPublicIdPrefixes, false) ||
1339
+ (is_in_static_list(doctype->public_identifier,
1340
+ kLimitedQuirksRequiresSystemIdPublicIdPrefixes, false) &&
1341
+ doctype->has_system_identifier)) {
1264
1342
  return GUMBO_DOCTYPE_LIMITED_QUIRKS;
1265
1343
  }
1266
1344
  return GUMBO_DOCTYPE_NO_QUIRKS;
@@ -1269,83 +1347,50 @@ static GumboQuirksModeEnum compute_quirks_mode(
1269
1347
  // The following functions are all defined by the "has an element in __ scope"
1270
1348
  // sections of the HTML5 spec:
1271
1349
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-the-specific-scope
1272
- // The basic idea behind them is that they check for an element of the given tag
1273
- // name, contained within a scope formed by a set of other tag names. For
1274
- // example, "has an element in list scope" looks for an element of the given tag
1275
- // within the nearest enclosing <ol> or <ul>, along with a bunch of generic
1276
- // element types that serve to "firewall" their content from the rest of the
1277
- // document.
1278
- static bool has_an_element_in_specific_scope(
1279
- GumboParser* parser, GumboVector* /* GumboTag */ expected, bool negate, ...) {
1350
+ // The basic idea behind them is that they check for an element of the given
1351
+ // qualified name, contained within a scope formed by a set of other qualified
1352
+ // names. For example, "has an element in list scope" looks for an element of
1353
+ // the given qualified name within the nearest enclosing <ol> or <ul>, along
1354
+ // with a bunch of generic element types that serve to "firewall" their content
1355
+ // from the rest of the document. Note that because of the way the spec is
1356
+ // written,
1357
+ // all elements are expected to be in the HTML namespace
1358
+ static bool has_an_element_in_specific_scope(GumboParser* parser,
1359
+ int expected_size, const GumboTag* expected, bool negate,
1360
+ const gumbo_tagset tags) {
1280
1361
  GumboVector* open_elements = &parser->_parser_state->_open_elements;
1281
- va_list args;
1282
- va_start(args, negate);
1283
- // va_arg can only run through the list once, so we copy it to an GumboVector
1284
- // here. I wonder if it'd make more sense to make tags the GumboVector*
1285
- // parameter and 'expected' a vararg list, but that'd require changing a lot
1286
- // of code for unknown benefit. We may want to change the representation of
1287
- // these tag sets anyway, to something more efficient.
1288
- GumboVector tags;
1289
- gumbo_vector_init(parser, 10, &tags);
1290
- for (GumboTag tag = va_arg(args, GumboTag); tag != GUMBO_TAG_LAST;
1291
- tag = va_arg(args, GumboTag)) {
1292
- // We store the tags inline instead of storing pointers to them.
1293
- gumbo_vector_add(parser, (void*) tag, &tags);
1294
- }
1295
- va_end(args);
1296
-
1297
- bool result = false;
1298
- for (int i = open_elements->length; --i >= 0; ) {
1362
+ for (int i = open_elements->length; --i >= 0;) {
1299
1363
  const GumboNode* node = open_elements->data[i];
1300
- if (node->type != GUMBO_NODE_ELEMENT) {
1364
+ if (node->type != GUMBO_NODE_ELEMENT && node->type != GUMBO_NODE_TEMPLATE)
1301
1365
  continue;
1302
- }
1366
+
1303
1367
  GumboTag node_tag = node->v.element.tag;
1304
- for (int j = 0; j < expected->length; ++j) {
1305
- GumboTag expected_tag = (GumboTag) expected->data[j];
1306
- if (node_tag == expected_tag) {
1307
- result = true;
1308
- goto cleanup;
1309
- }
1368
+ GumboNamespaceEnum node_ns = node->v.element.tag_namespace;
1369
+ for (int j = 0; j < expected_size; ++j) {
1370
+ if (node_tag == expected[j] && node_ns == GUMBO_NAMESPACE_HTML)
1371
+ return true;
1310
1372
  }
1311
1373
 
1312
- bool found_tag = false;
1313
- for (int j = 0; j < tags.length; ++j) {
1314
- GumboTag tag = (GumboTag) tags.data[j];
1315
- if (tag == node_tag) {
1316
- found_tag = true;
1317
- break;
1318
- }
1319
- }
1320
- if (negate != found_tag) {
1321
- result = false;
1322
- goto cleanup;
1323
- }
1374
+ bool found = TAGSET_INCLUDES(tags, node_ns, node_tag);
1375
+ if (negate != found) return false;
1324
1376
  }
1325
- cleanup:
1326
- gumbo_vector_destroy(parser, &tags);
1327
- return result;
1377
+ return false;
1328
1378
  }
1329
1379
 
1330
- // This is a bit of a hack to stack-allocate a one-element GumboVector name
1331
- // 'varname' containing the 'from_var' variable, since it's used in nearly all
1332
- // the subsequent helper functions. Note the use of void* and casts instead of
1333
- // GumboTag; this is so the alignment requirements are the same as GumboVector
1334
- // and the data inside it can be freely accessed as if it were a normal
1335
- // GumboVector.
1336
- #define DECLARE_ONE_ELEMENT_GUMBO_VECTOR(varname, from_var) \
1337
- void* varname ## _tmp_array[1] = { (void*) from_var }; \
1338
- GumboVector varname = { varname ## _tmp_array, 1, 1 }
1380
+ // Checks for the presence of an open element of the specified tag type.
1381
+ static bool has_open_element(GumboParser* parser, GumboTag tag) {
1382
+ return has_an_element_in_specific_scope(
1383
+ parser, 1, &tag, false, (gumbo_tagset){TAG(HTML)});
1384
+ }
1339
1385
 
1340
1386
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-scope
1341
1387
  static bool has_an_element_in_scope(GumboParser* parser, GumboTag tag) {
1342
- DECLARE_ONE_ELEMENT_GUMBO_VECTOR(tags, tag);
1343
- return has_an_element_in_specific_scope(
1344
- parser, &tags, false, GUMBO_TAG_APPLET, GUMBO_TAG_CAPTION, GUMBO_TAG_HTML,
1345
- GUMBO_TAG_TABLE, GUMBO_TAG_TD, GUMBO_TAG_TH, GUMBO_TAG_MARQUEE,
1346
- GUMBO_TAG_OBJECT, GUMBO_TAG_MI, GUMBO_TAG_MO, GUMBO_TAG_MN, GUMBO_TAG_MS,
1347
- GUMBO_TAG_MTEXT, GUMBO_TAG_ANNOTATION_XML, GUMBO_TAG_FOREIGNOBJECT,
1348
- GUMBO_TAG_DESC, GUMBO_TAG_TITLE, GUMBO_TAG_LAST);
1388
+ return has_an_element_in_specific_scope(parser, 1, &tag, false,
1389
+ (gumbo_tagset){TAG(APPLET), TAG(CAPTION), TAG(HTML), TAG(TABLE), TAG(TD),
1390
+ TAG(TH), TAG(MARQUEE), TAG(OBJECT), TAG(TEMPLATE), TAG_MATHML(MI),
1391
+ TAG_MATHML(MO), TAG_MATHML(MN), TAG_MATHML(MS), TAG_MATHML(MTEXT),
1392
+ TAG_MATHML(ANNOTATION_XML), TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC),
1393
+ TAG_SVG(TITLE)});
1349
1394
  }
1350
1395
 
1351
1396
  // Like "has an element in scope", but for the specific case of looking for a
@@ -1356,21 +1401,21 @@ static bool has_an_element_in_scope(GumboParser* parser, GumboTag tag) {
1356
1401
  // parameterize it.
1357
1402
  static bool has_node_in_scope(GumboParser* parser, const GumboNode* node) {
1358
1403
  GumboVector* open_elements = &parser->_parser_state->_open_elements;
1359
- for (int i = open_elements->length; --i >= 0; ) {
1404
+ for (int i = open_elements->length; --i >= 0;) {
1360
1405
  const GumboNode* current = open_elements->data[i];
1361
1406
  if (current == node) {
1362
1407
  return true;
1363
1408
  }
1364
- if (current->type != GUMBO_NODE_ELEMENT) {
1409
+ if (current->type != GUMBO_NODE_ELEMENT &&
1410
+ current->type != GUMBO_NODE_TEMPLATE) {
1365
1411
  continue;
1366
1412
  }
1367
- if (node_tag_in(
1368
- current, GUMBO_TAG_APPLET, GUMBO_TAG_CAPTION, GUMBO_TAG_HTML,
1369
- GUMBO_TAG_TABLE, GUMBO_TAG_TD, GUMBO_TAG_TH, GUMBO_TAG_MARQUEE,
1370
- GUMBO_TAG_OBJECT, GUMBO_TAG_MI, GUMBO_TAG_MO, GUMBO_TAG_MN,
1371
- GUMBO_TAG_MS, GUMBO_TAG_MTEXT, GUMBO_TAG_ANNOTATION_XML,
1372
- GUMBO_TAG_FOREIGNOBJECT, GUMBO_TAG_DESC, GUMBO_TAG_TITLE,
1373
- GUMBO_TAG_LAST)) {
1413
+ if (node_tag_in_set(current,
1414
+ (gumbo_tagset){TAG(APPLET), TAG(CAPTION), TAG(HTML), TAG(TABLE),
1415
+ TAG(TD), TAG(TH), TAG(MARQUEE), TAG(OBJECT), TAG(TEMPLATE),
1416
+ TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN), TAG_MATHML(MS),
1417
+ TAG_MATHML(MTEXT), TAG_MATHML(ANNOTATION_XML),
1418
+ TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC), TAG_SVG(TITLE)})) {
1374
1419
  return false;
1375
1420
  }
1376
1421
  }
@@ -1378,79 +1423,72 @@ static bool has_node_in_scope(GumboParser* parser, const GumboNode* node) {
1378
1423
  return false;
1379
1424
  }
1380
1425
 
1381
- // Like has_an_element_in_scope, but restricts the expected tag to a range of
1382
- // possible tag names instead of just a single one.
1383
- static bool has_an_element_in_scope_with_tagname(GumboParser* parser, ...) {
1384
- GumboVector tags;
1385
- // 6 = arbitrary initial size for vector, chosen because the major use-case
1386
- // for this method is heading tags, of which there are 6.
1387
- gumbo_vector_init(parser, 6, &tags);
1388
- va_list args;
1389
- va_start(args, parser);
1390
- for (GumboTag tag = va_arg(args, GumboTag); tag != GUMBO_TAG_LAST;
1391
- tag = va_arg(args, GumboTag)) {
1392
- gumbo_vector_add(parser, (void*) tag, &tags);
1393
- }
1394
- bool found = has_an_element_in_specific_scope(
1395
- parser, &tags, false, GUMBO_TAG_APPLET, GUMBO_TAG_CAPTION, GUMBO_TAG_HTML,
1396
- GUMBO_TAG_TABLE, GUMBO_TAG_TD, GUMBO_TAG_TH, GUMBO_TAG_MARQUEE,
1397
- GUMBO_TAG_OBJECT, GUMBO_TAG_MI, GUMBO_TAG_MO, GUMBO_TAG_MN, GUMBO_TAG_MS,
1398
- GUMBO_TAG_MTEXT, GUMBO_TAG_ANNOTATION_XML, GUMBO_TAG_FOREIGNOBJECT,
1399
- GUMBO_TAG_DESC, GUMBO_TAG_TITLE, GUMBO_TAG_LAST);
1400
- gumbo_vector_destroy(parser, &tags);
1401
- va_end(args);
1402
- return found;
1426
+ // Like has_an_element_in_scope, but restricts the expected qualified name to a
1427
+ // range of possible qualified names instead of just a single one.
1428
+ static bool has_an_element_in_scope_with_tagname(
1429
+ GumboParser* parser, int expected_len, const GumboTag expected[]) {
1430
+ return has_an_element_in_specific_scope(parser, expected_len, expected, false,
1431
+ (gumbo_tagset){TAG(APPLET), TAG(CAPTION), TAG(HTML), TAG(TABLE), TAG(TD),
1432
+ TAG(TH), TAG(MARQUEE), TAG(OBJECT), TAG(TEMPLATE), TAG_MATHML(MI),
1433
+ TAG_MATHML(MO), TAG_MATHML(MN), TAG_MATHML(MS), TAG_MATHML(MTEXT),
1434
+ TAG_MATHML(ANNOTATION_XML), TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC),
1435
+ TAG_SVG(TITLE)});
1403
1436
  }
1404
1437
 
1405
1438
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-list-item-scope
1406
1439
  static bool has_an_element_in_list_scope(GumboParser* parser, GumboTag tag) {
1407
- DECLARE_ONE_ELEMENT_GUMBO_VECTOR(tags, tag);
1408
- return has_an_element_in_specific_scope(
1409
- parser, &tags, false, GUMBO_TAG_APPLET, GUMBO_TAG_CAPTION, GUMBO_TAG_HTML,
1410
- GUMBO_TAG_TABLE, GUMBO_TAG_TD, GUMBO_TAG_TH, GUMBO_TAG_MARQUEE,
1411
- GUMBO_TAG_OBJECT, GUMBO_TAG_MI, GUMBO_TAG_MO, GUMBO_TAG_MN, GUMBO_TAG_MS,
1412
- GUMBO_TAG_MTEXT, GUMBO_TAG_ANNOTATION_XML, GUMBO_TAG_FOREIGNOBJECT,
1413
- GUMBO_TAG_DESC, GUMBO_TAG_TITLE, GUMBO_TAG_OL, GUMBO_TAG_UL,
1414
- GUMBO_TAG_LAST);
1440
+ return has_an_element_in_specific_scope(parser, 1, &tag, false,
1441
+ (gumbo_tagset){TAG(APPLET), TAG(CAPTION), TAG(HTML), TAG(TABLE), TAG(TD),
1442
+ TAG(TH), TAG(MARQUEE), TAG(OBJECT), TAG(TEMPLATE), TAG_MATHML(MI),
1443
+ TAG_MATHML(MO), TAG_MATHML(MN), TAG_MATHML(MS), TAG_MATHML(MTEXT),
1444
+ TAG_MATHML(ANNOTATION_XML), TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC),
1445
+ TAG_SVG(TITLE), TAG(OL), TAG(UL)});
1415
1446
  }
1416
1447
 
1417
1448
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-button-scope
1418
1449
  static bool has_an_element_in_button_scope(GumboParser* parser, GumboTag tag) {
1419
- DECLARE_ONE_ELEMENT_GUMBO_VECTOR(tags, tag);
1420
- return has_an_element_in_specific_scope(
1421
- parser, &tags, false, GUMBO_TAG_APPLET, GUMBO_TAG_CAPTION, GUMBO_TAG_HTML,
1422
- GUMBO_TAG_TABLE, GUMBO_TAG_TD, GUMBO_TAG_TH, GUMBO_TAG_MARQUEE,
1423
- GUMBO_TAG_OBJECT, GUMBO_TAG_MI, GUMBO_TAG_MO, GUMBO_TAG_MN, GUMBO_TAG_MS,
1424
- GUMBO_TAG_MTEXT, GUMBO_TAG_ANNOTATION_XML, GUMBO_TAG_FOREIGNOBJECT,
1425
- GUMBO_TAG_DESC, GUMBO_TAG_TITLE, GUMBO_TAG_BUTTON, GUMBO_TAG_LAST);
1450
+ return has_an_element_in_specific_scope(parser, 1, &tag, false,
1451
+ (gumbo_tagset){TAG(APPLET), TAG(CAPTION), TAG(HTML), TAG(TABLE), TAG(TD),
1452
+ TAG(TH), TAG(MARQUEE), TAG(OBJECT), TAG(TEMPLATE), TAG_MATHML(MI),
1453
+ TAG_MATHML(MO), TAG_MATHML(MN), TAG_MATHML(MS), TAG_MATHML(MTEXT),
1454
+ TAG_MATHML(ANNOTATION_XML), TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC),
1455
+ TAG_SVG(TITLE), TAG(BUTTON)});
1426
1456
  }
1427
1457
 
1428
1458
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-table-scope
1429
1459
  static bool has_an_element_in_table_scope(GumboParser* parser, GumboTag tag) {
1430
- DECLARE_ONE_ELEMENT_GUMBO_VECTOR(tags, tag);
1431
- return has_an_element_in_specific_scope(
1432
- parser, &tags, false, GUMBO_TAG_HTML, GUMBO_TAG_TABLE, GUMBO_TAG_LAST);
1460
+ return has_an_element_in_specific_scope(parser, 1, &tag, false,
1461
+ (gumbo_tagset){TAG(HTML), TAG(TABLE), TAG(TEMPLATE)});
1433
1462
  }
1434
1463
 
1435
1464
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-select-scope
1436
1465
  static bool has_an_element_in_select_scope(GumboParser* parser, GumboTag tag) {
1437
- DECLARE_ONE_ELEMENT_GUMBO_VECTOR(tags, tag);
1438
1466
  return has_an_element_in_specific_scope(
1439
- parser, &tags, true, GUMBO_TAG_OPTGROUP, GUMBO_TAG_OPTION,
1440
- GUMBO_TAG_LAST);
1467
+ parser, 1, &tag, true, (gumbo_tagset){TAG(OPTGROUP), TAG(OPTION)});
1441
1468
  }
1442
1469
 
1443
-
1444
1470
  // http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#generate-implied-end-tags
1445
1471
  // "exception" is the "element to exclude from the process" listed in the spec.
1446
1472
  // Pass GUMBO_TAG_LAST to not exclude any of them.
1447
1473
  static void generate_implied_end_tags(GumboParser* parser, GumboTag exception) {
1448
- for (;
1449
- node_tag_in(get_current_node(parser), GUMBO_TAG_DD, GUMBO_TAG_DT,
1450
- GUMBO_TAG_LI, GUMBO_TAG_OPTION, GUMBO_TAG_OPTGROUP,
1451
- GUMBO_TAG_P, GUMBO_TAG_RP, GUMBO_TAG_RT, GUMBO_TAG_LAST) &&
1452
- !node_tag_is(get_current_node(parser), exception);
1453
- pop_current_node(parser));
1474
+ for (; node_tag_in_set(get_current_node(parser),
1475
+ (gumbo_tagset){TAG(DD), TAG(DT), TAG(LI), TAG(OPTION),
1476
+ TAG(OPTGROUP), TAG(P), TAG(RP), TAG(RB), TAG(RT), TAG(RTC)}) &&
1477
+ !node_html_tag_is(get_current_node(parser), exception);
1478
+ pop_current_node(parser))
1479
+ ;
1480
+ }
1481
+
1482
+ // This is the "generate all implied end tags thoroughly" clause of the spec.
1483
+ // https://html.spec.whatwg.org/multipage/syntax.html#closing-elements-that-have-implied-end-tags
1484
+ static void generate_all_implied_end_tags_thoroughly(GumboParser* parser) {
1485
+ for (
1486
+ ; node_tag_in_set(get_current_node(parser),
1487
+ (gumbo_tagset){TAG(CAPTION), TAG(COLGROUP), TAG(DD), TAG(DT), TAG(LI),
1488
+ TAG(OPTION), TAG(OPTGROUP), TAG(P), TAG(RP), TAG(RT), TAG(RTC),
1489
+ TAG(TBODY), TAG(TD), TAG(TFOOT), TAG(TH), TAG(HEAD), TAG(TR)});
1490
+ pop_current_node(parser))
1491
+ ;
1454
1492
  }
1455
1493
 
1456
1494
  // This factors out the clauses relating to "act as if an end tag token with tag
@@ -1463,7 +1501,7 @@ static bool close_table(GumboParser* parser) {
1463
1501
  }
1464
1502
 
1465
1503
  GumboNode* node = pop_current_node(parser);
1466
- while (!node_tag_is(node, GUMBO_TAG_TABLE)) {
1504
+ while (!node_html_tag_is(node, GUMBO_TAG_TABLE)) {
1467
1505
  node = pop_current_node(parser);
1468
1506
  }
1469
1507
  reset_insertion_mode_appropriately(parser);
@@ -1472,18 +1510,18 @@ static bool close_table(GumboParser* parser) {
1472
1510
 
1473
1511
  // This factors out the clauses relating to "act as if an end tag token with tag
1474
1512
  // name `cell_tag` had been seen".
1475
- static bool close_table_cell(GumboParser* parser, const GumboToken* token,
1476
- GumboTag cell_tag) {
1513
+ static bool close_table_cell(
1514
+ GumboParser* parser, const GumboToken* token, GumboTag cell_tag) {
1477
1515
  bool result = true;
1478
1516
  generate_implied_end_tags(parser, GUMBO_TAG_LAST);
1479
1517
  const GumboNode* node = get_current_node(parser);
1480
- if (!node_tag_is(node, cell_tag)) {
1518
+ if (!node_html_tag_is(node, cell_tag)) {
1481
1519
  parser_add_parse_error(parser, token);
1482
1520
  result = false;
1483
1521
  }
1484
1522
  do {
1485
1523
  node = pop_current_node(parser);
1486
- } while (!node_tag_is(node, cell_tag));
1524
+ } while (!node_html_tag_is(node, cell_tag));
1487
1525
 
1488
1526
  clear_active_formatting_elements(parser);
1489
1527
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_ROW);
@@ -1508,7 +1546,7 @@ static bool close_current_cell(GumboParser* parser, const GumboToken* token) {
1508
1546
  // resets the insertion mode appropriately.
1509
1547
  static void close_current_select(GumboParser* parser) {
1510
1548
  GumboNode* node = pop_current_node(parser);
1511
- while (!node_tag_is(node, GUMBO_TAG_SELECT)) {
1549
+ while (!node_html_tag_is(node, GUMBO_TAG_SELECT)) {
1512
1550
  node = pop_current_node(parser);
1513
1551
  }
1514
1552
  reset_insertion_mode_appropriately(parser);
@@ -1517,60 +1555,48 @@ static void close_current_select(GumboParser* parser) {
1517
1555
  // The list of nodes in the "special" category:
1518
1556
  // http://www.whatwg.org/specs/web-apps/current-work/complete/parsing.html#special
1519
1557
  static bool is_special_node(const GumboNode* node) {
1520
- assert(node->type == GUMBO_NODE_ELEMENT);
1521
- switch (node->v.element.tag_namespace) {
1522
- case GUMBO_NAMESPACE_HTML:
1523
- return node_tag_in(node,
1524
- GUMBO_TAG_ADDRESS, GUMBO_TAG_APPLET, GUMBO_TAG_AREA,
1525
- GUMBO_TAG_ARTICLE, GUMBO_TAG_ASIDE, GUMBO_TAG_BASE,
1526
- GUMBO_TAG_BASEFONT, GUMBO_TAG_BGSOUND, GUMBO_TAG_BLOCKQUOTE,
1527
- GUMBO_TAG_BODY, GUMBO_TAG_BR, GUMBO_TAG_BUTTON, GUMBO_TAG_CAPTION,
1528
- GUMBO_TAG_CENTER, GUMBO_TAG_COL, GUMBO_TAG_COLGROUP,
1529
- GUMBO_TAG_MENUITEM, GUMBO_TAG_DD, GUMBO_TAG_DETAILS, GUMBO_TAG_DIR,
1530
- GUMBO_TAG_DIV, GUMBO_TAG_DL, GUMBO_TAG_DT, GUMBO_TAG_EMBED,
1531
- GUMBO_TAG_FIELDSET, GUMBO_TAG_FIGCAPTION, GUMBO_TAG_FIGURE,
1532
- GUMBO_TAG_FOOTER, GUMBO_TAG_FORM, GUMBO_TAG_FRAME,
1533
- GUMBO_TAG_FRAMESET, GUMBO_TAG_H1, GUMBO_TAG_H2, GUMBO_TAG_H3,
1534
- GUMBO_TAG_H4, GUMBO_TAG_H5, GUMBO_TAG_H6, GUMBO_TAG_HEAD,
1535
- GUMBO_TAG_HEADER, GUMBO_TAG_HGROUP, GUMBO_TAG_HR, GUMBO_TAG_HTML,
1536
- GUMBO_TAG_IFRAME, GUMBO_TAG_IMG, GUMBO_TAG_INPUT, GUMBO_TAG_ISINDEX,
1537
- GUMBO_TAG_LI, GUMBO_TAG_LINK, GUMBO_TAG_LISTING, GUMBO_TAG_MARQUEE,
1538
- GUMBO_TAG_MENU, GUMBO_TAG_META, GUMBO_TAG_NAV, GUMBO_TAG_NOEMBED,
1539
- GUMBO_TAG_NOFRAMES, GUMBO_TAG_NOSCRIPT, GUMBO_TAG_OBJECT,
1540
- GUMBO_TAG_OL, GUMBO_TAG_P, GUMBO_TAG_PARAM, GUMBO_TAG_PLAINTEXT,
1541
- GUMBO_TAG_PRE, GUMBO_TAG_SCRIPT, GUMBO_TAG_SECTION, GUMBO_TAG_SELECT,
1542
- GUMBO_TAG_STYLE, GUMBO_TAG_SUMMARY, GUMBO_TAG_TABLE, GUMBO_TAG_TBODY,
1543
- GUMBO_TAG_TD, GUMBO_TAG_TEXTAREA, GUMBO_TAG_TFOOT, GUMBO_TAG_TH,
1544
- GUMBO_TAG_THEAD, GUMBO_TAG_TITLE, GUMBO_TAG_TR, GUMBO_TAG_UL,
1545
- GUMBO_TAG_WBR, GUMBO_TAG_XMP, GUMBO_TAG_LAST);
1546
- case GUMBO_NAMESPACE_MATHML:
1547
- return node_tag_in(node,
1548
- GUMBO_TAG_MI, GUMBO_TAG_MO, GUMBO_TAG_MN, GUMBO_TAG_MS,
1549
- GUMBO_TAG_MTEXT, GUMBO_TAG_ANNOTATION_XML, GUMBO_TAG_LAST);
1550
- case GUMBO_NAMESPACE_SVG:
1551
- return node_tag_in(node,
1552
- GUMBO_TAG_FOREIGNOBJECT, GUMBO_TAG_DESC, GUMBO_TAG_LAST);
1553
- }
1554
- abort();
1555
- return false; // Pacify compiler.
1556
- }
1557
-
1558
- // Implicitly closes currently open tags until it reaches an element with the
1559
- // specified tag name. If the elements closed are in the set handled by
1558
+ assert(node->type == GUMBO_NODE_ELEMENT || node->type == GUMBO_NODE_TEMPLATE);
1559
+ return node_tag_in_set(node,
1560
+ (gumbo_tagset){TAG(ADDRESS), TAG(APPLET), TAG(AREA), TAG(ARTICLE),
1561
+ TAG(ASIDE), TAG(BASE), TAG(BASEFONT), TAG(BGSOUND), TAG(BLOCKQUOTE),
1562
+ TAG(BODY), TAG(BR), TAG(BUTTON), TAG(CAPTION), TAG(CENTER), TAG(COL),
1563
+ TAG(COLGROUP), TAG(MENUITEM), TAG(DD), TAG(DETAILS), TAG(DIR),
1564
+ TAG(DIV), TAG(DL), TAG(DT), TAG(EMBED), TAG(FIELDSET),
1565
+ TAG(FIGCAPTION), TAG(FIGURE), TAG(FOOTER), TAG(FORM), TAG(FRAME),
1566
+ TAG(FRAMESET), TAG(H1), TAG(H2), TAG(H3), TAG(H4), TAG(H5), TAG(H6),
1567
+ TAG(HEAD), TAG(HEADER), TAG(HGROUP), TAG(HR), TAG(HTML), TAG(IFRAME),
1568
+ TAG(IMG), TAG(INPUT), TAG(ISINDEX), TAG(LI), TAG(LINK), TAG(LISTING),
1569
+ TAG(MARQUEE), TAG(MENU), TAG(META), TAG(NAV), TAG(NOEMBED),
1570
+ TAG(NOFRAMES), TAG(NOSCRIPT), TAG(OBJECT), TAG(OL), TAG(P),
1571
+ TAG(PARAM), TAG(PLAINTEXT), TAG(PRE), TAG(SCRIPT), TAG(SECTION),
1572
+ TAG(SELECT), TAG(STYLE), TAG(SUMMARY), TAG(TABLE), TAG(TBODY),
1573
+ TAG(TD), TAG(TEMPLATE), TAG(TEXTAREA), TAG(TFOOT), TAG(TH),
1574
+ TAG(THEAD), TAG(TITLE), TAG(TR), TAG(UL), TAG(WBR), TAG(XMP),
1575
+
1576
+ TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN), TAG_MATHML(MS),
1577
+ TAG_MATHML(MTEXT), TAG_MATHML(ANNOTATION_XML),
1578
+
1579
+ TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC)});
1580
+ }
1581
+
1582
+ // Implicitly closes currently open elements until it reaches an element with
1583
+ // the
1584
+ // specified qualified name. If the elements closed are in the set handled by
1560
1585
  // generate_implied_end_tags, this is normal operation and this function returns
1561
1586
  // true. Otherwise, a parse error is recorded and this function returns false.
1562
- static bool implicitly_close_tags(
1563
- GumboParser* parser, GumboToken* token, GumboTag target) {
1587
+ static bool implicitly_close_tags(GumboParser* parser, GumboToken* token,
1588
+ GumboNamespaceEnum target_ns, GumboTag target) {
1564
1589
  bool result = true;
1565
1590
  generate_implied_end_tags(parser, target);
1566
- if (!node_tag_is(get_current_node(parser), target)) {
1591
+ if (!node_qualified_tag_is(get_current_node(parser), target_ns, target)) {
1567
1592
  parser_add_parse_error(parser, token);
1568
- while (!node_tag_is(get_current_node(parser), target)) {
1593
+ while (
1594
+ !node_qualified_tag_is(get_current_node(parser), target_ns, target)) {
1569
1595
  pop_current_node(parser);
1570
1596
  }
1571
1597
  result = false;
1572
1598
  }
1573
- assert(node_tag_is(get_current_node(parser), target));
1599
+ assert(node_qualified_tag_is(get_current_node(parser), target_ns, target));
1574
1600
  pop_current_node(parser);
1575
1601
  return result;
1576
1602
  }
@@ -1579,9 +1605,11 @@ static bool implicitly_close_tags(
1579
1605
  // a </p> tag was encountered, implicitly closing tags. Returns false if a
1580
1606
  // parse error occurs. This is a convenience function because this particular
1581
1607
  // clause appears several times in the spec.
1582
- static bool maybe_implicitly_close_p_tag(GumboParser* parser, GumboToken* token) {
1608
+ static bool maybe_implicitly_close_p_tag(
1609
+ GumboParser* parser, GumboToken* token) {
1583
1610
  if (has_an_element_in_button_scope(parser, GUMBO_TAG_P)) {
1584
- return implicitly_close_tags(parser, token, GUMBO_TAG_P);
1611
+ return implicitly_close_tags(
1612
+ parser, token, GUMBO_NAMESPACE_HTML, GUMBO_TAG_P);
1585
1613
  }
1586
1614
  return true;
1587
1615
  }
@@ -1592,18 +1620,19 @@ static void maybe_implicitly_close_list_tag(
1592
1620
  GumboParser* parser, GumboToken* token, bool is_li) {
1593
1621
  GumboParserState* state = parser->_parser_state;
1594
1622
  state->_frameset_ok = false;
1595
- for (int i = state->_open_elements.length; --i >= 0; ) {
1623
+ for (int i = state->_open_elements.length; --i >= 0;) {
1596
1624
  const GumboNode* node = state->_open_elements.data[i];
1597
- bool is_list_tag = is_li ?
1598
- node_tag_is(node, GUMBO_TAG_LI) :
1599
- node_tag_in(node, GUMBO_TAG_DD, GUMBO_TAG_DT, GUMBO_TAG_LAST);
1625
+ bool is_list_tag =
1626
+ is_li ? node_html_tag_is(node, GUMBO_TAG_LI)
1627
+ : node_tag_in_set(node, (gumbo_tagset){TAG(DD), TAG(DT)});
1600
1628
  if (is_list_tag) {
1601
- implicitly_close_tags(parser, token, node->v.element.tag);
1629
+ implicitly_close_tags(
1630
+ parser, token, node->v.element.tag_namespace, node->v.element.tag);
1602
1631
  return;
1603
1632
  }
1604
1633
  if (is_special_node(node) &&
1605
- !node_tag_in(node, GUMBO_TAG_ADDRESS, GUMBO_TAG_DIV, GUMBO_TAG_P,
1606
- GUMBO_TAG_LAST)) {
1634
+ !node_tag_in_set(
1635
+ node, (gumbo_tagset){TAG(ADDRESS), TAG(DIV), TAG(P)})) {
1607
1636
  return;
1608
1637
  }
1609
1638
  }
@@ -1616,7 +1645,7 @@ static void merge_attributes(
1616
1645
  const GumboVector* token_attr = &token->v.start_tag.attributes;
1617
1646
  GumboVector* node_attr = &node->v.element.attributes;
1618
1647
 
1619
- for (int i = 0; i < token_attr->length; ++i) {
1648
+ for (unsigned int i = 0; i < token_attr->length; ++i) {
1620
1649
  GumboAttribute* attr = token_attr->data[i];
1621
1650
  if (!gumbo_get_attribute(node_attr, attr->name)) {
1622
1651
  // Ownership of the attribute is transferred by this gumbo_vector_add,
@@ -1640,8 +1669,8 @@ static void merge_attributes(
1640
1669
  }
1641
1670
 
1642
1671
  const char* gumbo_normalize_svg_tagname(const GumboStringPiece* tag) {
1643
- for (int i = 0;
1644
- i < sizeof(kSvgTagReplacements) / sizeof(ReplacementEntry); ++i) {
1672
+ for (size_t i = 0; i < sizeof(kSvgTagReplacements) / sizeof(ReplacementEntry);
1673
+ ++i) {
1645
1674
  const ReplacementEntry* entry = &kSvgTagReplacements[i];
1646
1675
  if (gumbo_string_equals_ignore_case(tag, &entry->from)) {
1647
1676
  return entry->to.data;
@@ -1656,9 +1685,9 @@ const char* gumbo_normalize_svg_tagname(const GumboStringPiece* tag) {
1656
1685
  static void adjust_foreign_attributes(GumboParser* parser, GumboToken* token) {
1657
1686
  assert(token->type == GUMBO_TOKEN_START_TAG);
1658
1687
  const GumboVector* attributes = &token->v.start_tag.attributes;
1659
- for (int i = 0;
1660
- i < sizeof(kForeignAttributeReplacements) /
1661
- sizeof(NamespacedAttributeReplacement); ++i) {
1688
+ for (size_t i = 0; i < sizeof(kForeignAttributeReplacements) /
1689
+ sizeof(NamespacedAttributeReplacement);
1690
+ ++i) {
1662
1691
  const NamespacedAttributeReplacement* entry =
1663
1692
  &kForeignAttributeReplacements[i];
1664
1693
  GumboAttribute* attr = gumbo_get_attribute(attributes, entry->from);
@@ -1676,7 +1705,7 @@ static void adjust_foreign_attributes(GumboParser* parser, GumboToken* token) {
1676
1705
  static void adjust_svg_attributes(GumboParser* parser, GumboToken* token) {
1677
1706
  assert(token->type == GUMBO_TOKEN_START_TAG);
1678
1707
  const GumboVector* attributes = &token->v.start_tag.attributes;
1679
- for (int i = 0;
1708
+ for (size_t i = 0;
1680
1709
  i < sizeof(kSvgAttributeReplacements) / sizeof(ReplacementEntry); ++i) {
1681
1710
  const ReplacementEntry* entry = &kSvgAttributeReplacements[i];
1682
1711
  GumboAttribute* attr = gumbo_get_attribute(attributes, entry->from.data);
@@ -1693,8 +1722,8 @@ static void adjust_svg_attributes(GumboParser* parser, GumboToken* token) {
1693
1722
  // value.
1694
1723
  static void adjust_mathml_attributes(GumboParser* parser, GumboToken* token) {
1695
1724
  assert(token->type == GUMBO_TOKEN_START_TAG);
1696
- GumboAttribute* attr = gumbo_get_attribute(
1697
- &token->v.start_tag.attributes, "definitionurl");
1725
+ GumboAttribute* attr =
1726
+ gumbo_get_attribute(&token->v.start_tag.attributes, "definitionurl");
1698
1727
  if (!attr) {
1699
1728
  return;
1700
1729
  }
@@ -1702,32 +1731,30 @@ static void adjust_mathml_attributes(GumboParser* parser, GumboToken* token) {
1702
1731
  attr->name = gumbo_copy_stringz(parser, "definitionURL");
1703
1732
  }
1704
1733
 
1705
- static bool doctype_matches(
1706
- const GumboTokenDocType* doctype,
1707
- const GumboStringPiece* public_id,
1708
- const GumboStringPiece* system_id,
1734
+ static bool doctype_matches(const GumboTokenDocType* doctype,
1735
+ const GumboStringPiece* public_id, const GumboStringPiece* system_id,
1709
1736
  bool allow_missing_system_id) {
1710
1737
  return !strcmp(doctype->public_identifier, public_id->data) &&
1711
- (allow_missing_system_id || doctype->has_system_identifier) &&
1712
- !strcmp(doctype->system_identifier, system_id->data);
1738
+ (allow_missing_system_id || doctype->has_system_identifier) &&
1739
+ !strcmp(doctype->system_identifier, system_id->data);
1713
1740
  }
1714
1741
 
1715
1742
  static bool maybe_add_doctype_error(
1716
1743
  GumboParser* parser, const GumboToken* token) {
1717
1744
  const GumboTokenDocType* doctype = &token->v.doc_type;
1718
1745
  bool html_doctype = !strcmp(doctype->name, kDoctypeHtml.data);
1719
- if ((!html_doctype ||
1720
- doctype->has_public_identifier ||
1721
- (doctype->has_system_identifier && !strcmp(
1722
- doctype->system_identifier, kSystemIdLegacyCompat.data))) &&
1723
- !(html_doctype && (
1724
- doctype_matches(doctype, &kPublicIdHtml4_0,
1725
- &kSystemIdRecHtml4_0, true) ||
1726
- doctype_matches(doctype, &kPublicIdHtml4_01, &kSystemIdHtml4, true) ||
1727
- doctype_matches(doctype, &kPublicIdXhtml1_0,
1728
- &kSystemIdXhtmlStrict1_1, false) ||
1729
- doctype_matches(doctype, &kPublicIdXhtml1_1,
1730
- &kSystemIdXhtml1_1, false)))) {
1746
+ if ((!html_doctype || doctype->has_public_identifier ||
1747
+ (doctype->has_system_identifier &&
1748
+ !strcmp(
1749
+ doctype->system_identifier, kSystemIdLegacyCompat.data))) &&
1750
+ !(html_doctype && (doctype_matches(doctype, &kPublicIdHtml4_0,
1751
+ &kSystemIdRecHtml4_0, true) ||
1752
+ doctype_matches(doctype, &kPublicIdHtml4_01,
1753
+ &kSystemIdHtml4, true) ||
1754
+ doctype_matches(doctype, &kPublicIdXhtml1_0,
1755
+ &kSystemIdXhtmlStrict1_1, false) ||
1756
+ doctype_matches(doctype, &kPublicIdXhtml1_1,
1757
+ &kSystemIdXhtml1_1, false)))) {
1731
1758
  parser_add_parse_error(parser, token);
1732
1759
  return false;
1733
1760
  }
@@ -1750,7 +1777,7 @@ static void remove_from_parent(GumboParser* parser, GumboNode* node) {
1750
1777
  gumbo_vector_remove_at(parser, index, children);
1751
1778
  node->parent = NULL;
1752
1779
  node->index_within_parent = -1;
1753
- for (int i = index; i < children->length; ++i) {
1780
+ for (unsigned int i = index; i < children->length; ++i) {
1754
1781
  GumboNode* child = children->data[i];
1755
1782
  child->index_within_parent = i;
1756
1783
  }
@@ -1759,29 +1786,38 @@ static void remove_from_parent(GumboParser* parser, GumboNode* node) {
1759
1786
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#an-introduction-to-error-handling-and-strange-cases-in-the-parser
1760
1787
  // Also described in the "in body" handling for end formatting tags.
1761
1788
  static bool adoption_agency_algorithm(
1762
- GumboParser* parser, GumboToken* token, GumboTag closing_tag) {
1789
+ GumboParser* parser, GumboToken* token, GumboTag subject) {
1763
1790
  GumboParserState* state = parser->_parser_state;
1764
1791
  gumbo_debug("Entering adoption agency algorithm.\n");
1765
- // Steps 1-3 & 16:
1766
- for (int i = 0; i < 8; ++i) {
1767
- // Step 4.
1792
+ // Step 1.
1793
+ GumboNode* current_node = get_current_node(parser);
1794
+ if (current_node->v.element.tag_namespace == GUMBO_NAMESPACE_HTML &&
1795
+ current_node->v.element.tag == subject &&
1796
+ gumbo_vector_index_of(
1797
+ &state->_active_formatting_elements, current_node) == -1) {
1798
+ pop_current_node(parser);
1799
+ return false;
1800
+ }
1801
+ // Steps 2-4 & 20:
1802
+ for (unsigned int i = 0; i < 8; ++i) {
1803
+ // Step 5.
1768
1804
  GumboNode* formatting_node = NULL;
1769
1805
  int formatting_node_in_open_elements = -1;
1770
- for (int j = state->_active_formatting_elements.length; --j >= 0; ) {
1806
+ for (int j = state->_active_formatting_elements.length; --j >= 0;) {
1771
1807
  GumboNode* current_node = state->_active_formatting_elements.data[j];
1772
1808
  if (current_node == &kActiveFormattingScopeMarker) {
1773
1809
  gumbo_debug("Broke on scope marker; aborting.\n");
1774
1810
  // Last scope marker; abort the algorithm.
1775
1811
  return false;
1776
1812
  }
1777
- if (node_tag_is(current_node, closing_tag)) {
1813
+ if (node_html_tag_is(current_node, subject)) {
1778
1814
  // Found it.
1779
1815
  formatting_node = current_node;
1780
- formatting_node_in_open_elements = gumbo_vector_index_of(
1781
- &state->_open_elements, formatting_node);
1816
+ formatting_node_in_open_elements =
1817
+ gumbo_vector_index_of(&state->_open_elements, formatting_node);
1782
1818
  gumbo_debug("Formatting element of tag %s at %d.\n",
1783
- gumbo_normalized_tagname(closing_tag),
1784
- formatting_node_in_open_elements);
1819
+ gumbo_normalized_tagname(subject),
1820
+ formatting_node_in_open_elements);
1785
1821
  break;
1786
1822
  }
1787
1823
  }
@@ -1793,74 +1829,84 @@ static bool adoption_agency_algorithm(
1793
1829
  return false;
1794
1830
  }
1795
1831
 
1832
+ // Step 6
1796
1833
  if (formatting_node_in_open_elements == -1) {
1797
1834
  gumbo_debug("Formatting node not on stack of open elements.\n");
1798
- gumbo_vector_remove(parser, formatting_node,
1799
- &state->_active_formatting_elements);
1835
+ parser_add_parse_error(parser, token);
1836
+ gumbo_vector_remove(
1837
+ parser, formatting_node, &state->_active_formatting_elements);
1800
1838
  return false;
1801
1839
  }
1802
1840
 
1841
+ // Step 7
1803
1842
  if (!has_an_element_in_scope(parser, formatting_node->v.element.tag)) {
1804
1843
  parser_add_parse_error(parser, token);
1805
1844
  gumbo_debug("Element not in scope.\n");
1806
1845
  return false;
1807
1846
  }
1847
+
1848
+ // Step 8
1808
1849
  if (formatting_node != get_current_node(parser)) {
1809
1850
  parser_add_parse_error(parser, token); // But continue onwards.
1810
1851
  }
1811
1852
  assert(formatting_node);
1812
- assert(!node_tag_is(formatting_node, GUMBO_TAG_HTML));
1813
- assert(!node_tag_is(formatting_node, GUMBO_TAG_BODY));
1853
+ assert(!node_html_tag_is(formatting_node, GUMBO_TAG_HTML));
1854
+ assert(!node_html_tag_is(formatting_node, GUMBO_TAG_BODY));
1814
1855
 
1815
- // Step 5 & 6.
1856
+ // Step 9 & 10
1816
1857
  GumboNode* furthest_block = NULL;
1817
- for (int j = formatting_node_in_open_elements;
1858
+ for (unsigned int j = formatting_node_in_open_elements;
1818
1859
  j < state->_open_elements.length; ++j) {
1819
1860
  assert(j > 0);
1820
1861
  GumboNode* current = state->_open_elements.data[j];
1821
1862
  if (is_special_node(current)) {
1822
- // Step 5.
1863
+ // Step 9.
1823
1864
  furthest_block = current;
1824
1865
  break;
1825
1866
  }
1826
1867
  }
1827
1868
  if (!furthest_block) {
1828
- // Step 6.
1869
+ // Step 10.
1829
1870
  while (get_current_node(parser) != formatting_node) {
1830
1871
  pop_current_node(parser);
1831
1872
  }
1832
1873
  // And the formatting element itself.
1833
1874
  pop_current_node(parser);
1834
- gumbo_vector_remove(parser, formatting_node,
1835
- &state->_active_formatting_elements);
1875
+ gumbo_vector_remove(
1876
+ parser, formatting_node, &state->_active_formatting_elements);
1836
1877
  return false;
1837
1878
  }
1838
- assert(!node_tag_is(furthest_block, GUMBO_TAG_HTML));
1879
+ assert(!node_html_tag_is(furthest_block, GUMBO_TAG_HTML));
1839
1880
  assert(furthest_block);
1840
1881
 
1841
- // Step 7.
1882
+ // Step 11.
1842
1883
  // Elements may be moved and reparented by this algorithm, so
1843
1884
  // common_ancestor is not necessarily the same as formatting_node->parent.
1844
1885
  GumboNode* common_ancestor =
1845
- state->_open_elements.data[gumbo_vector_index_of(
1846
- &state->_open_elements, formatting_node) - 1];
1886
+ state->_open_elements.data[gumbo_vector_index_of(&state->_open_elements,
1887
+ formatting_node) -
1888
+ 1];
1847
1889
  gumbo_debug("Common ancestor tag = %s, furthest block tag = %s.\n",
1848
- gumbo_normalized_tagname(common_ancestor->v.element.tag),
1849
- gumbo_normalized_tagname(furthest_block->v.element.tag));
1890
+ gumbo_normalized_tagname(common_ancestor->v.element.tag),
1891
+ gumbo_normalized_tagname(furthest_block->v.element.tag));
1850
1892
 
1851
- // Step 8.
1893
+ // Step 12.
1852
1894
  int bookmark = gumbo_vector_index_of(
1853
- &state->_active_formatting_elements, formatting_node);;
1854
- // Step 9.
1895
+ &state->_active_formatting_elements, formatting_node) +
1896
+ 1;
1897
+ gumbo_debug("Bookmark at %d.\n", bookmark);
1898
+ // Step 13.
1855
1899
  GumboNode* node = furthest_block;
1856
1900
  GumboNode* last_node = furthest_block;
1857
1901
  // Must be stored explicitly, in case node is removed from the stack of open
1858
1902
  // elements, to handle step 9.4.
1859
1903
  int saved_node_index = gumbo_vector_index_of(&state->_open_elements, node);
1860
1904
  assert(saved_node_index > 0);
1861
- // Step 9.1-9.3 & 9.11.
1862
- for (int j = 0; j < 3; ++j) {
1863
- // Step 9.4.
1905
+ // Step 13.1.
1906
+ for (int j = 0;;) {
1907
+ // Step 13.2.
1908
+ ++j;
1909
+ // Step 13.3.
1864
1910
  int node_index = gumbo_vector_index_of(&state->_open_elements, node);
1865
1911
  gumbo_debug(
1866
1912
  "Current index: %d, last index: %d.\n", node_index, saved_node_index);
@@ -1869,59 +1915,72 @@ static bool adoption_agency_algorithm(
1869
1915
  }
1870
1916
  saved_node_index = --node_index;
1871
1917
  assert(node_index > 0);
1872
- assert(node_index < state->_open_elements.capacity);
1918
+ assert((unsigned int) node_index < state->_open_elements.capacity);
1873
1919
  node = state->_open_elements.data[node_index];
1874
1920
  assert(node->parent);
1875
- // Step 9.5.
1876
- if (gumbo_vector_index_of(
1877
- &state->_active_formatting_elements, node) == -1) {
1921
+ if (node == formatting_node) {
1922
+ // Step 13.4.
1923
+ break;
1924
+ }
1925
+ int formatting_index =
1926
+ gumbo_vector_index_of(&state->_active_formatting_elements, node);
1927
+ if (j > 3 && formatting_index != -1) {
1928
+ // Step 13.5.
1929
+ gumbo_debug("Removing formatting element at %d.\n", formatting_index);
1930
+ gumbo_vector_remove_at(
1931
+ parser, formatting_index, &state->_active_formatting_elements);
1932
+ // Removing the element shifts all indices over by one, so we may need
1933
+ // to move the bookmark.
1934
+ if (formatting_index < bookmark) {
1935
+ --bookmark;
1936
+ gumbo_debug("Moving bookmark to %d.\n", bookmark);
1937
+ }
1938
+ continue;
1939
+ }
1940
+ if (formatting_index == -1) {
1941
+ // Step 13.6.
1878
1942
  gumbo_vector_remove_at(parser, node_index, &state->_open_elements);
1879
1943
  continue;
1880
- } else if (node == formatting_node) {
1881
- // Step 9.6.
1882
- break;
1883
1944
  }
1884
- // Step 9.7.
1885
- int formatting_index = gumbo_vector_index_of(
1886
- &state->_active_formatting_elements, node);
1945
+ // Step 13.7.
1946
+ // "common ancestor as the intended parent" doesn't actually mean insert
1947
+ // it into the common ancestor; that happens below.
1887
1948
  node = clone_node(parser, node, GUMBO_INSERTION_ADOPTION_AGENCY_CLONED);
1949
+ assert(formatting_index >= 0);
1888
1950
  state->_active_formatting_elements.data[formatting_index] = node;
1951
+ assert(node_index >= 0);
1889
1952
  state->_open_elements.data[node_index] = node;
1890
- // Step 9.8.
1953
+ // Step 13.8.
1891
1954
  if (last_node == furthest_block) {
1892
1955
  bookmark = formatting_index + 1;
1893
- assert(bookmark <= state->_active_formatting_elements.length);
1956
+ gumbo_debug("Bookmark moved to %d.\n", bookmark);
1957
+ assert((unsigned int) bookmark <= state->_active_formatting_elements.length);
1894
1958
  }
1895
- // Step 9.9.
1959
+ // Step 13.9.
1896
1960
  last_node->parse_flags |= GUMBO_INSERTION_ADOPTION_AGENCY_MOVED;
1897
1961
  remove_from_parent(parser, last_node);
1898
1962
  append_node(parser, node, last_node);
1899
- // Step 9.10.
1963
+ // Step 13.10.
1900
1964
  last_node = node;
1901
- }
1965
+ } // Step 13.11.
1902
1966
 
1903
- // Step 10.
1967
+ // Step 14.
1904
1968
  gumbo_debug("Removing %s node from parent ",
1905
- gumbo_normalized_tagname(last_node->v.element.tag));
1969
+ gumbo_normalized_tagname(last_node->v.element.tag));
1906
1970
  remove_from_parent(parser, last_node);
1907
1971
  last_node->parse_flags |= GUMBO_INSERTION_ADOPTION_AGENCY_MOVED;
1908
- if (node_tag_in(common_ancestor, GUMBO_TAG_TABLE, GUMBO_TAG_TBODY,
1909
- GUMBO_TAG_TFOOT, GUMBO_TAG_THEAD, GUMBO_TAG_TR,
1910
- GUMBO_TAG_LAST)) {
1911
- gumbo_debug("and foster-parenting it.\n");
1912
- foster_parent_element(parser, last_node);
1913
- } else {
1914
- gumbo_debug("and inserting it into %s.\n",
1915
- gumbo_normalized_tagname(common_ancestor->v.element.tag));
1916
- append_node(parser, common_ancestor, last_node);
1917
- }
1972
+ InsertionLocation location =
1973
+ get_appropriate_insertion_location(parser, common_ancestor);
1974
+ gumbo_debug("and inserting it into %s.\n",
1975
+ gumbo_normalized_tagname(location.target->v.element.tag));
1976
+ insert_node(parser, last_node, location);
1918
1977
 
1919
- // Step 11.
1978
+ // Step 15.
1920
1979
  GumboNode* new_formatting_node = clone_node(
1921
1980
  parser, formatting_node, GUMBO_INSERTION_ADOPTION_AGENCY_CLONED);
1922
1981
  formatting_node->parse_flags |= GUMBO_INSERTION_IMPLICIT_END_TAG;
1923
1982
 
1924
- // Step 12. Instead of appending nodes one-by-one, we swap the children
1983
+ // Step 16. Instead of appending nodes one-by-one, we swap the children
1925
1984
  // vector of furthest_block with the empty children of new_formatting_node,
1926
1985
  // reducing memory traffic and allocations. We still have to reset their
1927
1986
  // parent pointers, though.
@@ -1931,15 +1990,15 @@ static bool adoption_agency_algorithm(
1931
1990
  furthest_block->v.element.children = temp;
1932
1991
 
1933
1992
  temp = new_formatting_node->v.element.children;
1934
- for (int i = 0; i < temp.length; ++i) {
1993
+ for (unsigned int i = 0; i < temp.length; ++i) {
1935
1994
  GumboNode* child = temp.data[i];
1936
1995
  child->parent = new_formatting_node;
1937
1996
  }
1938
1997
 
1939
- // Step 13.
1998
+ // Step 17.
1940
1999
  append_node(parser, furthest_block, new_formatting_node);
1941
2000
 
1942
- // Step 14.
2001
+ // Step 18.
1943
2002
  // If the formatting node was before the bookmark, it may shift over all
1944
2003
  // indices after it, so we need to explicitly find the index and possibly
1945
2004
  // adjust the bookmark.
@@ -1947,25 +2006,27 @@ static bool adoption_agency_algorithm(
1947
2006
  &state->_active_formatting_elements, formatting_node);
1948
2007
  assert(formatting_node_index != -1);
1949
2008
  if (formatting_node_index < bookmark) {
2009
+ gumbo_debug(
2010
+ "Formatting node at %d is before bookmark at %d; decrementing.\n",
2011
+ formatting_node_index, bookmark);
1950
2012
  --bookmark;
1951
2013
  }
1952
2014
  gumbo_vector_remove_at(
1953
2015
  parser, formatting_node_index, &state->_active_formatting_elements);
1954
2016
  assert(bookmark >= 0);
1955
- assert(bookmark <= state->_active_formatting_elements.length);
2017
+ assert((unsigned int) bookmark <= state->_active_formatting_elements.length);
1956
2018
  gumbo_vector_insert_at(parser, new_formatting_node, bookmark,
1957
- &state->_active_formatting_elements);
2019
+ &state->_active_formatting_elements);
1958
2020
 
1959
- // Step 15.
1960
- gumbo_vector_remove(
1961
- parser, formatting_node, &state->_open_elements);
1962
- int insert_at = gumbo_vector_index_of(
1963
- &state->_open_elements, furthest_block) + 1;
2021
+ // Step 19.
2022
+ gumbo_vector_remove(parser, formatting_node, &state->_open_elements);
2023
+ int insert_at =
2024
+ gumbo_vector_index_of(&state->_open_elements, furthest_block) + 1;
1964
2025
  assert(insert_at >= 0);
1965
- assert(insert_at <= state->_open_elements.length);
2026
+ assert((unsigned int) insert_at <= state->_open_elements.length);
1966
2027
  gumbo_vector_insert_at(
1967
2028
  parser, new_formatting_node, insert_at, &state->_open_elements);
1968
- }
2029
+ } // Step 20.
1969
2030
  return true;
1970
2031
  }
1971
2032
 
@@ -1988,17 +2049,19 @@ static void ignore_token(GumboParser* parser) {
1988
2049
 
1989
2050
  // http://www.whatwg.org/specs/web-apps/current-work/complete/the-end.html
1990
2051
  static void finish_parsing(GumboParser* parser) {
2052
+ gumbo_debug("Finishing parsing");
1991
2053
  maybe_flush_text_node_buffer(parser);
1992
2054
  GumboParserState* state = parser->_parser_state;
1993
2055
  for (GumboNode* node = pop_current_node(parser); node;
1994
2056
  node = pop_current_node(parser)) {
1995
- if ((node_tag_is(node, GUMBO_TAG_BODY) && state->_closed_body_tag) ||
1996
- (node_tag_is(node, GUMBO_TAG_HTML) && state->_closed_html_tag)) {
2057
+ if ((node_html_tag_is(node, GUMBO_TAG_BODY) && state->_closed_body_tag) ||
2058
+ (node_html_tag_is(node, GUMBO_TAG_HTML) && state->_closed_html_tag)) {
1997
2059
  continue;
1998
2060
  }
1999
2061
  node->parse_flags |= GUMBO_INSERTION_IMPLICIT_END_TAG;
2000
2062
  }
2001
- while (pop_current_node(parser)); // Pop them all.
2063
+ while (pop_current_node(parser))
2064
+ ; // Pop them all.
2002
2065
  }
2003
2066
 
2004
2067
  static bool handle_initial(GumboParser* parser, GumboToken* token) {
@@ -2042,9 +2105,9 @@ static bool handle_before_html(GumboParser* parser, GumboToken* token) {
2042
2105
  parser->_output->root = html_node;
2043
2106
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_BEFORE_HEAD);
2044
2107
  return true;
2045
- } else if (token->type == GUMBO_TOKEN_END_TAG && !tag_in(
2046
- token, false, GUMBO_TAG_HEAD, GUMBO_TAG_BODY, GUMBO_TAG_HTML,
2047
- GUMBO_TAG_BR, GUMBO_TAG_LAST)) {
2108
+ } else if (token->type == GUMBO_TOKEN_END_TAG &&
2109
+ !tag_in(token, false,
2110
+ (gumbo_tagset){TAG(HEAD), TAG(BODY), TAG(HTML), TAG(BR)})) {
2048
2111
  parser_add_parse_error(parser, token);
2049
2112
  ignore_token(parser);
2050
2113
  return false;
@@ -2076,9 +2139,9 @@ static bool handle_before_head(GumboParser* parser, GumboToken* token) {
2076
2139
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_HEAD);
2077
2140
  parser->_parser_state->_head_element = node;
2078
2141
  return true;
2079
- } else if (token->type == GUMBO_TOKEN_END_TAG && !tag_in(
2080
- token, false, GUMBO_TAG_HEAD, GUMBO_TAG_BODY, GUMBO_TAG_HTML,
2081
- GUMBO_TAG_BR, GUMBO_TAG_LAST)) {
2142
+ } else if (token->type == GUMBO_TOKEN_END_TAG &&
2143
+ !tag_in(token, false,
2144
+ (gumbo_tagset){TAG(HEAD), TAG(BODY), TAG(HTML), TAG(BR)})) {
2082
2145
  parser_add_parse_error(parser, token);
2083
2146
  ignore_token(parser);
2084
2147
  return false;
@@ -2110,9 +2173,9 @@ static bool handle_in_head(GumboParser* parser, GumboToken* token) {
2110
2173
  return true;
2111
2174
  } else if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
2112
2175
  return handle_in_body(parser, token);
2113
- } else if (tag_in(token, kStartTag, GUMBO_TAG_BASE, GUMBO_TAG_BASEFONT,
2114
- GUMBO_TAG_BGSOUND, GUMBO_TAG_MENUITEM, GUMBO_TAG_LINK,
2115
- GUMBO_TAG_LAST)) {
2176
+ } else if (tag_in(token, kStartTag,
2177
+ (gumbo_tagset){TAG(BASE), TAG(BASEFONT), TAG(BGSOUND),
2178
+ TAG(MENUITEM), TAG(LINK)})) {
2116
2179
  insert_element_from_token(parser, token);
2117
2180
  pop_current_node(parser);
2118
2181
  acknowledge_self_closing_tag(parser);
@@ -2129,8 +2192,8 @@ static bool handle_in_head(GumboParser* parser, GumboToken* token) {
2129
2192
  } else if (tag_is(token, kStartTag, GUMBO_TAG_TITLE)) {
2130
2193
  run_generic_parsing_algorithm(parser, token, GUMBO_LEX_RCDATA);
2131
2194
  return true;
2132
- } else if (tag_in(token, kStartTag, GUMBO_TAG_NOFRAMES, GUMBO_TAG_STYLE,
2133
- GUMBO_TAG_LAST)) {
2195
+ } else if (tag_in(
2196
+ token, kStartTag, (gumbo_tagset){TAG(NOFRAMES), TAG(STYLE)})) {
2134
2197
  run_generic_parsing_algorithm(parser, token, GUMBO_LEX_RAWTEXT);
2135
2198
  return true;
2136
2199
  } else if (tag_is(token, kStartTag, GUMBO_TAG_NOSCRIPT)) {
@@ -2143,32 +2206,51 @@ static bool handle_in_head(GumboParser* parser, GumboToken* token) {
2143
2206
  } else if (tag_is(token, kEndTag, GUMBO_TAG_HEAD)) {
2144
2207
  GumboNode* head = pop_current_node(parser);
2145
2208
  AVOID_UNUSED_VARIABLE_WARNING(head);
2146
- assert(node_tag_is(head, GUMBO_TAG_HEAD));
2209
+ assert(node_html_tag_is(head, GUMBO_TAG_HEAD));
2147
2210
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_HEAD);
2148
2211
  return true;
2149
- } else if (tag_is(token, kStartTag, GUMBO_TAG_HEAD)) {
2150
- parser_add_parse_error(parser, token);
2151
- ignore_token(parser);
2152
- return false;
2212
+ } else if (tag_in(token, kEndTag,
2213
+ (gumbo_tagset){TAG(BODY), TAG(HTML), TAG(BR)})) {
2214
+ pop_current_node(parser);
2215
+ set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_HEAD);
2216
+ parser->_parser_state->_reprocess_current_token = true;
2217
+ return true;
2218
+ } else if (tag_is(token, kStartTag, GUMBO_TAG_TEMPLATE)) {
2219
+ insert_element_from_token(parser, token);
2220
+ add_formatting_element(parser, &kActiveFormattingScopeMarker);
2221
+ parser->_parser_state->_frameset_ok = false;
2222
+ set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TEMPLATE);
2223
+ push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TEMPLATE);
2224
+ return true;
2225
+ } else if (tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
2226
+ if (!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
2227
+ parser_add_parse_error(parser, token);
2228
+ ignore_token(parser);
2229
+ return false;
2230
+ }
2231
+ generate_all_implied_end_tags_thoroughly(parser);
2232
+ bool success = true;
2233
+ if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_TEMPLATE)) {
2234
+ parser_add_parse_error(parser, token);
2235
+ success = false;
2236
+ }
2237
+ while (!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_TEMPLATE))
2238
+ ;
2239
+ clear_active_formatting_elements(parser);
2240
+ pop_template_insertion_mode(parser);
2241
+ reset_insertion_mode_appropriately(parser);
2242
+ return success;
2153
2243
  } else if (tag_is(token, kStartTag, GUMBO_TAG_HEAD) ||
2154
- (token->type == GUMBO_TOKEN_END_TAG &&
2155
- !tag_in(token, kEndTag, GUMBO_TAG_BODY, GUMBO_TAG_HTML,
2156
- GUMBO_TAG_BR, GUMBO_TAG_LAST))) {
2157
- parser_add_parse_error(parser, token);
2158
- return false;
2159
- } else if (tag_is(token, kStartTag, GUMBO_TAG_UNKNOWN) && token->v.start_tag.is_self_closing) {
2244
+ (token->type == GUMBO_TOKEN_END_TAG)) {
2160
2245
  parser_add_parse_error(parser, token);
2161
2246
  ignore_token(parser);
2162
2247
  return false;
2163
2248
  } else {
2164
- const GumboNode* node = pop_current_node(parser);
2165
- assert(node_tag_is(node, GUMBO_TAG_HEAD));
2166
- AVOID_UNUSED_VARIABLE_WARNING(node);
2249
+ pop_current_node(parser);
2167
2250
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_HEAD);
2168
2251
  parser->_parser_state->_reprocess_current_token = true;
2169
2252
  return true;
2170
2253
  }
2171
-
2172
2254
  return true;
2173
2255
  }
2174
2256
 
@@ -2181,27 +2263,27 @@ static bool handle_in_head_noscript(GumboParser* parser, GumboToken* token) {
2181
2263
  return handle_in_body(parser, token);
2182
2264
  } else if (tag_is(token, kEndTag, GUMBO_TAG_NOSCRIPT)) {
2183
2265
  const GumboNode* node = pop_current_node(parser);
2184
- assert(node_tag_is(node, GUMBO_TAG_NOSCRIPT));
2266
+ assert(node_html_tag_is(node, GUMBO_TAG_NOSCRIPT));
2185
2267
  AVOID_UNUSED_VARIABLE_WARNING(node);
2186
2268
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_HEAD);
2187
2269
  return true;
2188
2270
  } else if (token->type == GUMBO_TOKEN_WHITESPACE ||
2189
2271
  token->type == GUMBO_TOKEN_COMMENT ||
2190
- tag_in(token, kStartTag, GUMBO_TAG_BASEFONT, GUMBO_TAG_BGSOUND,
2191
- GUMBO_TAG_LINK, GUMBO_TAG_META, GUMBO_TAG_NOFRAMES,
2192
- GUMBO_TAG_STYLE, GUMBO_TAG_LAST)) {
2272
+ tag_in(token, kStartTag,
2273
+ (gumbo_tagset){TAG(BASEFONT), TAG(BGSOUND), TAG(LINK),
2274
+ TAG(META), TAG(NOFRAMES), TAG(STYLE)})) {
2193
2275
  return handle_in_head(parser, token);
2194
- } else if (tag_in(token, kStartTag, GUMBO_TAG_HEAD, GUMBO_TAG_NOSCRIPT,
2195
- GUMBO_TAG_LAST) ||
2196
- (token->type == GUMBO_TOKEN_END_TAG &&
2197
- !tag_is(token, kEndTag, GUMBO_TAG_BR))) {
2276
+ } else if (tag_in(
2277
+ token, kStartTag, (gumbo_tagset){TAG(HEAD), TAG(NOSCRIPT)}) ||
2278
+ (token->type == GUMBO_TOKEN_END_TAG &&
2279
+ !tag_is(token, kEndTag, GUMBO_TAG_BR))) {
2198
2280
  parser_add_parse_error(parser, token);
2199
2281
  ignore_token(parser);
2200
2282
  return false;
2201
2283
  } else {
2202
2284
  parser_add_parse_error(parser, token);
2203
2285
  const GumboNode* node = pop_current_node(parser);
2204
- assert(node_tag_is(node, GUMBO_TAG_NOSCRIPT));
2286
+ assert(node_html_tag_is(node, GUMBO_TAG_NOSCRIPT));
2205
2287
  AVOID_UNUSED_VARIABLE_WARNING(node);
2206
2288
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_HEAD);
2207
2289
  parser->_parser_state->_reprocess_current_token = true;
@@ -2233,10 +2315,10 @@ static bool handle_after_head(GumboParser* parser, GumboToken* token) {
2233
2315
  insert_element_from_token(parser, token);
2234
2316
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_FRAMESET);
2235
2317
  return true;
2236
- } else if (tag_in(token, kStartTag, GUMBO_TAG_BASE, GUMBO_TAG_BASEFONT,
2237
- GUMBO_TAG_BGSOUND, GUMBO_TAG_LINK, GUMBO_TAG_META,
2238
- GUMBO_TAG_NOFRAMES, GUMBO_TAG_SCRIPT, GUMBO_TAG_STYLE,
2239
- GUMBO_TAG_TITLE, GUMBO_TAG_LAST)) {
2318
+ } else if (tag_in(token, kStartTag,
2319
+ (gumbo_tagset){TAG(BASE), TAG(BASEFONT), TAG(BGSOUND),
2320
+ TAG(LINK), TAG(META), TAG(NOFRAMES), TAG(SCRIPT),
2321
+ TAG(STYLE), TAG(TEMPLATE), TAG(TITLE)})) {
2240
2322
  parser_add_parse_error(parser, token);
2241
2323
  assert(state->_head_element != NULL);
2242
2324
  // This must be flushed before we push the head element on, as there may be
@@ -2246,10 +2328,12 @@ static bool handle_after_head(GumboParser* parser, GumboToken* token) {
2246
2328
  bool result = handle_in_head(parser, token);
2247
2329
  gumbo_vector_remove(parser, state->_head_element, &state->_open_elements);
2248
2330
  return result;
2331
+ } else if (tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
2332
+ return handle_in_head(parser, token);
2249
2333
  } else if (tag_is(token, kStartTag, GUMBO_TAG_HEAD) ||
2250
- (token->type == GUMBO_TOKEN_END_TAG &&
2251
- !tag_in(token, kEndTag, GUMBO_TAG_BODY, GUMBO_TAG_HTML,
2252
- GUMBO_TAG_BR, GUMBO_TAG_LAST))) {
2334
+ (token->type == GUMBO_TOKEN_END_TAG &&
2335
+ !tag_in(token, kEndTag,
2336
+ (gumbo_tagset){TAG(BODY), TAG(HTML), TAG(BR)}))) {
2253
2337
  parser_add_parse_error(parser, token);
2254
2338
  ignore_token(parser);
2255
2339
  return false;
@@ -2263,24 +2347,23 @@ static bool handle_after_head(GumboParser* parser, GumboToken* token) {
2263
2347
 
2264
2348
  static void destroy_node(GumboParser* parser, GumboNode* node) {
2265
2349
  switch (node->type) {
2266
- case GUMBO_NODE_DOCUMENT:
2267
- {
2268
- GumboDocument* doc = &node->v.document;
2269
- for (int i = 0; i < doc->children.length; ++i) {
2270
- destroy_node(parser, doc->children.data[i]);
2271
- }
2272
- gumbo_parser_deallocate(parser, (void*) doc->children.data);
2273
- gumbo_parser_deallocate(parser, (void*) doc->name);
2274
- gumbo_parser_deallocate(parser, (void*) doc->public_identifier);
2275
- gumbo_parser_deallocate(parser, (void*) doc->system_identifier);
2350
+ case GUMBO_NODE_DOCUMENT: {
2351
+ GumboDocument* doc = &node->v.document;
2352
+ for (unsigned int i = 0; i < doc->children.length; ++i) {
2353
+ destroy_node(parser, doc->children.data[i]);
2276
2354
  }
2277
- break;
2355
+ gumbo_parser_deallocate(parser, (void*) doc->children.data);
2356
+ gumbo_parser_deallocate(parser, (void*) doc->name);
2357
+ gumbo_parser_deallocate(parser, (void*) doc->public_identifier);
2358
+ gumbo_parser_deallocate(parser, (void*) doc->system_identifier);
2359
+ } break;
2360
+ case GUMBO_NODE_TEMPLATE:
2278
2361
  case GUMBO_NODE_ELEMENT:
2279
- for (int i = 0; i < node->v.element.attributes.length; ++i) {
2362
+ for (unsigned int i = 0; i < node->v.element.attributes.length; ++i) {
2280
2363
  gumbo_destroy_attribute(parser, node->v.element.attributes.data[i]);
2281
2364
  }
2282
2365
  gumbo_parser_deallocate(parser, node->v.element.attributes.data);
2283
- for (int i = 0; i < node->v.element.children.length; ++i) {
2366
+ for (unsigned int i = 0; i < node->v.element.children.length; ++i) {
2284
2367
  destroy_node(parser, node->v.element.children.data[i]);
2285
2368
  }
2286
2369
  gumbo_parser_deallocate(parser, node->v.element.children.data);
@@ -2307,7 +2390,8 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2307
2390
  reconstruct_active_formatting_elements(parser);
2308
2391
  insert_text_token(parser, token);
2309
2392
  return true;
2310
- } else if (token->type == GUMBO_TOKEN_CHARACTER) {
2393
+ } else if (token->type == GUMBO_TOKEN_CHARACTER ||
2394
+ token->type == GUMBO_TOKEN_CDATA) {
2311
2395
  reconstruct_active_formatting_elements(parser);
2312
2396
  insert_text_token(parser, token);
2313
2397
  set_frameset_not_ok(parser);
@@ -2320,20 +2404,26 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2320
2404
  ignore_token(parser);
2321
2405
  return false;
2322
2406
  } else if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
2407
+ parser_add_parse_error(parser, token);
2408
+ if (has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
2409
+ ignore_token(parser);
2410
+ return false;
2411
+ }
2323
2412
  assert(parser->_output->root != NULL);
2324
2413
  assert(parser->_output->root->type == GUMBO_NODE_ELEMENT);
2325
- parser_add_parse_error(parser, token);
2326
2414
  merge_attributes(parser, token, parser->_output->root);
2327
2415
  return false;
2328
- } else if (tag_in(token, kStartTag, GUMBO_TAG_BASE, GUMBO_TAG_BASEFONT,
2329
- GUMBO_TAG_BGSOUND, GUMBO_TAG_MENUITEM, GUMBO_TAG_LINK,
2330
- GUMBO_TAG_META, GUMBO_TAG_NOFRAMES, GUMBO_TAG_SCRIPT,
2331
- GUMBO_TAG_STYLE, GUMBO_TAG_TITLE, GUMBO_TAG_LAST)) {
2416
+ } else if (tag_in(token, kStartTag,
2417
+ (gumbo_tagset){TAG(BASE), TAG(BASEFONT), TAG(BGSOUND),
2418
+ TAG(MENUITEM), TAG(LINK), TAG(META), TAG(NOFRAMES),
2419
+ TAG(SCRIPT), TAG(STYLE), TAG(TEMPLATE), TAG(TITLE)}) ||
2420
+ tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
2332
2421
  return handle_in_head(parser, token);
2333
2422
  } else if (tag_is(token, kStartTag, GUMBO_TAG_BODY)) {
2334
2423
  parser_add_parse_error(parser, token);
2335
2424
  if (state->_open_elements.length < 2 ||
2336
- !node_tag_is(state->_open_elements.data[1], GUMBO_TAG_BODY)) {
2425
+ !node_html_tag_is(state->_open_elements.data[1], GUMBO_TAG_BODY) ||
2426
+ has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
2337
2427
  ignore_token(parser);
2338
2428
  return false;
2339
2429
  }
@@ -2343,7 +2433,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2343
2433
  } else if (tag_is(token, kStartTag, GUMBO_TAG_FRAMESET)) {
2344
2434
  parser_add_parse_error(parser, token);
2345
2435
  if (state->_open_elements.length < 2 ||
2346
- !node_tag_is(state->_open_elements.data[1], GUMBO_TAG_BODY) ||
2436
+ !node_html_tag_is(state->_open_elements.data[1], GUMBO_TAG_BODY) ||
2347
2437
  !state->_frameset_ok) {
2348
2438
  ignore_token(parser);
2349
2439
  return false;
@@ -2367,7 +2457,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2367
2457
  // Remove the body node. We may want to factor this out into a generic
2368
2458
  // helper, but right now this is the only code that needs to do this.
2369
2459
  GumboVector* children = &parser->_output->root->v.element.children;
2370
- for (int i = 0; i < children->length; ++i) {
2460
+ for (unsigned int i = 0; i < children->length; ++i) {
2371
2461
  if (children->data[i] == body_node) {
2372
2462
  gumbo_vector_remove_at(parser, i, children);
2373
2463
  break;
@@ -2380,33 +2470,32 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2380
2470
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_FRAMESET);
2381
2471
  return true;
2382
2472
  } else if (token->type == GUMBO_TOKEN_EOF) {
2383
- for (int i = 0; i < state->_open_elements.length; ++i) {
2384
- if (!node_tag_in(state->_open_elements.data[i], GUMBO_TAG_DD,
2385
- GUMBO_TAG_DT, GUMBO_TAG_LI, GUMBO_TAG_P, GUMBO_TAG_TBODY,
2386
- GUMBO_TAG_TD, GUMBO_TAG_TFOOT, GUMBO_TAG_TH,
2387
- GUMBO_TAG_THEAD, GUMBO_TAG_TR, GUMBO_TAG_BODY,
2388
- GUMBO_TAG_HTML, GUMBO_TAG_LAST)) {
2473
+ for (unsigned int i = 0; i < state->_open_elements.length; ++i) {
2474
+ if (!node_tag_in_set(state->_open_elements.data[i],
2475
+ (gumbo_tagset){TAG(DD), TAG(DT), TAG(LI), TAG(P), TAG(TBODY),
2476
+ TAG(TD), TAG(TFOOT), TAG(TH), TAG(THEAD), TAG(TR), TAG(BODY),
2477
+ TAG(HTML)})) {
2389
2478
  parser_add_parse_error(parser, token);
2390
- return false;
2391
2479
  }
2392
2480
  }
2481
+ if (get_current_template_insertion_mode(parser) !=
2482
+ GUMBO_INSERTION_MODE_INITIAL) {
2483
+ return handle_in_template(parser, token);
2484
+ }
2393
2485
  return true;
2394
- } else if (tag_in(token, kEndTag, GUMBO_TAG_BODY, GUMBO_TAG_HTML,
2395
- GUMBO_TAG_LAST)) {
2486
+ } else if (tag_in(token, kEndTag, (gumbo_tagset){TAG(BODY), TAG(HTML)})) {
2396
2487
  if (!has_an_element_in_scope(parser, GUMBO_TAG_BODY)) {
2397
2488
  parser_add_parse_error(parser, token);
2398
2489
  ignore_token(parser);
2399
2490
  return false;
2400
2491
  }
2401
2492
  bool success = true;
2402
- for (int i = 0; i < state->_open_elements.length; ++i) {
2403
- if (!node_tag_in(state->_open_elements.data[i], GUMBO_TAG_DD,
2404
- GUMBO_TAG_DT, GUMBO_TAG_LI, GUMBO_TAG_OPTGROUP,
2405
- GUMBO_TAG_OPTION, GUMBO_TAG_P, GUMBO_TAG_RP,
2406
- GUMBO_TAG_RT, GUMBO_TAG_TBODY, GUMBO_TAG_TD,
2407
- GUMBO_TAG_TFOOT, GUMBO_TAG_TH, GUMBO_TAG_THEAD,
2408
- GUMBO_TAG_TR, GUMBO_TAG_BODY, GUMBO_TAG_HTML,
2409
- GUMBO_TAG_LAST)) {
2493
+ for (unsigned int i = 0; i < state->_open_elements.length; ++i) {
2494
+ if (!node_tag_in_set(state->_open_elements.data[i],
2495
+ (gumbo_tagset){TAG(DD), TAG(DT), TAG(LI), TAG(OPTGROUP),
2496
+ TAG(OPTION), TAG(P), TAG(RB), TAG(RP), TAG(RT), TAG(RTC),
2497
+ TAG(TBODY), TAG(TD), TAG(TFOOT), TAG(TH), TAG(THEAD), TAG(TR),
2498
+ TAG(BODY), TAG(HTML)})) {
2410
2499
  parser_add_parse_error(parser, token);
2411
2500
  success = false;
2412
2501
  break;
@@ -2417,58 +2506,58 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2417
2506
  parser->_parser_state->_reprocess_current_token = true;
2418
2507
  } else {
2419
2508
  GumboNode* body = state->_open_elements.data[1];
2420
- assert(node_tag_is(body, GUMBO_TAG_BODY));
2509
+ assert(node_html_tag_is(body, GUMBO_TAG_BODY));
2421
2510
  record_end_of_element(state->_current_token, &body->v.element);
2422
2511
  }
2423
2512
  return success;
2424
- } else if (tag_in(token, kStartTag, GUMBO_TAG_ADDRESS, GUMBO_TAG_ARTICLE,
2425
- GUMBO_TAG_ASIDE, GUMBO_TAG_BLOCKQUOTE, GUMBO_TAG_CENTER,
2426
- GUMBO_TAG_DETAILS, GUMBO_TAG_DIR, GUMBO_TAG_DIV,
2427
- GUMBO_TAG_DL, GUMBO_TAG_FIELDSET, GUMBO_TAG_FIGCAPTION,
2428
- GUMBO_TAG_FIGURE, GUMBO_TAG_FOOTER, GUMBO_TAG_HEADER,
2429
- GUMBO_TAG_HGROUP, GUMBO_TAG_MENU, GUMBO_TAG_NAV,
2430
- GUMBO_TAG_OL, GUMBO_TAG_P, GUMBO_TAG_SECTION,
2431
- GUMBO_TAG_SUMMARY, GUMBO_TAG_UL, GUMBO_TAG_LAST)) {
2513
+ } else if (tag_in(token, kStartTag,
2514
+ (gumbo_tagset){TAG(ADDRESS), TAG(ARTICLE), TAG(ASIDE),
2515
+ TAG(BLOCKQUOTE), TAG(CENTER), TAG(DETAILS), TAG(DIR),
2516
+ TAG(DIV), TAG(DL), TAG(FIELDSET), TAG(FIGCAPTION),
2517
+ TAG(FIGURE), TAG(FOOTER), TAG(HEADER), TAG(HGROUP),
2518
+ TAG(MENU), TAG(MAIN), TAG(NAV), TAG(OL), TAG(P),
2519
+ TAG(SECTION), TAG(SUMMARY), TAG(UL)})) {
2432
2520
  bool result = maybe_implicitly_close_p_tag(parser, token);
2433
2521
  insert_element_from_token(parser, token);
2434
2522
  return result;
2435
- } else if (tag_in(token, kStartTag, GUMBO_TAG_H1, GUMBO_TAG_H2, GUMBO_TAG_H3,
2436
- GUMBO_TAG_H4, GUMBO_TAG_H5, GUMBO_TAG_H6, GUMBO_TAG_LAST)) {
2523
+ } else if (tag_in(token, kStartTag, (gumbo_tagset){TAG(H1), TAG(H2), TAG(H3),
2524
+ TAG(H4), TAG(H5), TAG(H6)})) {
2437
2525
  bool result = maybe_implicitly_close_p_tag(parser, token);
2438
- if (node_tag_in(get_current_node(parser), GUMBO_TAG_H1, GUMBO_TAG_H2,
2439
- GUMBO_TAG_H3, GUMBO_TAG_H4, GUMBO_TAG_H5, GUMBO_TAG_H6,
2440
- GUMBO_TAG_LAST)) {
2526
+ if (node_tag_in_set(
2527
+ get_current_node(parser), (gumbo_tagset){TAG(H1), TAG(H2), TAG(H3),
2528
+ TAG(H4), TAG(H5), TAG(H6)})) {
2441
2529
  parser_add_parse_error(parser, token);
2442
2530
  pop_current_node(parser);
2443
2531
  result = false;
2444
2532
  }
2445
2533
  insert_element_from_token(parser, token);
2446
2534
  return result;
2447
- } else if (tag_in(token, kStartTag, GUMBO_TAG_PRE, GUMBO_TAG_LISTING,
2448
- GUMBO_TAG_LAST)) {
2535
+ } else if (tag_in(token, kStartTag, (gumbo_tagset){TAG(PRE), TAG(LISTING)})) {
2449
2536
  bool result = maybe_implicitly_close_p_tag(parser, token);
2450
2537
  insert_element_from_token(parser, token);
2451
2538
  state->_ignore_next_linefeed = true;
2452
2539
  state->_frameset_ok = false;
2453
2540
  return result;
2454
2541
  } else if (tag_is(token, kStartTag, GUMBO_TAG_FORM)) {
2455
- if (state->_form_element != NULL) {
2542
+ if (state->_form_element != NULL &&
2543
+ !has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
2456
2544
  gumbo_debug("Ignoring nested form.\n");
2457
2545
  parser_add_parse_error(parser, token);
2458
2546
  ignore_token(parser);
2459
2547
  return false;
2460
2548
  }
2461
2549
  bool result = maybe_implicitly_close_p_tag(parser, token);
2462
- state->_form_element =
2463
- insert_element_from_token(parser, token);
2550
+ GumboNode* form_element = insert_element_from_token(parser, token);
2551
+ if (!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
2552
+ state->_form_element = form_element;
2553
+ }
2464
2554
  return result;
2465
2555
  } else if (tag_is(token, kStartTag, GUMBO_TAG_LI)) {
2466
2556
  maybe_implicitly_close_list_tag(parser, token, true);
2467
2557
  bool result = maybe_implicitly_close_p_tag(parser, token);
2468
2558
  insert_element_from_token(parser, token);
2469
2559
  return result;
2470
- } else if (tag_in(token, kStartTag, GUMBO_TAG_DD, GUMBO_TAG_DT,
2471
- GUMBO_TAG_LAST)) {
2560
+ } else if (tag_in(token, kStartTag, (gumbo_tagset){TAG(DD), TAG(DT)})) {
2472
2561
  maybe_implicitly_close_list_tag(parser, token, false);
2473
2562
  bool result = maybe_implicitly_close_p_tag(parser, token);
2474
2563
  insert_element_from_token(parser, token);
@@ -2481,7 +2570,8 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2481
2570
  } else if (tag_is(token, kStartTag, GUMBO_TAG_BUTTON)) {
2482
2571
  if (has_an_element_in_scope(parser, GUMBO_TAG_BUTTON)) {
2483
2572
  parser_add_parse_error(parser, token);
2484
- implicitly_close_tags(parser, token, GUMBO_TAG_BUTTON);
2573
+ implicitly_close_tags(
2574
+ parser, token, GUMBO_NAMESPACE_HTML, GUMBO_TAG_BUTTON);
2485
2575
  state->_reprocess_current_token = true;
2486
2576
  return false;
2487
2577
  }
@@ -2489,67 +2579,83 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2489
2579
  insert_element_from_token(parser, token);
2490
2580
  state->_frameset_ok = false;
2491
2581
  return true;
2492
- } else if (tag_in(token, kEndTag, GUMBO_TAG_ADDRESS, GUMBO_TAG_ARTICLE,
2493
- GUMBO_TAG_ASIDE, GUMBO_TAG_BLOCKQUOTE, GUMBO_TAG_BUTTON,
2494
- GUMBO_TAG_CENTER, GUMBO_TAG_DETAILS, GUMBO_TAG_DIR,
2495
- GUMBO_TAG_DIV, GUMBO_TAG_DL, GUMBO_TAG_FIELDSET,
2496
- GUMBO_TAG_FIGCAPTION, GUMBO_TAG_FIGURE, GUMBO_TAG_FOOTER,
2497
- GUMBO_TAG_HEADER, GUMBO_TAG_HGROUP, GUMBO_TAG_LISTING,
2498
- GUMBO_TAG_MENU, GUMBO_TAG_NAV, GUMBO_TAG_OL, GUMBO_TAG_PRE,
2499
- GUMBO_TAG_SECTION, GUMBO_TAG_SUMMARY, GUMBO_TAG_UL,
2500
- GUMBO_TAG_LAST)) {
2582
+ } else if (tag_in(token, kEndTag,
2583
+ (gumbo_tagset){TAG(ADDRESS), TAG(ARTICLE), TAG(ASIDE),
2584
+ TAG(BLOCKQUOTE), TAG(BUTTON), TAG(CENTER), TAG(DETAILS),
2585
+ TAG(DIR), TAG(DIV), TAG(DL), TAG(FIELDSET),
2586
+ TAG(FIGCAPTION), TAG(FIGURE), TAG(FOOTER), TAG(HEADER),
2587
+ TAG(HGROUP), TAG(LISTING), TAG(MAIN), TAG(MENU), TAG(NAV),
2588
+ TAG(OL), TAG(PRE), TAG(SECTION), TAG(SUMMARY), TAG(UL)})) {
2501
2589
  GumboTag tag = token->v.end_tag;
2502
2590
  if (!has_an_element_in_scope(parser, tag)) {
2503
2591
  parser_add_parse_error(parser, token);
2504
2592
  ignore_token(parser);
2505
2593
  return false;
2506
2594
  }
2507
- implicitly_close_tags(parser, token, token->v.end_tag);
2595
+ implicitly_close_tags(
2596
+ parser, token, GUMBO_NAMESPACE_HTML, token->v.end_tag);
2508
2597
  return true;
2509
2598
  } else if (tag_is(token, kEndTag, GUMBO_TAG_FORM)) {
2510
- bool result = true;
2511
- const GumboNode* node = state->_form_element;
2512
- assert(!node || node->type == GUMBO_NODE_ELEMENT);
2513
- state->_form_element = NULL;
2514
- if (!node || !has_node_in_scope(parser, node)) {
2515
- gumbo_debug("Closing an unopened form.\n");
2516
- parser_add_parse_error(parser, token);
2517
- ignore_token(parser);
2518
- return false;
2519
- }
2520
- // This differs from implicitly_close_tags because we remove *only* the
2521
- // <form> element; other nodes are left in scope.
2522
- generate_implied_end_tags(parser, GUMBO_TAG_LAST);
2523
- if (get_current_node(parser) != node) {
2524
- parser_add_parse_error(parser, token);
2525
- result = false;
2526
- }
2599
+ if (has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
2600
+ if (!has_an_element_in_scope(parser, GUMBO_TAG_FORM)) {
2601
+ parser_add_parse_error(parser, token);
2602
+ ignore_token(parser);
2603
+ return false;
2604
+ }
2605
+ bool success = true;
2606
+ generate_implied_end_tags(parser, GUMBO_TAG_LAST);
2607
+ if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_FORM)) {
2608
+ parser_add_parse_error(parser, token);
2609
+ return false;
2610
+ }
2611
+ while (!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_FORM))
2612
+ ;
2613
+ return success;
2614
+ } else {
2615
+ bool result = true;
2616
+ const GumboNode* node = state->_form_element;
2617
+ assert(!node || node->type == GUMBO_NODE_ELEMENT);
2618
+ state->_form_element = NULL;
2619
+ if (!node || !has_node_in_scope(parser, node)) {
2620
+ gumbo_debug("Closing an unopened form.\n");
2621
+ parser_add_parse_error(parser, token);
2622
+ ignore_token(parser);
2623
+ return false;
2624
+ }
2625
+ // This differs from implicitly_close_tags because we remove *only* the
2626
+ // <form> element; other nodes are left in scope.
2627
+ generate_implied_end_tags(parser, GUMBO_TAG_LAST);
2628
+ if (get_current_node(parser) != node) {
2629
+ parser_add_parse_error(parser, token);
2630
+ result = false;
2631
+ }
2527
2632
 
2528
- GumboVector* open_elements = &state->_open_elements;
2529
- int index = open_elements->length - 1;
2530
- for (; index >= 0 && open_elements->data[index] != node; --index);
2531
- assert(index >= 0);
2532
- gumbo_vector_remove_at(parser, index, open_elements);
2533
- return result;
2633
+ GumboVector* open_elements = &state->_open_elements;
2634
+ int index = gumbo_vector_index_of(open_elements, node);
2635
+ assert(index >= 0);
2636
+ gumbo_vector_remove_at(parser, index, open_elements);
2637
+ return result;
2638
+ }
2534
2639
  } else if (tag_is(token, kEndTag, GUMBO_TAG_P)) {
2535
2640
  if (!has_an_element_in_button_scope(parser, GUMBO_TAG_P)) {
2536
2641
  parser_add_parse_error(parser, token);
2537
- reconstruct_active_formatting_elements(parser);
2642
+ // reconstruct_active_formatting_elements(parser);
2538
2643
  insert_element_of_tag_type(
2539
2644
  parser, GUMBO_TAG_P, GUMBO_INSERTION_CONVERTED_FROM_END_TAG);
2540
2645
  state->_reprocess_current_token = true;
2541
2646
  return false;
2542
2647
  }
2543
- return implicitly_close_tags(parser, token, GUMBO_TAG_P);
2648
+ return implicitly_close_tags(
2649
+ parser, token, GUMBO_NAMESPACE_HTML, GUMBO_TAG_P);
2544
2650
  } else if (tag_is(token, kEndTag, GUMBO_TAG_LI)) {
2545
2651
  if (!has_an_element_in_list_scope(parser, GUMBO_TAG_LI)) {
2546
2652
  parser_add_parse_error(parser, token);
2547
2653
  ignore_token(parser);
2548
2654
  return false;
2549
2655
  }
2550
- return implicitly_close_tags(parser, token, GUMBO_TAG_LI);
2551
- } else if (tag_in(token, kEndTag, GUMBO_TAG_DD, GUMBO_TAG_DT,
2552
- GUMBO_TAG_LAST)) {
2656
+ return implicitly_close_tags(
2657
+ parser, token, GUMBO_NAMESPACE_HTML, GUMBO_TAG_LI);
2658
+ } else if (tag_in(token, kEndTag, (gumbo_tagset){TAG(DD), TAG(DT)})) {
2553
2659
  assert(token->type == GUMBO_TOKEN_END_TAG);
2554
2660
  GumboTag token_tag = token->v.end_tag;
2555
2661
  if (!has_an_element_in_scope(parser, token_tag)) {
@@ -2557,12 +2663,13 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2557
2663
  ignore_token(parser);
2558
2664
  return false;
2559
2665
  }
2560
- return implicitly_close_tags(parser, token, token_tag);
2561
- } else if (tag_in(token, kEndTag, GUMBO_TAG_H1, GUMBO_TAG_H2, GUMBO_TAG_H3,
2562
- GUMBO_TAG_H4, GUMBO_TAG_H5, GUMBO_TAG_H6, GUMBO_TAG_LAST)) {
2666
+ return implicitly_close_tags(
2667
+ parser, token, GUMBO_NAMESPACE_HTML, token_tag);
2668
+ } else if (tag_in(token, kEndTag, (gumbo_tagset){TAG(H1), TAG(H2), TAG(H3),
2669
+ TAG(H4), TAG(H5), TAG(H6)})) {
2563
2670
  if (!has_an_element_in_scope_with_tagname(
2564
- parser, GUMBO_TAG_H1, GUMBO_TAG_H2, GUMBO_TAG_H3, GUMBO_TAG_H4,
2565
- GUMBO_TAG_H5, GUMBO_TAG_H6, GUMBO_TAG_LAST)) {
2671
+ parser, 6, (GumboTag[]){GUMBO_TAG_H1, GUMBO_TAG_H2, GUMBO_TAG_H3,
2672
+ GUMBO_TAG_H4, GUMBO_TAG_H5, GUMBO_TAG_H6})) {
2566
2673
  // No heading open; ignore the token entirely.
2567
2674
  parser_add_parse_error(parser, token);
2568
2675
  ignore_token(parser);
@@ -2570,7 +2677,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2570
2677
  } else {
2571
2678
  generate_implied_end_tags(parser, GUMBO_TAG_LAST);
2572
2679
  const GumboNode* current_node = get_current_node(parser);
2573
- bool success = node_tag_is(current_node, token->v.end_tag);
2680
+ bool success = node_html_tag_is(current_node, token->v.end_tag);
2574
2681
  if (!success) {
2575
2682
  // There're children of the heading currently open; close them below and
2576
2683
  // record a parse error.
@@ -2580,9 +2687,9 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2580
2687
  }
2581
2688
  do {
2582
2689
  current_node = pop_current_node(parser);
2583
- } while (!node_tag_in(current_node, GUMBO_TAG_H1, GUMBO_TAG_H2,
2584
- GUMBO_TAG_H3, GUMBO_TAG_H4, GUMBO_TAG_H5,
2585
- GUMBO_TAG_H6, GUMBO_TAG_LAST));
2690
+ } while (!node_tag_in_set(
2691
+ current_node, (gumbo_tagset){TAG(H1), TAG(H2), TAG(H3),
2692
+ TAG(H4), TAG(H5), TAG(H6)}));
2586
2693
  return success;
2587
2694
  }
2588
2695
  } else if (tag_is(token, kStartTag, GUMBO_TAG_A)) {
@@ -2600,19 +2707,17 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2600
2707
  if (find_last_anchor_index(parser, &last_a)) {
2601
2708
  void* last_element = gumbo_vector_remove_at(
2602
2709
  parser, last_a, &state->_active_formatting_elements);
2603
- gumbo_vector_remove(
2604
- parser, last_element, &state->_open_elements);
2710
+ gumbo_vector_remove(parser, last_element, &state->_open_elements);
2605
2711
  }
2606
2712
  success = false;
2607
2713
  }
2608
2714
  reconstruct_active_formatting_elements(parser);
2609
2715
  add_formatting_element(parser, insert_element_from_token(parser, token));
2610
2716
  return success;
2611
- } else if (tag_in(token, kStartTag, GUMBO_TAG_B, GUMBO_TAG_BIG,
2612
- GUMBO_TAG_CODE, GUMBO_TAG_EM, GUMBO_TAG_FONT, GUMBO_TAG_I,
2613
- GUMBO_TAG_S, GUMBO_TAG_SMALL, GUMBO_TAG_STRIKE,
2614
- GUMBO_TAG_STRONG, GUMBO_TAG_TT, GUMBO_TAG_U,
2615
- GUMBO_TAG_LAST)) {
2717
+ } else if (tag_in(token, kStartTag,
2718
+ (gumbo_tagset){TAG(B), TAG(BIG), TAG(CODE), TAG(EM), TAG(FONT),
2719
+ TAG(I), TAG(S), TAG(SMALL), TAG(STRIKE), TAG(STRONG),
2720
+ TAG(TT), TAG(U)})) {
2616
2721
  reconstruct_active_formatting_elements(parser);
2617
2722
  add_formatting_element(parser, insert_element_from_token(parser, token));
2618
2723
  return true;
@@ -2628,28 +2733,27 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2628
2733
  insert_element_from_token(parser, token);
2629
2734
  add_formatting_element(parser, get_current_node(parser));
2630
2735
  return result;
2631
- } else if (tag_in(token, kEndTag, GUMBO_TAG_A, GUMBO_TAG_B, GUMBO_TAG_BIG,
2632
- GUMBO_TAG_CODE, GUMBO_TAG_EM, GUMBO_TAG_FONT, GUMBO_TAG_I,
2633
- GUMBO_TAG_NOBR, GUMBO_TAG_S, GUMBO_TAG_SMALL,
2634
- GUMBO_TAG_STRIKE, GUMBO_TAG_STRONG, GUMBO_TAG_TT,
2635
- GUMBO_TAG_U, GUMBO_TAG_LAST)) {
2736
+ } else if (tag_in(token, kEndTag,
2737
+ (gumbo_tagset){TAG(A), TAG(B), TAG(BIG), TAG(CODE), TAG(EM),
2738
+ TAG(FONT), TAG(I), TAG(NOBR), TAG(S), TAG(SMALL),
2739
+ TAG(STRIKE), TAG(STRONG), TAG(TT), TAG(U)})) {
2636
2740
  return adoption_agency_algorithm(parser, token, token->v.end_tag);
2637
- } else if (tag_in(token, kStartTag, GUMBO_TAG_APPLET, GUMBO_TAG_MARQUEE,
2638
- GUMBO_TAG_OBJECT, GUMBO_TAG_LAST)) {
2741
+ } else if (tag_in(token, kStartTag,
2742
+ (gumbo_tagset){TAG(APPLET), TAG(MARQUEE), TAG(OBJECT)})) {
2639
2743
  reconstruct_active_formatting_elements(parser);
2640
2744
  insert_element_from_token(parser, token);
2641
2745
  add_formatting_element(parser, &kActiveFormattingScopeMarker);
2642
2746
  set_frameset_not_ok(parser);
2643
2747
  return true;
2644
- } else if (tag_in(token, kEndTag, GUMBO_TAG_APPLET, GUMBO_TAG_MARQUEE,
2645
- GUMBO_TAG_OBJECT, GUMBO_TAG_LAST)) {
2748
+ } else if (tag_in(token, kEndTag,
2749
+ (gumbo_tagset){TAG(APPLET), TAG(MARQUEE), TAG(OBJECT)})) {
2646
2750
  GumboTag token_tag = token->v.end_tag;
2647
2751
  if (!has_an_element_in_table_scope(parser, token_tag)) {
2648
2752
  parser_add_parse_error(parser, token);
2649
2753
  ignore_token(parser);
2650
2754
  return false;
2651
2755
  }
2652
- implicitly_close_tags(parser, token, token_tag);
2756
+ implicitly_close_tags(parser, token, GUMBO_NAMESPACE_HTML, token_tag);
2653
2757
  clear_active_formatting_elements(parser);
2654
2758
  return true;
2655
2759
  } else if (tag_is(token, kStartTag, GUMBO_TAG_TABLE)) {
@@ -2661,9 +2765,9 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2661
2765
  set_frameset_not_ok(parser);
2662
2766
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
2663
2767
  return true;
2664
- } else if (tag_in(token, kStartTag, GUMBO_TAG_AREA, GUMBO_TAG_BR,
2665
- GUMBO_TAG_EMBED, GUMBO_TAG_IMG, GUMBO_TAG_IMAGE,
2666
- GUMBO_TAG_KEYGEN, GUMBO_TAG_WBR, GUMBO_TAG_LAST)) {
2768
+ } else if (tag_in(token, kStartTag,
2769
+ (gumbo_tagset){TAG(AREA), TAG(BR), TAG(EMBED), TAG(IMG),
2770
+ TAG(IMAGE), TAG(KEYGEN), TAG(WBR)})) {
2667
2771
  bool success = true;
2668
2772
  if (tag_is(token, kStartTag, GUMBO_TAG_IMAGE)) {
2669
2773
  success = false;
@@ -2693,8 +2797,8 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2693
2797
  pop_current_node(parser);
2694
2798
  acknowledge_self_closing_tag(parser);
2695
2799
  return true;
2696
- } else if (tag_in(token, kStartTag, GUMBO_TAG_PARAM, GUMBO_TAG_SOURCE,
2697
- GUMBO_TAG_TRACK, GUMBO_TAG_LAST)) {
2800
+ } else if (tag_in(token, kStartTag,
2801
+ (gumbo_tagset){TAG(PARAM), TAG(SOURCE), TAG(TRACK)})) {
2698
2802
  insert_element_from_token(parser, token);
2699
2803
  pop_current_node(parser);
2700
2804
  acknowledge_self_closing_tag(parser);
@@ -2708,7 +2812,8 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2708
2812
  return result;
2709
2813
  } else if (tag_is(token, kStartTag, GUMBO_TAG_ISINDEX)) {
2710
2814
  parser_add_parse_error(parser, token);
2711
- if (parser->_parser_state->_form_element != NULL) {
2815
+ if (parser->_parser_state->_form_element != NULL &&
2816
+ !has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
2712
2817
  ignore_token(parser);
2713
2818
  return false;
2714
2819
  }
@@ -2723,15 +2828,18 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2723
2828
 
2724
2829
  GumboNode* form = insert_element_of_tag_type(
2725
2830
  parser, GUMBO_TAG_FORM, GUMBO_INSERTION_FROM_ISINDEX);
2831
+ if (!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
2832
+ parser->_parser_state->_form_element = form;
2833
+ }
2726
2834
  if (action_attr) {
2727
2835
  gumbo_vector_add(parser, action_attr, &form->v.element.attributes);
2728
2836
  }
2729
- insert_element_of_tag_type(parser, GUMBO_TAG_HR,
2730
- GUMBO_INSERTION_FROM_ISINDEX);
2731
- pop_current_node(parser); // <hr>
2837
+ insert_element_of_tag_type(
2838
+ parser, GUMBO_TAG_HR, GUMBO_INSERTION_FROM_ISINDEX);
2839
+ pop_current_node(parser); // <hr>
2732
2840
 
2733
- insert_element_of_tag_type(parser, GUMBO_TAG_LABEL,
2734
- GUMBO_INSERTION_FROM_ISINDEX);
2841
+ insert_element_of_tag_type(
2842
+ parser, GUMBO_TAG_LABEL, GUMBO_INSERTION_FROM_ISINDEX);
2735
2843
  TextNodeBufferState* text_state = &parser->_parser_state->_text_node;
2736
2844
  text_state->_start_original_text = token->original_text.data;
2737
2845
  text_state->_start_position = token->position;
@@ -2744,15 +2852,15 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2744
2852
  text_state->_buffer.capacity = prompt_attr_length + 1;
2745
2853
  gumbo_destroy_attribute(parser, prompt_attr);
2746
2854
  } else {
2747
- GumboStringPiece prompt_text = GUMBO_STRING(
2748
- "This is a searchable index. Enter search keywords: ");
2855
+ GumboStringPiece prompt_text =
2856
+ GUMBO_STRING("This is a searchable index. Enter search keywords: ");
2749
2857
  gumbo_string_buffer_append_string(
2750
2858
  parser, &prompt_text, &text_state->_buffer);
2751
2859
  }
2752
2860
 
2753
2861
  GumboNode* input = insert_element_of_tag_type(
2754
2862
  parser, GUMBO_TAG_INPUT, GUMBO_INSERTION_FROM_ISINDEX);
2755
- for (int i = 0; i < token_attrs->length; ++i) {
2863
+ for (unsigned int i = 0; i < token_attrs->length; ++i) {
2756
2864
  GumboAttribute* attr = token_attrs->data[i];
2757
2865
  if (attr != prompt_attr && attr != action_attr && attr != name_attr) {
2758
2866
  gumbo_vector_add(parser, attr, &input->v.element.attributes);
@@ -2765,6 +2873,13 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2765
2873
  // touching the attributes.
2766
2874
  ignore_token(parser);
2767
2875
 
2876
+ // The name attribute, if present, should be destroyed since it's ignored
2877
+ // when copying over. The action attribute should be kept since it's moved
2878
+ // to the form.
2879
+ if (name_attr) {
2880
+ gumbo_destroy_attribute(parser, name_attr);
2881
+ }
2882
+
2768
2883
  GumboAttribute* name =
2769
2884
  gumbo_parser_allocate(parser, sizeof(GumboAttribute));
2770
2885
  GumboStringPiece name_str = GUMBO_STRING("name");
@@ -2780,12 +2895,15 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2780
2895
  name->value_end = kGumboEmptySourcePosition;
2781
2896
  gumbo_vector_add(parser, name, &input->v.element.attributes);
2782
2897
 
2783
- pop_current_node(parser); // <input>
2784
- pop_current_node(parser); // <label>
2898
+ pop_current_node(parser); // <input>
2899
+ pop_current_node(parser); // <label>
2785
2900
  insert_element_of_tag_type(
2786
2901
  parser, GUMBO_TAG_HR, GUMBO_INSERTION_FROM_ISINDEX);
2787
- pop_current_node(parser); // <hr>
2788
- pop_current_node(parser); // <form>
2902
+ pop_current_node(parser); // <hr>
2903
+ pop_current_node(parser); // <form>
2904
+ if (!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
2905
+ parser->_parser_state->_form_element = NULL;
2906
+ }
2789
2907
  return false;
2790
2908
  } else if (tag_is(token, kStartTag, GUMBO_TAG_TEXTAREA)) {
2791
2909
  run_generic_parsing_algorithm(parser, token, GUMBO_LEX_RCDATA);
@@ -2820,21 +2938,27 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2820
2938
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_SELECT);
2821
2939
  }
2822
2940
  return true;
2823
- } else if (tag_in(token, kStartTag, GUMBO_TAG_OPTION, GUMBO_TAG_OPTGROUP,
2824
- GUMBO_TAG_LAST)) {
2825
- if (node_tag_is(get_current_node(parser), GUMBO_TAG_OPTION)) {
2941
+ } else if (tag_in(token, kStartTag,
2942
+ (gumbo_tagset){TAG(OPTION), TAG(OPTGROUP)})) {
2943
+ if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTION)) {
2826
2944
  pop_current_node(parser);
2827
2945
  }
2828
2946
  reconstruct_active_formatting_elements(parser);
2829
2947
  insert_element_from_token(parser, token);
2830
2948
  return true;
2831
- } else if (tag_in(token, kStartTag, GUMBO_TAG_RP, GUMBO_TAG_RT,
2832
- GUMBO_TAG_LAST)) {
2949
+ } else if (tag_in(token, kStartTag,
2950
+ (gumbo_tagset){TAG(RB), TAG(RP), TAG(RT), TAG(RTC)})) {
2833
2951
  bool success = true;
2952
+ GumboTag exception =
2953
+ tag_in(token, kStartTag, (gumbo_tagset){TAG(RT), TAG(RP)})
2954
+ ? GUMBO_TAG_RTC
2955
+ : GUMBO_TAG_LAST;
2834
2956
  if (has_an_element_in_scope(parser, GUMBO_TAG_RUBY)) {
2835
- generate_implied_end_tags(parser, GUMBO_TAG_LAST);
2957
+ generate_implied_end_tags(parser, exception);
2836
2958
  }
2837
- if (!node_tag_is(get_current_node(parser), GUMBO_TAG_RUBY)) {
2959
+ if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_RUBY) &&
2960
+ !(exception == GUMBO_TAG_LAST ||
2961
+ node_html_tag_is(get_current_node(parser), GUMBO_TAG_RTC))) {
2838
2962
  parser_add_parse_error(parser, token);
2839
2963
  success = false;
2840
2964
  }
@@ -2867,11 +2991,10 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2867
2991
  acknowledge_self_closing_tag(parser);
2868
2992
  }
2869
2993
  return true;
2870
- } else if (tag_in(token, kStartTag, GUMBO_TAG_CAPTION, GUMBO_TAG_COL,
2871
- GUMBO_TAG_COLGROUP, GUMBO_TAG_FRAME, GUMBO_TAG_HEAD,
2872
- GUMBO_TAG_TBODY, GUMBO_TAG_TD, GUMBO_TAG_TFOOT,
2873
- GUMBO_TAG_TH, GUMBO_TAG_THEAD, GUMBO_TAG_TR,
2874
- GUMBO_TAG_LAST)) {
2994
+ } else if (tag_in(token, kStartTag,
2995
+ (gumbo_tagset){TAG(CAPTION), TAG(COL), TAG(COLGROUP),
2996
+ TAG(FRAME), TAG(HEAD), TAG(TBODY), TAG(TD), TAG(TFOOT),
2997
+ TAG(TH), TAG(THEAD), TAG(TR)})) {
2875
2998
  parser_add_parse_error(parser, token);
2876
2999
  ignore_token(parser);
2877
3000
  return false;
@@ -2883,22 +3006,22 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2883
3006
  assert(token->type == GUMBO_TOKEN_END_TAG);
2884
3007
  GumboTag end_tag = token->v.end_tag;
2885
3008
  assert(state->_open_elements.length > 0);
2886
- assert(node_tag_is(state->_open_elements.data[0], GUMBO_TAG_HTML));
3009
+ assert(node_html_tag_is(state->_open_elements.data[0], GUMBO_TAG_HTML));
2887
3010
  // Walk up the stack of open elements until we find one that either:
2888
3011
  // a) Matches the tag name we saw
2889
3012
  // b) Is in the "special" category.
2890
3013
  // If we see a), implicitly close everything up to and including it. If we
2891
3014
  // see b), then record a parse error, don't close anything (except the
2892
3015
  // implied end tags) and ignore the end tag token.
2893
- for (int i = state->_open_elements.length; --i >= 0; ) {
3016
+ for (int i = state->_open_elements.length; --i >= 0;) {
2894
3017
  const GumboNode* node = state->_open_elements.data[i];
2895
- if (node->v.element.tag_namespace == GUMBO_NAMESPACE_HTML &&
2896
- node_tag_is(node, end_tag)) {
3018
+ if (node_html_tag_is(node, end_tag)) {
2897
3019
  generate_implied_end_tags(parser, end_tag);
2898
3020
  // TODO(jdtang): Do I need to add a parse error here? The condition in
2899
3021
  // the spec seems like it's the inverse of the loop condition above, and
2900
3022
  // so would never fire.
2901
- while (node != pop_current_node(parser)); // Pop everything.
3023
+ while (node != pop_current_node(parser))
3024
+ ; // Pop everything.
2902
3025
  return true;
2903
3026
  } else if (is_special_node(node)) {
2904
3027
  parser_add_parse_error(parser, token);
@@ -2914,7 +3037,8 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2914
3037
 
2915
3038
  // http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-incdata
2916
3039
  static bool handle_text(GumboParser* parser, GumboToken* token) {
2917
- if (token->type == GUMBO_TOKEN_CHARACTER || token->type == GUMBO_TOKEN_WHITESPACE) {
3040
+ if (token->type == GUMBO_TOKEN_CHARACTER ||
3041
+ token->type == GUMBO_TOKEN_WHITESPACE) {
2918
3042
  insert_text_token(parser, token);
2919
3043
  } else {
2920
3044
  // We provide only bare-bones script handling that doesn't involve any of
@@ -2974,13 +3098,12 @@ static bool handle_in_table(GumboParser* parser, GumboToken* token) {
2974
3098
  parser->_parser_state->_reprocess_current_token = true;
2975
3099
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_COLUMN_GROUP);
2976
3100
  return true;
2977
- } else if (tag_in(token, kStartTag, GUMBO_TAG_TBODY, GUMBO_TAG_TFOOT,
2978
- GUMBO_TAG_THEAD, GUMBO_TAG_TD, GUMBO_TAG_TH, GUMBO_TAG_TR,
2979
- GUMBO_TAG_LAST)) {
3101
+ } else if (tag_in(token, kStartTag,
3102
+ (gumbo_tagset){TAG(TBODY), TAG(TFOOT), TAG(THEAD), TAG(TD),
3103
+ TAG(TH), TAG(TR)})) {
2980
3104
  clear_stack_to_table_context(parser);
2981
3105
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
2982
- if (tag_in(token, kStartTag, GUMBO_TAG_TD, GUMBO_TAG_TH, GUMBO_TAG_TR,
2983
- GUMBO_TAG_LAST)) {
3106
+ if (tag_in(token, kStartTag, (gumbo_tagset){TAG(TD), TAG(TH), TAG(TR)})) {
2984
3107
  insert_element_of_tag_type(
2985
3108
  parser, GUMBO_TAG_TBODY, GUMBO_INSERTION_IMPLIED);
2986
3109
  state->_reprocess_current_token = true;
@@ -3002,27 +3125,27 @@ static bool handle_in_table(GumboParser* parser, GumboToken* token) {
3002
3125
  return false;
3003
3126
  }
3004
3127
  return true;
3005
- } else if (tag_in(token, kEndTag, GUMBO_TAG_BODY, GUMBO_TAG_CAPTION,
3006
- GUMBO_TAG_COL, GUMBO_TAG_COLGROUP, GUMBO_TAG_HTML,
3007
- GUMBO_TAG_TBODY, GUMBO_TAG_TD, GUMBO_TAG_TFOOT,
3008
- GUMBO_TAG_TH, GUMBO_TAG_THEAD, GUMBO_TAG_TR,
3009
- GUMBO_TAG_LAST)) {
3128
+ } else if (tag_in(token, kEndTag,
3129
+ (gumbo_tagset){TAG(BODY), TAG(CAPTION), TAG(COL),
3130
+ TAG(COLGROUP), TAG(HTML), TAG(TBODY), TAG(TD), TAG(TFOOT),
3131
+ TAG(TH), TAG(THEAD), TAG(TR)})) {
3010
3132
  parser_add_parse_error(parser, token);
3011
3133
  ignore_token(parser);
3012
3134
  return false;
3013
- } else if (tag_in(token, kStartTag, GUMBO_TAG_STYLE, GUMBO_TAG_SCRIPT,
3014
- GUMBO_TAG_LAST)) {
3135
+ } else if (tag_in(token, kStartTag,
3136
+ (gumbo_tagset){TAG(STYLE), TAG(SCRIPT), TAG(TEMPLATE)}) ||
3137
+ (tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE))) {
3015
3138
  return handle_in_head(parser, token);
3016
3139
  } else if (tag_is(token, kStartTag, GUMBO_TAG_INPUT) &&
3017
- attribute_matches(&token->v.start_tag.attributes,
3018
- "type", "hidden")) {
3140
+ attribute_matches(
3141
+ &token->v.start_tag.attributes, "type", "hidden")) {
3019
3142
  parser_add_parse_error(parser, token);
3020
3143
  insert_element_from_token(parser, token);
3021
3144
  pop_current_node(parser);
3022
3145
  return false;
3023
3146
  } else if (tag_is(token, kStartTag, GUMBO_TAG_FORM)) {
3024
3147
  parser_add_parse_error(parser, token);
3025
- if (state->_form_element) {
3148
+ if (state->_form_element || has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
3026
3149
  ignore_token(parser);
3027
3150
  return false;
3028
3151
  }
@@ -3030,11 +3153,7 @@ static bool handle_in_table(GumboParser* parser, GumboToken* token) {
3030
3153
  pop_current_node(parser);
3031
3154
  return false;
3032
3155
  } else if (token->type == GUMBO_TOKEN_EOF) {
3033
- if (!node_tag_is(get_current_node(parser), GUMBO_TAG_HTML)) {
3034
- parser_add_parse_error(parser, token);
3035
- return false;
3036
- }
3037
- return true;
3156
+ return handle_in_body(parser, token);
3038
3157
  } else {
3039
3158
  parser_add_parse_error(parser, token);
3040
3159
  state->_foster_parent_insertions = true;
@@ -3062,8 +3181,9 @@ static bool handle_in_table_text(GumboParser* parser, GumboToken* token) {
3062
3181
  // Note that TextNodeBuffer may contain UTF-8 characters, but the presence
3063
3182
  // of any one byte that is not whitespace means we flip the flag, so this
3064
3183
  // loop is still valid.
3065
- for (int i = 0; i < buffer->length; ++i) {
3066
- if (!isspace(buffer->data[i]) || buffer->data[i] == '\v') {
3184
+ for (unsigned int i = 0; i < buffer->length; ++i) {
3185
+ if (!isspace((unsigned char) buffer->data[i]) ||
3186
+ buffer->data[i] == '\v') {
3067
3187
  state->_foster_parent_insertions = true;
3068
3188
  reconstruct_active_formatting_elements(parser);
3069
3189
  break;
@@ -3079,38 +3199,43 @@ static bool handle_in_table_text(GumboParser* parser, GumboToken* token) {
3079
3199
 
3080
3200
  // http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-incaption
3081
3201
  static bool handle_in_caption(GumboParser* parser, GumboToken* token) {
3082
- if (tag_in(token, kStartTag, GUMBO_TAG_CAPTION, GUMBO_TAG_COL,
3083
- GUMBO_TAG_COLGROUP, GUMBO_TAG_TBODY, GUMBO_TAG_TD,
3084
- GUMBO_TAG_TFOOT, GUMBO_TAG_TH, GUMBO_TAG_THEAD,
3085
- GUMBO_TAG_TR, GUMBO_TAG_LAST) ||
3086
- tag_in(token, kEndTag, GUMBO_TAG_CAPTION, GUMBO_TAG_TABLE,
3087
- GUMBO_TAG_LAST)) {
3202
+ if (tag_is(token, kEndTag, GUMBO_TAG_CAPTION)) {
3088
3203
  if (!has_an_element_in_table_scope(parser, GUMBO_TAG_CAPTION)) {
3089
3204
  parser_add_parse_error(parser, token);
3090
3205
  ignore_token(parser);
3091
3206
  return false;
3207
+ } else {
3208
+ generate_implied_end_tags(parser, GUMBO_TAG_LAST);
3209
+ bool result = true;
3210
+ if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_CAPTION)) {
3211
+ parser_add_parse_error(parser, token);
3212
+ }
3213
+ while (!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_CAPTION))
3214
+ ;
3215
+ clear_active_formatting_elements(parser);
3216
+ set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
3217
+ return result;
3092
3218
  }
3093
- if (!tag_is(token, kEndTag, GUMBO_TAG_CAPTION)) {
3094
- parser_add_parse_error(parser, token);
3095
- parser->_parser_state->_reprocess_current_token = true;
3096
- }
3097
- generate_implied_end_tags(parser, GUMBO_TAG_LAST);
3098
- bool result = true;
3099
- if (!node_tag_is(get_current_node(parser), GUMBO_TAG_CAPTION)) {
3219
+ } else if (tag_in(token, kStartTag,
3220
+ (gumbo_tagset){TAG(CAPTION), TAG(COL), TAG(COLGROUP),
3221
+ TAG(TBODY), TAG(TD), TAG(TFOOT), TAG(TH), TAG(THEAD),
3222
+ TAG(TR)}) ||
3223
+ (tag_is(token, kEndTag, GUMBO_TAG_TABLE))) {
3224
+ if (!has_an_element_in_table_scope(parser, GUMBO_TAG_CAPTION)) {
3100
3225
  parser_add_parse_error(parser, token);
3101
- while (!node_tag_is(get_current_node(parser), GUMBO_TAG_CAPTION)) {
3102
- pop_current_node(parser);
3103
- }
3104
- result = false;
3226
+ ignore_token(parser);
3227
+ return false;
3105
3228
  }
3106
- pop_current_node(parser); // The <caption> itself.
3229
+ while (!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_CAPTION))
3230
+ ;
3107
3231
  clear_active_formatting_elements(parser);
3108
3232
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
3109
- return result;
3110
- } else if (tag_in(token, kEndTag, GUMBO_TAG_BODY, GUMBO_TAG_COL,
3111
- GUMBO_TAG_COLGROUP, GUMBO_TAG_HTML, GUMBO_TAG_TBODY,
3112
- GUMBO_TAG_TD, GUMBO_TAG_TFOOT, GUMBO_TAG_TH,
3113
- GUMBO_TAG_THEAD, GUMBO_TAG_TR, GUMBO_TAG_LAST)) {
3233
+ parser->_parser_state->_reprocess_current_token = true;
3234
+ return true;
3235
+ } else if (tag_in(token, kEndTag,
3236
+ (gumbo_tagset){TAG(BODY), TAG(COL), TAG(COLGROUP), TAG(HTML),
3237
+ TAG(TBODY), TAG(TD), TAG(TFOOT), TAG(TH), TAG(THEAD),
3238
+ TAG(TR)})) {
3114
3239
  parser_add_parse_error(parser, token);
3115
3240
  ignore_token(parser);
3116
3241
  return false;
@@ -3138,24 +3263,33 @@ static bool handle_in_column_group(GumboParser* parser, GumboToken* token) {
3138
3263
  pop_current_node(parser);
3139
3264
  acknowledge_self_closing_tag(parser);
3140
3265
  return true;
3266
+ } else if (tag_is(token, kEndTag, GUMBO_TAG_COLGROUP)) {
3267
+ if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_COLGROUP)) {
3268
+ parser_add_parse_error(parser, token);
3269
+ ignore_token(parser);
3270
+ return false;
3271
+ }
3272
+ pop_current_node(parser);
3273
+ set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
3274
+ return false;
3141
3275
  } else if (tag_is(token, kEndTag, GUMBO_TAG_COL)) {
3142
3276
  parser_add_parse_error(parser, token);
3143
3277
  ignore_token(parser);
3144
3278
  return false;
3145
- } else if (token->type == GUMBO_TOKEN_EOF &&
3146
- get_current_node(parser) == parser->_output->root) {
3147
- return true;
3279
+ } else if (tag_is(token, kStartTag, GUMBO_TAG_TEMPLATE) ||
3280
+ tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
3281
+ return handle_in_head(parser, token);
3282
+ } else if (token->type == GUMBO_TOKEN_EOF) {
3283
+ return handle_in_body(parser, token);
3148
3284
  } else {
3149
- if (get_current_node(parser) == parser->_output->root) {
3285
+ if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_COLGROUP)) {
3150
3286
  parser_add_parse_error(parser, token);
3287
+ ignore_token(parser);
3151
3288
  return false;
3152
3289
  }
3153
- assert(node_tag_is(get_current_node(parser), GUMBO_TAG_COLGROUP));
3154
3290
  pop_current_node(parser);
3155
3291
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
3156
- if (!tag_is(token, kEndTag, GUMBO_TAG_COLGROUP)) {
3157
- parser->_parser_state->_reprocess_current_token = true;
3158
- }
3292
+ parser->_parser_state->_reprocess_current_token = true;
3159
3293
  return true;
3160
3294
  }
3161
3295
  }
@@ -3167,16 +3301,15 @@ static bool handle_in_table_body(GumboParser* parser, GumboToken* token) {
3167
3301
  insert_element_from_token(parser, token);
3168
3302
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_ROW);
3169
3303
  return true;
3170
- } else if (tag_in(token, kStartTag, GUMBO_TAG_TD, GUMBO_TAG_TH,
3171
- GUMBO_TAG_LAST)) {
3304
+ } else if (tag_in(token, kStartTag, (gumbo_tagset){TAG(TD), TAG(TH)})) {
3172
3305
  parser_add_parse_error(parser, token);
3173
3306
  clear_stack_to_table_body_context(parser);
3174
3307
  insert_element_of_tag_type(parser, GUMBO_TAG_TR, GUMBO_INSERTION_IMPLIED);
3175
3308
  parser->_parser_state->_reprocess_current_token = true;
3176
3309
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_ROW);
3177
3310
  return false;
3178
- } else if (tag_in(token, kEndTag, GUMBO_TAG_TBODY, GUMBO_TAG_TFOOT,
3179
- GUMBO_TAG_THEAD, GUMBO_TAG_LAST)) {
3311
+ } else if (tag_in(token, kEndTag,
3312
+ (gumbo_tagset){TAG(TBODY), TAG(TFOOT), TAG(THEAD)})) {
3180
3313
  if (!has_an_element_in_table_scope(parser, token->v.end_tag)) {
3181
3314
  parser_add_parse_error(parser, token);
3182
3315
  ignore_token(parser);
@@ -3186,13 +3319,13 @@ static bool handle_in_table_body(GumboParser* parser, GumboToken* token) {
3186
3319
  pop_current_node(parser);
3187
3320
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
3188
3321
  return true;
3189
- } else if (tag_in(token, kStartTag, GUMBO_TAG_CAPTION, GUMBO_TAG_COL,
3190
- GUMBO_TAG_COLGROUP, GUMBO_TAG_TBODY, GUMBO_TAG_TFOOT,
3191
- GUMBO_TAG_THEAD, GUMBO_TAG_LAST) ||
3322
+ } else if (tag_in(token, kStartTag,
3323
+ (gumbo_tagset){TAG(CAPTION), TAG(COL), TAG(COLGROUP),
3324
+ TAG(TBODY), TAG(TFOOT), TAG(THEAD)}) ||
3192
3325
  tag_is(token, kEndTag, GUMBO_TAG_TABLE)) {
3193
3326
  if (!(has_an_element_in_table_scope(parser, GUMBO_TAG_TBODY) ||
3194
- has_an_element_in_table_scope(parser, GUMBO_TAG_THEAD) ||
3195
- has_an_element_in_table_scope(parser, GUMBO_TAG_TFOOT))) {
3327
+ has_an_element_in_table_scope(parser, GUMBO_TAG_THEAD) ||
3328
+ has_an_element_in_table_scope(parser, GUMBO_TAG_TFOOT))) {
3196
3329
  parser_add_parse_error(parser, token);
3197
3330
  ignore_token(parser);
3198
3331
  return false;
@@ -3202,10 +3335,9 @@ static bool handle_in_table_body(GumboParser* parser, GumboToken* token) {
3202
3335
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
3203
3336
  parser->_parser_state->_reprocess_current_token = true;
3204
3337
  return true;
3205
- } else if (tag_in(token, kEndTag, GUMBO_TAG_BODY, GUMBO_TAG_CAPTION,
3206
- GUMBO_TAG_COL, GUMBO_TAG_TR, GUMBO_TAG_COLGROUP,
3207
- GUMBO_TAG_HTML, GUMBO_TAG_TD, GUMBO_TAG_TH, GUMBO_TAG_LAST))
3208
- {
3338
+ } else if (tag_in(token, kEndTag,
3339
+ (gumbo_tagset){TAG(BODY), TAG(CAPTION), TAG(COL), TAG(TR),
3340
+ TAG(COLGROUP), TAG(HTML), TAG(TD), TAG(TH)})) {
3209
3341
  parser_add_parse_error(parser, token);
3210
3342
  ignore_token(parser);
3211
3343
  return false;
@@ -3216,48 +3348,55 @@ static bool handle_in_table_body(GumboParser* parser, GumboToken* token) {
3216
3348
 
3217
3349
  // http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-intr
3218
3350
  static bool handle_in_row(GumboParser* parser, GumboToken* token) {
3219
- if (tag_in(token, kStartTag, GUMBO_TAG_TH, GUMBO_TAG_TD, GUMBO_TAG_LAST)) {
3351
+ if (tag_in(token, kStartTag, (gumbo_tagset){TAG(TH), TAG(TD)})) {
3220
3352
  clear_stack_to_table_row_context(parser);
3221
3353
  insert_element_from_token(parser, token);
3222
3354
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_CELL);
3223
3355
  add_formatting_element(parser, &kActiveFormattingScopeMarker);
3224
3356
  return true;
3225
- } else if (tag_in(token, kStartTag, GUMBO_TAG_CAPTION, GUMBO_TAG_COLGROUP,
3226
- GUMBO_TAG_TBODY, GUMBO_TAG_TFOOT, GUMBO_TAG_THEAD,
3227
- GUMBO_TAG_TR, GUMBO_TAG_LAST) ||
3228
- tag_in(token, kEndTag, GUMBO_TAG_TR, GUMBO_TAG_TABLE,
3229
- GUMBO_TAG_TBODY, GUMBO_TAG_TFOOT, GUMBO_TAG_THEAD,
3230
- GUMBO_TAG_LAST)) {
3231
- // This case covers 4 clauses of the spec, each of which say "Otherwise, act
3232
- // as if an end tag with the tag name "tr" had been seen." The differences
3233
- // are in error handling and whether the current token is reprocessed.
3234
- GumboTag desired_tag =
3235
- tag_in(token, kEndTag, GUMBO_TAG_TBODY, GUMBO_TAG_TFOOT,
3236
- GUMBO_TAG_THEAD, GUMBO_TAG_LAST)
3237
- ? token->v.end_tag : GUMBO_TAG_TR;
3238
- if (!has_an_element_in_table_scope(parser, desired_tag)) {
3239
- gumbo_debug("Bailing because there is no tag %s in table scope.\nOpen elements:",
3240
- gumbo_normalized_tagname(desired_tag));
3241
- for (int i = 0; i < parser->_parser_state->_open_elements.length; ++i) {
3242
- const GumboNode* node = parser->_parser_state->_open_elements.data[i];
3243
- gumbo_debug("%s\n", gumbo_normalized_tagname(node->v.element.tag));
3244
- }
3357
+ } else if (tag_is(token, kEndTag, GUMBO_TAG_TR)) {
3358
+ if (!has_an_element_in_table_scope(parser, GUMBO_TAG_TR)) {
3359
+ parser_add_parse_error(parser, token);
3360
+ ignore_token(parser);
3361
+ return false;
3362
+ } else {
3363
+ clear_stack_to_table_row_context(parser);
3364
+ pop_current_node(parser);
3365
+ set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
3366
+ return true;
3367
+ }
3368
+ } else if (tag_in(token, kStartTag,
3369
+ (gumbo_tagset){TAG(CAPTION), TAG(COL), TAG(COLGROUP),
3370
+ TAG(TBODY), TAG(TFOOT), TAG(THEAD), TAG(TR)}) ||
3371
+ tag_is(token, kEndTag, GUMBO_TAG_TABLE)) {
3372
+ if (!has_an_element_in_table_scope(parser, GUMBO_TAG_TR)) {
3245
3373
  parser_add_parse_error(parser, token);
3246
3374
  ignore_token(parser);
3247
3375
  return false;
3376
+ } else {
3377
+ clear_stack_to_table_row_context(parser);
3378
+ pop_current_node(parser);
3379
+ set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
3380
+ parser->_parser_state->_reprocess_current_token = true;
3381
+ return true;
3248
3382
  }
3249
- clear_stack_to_table_row_context(parser);
3250
- GumboNode* last_element = pop_current_node(parser);
3251
- assert(node_tag_is(last_element, GUMBO_TAG_TR));
3252
- AVOID_UNUSED_VARIABLE_WARNING(last_element);
3253
- set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
3254
- if (!tag_is(token, kEndTag, GUMBO_TAG_TR)) {
3383
+ } else if (tag_in(token, kEndTag,
3384
+ (gumbo_tagset){TAG(TBODY), TAG(TFOOT), TAG(THEAD)})) {
3385
+ if (!has_an_element_in_table_scope(parser, token->v.end_tag) ||
3386
+ (!has_an_element_in_table_scope(parser, GUMBO_TAG_TR))) {
3387
+ parser_add_parse_error(parser, token);
3388
+ ignore_token(parser);
3389
+ return false;
3390
+ } else {
3391
+ clear_stack_to_table_row_context(parser);
3392
+ pop_current_node(parser);
3393
+ set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
3255
3394
  parser->_parser_state->_reprocess_current_token = true;
3395
+ return true;
3256
3396
  }
3257
- return true;
3258
- } else if (tag_in(token, kEndTag, GUMBO_TAG_BODY, GUMBO_TAG_CAPTION,
3259
- GUMBO_TAG_COL, GUMBO_TAG_COLGROUP, GUMBO_TAG_HTML,
3260
- GUMBO_TAG_TD, GUMBO_TAG_TH, GUMBO_TAG_LAST)) {
3397
+ } else if (tag_in(token, kEndTag,
3398
+ (gumbo_tagset){TAG(BODY), TAG(CAPTION), TAG(COL),
3399
+ TAG(COLGROUP), TAG(HTML), TAG(TD), TAG(TH)})) {
3261
3400
  parser_add_parse_error(parser, token);
3262
3401
  ignore_token(parser);
3263
3402
  return false;
@@ -3268,17 +3407,18 @@ static bool handle_in_row(GumboParser* parser, GumboToken* token) {
3268
3407
 
3269
3408
  // http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-intd
3270
3409
  static bool handle_in_cell(GumboParser* parser, GumboToken* token) {
3271
- if (tag_in(token, kEndTag, GUMBO_TAG_TD, GUMBO_TAG_TH, GUMBO_TAG_LAST)) {
3410
+ if (tag_in(token, kEndTag, (gumbo_tagset){TAG(TD), TAG(TH)})) {
3272
3411
  GumboTag token_tag = token->v.end_tag;
3273
3412
  if (!has_an_element_in_table_scope(parser, token_tag)) {
3274
3413
  parser_add_parse_error(parser, token);
3414
+ ignore_token(parser);
3275
3415
  return false;
3276
3416
  }
3277
3417
  return close_table_cell(parser, token, token_tag);
3278
- } else if (tag_in(token, kStartTag, GUMBO_TAG_CAPTION, GUMBO_TAG_COL,
3279
- GUMBO_TAG_COLGROUP, GUMBO_TAG_TBODY, GUMBO_TAG_TD,
3280
- GUMBO_TAG_TFOOT, GUMBO_TAG_TH, GUMBO_TAG_THEAD,
3281
- GUMBO_TAG_TR, GUMBO_TAG_LAST)) {
3418
+ } else if (tag_in(token, kStartTag,
3419
+ (gumbo_tagset){TAG(CAPTION), TAG(COL), TAG(COLGROUP),
3420
+ TAG(TBODY), TAG(TD), TAG(TFOOT), TAG(TH), TAG(THEAD),
3421
+ TAG(TR)})) {
3282
3422
  gumbo_debug("Handling <td> in cell.\n");
3283
3423
  if (!has_an_element_in_table_scope(parser, GUMBO_TAG_TH) &&
3284
3424
  !has_an_element_in_table_scope(parser, GUMBO_TAG_TD)) {
@@ -3289,15 +3429,13 @@ static bool handle_in_cell(GumboParser* parser, GumboToken* token) {
3289
3429
  }
3290
3430
  parser->_parser_state->_reprocess_current_token = true;
3291
3431
  return close_current_cell(parser, token);
3292
- } else if (tag_in(token, kEndTag, GUMBO_TAG_BODY, GUMBO_TAG_CAPTION,
3293
- GUMBO_TAG_COL, GUMBO_TAG_COLGROUP, GUMBO_TAG_HTML,
3294
- GUMBO_TAG_LAST)) {
3432
+ } else if (tag_in(token, kEndTag, (gumbo_tagset){TAG(BODY), TAG(CAPTION),
3433
+ TAG(COL), TAG(COLGROUP), TAG(HTML)})) {
3295
3434
  parser_add_parse_error(parser, token);
3296
3435
  ignore_token(parser);
3297
3436
  return false;
3298
- } else if (tag_in(token, kEndTag, GUMBO_TAG_TABLE, GUMBO_TAG_TBODY,
3299
- GUMBO_TAG_TFOOT, GUMBO_TAG_THEAD, GUMBO_TAG_TR,
3300
- GUMBO_TAG_LAST)) {
3437
+ } else if (tag_in(token, kEndTag, (gumbo_tagset){TAG(TABLE), TAG(TBODY),
3438
+ TAG(TFOOT), TAG(THEAD), TAG(TR)})) {
3301
3439
  if (!has_an_element_in_table_scope(parser, token->v.end_tag)) {
3302
3440
  parser_add_parse_error(parser, token);
3303
3441
  ignore_token(parser);
@@ -3330,28 +3468,28 @@ static bool handle_in_select(GumboParser* parser, GumboToken* token) {
3330
3468
  } else if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
3331
3469
  return handle_in_body(parser, token);
3332
3470
  } else if (tag_is(token, kStartTag, GUMBO_TAG_OPTION)) {
3333
- if (node_tag_is(get_current_node(parser), GUMBO_TAG_OPTION)) {
3471
+ if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTION)) {
3334
3472
  pop_current_node(parser);
3335
3473
  }
3336
3474
  insert_element_from_token(parser, token);
3337
3475
  return true;
3338
3476
  } else if (tag_is(token, kStartTag, GUMBO_TAG_OPTGROUP)) {
3339
- if (node_tag_is(get_current_node(parser), GUMBO_TAG_OPTION)) {
3477
+ if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTION)) {
3340
3478
  pop_current_node(parser);
3341
3479
  }
3342
- if (node_tag_is(get_current_node(parser), GUMBO_TAG_OPTGROUP)) {
3480
+ if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTGROUP)) {
3343
3481
  pop_current_node(parser);
3344
3482
  }
3345
3483
  insert_element_from_token(parser, token);
3346
3484
  return true;
3347
3485
  } else if (tag_is(token, kEndTag, GUMBO_TAG_OPTGROUP)) {
3348
3486
  GumboVector* open_elements = &parser->_parser_state->_open_elements;
3349
- if (node_tag_is(get_current_node(parser), GUMBO_TAG_OPTION) &&
3350
- node_tag_is(open_elements->data[open_elements->length - 2],
3351
- GUMBO_TAG_OPTGROUP)) {
3487
+ if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTION) &&
3488
+ node_html_tag_is(open_elements->data[open_elements->length - 2],
3489
+ GUMBO_TAG_OPTGROUP)) {
3352
3490
  pop_current_node(parser);
3353
3491
  }
3354
- if (node_tag_is(get_current_node(parser), GUMBO_TAG_OPTGROUP)) {
3492
+ if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTGROUP)) {
3355
3493
  pop_current_node(parser);
3356
3494
  return true;
3357
3495
  } else {
@@ -3360,7 +3498,7 @@ static bool handle_in_select(GumboParser* parser, GumboToken* token) {
3360
3498
  return false;
3361
3499
  }
3362
3500
  } else if (tag_is(token, kEndTag, GUMBO_TAG_OPTION)) {
3363
- if (node_tag_is(get_current_node(parser), GUMBO_TAG_OPTION)) {
3501
+ if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTION)) {
3364
3502
  pop_current_node(parser);
3365
3503
  return true;
3366
3504
  } else {
@@ -3379,10 +3517,12 @@ static bool handle_in_select(GumboParser* parser, GumboToken* token) {
3379
3517
  } else if (tag_is(token, kStartTag, GUMBO_TAG_SELECT)) {
3380
3518
  parser_add_parse_error(parser, token);
3381
3519
  ignore_token(parser);
3382
- close_current_select(parser);
3520
+ if (has_an_element_in_select_scope(parser, GUMBO_TAG_SELECT)) {
3521
+ close_current_select(parser);
3522
+ }
3383
3523
  return false;
3384
- } else if (tag_in(token, kStartTag, GUMBO_TAG_INPUT, GUMBO_TAG_KEYGEN,
3385
- GUMBO_TAG_TEXTAREA, GUMBO_TAG_LAST)) {
3524
+ } else if (tag_in(token, kStartTag,
3525
+ (gumbo_tagset){TAG(INPUT), TAG(KEYGEN), TAG(TEXTAREA)})) {
3386
3526
  parser_add_parse_error(parser, token);
3387
3527
  if (!has_an_element_in_select_scope(parser, GUMBO_TAG_SELECT)) {
3388
3528
  ignore_token(parser);
@@ -3391,14 +3531,12 @@ static bool handle_in_select(GumboParser* parser, GumboToken* token) {
3391
3531
  parser->_parser_state->_reprocess_current_token = true;
3392
3532
  }
3393
3533
  return false;
3394
- } else if (tag_is(token, kStartTag, GUMBO_TAG_SCRIPT)) {
3534
+ } else if (tag_in(token, kStartTag,
3535
+ (gumbo_tagset){TAG(SCRIPT), TAG(TEMPLATE)}) ||
3536
+ tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
3395
3537
  return handle_in_head(parser, token);
3396
3538
  } else if (token->type == GUMBO_TOKEN_EOF) {
3397
- if (get_current_node(parser) != parser->_output->root) {
3398
- parser_add_parse_error(parser, token);
3399
- return false;
3400
- }
3401
- return true;
3539
+ return handle_in_body(parser, token);
3402
3540
  } else {
3403
3541
  parser_add_parse_error(parser, token);
3404
3542
  ignore_token(parser);
@@ -3408,25 +3546,28 @@ static bool handle_in_select(GumboParser* parser, GumboToken* token) {
3408
3546
 
3409
3547
  // http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-inselectintable
3410
3548
  static bool handle_in_select_in_table(GumboParser* parser, GumboToken* token) {
3411
- if (tag_in(token, kStartTag, GUMBO_TAG_CAPTION, GUMBO_TAG_TABLE,
3412
- GUMBO_TAG_TBODY, GUMBO_TAG_TFOOT, GUMBO_TAG_THEAD, GUMBO_TAG_TR,
3413
- GUMBO_TAG_TD, GUMBO_TAG_TH, GUMBO_TAG_LAST)) {
3549
+ if (tag_in(token, kStartTag,
3550
+ (gumbo_tagset){TAG(CAPTION), TAG(TABLE), TAG(TBODY), TAG(TFOOT),
3551
+ TAG(THEAD), TAG(TR), TAG(TD), TAG(TH)})) {
3414
3552
  parser_add_parse_error(parser, token);
3415
3553
  close_current_select(parser);
3416
3554
  parser->_parser_state->_reprocess_current_token = true;
3417
3555
  return false;
3418
- } else if (tag_in(token, kEndTag, GUMBO_TAG_CAPTION, GUMBO_TAG_TABLE,
3419
- GUMBO_TAG_TBODY, GUMBO_TAG_TFOOT, GUMBO_TAG_THEAD,
3420
- GUMBO_TAG_TR, GUMBO_TAG_TD, GUMBO_TAG_TH, GUMBO_TAG_LAST)) {
3556
+ } else if (tag_in(token, kEndTag,
3557
+ (gumbo_tagset){TAG(CAPTION), TAG(TABLE), TAG(TBODY),
3558
+ TAG(TFOOT), TAG(THEAD), TAG(TR), TAG(TD), TAG(TH)})) {
3421
3559
  parser_add_parse_error(parser, token);
3422
- if (has_an_element_in_table_scope(parser, token->v.end_tag)) {
3560
+ if (!has_an_element_in_table_scope(parser, token->v.end_tag)) {
3561
+ ignore_token(parser);
3562
+ return false;
3563
+ } else {
3423
3564
  close_current_select(parser);
3424
- reset_insertion_mode_appropriately(parser);
3565
+ // close_current_select already does the
3566
+ // reset_insertion_mode_appropriately
3567
+ // reset_insertion_mode_appropriately(parser);
3425
3568
  parser->_parser_state->_reprocess_current_token = true;
3426
- } else {
3427
- ignore_token(parser);
3569
+ return false;
3428
3570
  }
3429
- return false;
3430
3571
  } else {
3431
3572
  return handle_in_select(parser, token);
3432
3573
  }
@@ -3434,8 +3575,71 @@ static bool handle_in_select_in_table(GumboParser* parser, GumboToken* token) {
3434
3575
 
3435
3576
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#parsing-main-intemplate
3436
3577
  static bool handle_in_template(GumboParser* parser, GumboToken* token) {
3437
- // TODO(jdtang): Implement this.
3438
- return true;
3578
+ GumboParserState* state = parser->_parser_state;
3579
+ if (token->type == GUMBO_TOKEN_WHITESPACE ||
3580
+ token->type == GUMBO_TOKEN_CHARACTER ||
3581
+ token->type == GUMBO_TOKEN_COMMENT || token->type == GUMBO_TOKEN_NULL ||
3582
+ token->type == GUMBO_TOKEN_DOCTYPE) {
3583
+ return handle_in_body(parser, token);
3584
+ } else if (tag_in(token, kStartTag,
3585
+ (gumbo_tagset){TAG(BASE), TAG(BASEFONT), TAG(BGSOUND),
3586
+ TAG(LINK), TAG(META), TAG(NOFRAMES), TAG(SCRIPT),
3587
+ TAG(STYLE), TAG(TEMPLATE), TAG(TITLE)}) ||
3588
+ tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
3589
+ return handle_in_head(parser, token);
3590
+ } else if (tag_in(
3591
+ token, kStartTag, (gumbo_tagset){TAG(CAPTION), TAG(COLGROUP),
3592
+ TAG(TBODY), TAG(TFOOT), TAG(THEAD)})) {
3593
+ pop_template_insertion_mode(parser);
3594
+ push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
3595
+ set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
3596
+ state->_reprocess_current_token = true;
3597
+ return true;
3598
+ } else if (tag_is(token, kStartTag, GUMBO_TAG_COL)) {
3599
+ pop_template_insertion_mode(parser);
3600
+ push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_COLUMN_GROUP);
3601
+ set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_COLUMN_GROUP);
3602
+ state->_reprocess_current_token = true;
3603
+ return true;
3604
+ } else if (tag_is(token, kStartTag, GUMBO_TAG_TR)) {
3605
+ pop_template_insertion_mode(parser);
3606
+ push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
3607
+ set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
3608
+ state->_reprocess_current_token = true;
3609
+ return true;
3610
+ } else if (tag_in(token, kStartTag, (gumbo_tagset){TAG(TD), TAG(TH)})) {
3611
+ pop_template_insertion_mode(parser);
3612
+ push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_ROW);
3613
+ set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_ROW);
3614
+ state->_reprocess_current_token = true;
3615
+ return true;
3616
+ } else if (token->type == GUMBO_TOKEN_START_TAG) {
3617
+ pop_template_insertion_mode(parser);
3618
+ push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_BODY);
3619
+ set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_BODY);
3620
+ state->_reprocess_current_token = true;
3621
+ return true;
3622
+ } else if (token->type == GUMBO_TOKEN_END_TAG) {
3623
+ parser_add_parse_error(parser, token);
3624
+ ignore_token(parser);
3625
+ return false;
3626
+ } else if (token->type == GUMBO_TOKEN_EOF) {
3627
+ if (!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
3628
+ // Stop parsing.
3629
+ return true;
3630
+ }
3631
+ parser_add_parse_error(parser, token);
3632
+ while (!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_TEMPLATE))
3633
+ ;
3634
+ clear_active_formatting_elements(parser);
3635
+ pop_template_insertion_mode(parser);
3636
+ reset_insertion_mode_appropriately(parser);
3637
+ state->_reprocess_current_token = true;
3638
+ return false;
3639
+ } else {
3640
+ assert(0);
3641
+ return false;
3642
+ }
3439
3643
  }
3440
3644
 
3441
3645
  // http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-afterbody
@@ -3453,10 +3657,15 @@ static bool handle_after_body(GumboParser* parser, GumboToken* token) {
3453
3657
  ignore_token(parser);
3454
3658
  return false;
3455
3659
  } else if (tag_is(token, kEndTag, GUMBO_TAG_HTML)) {
3456
- // TODO(jdtang): Handle fragment parsing algorithm case.
3660
+ /* fragment case: ignore the closing HTML token */
3661
+ if (is_fragment_parser(parser)) {
3662
+ parser_add_parse_error(parser, token);
3663
+ ignore_token(parser);
3664
+ return false;
3665
+ }
3457
3666
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_AFTER_BODY);
3458
3667
  GumboNode* html = parser->_parser_state->_open_elements.data[0];
3459
- assert(node_tag_is(html, GUMBO_TAG_HTML));
3668
+ assert(node_html_tag_is(html, GUMBO_TAG_HTML));
3460
3669
  record_end_of_element(
3461
3670
  parser->_parser_state->_current_token, &html->v.element);
3462
3671
  return true;
@@ -3488,15 +3697,14 @@ static bool handle_in_frameset(GumboParser* parser, GumboToken* token) {
3488
3697
  insert_element_from_token(parser, token);
3489
3698
  return true;
3490
3699
  } else if (tag_is(token, kEndTag, GUMBO_TAG_FRAMESET)) {
3491
- if (node_tag_is(get_current_node(parser), GUMBO_TAG_HTML)) {
3700
+ if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_HTML)) {
3492
3701
  parser_add_parse_error(parser, token);
3493
3702
  ignore_token(parser);
3494
3703
  return false;
3495
3704
  }
3496
3705
  pop_current_node(parser);
3497
- // TODO(jdtang): Add a condition to ignore this for the fragment parsing
3498
- // algorithm.
3499
- if (!node_tag_is(get_current_node(parser), GUMBO_TAG_FRAMESET)) {
3706
+ if (!is_fragment_parser(parser) &&
3707
+ !node_html_tag_is(get_current_node(parser), GUMBO_TAG_FRAMESET)) {
3500
3708
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_FRAMESET);
3501
3709
  }
3502
3710
  return true;
@@ -3508,7 +3716,7 @@ static bool handle_in_frameset(GumboParser* parser, GumboToken* token) {
3508
3716
  } else if (tag_is(token, kStartTag, GUMBO_TAG_NOFRAMES)) {
3509
3717
  return handle_in_head(parser, token);
3510
3718
  } else if (token->type == GUMBO_TOKEN_EOF) {
3511
- if (!node_tag_is(get_current_node(parser), GUMBO_TAG_HTML)) {
3719
+ if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_HTML)) {
3512
3720
  parser_add_parse_error(parser, token);
3513
3721
  return false;
3514
3722
  }
@@ -3536,7 +3744,7 @@ static bool handle_after_frameset(GumboParser* parser, GumboToken* token) {
3536
3744
  return handle_in_body(parser, token);
3537
3745
  } else if (tag_is(token, kEndTag, GUMBO_TAG_HTML)) {
3538
3746
  GumboNode* html = parser->_parser_state->_open_elements.data[0];
3539
- assert(node_tag_is(html, GUMBO_TAG_HTML));
3747
+ assert(node_html_tag_is(html, GUMBO_TAG_HTML));
3540
3748
  record_end_of_element(
3541
3749
  parser->_parser_state->_current_token, &html->v.element);
3542
3750
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_AFTER_FRAMESET);
@@ -3595,31 +3803,14 @@ static bool handle_after_after_frameset(
3595
3803
  // Function pointers for each insertion mode. Keep in sync with
3596
3804
  // insertion_mode.h.
3597
3805
  typedef bool (*TokenHandler)(GumboParser* parser, GumboToken* token);
3598
- static const TokenHandler kTokenHandlers[] = {
3599
- handle_initial,
3600
- handle_before_html,
3601
- handle_before_head,
3602
- handle_in_head,
3603
- handle_in_head_noscript,
3604
- handle_after_head,
3605
- handle_in_body,
3606
- handle_text,
3607
- handle_in_table,
3608
- handle_in_table_text,
3609
- handle_in_caption,
3610
- handle_in_column_group,
3611
- handle_in_table_body,
3612
- handle_in_row,
3613
- handle_in_cell,
3614
- handle_in_select,
3615
- handle_in_select_in_table,
3616
- handle_in_template,
3617
- handle_after_body,
3618
- handle_in_frameset,
3619
- handle_after_frameset,
3620
- handle_after_after_body,
3621
- handle_after_after_frameset
3622
- };
3806
+ static const TokenHandler kTokenHandlers[] = {handle_initial,
3807
+ handle_before_html, handle_before_head, handle_in_head,
3808
+ handle_in_head_noscript, handle_after_head, handle_in_body, handle_text,
3809
+ handle_in_table, handle_in_table_text, handle_in_caption,
3810
+ handle_in_column_group, handle_in_table_body, handle_in_row, handle_in_cell,
3811
+ handle_in_select, handle_in_select_in_table, handle_in_template,
3812
+ handle_after_body, handle_in_frameset, handle_after_frameset,
3813
+ handle_after_after_body, handle_after_after_frameset};
3623
3814
 
3624
3815
  static bool handle_html_content(GumboParser* parser, GumboToken* token) {
3625
3816
  return kTokenHandlers[(unsigned int) parser->_parser_state->_insertion_mode](
@@ -3628,16 +3819,17 @@ static bool handle_html_content(GumboParser* parser, GumboToken* token) {
3628
3819
 
3629
3820
  // http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-inforeign
3630
3821
  static bool handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
3822
+ gumbo_debug("Handling foreign content");
3631
3823
  switch (token->type) {
3632
3824
  case GUMBO_TOKEN_NULL:
3633
3825
  parser_add_parse_error(parser, token);
3634
- token->type = GUMBO_TOKEN_CHARACTER;
3635
3826
  token->v.character = kUtf8ReplacementChar;
3636
3827
  insert_text_token(parser, token);
3637
3828
  return false;
3638
3829
  case GUMBO_TOKEN_WHITESPACE:
3639
3830
  insert_text_token(parser, token);
3640
3831
  return true;
3832
+ case GUMBO_TOKEN_CDATA:
3641
3833
  case GUMBO_TOKEN_CHARACTER:
3642
3834
  insert_text_token(parser, token);
3643
3835
  set_frameset_not_ok(parser);
@@ -3654,35 +3846,44 @@ static bool handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
3654
3846
  break;
3655
3847
  }
3656
3848
  // Order matters for these clauses.
3657
- if (tag_in(token, kStartTag, GUMBO_TAG_B, GUMBO_TAG_BIG,
3658
- GUMBO_TAG_BLOCKQUOTE, GUMBO_TAG_BODY, GUMBO_TAG_BR,
3659
- GUMBO_TAG_CENTER, GUMBO_TAG_CODE, GUMBO_TAG_DD, GUMBO_TAG_DIV,
3660
- GUMBO_TAG_DL, GUMBO_TAG_DT, GUMBO_TAG_EM, GUMBO_TAG_EMBED,
3661
- GUMBO_TAG_H1, GUMBO_TAG_H2, GUMBO_TAG_H3, GUMBO_TAG_H4,
3662
- GUMBO_TAG_H5, GUMBO_TAG_H6, GUMBO_TAG_HEAD, GUMBO_TAG_HR,
3663
- GUMBO_TAG_I, GUMBO_TAG_IMG, GUMBO_TAG_LI, GUMBO_TAG_LISTING,
3664
- GUMBO_TAG_MENU, GUMBO_TAG_META, GUMBO_TAG_NOBR, GUMBO_TAG_OL,
3665
- GUMBO_TAG_P, GUMBO_TAG_PRE, GUMBO_TAG_RUBY, GUMBO_TAG_S,
3666
- GUMBO_TAG_SMALL, GUMBO_TAG_SPAN, GUMBO_TAG_STRONG,
3667
- GUMBO_TAG_STRIKE, GUMBO_TAG_SUB, GUMBO_TAG_SUP,
3668
- GUMBO_TAG_TABLE, GUMBO_TAG_TT, GUMBO_TAG_U, GUMBO_TAG_UL,
3669
- GUMBO_TAG_VAR, GUMBO_TAG_LAST) ||
3670
- (tag_is(token, kStartTag, GUMBO_TAG_FONT) && (
3671
- token_has_attribute(token, "color") ||
3672
- token_has_attribute(token, "face") ||
3673
- token_has_attribute(token, "size")))) {
3849
+ if (tag_in(token, kStartTag,
3850
+ (gumbo_tagset){TAG(B), TAG(BIG), TAG(BLOCKQUOTE), TAG(BODY), TAG(BR),
3851
+ TAG(CENTER), TAG(CODE), TAG(DD), TAG(DIV), TAG(DL), TAG(DT),
3852
+ TAG(EM), TAG(EMBED), TAG(H1), TAG(H2), TAG(H3), TAG(H4), TAG(H5),
3853
+ TAG(H6), TAG(HEAD), TAG(HR), TAG(I), TAG(IMG), TAG(LI),
3854
+ TAG(LISTING), TAG(MENU), TAG(META), TAG(NOBR), TAG(OL), TAG(P),
3855
+ TAG(PRE), TAG(RUBY), TAG(S), TAG(SMALL), TAG(SPAN), TAG(STRONG),
3856
+ TAG(STRIKE), TAG(SUB), TAG(SUP), TAG(TABLE), TAG(TT), TAG(U),
3857
+ TAG(UL), TAG(VAR)}) ||
3858
+ (tag_is(token, kStartTag, GUMBO_TAG_FONT) &&
3859
+ (token_has_attribute(token, "color") ||
3860
+ token_has_attribute(token, "face") ||
3861
+ token_has_attribute(token, "size")))) {
3862
+ /* Parse error */
3674
3863
  parser_add_parse_error(parser, token);
3675
- do {
3676
- pop_current_node(parser);
3677
- } while(!(is_mathml_integration_point(get_current_node(parser)) ||
3678
- is_html_integration_point(get_current_node(parser)) ||
3679
- get_current_node(parser)->v.element.tag_namespace ==
3680
- GUMBO_NAMESPACE_HTML));
3681
- parser->_parser_state->_reprocess_current_token = true;
3682
- return false;
3683
- } else if (token->type == GUMBO_TOKEN_START_TAG) {
3864
+
3865
+ /*
3866
+ * Fragment case: If the parser was originally created for the HTML
3867
+ * fragment parsing algorithm, then act as described in the "any other
3868
+ * start tag" entry below.
3869
+ */
3870
+ if (!is_fragment_parser(parser)) {
3871
+ do {
3872
+ pop_current_node(parser);
3873
+ } while (!(is_mathml_integration_point(get_current_node(parser)) ||
3874
+ is_html_integration_point(get_current_node(parser)) ||
3875
+ get_current_node(parser)->v.element.tag_namespace ==
3876
+ GUMBO_NAMESPACE_HTML));
3877
+ parser->_parser_state->_reprocess_current_token = true;
3878
+ return false;
3879
+ }
3880
+
3881
+ assert(token->type == GUMBO_TOKEN_START_TAG);
3882
+ }
3883
+
3884
+ if (token->type == GUMBO_TOKEN_START_TAG) {
3684
3885
  const GumboNamespaceEnum current_namespace =
3685
- get_current_node(parser)->v.element.tag_namespace;
3886
+ get_adjusted_current_node(parser)->v.element.tag_namespace;
3686
3887
  if (current_namespace == GUMBO_NAMESPACE_MATHML) {
3687
3888
  adjust_mathml_attributes(parser, token);
3688
3889
  }
@@ -3698,8 +3899,8 @@ static bool handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
3698
3899
  acknowledge_self_closing_tag(parser);
3699
3900
  }
3700
3901
  return true;
3701
- // </script> tags are handled like any other end tag, putting the script's
3702
- // text into a text node child and closing the current node.
3902
+ // </script> tags are handled like any other end tag, putting the script's
3903
+ // text into a text node child and closing the current node.
3703
3904
  } else {
3704
3905
  assert(token->type == GUMBO_TOKEN_END_TAG);
3705
3906
  GumboNode* node = get_current_node(parser);
@@ -3715,13 +3916,13 @@ static bool handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
3715
3916
  is_success = false;
3716
3917
  }
3717
3918
  int i = parser->_parser_state->_open_elements.length;
3718
- for( --i; i > 0; ) {
3919
+ for (--i; i > 0;) {
3719
3920
  // Here we move up the stack until we find an HTML element (in which
3720
3921
  // case we do nothing) or we find the element that we're about to
3721
3922
  // close (in which case we pop everything we've seen until that
3722
3923
  // point.)
3723
3924
  gumbo_debug("Foreign %.*s node at %d.\n", node_tagname.length,
3724
- node_tagname.data, i);
3925
+ node_tagname.data, i);
3725
3926
  if (gumbo_string_equals_ignore_case(&node_tagname, &token_tagname)) {
3726
3927
  gumbo_debug("Matches.\n");
3727
3928
  while (pop_current_node(parser) != node) {
@@ -3749,7 +3950,6 @@ static bool handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
3749
3950
  }
3750
3951
  }
3751
3952
 
3752
-
3753
3953
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#tree-construction
3754
3954
  static bool handle_token(GumboParser* parser, GumboToken* token) {
3755
3955
  if (parser->_parser_state->_ignore_next_linefeed &&
@@ -3771,29 +3971,31 @@ static bool handle_token(GumboParser* parser, GumboToken* token) {
3771
3971
  parser->_parser_state->_closed_html_tag = true;
3772
3972
  }
3773
3973
 
3774
- const GumboNode* current_node = get_current_node(parser);
3775
- assert(!current_node || current_node->type == GUMBO_NODE_ELEMENT);
3974
+ const GumboNode* current_node = get_adjusted_current_node(parser);
3975
+ assert(!current_node || current_node->type == GUMBO_NODE_ELEMENT ||
3976
+ current_node->type == GUMBO_NODE_TEMPLATE);
3776
3977
  if (current_node) {
3777
3978
  gumbo_debug("Current node: <%s>.\n",
3778
- gumbo_normalized_tagname(current_node->v.element.tag));
3979
+ gumbo_normalized_tagname(current_node->v.element.tag));
3779
3980
  }
3780
3981
  if (!current_node ||
3781
3982
  current_node->v.element.tag_namespace == GUMBO_NAMESPACE_HTML ||
3782
3983
  (is_mathml_integration_point(current_node) &&
3783
- (token->type == GUMBO_TOKEN_CHARACTER ||
3784
- token->type == GUMBO_TOKEN_WHITESPACE ||
3785
- token->type == GUMBO_TOKEN_NULL ||
3786
- (token->type == GUMBO_TOKEN_START_TAG &&
3787
- !tag_in(token, kStartTag, GUMBO_TAG_MGLYPH, GUMBO_TAG_MALIGNMARK,
3788
- GUMBO_TAG_LAST)))) ||
3984
+ (token->type == GUMBO_TOKEN_CHARACTER ||
3985
+ token->type == GUMBO_TOKEN_WHITESPACE ||
3986
+ token->type == GUMBO_TOKEN_NULL ||
3987
+ (token->type == GUMBO_TOKEN_START_TAG &&
3988
+ !tag_in(token, kStartTag,
3989
+ (gumbo_tagset){TAG(MGLYPH), TAG(MALIGNMARK)})))) ||
3789
3990
  (current_node->v.element.tag_namespace == GUMBO_NAMESPACE_MATHML &&
3790
- node_tag_is(current_node, GUMBO_TAG_ANNOTATION_XML) &&
3791
- tag_is(token, kStartTag, GUMBO_TAG_SVG)) ||
3792
- (is_html_integration_point(current_node) && (
3793
- token->type == GUMBO_TOKEN_START_TAG ||
3794
- token->type == GUMBO_TOKEN_CHARACTER ||
3795
- token->type == GUMBO_TOKEN_NULL ||
3796
- token->type == GUMBO_TOKEN_WHITESPACE)) ||
3991
+ node_qualified_tag_is(
3992
+ current_node, GUMBO_NAMESPACE_MATHML, GUMBO_TAG_ANNOTATION_XML) &&
3993
+ tag_is(token, kStartTag, GUMBO_TAG_SVG)) ||
3994
+ (is_html_integration_point(current_node) &&
3995
+ (token->type == GUMBO_TOKEN_START_TAG ||
3996
+ token->type == GUMBO_TOKEN_CHARACTER ||
3997
+ token->type == GUMBO_TOKEN_NULL ||
3998
+ token->type == GUMBO_TOKEN_WHITESPACE)) ||
3797
3999
  token->type == GUMBO_TOKEN_EOF) {
3798
4000
  return handle_html_content(parser, token);
3799
4001
  } else {
@@ -3801,6 +4003,66 @@ static bool handle_token(GumboParser* parser, GumboToken* token) {
3801
4003
  }
3802
4004
  }
3803
4005
 
4006
+ static void fragment_parser_init(GumboParser* parser, GumboTag fragment_ctx,
4007
+ GumboNamespaceEnum fragment_namespace) {
4008
+ GumboNode* root;
4009
+ assert(fragment_ctx != GUMBO_TAG_LAST);
4010
+
4011
+ // 3
4012
+ parser->_parser_state->_fragment_ctx = create_element(parser, fragment_ctx);
4013
+ parser->_parser_state->_fragment_ctx->v.element.tag_namespace =
4014
+ fragment_namespace;
4015
+
4016
+ // 4
4017
+ if (fragment_namespace == GUMBO_NAMESPACE_HTML) {
4018
+ // Non-HTML namespaces always start in the DATA state.
4019
+ switch (fragment_ctx) {
4020
+ case GUMBO_TAG_TITLE:
4021
+ case GUMBO_TAG_TEXTAREA:
4022
+ gumbo_tokenizer_set_state(parser, GUMBO_LEX_RCDATA);
4023
+ break;
4024
+
4025
+ case GUMBO_TAG_STYLE:
4026
+ case GUMBO_TAG_XMP:
4027
+ case GUMBO_TAG_IFRAME:
4028
+ case GUMBO_TAG_NOEMBED:
4029
+ case GUMBO_TAG_NOFRAMES:
4030
+ gumbo_tokenizer_set_state(parser, GUMBO_LEX_RAWTEXT);
4031
+ break;
4032
+
4033
+ case GUMBO_TAG_SCRIPT:
4034
+ gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT);
4035
+ break;
4036
+
4037
+ case GUMBO_TAG_NOSCRIPT:
4038
+ /* scripting is disabled in Gumbo, so leave the tokenizer
4039
+ * in the default data state */
4040
+ break;
4041
+
4042
+ case GUMBO_TAG_PLAINTEXT:
4043
+ gumbo_tokenizer_set_state(parser, GUMBO_LEX_PLAINTEXT);
4044
+ break;
4045
+
4046
+ default:
4047
+ /* default data state */
4048
+ break;
4049
+ }
4050
+ }
4051
+
4052
+ // 5. 6. 7.
4053
+ root = insert_element_of_tag_type(
4054
+ parser, GUMBO_TAG_HTML, GUMBO_INSERTION_IMPLIED);
4055
+ parser->_output->root = root;
4056
+
4057
+ // 8.
4058
+ if (fragment_ctx == GUMBO_TAG_TEMPLATE) {
4059
+ push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TEMPLATE);
4060
+ }
4061
+
4062
+ // 10.
4063
+ reset_insertion_mode_appropriately(parser);
4064
+ }
4065
+
3804
4066
  GumboOutput* gumbo_parse(const char* buffer) {
3805
4067
  return gumbo_parse_with_options(
3806
4068
  &kGumboDefaultOptions, buffer, strlen(buffer));
@@ -3814,6 +4076,11 @@ GumboOutput* gumbo_parse_with_options(
3814
4076
  gumbo_tokenizer_state_init(&parser, buffer, length);
3815
4077
  parser_state_init(&parser);
3816
4078
 
4079
+ if (options->fragment_context != GUMBO_TAG_LAST) {
4080
+ fragment_parser_init(
4081
+ &parser, options->fragment_context, options->fragment_namespace);
4082
+ }
4083
+
3817
4084
  GumboParserState* state = parser._parser_state;
3818
4085
  gumbo_debug("Parsing %.*s.\n", length, buffer);
3819
4086
 
@@ -3823,14 +4090,15 @@ GumboOutput* gumbo_parse_with_options(
3823
4090
 
3824
4091
  GumboToken token;
3825
4092
  bool has_error = false;
4093
+
3826
4094
  do {
3827
4095
  if (state->_reprocess_current_token) {
3828
4096
  state->_reprocess_current_token = false;
3829
4097
  } else {
3830
4098
  GumboNode* current_node = get_current_node(&parser);
3831
- gumbo_tokenizer_set_is_current_node_foreign(
3832
- &parser, current_node &&
3833
- current_node->v.element.tag_namespace != GUMBO_NAMESPACE_HTML);
4099
+ gumbo_tokenizer_set_is_current_node_foreign(&parser,
4100
+ current_node &&
4101
+ current_node->v.element.tag_namespace != GUMBO_NAMESPACE_HTML);
3834
4102
  has_error = !gumbo_lex(&parser, &token) || has_error;
3835
4103
  }
3836
4104
  const char* token_type = "text";
@@ -3850,14 +4118,13 @@ GumboOutput* gumbo_parse_with_options(
3850
4118
  default:
3851
4119
  break;
3852
4120
  }
3853
- gumbo_debug("Handling %s token @%d:%d in state %d.\n",
3854
- (char*) token_type, token.position.line, token.position.column,
3855
- state->_insertion_mode);
4121
+ gumbo_debug("Handling %s token @%d:%d in state %d.\n", (char*) token_type,
4122
+ token.position.line, token.position.column, state->_insertion_mode);
3856
4123
 
3857
4124
  state->_current_token = &token;
3858
4125
  state->_self_closing_flag_acknowledged =
3859
4126
  !(token.type == GUMBO_TOKEN_START_TAG &&
3860
- token.v.start_tag.is_self_closing);
4127
+ token.v.start_tag.is_self_closing);
3861
4128
 
3862
4129
  has_error = !handle_token(&parser, &token) || has_error;
3863
4130
 
@@ -3913,7 +4180,7 @@ void gumbo_destroy_output(const GumboOptions* options, GumboOutput* output) {
3913
4180
  GumboParser parser;
3914
4181
  parser._options = options;
3915
4182
  destroy_node(&parser, output->document);
3916
- for (int i = 0; i < output->errors.length; ++i) {
4183
+ for (unsigned int i = 0; i < output->errors.length; ++i) {
3917
4184
  gumbo_error_destroy(&parser, output->errors.data[i]);
3918
4185
  }
3919
4186
  gumbo_vector_destroy(&parser, &output->errors);