nokogumbo 1.4.2 → 1.4.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -201,24 +201,22 @@ void gumbo_error_destroy(struct GumboInternalParser* parser, GumboError* error);
201
201
  // responsible for deleting the buffer. (Note that the buffer is allocated with
202
202
  // the allocator specified in the GumboParser config and hence should be freed
203
203
  // by gumbo_parser_deallocate().)
204
- void gumbo_error_to_string(
205
- struct GumboInternalParser* parser, const GumboError* error,
206
- GumboStringBuffer* output);
204
+ void gumbo_error_to_string(struct GumboInternalParser* parser,
205
+ const GumboError* error, GumboStringBuffer* output);
207
206
 
208
207
  // Prints a caret diagnostic to a string. This fills an empty GumboStringBuffer
209
208
  // with a freshly-allocated buffer containing the error message text. The
210
209
  // caller is responsible for deleting the buffer. (Note that the buffer is
211
210
  // allocated with the allocator specified in the GumboParser config and hence
212
211
  // should be freed by gumbo_parser_deallocate().)
213
- void gumbo_caret_diagnostic_to_string(
214
- struct GumboInternalParser* parser, const GumboError* error,
215
- const char* source_text, GumboStringBuffer* output);
212
+ void gumbo_caret_diagnostic_to_string(struct GumboInternalParser* parser,
213
+ const GumboError* error, const char* source_text,
214
+ GumboStringBuffer* output);
216
215
 
217
216
  // Like gumbo_caret_diagnostic_to_string, but prints the text to stdout instead
218
217
  // of writing to a string.
219
- void gumbo_print_caret_diagnostic(
220
- struct GumboInternalParser* parser, const GumboError* error,
221
- const char* source_text);
218
+ void gumbo_print_caret_diagnostic(struct GumboInternalParser* parser,
219
+ const GumboError* error, const char* source_text);
222
220
 
223
221
  #ifdef __cplusplus
224
222
  }
@@ -76,7 +76,6 @@ typedef struct {
76
76
  */
77
77
  extern const GumboSourcePosition kGumboEmptySourcePosition;
78
78
 
79
-
80
79
  /**
81
80
  * A struct representing a string or part of a string. Strings within the
82
81
  * parser are represented by a char* and a length; the char* points into
@@ -111,7 +110,6 @@ bool gumbo_string_equals(
111
110
  bool gumbo_string_equals_ignore_case(
112
111
  const GumboStringPiece* str1, const GumboStringPiece* str2);
113
112
 
114
-
115
113
  /**
116
114
  * A simple vector implementation. This stores a pointer to a data array and a
117
115
  * length. All elements are stored as void*; client code must cast to the
@@ -141,8 +139,7 @@ extern const GumboVector kGumboEmptyVector;
141
139
  * Returns the first index at which an element appears in this vector (testing
142
140
  * by pointer equality), or -1 if it never does.
143
141
  */
144
- int gumbo_vector_index_of(GumboVector* vector, void* element);
145
-
142
+ int gumbo_vector_index_of(GumboVector* vector, const void* element);
146
143
 
147
144
  /**
148
145
  * An enum for all the tags defined in the HTML5 standard. These correspond to
@@ -157,172 +154,10 @@ int gumbo_vector_index_of(GumboVector* vector, void* element);
157
154
  * strings.
158
155
  */
159
156
  typedef enum {
160
- // http://www.whatwg.org/specs/web-apps/current-work/multipage/semantics.html#the-root-element
161
- GUMBO_TAG_HTML,
162
- // http://www.whatwg.org/specs/web-apps/current-work/multipage/semantics.html#document-metadata
163
- GUMBO_TAG_HEAD,
164
- GUMBO_TAG_TITLE,
165
- GUMBO_TAG_BASE,
166
- GUMBO_TAG_LINK,
167
- GUMBO_TAG_META,
168
- GUMBO_TAG_STYLE,
169
- // http://www.whatwg.org/specs/web-apps/current-work/multipage/scripting-1.html#scripting-1
170
- GUMBO_TAG_SCRIPT,
171
- GUMBO_TAG_NOSCRIPT,
172
- GUMBO_TAG_TEMPLATE,
173
- // http://www.whatwg.org/specs/web-apps/current-work/multipage/sections.html#sections
174
- GUMBO_TAG_BODY,
175
- GUMBO_TAG_ARTICLE,
176
- GUMBO_TAG_SECTION,
177
- GUMBO_TAG_NAV,
178
- GUMBO_TAG_ASIDE,
179
- GUMBO_TAG_H1,
180
- GUMBO_TAG_H2,
181
- GUMBO_TAG_H3,
182
- GUMBO_TAG_H4,
183
- GUMBO_TAG_H5,
184
- GUMBO_TAG_H6,
185
- GUMBO_TAG_HGROUP,
186
- GUMBO_TAG_HEADER,
187
- GUMBO_TAG_FOOTER,
188
- GUMBO_TAG_ADDRESS,
189
- // http://www.whatwg.org/specs/web-apps/current-work/multipage/grouping-content.html#grouping-content
190
- GUMBO_TAG_P,
191
- GUMBO_TAG_HR,
192
- GUMBO_TAG_PRE,
193
- GUMBO_TAG_BLOCKQUOTE,
194
- GUMBO_TAG_OL,
195
- GUMBO_TAG_UL,
196
- GUMBO_TAG_LI,
197
- GUMBO_TAG_DL,
198
- GUMBO_TAG_DT,
199
- GUMBO_TAG_DD,
200
- GUMBO_TAG_FIGURE,
201
- GUMBO_TAG_FIGCAPTION,
202
- GUMBO_TAG_MAIN,
203
- GUMBO_TAG_DIV,
204
- // http://www.whatwg.org/specs/web-apps/current-work/multipage/text-level-semantics.html#text-level-semantics
205
- GUMBO_TAG_A,
206
- GUMBO_TAG_EM,
207
- GUMBO_TAG_STRONG,
208
- GUMBO_TAG_SMALL,
209
- GUMBO_TAG_S,
210
- GUMBO_TAG_CITE,
211
- GUMBO_TAG_Q,
212
- GUMBO_TAG_DFN,
213
- GUMBO_TAG_ABBR,
214
- GUMBO_TAG_DATA,
215
- GUMBO_TAG_TIME,
216
- GUMBO_TAG_CODE,
217
- GUMBO_TAG_VAR,
218
- GUMBO_TAG_SAMP,
219
- GUMBO_TAG_KBD,
220
- GUMBO_TAG_SUB,
221
- GUMBO_TAG_SUP,
222
- GUMBO_TAG_I,
223
- GUMBO_TAG_B,
224
- GUMBO_TAG_U,
225
- GUMBO_TAG_MARK,
226
- GUMBO_TAG_RUBY,
227
- GUMBO_TAG_RT,
228
- GUMBO_TAG_RP,
229
- GUMBO_TAG_BDI,
230
- GUMBO_TAG_BDO,
231
- GUMBO_TAG_SPAN,
232
- GUMBO_TAG_BR,
233
- GUMBO_TAG_WBR,
234
- // http://www.whatwg.org/specs/web-apps/current-work/multipage/edits.html#edits
235
- GUMBO_TAG_INS,
236
- GUMBO_TAG_DEL,
237
- // http://www.whatwg.org/specs/web-apps/current-work/multipage/embedded-content-1.html#embedded-content-1
238
- GUMBO_TAG_IMAGE,
239
- GUMBO_TAG_IMG,
240
- GUMBO_TAG_IFRAME,
241
- GUMBO_TAG_EMBED,
242
- GUMBO_TAG_OBJECT,
243
- GUMBO_TAG_PARAM,
244
- GUMBO_TAG_VIDEO,
245
- GUMBO_TAG_AUDIO,
246
- GUMBO_TAG_SOURCE,
247
- GUMBO_TAG_TRACK,
248
- GUMBO_TAG_CANVAS,
249
- GUMBO_TAG_MAP,
250
- GUMBO_TAG_AREA,
251
- // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-map-element.html#mathml
252
- GUMBO_TAG_MATH,
253
- GUMBO_TAG_MI,
254
- GUMBO_TAG_MO,
255
- GUMBO_TAG_MN,
256
- GUMBO_TAG_MS,
257
- GUMBO_TAG_MTEXT,
258
- GUMBO_TAG_MGLYPH,
259
- GUMBO_TAG_MALIGNMARK,
260
- GUMBO_TAG_ANNOTATION_XML,
261
- // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-map-element.html#svg-0
262
- GUMBO_TAG_SVG,
263
- GUMBO_TAG_FOREIGNOBJECT,
264
- GUMBO_TAG_DESC,
265
- // SVG title tags will have GUMBO_TAG_TITLE as with HTML.
266
- // http://www.whatwg.org/specs/web-apps/current-work/multipage/tabular-data.html#tabular-data
267
- GUMBO_TAG_TABLE,
268
- GUMBO_TAG_CAPTION,
269
- GUMBO_TAG_COLGROUP,
270
- GUMBO_TAG_COL,
271
- GUMBO_TAG_TBODY,
272
- GUMBO_TAG_THEAD,
273
- GUMBO_TAG_TFOOT,
274
- GUMBO_TAG_TR,
275
- GUMBO_TAG_TD,
276
- GUMBO_TAG_TH,
277
- // http://www.whatwg.org/specs/web-apps/current-work/multipage/forms.html#forms
278
- GUMBO_TAG_FORM,
279
- GUMBO_TAG_FIELDSET,
280
- GUMBO_TAG_LEGEND,
281
- GUMBO_TAG_LABEL,
282
- GUMBO_TAG_INPUT,
283
- GUMBO_TAG_BUTTON,
284
- GUMBO_TAG_SELECT,
285
- GUMBO_TAG_DATALIST,
286
- GUMBO_TAG_OPTGROUP,
287
- GUMBO_TAG_OPTION,
288
- GUMBO_TAG_TEXTAREA,
289
- GUMBO_TAG_KEYGEN,
290
- GUMBO_TAG_OUTPUT,
291
- GUMBO_TAG_PROGRESS,
292
- GUMBO_TAG_METER,
293
- // http://www.whatwg.org/specs/web-apps/current-work/multipage/interactive-elements.html#interactive-elements
294
- GUMBO_TAG_DETAILS,
295
- GUMBO_TAG_SUMMARY,
296
- GUMBO_TAG_MENU,
297
- GUMBO_TAG_MENUITEM,
298
- // Non-conforming elements that nonetheless appear in the HTML5 spec.
299
- // http://www.whatwg.org/specs/web-apps/current-work/multipage/obsolete.html#non-conforming-features
300
- GUMBO_TAG_APPLET,
301
- GUMBO_TAG_ACRONYM,
302
- GUMBO_TAG_BGSOUND,
303
- GUMBO_TAG_DIR,
304
- GUMBO_TAG_FRAME,
305
- GUMBO_TAG_FRAMESET,
306
- GUMBO_TAG_NOFRAMES,
307
- GUMBO_TAG_ISINDEX,
308
- GUMBO_TAG_LISTING,
309
- GUMBO_TAG_XMP,
310
- GUMBO_TAG_NEXTID,
311
- GUMBO_TAG_NOEMBED,
312
- GUMBO_TAG_PLAINTEXT,
313
- GUMBO_TAG_RB,
314
- GUMBO_TAG_STRIKE,
315
- GUMBO_TAG_BASEFONT,
316
- GUMBO_TAG_BIG,
317
- GUMBO_TAG_BLINK,
318
- GUMBO_TAG_CENTER,
319
- GUMBO_TAG_FONT,
320
- GUMBO_TAG_MARQUEE,
321
- GUMBO_TAG_MULTICOL,
322
- GUMBO_TAG_NOBR,
323
- GUMBO_TAG_SPACER,
324
- GUMBO_TAG_TT,
325
- // Used for all tags that don't have special handling in HTML.
157
+ // Load all the tags from an external source, generated from tag.in.
158
+ #include "tag_enum.h"
159
+ // Used for all tags that don't have special handling in HTML. Add new tags
160
+ // to the end of tag.in so as to preserve backwards-compatibility.
326
161
  GUMBO_TAG_UNKNOWN,
327
162
  // A marker value to indicate the end of the enum, for iterating over it.
328
163
  // Also used as the terminator for varargs functions that take tags.
@@ -364,9 +199,10 @@ const char* gumbo_normalize_svg_tagname(const GumboStringPiece* tagname);
364
199
 
365
200
  /**
366
201
  * Converts a tag name string (which may be in upper or mixed case) to a tag
367
- * enum.
202
+ * enum. gumbo_tag_enum expects `tagname` to be NUL-terminated; gumbo_tagn_enum takes an explicit length.
368
203
  */
369
204
  GumboTag gumbo_tag_enum(const char* tagname);
205
+ GumboTag gumbo_tagn_enum(const char* tagname, unsigned int length);
370
206
 
371
207
  /**
372
208
  * Attribute namespaces.
@@ -461,10 +297,16 @@ typedef enum {
461
297
  GUMBO_NODE_TEXT,
462
298
  /** CDATA node. v will be a GumboText. */
463
299
  GUMBO_NODE_CDATA,
464
- /** Comment node. v. will be a GumboText, excluding comment delimiters. */
300
+ /** Comment node. v will be a GumboText, excluding comment delimiters. */
465
301
  GUMBO_NODE_COMMENT,
466
302
  /** Text node, where all contents are whitespace. v will be a GumboText. */
467
- GUMBO_NODE_WHITESPACE
303
+ GUMBO_NODE_WHITESPACE,
304
+ /** Template node. This is separate from GUMBO_NODE_ELEMENT because many
305
+ * client libraries will want to ignore the contents of template nodes, as
306
+ * the spec suggests. Recursing on GUMBO_NODE_ELEMENT will do the right thing
307
+ * here, while clients that want to include template contents should also
308
+ * check for GUMBO_NODE_TEMPLATE. v will be a GumboElement. */
309
+ GUMBO_NODE_TEMPLATE
468
310
  } GumboNodeType;
469
311
 
470
312
  /**
@@ -473,7 +315,9 @@ typedef enum {
473
315
  */
474
316
  typedef struct GumboInternalNode GumboNode;
475
317
 
476
- /** http://www.whatwg.org/specs/web-apps/current-work/complete/dom.html#quirks-mode */
318
+ /**
319
+ * http://www.whatwg.org/specs/web-apps/current-work/complete/dom.html#quirks-mode
320
+ */
477
321
  typedef enum {
478
322
  GUMBO_DOCTYPE_NO_QUIRKS,
479
323
  GUMBO_DOCTYPE_QUIRKS,
@@ -571,7 +415,6 @@ typedef enum {
571
415
  GUMBO_INSERTION_FOSTER_PARENTED = 1 << 10,
572
416
  } GumboParseFlags;
573
417
 
574
-
575
418
  /**
576
419
  * Information specific to document nodes.
577
420
  */
@@ -690,9 +533,9 @@ struct GumboInternalNode {
690
533
 
691
534
  /** The actual node data. */
692
535
  union {
693
- GumboDocument document; // For GUMBO_NODE_DOCUMENT.
694
- GumboElement element; // For GUMBO_NODE_ELEMENT.
695
- GumboText text; // For everything else.
536
+ GumboDocument document; // For GUMBO_NODE_DOCUMENT.
537
+ GumboElement element; // For GUMBO_NODE_ELEMENT and GUMBO_NODE_TEMPLATE.
538
+ GumboText text; // For everything else.
696
539
  } v;
697
540
  };
698
541
 
@@ -750,6 +593,29 @@ typedef struct GumboInternalOptions {
750
593
  * Default: -1
751
594
  */
752
595
  int max_errors;
596
+
597
+ /**
598
+ * The fragment context for parsing:
599
+ * https://html.spec.whatwg.org/multipage/syntax.html#parsing-html-fragments
600
+ *
601
+ * If GUMBO_TAG_LAST is passed here, it is assumed to be "no fragment", i.e.
602
+ * the regular parsing algorithm. Otherwise, pass the tag enum for the
603
+ * intended parent of the parsed fragment. We use just the tag enum rather
604
+ * than a full node because that's enough to set all the parsing context we
605
+ * need, and it provides some additional flexibility for client code to act as
606
+ * if parsing a fragment even when a full HTML tree isn't available.
607
+ *
608
+ * Default: GUMBO_TAG_LAST
609
+ */
610
+ GumboTag fragment_context;
611
+
612
+ /**
613
+ * The namespace for the fragment context. This lets client code
614
+ * differentiate between, say, parsing a <title> tag in SVG vs. parsing it in
615
+ * HTML.
616
+ * Default: GUMBO_NAMESPACE_HTML
617
+ */
618
+ GumboNamespaceEnum fragment_namespace;
753
619
  } GumboOptions;
754
620
 
755
621
  /** Default options struct; use this with gumbo_parse_with_options. */
@@ -796,9 +662,7 @@ GumboOutput* gumbo_parse_with_options(
796
662
  const GumboOptions* options, const char* buffer, size_t buffer_length);
797
663
 
798
664
  /** Release the memory used for the parse tree & parse errors. */
799
- void gumbo_destroy_output(
800
- const GumboOptions* options, GumboOutput* output);
801
-
665
+ void gumbo_destroy_output(const GumboOptions* options, GumboOutput* output);
802
666
 
803
667
  #ifdef __cplusplus
804
668
  }
@@ -34,8 +34,10 @@
34
34
 
35
35
  #define AVOID_UNUSED_VARIABLE_WARNING(i) (void)(i)
36
36
 
37
- #define GUMBO_STRING(literal) { literal, sizeof(literal) - 1 }
38
- #define TERMINATOR { "", 0 }
37
+ #define GUMBO_STRING(literal) \
38
+ { literal, sizeof(literal) - 1 }
39
+ #define TERMINATOR \
40
+ { "", 0 }
39
41
 
40
42
  typedef char gumbo_tagset[GUMBO_TAG_LAST];
41
43
  #define TAG(tag) [GUMBO_TAG_##tag] = (1 << GUMBO_NAMESPACE_HTML)
@@ -43,46 +45,42 @@ typedef char gumbo_tagset[GUMBO_TAG_LAST];
43
45
  #define TAG_MATHML(tag) [GUMBO_TAG_##tag] = (1 << GUMBO_NAMESPACE_MATHML)
44
46
 
45
47
  #define TAGSET_INCLUDES(tagset, namespace, tag) \
46
- (tag < GUMBO_TAG_LAST && \
47
- tagset[(int)tag] == (1 << (int)namespace))
48
+ (tag < GUMBO_TAG_LAST && tagset[(int) tag] == (1 << (int) namespace))
48
49
 
50
+ // selected forward declarations as it is getting hard to find
51
+ // an appropriate order
52
+ static bool node_html_tag_is(const GumboNode*, GumboTag);
53
+ static GumboInsertionMode get_current_template_insertion_mode(
54
+ const GumboParser*);
55
+ static bool handle_in_template(GumboParser*, GumboToken*);
56
+ static void destroy_node(GumboParser*, GumboNode*);
49
57
 
50
- static void* malloc_wrapper(void* unused, size_t size) {
51
- return malloc(size);
52
- }
58
+ static void* malloc_wrapper(void* unused, size_t size) { return malloc(size); }
53
59
 
54
- static void free_wrapper(void* unused, void* ptr) {
55
- free(ptr);
56
- }
60
+ static void free_wrapper(void* unused, void* ptr) { free(ptr); }
57
61
 
58
- const GumboOptions kGumboDefaultOptions = {
59
- &malloc_wrapper,
60
- &free_wrapper,
61
- NULL,
62
- 8,
63
- false,
64
- -1,
65
- };
62
+ const GumboOptions kGumboDefaultOptions = {&malloc_wrapper, &free_wrapper, NULL,
63
+ 8, false, -1, GUMBO_TAG_LAST, GUMBO_NAMESPACE_HTML};
66
64
 
67
65
  static const GumboStringPiece kDoctypeHtml = GUMBO_STRING("html");
68
- static const GumboStringPiece kPublicIdHtml4_0 = GUMBO_STRING(
69
- "-//W3C//DTD HTML 4.0//EN");
70
- static const GumboStringPiece kPublicIdHtml4_01 = GUMBO_STRING(
71
- "-//W3C//DTD HTML 4.01//EN");
72
- static const GumboStringPiece kPublicIdXhtml1_0 = GUMBO_STRING(
73
- "-//W3C//DTD XHTML 1.0 Strict//EN");
74
- static const GumboStringPiece kPublicIdXhtml1_1 = GUMBO_STRING(
75
- "-//W3C//DTD XHTML 1.1//EN");
76
- static const GumboStringPiece kSystemIdRecHtml4_0 = GUMBO_STRING(
77
- "http://www.w3.org/TR/REC-html40/strict.dtd");
78
- static const GumboStringPiece kSystemIdHtml4 = GUMBO_STRING(
79
- "http://www.w3.org/TR/html4/strict.dtd");
80
- static const GumboStringPiece kSystemIdXhtmlStrict1_1 = GUMBO_STRING(
81
- "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd");
82
- static const GumboStringPiece kSystemIdXhtml1_1 = GUMBO_STRING(
83
- "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd");
84
- static const GumboStringPiece kSystemIdLegacyCompat = GUMBO_STRING(
85
- "about:legacy-compat");
66
+ static const GumboStringPiece kPublicIdHtml4_0 =
67
+ GUMBO_STRING("-//W3C//DTD HTML 4.0//EN");
68
+ static const GumboStringPiece kPublicIdHtml4_01 =
69
+ GUMBO_STRING("-//W3C//DTD HTML 4.01//EN");
70
+ static const GumboStringPiece kPublicIdXhtml1_0 =
71
+ GUMBO_STRING("-//W3C//DTD XHTML 1.0 Strict//EN");
72
+ static const GumboStringPiece kPublicIdXhtml1_1 =
73
+ GUMBO_STRING("-//W3C//DTD XHTML 1.1//EN");
74
+ static const GumboStringPiece kSystemIdRecHtml4_0 =
75
+ GUMBO_STRING("http://www.w3.org/TR/REC-html40/strict.dtd");
76
+ static const GumboStringPiece kSystemIdHtml4 =
77
+ GUMBO_STRING("http://www.w3.org/TR/html4/strict.dtd");
78
+ static const GumboStringPiece kSystemIdXhtmlStrict1_1 =
79
+ GUMBO_STRING("http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd");
80
+ static const GumboStringPiece kSystemIdXhtml1_1 =
81
+ GUMBO_STRING("http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd");
82
+ static const GumboStringPiece kSystemIdLegacyCompat =
83
+ GUMBO_STRING("about:legacy-compat");
86
84
 
87
85
  // The doctype arrays have an explicit terminator because we want to pass them
88
86
  // to a helper function, and passing them as a pointer discards sizeof
@@ -90,96 +88,86 @@ static const GumboStringPiece kSystemIdLegacyCompat = GUMBO_STRING(
90
88
  // over them use sizeof directly instead of a terminator.
91
89
 
92
90
  static const GumboStringPiece kQuirksModePublicIdPrefixes[] = {
93
- GUMBO_STRING("+//Silmaril//dtd html Pro v0r11 19970101//"),
94
- GUMBO_STRING("-//AdvaSoft Ltd//DTD HTML 3.0 asWedit + extensions//"),
95
- GUMBO_STRING("-//AS//DTD HTML 3.0 asWedit + extensions//"),
96
- GUMBO_STRING("-//IETF//DTD HTML 2.0 Level 1//"),
97
- GUMBO_STRING("-//IETF//DTD HTML 2.0 Level 2//"),
98
- GUMBO_STRING("-//IETF//DTD HTML 2.0 Strict Level 1//"),
99
- GUMBO_STRING("-//IETF//DTD HTML 2.0 Strict Level 2//"),
100
- GUMBO_STRING("-//IETF//DTD HTML 2.0 Strict//"),
101
- GUMBO_STRING("-//IETF//DTD HTML 2.0//"),
102
- GUMBO_STRING("-//IETF//DTD HTML 2.1E//"),
103
- GUMBO_STRING("-//IETF//DTD HTML 3.0//"),
104
- GUMBO_STRING("-//IETF//DTD HTML 3.2 Final//"),
105
- GUMBO_STRING("-//IETF//DTD HTML 3.2//"),
106
- GUMBO_STRING("-//IETF//DTD HTML 3//"),
107
- GUMBO_STRING("-//IETF//DTD HTML Level 0//"),
108
- GUMBO_STRING("-//IETF//DTD HTML Level 1//"),
109
- GUMBO_STRING("-//IETF//DTD HTML Level 2//"),
110
- GUMBO_STRING("-//IETF//DTD HTML Level 3//"),
111
- GUMBO_STRING("-//IETF//DTD HTML Strict Level 0//"),
112
- GUMBO_STRING("-//IETF//DTD HTML Strict Level 1//"),
113
- GUMBO_STRING("-//IETF//DTD HTML Strict Level 2//"),
114
- GUMBO_STRING("-//IETF//DTD HTML Strict Level 3//"),
115
- GUMBO_STRING("-//IETF//DTD HTML Strict//"),
116
- GUMBO_STRING("-//IETF//DTD HTML//"),
117
- GUMBO_STRING("-//Metrius//DTD Metrius Presentational//"),
118
- GUMBO_STRING("-//Microsoft//DTD Internet Explorer 2.0 HTML Strict//"),
119
- GUMBO_STRING("-//Microsoft//DTD Internet Explorer 2.0 HTML//"),
120
- GUMBO_STRING("-//Microsoft//DTD Internet Explorer 2.0 Tables//"),
121
- GUMBO_STRING("-//Microsoft//DTD Internet Explorer 3.0 HTML Strict//"),
122
- GUMBO_STRING("-//Microsoft//DTD Internet Explorer 3.0 HTML//"),
123
- GUMBO_STRING("-//Microsoft//DTD Internet Explorer 3.0 Tables//"),
124
- GUMBO_STRING("-//Netscape Comm. Corp.//DTD HTML//"),
125
- GUMBO_STRING("-//Netscape Comm. Corp.//DTD Strict HTML//"),
126
- GUMBO_STRING("-//O'Reilly and Associates//DTD HTML 2.0//"),
127
- GUMBO_STRING("-//O'Reilly and Associates//DTD HTML Extended 1.0//"),
128
- GUMBO_STRING("-//O'Reilly and Associates//DTD HTML Extended Relaxed 1.0//"),
129
- GUMBO_STRING("-//SoftQuad Software//DTD HoTMetaL PRO 6.0::19990601::)"
130
- "extensions to HTML 4.0//"),
131
- GUMBO_STRING("-//SoftQuad//DTD HoTMetaL PRO 4.0::19971010::"
132
- "extensions to HTML 4.0//"),
133
- GUMBO_STRING("-//Spyglass//DTD HTML 2.0 Extended//"),
134
- GUMBO_STRING("-//SQ//DTD HTML 2.0 HoTMetaL + extensions//"),
135
- GUMBO_STRING("-//Sun Microsystems Corp.//DTD HotJava HTML//"),
136
- GUMBO_STRING("-//Sun Microsystems Corp.//DTD HotJava Strict HTML//"),
137
- GUMBO_STRING("-//W3C//DTD HTML 3 1995-03-24//"),
138
- GUMBO_STRING("-//W3C//DTD HTML 3.2 Draft//"),
139
- GUMBO_STRING("-//W3C//DTD HTML 3.2 Final//"),
140
- GUMBO_STRING("-//W3C//DTD HTML 3.2//"),
141
- GUMBO_STRING("-//W3C//DTD HTML 3.2S Draft//"),
142
- GUMBO_STRING("-//W3C//DTD HTML 4.0 Frameset//"),
143
- GUMBO_STRING("-//W3C//DTD HTML 4.0 Transitional//"),
144
- GUMBO_STRING("-//W3C//DTD HTML Experimental 19960712//"),
145
- GUMBO_STRING("-//W3C//DTD HTML Experimental 970421//"),
146
- GUMBO_STRING("-//W3C//DTD W3 HTML//"),
147
- GUMBO_STRING("-//W3O//DTD W3 HTML 3.0//"),
148
- GUMBO_STRING("-//WebTechs//DTD Mozilla HTML 2.0//"),
149
- GUMBO_STRING("-//WebTechs//DTD Mozilla HTML//"),
150
- TERMINATOR
151
- };
91
+ GUMBO_STRING("+//Silmaril//dtd html Pro v0r11 19970101//"),
92
+ GUMBO_STRING("-//AdvaSoft Ltd//DTD HTML 3.0 asWedit + extensions//"),
93
+ GUMBO_STRING("-//AS//DTD HTML 3.0 asWedit + extensions//"),
94
+ GUMBO_STRING("-//IETF//DTD HTML 2.0 Level 1//"),
95
+ GUMBO_STRING("-//IETF//DTD HTML 2.0 Level 2//"),
96
+ GUMBO_STRING("-//IETF//DTD HTML 2.0 Strict Level 1//"),
97
+ GUMBO_STRING("-//IETF//DTD HTML 2.0 Strict Level 2//"),
98
+ GUMBO_STRING("-//IETF//DTD HTML 2.0 Strict//"),
99
+ GUMBO_STRING("-//IETF//DTD HTML 2.0//"),
100
+ GUMBO_STRING("-//IETF//DTD HTML 2.1E//"),
101
+ GUMBO_STRING("-//IETF//DTD HTML 3.0//"),
102
+ GUMBO_STRING("-//IETF//DTD HTML 3.2 Final//"),
103
+ GUMBO_STRING("-//IETF//DTD HTML 3.2//"),
104
+ GUMBO_STRING("-//IETF//DTD HTML 3//"),
105
+ GUMBO_STRING("-//IETF//DTD HTML Level 0//"),
106
+ GUMBO_STRING("-//IETF//DTD HTML Level 1//"),
107
+ GUMBO_STRING("-//IETF//DTD HTML Level 2//"),
108
+ GUMBO_STRING("-//IETF//DTD HTML Level 3//"),
109
+ GUMBO_STRING("-//IETF//DTD HTML Strict Level 0//"),
110
+ GUMBO_STRING("-//IETF//DTD HTML Strict Level 1//"),
111
+ GUMBO_STRING("-//IETF//DTD HTML Strict Level 2//"),
112
+ GUMBO_STRING("-//IETF//DTD HTML Strict Level 3//"),
113
+ GUMBO_STRING("-//IETF//DTD HTML Strict//"),
114
+ GUMBO_STRING("-//IETF//DTD HTML//"),
115
+ GUMBO_STRING("-//Metrius//DTD Metrius Presentational//"),
116
+ GUMBO_STRING("-//Microsoft//DTD Internet Explorer 2.0 HTML Strict//"),
117
+ GUMBO_STRING("-//Microsoft//DTD Internet Explorer 2.0 HTML//"),
118
+ GUMBO_STRING("-//Microsoft//DTD Internet Explorer 2.0 Tables//"),
119
+ GUMBO_STRING("-//Microsoft//DTD Internet Explorer 3.0 HTML Strict//"),
120
+ GUMBO_STRING("-//Microsoft//DTD Internet Explorer 3.0 HTML//"),
121
+ GUMBO_STRING("-//Microsoft//DTD Internet Explorer 3.0 Tables//"),
122
+ GUMBO_STRING("-//Netscape Comm. Corp.//DTD HTML//"),
123
+ GUMBO_STRING("-//Netscape Comm. Corp.//DTD Strict HTML//"),
124
+ GUMBO_STRING("-//O'Reilly and Associates//DTD HTML 2.0//"),
125
+ GUMBO_STRING("-//O'Reilly and Associates//DTD HTML Extended 1.0//"),
126
+ GUMBO_STRING("-//O'Reilly and Associates//DTD HTML Extended Relaxed 1.0//"),
127
+ GUMBO_STRING(
128
+ "-//SoftQuad Software//DTD HoTMetaL PRO 6.0::19990601::)"
129
+ "extensions to HTML 4.0//"),
130
+ GUMBO_STRING(
131
+ "-//SoftQuad//DTD HoTMetaL PRO 4.0::19971010::"
132
+ "extensions to HTML 4.0//"),
133
+ GUMBO_STRING("-//Spyglass//DTD HTML 2.0 Extended//"),
134
+ GUMBO_STRING("-//SQ//DTD HTML 2.0 HoTMetaL + extensions//"),
135
+ GUMBO_STRING("-//Sun Microsystems Corp.//DTD HotJava HTML//"),
136
+ GUMBO_STRING("-//Sun Microsystems Corp.//DTD HotJava Strict HTML//"),
137
+ GUMBO_STRING("-//W3C//DTD HTML 3 1995-03-24//"),
138
+ GUMBO_STRING("-//W3C//DTD HTML 3.2 Draft//"),
139
+ GUMBO_STRING("-//W3C//DTD HTML 3.2 Final//"),
140
+ GUMBO_STRING("-//W3C//DTD HTML 3.2//"),
141
+ GUMBO_STRING("-//W3C//DTD HTML 3.2S Draft//"),
142
+ GUMBO_STRING("-//W3C//DTD HTML 4.0 Frameset//"),
143
+ GUMBO_STRING("-//W3C//DTD HTML 4.0 Transitional//"),
144
+ GUMBO_STRING("-//W3C//DTD HTML Experimental 19960712//"),
145
+ GUMBO_STRING("-//W3C//DTD HTML Experimental 970421//"),
146
+ GUMBO_STRING("-//W3C//DTD W3 HTML//"),
147
+ GUMBO_STRING("-//W3O//DTD W3 HTML 3.0//"),
148
+ GUMBO_STRING("-//WebTechs//DTD Mozilla HTML 2.0//"),
149
+ GUMBO_STRING("-//WebTechs//DTD Mozilla HTML//"), TERMINATOR};
152
150
 
153
151
  static const GumboStringPiece kQuirksModePublicIdExactMatches[] = {
154
- GUMBO_STRING("-//W3O//DTD W3 HTML Strict 3.0//EN//"),
155
- GUMBO_STRING("-/W3C/DTD HTML 4.0 Transitional/EN"),
156
- GUMBO_STRING("HTML"),
157
- TERMINATOR
158
- };
152
+ GUMBO_STRING("-//W3O//DTD W3 HTML Strict 3.0//EN//"),
153
+ GUMBO_STRING("-/W3C/DTD HTML 4.0 Transitional/EN"), GUMBO_STRING("HTML"),
154
+ TERMINATOR};
159
155
 
160
156
  static const GumboStringPiece kQuirksModeSystemIdExactMatches[] = {
161
- GUMBO_STRING("http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"),
162
- TERMINATOR
163
- };
157
+ GUMBO_STRING("http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"),
158
+ TERMINATOR};
164
159
 
165
160
  static const GumboStringPiece kLimitedQuirksPublicIdPrefixes[] = {
166
- GUMBO_STRING("-//W3C//DTD XHTML 1.0 Frameset//"),
167
- GUMBO_STRING("-//W3C//DTD XHTML 1.0 Transitional//"),
168
- TERMINATOR
169
- };
161
+ GUMBO_STRING("-//W3C//DTD XHTML 1.0 Frameset//"),
162
+ GUMBO_STRING("-//W3C//DTD XHTML 1.0 Transitional//"), TERMINATOR};
170
163
 
171
- static const GumboStringPiece kLimitedQuirksRequiresSystemIdPublicIdPrefixes[] = {
172
- GUMBO_STRING("-//W3C//DTD HTML 4.01 Frameset//"),
173
- GUMBO_STRING("-//W3C//DTD HTML 4.01 Transitional//"),
174
- TERMINATOR
175
- };
164
+ static const GumboStringPiece kLimitedQuirksRequiresSystemIdPublicIdPrefixes[] =
165
+ {GUMBO_STRING("-//W3C//DTD HTML 4.01 Frameset//"),
166
+ GUMBO_STRING("-//W3C//DTD HTML 4.01 Transitional//"), TERMINATOR};
176
167
 
177
168
  // Indexed by GumboNamespaceEnum; keep in sync with that.
178
- static const char* kLegalXmlns[] = {
179
- "http://www.w3.org/1999/xhtml",
180
- "http://www.w3.org/2000/svg",
181
- "http://www.w3.org/1998/Math/MathML"
182
- };
169
+ static const char* kLegalXmlns[] = {"http://www.w3.org/1999/xhtml",
170
+ "http://www.w3.org/2000/svg", "http://www.w3.org/1998/Math/MathML"};
183
171
 
184
172
  typedef struct _ReplacementEntry {
185
173
  const GumboStringPiece from;
@@ -187,112 +175,112 @@ typedef struct _ReplacementEntry {
187
175
  } ReplacementEntry;
188
176
 
189
177
  #define REPLACEMENT_ENTRY(from, to) \
190
- { GUMBO_STRING(from), GUMBO_STRING(to) }
178
+ { GUMBO_STRING(from), GUMBO_STRING(to) }
191
179
 
192
180
  // Static data for SVG attribute replacements.
193
- // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#adjust-svg-attributes
181
+ // https://html.spec.whatwg.org/multipage/syntax.html#creating-and-inserting-nodes
194
182
  static const ReplacementEntry kSvgAttributeReplacements[] = {
195
- REPLACEMENT_ENTRY("attributename", "attributeName"),
196
- REPLACEMENT_ENTRY("attributetype", "attributeType"),
197
- REPLACEMENT_ENTRY("basefrequency", "baseFrequency"),
198
- REPLACEMENT_ENTRY("baseprofile", "baseProfile"),
199
- REPLACEMENT_ENTRY("calcmode", "calcMode"),
200
- REPLACEMENT_ENTRY("clippathunits", "clipPathUnits"),
201
- REPLACEMENT_ENTRY("contentscripttype", "contentScriptType"),
202
- REPLACEMENT_ENTRY("contentstyletype", "contentStyleType"),
203
- REPLACEMENT_ENTRY("diffuseconstant", "diffuseConstant"),
204
- REPLACEMENT_ENTRY("edgemode", "edgeMode"),
205
- REPLACEMENT_ENTRY("externalresourcesrequired", "externalResourcesRequired"),
206
- REPLACEMENT_ENTRY("filterres", "filterRes"),
207
- REPLACEMENT_ENTRY("filterunits", "filterUnits"),
208
- REPLACEMENT_ENTRY("glyphref", "glyphRef"),
209
- REPLACEMENT_ENTRY("gradienttransform", "gradientTransform"),
210
- REPLACEMENT_ENTRY("gradientunits", "gradientUnits"),
211
- REPLACEMENT_ENTRY("kernelmatrix", "kernelMatrix"),
212
- REPLACEMENT_ENTRY("kernelunitlength", "kernelUnitLength"),
213
- REPLACEMENT_ENTRY("keypoints", "keyPoints"),
214
- REPLACEMENT_ENTRY("keysplines", "keySplines"),
215
- REPLACEMENT_ENTRY("keytimes", "keyTimes"),
216
- REPLACEMENT_ENTRY("lengthadjust", "lengthAdjust"),
217
- REPLACEMENT_ENTRY("limitingconeangle", "limitingConeAngle"),
218
- REPLACEMENT_ENTRY("markerheight", "markerHeight"),
219
- REPLACEMENT_ENTRY("markerunits", "markerUnits"),
220
- REPLACEMENT_ENTRY("markerwidth", "markerWidth"),
221
- REPLACEMENT_ENTRY("maskcontentunits", "maskContentUnits"),
222
- REPLACEMENT_ENTRY("maskunits", "maskUnits"),
223
- REPLACEMENT_ENTRY("numoctaves", "numOctaves"),
224
- REPLACEMENT_ENTRY("pathlength", "pathLength"),
225
- REPLACEMENT_ENTRY("patterncontentunits", "patternContentUnits"),
226
- REPLACEMENT_ENTRY("patterntransform", "patternTransform"),
227
- REPLACEMENT_ENTRY("patternunits", "patternUnits"),
228
- REPLACEMENT_ENTRY("pointsatx", "pointsAtX"),
229
- REPLACEMENT_ENTRY("pointsaty", "pointsAtY"),
230
- REPLACEMENT_ENTRY("pointsatz", "pointsAtZ"),
231
- REPLACEMENT_ENTRY("preservealpha", "preserveAlpha"),
232
- REPLACEMENT_ENTRY("preserveaspectratio", "preserveAspectRatio"),
233
- REPLACEMENT_ENTRY("primitiveunits", "primitiveUnits"),
234
- REPLACEMENT_ENTRY("refx", "refX"),
235
- REPLACEMENT_ENTRY("refy", "refY"),
236
- REPLACEMENT_ENTRY("repeatcount", "repeatCount"),
237
- REPLACEMENT_ENTRY("repeatdur", "repeatDur"),
238
- REPLACEMENT_ENTRY("requiredextensions", "requiredExtensions"),
239
- REPLACEMENT_ENTRY("requiredfeatures", "requiredFeatures"),
240
- REPLACEMENT_ENTRY("specularconstant", "specularConstant"),
241
- REPLACEMENT_ENTRY("specularexponent", "specularExponent"),
242
- REPLACEMENT_ENTRY("spreadmethod", "spreadMethod"),
243
- REPLACEMENT_ENTRY("startoffset", "startOffset"),
244
- REPLACEMENT_ENTRY("stddeviation", "stdDeviation"),
245
- REPLACEMENT_ENTRY("stitchtiles", "stitchTiles"),
246
- REPLACEMENT_ENTRY("surfacescale", "surfaceScale"),
247
- REPLACEMENT_ENTRY("systemlanguage", "systemLanguage"),
248
- REPLACEMENT_ENTRY("tablevalues", "tableValues"),
249
- REPLACEMENT_ENTRY("targetx", "targetX"),
250
- REPLACEMENT_ENTRY("targety", "targetY"),
251
- REPLACEMENT_ENTRY("textlength", "textLength"),
252
- REPLACEMENT_ENTRY("viewbox", "viewBox"),
253
- REPLACEMENT_ENTRY("viewtarget", "viewTarget"),
254
- REPLACEMENT_ENTRY("xchannelselector", "xChannelSelector"),
255
- REPLACEMENT_ENTRY("ychannelselector", "yChannelSelector"),
256
- REPLACEMENT_ENTRY("zoomandpan", "zoomAndPan"),
183
+ REPLACEMENT_ENTRY("attributename", "attributeName"),
184
+ REPLACEMENT_ENTRY("attributetype", "attributeType"),
185
+ REPLACEMENT_ENTRY("basefrequency", "baseFrequency"),
186
+ REPLACEMENT_ENTRY("baseprofile", "baseProfile"),
187
+ REPLACEMENT_ENTRY("calcmode", "calcMode"),
188
+ REPLACEMENT_ENTRY("clippathunits", "clipPathUnits"),
189
+ // REPLACEMENT_ENTRY("contentscripttype", "contentScriptType"),
190
+ // REPLACEMENT_ENTRY("contentstyletype", "contentStyleType"),
191
+ REPLACEMENT_ENTRY("diffuseconstant", "diffuseConstant"),
192
+ REPLACEMENT_ENTRY("edgemode", "edgeMode"),
193
+ // REPLACEMENT_ENTRY("externalresourcesrequired",
194
+ // "externalResourcesRequired"),
195
+ // REPLACEMENT_ENTRY("filterres", "filterRes"),
196
+ REPLACEMENT_ENTRY("filterunits", "filterUnits"),
197
+ REPLACEMENT_ENTRY("glyphref", "glyphRef"),
198
+ REPLACEMENT_ENTRY("gradienttransform", "gradientTransform"),
199
+ REPLACEMENT_ENTRY("gradientunits", "gradientUnits"),
200
+ REPLACEMENT_ENTRY("kernelmatrix", "kernelMatrix"),
201
+ REPLACEMENT_ENTRY("kernelunitlength", "kernelUnitLength"),
202
+ REPLACEMENT_ENTRY("keypoints", "keyPoints"),
203
+ REPLACEMENT_ENTRY("keysplines", "keySplines"),
204
+ REPLACEMENT_ENTRY("keytimes", "keyTimes"),
205
+ REPLACEMENT_ENTRY("lengthadjust", "lengthAdjust"),
206
+ REPLACEMENT_ENTRY("limitingconeangle", "limitingConeAngle"),
207
+ REPLACEMENT_ENTRY("markerheight", "markerHeight"),
208
+ REPLACEMENT_ENTRY("markerunits", "markerUnits"),
209
+ REPLACEMENT_ENTRY("markerwidth", "markerWidth"),
210
+ REPLACEMENT_ENTRY("maskcontentunits", "maskContentUnits"),
211
+ REPLACEMENT_ENTRY("maskunits", "maskUnits"),
212
+ REPLACEMENT_ENTRY("numoctaves", "numOctaves"),
213
+ REPLACEMENT_ENTRY("pathlength", "pathLength"),
214
+ REPLACEMENT_ENTRY("patterncontentunits", "patternContentUnits"),
215
+ REPLACEMENT_ENTRY("patterntransform", "patternTransform"),
216
+ REPLACEMENT_ENTRY("patternunits", "patternUnits"),
217
+ REPLACEMENT_ENTRY("pointsatx", "pointsAtX"),
218
+ REPLACEMENT_ENTRY("pointsaty", "pointsAtY"),
219
+ REPLACEMENT_ENTRY("pointsatz", "pointsAtZ"),
220
+ REPLACEMENT_ENTRY("preservealpha", "preserveAlpha"),
221
+ REPLACEMENT_ENTRY("preserveaspectratio", "preserveAspectRatio"),
222
+ REPLACEMENT_ENTRY("primitiveunits", "primitiveUnits"),
223
+ REPLACEMENT_ENTRY("refx", "refX"), REPLACEMENT_ENTRY("refy", "refY"),
224
+ REPLACEMENT_ENTRY("repeatcount", "repeatCount"),
225
+ REPLACEMENT_ENTRY("repeatdur", "repeatDur"),
226
+ REPLACEMENT_ENTRY("requiredextensions", "requiredExtensions"),
227
+ REPLACEMENT_ENTRY("requiredfeatures", "requiredFeatures"),
228
+ REPLACEMENT_ENTRY("specularconstant", "specularConstant"),
229
+ REPLACEMENT_ENTRY("specularexponent", "specularExponent"),
230
+ REPLACEMENT_ENTRY("spreadmethod", "spreadMethod"),
231
+ REPLACEMENT_ENTRY("startoffset", "startOffset"),
232
+ REPLACEMENT_ENTRY("stddeviation", "stdDeviation"),
233
+ REPLACEMENT_ENTRY("stitchtiles", "stitchTiles"),
234
+ REPLACEMENT_ENTRY("surfacescale", "surfaceScale"),
235
+ REPLACEMENT_ENTRY("systemlanguage", "systemLanguage"),
236
+ REPLACEMENT_ENTRY("tablevalues", "tableValues"),
237
+ REPLACEMENT_ENTRY("targetx", "targetX"),
238
+ REPLACEMENT_ENTRY("targety", "targetY"),
239
+ REPLACEMENT_ENTRY("textlength", "textLength"),
240
+ REPLACEMENT_ENTRY("viewbox", "viewBox"),
241
+ REPLACEMENT_ENTRY("viewtarget", "viewTarget"),
242
+ REPLACEMENT_ENTRY("xchannelselector", "xChannelSelector"),
243
+ REPLACEMENT_ENTRY("ychannelselector", "yChannelSelector"),
244
+ REPLACEMENT_ENTRY("zoomandpan", "zoomAndPan"),
257
245
  };
258
246
 
259
247
  static const ReplacementEntry kSvgTagReplacements[] = {
260
- REPLACEMENT_ENTRY("altglyph", "altGlyph"),
261
- REPLACEMENT_ENTRY("altglyphdef", "altGlyphDef"),
262
- REPLACEMENT_ENTRY("altglyphitem", "altGlyphItem"),
263
- REPLACEMENT_ENTRY("animatecolor", "animateColor"),
264
- REPLACEMENT_ENTRY("animatemotion", "animateMotion"),
265
- REPLACEMENT_ENTRY("animatetransform", "animateTransform"),
266
- REPLACEMENT_ENTRY("clippath", "clipPath"),
267
- REPLACEMENT_ENTRY("feblend", "feBlend"),
268
- REPLACEMENT_ENTRY("fecolormatrix", "feColorMatrix"),
269
- REPLACEMENT_ENTRY("fecomponenttransfer", "feComponentTransfer"),
270
- REPLACEMENT_ENTRY("fecomposite", "feComposite"),
271
- REPLACEMENT_ENTRY("feconvolvematrix", "feConvolveMatrix"),
272
- REPLACEMENT_ENTRY("fediffuselighting", "feDiffuseLighting"),
273
- REPLACEMENT_ENTRY("fedisplacementmap", "feDisplacementMap"),
274
- REPLACEMENT_ENTRY("fedistantlight", "feDistantLight"),
275
- REPLACEMENT_ENTRY("feflood", "feFlood"),
276
- REPLACEMENT_ENTRY("fefunca", "feFuncA"),
277
- REPLACEMENT_ENTRY("fefuncb", "feFuncB"),
278
- REPLACEMENT_ENTRY("fefuncg", "feFuncG"),
279
- REPLACEMENT_ENTRY("fefuncr", "feFuncR"),
280
- REPLACEMENT_ENTRY("fegaussianblur", "feGaussianBlur"),
281
- REPLACEMENT_ENTRY("feimage", "feImage"),
282
- REPLACEMENT_ENTRY("femerge", "feMerge"),
283
- REPLACEMENT_ENTRY("femergenode", "feMergeNode"),
284
- REPLACEMENT_ENTRY("femorphology", "feMorphology"),
285
- REPLACEMENT_ENTRY("feoffset", "feOffset"),
286
- REPLACEMENT_ENTRY("fepointlight", "fePointLight"),
287
- REPLACEMENT_ENTRY("fespecularlighting", "feSpecularLighting"),
288
- REPLACEMENT_ENTRY("fespotlight", "feSpotLight"),
289
- REPLACEMENT_ENTRY("fetile", "feTile"),
290
- REPLACEMENT_ENTRY("feturbulence", "feTurbulence"),
291
- REPLACEMENT_ENTRY("foreignobject", "foreignObject"),
292
- REPLACEMENT_ENTRY("glyphref", "glyphRef"),
293
- REPLACEMENT_ENTRY("lineargradient", "linearGradient"),
294
- REPLACEMENT_ENTRY("radialgradient", "radialGradient"),
295
- REPLACEMENT_ENTRY("textpath", "textPath"),
248
+ REPLACEMENT_ENTRY("altglyph", "altGlyph"),
249
+ REPLACEMENT_ENTRY("altglyphdef", "altGlyphDef"),
250
+ REPLACEMENT_ENTRY("altglyphitem", "altGlyphItem"),
251
+ REPLACEMENT_ENTRY("animatecolor", "animateColor"),
252
+ REPLACEMENT_ENTRY("animatemotion", "animateMotion"),
253
+ REPLACEMENT_ENTRY("animatetransform", "animateTransform"),
254
+ REPLACEMENT_ENTRY("clippath", "clipPath"),
255
+ REPLACEMENT_ENTRY("feblend", "feBlend"),
256
+ REPLACEMENT_ENTRY("fecolormatrix", "feColorMatrix"),
257
+ REPLACEMENT_ENTRY("fecomponenttransfer", "feComponentTransfer"),
258
+ REPLACEMENT_ENTRY("fecomposite", "feComposite"),
259
+ REPLACEMENT_ENTRY("feconvolvematrix", "feConvolveMatrix"),
260
+ REPLACEMENT_ENTRY("fediffuselighting", "feDiffuseLighting"),
261
+ REPLACEMENT_ENTRY("fedisplacementmap", "feDisplacementMap"),
262
+ REPLACEMENT_ENTRY("fedistantlight", "feDistantLight"),
263
+ REPLACEMENT_ENTRY("feflood", "feFlood"),
264
+ REPLACEMENT_ENTRY("fefunca", "feFuncA"),
265
+ REPLACEMENT_ENTRY("fefuncb", "feFuncB"),
266
+ REPLACEMENT_ENTRY("fefuncg", "feFuncG"),
267
+ REPLACEMENT_ENTRY("fefuncr", "feFuncR"),
268
+ REPLACEMENT_ENTRY("fegaussianblur", "feGaussianBlur"),
269
+ REPLACEMENT_ENTRY("feimage", "feImage"),
270
+ REPLACEMENT_ENTRY("femerge", "feMerge"),
271
+ REPLACEMENT_ENTRY("femergenode", "feMergeNode"),
272
+ REPLACEMENT_ENTRY("femorphology", "feMorphology"),
273
+ REPLACEMENT_ENTRY("feoffset", "feOffset"),
274
+ REPLACEMENT_ENTRY("fepointlight", "fePointLight"),
275
+ REPLACEMENT_ENTRY("fespecularlighting", "feSpecularLighting"),
276
+ REPLACEMENT_ENTRY("fespotlight", "feSpotLight"),
277
+ REPLACEMENT_ENTRY("fetile", "feTile"),
278
+ REPLACEMENT_ENTRY("feturbulence", "feTurbulence"),
279
+ REPLACEMENT_ENTRY("foreignobject", "foreignObject"),
280
+ REPLACEMENT_ENTRY("glyphref", "glyphRef"),
281
+ REPLACEMENT_ENTRY("lineargradient", "linearGradient"),
282
+ REPLACEMENT_ENTRY("radialgradient", "radialGradient"),
283
+ REPLACEMENT_ENTRY("textpath", "textPath"),
296
284
  };
297
285
 
298
286
  typedef struct _NamespacedAttributeReplacement {
@@ -302,18 +290,18 @@ typedef struct _NamespacedAttributeReplacement {
302
290
  } NamespacedAttributeReplacement;
303
291
 
304
292
  static const NamespacedAttributeReplacement kForeignAttributeReplacements[] = {
305
- { "xlink:actuate", "actuate", GUMBO_ATTR_NAMESPACE_XLINK },
306
- { "xlink:actuate", "actuate", GUMBO_ATTR_NAMESPACE_XLINK },
307
- { "xlink:href", "href", GUMBO_ATTR_NAMESPACE_XLINK },
308
- { "xlink:role", "role", GUMBO_ATTR_NAMESPACE_XLINK },
309
- { "xlink:show", "show", GUMBO_ATTR_NAMESPACE_XLINK },
310
- { "xlink:title", "title", GUMBO_ATTR_NAMESPACE_XLINK },
311
- { "xlink:type", "type", GUMBO_ATTR_NAMESPACE_XLINK },
312
- { "xml:base", "base", GUMBO_ATTR_NAMESPACE_XML },
313
- { "xml:lang", "lang", GUMBO_ATTR_NAMESPACE_XML },
314
- { "xml:space", "space", GUMBO_ATTR_NAMESPACE_XML },
315
- { "xmlns", "xmlns", GUMBO_ATTR_NAMESPACE_XMLNS },
316
- { "xmlns:xlink", "xlink", GUMBO_ATTR_NAMESPACE_XMLNS },
293
+ {"xlink:actuate", "actuate", GUMBO_ATTR_NAMESPACE_XLINK},
294
+ {"xlink:actuate", "actuate", GUMBO_ATTR_NAMESPACE_XLINK},
295
+ {"xlink:href", "href", GUMBO_ATTR_NAMESPACE_XLINK},
296
+ {"xlink:role", "role", GUMBO_ATTR_NAMESPACE_XLINK},
297
+ {"xlink:show", "show", GUMBO_ATTR_NAMESPACE_XLINK},
298
+ {"xlink:title", "title", GUMBO_ATTR_NAMESPACE_XLINK},
299
+ {"xlink:type", "type", GUMBO_ATTR_NAMESPACE_XLINK},
300
+ {"xml:base", "base", GUMBO_ATTR_NAMESPACE_XML},
301
+ {"xml:lang", "lang", GUMBO_ATTR_NAMESPACE_XML},
302
+ {"xml:space", "space", GUMBO_ATTR_NAMESPACE_XML},
303
+ {"xmlns", "xmlns", GUMBO_ATTR_NAMESPACE_XMLNS},
304
+ {"xmlns:xlink", "xlink", GUMBO_ATTR_NAMESPACE_XMLNS},
317
305
  };
318
306
 
319
307
  // The "scope marker" for the list of active formatting elements. We use a
@@ -371,6 +359,9 @@ typedef struct GumboInternalParserState {
371
359
  GumboNode* _head_element;
372
360
  GumboNode* _form_element;
373
361
 
362
+ // The element used as fragment context when parsing in fragment mode
363
+ GumboNode* _fragment_ctx;
364
+
374
365
  // The flag for when the spec says "Reprocess the current token in..."
375
366
  bool _reprocess_current_token;
376
367
 
@@ -427,14 +418,14 @@ static bool attribute_matches(
427
418
  static bool attribute_matches_case_sensitive(
428
419
  const GumboVector* attributes, const char* name, const char* value) {
429
420
  const GumboAttribute* attr = gumbo_get_attribute(attributes, name);
430
- return attr ? strcmp(value, attr->value) == 0 : false;
421
+ return attr ? strcmp(value, attr->value) == 0 : false;
431
422
  }
432
423
 
433
424
  // Checks if the specified attribute vectors are identical.
434
425
  static bool all_attributes_match(
435
426
  const GumboVector* attr1, const GumboVector* attr2) {
436
- int num_unmatched_attr2_elements = attr2->length;
437
- for (int i = 0; i < attr1->length; ++i) {
427
+ unsigned int num_unmatched_attr2_elements = attr2->length;
428
+ for (unsigned int i = 0; i < attr1->length; ++i) {
438
429
  const GumboAttribute* attr = attr1->data[i];
439
430
  if (attribute_matches_case_sensitive(attr2, attr->name, attr->value)) {
440
431
  --num_unmatched_attr2_elements;
@@ -462,8 +453,7 @@ static GumboNode* create_node(GumboParser* parser, GumboNodeType type) {
462
453
  static GumboNode* new_document_node(GumboParser* parser) {
463
454
  GumboNode* document_node = create_node(parser, GUMBO_NODE_DOCUMENT);
464
455
  document_node->parse_flags = GUMBO_INSERTION_BY_PARSER;
465
- gumbo_vector_init(
466
- parser, 1, &document_node->v.document.children);
456
+ gumbo_vector_init(parser, 1, &document_node->v.document.children);
467
457
 
468
458
  // Must be initialized explicitly, as there's no guarantee that we'll see a
469
459
  // doc type token.
@@ -498,6 +488,7 @@ static void parser_state_init(GumboParser* parser) {
498
488
  gumbo_vector_init(parser, 5, &parser_state->_template_insertion_modes);
499
489
  parser_state->_head_element = NULL;
500
490
  parser_state->_form_element = NULL;
491
+ parser_state->_fragment_ctx = NULL;
501
492
  parser_state->_current_token = NULL;
502
493
  parser_state->_closed_body_tag = false;
503
494
  parser_state->_closed_html_tag = false;
@@ -506,6 +497,9 @@ static void parser_state_init(GumboParser* parser) {
506
497
 
507
498
  static void parser_state_destroy(GumboParser* parser) {
508
499
  GumboParserState* state = parser->_parser_state;
500
+ if (state->_fragment_ctx) {
501
+ destroy_node(parser, state->_fragment_ctx);
502
+ }
509
503
  gumbo_vector_destroy(parser, &state->_active_formatting_elements);
510
504
  gumbo_vector_destroy(parser, &state->_open_elements);
511
505
  gumbo_vector_destroy(parser, &state->_template_insertion_modes);
@@ -517,6 +511,10 @@ static GumboNode* get_document_node(GumboParser* parser) {
517
511
  return parser->_output->document;
518
512
  }
519
513
 
514
+ static bool is_fragment_parser(const GumboParser* parser) {
515
+ return !!parser->_parser_state->_fragment_ctx;
516
+ }
517
+
520
518
  // Returns the node at the bottom of the stack of open elements, or NULL if no
521
519
  // elements have been added yet.
522
520
  static GumboNode* get_current_node(GumboParser* parser) {
@@ -530,6 +528,14 @@ static GumboNode* get_current_node(GumboParser* parser) {
530
528
  return open_elements->data[open_elements->length - 1];
531
529
  }
532
530
 
531
+ static GumboNode* get_adjusted_current_node(GumboParser* parser) {
532
+ GumboParserState* state = parser->_parser_state;
533
+ if (state->_open_elements.length == 1 && state->_fragment_ctx) {
534
+ return state->_fragment_ctx;
535
+ }
536
+ return get_current_node(parser);
537
+ }
538
+
533
539
  // Returns true if the given needle is in the given array of literal
534
540
  // GumboStringPieces. If exact_match is true, this requires that they match
535
541
  // exactly; otherwise, this performs a prefix match to check if any of the
@@ -537,7 +543,7 @@ static GumboNode* get_current_node(GumboParser* parser) {
537
543
  // case-insensitive match.
538
544
  static bool is_in_static_list(
539
545
  const char* needle, const GumboStringPiece* haystack, bool exact_match) {
540
- for (int i = 0; haystack[i].length > 0; ++i) {
546
+ for (unsigned int i = 0; haystack[i].length > 0; ++i) {
541
547
  if ((exact_match && !strcmp(needle, haystack[i].data)) ||
542
548
  (!exact_match && !strcasecmp(needle, haystack[i].data))) {
543
549
  return true;
@@ -556,39 +562,63 @@ static void set_insertion_mode(GumboParser* parser, GumboInsertionMode mode) {
556
562
  // indicate that there is no appropriate insertion mode, and the loop should
557
563
  // continue.
558
564
  static GumboInsertionMode get_appropriate_insertion_mode(
559
- const GumboNode* node, bool is_last) {
560
- assert(node->type == GUMBO_NODE_ELEMENT);
565
+ const GumboParser* parser, int index) {
566
+ const GumboVector* open_elements = &parser->_parser_state->_open_elements;
567
+ const GumboNode* node = open_elements->data[index];
568
+ const bool is_last = index == 0;
561
569
 
562
- if (node->v.element.tag_namespace == GUMBO_NAMESPACE_HTML) {
563
- switch (node->v.element.tag) {
564
- case GUMBO_TAG_SELECT:
570
+ if (is_last && is_fragment_parser(parser)) {
571
+ node = parser->_parser_state->_fragment_ctx;
572
+ }
573
+
574
+ assert(node->type == GUMBO_NODE_ELEMENT || node->type == GUMBO_NODE_TEMPLATE);
575
+ switch (node->v.element.tag) {
576
+ case GUMBO_TAG_SELECT: {
577
+ if (is_last) {
578
+ return GUMBO_INSERTION_MODE_IN_SELECT;
579
+ }
580
+ for (int i = index; i > 0; --i) {
581
+ const GumboNode* ancestor = open_elements->data[i];
582
+ if (node_html_tag_is(ancestor, GUMBO_TAG_TEMPLATE)) {
565
583
  return GUMBO_INSERTION_MODE_IN_SELECT;
566
- case GUMBO_TAG_TD:
567
- case GUMBO_TAG_TH:
568
- return is_last ?
569
- GUMBO_INSERTION_MODE_IN_BODY : GUMBO_INSERTION_MODE_IN_CELL;
570
- case GUMBO_TAG_TR:
571
- return GUMBO_INSERTION_MODE_IN_ROW;
572
- case GUMBO_TAG_TBODY:
573
- case GUMBO_TAG_THEAD:
574
- case GUMBO_TAG_TFOOT:
575
- return GUMBO_INSERTION_MODE_IN_TABLE_BODY;
576
- case GUMBO_TAG_CAPTION:
577
- return GUMBO_INSERTION_MODE_IN_CAPTION;
578
- case GUMBO_TAG_COLGROUP:
579
- return GUMBO_INSERTION_MODE_IN_COLUMN_GROUP;
580
- case GUMBO_TAG_TABLE:
581
- return GUMBO_INSERTION_MODE_IN_TABLE;
582
- case GUMBO_TAG_HEAD:
583
- case GUMBO_TAG_BODY:
584
- return GUMBO_INSERTION_MODE_IN_BODY;
585
- case GUMBO_TAG_FRAMESET:
586
- return GUMBO_INSERTION_MODE_IN_FRAMESET;
587
- case GUMBO_TAG_HTML:
588
- return GUMBO_INSERTION_MODE_BEFORE_HEAD;
589
- default:
590
- break;
584
+ }
585
+ if (node_html_tag_is(ancestor, GUMBO_TAG_TABLE)) {
586
+ return GUMBO_INSERTION_MODE_IN_SELECT_IN_TABLE;
587
+ }
588
+ }
589
+ return GUMBO_INSERTION_MODE_IN_SELECT;
591
590
  }
591
+ case GUMBO_TAG_TD:
592
+ case GUMBO_TAG_TH:
593
+ if (!is_last) return GUMBO_INSERTION_MODE_IN_CELL;
594
+ break;
595
+ case GUMBO_TAG_TR:
596
+ return GUMBO_INSERTION_MODE_IN_ROW;
597
+ case GUMBO_TAG_TBODY:
598
+ case GUMBO_TAG_THEAD:
599
+ case GUMBO_TAG_TFOOT:
600
+ return GUMBO_INSERTION_MODE_IN_TABLE_BODY;
601
+ case GUMBO_TAG_CAPTION:
602
+ return GUMBO_INSERTION_MODE_IN_CAPTION;
603
+ case GUMBO_TAG_COLGROUP:
604
+ return GUMBO_INSERTION_MODE_IN_COLUMN_GROUP;
605
+ case GUMBO_TAG_TABLE:
606
+ return GUMBO_INSERTION_MODE_IN_TABLE;
607
+ case GUMBO_TAG_TEMPLATE:
608
+ return get_current_template_insertion_mode(parser);
609
+ case GUMBO_TAG_HEAD:
610
+ if (!is_last) return GUMBO_INSERTION_MODE_IN_HEAD;
611
+ break;
612
+ case GUMBO_TAG_BODY:
613
+ return GUMBO_INSERTION_MODE_IN_BODY;
614
+ case GUMBO_TAG_FRAMESET:
615
+ return GUMBO_INSERTION_MODE_IN_FRAMESET;
616
+ case GUMBO_TAG_HTML:
617
+ return parser->_parser_state->_head_element
618
+ ? GUMBO_INSERTION_MODE_AFTER_HEAD
619
+ : GUMBO_INSERTION_MODE_BEFORE_HEAD;
620
+ default:
621
+ break;
592
622
  }
593
623
  return is_last ? GUMBO_INSERTION_MODE_IN_BODY : GUMBO_INSERTION_MODE_INITIAL;
594
624
  }
@@ -596,9 +626,8 @@ static GumboInsertionMode get_appropriate_insertion_mode(
596
626
  // This performs the actual "reset the insertion mode" loop.
597
627
  static void reset_insertion_mode_appropriately(GumboParser* parser) {
598
628
  const GumboVector* open_elements = &parser->_parser_state->_open_elements;
599
- for (int i = open_elements->length; --i >= 0; ) {
600
- GumboInsertionMode mode =
601
- get_appropriate_insertion_mode(open_elements->data[i], i == 0);
629
+ for (int i = open_elements->length; --i >= 0;) {
630
+ GumboInsertionMode mode = get_appropriate_insertion_mode(parser, i);
602
631
  if (mode != GUMBO_INSERTION_MODE_INITIAL) {
603
632
  set_insertion_mode(parser, mode);
604
633
  return;
@@ -609,7 +638,8 @@ static void reset_insertion_mode_appropriately(GumboParser* parser) {
609
638
  assert(0);
610
639
  }
611
640
 
612
- static GumboError* parser_add_parse_error(GumboParser* parser, const GumboToken* token) {
641
+ static GumboError* parser_add_parse_error(
642
+ GumboParser* parser, const GumboToken* token) {
613
643
  gumbo_debug("Adding parse error.\n");
614
644
  GumboError* error = gumbo_add_error(parser);
615
645
  if (!error) {
@@ -628,13 +658,14 @@ static GumboError* parser_add_parse_error(GumboParser* parser, const GumboToken*
628
658
  }
629
659
  GumboParserState* state = parser->_parser_state;
630
660
  extra_data->parser_state = state->_insertion_mode;
631
- gumbo_vector_init(parser, state->_open_elements.length,
632
- &extra_data->tag_stack);
633
- for (int i = 0; i < state->_open_elements.length; ++i) {
661
+ gumbo_vector_init(
662
+ parser, state->_open_elements.length, &extra_data->tag_stack);
663
+ for (unsigned int i = 0; i < state->_open_elements.length; ++i) {
634
664
  const GumboNode* node = state->_open_elements.data[i];
635
- assert(node->type == GUMBO_NODE_ELEMENT);
636
- gumbo_vector_add(parser, (void*) node->v.element.tag,
637
- &extra_data->tag_stack);
665
+ assert(
666
+ node->type == GUMBO_NODE_ELEMENT || node->type == GUMBO_NODE_TEMPLATE);
667
+ gumbo_vector_add(
668
+ parser, (void*) node->v.element.tag, &extra_data->tag_stack);
638
669
  }
639
670
  return error;
640
671
  }
@@ -643,7 +674,8 @@ static GumboError* parser_add_parse_error(GumboParser* parser, const GumboToken*
643
674
  // by is_start) with one of the tag types in the varargs list. Terminate the
644
675
  // list with GUMBO_TAG_LAST; this functions as a sentinel since no portion of
645
676
  // the spec references tags that are not in the spec.
646
- static bool tag_in(const GumboToken* token, bool is_start, const gumbo_tagset tags) {
677
+ static bool tag_in(
678
+ const GumboToken* token, bool is_start, const gumbo_tagset tags) {
647
679
  GumboTag token_tag;
648
680
  if (is_start && token->type == GUMBO_TOKEN_START_TAG) {
649
681
  token_tag = token->v.start_tag.tag;
@@ -652,7 +684,7 @@ static bool tag_in(const GumboToken* token, bool is_start, const gumbo_tagset ta
652
684
  } else {
653
685
  return false;
654
686
  }
655
- return (token_tag < GUMBO_TAG_LAST && tags[(int)token_tag] != 0);
687
+ return (token_tag < GUMBO_TAG_LAST && tags[(int) token_tag] != 0);
656
688
  }
657
689
 
658
690
  // Like tag_in, but for the single-tag case.
@@ -669,41 +701,123 @@ static bool tag_is(const GumboToken* token, bool is_start, GumboTag tag) {
669
701
  // Like tag_in, but checks for the tag of a node, rather than a token.
670
702
  static bool node_tag_in_set(const GumboNode* node, const gumbo_tagset tags) {
671
703
  assert(node != NULL);
672
- if (node->type != GUMBO_NODE_ELEMENT) {
704
+ if (node->type != GUMBO_NODE_ELEMENT && node->type != GUMBO_NODE_TEMPLATE) {
673
705
  return false;
674
706
  }
675
- return TAGSET_INCLUDES(tags, node->v.element.tag_namespace, node->v.element.tag);
707
+ return TAGSET_INCLUDES(
708
+ tags, node->v.element.tag_namespace, node->v.element.tag);
676
709
  }
677
710
 
678
-
679
711
  // Like node_tag_in, but for the single-tag case.
680
- static bool node_qualified_tag_is(const GumboNode* node, GumboNamespaceEnum ns, GumboTag tag) {
681
- return node->type == GUMBO_NODE_ELEMENT &&
682
- node->v.element.tag == tag &&
683
- node->v.element.tag_namespace == ns;
712
+ static bool node_qualified_tag_is(
713
+ const GumboNode* node, GumboNamespaceEnum ns, GumboTag tag) {
714
+ assert(node);
715
+ return (node->type == GUMBO_NODE_ELEMENT ||
716
+ node->type == GUMBO_NODE_TEMPLATE) &&
717
+ node->v.element.tag == tag && node->v.element.tag_namespace == ns;
684
718
  }
685
719
 
686
720
  // Like node_tag_in, but for the single-tag case in the HTML namespace
687
- static bool node_html_tag_is(const GumboNode* node, GumboTag tag)
688
- {
721
+ static bool node_html_tag_is(const GumboNode* node, GumboTag tag) {
689
722
  return node_qualified_tag_is(node, GUMBO_NAMESPACE_HTML, tag);
690
723
  }
691
724
 
725
+ static void push_template_insertion_mode(
726
+ GumboParser* parser, GumboInsertionMode mode) {
727
+ gumbo_vector_add(
728
+ parser, (void*) mode, &parser->_parser_state->_template_insertion_modes);
729
+ }
730
+
731
+ static void pop_template_insertion_mode(GumboParser* parser) {
732
+ gumbo_vector_pop(parser, &parser->_parser_state->_template_insertion_modes);
733
+ }
734
+
735
+ // Returns the current template insertion mode. If the stack of template
736
+ // insertion modes is empty, this returns GUMBO_INSERTION_MODE_INITIAL.
737
+ static GumboInsertionMode get_current_template_insertion_mode(
738
+ const GumboParser* parser) {
739
+ GumboVector* template_insertion_modes =
740
+ &parser->_parser_state->_template_insertion_modes;
741
+ if (template_insertion_modes->length == 0) {
742
+ return GUMBO_INSERTION_MODE_INITIAL;
743
+ }
744
+ return (GumboInsertionMode)
745
+ template_insertion_modes->data[(template_insertion_modes->length - 1)];
746
+ }
692
747
 
693
748
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#mathml-text-integration-point
694
749
  static bool is_mathml_integration_point(const GumboNode* node) {
695
- return node_tag_in_set(node, (gumbo_tagset) { TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN),
696
- TAG_MATHML(MS), TAG_MATHML(MTEXT) });
750
+ return node_tag_in_set(
751
+ node, (gumbo_tagset){TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN),
752
+ TAG_MATHML(MS), TAG_MATHML(MTEXT)});
697
753
  }
698
754
 
699
755
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#html-integration-point
700
756
  static bool is_html_integration_point(const GumboNode* node) {
701
- return node_tag_in_set(node, (gumbo_tagset) { TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC), TAG_SVG(TITLE) }) ||
702
- (node_qualified_tag_is(node, GUMBO_NAMESPACE_MATHML, GUMBO_TAG_ANNOTATION_XML) && (
703
- attribute_matches(&node->v.element.attributes,
704
- "encoding", "text/html") ||
705
- attribute_matches(&node->v.element.attributes,
706
- "encoding", "application/xhtml+xml")));
757
+ return node_tag_in_set(node, (gumbo_tagset){TAG_SVG(FOREIGNOBJECT),
758
+ TAG_SVG(DESC), TAG_SVG(TITLE)}) ||
759
+ (node_qualified_tag_is(
760
+ node, GUMBO_NAMESPACE_MATHML, GUMBO_TAG_ANNOTATION_XML) &&
761
+ (attribute_matches(
762
+ &node->v.element.attributes, "encoding", "text/html") ||
763
+ attribute_matches(&node->v.element.attributes, "encoding",
764
+ "application/xhtml+xml")));
765
+ }
766
+
767
+ // This represents a place to insert a node, consisting of a target parent and a
768
+ // child index within that parent. If the node should be inserted at the end of
769
+ // the parent's children, index will be -1.
770
+ typedef struct {
771
+ GumboNode* target;
772
+ int index;
773
+ } InsertionLocation;
774
+
775
+ InsertionLocation get_appropriate_insertion_location(
776
+ GumboParser* parser, GumboNode* override_target) {
777
+ InsertionLocation retval = {override_target, -1};
778
+ if (retval.target == NULL) {
779
+ // No override target; default to the current node, but special-case the
780
+ // root node since get_current_node() assumes the stack of open elements is
781
+ // non-empty.
782
+ retval.target = parser->_output->root != NULL ? get_current_node(parser)
783
+ : get_document_node(parser);
784
+ }
785
+ if (!parser->_parser_state->_foster_parent_insertions ||
786
+ !node_tag_in_set(retval.target, (gumbo_tagset){TAG(TABLE), TAG(TBODY),
787
+ TAG(TFOOT), TAG(THEAD), TAG(TR)})) {
788
+ return retval;
789
+ }
790
+
791
+ // Foster-parenting case.
792
+ int last_template_index = -1;
793
+ int last_table_index = -1;
794
+ GumboVector* open_elements = &parser->_parser_state->_open_elements;
795
+ for (unsigned int i = 0; i < open_elements->length; ++i) {
796
+ if (node_html_tag_is(open_elements->data[i], GUMBO_TAG_TEMPLATE)) {
797
+ last_template_index = i;
798
+ }
799
+ if (node_html_tag_is(open_elements->data[i], GUMBO_TAG_TABLE)) {
800
+ last_table_index = i;
801
+ }
802
+ }
803
+ if (last_template_index != -1 &&
804
+ (last_table_index == -1 || last_template_index > last_table_index)) {
805
+ retval.target = open_elements->data[last_template_index];
806
+ return retval;
807
+ }
808
+ if (last_table_index == -1) {
809
+ retval.target = open_elements->data[0];
810
+ return retval;
811
+ }
812
+ GumboNode* last_table = open_elements->data[last_table_index];
813
+ if (last_table->parent != NULL) {
814
+ retval.target = last_table->parent;
815
+ retval.index = last_table->index_within_parent;
816
+ return retval;
817
+ }
818
+
819
+ retval.target = open_elements->data[last_table_index - 1];
820
+ return retval;
707
821
  }
708
822
 
709
823
  // Appends a node to the end of its parent, setting the "parent" and
@@ -713,7 +827,8 @@ static void append_node(
713
827
  assert(node->parent == NULL);
714
828
  assert(node->index_within_parent == -1);
715
829
  GumboVector* children;
716
- if (parent->type == GUMBO_NODE_ELEMENT) {
830
+ if (parent->type == GUMBO_NODE_ELEMENT ||
831
+ parent->type == GUMBO_NODE_TEMPLATE) {
717
832
  children = &parent->v.element.children;
718
833
  } else {
719
834
  assert(parent->type == GUMBO_NODE_DOCUMENT);
@@ -725,64 +840,41 @@ static void append_node(
725
840
  assert(node->index_within_parent < children->length);
726
841
  }
727
842
 
728
- // Inserts a node at the specified index within its parent, updating the
843
+ // Inserts a node at the specified InsertionLocation, updating the
729
844
  // "parent" and "index_within_parent" fields of it and all its siblings.
845
+ // If the index of the location is -1, this calls append_node.
730
846
  static void insert_node(
731
- GumboParser* parser, GumboNode* parent, int index, GumboNode* node) {
847
+ GumboParser* parser, GumboNode* node, InsertionLocation location) {
732
848
  assert(node->parent == NULL);
733
849
  assert(node->index_within_parent == -1);
734
- assert(parent->type == GUMBO_NODE_ELEMENT);
735
- GumboVector* children = &parent->v.element.children;
736
- assert(index >= 0);
737
- assert(index < children->length);
738
- node->parent = parent;
739
- node->index_within_parent = index;
740
- gumbo_vector_insert_at(parser, (void*) node, index, children);
741
- assert(node->index_within_parent < children->length);
742
- for (int i = index + 1; i < children->length; ++i) {
743
- GumboNode* sibling = children->data[i];
744
- sibling->index_within_parent = i;
745
- assert(sibling->index_within_parent < children->length);
746
- }
747
- }
850
+ GumboNode* parent = location.target;
851
+ int index = location.index;
852
+ if (index != -1) {
853
+ GumboVector* children = NULL;
854
+ if (parent->type == GUMBO_NODE_ELEMENT ||
855
+ parent->type == GUMBO_NODE_TEMPLATE) {
856
+ children = &parent->v.element.children;
857
+ } else if (parent->type == GUMBO_NODE_DOCUMENT) {
858
+ children = &parent->v.document.children;
859
+ assert(children->length == 0);
860
+ } else {
861
+ assert(0);
862
+ }
748
863
 
749
- // http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#foster-parenting
750
- static void foster_parent_element(GumboParser* parser, GumboNode* node) {
751
- GumboVector* open_elements = &parser->_parser_state->_open_elements;
752
- assert(open_elements->length > 2);
753
-
754
- node->parse_flags |= GUMBO_INSERTION_FOSTER_PARENTED;
755
- GumboNode* foster_parent_element = open_elements->data[0];
756
- assert(foster_parent_element->type == GUMBO_NODE_ELEMENT);
757
- assert(node_html_tag_is(foster_parent_element, GUMBO_TAG_HTML));
758
- for (int i = open_elements->length; --i > 1; ) {
759
- GumboNode* table_element = open_elements->data[i];
760
- if (node_html_tag_is(table_element, GUMBO_TAG_TABLE)) {
761
- foster_parent_element = table_element->parent;
762
- if (!foster_parent_element ||
763
- foster_parent_element->type != GUMBO_NODE_ELEMENT) {
764
- // Table has no parent; spec says it's possible if a script manipulated
765
- // the DOM, although I don't think we have to worry about this case.
766
- gumbo_debug("Table has no parent.\n");
767
- foster_parent_element = open_elements->data[i - 1];
768
- break;
769
- }
770
- assert(foster_parent_element->type == GUMBO_NODE_ELEMENT);
771
- gumbo_debug("Found enclosing table (%x) at %d; parent=%s, index=%d.\n",
772
- table_element, i, gumbo_normalized_tagname(
773
- foster_parent_element->v.element.tag),
774
- table_element->index_within_parent);
775
- assert(foster_parent_element->v.element.children.data[
776
- table_element->index_within_parent] == table_element);
777
- insert_node(parser, foster_parent_element,
778
- table_element->index_within_parent, node);
779
- return;
864
+ assert(index >= 0);
865
+ assert((unsigned int) index < children->length);
866
+ node->parent = parent;
867
+ node->index_within_parent = index;
868
+ gumbo_vector_insert_at(parser, (void*) node, index, children);
869
+ assert(node->index_within_parent < children->length);
870
+ for (unsigned int i = index + 1; i < children->length; ++i) {
871
+ GumboNode* sibling = children->data[i];
872
+ sibling->index_within_parent = i;
873
+ assert(sibling->index_within_parent < children->length);
780
874
  }
875
+ } else {
876
+ append_node(parser, parent, node);
781
877
  }
782
- if (node->type == GUMBO_NODE_ELEMENT) {
783
- gumbo_vector_add(parser, (void*) node, open_elements);
784
- }
785
- append_node(parser, foster_parent_element, node);
786
878
  }
787
879
 
788
880
  static void maybe_flush_text_node_buffer(GumboParser* parser) {
@@ -797,27 +889,27 @@ static void maybe_flush_text_node_buffer(GumboParser* parser) {
797
889
  buffer_state->_type == GUMBO_NODE_CDATA);
798
890
  GumboNode* text_node = create_node(parser, buffer_state->_type);
799
891
  GumboText* text_node_data = &text_node->v.text;
800
- text_node_data->text = gumbo_string_buffer_to_string(
801
- parser, &buffer_state->_buffer);
892
+ text_node_data->text =
893
+ gumbo_string_buffer_to_string(parser, &buffer_state->_buffer);
802
894
  text_node_data->original_text.data = buffer_state->_start_original_text;
803
895
  text_node_data->original_text.length =
804
896
  state->_current_token->original_text.data -
805
897
  buffer_state->_start_original_text;
806
898
  text_node_data->start_pos = buffer_state->_start_position;
807
- if (state->_foster_parent_insertions &&
808
- node_tag_in_set(get_current_node(parser), (gumbo_tagset) { TAG(TABLE), TAG(TBODY), TAG(TFOOT),
809
- TAG(THEAD), TAG(TR) })) {
810
- foster_parent_element(parser, text_node);
899
+
900
+ gumbo_debug("Flushing text node buffer of %.*s.\n",
901
+ (int) buffer_state->_buffer.length, buffer_state->_buffer.data);
902
+
903
+ InsertionLocation location = get_appropriate_insertion_location(parser, NULL);
904
+ if (location.target->type == GUMBO_NODE_DOCUMENT) {
905
+ // The DOM does not allow Document nodes to have Text children, so per the
906
+ // spec, they are dropped on the floor.
907
+ destroy_node(parser, text_node);
811
908
  } else {
812
- append_node(
813
- parser, parser->_output->root ?
814
- get_current_node(parser) : parser->_output->document, text_node);
909
+ insert_node(parser, text_node, location);
815
910
  }
816
- gumbo_debug("Flushing text node buffer of %.*s.\n",
817
- (int) buffer_state->_buffer.length, buffer_state->_buffer.data);
818
911
 
819
- gumbo_string_buffer_destroy(parser, &buffer_state->_buffer);
820
- gumbo_string_buffer_init(parser, &buffer_state->_buffer);
912
+ gumbo_string_buffer_clear(parser, &buffer_state->_buffer);
821
913
  buffer_state->_type = GUMBO_NODE_WHITESPACE;
822
914
  assert(buffer_state->_buffer.length == 0);
823
915
  }
@@ -825,9 +917,9 @@ static void maybe_flush_text_node_buffer(GumboParser* parser) {
825
917
  static void record_end_of_element(
826
918
  GumboToken* current_token, GumboElement* element) {
827
919
  element->end_pos = current_token->position;
828
- element->original_end_tag =
829
- current_token->type == GUMBO_TOKEN_END_TAG ?
830
- current_token->original_text : kGumboEmptyString;
920
+ element->original_end_tag = current_token->type == GUMBO_TOKEN_END_TAG
921
+ ? current_token->original_text
922
+ : kGumboEmptyString;
831
923
  }
832
924
 
833
925
  static GumboNode* pop_current_node(GumboParser* parser) {
@@ -835,8 +927,7 @@ static GumboNode* pop_current_node(GumboParser* parser) {
835
927
  maybe_flush_text_node_buffer(parser);
836
928
  if (state->_open_elements.length > 0) {
837
929
  assert(node_html_tag_is(state->_open_elements.data[0], GUMBO_TAG_HTML));
838
- gumbo_debug(
839
- "Popping %s node.\n",
930
+ gumbo_debug("Popping %s node.\n",
840
931
  gumbo_normalized_tagname(get_current_node(parser)->v.element.tag));
841
932
  }
842
933
  GumboNode* current_node = gumbo_vector_pop(parser, &state->_open_elements);
@@ -844,13 +935,16 @@ static GumboNode* pop_current_node(GumboParser* parser) {
844
935
  assert(state->_open_elements.length == 0);
845
936
  return NULL;
846
937
  }
847
- assert(current_node->type == GUMBO_NODE_ELEMENT);
938
+ assert(current_node->type == GUMBO_NODE_ELEMENT ||
939
+ current_node->type == GUMBO_NODE_TEMPLATE);
848
940
  bool is_closed_body_or_html_tag =
849
- (node_html_tag_is(current_node, GUMBO_TAG_BODY) && state->_closed_body_tag) ||
850
- (node_html_tag_is(current_node, GUMBO_TAG_HTML) && state->_closed_html_tag);
941
+ (node_html_tag_is(current_node, GUMBO_TAG_BODY) &&
942
+ state->_closed_body_tag) ||
943
+ (node_html_tag_is(current_node, GUMBO_TAG_HTML) &&
944
+ state->_closed_html_tag);
851
945
  if ((state->_current_token->type != GUMBO_TOKEN_END_TAG ||
852
- !node_html_tag_is(current_node, state->_current_token->v.end_tag)) &&
853
- !is_closed_body_or_html_tag) {
946
+ !node_html_tag_is(current_node, state->_current_token->v.end_tag)) &&
947
+ !is_closed_body_or_html_tag) {
854
948
  current_node->parse_flags |= GUMBO_INSERTION_IMPLICIT_END_TAG;
855
949
  }
856
950
  if (!is_closed_body_or_html_tag) {
@@ -873,22 +967,25 @@ static void append_comment_node(
873
967
 
874
968
  // http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#clear-the-stack-back-to-a-table-row-context
875
969
  static void clear_stack_to_table_row_context(GumboParser* parser) {
876
- while (!node_tag_in_set(get_current_node(parser), (gumbo_tagset) { TAG(HTML), TAG(TR) })) {
970
+ while (!node_tag_in_set(get_current_node(parser),
971
+ (gumbo_tagset){TAG(HTML), TAG(TR), TAG(TEMPLATE)})) {
877
972
  pop_current_node(parser);
878
973
  }
879
974
  }
880
975
 
881
976
  // http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#clear-the-stack-back-to-a-table-context
882
977
  static void clear_stack_to_table_context(GumboParser* parser) {
883
- while (!node_tag_in_set(get_current_node(parser), (gumbo_tagset) { TAG(HTML), TAG(TABLE) } )) {
978
+ while (!node_tag_in_set(get_current_node(parser),
979
+ (gumbo_tagset){TAG(HTML), TAG(TABLE), TAG(TEMPLATE)})) {
884
980
  pop_current_node(parser);
885
981
  }
886
982
  }
887
983
 
888
984
  // http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#clear-the-stack-back-to-a-table-body-context
889
985
  void clear_stack_to_table_body_context(GumboParser* parser) {
890
- while (!node_tag_in_set(get_current_node(parser), (gumbo_tagset) { TAG(HTML), TAG(TBODY),
891
- TAG(TFOOT), TAG(THEAD) })) {
986
+ while (!node_tag_in_set(get_current_node(parser),
987
+ (gumbo_tagset){TAG(HTML), TAG(TBODY), TAG(TFOOT), TAG(THEAD),
988
+ TAG(TEMPLATE)})) {
892
989
  pop_current_node(parser);
893
990
  }
894
991
  }
@@ -903,7 +1000,9 @@ static GumboNode* create_element(GumboParser* parser, GumboTag tag) {
903
1000
  element->tag_namespace = GUMBO_NAMESPACE_HTML;
904
1001
  element->original_tag = kGumboEmptyString;
905
1002
  element->original_end_tag = kGumboEmptyString;
906
- element->start_pos = parser->_parser_state->_current_token->position;
1003
+ element->start_pos = (parser->_parser_state->_current_token)
1004
+ ? parser->_parser_state->_current_token->position
1005
+ : kGumboEmptySourcePosition;
907
1006
  element->end_pos = kGumboEmptySourcePosition;
908
1007
  return node;
909
1008
  }
@@ -914,7 +1013,12 @@ static GumboNode* create_element_from_token(
914
1013
  assert(token->type == GUMBO_TOKEN_START_TAG);
915
1014
  GumboTokenStartTag* start_tag = &token->v.start_tag;
916
1015
 
917
- GumboNode* node = create_node(parser, GUMBO_NODE_ELEMENT);
1016
+ GumboNodeType type = (tag_namespace == GUMBO_NAMESPACE_HTML &&
1017
+ start_tag->tag == GUMBO_TAG_TEMPLATE)
1018
+ ? GUMBO_NODE_TEMPLATE
1019
+ : GUMBO_NODE_ELEMENT;
1020
+
1021
+ GumboNode* node = create_node(parser, type);
918
1022
  GumboElement* element = &node->v.element;
919
1023
  gumbo_vector_init(parser, 1, &element->children);
920
1024
  element->attributes = start_tag->attributes;
@@ -937,7 +1041,7 @@ static GumboNode* create_element_from_token(
937
1041
 
938
1042
  // http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#insert-an-html-element
939
1043
  static void insert_element(GumboParser* parser, GumboNode* node,
940
- bool is_reconstructing_formatting_elements) {
1044
+ bool is_reconstructing_formatting_elements) {
941
1045
  GumboParserState* state = parser->_parser_state;
942
1046
  // NOTE(jdtang): The text node buffer must always be flushed before inserting
943
1047
  // a node, otherwise we're handling nodes in a different order than the spec
@@ -951,20 +1055,8 @@ static void insert_element(GumboParser* parser, GumboNode* node,
951
1055
  if (!is_reconstructing_formatting_elements) {
952
1056
  maybe_flush_text_node_buffer(parser);
953
1057
  }
954
- if (state->_foster_parent_insertions &&
955
- node_tag_in_set(get_current_node(parser), (gumbo_tagset) { TAG(TABLE), TAG(TBODY), TAG(TFOOT),
956
- TAG(THEAD), TAG(TR) } )) {
957
- foster_parent_element(parser, node);
958
- gumbo_vector_add(parser, (void*) node, &state->_open_elements);
959
- return;
960
- }
961
-
962
- // This is called to insert the root HTML element, but get_current_node
963
- // assumes the stack of open elements is non-empty, so we need special
964
- // handling for this case.
965
- append_node(
966
- parser, parser->_output->root ?
967
- get_current_node(parser) : parser->_output->document, node);
1058
+ InsertionLocation location = get_appropriate_insertion_location(parser, NULL);
1059
+ insert_node(parser, node, location);
968
1060
  gumbo_vector_add(parser, (void*) node, &state->_open_elements);
969
1061
  }
970
1062
 
@@ -977,7 +1069,7 @@ static GumboNode* insert_element_from_token(
977
1069
  create_element_from_token(parser, token, GUMBO_NAMESPACE_HTML);
978
1070
  insert_element(parser, element, false);
979
1071
  gumbo_debug("Inserting <%s> element (@%x) from token.\n",
980
- gumbo_normalized_tagname(element->v.element.tag), element);
1072
+ gumbo_normalized_tagname(element->v.element.tag), element);
981
1073
  return element;
982
1074
  }
983
1075
 
@@ -990,7 +1082,7 @@ static GumboNode* insert_element_of_tag_type(
990
1082
  element->parse_flags |= GUMBO_INSERTION_BY_PARSER | reason;
991
1083
  insert_element(parser, element, false);
992
1084
  gumbo_debug("Inserting %s element (@%x) from tag type.\n",
993
- gumbo_normalized_tagname(tag), element);
1085
+ gumbo_normalized_tagname(tag), element);
994
1086
  return element;
995
1087
  }
996
1088
 
@@ -1002,16 +1094,14 @@ static GumboNode* insert_foreign_element(
1002
1094
  GumboNode* element = create_element_from_token(parser, token, tag_namespace);
1003
1095
  insert_element(parser, element, false);
1004
1096
  if (token_has_attribute(token, "xmlns") &&
1005
- !attribute_matches_case_sensitive(
1006
- &token->v.start_tag.attributes, "xmlns",
1097
+ !attribute_matches_case_sensitive(&token->v.start_tag.attributes, "xmlns",
1007
1098
  kLegalXmlns[tag_namespace])) {
1008
1099
  // TODO(jdtang): Since there're multiple possible error codes here, we
1009
1100
  // eventually need reason codes to differentiate them.
1010
1101
  parser_add_parse_error(parser, token);
1011
1102
  }
1012
1103
  if (token_has_attribute(token, "xmlns:xlink") &&
1013
- !attribute_matches_case_sensitive(
1014
- &token->v.start_tag.attributes,
1104
+ !attribute_matches_case_sensitive(&token->v.start_tag.attributes,
1015
1105
  "xmlns:xlink", "http://www.w3.org/1999/xlink")) {
1016
1106
  parser_add_parse_error(parser, token);
1017
1107
  }
@@ -1021,8 +1111,7 @@ static GumboNode* insert_foreign_element(
1021
1111
  static void insert_text_token(GumboParser* parser, GumboToken* token) {
1022
1112
  assert(token->type == GUMBO_TOKEN_WHITESPACE ||
1023
1113
  token->type == GUMBO_TOKEN_CHARACTER ||
1024
- token->type == GUMBO_TOKEN_NULL ||
1025
- token->type == GUMBO_TOKEN_CDATA);
1114
+ token->type == GUMBO_TOKEN_NULL || token->type == GUMBO_TOKEN_CDATA);
1026
1115
  TextNodeBufferState* buffer_state = &parser->_parser_state->_text_node;
1027
1116
  if (buffer_state->_buffer.length == 0) {
1028
1117
  // Initialize position fields.
@@ -1057,7 +1146,7 @@ static void acknowledge_self_closing_tag(GumboParser* parser) {
1057
1146
  // elements, and fills in its index if so.
1058
1147
  static bool find_last_anchor_index(GumboParser* parser, int* anchor_index) {
1059
1148
  GumboVector* elements = &parser->_parser_state->_active_formatting_elements;
1060
- for (int i = elements->length; --i >= 0; ) {
1149
+ for (int i = elements->length; --i >= 0;) {
1061
1150
  GumboNode* node = elements->data[i];
1062
1151
  if (node == &kActiveFormattingScopeMarker) {
1063
1152
  return false;
@@ -1074,21 +1163,21 @@ static bool find_last_anchor_index(GumboParser* parser, int* anchor_index) {
1074
1163
  // formatting elements (after the last active scope marker) that have a specific
1075
1164
  // tag. If this is > 0, then earliest_matching_index will be filled in with the
1076
1165
  // index of the first such element.
1077
- static int count_formatting_elements_of_tag(
1078
- GumboParser* parser, const GumboNode* desired_node,
1079
- int* earliest_matching_index) {
1166
+ static int count_formatting_elements_of_tag(GumboParser* parser,
1167
+ const GumboNode* desired_node, int* earliest_matching_index) {
1080
1168
  const GumboElement* desired_element = &desired_node->v.element;
1081
1169
  GumboVector* elements = &parser->_parser_state->_active_formatting_elements;
1082
1170
  int num_identical_elements = 0;
1083
- for (int i = elements->length; --i >= 0; ) {
1171
+ for (int i = elements->length; --i >= 0;) {
1084
1172
  GumboNode* node = elements->data[i];
1085
1173
  if (node == &kActiveFormattingScopeMarker) {
1086
1174
  break;
1087
1175
  }
1088
1176
  assert(node->type == GUMBO_NODE_ELEMENT);
1089
- if (node_qualified_tag_is(node, desired_element->tag_namespace, desired_element->tag) &&
1090
- all_attributes_match(&node->v.element.attributes,
1091
- &desired_element->attributes)) {
1177
+ if (node_qualified_tag_is(
1178
+ node, desired_element->tag_namespace, desired_element->tag) &&
1179
+ all_attributes_match(
1180
+ &node->v.element.attributes, &desired_element->attributes)) {
1092
1181
  num_identical_elements++;
1093
1182
  *earliest_matching_index = i;
1094
1183
  }
@@ -1115,7 +1204,7 @@ static void add_formatting_element(GumboParser* parser, const GumboNode* node) {
1115
1204
  // Noah's Ark clause: if there're at least 3, remove the earliest.
1116
1205
  if (num_identical_elements >= 3) {
1117
1206
  gumbo_debug("Noah's ark clause: removing element at %d.\n",
1118
- earliest_identical_element);
1207
+ earliest_identical_element);
1119
1208
  gumbo_vector_remove_at(parser, earliest_identical_element, elements);
1120
1209
  }
1121
1210
 
@@ -1124,7 +1213,7 @@ static void add_formatting_element(GumboParser* parser, const GumboNode* node) {
1124
1213
 
1125
1214
  static bool is_open_element(GumboParser* parser, const GumboNode* node) {
1126
1215
  GumboVector* open_elements = &parser->_parser_state->_open_elements;
1127
- for (int i = 0; i < open_elements->length; ++i) {
1216
+ for (unsigned int i = 0; i < open_elements->length; ++i) {
1128
1217
  if (open_elements->data[i] == node) {
1129
1218
  return true;
1130
1219
  }
@@ -1136,8 +1225,8 @@ static bool is_open_element(GumboParser* parser, const GumboNode* node) {
1136
1225
  // clone shares no structure with the original node: all owned strings and
1137
1226
  // values are fresh copies.
1138
1227
  GumboNode* clone_node(
1139
- GumboParser* parser, const GumboNode* node, GumboParseFlags reason) {
1140
- assert(node->type == GUMBO_NODE_ELEMENT);
1228
+ GumboParser* parser, GumboNode* node, GumboParseFlags reason) {
1229
+ assert(node->type == GUMBO_NODE_ELEMENT || node->type == GUMBO_NODE_TEMPLATE);
1141
1230
  GumboNode* new_node = gumbo_parser_allocate(parser, sizeof(GumboNode));
1142
1231
  *new_node = *node;
1143
1232
  new_node->parent = NULL;
@@ -1151,7 +1240,7 @@ GumboNode* clone_node(
1151
1240
 
1152
1241
  const GumboVector* old_attributes = &node->v.element.attributes;
1153
1242
  gumbo_vector_init(parser, old_attributes->length, &element->attributes);
1154
- for (int i = 0; i < old_attributes->length; ++i) {
1243
+ for (unsigned int i = 0; i < old_attributes->length; ++i) {
1155
1244
  const GumboAttribute* old_attr = old_attributes->data[i];
1156
1245
  GumboAttribute* attr =
1157
1246
  gumbo_parser_allocate(parser, sizeof(GumboAttribute));
@@ -1175,8 +1264,8 @@ static void reconstruct_active_formatting_elements(GumboParser* parser) {
1175
1264
  }
1176
1265
 
1177
1266
  // Step 2 & 3
1178
- int i = elements->length - 1;
1179
- const GumboNode* element = elements->data[i];
1267
+ unsigned int i = elements->length - 1;
1268
+ GumboNode* element = elements->data[i];
1180
1269
  if (element == &kActiveFormattingScopeMarker ||
1181
1270
  is_open_element(parser, element)) {
1182
1271
  return;
@@ -1186,7 +1275,7 @@ static void reconstruct_active_formatting_elements(GumboParser* parser) {
1186
1275
  do {
1187
1276
  if (i == 0) {
1188
1277
  // Step 4
1189
- i = -1; // Incremented to 0 below.
1278
+ i = -1; // Incremented to 0 below.
1190
1279
  break;
1191
1280
  }
1192
1281
  // Step 5
@@ -1196,9 +1285,8 @@ static void reconstruct_active_formatting_elements(GumboParser* parser) {
1196
1285
 
1197
1286
  ++i;
1198
1287
  gumbo_debug("Reconstructing elements from %d on %s parent.\n", i,
1199
- gumbo_normalized_tagname(
1200
- get_current_node(parser)->v.element.tag));
1201
- for(; i < elements->length; ++i) {
1288
+ gumbo_normalized_tagname(get_current_node(parser)->v.element.tag));
1289
+ for (; i < elements->length; ++i) {
1202
1290
  // Step 7 & 8.
1203
1291
  assert(elements->length > 0);
1204
1292
  assert(i < elements->length);
@@ -1207,11 +1295,16 @@ static void reconstruct_active_formatting_elements(GumboParser* parser) {
1207
1295
  GumboNode* clone = clone_node(
1208
1296
  parser, element, GUMBO_INSERTION_RECONSTRUCTED_FORMATTING_ELEMENT);
1209
1297
  // Step 9.
1210
- insert_element(parser, clone, true);
1298
+ InsertionLocation location =
1299
+ get_appropriate_insertion_location(parser, NULL);
1300
+ insert_node(parser, clone, location);
1301
+ gumbo_vector_add(
1302
+ parser, (void*) clone, &parser->_parser_state->_open_elements);
1303
+
1211
1304
  // Step 10.
1212
1305
  elements->data[i] = clone;
1213
1306
  gumbo_debug("Reconstructed %s element at %d.\n",
1214
- gumbo_normalized_tagname(clone->v.element.tag), i);
1307
+ gumbo_normalized_tagname(clone->v.element.tag), i);
1215
1308
  }
1216
1309
  }
1217
1310
 
@@ -1222,32 +1315,30 @@ static void clear_active_formatting_elements(GumboParser* parser) {
1222
1315
  do {
1223
1316
  node = gumbo_vector_pop(parser, elements);
1224
1317
  ++num_elements_cleared;
1225
- } while(node && node != &kActiveFormattingScopeMarker);
1318
+ } while (node && node != &kActiveFormattingScopeMarker);
1226
1319
  gumbo_debug("Cleared %d elements from active formatting list.\n",
1227
- num_elements_cleared);
1320
+ num_elements_cleared);
1228
1321
  }
1229
1322
 
1230
1323
  // http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#the-initial-insertion-mode
1231
1324
  static GumboQuirksModeEnum compute_quirks_mode(
1232
1325
  const GumboTokenDocType* doctype) {
1233
- if (doctype->force_quirks ||
1234
- strcmp(doctype->name, kDoctypeHtml.data) ||
1235
- is_in_static_list(doctype->public_identifier,
1236
- kQuirksModePublicIdPrefixes, false) ||
1237
- is_in_static_list(doctype->public_identifier,
1238
- kQuirksModePublicIdExactMatches, true) ||
1239
- is_in_static_list(doctype->system_identifier,
1240
- kQuirksModeSystemIdExactMatches, true) ||
1326
+ if (doctype->force_quirks || strcmp(doctype->name, kDoctypeHtml.data) ||
1327
+ is_in_static_list(
1328
+ doctype->public_identifier, kQuirksModePublicIdPrefixes, false) ||
1329
+ is_in_static_list(
1330
+ doctype->public_identifier, kQuirksModePublicIdExactMatches, true) ||
1331
+ is_in_static_list(
1332
+ doctype->system_identifier, kQuirksModeSystemIdExactMatches, true) ||
1241
1333
  (is_in_static_list(doctype->public_identifier,
1242
- kLimitedQuirksRequiresSystemIdPublicIdPrefixes, false)
1243
- && !doctype->has_system_identifier)) {
1334
+ kLimitedQuirksRequiresSystemIdPublicIdPrefixes, false) &&
1335
+ !doctype->has_system_identifier)) {
1244
1336
  return GUMBO_DOCTYPE_QUIRKS;
1245
- } else if (
1246
- is_in_static_list(doctype->public_identifier,
1247
- kLimitedQuirksPublicIdPrefixes, false) ||
1248
- (is_in_static_list(doctype->public_identifier,
1249
- kLimitedQuirksRequiresSystemIdPublicIdPrefixes, false)
1250
- && doctype->has_system_identifier)) {
1337
+ } else if (is_in_static_list(doctype->public_identifier,
1338
+ kLimitedQuirksPublicIdPrefixes, false) ||
1339
+ (is_in_static_list(doctype->public_identifier,
1340
+ kLimitedQuirksRequiresSystemIdPublicIdPrefixes, false) &&
1341
+ doctype->has_system_identifier)) {
1251
1342
  return GUMBO_DOCTYPE_LIMITED_QUIRKS;
1252
1343
  }
1253
1344
  return GUMBO_DOCTYPE_NO_QUIRKS;
@@ -1261,39 +1352,45 @@ static GumboQuirksModeEnum compute_quirks_mode(
1261
1352
  // names. For example, "has an element in list scope" looks for an element of
1262
1353
  // the given qualified name within the nearest enclosing <ol> or <ul>, along
1263
1354
  // with a bunch of generic element types that serve to "firewall" their content
1264
- // from the rest of the document.
1265
- static bool has_an_element_in_specific_scope(GumboParser* parser, gumbo_tagset expected, bool negate, const gumbo_tagset tags) {
1355
+ // from the rest of the document. Note that because of the way the spec is
1356
+ // written, all expected tags are assumed to be in the HTML namespace, so
1357
+ // open elements in other namespaces never match them.
1358
+ static bool has_an_element_in_specific_scope(GumboParser* parser,
1359
+ int expected_size, const GumboTag* expected, bool negate,
1360
+ const gumbo_tagset tags) {
1266
1361
  GumboVector* open_elements = &parser->_parser_state->_open_elements;
1267
- bool result = false;
1268
- for (int i = open_elements->length; --i >= 0; ) {
1362
+ for (int i = open_elements->length; --i >= 0;) {
1269
1363
  const GumboNode* node = open_elements->data[i];
1270
- if (node->type != GUMBO_NODE_ELEMENT) {
1364
+ if (node->type != GUMBO_NODE_ELEMENT && node->type != GUMBO_NODE_TEMPLATE)
1271
1365
  continue;
1366
+
1367
+ GumboTag node_tag = node->v.element.tag;
1368
+ GumboNamespaceEnum node_ns = node->v.element.tag_namespace;
1369
+ for (int j = 0; j < expected_size; ++j) {
1370
+ if (node_tag == expected[j] && node_ns == GUMBO_NAMESPACE_HTML)
1371
+ return true;
1272
1372
  }
1273
- if (TAGSET_INCLUDES(expected, node->v.element.tag_namespace, node->v.element.tag)) {
1274
- return true;
1275
- }
1276
- bool found_qualname = false;
1277
- if (TAGSET_INCLUDES(tags, node->v.element.tag_namespace, node->v.element.tag)) {
1278
- found_qualname = true;
1279
- }
1280
- if (negate != found_qualname) {
1281
- result = false;
1282
- return result;
1283
- }
1373
+
1374
+ bool found = TAGSET_INCLUDES(tags, node_ns, node_tag);
1375
+ if (negate != found) return false;
1284
1376
  }
1285
- return result;
1377
+ return false;
1378
+ }
1379
+
1380
+ // Checks for the presence of an open element of the specified tag type.
1381
+ static bool has_open_element(GumboParser* parser, GumboTag tag) {
1382
+ return has_an_element_in_specific_scope(
1383
+ parser, 1, &tag, false, (gumbo_tagset){TAG(HTML)});
1286
1384
  }
1287
1385
 
1288
1386
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-scope
1289
1387
  static bool has_an_element_in_scope(GumboParser* parser, GumboTag tag) {
1290
- gumbo_tagset qualset = {0};
1291
- qualset[(int) tag] = (1 << (int) GUMBO_NAMESPACE_HTML);
1292
- return has_an_element_in_specific_scope(parser, qualset, false, (gumbo_tagset) { TAG(APPLET),
1293
- TAG(CAPTION), TAG(HTML), TAG(TABLE), TAG(TD), TAG(TH), TAG(MARQUEE),
1294
- TAG(OBJECT), TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN),
1295
- TAG_MATHML(MS), TAG_MATHML(MTEXT), TAG_MATHML(ANNOTATION_XML),
1296
- TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC), TAG_SVG(TITLE) });
1388
+ return has_an_element_in_specific_scope(parser, 1, &tag, false,
1389
+ (gumbo_tagset){TAG(APPLET), TAG(CAPTION), TAG(HTML), TAG(TABLE), TAG(TD),
1390
+ TAG(TH), TAG(MARQUEE), TAG(OBJECT), TAG(TEMPLATE), TAG_MATHML(MI),
1391
+ TAG_MATHML(MO), TAG_MATHML(MN), TAG_MATHML(MS), TAG_MATHML(MTEXT),
1392
+ TAG_MATHML(ANNOTATION_XML), TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC),
1393
+ TAG_SVG(TITLE)});
1297
1394
  }
1298
1395
 
1299
1396
  // Like "has an element in scope", but for the specific case of looking for a
@@ -1304,19 +1401,21 @@ static bool has_an_element_in_scope(GumboParser* parser, GumboTag tag) {
1304
1401
  // parameterize it.
1305
1402
  static bool has_node_in_scope(GumboParser* parser, const GumboNode* node) {
1306
1403
  GumboVector* open_elements = &parser->_parser_state->_open_elements;
1307
- for (int i = open_elements->length; --i >= 0; ) {
1404
+ for (int i = open_elements->length; --i >= 0;) {
1308
1405
  const GumboNode* current = open_elements->data[i];
1309
1406
  if (current == node) {
1310
1407
  return true;
1311
1408
  }
1312
- if (current->type != GUMBO_NODE_ELEMENT) {
1409
+ if (current->type != GUMBO_NODE_ELEMENT &&
1410
+ current->type != GUMBO_NODE_TEMPLATE) {
1313
1411
  continue;
1314
1412
  }
1315
- if (node_tag_in_set(current, (gumbo_tagset) { TAG(APPLET), TAG(CAPTION), TAG(HTML),
1316
- TAG(TABLE), TAG(TD), TAG(TH), TAG(MARQUEE), TAG(OBJECT),
1317
- TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN), TAG_MATHML(MS),
1318
- TAG_MATHML(MTEXT), TAG_MATHML(ANNOTATION_XML), TAG_SVG(FOREIGNOBJECT),
1319
- TAG_SVG(DESC), TAG_SVG(TITLE) } )) {
1413
+ if (node_tag_in_set(current,
1414
+ (gumbo_tagset){TAG(APPLET), TAG(CAPTION), TAG(HTML), TAG(TABLE),
1415
+ TAG(TD), TAG(TH), TAG(MARQUEE), TAG(OBJECT), TAG(TEMPLATE),
1416
+ TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN), TAG_MATHML(MS),
1417
+ TAG_MATHML(MTEXT), TAG_MATHML(ANNOTATION_XML),
1418
+ TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC), TAG_SVG(TITLE)})) {
1320
1419
  return false;
1321
1420
  }
1322
1421
  }
@@ -1326,60 +1425,70 @@ static bool has_node_in_scope(GumboParser* parser, const GumboNode* node) {
1326
1425
 
1327
1426
  // Like has_an_element_in_scope, but restricts the expected qualified name to a
1328
1427
  // range of possible qualified names instead of just a single one: `expected` is an array of `expected_len` candidate tags that is forwarded as-is to has_an_element_in_specific_scope.
1329
- static bool has_an_element_in_scope_with_tagname(GumboParser* parser, gumbo_tagset qualset) {
1330
- return has_an_element_in_specific_scope(parser, qualset, false, (gumbo_tagset) { TAG(APPLET),
1331
- TAG(CAPTION), TAG(HTML), TAG(TABLE), TAG(TD), TAG(TH), TAG(MARQUEE),
1332
- TAG(OBJECT), TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN),
1333
- TAG_MATHML(MS), TAG_MATHML(MTEXT), TAG_MATHML(ANNOTATION_XML),
1334
- TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC), TAG_SVG(TITLE) });
1428
+ static bool has_an_element_in_scope_with_tagname(
1429
+ GumboParser* parser, int expected_len, const GumboTag expected[]) {
1430
+ return has_an_element_in_specific_scope(parser, expected_len, expected, false,
1431
+ (gumbo_tagset){TAG(APPLET), TAG(CAPTION), TAG(HTML), TAG(TABLE), TAG(TD),
1432
+ TAG(TH), TAG(MARQUEE), TAG(OBJECT), TAG(TEMPLATE), TAG_MATHML(MI),
1433
+ TAG_MATHML(MO), TAG_MATHML(MN), TAG_MATHML(MS), TAG_MATHML(MTEXT),
1434
+ TAG_MATHML(ANNOTATION_XML), TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC),
1435
+ TAG_SVG(TITLE)});
1335
1436
  }
1336
1437
 
1337
1438
  // "Has an element in list item scope" check for a single tag; the tagset passed below is the one used by has_an_element_in_scope_with_tagname plus OL and UL. Spec: http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-list-item-scope
1338
1439
  static bool has_an_element_in_list_scope(GumboParser* parser, GumboTag tag) {
1339
- gumbo_tagset qualset = {0};
1340
- qualset[(int)tag] = (1 << (int)(GUMBO_NAMESPACE_HTML));
1341
- return has_an_element_in_specific_scope(parser, qualset, false, (gumbo_tagset) { TAG(APPLET),
1342
- TAG(CAPTION), TAG(HTML), TAG(TABLE), TAG(TD), TAG(TH), TAG(MARQUEE),
1343
- TAG(OBJECT), TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN),
1344
- TAG_MATHML(MS), TAG_MATHML(MTEXT), TAG_MATHML(ANNOTATION_XML),
1345
- TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC), TAG_SVG(TITLE), TAG(OL),
1346
- TAG(UL) });
1440
+ return has_an_element_in_specific_scope(parser, 1, &tag, false,
1441
+ (gumbo_tagset){TAG(APPLET), TAG(CAPTION), TAG(HTML), TAG(TABLE), TAG(TD),
1442
+ TAG(TH), TAG(MARQUEE), TAG(OBJECT), TAG(TEMPLATE), TAG_MATHML(MI),
1443
+ TAG_MATHML(MO), TAG_MATHML(MN), TAG_MATHML(MS), TAG_MATHML(MTEXT),
1444
+ TAG_MATHML(ANNOTATION_XML), TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC),
1445
+ TAG_SVG(TITLE), TAG(OL), TAG(UL)});
1347
1446
  }
1348
1447
 
1349
1448
  // "Has an element in button scope" check for a single tag; the tagset passed below is the one used by has_an_element_in_scope_with_tagname plus BUTTON. Spec: http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-button-scope
1350
1449
  static bool has_an_element_in_button_scope(GumboParser* parser, GumboTag tag) {
1351
- gumbo_tagset qualset = {0};
1352
- qualset[(int) tag] = (1 << (int)(GUMBO_NAMESPACE_HTML));
1353
- return has_an_element_in_specific_scope(parser, qualset, false, (gumbo_tagset) { TAG(APPLET),
1354
- TAG(CAPTION), TAG(HTML), TAG(TABLE), TAG(TD), TAG(TH), TAG(MARQUEE),
1355
- TAG(OBJECT), TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN),
1356
- TAG_MATHML(MS), TAG_MATHML(MTEXT), TAG_MATHML(ANNOTATION_XML),
1357
- TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC), TAG_SVG(TITLE), TAG(BUTTON) });
1450
+ return has_an_element_in_specific_scope(parser, 1, &tag, false,
1451
+ (gumbo_tagset){TAG(APPLET), TAG(CAPTION), TAG(HTML), TAG(TABLE), TAG(TD),
1452
+ TAG(TH), TAG(MARQUEE), TAG(OBJECT), TAG(TEMPLATE), TAG_MATHML(MI),
1453
+ TAG_MATHML(MO), TAG_MATHML(MN), TAG_MATHML(MS), TAG_MATHML(MTEXT),
1454
+ TAG_MATHML(ANNOTATION_XML), TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC),
1455
+ TAG_SVG(TITLE), TAG(BUTTON)});
1358
1456
  }
1359
1457
 
1360
1458
  // "Has an element in table scope" check for a single tag; the tagset passed below is just HTML, TABLE and TEMPLATE. Spec: http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-table-scope
1361
1459
  static bool has_an_element_in_table_scope(GumboParser* parser, GumboTag tag) {
1362
- gumbo_tagset qualset = {0};
1363
- qualset[(int) tag] = (1 << (int)(GUMBO_NAMESPACE_HTML));
1364
- return has_an_element_in_specific_scope(parser, qualset, false, (gumbo_tagset) { TAG(HTML), TAG(TABLE) });
1460
+ return has_an_element_in_specific_scope(parser, 1, &tag, false,
1461
+ (gumbo_tagset){TAG(HTML), TAG(TABLE), TAG(TEMPLATE)});
1365
1462
  }
1366
1463
 
1367
1464
  // "Has an element in select scope" check for a single tag. Unlike the other scope helpers this passes `true` as the flag argument, with the tagset {OPTGROUP, OPTION}; presumably the flag inverts the set (the spec's select scope is "all element types except optgroup and option") -- TODO confirm against has_an_element_in_specific_scope. Spec: http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-select-scope
1368
1465
  static bool has_an_element_in_select_scope(GumboParser* parser, GumboTag tag) {
1369
- gumbo_tagset qualset = {0};
1370
- qualset[(int) tag] = (1 << (int)(GUMBO_NAMESPACE_HTML));
1371
- return has_an_element_in_specific_scope(parser, qualset, true, (gumbo_tagset) { TAG(OPTGROUP), TAG(OPTION) });
1466
+ return has_an_element_in_specific_scope(
1467
+ parser, 1, &tag, true, (gumbo_tagset){TAG(OPTGROUP), TAG(OPTION)});
1372
1468
  }
1373
1469
 
1374
1470
  // http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#generate-implied-end-tags
1375
1471
  // Pops the current node for as long as it is in the tagset listed in the loop condition and is not the HTML tag "exception"; "exception" is the "element to exclude from the process" listed in the spec.
1376
1472
  // Pass GUMBO_TAG_LAST to not exclude any of them.
1377
1473
  static void generate_implied_end_tags(GumboParser* parser, GumboTag exception) {
1378
- for (;
1379
- node_tag_in_set(get_current_node(parser), (gumbo_tagset) { TAG(DD), TAG(DT),
1380
- TAG(LI), TAG(OPTION), TAG(OPTGROUP), TAG(P), TAG(RP), TAG(RT) }) &&
1381
- !node_html_tag_is(get_current_node(parser), exception);
1382
- pop_current_node(parser));
1474
+ for (; node_tag_in_set(get_current_node(parser),
1475
+ (gumbo_tagset){TAG(DD), TAG(DT), TAG(LI), TAG(OPTION),
1476
+ TAG(OPTGROUP), TAG(P), TAG(RP), TAG(RB), TAG(RT), TAG(RTC)}) &&
1477
+ !node_html_tag_is(get_current_node(parser), exception);
1478
+ pop_current_node(parser))
1479
+ ;
1480
+ }
1481
+
1482
+ // This is the "generate all implied end tags thoroughly" clause of the spec: pop the current node while it is one of caption, colgroup, dd, dt, li, optgroup, option, p, rb, rp, rt, rtc, tbody, td, tfoot, th, thead or tr (no element is excluded).
1483
+ // https://html.spec.whatwg.org/multipage/syntax.html#closing-elements-that-have-implied-end-tags
1484
+ static void generate_all_implied_end_tags_thoroughly(GumboParser* parser) {
1485
+ for (
1486
+ ; node_tag_in_set(get_current_node(parser),
1487
+ (gumbo_tagset){TAG(CAPTION), TAG(COLGROUP), TAG(DD), TAG(DT), TAG(LI),
1488
+ TAG(OPTION), TAG(OPTGROUP), TAG(P), TAG(RB), TAG(RP), TAG(RT), TAG(RTC),
1489
+ TAG(TBODY), TAG(TD), TAG(TFOOT), TAG(TH), TAG(THEAD), TAG(TR)});
1490
+ pop_current_node(parser))
1491
+ ;
1383
1492
  }
1384
1493
 
1385
1494
  // This factors out the clauses relating to "act as if an end tag token with tag
@@ -1401,8 +1510,8 @@ static bool close_table(GumboParser* parser) {
1401
1510
 
1402
1511
  // This factors out the clauses relating to "act as if an end tag token with tag
1403
1512
  // name `cell_tag` had been seen".
1404
- static bool close_table_cell(GumboParser* parser, const GumboToken* token,
1405
- GumboTag cell_tag) {
1513
+ static bool close_table_cell(
1514
+ GumboParser* parser, const GumboToken* token, GumboTag cell_tag) {
1406
1515
  bool result = true;
1407
1516
  generate_implied_end_tags(parser, GUMBO_TAG_LAST);
1408
1517
  const GumboNode* node = get_current_node(parser);
@@ -1446,38 +1555,43 @@ static void close_current_select(GumboParser* parser) {
1446
1555
  // Returns whether `node` is in the "special" category; `node` must be an element or template node (asserted). The list of nodes in the "special" category:
1447
1556
  // http://www.whatwg.org/specs/web-apps/current-work/complete/parsing.html#special
1448
1557
  static bool is_special_node(const GumboNode* node) {
1449
- assert(node->type == GUMBO_NODE_ELEMENT);
1450
- return node_tag_in_set(node, (gumbo_tagset) { TAG(ADDRESS), TAG(APPLET), TAG(AREA),
1451
- TAG(ARTICLE), TAG(ASIDE), TAG(BASE), TAG(BASEFONT), TAG(BGSOUND), TAG(BLOCKQUOTE),
1452
- TAG(BODY), TAG(BR), TAG(BUTTON), TAG(CAPTION), TAG(CENTER), TAG(COL),
1453
- TAG(COLGROUP), TAG(MENUITEM), TAG(DD), TAG(DETAILS), TAG(DIR), TAG(DIV), TAG(DL),
1454
- TAG(DT), TAG(EMBED), TAG(FIELDSET), TAG(FIGCAPTION), TAG(FIGURE), TAG(FOOTER),
1455
- TAG(FORM), TAG(FRAME), TAG(FRAMESET), TAG(H1), TAG(H2), TAG(H3), TAG(H4),
1456
- TAG(H5), TAG(H6), TAG(HEAD), TAG(HEADER), TAG(HGROUP), TAG(HR), TAG(HTML),
1457
- TAG(IFRAME), TAG(IMG), TAG(INPUT), TAG(ISINDEX), TAG(LI), TAG(LINK),
1458
- TAG(LISTING), TAG(MARQUEE), TAG(MENU), TAG(META), TAG(NAV), TAG(NOEMBED),
1459
- TAG(NOFRAMES), TAG(NOSCRIPT), TAG(OBJECT), TAG(OL), TAG(P), TAG(PARAM),
1460
- TAG(PLAINTEXT), TAG(PRE), TAG(SCRIPT), TAG(SECTION), TAG(SELECT), TAG(STYLE),
1461
- TAG(SUMMARY), TAG(TABLE), TAG(TBODY), TAG(TD), TAG(TEXTAREA), TAG(TFOOT),
1462
- TAG(TH), TAG(THEAD), TAG(TITLE), TAG(TR), TAG(UL), TAG(WBR), TAG(XMP),
1463
-
1464
- TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN), TAG_MATHML(MS),
1465
- TAG_MATHML(MTEXT), TAG_MATHML(ANNOTATION_XML),
1466
-
1467
- TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC) });
1468
- }
1469
-
1470
- // Implicitly closes currently open elements until it reaches an element with the
1558
+ assert(node->type == GUMBO_NODE_ELEMENT || node->type == GUMBO_NODE_TEMPLATE);
1559
+ return node_tag_in_set(node,
1560
+ (gumbo_tagset){TAG(ADDRESS), TAG(APPLET), TAG(AREA), TAG(ARTICLE),
1561
+ TAG(ASIDE), TAG(BASE), TAG(BASEFONT), TAG(BGSOUND), TAG(BLOCKQUOTE),
1562
+ TAG(BODY), TAG(BR), TAG(BUTTON), TAG(CAPTION), TAG(CENTER), TAG(COL),
1563
+ TAG(COLGROUP), TAG(MENUITEM), TAG(DD), TAG(DETAILS), TAG(DIR),
1564
+ TAG(DIV), TAG(DL), TAG(DT), TAG(EMBED), TAG(FIELDSET),
1565
+ TAG(FIGCAPTION), TAG(FIGURE), TAG(FOOTER), TAG(FORM), TAG(FRAME),
1566
+ TAG(FRAMESET), TAG(H1), TAG(H2), TAG(H3), TAG(H4), TAG(H5), TAG(H6),
1567
+ TAG(HEAD), TAG(HEADER), TAG(HGROUP), TAG(HR), TAG(HTML), TAG(IFRAME),
1568
+ TAG(IMG), TAG(INPUT), TAG(ISINDEX), TAG(LI), TAG(LINK), TAG(LISTING),
1569
+ TAG(MARQUEE), TAG(MENU), TAG(META), TAG(NAV), TAG(NOEMBED),
1570
+ TAG(NOFRAMES), TAG(NOSCRIPT), TAG(OBJECT), TAG(OL), TAG(P),
1571
+ TAG(PARAM), TAG(PLAINTEXT), TAG(PRE), TAG(SCRIPT), TAG(SECTION),
1572
+ TAG(SELECT), TAG(STYLE), TAG(SUMMARY), TAG(TABLE), TAG(TBODY),
1573
+ TAG(TD), TAG(TEMPLATE), TAG(TEXTAREA), TAG(TFOOT), TAG(TH),
1574
+ TAG(THEAD), TAG(TITLE), TAG(TR), TAG(UL), TAG(WBR), TAG(XMP),
1575
+
1576
+ TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN), TAG_MATHML(MS),
1577
+ TAG_MATHML(MTEXT), TAG_MATHML(ANNOTATION_XML),
1578
+
1579
+ TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC)});
1580
+ }
1581
+
1582
+ // Implicitly closes currently open elements until it reaches an element with
1583
+ // the
1471
1584
  // specified qualified name. If the elements closed are in the set handled by
1472
1585
  // generate_implied_end_tags, this is normal operation and this function returns
1473
1586
  // true. Otherwise, a parse error is recorded and this function returns false.
1474
- static bool implicitly_close_tags(
1475
- GumboParser* parser, GumboToken* token, GumboNamespaceEnum target_ns, GumboTag target) {
1587
+ static bool implicitly_close_tags(GumboParser* parser, GumboToken* token,
1588
+ GumboNamespaceEnum target_ns, GumboTag target) {
1476
1589
  bool result = true;
1477
1590
  generate_implied_end_tags(parser, target);
1478
1591
  if (!node_qualified_tag_is(get_current_node(parser), target_ns, target)) {
1479
1592
  parser_add_parse_error(parser, token);
1480
- while (!node_qualified_tag_is(get_current_node(parser), target_ns, target)) {
1593
+ while (
1594
+ !node_qualified_tag_is(get_current_node(parser), target_ns, target)) {
1481
1595
  pop_current_node(parser);
1482
1596
  }
1483
1597
  result = false;
@@ -1491,9 +1605,11 @@ static bool implicitly_close_tags(
1491
1605
  // a </p> tag was encountered, implicitly closing tags. Returns false if a
1492
1606
  // parse error occurs. This is a convenience function because this particular
1493
1607
  // clause appears several times in the spec.
1494
- static bool maybe_implicitly_close_p_tag(GumboParser* parser, GumboToken* token) {
1608
+ static bool maybe_implicitly_close_p_tag(
1609
+ GumboParser* parser, GumboToken* token) {
1495
1610
  if (has_an_element_in_button_scope(parser, GUMBO_TAG_P)) {
1496
- return implicitly_close_tags(parser, token, GUMBO_NAMESPACE_HTML, GUMBO_TAG_P);
1611
+ return implicitly_close_tags(
1612
+ parser, token, GUMBO_NAMESPACE_HTML, GUMBO_TAG_P);
1497
1613
  }
1498
1614
  return true;
1499
1615
  }
@@ -1504,17 +1620,19 @@ static void maybe_implicitly_close_list_tag(
1504
1620
  GumboParser* parser, GumboToken* token, bool is_li) {
1505
1621
  GumboParserState* state = parser->_parser_state;
1506
1622
  state->_frameset_ok = false;
1507
- for (int i = state->_open_elements.length; --i >= 0; ) {
1623
+ for (int i = state->_open_elements.length; --i >= 0;) {
1508
1624
  const GumboNode* node = state->_open_elements.data[i];
1509
- bool is_list_tag = is_li ?
1510
- node_html_tag_is(node, GUMBO_TAG_LI) :
1511
- node_tag_in_set(node, (gumbo_tagset) { TAG(DD), TAG(DT) } );
1625
+ bool is_list_tag =
1626
+ is_li ? node_html_tag_is(node, GUMBO_TAG_LI)
1627
+ : node_tag_in_set(node, (gumbo_tagset){TAG(DD), TAG(DT)});
1512
1628
  if (is_list_tag) {
1513
- implicitly_close_tags(parser, token, node->v.element.tag_namespace, node->v.element.tag);
1629
+ implicitly_close_tags(
1630
+ parser, token, node->v.element.tag_namespace, node->v.element.tag);
1514
1631
  return;
1515
1632
  }
1516
1633
  if (is_special_node(node) &&
1517
- !node_tag_in_set(node, (gumbo_tagset) { TAG(ADDRESS), TAG(DIV), TAG(P) })) {
1634
+ !node_tag_in_set(
1635
+ node, (gumbo_tagset){TAG(ADDRESS), TAG(DIV), TAG(P)})) {
1518
1636
  return;
1519
1637
  }
1520
1638
  }
@@ -1527,7 +1645,7 @@ static void merge_attributes(
1527
1645
  const GumboVector* token_attr = &token->v.start_tag.attributes;
1528
1646
  GumboVector* node_attr = &node->v.element.attributes;
1529
1647
 
1530
- for (int i = 0; i < token_attr->length; ++i) {
1648
+ for (unsigned int i = 0; i < token_attr->length; ++i) {
1531
1649
  GumboAttribute* attr = token_attr->data[i];
1532
1650
  if (!gumbo_get_attribute(node_attr, attr->name)) {
1533
1651
  // Ownership of the attribute is transferred by this gumbo_vector_add,
@@ -1551,8 +1669,8 @@ static void merge_attributes(
1551
1669
  }
1552
1670
 
1553
1671
  const char* gumbo_normalize_svg_tagname(const GumboStringPiece* tag) {
1554
- for (int i = 0;
1555
- i < sizeof(kSvgTagReplacements) / sizeof(ReplacementEntry); ++i) {
1672
+ for (size_t i = 0; i < sizeof(kSvgTagReplacements) / sizeof(ReplacementEntry);
1673
+ ++i) {
1556
1674
  const ReplacementEntry* entry = &kSvgTagReplacements[i];
1557
1675
  if (gumbo_string_equals_ignore_case(tag, &entry->from)) {
1558
1676
  return entry->to.data;
@@ -1567,9 +1685,9 @@ const char* gumbo_normalize_svg_tagname(const GumboStringPiece* tag) {
1567
1685
  static void adjust_foreign_attributes(GumboParser* parser, GumboToken* token) {
1568
1686
  assert(token->type == GUMBO_TOKEN_START_TAG);
1569
1687
  const GumboVector* attributes = &token->v.start_tag.attributes;
1570
- for (int i = 0;
1571
- i < sizeof(kForeignAttributeReplacements) /
1572
- sizeof(NamespacedAttributeReplacement); ++i) {
1688
+ for (size_t i = 0; i < sizeof(kForeignAttributeReplacements) /
1689
+ sizeof(NamespacedAttributeReplacement);
1690
+ ++i) {
1573
1691
  const NamespacedAttributeReplacement* entry =
1574
1692
  &kForeignAttributeReplacements[i];
1575
1693
  GumboAttribute* attr = gumbo_get_attribute(attributes, entry->from);
@@ -1587,7 +1705,7 @@ static void adjust_foreign_attributes(GumboParser* parser, GumboToken* token) {
1587
1705
  static void adjust_svg_attributes(GumboParser* parser, GumboToken* token) {
1588
1706
  assert(token->type == GUMBO_TOKEN_START_TAG);
1589
1707
  const GumboVector* attributes = &token->v.start_tag.attributes;
1590
- for (int i = 0;
1708
+ for (size_t i = 0;
1591
1709
  i < sizeof(kSvgAttributeReplacements) / sizeof(ReplacementEntry); ++i) {
1592
1710
  const ReplacementEntry* entry = &kSvgAttributeReplacements[i];
1593
1711
  GumboAttribute* attr = gumbo_get_attribute(attributes, entry->from.data);
@@ -1604,8 +1722,8 @@ static void adjust_svg_attributes(GumboParser* parser, GumboToken* token) {
1604
1722
  // value.
1605
1723
  static void adjust_mathml_attributes(GumboParser* parser, GumboToken* token) {
1606
1724
  assert(token->type == GUMBO_TOKEN_START_TAG);
1607
- GumboAttribute* attr = gumbo_get_attribute(
1608
- &token->v.start_tag.attributes, "definitionurl");
1725
+ GumboAttribute* attr =
1726
+ gumbo_get_attribute(&token->v.start_tag.attributes, "definitionurl");
1609
1727
  if (!attr) {
1610
1728
  return;
1611
1729
  }
@@ -1613,32 +1731,30 @@ static void adjust_mathml_attributes(GumboParser* parser, GumboToken* token) {
1613
1731
  attr->name = gumbo_copy_stringz(parser, "definitionURL");
1614
1732
  }
1615
1733
 
1616
- static bool doctype_matches(
1617
- const GumboTokenDocType* doctype,
1618
- const GumboStringPiece* public_id,
1619
- const GumboStringPiece* system_id,
1734
+ static bool doctype_matches(const GumboTokenDocType* doctype,
1735
+ const GumboStringPiece* public_id, const GumboStringPiece* system_id,
1620
1736
  bool allow_missing_system_id) {  // True iff the public ids compare equal and the system id either equals system_id or is missing while allow_missing_system_id is set.
1621
1737
  return !strcmp(doctype->public_identifier, public_id->data) &&
1622
- (allow_missing_system_id || doctype->has_system_identifier) &&
1623
- !strcmp(doctype->system_identifier, system_id->data);
1738
+ (doctype->has_system_identifier  // A present system id must match exactly; a missing one matches only when the caller allows it (previously the strcmp also ran for a missing id, so it could never match).
1739
+ ? !strcmp(doctype->system_identifier, system_id->data) : allow_missing_system_id);
1624
1740
  }
1625
1741
 
1626
1742
  static bool maybe_add_doctype_error(
1627
1743
  GumboParser* parser, const GumboToken* token) {
1628
1744
  const GumboTokenDocType* doctype = &token->v.doc_type;
1629
1745
  bool html_doctype = !strcmp(doctype->name, kDoctypeHtml.data);
1630
- if ((!html_doctype ||
1631
- doctype->has_public_identifier ||
1632
- (doctype->has_system_identifier && !strcmp(
1633
- doctype->system_identifier, kSystemIdLegacyCompat.data))) &&
1634
- !(html_doctype && (
1635
- doctype_matches(doctype, &kPublicIdHtml4_0,
1636
- &kSystemIdRecHtml4_0, true) ||
1637
- doctype_matches(doctype, &kPublicIdHtml4_01, &kSystemIdHtml4, true) ||
1638
- doctype_matches(doctype, &kPublicIdXhtml1_0,
1639
- &kSystemIdXhtmlStrict1_1, false) ||
1640
- doctype_matches(doctype, &kPublicIdXhtml1_1,
1641
- &kSystemIdXhtml1_1, false)))) {
1746
+ if ((!html_doctype || doctype->has_public_identifier ||
1747
+ (doctype->has_system_identifier &&
1748
+ !strcmp(
1749
+ doctype->system_identifier, kSystemIdLegacyCompat.data))) &&
1750
+ !(html_doctype && (doctype_matches(doctype, &kPublicIdHtml4_0,
1751
+ &kSystemIdRecHtml4_0, true) ||
1752
+ doctype_matches(doctype, &kPublicIdHtml4_01,
1753
+ &kSystemIdHtml4, true) ||
1754
+ doctype_matches(doctype, &kPublicIdXhtml1_0,
1755
+ &kSystemIdXhtmlStrict1_1, false) ||
1756
+ doctype_matches(doctype, &kPublicIdXhtml1_1,
1757
+ &kSystemIdXhtml1_1, false)))) {
1642
1758
  parser_add_parse_error(parser, token);
1643
1759
  return false;
1644
1760
  }
@@ -1661,7 +1777,7 @@ static void remove_from_parent(GumboParser* parser, GumboNode* node) {
1661
1777
  gumbo_vector_remove_at(parser, index, children);
1662
1778
  node->parent = NULL;
1663
1779
  node->index_within_parent = -1;
1664
- for (int i = index; i < children->length; ++i) {
1780
+ for (unsigned int i = index; i < children->length; ++i) {
1665
1781
  GumboNode* child = children->data[i];
1666
1782
  child->index_within_parent = i;
1667
1783
  }
@@ -1670,29 +1786,38 @@ static void remove_from_parent(GumboParser* parser, GumboNode* node) {
1670
1786
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#an-introduction-to-error-handling-and-strange-cases-in-the-parser
1671
1787
  // Also described in the "in body" handling for end formatting tags.
1672
1788
  static bool adoption_agency_algorithm(
1673
- GumboParser* parser, GumboToken* token, GumboTag closing_tag) {
1789
+ GumboParser* parser, GumboToken* token, GumboTag subject) {
1674
1790
  GumboParserState* state = parser->_parser_state;
1675
1791
  gumbo_debug("Entering adoption agency algorithm.\n");
1676
- // Steps 1-3 & 16:
1677
- for (int i = 0; i < 8; ++i) {
1678
- // Step 4.
1792
+ // Step 1.
1793
+ GumboNode* current_node = get_current_node(parser);
1794
+ if (current_node->v.element.tag_namespace == GUMBO_NAMESPACE_HTML &&
1795
+ current_node->v.element.tag == subject &&
1796
+ gumbo_vector_index_of(
1797
+ &state->_active_formatting_elements, current_node) == -1) {
1798
+ pop_current_node(parser);
1799
+ return false;
1800
+ }
1801
+ // Steps 2-4 & 20:
1802
+ for (unsigned int i = 0; i < 8; ++i) {
1803
+ // Step 5.
1679
1804
  GumboNode* formatting_node = NULL;
1680
1805
  int formatting_node_in_open_elements = -1;
1681
- for (int j = state->_active_formatting_elements.length; --j >= 0; ) {
1806
+ for (int j = state->_active_formatting_elements.length; --j >= 0;) {
1682
1807
  GumboNode* current_node = state->_active_formatting_elements.data[j];
1683
1808
  if (current_node == &kActiveFormattingScopeMarker) {
1684
1809
  gumbo_debug("Broke on scope marker; aborting.\n");
1685
1810
  // Last scope marker; abort the algorithm.
1686
1811
  return false;
1687
1812
  }
1688
- if (current_node->type == GUMBO_NODE_ELEMENT && current_node->v.element.tag == closing_tag) {
1813
+ if (node_html_tag_is(current_node, subject)) {
1689
1814
  // Found it.
1690
1815
  formatting_node = current_node;
1691
- formatting_node_in_open_elements = gumbo_vector_index_of(
1692
- &state->_open_elements, formatting_node);
1816
+ formatting_node_in_open_elements =
1817
+ gumbo_vector_index_of(&state->_open_elements, formatting_node);
1693
1818
  gumbo_debug("Formatting element of tag %s at %d.\n",
1694
- gumbo_normalized_tagname(closing_tag),
1695
- formatting_node_in_open_elements);
1819
+ gumbo_normalized_tagname(subject),
1820
+ formatting_node_in_open_elements);
1696
1821
  break;
1697
1822
  }
1698
1823
  }
@@ -1704,18 +1829,23 @@ static bool adoption_agency_algorithm(
1704
1829
  return false;
1705
1830
  }
1706
1831
 
1832
+ // Step 6
1707
1833
  if (formatting_node_in_open_elements == -1) {
1708
1834
  gumbo_debug("Formatting node not on stack of open elements.\n");
1709
- gumbo_vector_remove(parser, formatting_node,
1710
- &state->_active_formatting_elements);
1835
+ parser_add_parse_error(parser, token);
1836
+ gumbo_vector_remove(
1837
+ parser, formatting_node, &state->_active_formatting_elements);
1711
1838
  return false;
1712
1839
  }
1713
1840
 
1841
+ // Step 7
1714
1842
  if (!has_an_element_in_scope(parser, formatting_node->v.element.tag)) {
1715
1843
  parser_add_parse_error(parser, token);
1716
1844
  gumbo_debug("Element not in scope.\n");
1717
1845
  return false;
1718
1846
  }
1847
+
1848
+ // Step 8
1719
1849
  if (formatting_node != get_current_node(parser)) {
1720
1850
  parser_add_parse_error(parser, token); // But continue onwards.
1721
1851
  }
@@ -1723,55 +1853,60 @@ static bool adoption_agency_algorithm(
1723
1853
  assert(!node_html_tag_is(formatting_node, GUMBO_TAG_HTML));
1724
1854
  assert(!node_html_tag_is(formatting_node, GUMBO_TAG_BODY));
1725
1855
 
1726
- // Step 5 & 6.
1856
+ // Step 9 & 10
1727
1857
  GumboNode* furthest_block = NULL;
1728
- for (int j = formatting_node_in_open_elements;
1858
+ for (unsigned int j = formatting_node_in_open_elements;
1729
1859
  j < state->_open_elements.length; ++j) {
1730
1860
  assert(j > 0);
1731
1861
  GumboNode* current = state->_open_elements.data[j];
1732
1862
  if (is_special_node(current)) {
1733
- // Step 5.
1863
+ // Step 9.
1734
1864
  furthest_block = current;
1735
1865
  break;
1736
1866
  }
1737
1867
  }
1738
1868
  if (!furthest_block) {
1739
- // Step 6.
1869
+ // Step 10.
1740
1870
  while (get_current_node(parser) != formatting_node) {
1741
1871
  pop_current_node(parser);
1742
1872
  }
1743
1873
  // And the formatting element itself.
1744
1874
  pop_current_node(parser);
1745
- gumbo_vector_remove(parser, formatting_node,
1746
- &state->_active_formatting_elements);
1875
+ gumbo_vector_remove(
1876
+ parser, formatting_node, &state->_active_formatting_elements);
1747
1877
  return false;
1748
1878
  }
1749
1879
  assert(!node_html_tag_is(furthest_block, GUMBO_TAG_HTML));
1750
1880
  assert(furthest_block);
1751
1881
 
1752
- // Step 7.
1882
+ // Step 11.
1753
1883
  // Elements may be moved and reparented by this algorithm, so
1754
1884
  // common_ancestor is not necessarily the same as formatting_node->parent.
1755
1885
  GumboNode* common_ancestor =
1756
- state->_open_elements.data[gumbo_vector_index_of(
1757
- &state->_open_elements, formatting_node) - 1];
1886
+ state->_open_elements.data[gumbo_vector_index_of(&state->_open_elements,
1887
+ formatting_node) -
1888
+ 1];
1758
1889
  gumbo_debug("Common ancestor tag = %s, furthest block tag = %s.\n",
1759
- gumbo_normalized_tagname(common_ancestor->v.element.tag),
1760
- gumbo_normalized_tagname(furthest_block->v.element.tag));
1890
+ gumbo_normalized_tagname(common_ancestor->v.element.tag),
1891
+ gumbo_normalized_tagname(furthest_block->v.element.tag));
1761
1892
 
1762
- // Step 8.
1893
+ // Step 12.
1763
1894
  int bookmark = gumbo_vector_index_of(
1764
- &state->_active_formatting_elements, formatting_node);;
1765
- // Step 9.
1895
+ &state->_active_formatting_elements, formatting_node) +
1896
+ 1;
1897
+ gumbo_debug("Bookmark at %d.\n", bookmark);
1898
+ // Step 13.
1766
1899
  GumboNode* node = furthest_block;
1767
1900
  GumboNode* last_node = furthest_block;
1768
1901
  // Must be stored explicitly, in case node is removed from the stack of open
1769
1902
  // elements, to handle step 13.3.
1770
1903
  int saved_node_index = gumbo_vector_index_of(&state->_open_elements, node);
1771
1904
  assert(saved_node_index > 0);
1772
- // Step 9.1-9.3 & 9.11.
1773
- for (int j = 0; j < 3; ++j) {
1774
- // Step 9.4.
1905
+ // Step 13.1.
1906
+ for (int j = 0;;) {
1907
+ // Step 13.2.
1908
+ ++j;
1909
+ // Step 13.3.
1775
1910
  int node_index = gumbo_vector_index_of(&state->_open_elements, node);
1776
1911
  gumbo_debug(
1777
1912
  "Current index: %d, last index: %d.\n", node_index, saved_node_index);
@@ -1780,58 +1915,72 @@ static bool adoption_agency_algorithm(
1780
1915
  }
1781
1916
  saved_node_index = --node_index;
1782
1917
  assert(node_index > 0);
1783
- assert(node_index < state->_open_elements.capacity);
1918
+ assert((unsigned int) node_index < state->_open_elements.capacity);
1784
1919
  node = state->_open_elements.data[node_index];
1785
1920
  assert(node->parent);
1786
- // Step 9.5.
1787
- if (gumbo_vector_index_of(
1788
- &state->_active_formatting_elements, node) == -1) {
1921
+ if (node == formatting_node) {
1922
+ // Step 13.4.
1923
+ break;
1924
+ }
1925
+ int formatting_index =
1926
+ gumbo_vector_index_of(&state->_active_formatting_elements, node);
1927
+ if (j > 3 && formatting_index != -1) {
1928
+ // Step 13.5.
1929
+ gumbo_debug("Removing formatting element at %d.\n", formatting_index);
1930
+ gumbo_vector_remove_at(
1931
+ parser, formatting_index, &state->_active_formatting_elements);
1932
+ // Removing the element shifts all indices over by one, so we may need
1933
+ // to move the bookmark.
1934
+ if (formatting_index < bookmark) {
1935
+ --bookmark;
1936
+ gumbo_debug("Moving bookmark to %d.\n", bookmark);
1937
+ }
1938
+ continue;
1939
+ }
1940
+ if (formatting_index == -1) {
1941
+ // Step 13.6.
1789
1942
  gumbo_vector_remove_at(parser, node_index, &state->_open_elements);
1790
1943
  continue;
1791
- } else if (node == formatting_node) {
1792
- // Step 9.6.
1793
- break;
1794
1944
  }
1795
- // Step 9.7.
1796
- int formatting_index = gumbo_vector_index_of(
1797
- &state->_active_formatting_elements, node);
1945
+ // Step 13.7.
1946
+ // "common ancestor as the intended parent" doesn't actually mean insert
1947
+ // it into the common ancestor; that happens below.
1798
1948
  node = clone_node(parser, node, GUMBO_INSERTION_ADOPTION_AGENCY_CLONED);
1949
+ assert(formatting_index >= 0);
1799
1950
  state->_active_formatting_elements.data[formatting_index] = node;
1951
+ assert(node_index >= 0);
1800
1952
  state->_open_elements.data[node_index] = node;
1801
- // Step 9.8.
1953
+ // Step 13.8.
1802
1954
  if (last_node == furthest_block) {
1803
1955
  bookmark = formatting_index + 1;
1804
- assert(bookmark <= state->_active_formatting_elements.length);
1956
+ gumbo_debug("Bookmark moved to %d.\n", bookmark);
1957
+ assert((unsigned int) bookmark <= state->_active_formatting_elements.length);
1805
1958
  }
1806
- // Step 9.9.
1959
+ // Step 13.9.
1807
1960
  last_node->parse_flags |= GUMBO_INSERTION_ADOPTION_AGENCY_MOVED;
1808
1961
  remove_from_parent(parser, last_node);
1809
1962
  append_node(parser, node, last_node);
1810
- // Step 9.10.
1963
+ // Step 13.10.
1811
1964
  last_node = node;
1812
- }
1965
+ } // Step 13.11.
1813
1966
 
1814
- // Step 10.
1967
+ // Step 14.
1815
1968
  gumbo_debug("Removing %s node from parent ",
1816
- gumbo_normalized_tagname(last_node->v.element.tag));
1969
+ gumbo_normalized_tagname(last_node->v.element.tag));
1817
1970
  remove_from_parent(parser, last_node);
1818
1971
  last_node->parse_flags |= GUMBO_INSERTION_ADOPTION_AGENCY_MOVED;
1819
- if (node_tag_in_set(common_ancestor, (gumbo_tagset) { TAG(TABLE), TAG(TBODY),
1820
- TAG(TFOOT), TAG(THEAD), TAG(TR) })) {
1821
- gumbo_debug("and foster-parenting it.\n");
1822
- foster_parent_element(parser, last_node);
1823
- } else {
1824
- gumbo_debug("and inserting it into %s.\n",
1825
- gumbo_normalized_tagname(common_ancestor->v.element.tag));
1826
- append_node(parser, common_ancestor, last_node);
1827
- }
1972
+ InsertionLocation location =
1973
+ get_appropriate_insertion_location(parser, common_ancestor);
1974
+ gumbo_debug("and inserting it into %s.\n",
1975
+ gumbo_normalized_tagname(location.target->v.element.tag));
1976
+ insert_node(parser, last_node, location);
1828
1977
 
1829
- // Step 11.
1978
+ // Step 15.
1830
1979
  GumboNode* new_formatting_node = clone_node(
1831
1980
  parser, formatting_node, GUMBO_INSERTION_ADOPTION_AGENCY_CLONED);
1832
1981
  formatting_node->parse_flags |= GUMBO_INSERTION_IMPLICIT_END_TAG;
1833
1982
 
1834
- // Step 12. Instead of appending nodes one-by-one, we swap the children
1983
+ // Step 16. Instead of appending nodes one-by-one, we swap the children
1835
1984
  // vector of furthest_block with the empty children of new_formatting_node,
1836
1985
  // reducing memory traffic and allocations. We still have to reset their
1837
1986
  // parent pointers, though.
@@ -1841,15 +1990,15 @@ static bool adoption_agency_algorithm(
1841
1990
  furthest_block->v.element.children = temp;
1842
1991
 
1843
1992
  temp = new_formatting_node->v.element.children;
1844
- for (int i = 0; i < temp.length; ++i) {
1993
+ for (unsigned int i = 0; i < temp.length; ++i) {
1845
1994
  GumboNode* child = temp.data[i];
1846
1995
  child->parent = new_formatting_node;
1847
1996
  }
1848
1997
 
1849
- // Step 13.
1998
+ // Step 17.
1850
1999
  append_node(parser, furthest_block, new_formatting_node);
1851
2000
 
1852
- // Step 14.
2001
+ // Step 18.
1853
2002
  // If the formatting node was before the bookmark, it may shift over all
1854
2003
  // indices after it, so we need to explicitly find the index and possibly
1855
2004
  // adjust the bookmark.
@@ -1857,25 +2006,27 @@ static bool adoption_agency_algorithm(
1857
2006
  &state->_active_formatting_elements, formatting_node);
1858
2007
  assert(formatting_node_index != -1);
1859
2008
  if (formatting_node_index < bookmark) {
2009
+ gumbo_debug(
2010
+ "Formatting node at %d is before bookmark at %d; decrementing.\n",
2011
+ formatting_node_index, bookmark);
1860
2012
  --bookmark;
1861
2013
  }
1862
2014
  gumbo_vector_remove_at(
1863
2015
  parser, formatting_node_index, &state->_active_formatting_elements);
1864
2016
  assert(bookmark >= 0);
1865
- assert(bookmark <= state->_active_formatting_elements.length);
2017
+ assert((unsigned int) bookmark <= state->_active_formatting_elements.length);
1866
2018
  gumbo_vector_insert_at(parser, new_formatting_node, bookmark,
1867
- &state->_active_formatting_elements);
2019
+ &state->_active_formatting_elements);
1868
2020
 
1869
- // Step 15.
1870
- gumbo_vector_remove(
1871
- parser, formatting_node, &state->_open_elements);
1872
- int insert_at = gumbo_vector_index_of(
1873
- &state->_open_elements, furthest_block) + 1;
2021
+ // Step 19.
2022
+ gumbo_vector_remove(parser, formatting_node, &state->_open_elements);
2023
+ int insert_at =
2024
+ gumbo_vector_index_of(&state->_open_elements, furthest_block) + 1;
1874
2025
  assert(insert_at >= 0);
1875
- assert(insert_at <= state->_open_elements.length);
2026
+ assert((unsigned int) insert_at <= state->_open_elements.length);
1876
2027
  gumbo_vector_insert_at(
1877
2028
  parser, new_formatting_node, insert_at, &state->_open_elements);
1878
- }
2029
+ } // Step 20.
1879
2030
  return true;
1880
2031
  }
1881
2032
 
@@ -1898,6 +2049,7 @@ static void ignore_token(GumboParser* parser) {
1898
2049
 
1899
2050
  // http://www.whatwg.org/specs/web-apps/current-work/complete/the-end.html
1900
2051
  static void finish_parsing(GumboParser* parser) {
2052
+ gumbo_debug("Finishing parsing");
1901
2053
  maybe_flush_text_node_buffer(parser);
1902
2054
  GumboParserState* state = parser->_parser_state;
1903
2055
  for (GumboNode* node = pop_current_node(parser); node;
@@ -1908,7 +2060,8 @@ static void finish_parsing(GumboParser* parser) {
1908
2060
  }
1909
2061
  node->parse_flags |= GUMBO_INSERTION_IMPLICIT_END_TAG;
1910
2062
  }
1911
- while (pop_current_node(parser)); // Pop them all.
2063
+ while (pop_current_node(parser))
2064
+ ; // Pop them all.
1912
2065
  }
1913
2066
 
1914
2067
  static bool handle_initial(GumboParser* parser, GumboToken* token) {
@@ -1952,9 +2105,9 @@ static bool handle_before_html(GumboParser* parser, GumboToken* token) {
1952
2105
  parser->_output->root = html_node;
1953
2106
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_BEFORE_HEAD);
1954
2107
  return true;
1955
- } else if (token->type == GUMBO_TOKEN_END_TAG &&
1956
- !tag_in(token, false, (gumbo_tagset) { TAG(HEAD), TAG(BODY), TAG(HTML),
1957
- TAG(BR) } )) {
2108
+ } else if (token->type == GUMBO_TOKEN_END_TAG &&
2109
+ !tag_in(token, false,
2110
+ (gumbo_tagset){TAG(HEAD), TAG(BODY), TAG(HTML), TAG(BR)})) {
1958
2111
  parser_add_parse_error(parser, token);
1959
2112
  ignore_token(parser);
1960
2113
  return false;
@@ -1986,9 +2139,9 @@ static bool handle_before_head(GumboParser* parser, GumboToken* token) {
1986
2139
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_HEAD);
1987
2140
  parser->_parser_state->_head_element = node;
1988
2141
  return true;
1989
- } else if (token->type == GUMBO_TOKEN_END_TAG &&
1990
- !tag_in(token, false, (gumbo_tagset) { TAG(HEAD), TAG(BODY), TAG(HTML),
1991
- TAG(BR) })) {
2142
+ } else if (token->type == GUMBO_TOKEN_END_TAG &&
2143
+ !tag_in(token, false,
2144
+ (gumbo_tagset){TAG(HEAD), TAG(BODY), TAG(HTML), TAG(BR)})) {
1992
2145
  parser_add_parse_error(parser, token);
1993
2146
  ignore_token(parser);
1994
2147
  return false;
@@ -2020,8 +2173,9 @@ static bool handle_in_head(GumboParser* parser, GumboToken* token) {
2020
2173
  return true;
2021
2174
  } else if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
2022
2175
  return handle_in_body(parser, token);
2023
- } else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(BASE), TAG(BASEFONT),
2024
- TAG(BGSOUND), TAG(MENUITEM), TAG(LINK) })) {
2176
+ } else if (tag_in(token, kStartTag,
2177
+ (gumbo_tagset){TAG(BASE), TAG(BASEFONT), TAG(BGSOUND),
2178
+ TAG(MENUITEM), TAG(LINK)})) {
2025
2179
  insert_element_from_token(parser, token);
2026
2180
  pop_current_node(parser);
2027
2181
  acknowledge_self_closing_tag(parser);
@@ -2038,7 +2192,8 @@ static bool handle_in_head(GumboParser* parser, GumboToken* token) {
2038
2192
  } else if (tag_is(token, kStartTag, GUMBO_TAG_TITLE)) {
2039
2193
  run_generic_parsing_algorithm(parser, token, GUMBO_LEX_RCDATA);
2040
2194
  return true;
2041
- } else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(NOFRAMES), TAG(STYLE) })) {
2195
+ } else if (tag_in(
2196
+ token, kStartTag, (gumbo_tagset){TAG(NOFRAMES), TAG(STYLE)})) {
2042
2197
  run_generic_parsing_algorithm(parser, token, GUMBO_LEX_RAWTEXT);
2043
2198
  return true;
2044
2199
  } else if (tag_is(token, kStartTag, GUMBO_TAG_NOSCRIPT)) {
@@ -2054,29 +2209,48 @@ static bool handle_in_head(GumboParser* parser, GumboToken* token) {
2054
2209
  assert(node_html_tag_is(head, GUMBO_TAG_HEAD));
2055
2210
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_HEAD);
2056
2211
  return true;
2057
- } else if (tag_is(token, kStartTag, GUMBO_TAG_HEAD)) {
2058
- parser_add_parse_error(parser, token);
2059
- ignore_token(parser);
2060
- return false;
2212
+ } else if (tag_in(token, kEndTag,
2213
+ (gumbo_tagset){TAG(BODY), TAG(HTML), TAG(BR)})) {
2214
+ pop_current_node(parser);
2215
+ set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_HEAD);
2216
+ parser->_parser_state->_reprocess_current_token = true;
2217
+ return true;
2218
+ } else if (tag_is(token, kStartTag, GUMBO_TAG_TEMPLATE)) {
2219
+ insert_element_from_token(parser, token);
2220
+ add_formatting_element(parser, &kActiveFormattingScopeMarker);
2221
+ parser->_parser_state->_frameset_ok = false;
2222
+ set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TEMPLATE);
2223
+ push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TEMPLATE);
2224
+ return true;
2225
+ } else if (tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
2226
+ if (!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
2227
+ parser_add_parse_error(parser, token);
2228
+ ignore_token(parser);
2229
+ return false;
2230
+ }
2231
+ generate_all_implied_end_tags_thoroughly(parser);
2232
+ bool success = true;
2233
+ if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_TEMPLATE)) {
2234
+ parser_add_parse_error(parser, token);
2235
+ success = false;
2236
+ }
2237
+ while (!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_TEMPLATE))
2238
+ ;
2239
+ clear_active_formatting_elements(parser);
2240
+ pop_template_insertion_mode(parser);
2241
+ reset_insertion_mode_appropriately(parser);
2242
+ return success;
2061
2243
  } else if (tag_is(token, kStartTag, GUMBO_TAG_HEAD) ||
2062
- (token->type == GUMBO_TOKEN_END_TAG &&
2063
- !tag_in(token, kEndTag, (gumbo_tagset) { TAG(BODY), TAG(HTML),
2064
- TAG(BR) }))) {
2065
- parser_add_parse_error(parser, token);
2066
- return false;
2067
- } else if (tag_is(token, kStartTag, GUMBO_TAG_UNKNOWN) && token->v.start_tag.is_self_closing) {
2244
+ (token->type == GUMBO_TOKEN_END_TAG)) {
2068
2245
  parser_add_parse_error(parser, token);
2069
2246
  ignore_token(parser);
2070
2247
  return false;
2071
2248
  } else {
2072
- const GumboNode* node = pop_current_node(parser);
2073
- assert(node_html_tag_is(node, GUMBO_TAG_HEAD));
2074
- AVOID_UNUSED_VARIABLE_WARNING(node);
2249
+ pop_current_node(parser);
2075
2250
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_HEAD);
2076
2251
  parser->_parser_state->_reprocess_current_token = true;
2077
2252
  return true;
2078
2253
  }
2079
-
2080
2254
  return true;
2081
2255
  }
2082
2256
 
@@ -2095,12 +2269,14 @@ static bool handle_in_head_noscript(GumboParser* parser, GumboToken* token) {
2095
2269
  return true;
2096
2270
  } else if (token->type == GUMBO_TOKEN_WHITESPACE ||
2097
2271
  token->type == GUMBO_TOKEN_COMMENT ||
2098
- tag_in(token, kStartTag, (gumbo_tagset) { TAG(BASEFONT), TAG(BGSOUND),
2099
- TAG(LINK), TAG(META), TAG(NOFRAMES), TAG(STYLE) })) {
2100
- return handle_in_head(parser, token);
2101
- } else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(HEAD), TAG(NOSCRIPT) }) ||
2102
- (token->type == GUMBO_TOKEN_END_TAG &&
2103
- !tag_is(token, kEndTag, GUMBO_TAG_BR))) {
2272
+ tag_in(token, kStartTag,
2273
+ (gumbo_tagset){TAG(BASEFONT), TAG(BGSOUND), TAG(LINK),
2274
+ TAG(META), TAG(NOFRAMES), TAG(STYLE)})) {
2275
+ return handle_in_head(parser, token);
2276
+ } else if (tag_in(
2277
+ token, kStartTag, (gumbo_tagset){TAG(HEAD), TAG(NOSCRIPT)}) ||
2278
+ (token->type == GUMBO_TOKEN_END_TAG &&
2279
+ !tag_is(token, kEndTag, GUMBO_TAG_BR))) {
2104
2280
  parser_add_parse_error(parser, token);
2105
2281
  ignore_token(parser);
2106
2282
  return false;
@@ -2139,10 +2315,10 @@ static bool handle_after_head(GumboParser* parser, GumboToken* token) {
2139
2315
  insert_element_from_token(parser, token);
2140
2316
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_FRAMESET);
2141
2317
  return true;
2142
- } else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(BASE), TAG(BASEFONT),
2143
- TAG(BGSOUND), TAG(LINK), TAG(META),
2144
- TAG(NOFRAMES), TAG(SCRIPT), TAG(STYLE),
2145
- TAG(TITLE) })) {
2318
+ } else if (tag_in(token, kStartTag,
2319
+ (gumbo_tagset){TAG(BASE), TAG(BASEFONT), TAG(BGSOUND),
2320
+ TAG(LINK), TAG(META), TAG(NOFRAMES), TAG(SCRIPT),
2321
+ TAG(STYLE), TAG(TEMPLATE), TAG(TITLE)})) {
2146
2322
  parser_add_parse_error(parser, token);
2147
2323
  assert(state->_head_element != NULL);
2148
2324
  // This must be flushed before we push the head element on, as there may be
@@ -2152,9 +2328,12 @@ static bool handle_after_head(GumboParser* parser, GumboToken* token) {
2152
2328
  bool result = handle_in_head(parser, token);
2153
2329
  gumbo_vector_remove(parser, state->_head_element, &state->_open_elements);
2154
2330
  return result;
2331
+ } else if (tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
2332
+ return handle_in_head(parser, token);
2155
2333
  } else if (tag_is(token, kStartTag, GUMBO_TAG_HEAD) ||
2156
- (token->type == GUMBO_TOKEN_END_TAG &&
2157
- !tag_in(token, kEndTag, (gumbo_tagset) { TAG(BODY), TAG(HTML), TAG(BR) }))) {
2334
+ (token->type == GUMBO_TOKEN_END_TAG &&
2335
+ !tag_in(token, kEndTag,
2336
+ (gumbo_tagset){TAG(BODY), TAG(HTML), TAG(BR)}))) {
2158
2337
  parser_add_parse_error(parser, token);
2159
2338
  ignore_token(parser);
2160
2339
  return false;
@@ -2168,24 +2347,23 @@ static bool handle_after_head(GumboParser* parser, GumboToken* token) {
2168
2347
 
2169
2348
  static void destroy_node(GumboParser* parser, GumboNode* node) {
2170
2349
  switch (node->type) {
2171
- case GUMBO_NODE_DOCUMENT:
2172
- {
2173
- GumboDocument* doc = &node->v.document;
2174
- for (int i = 0; i < doc->children.length; ++i) {
2175
- destroy_node(parser, doc->children.data[i]);
2176
- }
2177
- gumbo_parser_deallocate(parser, (void*) doc->children.data);
2178
- gumbo_parser_deallocate(parser, (void*) doc->name);
2179
- gumbo_parser_deallocate(parser, (void*) doc->public_identifier);
2180
- gumbo_parser_deallocate(parser, (void*) doc->system_identifier);
2350
+ case GUMBO_NODE_DOCUMENT: {
2351
+ GumboDocument* doc = &node->v.document;
2352
+ for (unsigned int i = 0; i < doc->children.length; ++i) {
2353
+ destroy_node(parser, doc->children.data[i]);
2181
2354
  }
2182
- break;
2355
+ gumbo_parser_deallocate(parser, (void*) doc->children.data);
2356
+ gumbo_parser_deallocate(parser, (void*) doc->name);
2357
+ gumbo_parser_deallocate(parser, (void*) doc->public_identifier);
2358
+ gumbo_parser_deallocate(parser, (void*) doc->system_identifier);
2359
+ } break;
2360
+ case GUMBO_NODE_TEMPLATE:
2183
2361
  case GUMBO_NODE_ELEMENT:
2184
- for (int i = 0; i < node->v.element.attributes.length; ++i) {
2362
+ for (unsigned int i = 0; i < node->v.element.attributes.length; ++i) {
2185
2363
  gumbo_destroy_attribute(parser, node->v.element.attributes.data[i]);
2186
2364
  }
2187
2365
  gumbo_parser_deallocate(parser, node->v.element.attributes.data);
2188
- for (int i = 0; i < node->v.element.children.length; ++i) {
2366
+ for (unsigned int i = 0; i < node->v.element.children.length; ++i) {
2189
2367
  destroy_node(parser, node->v.element.children.data[i]);
2190
2368
  }
2191
2369
  gumbo_parser_deallocate(parser, node->v.element.children.data);
@@ -2226,20 +2404,26 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2226
2404
  ignore_token(parser);
2227
2405
  return false;
2228
2406
  } else if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
2407
+ parser_add_parse_error(parser, token);
2408
+ if (has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
2409
+ ignore_token(parser);
2410
+ return false;
2411
+ }
2229
2412
  assert(parser->_output->root != NULL);
2230
2413
  assert(parser->_output->root->type == GUMBO_NODE_ELEMENT);
2231
- parser_add_parse_error(parser, token);
2232
2414
  merge_attributes(parser, token, parser->_output->root);
2233
2415
  return false;
2234
- } else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(BASE), TAG(BASEFONT),
2235
- TAG(BGSOUND), TAG(MENUITEM), TAG(LINK),
2236
- TAG(META), TAG(NOFRAMES), TAG(SCRIPT),
2237
- TAG(STYLE), TAG(TITLE) } )) {
2416
+ } else if (tag_in(token, kStartTag,
2417
+ (gumbo_tagset){TAG(BASE), TAG(BASEFONT), TAG(BGSOUND),
2418
+ TAG(MENUITEM), TAG(LINK), TAG(META), TAG(NOFRAMES),
2419
+ TAG(SCRIPT), TAG(STYLE), TAG(TEMPLATE), TAG(TITLE)}) ||
2420
+ tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
2238
2421
  return handle_in_head(parser, token);
2239
2422
  } else if (tag_is(token, kStartTag, GUMBO_TAG_BODY)) {
2240
2423
  parser_add_parse_error(parser, token);
2241
2424
  if (state->_open_elements.length < 2 ||
2242
- !node_html_tag_is(state->_open_elements.data[1], GUMBO_TAG_BODY)) {
2425
+ !node_html_tag_is(state->_open_elements.data[1], GUMBO_TAG_BODY) ||
2426
+ has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
2243
2427
  ignore_token(parser);
2244
2428
  return false;
2245
2429
  }
@@ -2273,7 +2457,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2273
2457
  // Remove the body node. We may want to factor this out into a generic
2274
2458
  // helper, but right now this is the only code that needs to do this.
2275
2459
  GumboVector* children = &parser->_output->root->v.element.children;
2276
- for (int i = 0; i < children->length; ++i) {
2460
+ for (unsigned int i = 0; i < children->length; ++i) {
2277
2461
  if (children->data[i] == body_node) {
2278
2462
  gumbo_vector_remove_at(parser, i, children);
2279
2463
  break;
@@ -2286,27 +2470,32 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2286
2470
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_FRAMESET);
2287
2471
  return true;
2288
2472
  } else if (token->type == GUMBO_TOKEN_EOF) {
2289
- for (int i = 0; i < state->_open_elements.length; ++i) {
2290
- if (!node_tag_in_set(state->_open_elements.data[i], (gumbo_tagset) { TAG(DD),
2291
- TAG(DT), TAG(LI), TAG(P), TAG(TBODY), TAG(TD), TAG(TFOOT), TAG(TH),
2292
- TAG(THEAD), TAG(TR), TAG(BODY), TAG(HTML) } )) {
2473
+ for (unsigned int i = 0; i < state->_open_elements.length; ++i) {
2474
+ if (!node_tag_in_set(state->_open_elements.data[i],
2475
+ (gumbo_tagset){TAG(DD), TAG(DT), TAG(LI), TAG(P), TAG(TBODY),
2476
+ TAG(TD), TAG(TFOOT), TAG(TH), TAG(THEAD), TAG(TR), TAG(BODY),
2477
+ TAG(HTML)})) {
2293
2478
  parser_add_parse_error(parser, token);
2294
- return false;
2295
2479
  }
2296
2480
  }
2481
+ if (get_current_template_insertion_mode(parser) !=
2482
+ GUMBO_INSERTION_MODE_INITIAL) {
2483
+ return handle_in_template(parser, token);
2484
+ }
2297
2485
  return true;
2298
- } else if (tag_in(token, kEndTag, (gumbo_tagset) { TAG(BODY), TAG(HTML) })) {
2486
+ } else if (tag_in(token, kEndTag, (gumbo_tagset){TAG(BODY), TAG(HTML)})) {
2299
2487
  if (!has_an_element_in_scope(parser, GUMBO_TAG_BODY)) {
2300
2488
  parser_add_parse_error(parser, token);
2301
2489
  ignore_token(parser);
2302
2490
  return false;
2303
2491
  }
2304
2492
  bool success = true;
2305
- for (int i = 0; i < state->_open_elements.length; ++i) {
2306
- if (!node_tag_in_set(state->_open_elements.data[i], (gumbo_tagset) { TAG(DD),
2307
- TAG(DT), TAG(LI), TAG(OPTGROUP), TAG(OPTION), TAG(P), TAG(RP),
2308
- TAG(RT), TAG(TBODY), TAG(TD), TAG(TFOOT), TAG(TH), TAG(THEAD),
2309
- TAG(TR), TAG(BODY), TAG(HTML) })) {
2493
+ for (unsigned int i = 0; i < state->_open_elements.length; ++i) {
2494
+ if (!node_tag_in_set(state->_open_elements.data[i],
2495
+ (gumbo_tagset){TAG(DD), TAG(DT), TAG(LI), TAG(OPTGROUP),
2496
+ TAG(OPTION), TAG(P), TAG(RB), TAG(RP), TAG(RT), TAG(RTC),
2497
+ TAG(TBODY), TAG(TD), TAG(TFOOT), TAG(TH), TAG(THEAD), TAG(TR),
2498
+ TAG(BODY), TAG(HTML)})) {
2310
2499
  parser_add_parse_error(parser, token);
2311
2500
  success = false;
2312
2501
  break;
@@ -2321,48 +2510,54 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2321
2510
  record_end_of_element(state->_current_token, &body->v.element);
2322
2511
  }
2323
2512
  return success;
2324
- } else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(ADDRESS), TAG(ARTICLE),
2325
- TAG(ASIDE), TAG(BLOCKQUOTE), TAG(CENTER), TAG(DETAILS),
2326
- TAG(DIR), TAG(DIV), TAG(DL), TAG(FIELDSET), TAG(FIGCAPTION),
2327
- TAG(FIGURE), TAG(FOOTER), TAG(HEADER), TAG(HGROUP), TAG(MENU),
2328
- TAG(NAV), TAG(OL), TAG(P), TAG(SECTION), TAG(SUMMARY), TAG(UL) })) {
2513
+ } else if (tag_in(token, kStartTag,
2514
+ (gumbo_tagset){TAG(ADDRESS), TAG(ARTICLE), TAG(ASIDE),
2515
+ TAG(BLOCKQUOTE), TAG(CENTER), TAG(DETAILS), TAG(DIR),
2516
+ TAG(DIV), TAG(DL), TAG(FIELDSET), TAG(FIGCAPTION),
2517
+ TAG(FIGURE), TAG(FOOTER), TAG(HEADER), TAG(HGROUP),
2518
+ TAG(MENU), TAG(MAIN), TAG(NAV), TAG(OL), TAG(P),
2519
+ TAG(SECTION), TAG(SUMMARY), TAG(UL)})) {
2329
2520
  bool result = maybe_implicitly_close_p_tag(parser, token);
2330
2521
  insert_element_from_token(parser, token);
2331
2522
  return result;
2332
- } else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(H1), TAG(H2), TAG(H3),
2333
- TAG(H4), TAG(H5), TAG(H6) })) {
2523
+ } else if (tag_in(token, kStartTag, (gumbo_tagset){TAG(H1), TAG(H2), TAG(H3),
2524
+ TAG(H4), TAG(H5), TAG(H6)})) {
2334
2525
  bool result = maybe_implicitly_close_p_tag(parser, token);
2335
- if (node_tag_in_set(get_current_node(parser), (gumbo_tagset) { TAG(H1), TAG(H2),
2336
- TAG(H3), TAG(H4), TAG(H5), TAG(H6) })) {
2526
+ if (node_tag_in_set(
2527
+ get_current_node(parser), (gumbo_tagset){TAG(H1), TAG(H2), TAG(H3),
2528
+ TAG(H4), TAG(H5), TAG(H6)})) {
2337
2529
  parser_add_parse_error(parser, token);
2338
2530
  pop_current_node(parser);
2339
2531
  result = false;
2340
2532
  }
2341
2533
  insert_element_from_token(parser, token);
2342
2534
  return result;
2343
- } else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(PRE), TAG(LISTING) })) {
2535
+ } else if (tag_in(token, kStartTag, (gumbo_tagset){TAG(PRE), TAG(LISTING)})) {
2344
2536
  bool result = maybe_implicitly_close_p_tag(parser, token);
2345
2537
  insert_element_from_token(parser, token);
2346
2538
  state->_ignore_next_linefeed = true;
2347
2539
  state->_frameset_ok = false;
2348
2540
  return result;
2349
2541
  } else if (tag_is(token, kStartTag, GUMBO_TAG_FORM)) {
2350
- if (state->_form_element != NULL) {
2542
+ if (state->_form_element != NULL &&
2543
+ !has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
2351
2544
  gumbo_debug("Ignoring nested form.\n");
2352
2545
  parser_add_parse_error(parser, token);
2353
2546
  ignore_token(parser);
2354
2547
  return false;
2355
2548
  }
2356
2549
  bool result = maybe_implicitly_close_p_tag(parser, token);
2357
- state->_form_element =
2358
- insert_element_from_token(parser, token);
2550
+ GumboNode* form_element = insert_element_from_token(parser, token);
2551
+ if (!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
2552
+ state->_form_element = form_element;
2553
+ }
2359
2554
  return result;
2360
2555
  } else if (tag_is(token, kStartTag, GUMBO_TAG_LI)) {
2361
2556
  maybe_implicitly_close_list_tag(parser, token, true);
2362
2557
  bool result = maybe_implicitly_close_p_tag(parser, token);
2363
2558
  insert_element_from_token(parser, token);
2364
2559
  return result;
2365
- } else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(DD), TAG(DT) })) {
2560
+ } else if (tag_in(token, kStartTag, (gumbo_tagset){TAG(DD), TAG(DT)})) {
2366
2561
  maybe_implicitly_close_list_tag(parser, token, false);
2367
2562
  bool result = maybe_implicitly_close_p_tag(parser, token);
2368
2563
  insert_element_from_token(parser, token);
@@ -2375,7 +2570,8 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2375
2570
  } else if (tag_is(token, kStartTag, GUMBO_TAG_BUTTON)) {
2376
2571
  if (has_an_element_in_scope(parser, GUMBO_TAG_BUTTON)) {
2377
2572
  parser_add_parse_error(parser, token);
2378
- implicitly_close_tags(parser, token, GUMBO_NAMESPACE_HTML, GUMBO_TAG_BUTTON);
2573
+ implicitly_close_tags(
2574
+ parser, token, GUMBO_NAMESPACE_HTML, GUMBO_TAG_BUTTON);
2379
2575
  state->_reprocess_current_token = true;
2380
2576
  return false;
2381
2577
  }
@@ -2383,45 +2579,63 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2383
2579
  insert_element_from_token(parser, token);
2384
2580
  state->_frameset_ok = false;
2385
2581
  return true;
2386
- } else if (tag_in(token, kEndTag, (gumbo_tagset) { TAG(ADDRESS), TAG(ARTICLE),
2387
- TAG(ASIDE), TAG(BLOCKQUOTE), TAG(BUTTON), TAG(CENTER), TAG(DETAILS),
2388
- TAG(DIR), TAG(DIV), TAG(DL), TAG(FIELDSET), TAG(FIGCAPTION),
2389
- TAG(FIGURE), TAG(FOOTER), TAG(HEADER), TAG(HGROUP), TAG(LISTING),
2390
- TAG(MENU), TAG(NAV), TAG(OL), TAG(PRE),
2391
- TAG(SECTION), TAG(SUMMARY), TAG(UL) })) {
2582
+ } else if (tag_in(token, kEndTag,
2583
+ (gumbo_tagset){TAG(ADDRESS), TAG(ARTICLE), TAG(ASIDE),
2584
+ TAG(BLOCKQUOTE), TAG(BUTTON), TAG(CENTER), TAG(DETAILS),
2585
+ TAG(DIR), TAG(DIV), TAG(DL), TAG(FIELDSET),
2586
+ TAG(FIGCAPTION), TAG(FIGURE), TAG(FOOTER), TAG(HEADER),
2587
+ TAG(HGROUP), TAG(LISTING), TAG(MAIN), TAG(MENU), TAG(NAV),
2588
+ TAG(OL), TAG(PRE), TAG(SECTION), TAG(SUMMARY), TAG(UL)})) {
2392
2589
  GumboTag tag = token->v.end_tag;
2393
2590
  if (!has_an_element_in_scope(parser, tag)) {
2394
2591
  parser_add_parse_error(parser, token);
2395
2592
  ignore_token(parser);
2396
2593
  return false;
2397
2594
  }
2398
- implicitly_close_tags(parser, token, GUMBO_NAMESPACE_HTML, token->v.end_tag);
2595
+ implicitly_close_tags(
2596
+ parser, token, GUMBO_NAMESPACE_HTML, token->v.end_tag);
2399
2597
  return true;
2400
2598
  } else if (tag_is(token, kEndTag, GUMBO_TAG_FORM)) {
2401
- bool result = true;
2402
- const GumboNode* node = state->_form_element;
2403
- assert(!node || node->type == GUMBO_NODE_ELEMENT);
2404
- state->_form_element = NULL;
2405
- if (!node || !has_node_in_scope(parser, node)) {
2406
- gumbo_debug("Closing an unopened form.\n");
2407
- parser_add_parse_error(parser, token);
2408
- ignore_token(parser);
2409
- return false;
2410
- }
2411
- // This differs from implicitly_close_tags because we remove *only* the
2412
- // <form> element; other nodes are left in scope.
2413
- generate_implied_end_tags(parser, GUMBO_TAG_LAST);
2414
- if (get_current_node(parser) != node) {
2415
- parser_add_parse_error(parser, token);
2416
- result = false;
2417
- }
2599
+ if (has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
2600
+ if (!has_an_element_in_scope(parser, GUMBO_TAG_FORM)) {
2601
+ parser_add_parse_error(parser, token);
2602
+ ignore_token(parser);
2603
+ return false;
2604
+ }
2605
+ bool success = true;
2606
+ generate_implied_end_tags(parser, GUMBO_TAG_LAST);
2607
+ if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_FORM)) {
2608
+ parser_add_parse_error(parser, token);
2609
+ return false;
2610
+ }
2611
+ while (!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_FORM))
2612
+ ;
2613
+ return success;
2614
+ } else {
2615
+ bool result = true;
2616
+ const GumboNode* node = state->_form_element;
2617
+ assert(!node || node->type == GUMBO_NODE_ELEMENT);
2618
+ state->_form_element = NULL;
2619
+ if (!node || !has_node_in_scope(parser, node)) {
2620
+ gumbo_debug("Closing an unopened form.\n");
2621
+ parser_add_parse_error(parser, token);
2622
+ ignore_token(parser);
2623
+ return false;
2624
+ }
2625
+ // This differs from implicitly_close_tags because we remove *only* the
2626
+ // <form> element; other nodes are left in scope.
2627
+ generate_implied_end_tags(parser, GUMBO_TAG_LAST);
2628
+ if (get_current_node(parser) != node) {
2629
+ parser_add_parse_error(parser, token);
2630
+ result = false;
2631
+ }
2418
2632
 
2419
- GumboVector* open_elements = &state->_open_elements;
2420
- int index = open_elements->length - 1;
2421
- for (; index >= 0 && open_elements->data[index] != node; --index);
2422
- assert(index >= 0);
2423
- gumbo_vector_remove_at(parser, index, open_elements);
2424
- return result;
2633
+ GumboVector* open_elements = &state->_open_elements;
2634
+ int index = gumbo_vector_index_of(open_elements, node);
2635
+ assert(index >= 0);
2636
+ gumbo_vector_remove_at(parser, index, open_elements);
2637
+ return result;
2638
+ }
2425
2639
  } else if (tag_is(token, kEndTag, GUMBO_TAG_P)) {
2426
2640
  if (!has_an_element_in_button_scope(parser, GUMBO_TAG_P)) {
2427
2641
  parser_add_parse_error(parser, token);
@@ -2431,15 +2645,17 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2431
2645
  state->_reprocess_current_token = true;
2432
2646
  return false;
2433
2647
  }
2434
- return implicitly_close_tags(parser, token, GUMBO_NAMESPACE_HTML, GUMBO_TAG_P);
2648
+ return implicitly_close_tags(
2649
+ parser, token, GUMBO_NAMESPACE_HTML, GUMBO_TAG_P);
2435
2650
  } else if (tag_is(token, kEndTag, GUMBO_TAG_LI)) {
2436
2651
  if (!has_an_element_in_list_scope(parser, GUMBO_TAG_LI)) {
2437
2652
  parser_add_parse_error(parser, token);
2438
2653
  ignore_token(parser);
2439
2654
  return false;
2440
2655
  }
2441
- return implicitly_close_tags(parser, token, GUMBO_NAMESPACE_HTML, GUMBO_TAG_LI);
2442
- } else if (tag_in(token, kEndTag, (gumbo_tagset) { TAG(DD), TAG(DT) })) {
2656
+ return implicitly_close_tags(
2657
+ parser, token, GUMBO_NAMESPACE_HTML, GUMBO_TAG_LI);
2658
+ } else if (tag_in(token, kEndTag, (gumbo_tagset){TAG(DD), TAG(DT)})) {
2443
2659
  assert(token->type == GUMBO_TOKEN_END_TAG);
2444
2660
  GumboTag token_tag = token->v.end_tag;
2445
2661
  if (!has_an_element_in_scope(parser, token_tag)) {
@@ -2447,11 +2663,13 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2447
2663
  ignore_token(parser);
2448
2664
  return false;
2449
2665
  }
2450
- return implicitly_close_tags(parser, token, GUMBO_NAMESPACE_HTML, token_tag);
2451
- } else if (tag_in(token, kEndTag, (gumbo_tagset) { TAG(H1), TAG(H2), TAG(H3),
2452
- TAG(H4), TAG(H5), TAG(H6) })) {
2453
- if (!has_an_element_in_scope_with_tagname(parser, (gumbo_tagset) { TAG(H1), TAG(H2), TAG(H3), TAG(H4),
2454
- TAG(H5), TAG(H6) })) {
2666
+ return implicitly_close_tags(
2667
+ parser, token, GUMBO_NAMESPACE_HTML, token_tag);
2668
+ } else if (tag_in(token, kEndTag, (gumbo_tagset){TAG(H1), TAG(H2), TAG(H3),
2669
+ TAG(H4), TAG(H5), TAG(H6)})) {
2670
+ if (!has_an_element_in_scope_with_tagname(
2671
+ parser, 6, (GumboTag[]){GUMBO_TAG_H1, GUMBO_TAG_H2, GUMBO_TAG_H3,
2672
+ GUMBO_TAG_H4, GUMBO_TAG_H5, GUMBO_TAG_H6})) {
2455
2673
  // No heading open; ignore the token entirely.
2456
2674
  parser_add_parse_error(parser, token);
2457
2675
  ignore_token(parser);
@@ -2469,8 +2687,9 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2469
2687
  }
2470
2688
  do {
2471
2689
  current_node = pop_current_node(parser);
2472
- } while (!node_tag_in_set(current_node, (gumbo_tagset) { TAG(H1), TAG(H2),
2473
- TAG(H3), TAG(H4), TAG(H5), TAG(H6) } ));
2690
+ } while (!node_tag_in_set(
2691
+ current_node, (gumbo_tagset){TAG(H1), TAG(H2), TAG(H3),
2692
+ TAG(H4), TAG(H5), TAG(H6)}));
2474
2693
  return success;
2475
2694
  }
2476
2695
  } else if (tag_is(token, kStartTag, GUMBO_TAG_A)) {
@@ -2488,18 +2707,17 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2488
2707
  if (find_last_anchor_index(parser, &last_a)) {
2489
2708
  void* last_element = gumbo_vector_remove_at(
2490
2709
  parser, last_a, &state->_active_formatting_elements);
2491
- gumbo_vector_remove(
2492
- parser, last_element, &state->_open_elements);
2710
+ gumbo_vector_remove(parser, last_element, &state->_open_elements);
2493
2711
  }
2494
2712
  success = false;
2495
2713
  }
2496
2714
  reconstruct_active_formatting_elements(parser);
2497
2715
  add_formatting_element(parser, insert_element_from_token(parser, token));
2498
2716
  return success;
2499
- } else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(B), TAG(BIG),
2500
- TAG(CODE), TAG(EM), TAG(FONT), TAG(I),
2501
- TAG(S), TAG(SMALL), TAG(STRIKE),
2502
- TAG(STRONG), TAG(TT), TAG(U) })) {
2717
+ } else if (tag_in(token, kStartTag,
2718
+ (gumbo_tagset){TAG(B), TAG(BIG), TAG(CODE), TAG(EM), TAG(FONT),
2719
+ TAG(I), TAG(S), TAG(SMALL), TAG(STRIKE), TAG(STRONG),
2720
+ TAG(TT), TAG(U)})) {
2503
2721
  reconstruct_active_formatting_elements(parser);
2504
2722
  add_formatting_element(parser, insert_element_from_token(parser, token));
2505
2723
  return true;
@@ -2515,20 +2733,20 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2515
2733
  insert_element_from_token(parser, token);
2516
2734
  add_formatting_element(parser, get_current_node(parser));
2517
2735
  return result;
2518
- } else if (tag_in(token, kEndTag, (gumbo_tagset) { TAG(A), TAG(B), TAG(BIG),
2519
- TAG(CODE), TAG(EM), TAG(FONT), TAG(I),
2520
- TAG(NOBR), TAG(S), TAG(SMALL),
2521
- TAG(STRIKE), TAG(STRONG), TAG(TT), TAG(U) })) {
2736
+ } else if (tag_in(token, kEndTag,
2737
+ (gumbo_tagset){TAG(A), TAG(B), TAG(BIG), TAG(CODE), TAG(EM),
2738
+ TAG(FONT), TAG(I), TAG(NOBR), TAG(S), TAG(SMALL),
2739
+ TAG(STRIKE), TAG(STRONG), TAG(TT), TAG(U)})) {
2522
2740
  return adoption_agency_algorithm(parser, token, token->v.end_tag);
2523
- } else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(APPLET), TAG(MARQUEE),
2524
- TAG(OBJECT) })) {
2741
+ } else if (tag_in(token, kStartTag,
2742
+ (gumbo_tagset){TAG(APPLET), TAG(MARQUEE), TAG(OBJECT)})) {
2525
2743
  reconstruct_active_formatting_elements(parser);
2526
2744
  insert_element_from_token(parser, token);
2527
2745
  add_formatting_element(parser, &kActiveFormattingScopeMarker);
2528
2746
  set_frameset_not_ok(parser);
2529
2747
  return true;
2530
- } else if (tag_in(token, kEndTag, (gumbo_tagset) { TAG(APPLET), TAG(MARQUEE),
2531
- TAG(OBJECT) })) {
2748
+ } else if (tag_in(token, kEndTag,
2749
+ (gumbo_tagset){TAG(APPLET), TAG(MARQUEE), TAG(OBJECT)})) {
2532
2750
  GumboTag token_tag = token->v.end_tag;
2533
2751
  if (!has_an_element_in_table_scope(parser, token_tag)) {
2534
2752
  parser_add_parse_error(parser, token);
@@ -2547,8 +2765,9 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2547
2765
  set_frameset_not_ok(parser);
2548
2766
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
2549
2767
  return true;
2550
- } else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(AREA), TAG(BR),
2551
- TAG(EMBED), TAG(IMG), TAG(IMAGE), TAG(KEYGEN), TAG(WBR) })) {
2768
+ } else if (tag_in(token, kStartTag,
2769
+ (gumbo_tagset){TAG(AREA), TAG(BR), TAG(EMBED), TAG(IMG),
2770
+ TAG(IMAGE), TAG(KEYGEN), TAG(WBR)})) {
2552
2771
  bool success = true;
2553
2772
  if (tag_is(token, kStartTag, GUMBO_TAG_IMAGE)) {
2554
2773
  success = false;
@@ -2578,7 +2797,8 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2578
2797
  pop_current_node(parser);
2579
2798
  acknowledge_self_closing_tag(parser);
2580
2799
  return true;
2581
- } else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(PARAM), TAG(SOURCE), TAG(TRACK) })) {
2800
+ } else if (tag_in(token, kStartTag,
2801
+ (gumbo_tagset){TAG(PARAM), TAG(SOURCE), TAG(TRACK)})) {
2582
2802
  insert_element_from_token(parser, token);
2583
2803
  pop_current_node(parser);
2584
2804
  acknowledge_self_closing_tag(parser);
@@ -2592,7 +2812,8 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2592
2812
  return result;
2593
2813
  } else if (tag_is(token, kStartTag, GUMBO_TAG_ISINDEX)) {
2594
2814
  parser_add_parse_error(parser, token);
2595
- if (parser->_parser_state->_form_element != NULL) {
2815
+ if (parser->_parser_state->_form_element != NULL &&
2816
+ !has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
2596
2817
  ignore_token(parser);
2597
2818
  return false;
2598
2819
  }
@@ -2607,15 +2828,18 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2607
2828
 
2608
2829
  GumboNode* form = insert_element_of_tag_type(
2609
2830
  parser, GUMBO_TAG_FORM, GUMBO_INSERTION_FROM_ISINDEX);
2831
+ if (!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
2832
+ parser->_parser_state->_form_element = form;
2833
+ }
2610
2834
  if (action_attr) {
2611
2835
  gumbo_vector_add(parser, action_attr, &form->v.element.attributes);
2612
2836
  }
2613
- insert_element_of_tag_type(parser, GUMBO_TAG_HR,
2614
- GUMBO_INSERTION_FROM_ISINDEX);
2615
- pop_current_node(parser); // <hr>
2837
+ insert_element_of_tag_type(
2838
+ parser, GUMBO_TAG_HR, GUMBO_INSERTION_FROM_ISINDEX);
2839
+ pop_current_node(parser); // <hr>
2616
2840
 
2617
- insert_element_of_tag_type(parser, GUMBO_TAG_LABEL,
2618
- GUMBO_INSERTION_FROM_ISINDEX);
2841
+ insert_element_of_tag_type(
2842
+ parser, GUMBO_TAG_LABEL, GUMBO_INSERTION_FROM_ISINDEX);
2619
2843
  TextNodeBufferState* text_state = &parser->_parser_state->_text_node;
2620
2844
  text_state->_start_original_text = token->original_text.data;
2621
2845
  text_state->_start_position = token->position;
@@ -2628,15 +2852,15 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2628
2852
  text_state->_buffer.capacity = prompt_attr_length + 1;
2629
2853
  gumbo_destroy_attribute(parser, prompt_attr);
2630
2854
  } else {
2631
- GumboStringPiece prompt_text = GUMBO_STRING(
2632
- "This is a searchable index. Enter search keywords: ");
2855
+ GumboStringPiece prompt_text =
2856
+ GUMBO_STRING("This is a searchable index. Enter search keywords: ");
2633
2857
  gumbo_string_buffer_append_string(
2634
2858
  parser, &prompt_text, &text_state->_buffer);
2635
2859
  }
2636
2860
 
2637
2861
  GumboNode* input = insert_element_of_tag_type(
2638
2862
  parser, GUMBO_TAG_INPUT, GUMBO_INSERTION_FROM_ISINDEX);
2639
- for (int i = 0; i < token_attrs->length; ++i) {
2863
+ for (unsigned int i = 0; i < token_attrs->length; ++i) {
2640
2864
  GumboAttribute* attr = token_attrs->data[i];
2641
2865
  if (attr != prompt_attr && attr != action_attr && attr != name_attr) {
2642
2866
  gumbo_vector_add(parser, attr, &input->v.element.attributes);
@@ -2649,6 +2873,13 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2649
2873
  // touching the attributes.
2650
2874
  ignore_token(parser);
2651
2875
 
2876
+ // The name attribute, if present, should be destroyed since it's ignored
2877
+ // when copying over. The action attribute should be kept since it's moved
2878
+ // to the form.
2879
+ if (name_attr) {
2880
+ gumbo_destroy_attribute(parser, name_attr);
2881
+ }
2882
+
2652
2883
  GumboAttribute* name =
2653
2884
  gumbo_parser_allocate(parser, sizeof(GumboAttribute));
2654
2885
  GumboStringPiece name_str = GUMBO_STRING("name");
@@ -2664,12 +2895,15 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2664
2895
  name->value_end = kGumboEmptySourcePosition;
2665
2896
  gumbo_vector_add(parser, name, &input->v.element.attributes);
2666
2897
 
2667
- pop_current_node(parser); // <input>
2668
- pop_current_node(parser); // <label>
2898
+ pop_current_node(parser); // <input>
2899
+ pop_current_node(parser); // <label>
2669
2900
  insert_element_of_tag_type(
2670
2901
  parser, GUMBO_TAG_HR, GUMBO_INSERTION_FROM_ISINDEX);
2671
- pop_current_node(parser); // <hr>
2672
- pop_current_node(parser); // <form>
2902
+ pop_current_node(parser); // <hr>
2903
+ pop_current_node(parser); // <form>
2904
+ if (!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
2905
+ parser->_parser_state->_form_element = NULL;
2906
+ }
2673
2907
  return false;
2674
2908
  } else if (tag_is(token, kStartTag, GUMBO_TAG_TEXTAREA)) {
2675
2909
  run_generic_parsing_algorithm(parser, token, GUMBO_LEX_RCDATA);
@@ -2704,19 +2938,27 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2704
2938
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_SELECT);
2705
2939
  }
2706
2940
  return true;
2707
- } else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(OPTION), TAG(OPTGROUP) })) {
2941
+ } else if (tag_in(token, kStartTag,
2942
+ (gumbo_tagset){TAG(OPTION), TAG(OPTGROUP)})) {
2708
2943
  if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTION)) {
2709
2944
  pop_current_node(parser);
2710
2945
  }
2711
2946
  reconstruct_active_formatting_elements(parser);
2712
2947
  insert_element_from_token(parser, token);
2713
2948
  return true;
2714
- } else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(RP), TAG(RT) })) {
2949
+ } else if (tag_in(token, kStartTag,
2950
+ (gumbo_tagset){TAG(RB), TAG(RP), TAG(RT), TAG(RTC)})) {
2715
2951
  bool success = true;
2952
+ GumboTag exception =
2953
+ tag_in(token, kStartTag, (gumbo_tagset){TAG(RT), TAG(RP)})
2954
+ ? GUMBO_TAG_RTC
2955
+ : GUMBO_TAG_LAST;
2716
2956
  if (has_an_element_in_scope(parser, GUMBO_TAG_RUBY)) {
2717
- generate_implied_end_tags(parser, GUMBO_TAG_LAST);
2957
+ generate_implied_end_tags(parser, exception);
2718
2958
  }
2719
- if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_RUBY)) {
2959
+ if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_RUBY) &&
2960
+ !(exception == GUMBO_TAG_LAST ||
2961
+ node_html_tag_is(get_current_node(parser), GUMBO_TAG_RTC))) {
2720
2962
  parser_add_parse_error(parser, token);
2721
2963
  success = false;
2722
2964
  }
@@ -2749,10 +2991,10 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2749
2991
  acknowledge_self_closing_tag(parser);
2750
2992
  }
2751
2993
  return true;
2752
- } else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(CAPTION), TAG(COL),
2753
- TAG(COLGROUP), TAG(FRAME), TAG(HEAD),
2754
- TAG(TBODY), TAG(TD), TAG(TFOOT),
2755
- TAG(TH), TAG(THEAD), TAG(TR) })) {
2994
+ } else if (tag_in(token, kStartTag,
2995
+ (gumbo_tagset){TAG(CAPTION), TAG(COL), TAG(COLGROUP),
2996
+ TAG(FRAME), TAG(HEAD), TAG(TBODY), TAG(TD), TAG(TFOOT),
2997
+ TAG(TH), TAG(THEAD), TAG(TR)})) {
2756
2998
  parser_add_parse_error(parser, token);
2757
2999
  ignore_token(parser);
2758
3000
  return false;
@@ -2771,14 +3013,15 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2771
3013
  // If we see a), implicitly close everything up to and including it. If we
2772
3014
  // see b), then record a parse error, don't close anything (except the
2773
3015
  // implied end tags) and ignore the end tag token.
2774
- for (int i = state->_open_elements.length; --i >= 0; ) {
3016
+ for (int i = state->_open_elements.length; --i >= 0;) {
2775
3017
  const GumboNode* node = state->_open_elements.data[i];
2776
3018
  if (node_html_tag_is(node, end_tag)) {
2777
3019
  generate_implied_end_tags(parser, end_tag);
2778
3020
  // TODO(jdtang): Do I need to add a parse error here? The condition in
2779
3021
  // the spec seems like it's the inverse of the loop condition above, and
2780
3022
  // so would never fire.
2781
- while (node != pop_current_node(parser)); // Pop everything.
3023
+ while (node != pop_current_node(parser))
3024
+ ; // Pop everything.
2782
3025
  return true;
2783
3026
  } else if (is_special_node(node)) {
2784
3027
  parser_add_parse_error(parser, token);
@@ -2794,7 +3037,8 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2794
3037
 
2795
3038
  // http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-incdata
2796
3039
  static bool handle_text(GumboParser* parser, GumboToken* token) {
2797
- if (token->type == GUMBO_TOKEN_CHARACTER || token->type == GUMBO_TOKEN_WHITESPACE) {
3040
+ if (token->type == GUMBO_TOKEN_CHARACTER ||
3041
+ token->type == GUMBO_TOKEN_WHITESPACE) {
2798
3042
  insert_text_token(parser, token);
2799
3043
  } else {
2800
3044
  // We provide only bare-bones script handling that doesn't involve any of
@@ -2854,11 +3098,12 @@ static bool handle_in_table(GumboParser* parser, GumboToken* token) {
2854
3098
  parser->_parser_state->_reprocess_current_token = true;
2855
3099
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_COLUMN_GROUP);
2856
3100
  return true;
2857
- } else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(TBODY), TAG(TFOOT),
2858
- TAG(THEAD), TAG(TD), TAG(TH), TAG(TR) })) {
3101
+ } else if (tag_in(token, kStartTag,
3102
+ (gumbo_tagset){TAG(TBODY), TAG(TFOOT), TAG(THEAD), TAG(TD),
3103
+ TAG(TH), TAG(TR)})) {
2859
3104
  clear_stack_to_table_context(parser);
2860
3105
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
2861
- if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(TD), TAG(TH), TAG(TR) })) {
3106
+ if (tag_in(token, kStartTag, (gumbo_tagset){TAG(TD), TAG(TH), TAG(TR)})) {
2862
3107
  insert_element_of_tag_type(
2863
3108
  parser, GUMBO_TAG_TBODY, GUMBO_INSERTION_IMPLIED);
2864
3109
  state->_reprocess_current_token = true;
@@ -2880,25 +3125,27 @@ static bool handle_in_table(GumboParser* parser, GumboToken* token) {
2880
3125
  return false;
2881
3126
  }
2882
3127
  return true;
2883
- } else if (tag_in(token, kEndTag, (gumbo_tagset) { TAG(BODY), TAG(CAPTION),
2884
- TAG(COL), TAG(COLGROUP), TAG(HTML),
2885
- TAG(TBODY), TAG(TD), TAG(TFOOT),
2886
- TAG(TH), TAG(THEAD), TAG(TR) })) {
3128
+ } else if (tag_in(token, kEndTag,
3129
+ (gumbo_tagset){TAG(BODY), TAG(CAPTION), TAG(COL),
3130
+ TAG(COLGROUP), TAG(HTML), TAG(TBODY), TAG(TD), TAG(TFOOT),
3131
+ TAG(TH), TAG(THEAD), TAG(TR)})) {
2887
3132
  parser_add_parse_error(parser, token);
2888
3133
  ignore_token(parser);
2889
3134
  return false;
2890
- } else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(STYLE), TAG(SCRIPT) })) {
3135
+ } else if (tag_in(token, kStartTag,
3136
+ (gumbo_tagset){TAG(STYLE), TAG(SCRIPT), TAG(TEMPLATE)}) ||
3137
+ (tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE))) {
2891
3138
  return handle_in_head(parser, token);
2892
3139
  } else if (tag_is(token, kStartTag, GUMBO_TAG_INPUT) &&
2893
- attribute_matches(&token->v.start_tag.attributes,
2894
- "type", "hidden")) {
3140
+ attribute_matches(
3141
+ &token->v.start_tag.attributes, "type", "hidden")) {
2895
3142
  parser_add_parse_error(parser, token);
2896
3143
  insert_element_from_token(parser, token);
2897
3144
  pop_current_node(parser);
2898
3145
  return false;
2899
3146
  } else if (tag_is(token, kStartTag, GUMBO_TAG_FORM)) {
2900
3147
  parser_add_parse_error(parser, token);
2901
- if (state->_form_element) {
3148
+ if (state->_form_element || has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
2902
3149
  ignore_token(parser);
2903
3150
  return false;
2904
3151
  }
@@ -2906,11 +3153,7 @@ static bool handle_in_table(GumboParser* parser, GumboToken* token) {
2906
3153
  pop_current_node(parser);
2907
3154
  return false;
2908
3155
  } else if (token->type == GUMBO_TOKEN_EOF) {
2909
- if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_HTML)) {
2910
- parser_add_parse_error(parser, token);
2911
- return false;
2912
- }
2913
- return true;
3156
+ return handle_in_body(parser, token);
2914
3157
  } else {
2915
3158
  parser_add_parse_error(parser, token);
2916
3159
  state->_foster_parent_insertions = true;
@@ -2938,8 +3181,9 @@ static bool handle_in_table_text(GumboParser* parser, GumboToken* token) {
2938
3181
  // Note that TextNodeBuffer may contain UTF-8 characters, but the presence
2939
3182
  // of any one byte that is not whitespace means we flip the flag, so this
2940
3183
  // loop is still valid.
2941
- for (int i = 0; i < buffer->length; ++i) {
2942
- if (!isspace((unsigned char)buffer->data[i]) || buffer->data[i] == '\v') {
3184
+ for (unsigned int i = 0; i < buffer->length; ++i) {
3185
+ if (!isspace((unsigned char) buffer->data[i]) ||
3186
+ buffer->data[i] == '\v') {
2943
3187
  state->_foster_parent_insertions = true;
2944
3188
  reconstruct_active_formatting_elements(parser);
2945
3189
  break;
@@ -2955,35 +3199,43 @@ static bool handle_in_table_text(GumboParser* parser, GumboToken* token) {
2955
3199
 
2956
3200
  // http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-incaption
2957
3201
  static bool handle_in_caption(GumboParser* parser, GumboToken* token) {
2958
- if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(CAPTION), TAG(COL),
2959
- TAG(COLGROUP), TAG(TBODY), TAG(TD),
2960
- TAG(TFOOT), TAG(TH), TAG(THEAD), TAG(TR) }) ||
2961
- tag_in(token, kEndTag, (gumbo_tagset) { TAG(CAPTION), TAG(TABLE) })) {
3202
+ if (tag_is(token, kEndTag, GUMBO_TAG_CAPTION)) {
2962
3203
  if (!has_an_element_in_table_scope(parser, GUMBO_TAG_CAPTION)) {
2963
3204
  parser_add_parse_error(parser, token);
2964
3205
  ignore_token(parser);
2965
3206
  return false;
3207
+ } else {
3208
+ generate_implied_end_tags(parser, GUMBO_TAG_LAST);
3209
+ bool result = true;
3210
+ if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_CAPTION)) {
3211
+ parser_add_parse_error(parser, token);
3212
+ }
3213
+ while (!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_CAPTION))
3214
+ ;
3215
+ clear_active_formatting_elements(parser);
3216
+ set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
3217
+ return result;
2966
3218
  }
2967
- if (!tag_is(token, kEndTag, GUMBO_TAG_CAPTION)) {
2968
- parser_add_parse_error(parser, token);
2969
- parser->_parser_state->_reprocess_current_token = true;
2970
- }
2971
- generate_implied_end_tags(parser, GUMBO_TAG_LAST);
2972
- bool result = true;
2973
- if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_CAPTION)) {
3219
+ } else if (tag_in(token, kStartTag,
3220
+ (gumbo_tagset){TAG(CAPTION), TAG(COL), TAG(COLGROUP),
3221
+ TAG(TBODY), TAG(TD), TAG(TFOOT), TAG(TH), TAG(THEAD),
3222
+ TAG(TR)}) ||
3223
+ (tag_is(token, kEndTag, GUMBO_TAG_TABLE))) {
3224
+ if (!has_an_element_in_table_scope(parser, GUMBO_TAG_CAPTION)) {
2974
3225
  parser_add_parse_error(parser, token);
2975
- while (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_CAPTION)) {
2976
- pop_current_node(parser);
2977
- }
2978
- result = false;
3226
+ ignore_token(parser);
3227
+ return false;
2979
3228
  }
2980
- pop_current_node(parser); // The <caption> itself.
3229
+ while (!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_CAPTION))
3230
+ ;
2981
3231
  clear_active_formatting_elements(parser);
2982
3232
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
2983
- return result;
2984
- } else if (tag_in(token, kEndTag, (gumbo_tagset) { TAG(BODY), TAG(COL),
2985
- TAG(COLGROUP), TAG(HTML), TAG(TBODY), TAG(TD), TAG(TFOOT),
2986
- TAG(TH), TAG(THEAD), TAG(TR) })) {
3233
+ parser->_parser_state->_reprocess_current_token = true;
3234
+ return true;
3235
+ } else if (tag_in(token, kEndTag,
3236
+ (gumbo_tagset){TAG(BODY), TAG(COL), TAG(COLGROUP), TAG(HTML),
3237
+ TAG(TBODY), TAG(TD), TAG(TFOOT), TAG(TH), TAG(THEAD),
3238
+ TAG(TR)})) {
2987
3239
  parser_add_parse_error(parser, token);
2988
3240
  ignore_token(parser);
2989
3241
  return false;
@@ -3011,24 +3263,33 @@ static bool handle_in_column_group(GumboParser* parser, GumboToken* token) {
3011
3263
  pop_current_node(parser);
3012
3264
  acknowledge_self_closing_tag(parser);
3013
3265
  return true;
3266
+ } else if (tag_is(token, kEndTag, GUMBO_TAG_COLGROUP)) {
3267
+ if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_COLGROUP)) {
3268
+ parser_add_parse_error(parser, token);
3269
+ ignore_token(parser);
3270
+ return false;
3271
+ }
3272
+ pop_current_node(parser);
3273
+ set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
3274
+ return false;
3014
3275
  } else if (tag_is(token, kEndTag, GUMBO_TAG_COL)) {
3015
3276
  parser_add_parse_error(parser, token);
3016
3277
  ignore_token(parser);
3017
3278
  return false;
3018
- } else if (token->type == GUMBO_TOKEN_EOF &&
3019
- get_current_node(parser) == parser->_output->root) {
3020
- return true;
3279
+ } else if (tag_is(token, kStartTag, GUMBO_TAG_TEMPLATE) ||
3280
+ tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
3281
+ return handle_in_head(parser, token);
3282
+ } else if (token->type == GUMBO_TOKEN_EOF) {
3283
+ return handle_in_body(parser, token);
3021
3284
  } else {
3022
- if (get_current_node(parser) == parser->_output->root) {
3285
+ if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_COLGROUP)) {
3023
3286
  parser_add_parse_error(parser, token);
3287
+ ignore_token(parser);
3024
3288
  return false;
3025
3289
  }
3026
- assert(node_html_tag_is(get_current_node(parser), GUMBO_TAG_COLGROUP));
3027
3290
  pop_current_node(parser);
3028
3291
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
3029
- if (!tag_is(token, kEndTag, GUMBO_TAG_COLGROUP)) {
3030
- parser->_parser_state->_reprocess_current_token = true;
3031
- }
3292
+ parser->_parser_state->_reprocess_current_token = true;
3032
3293
  return true;
3033
3294
  }
3034
3295
  }
@@ -3040,14 +3301,15 @@ static bool handle_in_table_body(GumboParser* parser, GumboToken* token) {
3040
3301
  insert_element_from_token(parser, token);
3041
3302
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_ROW);
3042
3303
  return true;
3043
- } else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(TD), TAG(TH) })) {
3304
+ } else if (tag_in(token, kStartTag, (gumbo_tagset){TAG(TD), TAG(TH)})) {
3044
3305
  parser_add_parse_error(parser, token);
3045
3306
  clear_stack_to_table_body_context(parser);
3046
3307
  insert_element_of_tag_type(parser, GUMBO_TAG_TR, GUMBO_INSERTION_IMPLIED);
3047
3308
  parser->_parser_state->_reprocess_current_token = true;
3048
3309
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_ROW);
3049
3310
  return false;
3050
- } else if (tag_in(token, kEndTag, (gumbo_tagset) { TAG(TBODY), TAG(TFOOT), TAG(THEAD) })) {
3311
+ } else if (tag_in(token, kEndTag,
3312
+ (gumbo_tagset){TAG(TBODY), TAG(TFOOT), TAG(THEAD)})) {
3051
3313
  if (!has_an_element_in_table_scope(parser, token->v.end_tag)) {
3052
3314
  parser_add_parse_error(parser, token);
3053
3315
  ignore_token(parser);
@@ -3057,12 +3319,13 @@ static bool handle_in_table_body(GumboParser* parser, GumboToken* token) {
3057
3319
  pop_current_node(parser);
3058
3320
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
3059
3321
  return true;
3060
- } else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(CAPTION), TAG(COL),
3061
- TAG(COLGROUP), TAG(TBODY), TAG(TFOOT), TAG(THEAD) }) ||
3322
+ } else if (tag_in(token, kStartTag,
3323
+ (gumbo_tagset){TAG(CAPTION), TAG(COL), TAG(COLGROUP),
3324
+ TAG(TBODY), TAG(TFOOT), TAG(THEAD)}) ||
3062
3325
  tag_is(token, kEndTag, GUMBO_TAG_TABLE)) {
3063
3326
  if (!(has_an_element_in_table_scope(parser, GUMBO_TAG_TBODY) ||
3064
- has_an_element_in_table_scope(parser, GUMBO_TAG_THEAD) ||
3065
- has_an_element_in_table_scope(parser, GUMBO_TAG_TFOOT))) {
3327
+ has_an_element_in_table_scope(parser, GUMBO_TAG_THEAD) ||
3328
+ has_an_element_in_table_scope(parser, GUMBO_TAG_TFOOT))) {
3066
3329
  parser_add_parse_error(parser, token);
3067
3330
  ignore_token(parser);
3068
3331
  return false;
@@ -3072,9 +3335,9 @@ static bool handle_in_table_body(GumboParser* parser, GumboToken* token) {
3072
3335
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
3073
3336
  parser->_parser_state->_reprocess_current_token = true;
3074
3337
  return true;
3075
- } else if (tag_in(token, kEndTag, (gumbo_tagset) { TAG(BODY), TAG(CAPTION),
3076
- TAG(COL), TAG(TR), TAG(COLGROUP), TAG(HTML), TAG(TD), TAG(TH) }))
3077
- {
3338
+ } else if (tag_in(token, kEndTag,
3339
+ (gumbo_tagset){TAG(BODY), TAG(CAPTION), TAG(COL), TAG(TR),
3340
+ TAG(COLGROUP), TAG(HTML), TAG(TD), TAG(TH)})) {
3078
3341
  parser_add_parse_error(parser, token);
3079
3342
  ignore_token(parser);
3080
3343
  return false;
@@ -3085,45 +3348,55 @@ static bool handle_in_table_body(GumboParser* parser, GumboToken* token) {
3085
3348
 
3086
3349
  // http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-intr
3087
3350
  static bool handle_in_row(GumboParser* parser, GumboToken* token) {
3088
- if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(TH), TAG(TD) })) {
3351
+ if (tag_in(token, kStartTag, (gumbo_tagset){TAG(TH), TAG(TD)})) {
3089
3352
  clear_stack_to_table_row_context(parser);
3090
3353
  insert_element_from_token(parser, token);
3091
3354
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_CELL);
3092
3355
  add_formatting_element(parser, &kActiveFormattingScopeMarker);
3093
3356
  return true;
3094
- } else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(CAPTION), TAG(COLGROUP),
3095
- TAG(TBODY), TAG(TFOOT), TAG(THEAD), TAG(TR) }) ||
3096
- tag_in(token, kEndTag, (gumbo_tagset) { TAG(TR), TAG(TABLE),
3097
- TAG(TBODY), TAG(TFOOT), TAG(THEAD) })) {
3098
- // This case covers 4 clauses of the spec, each of which say "Otherwise, act
3099
- // as if an end tag with the tag name "tr" had been seen." The differences
3100
- // are in error handling and whether the current token is reprocessed.
3101
- GumboTag desired_tag =
3102
- tag_in(token, kEndTag, (gumbo_tagset) { TAG(TBODY), TAG(TFOOT),
3103
- TAG(THEAD) })
3104
- ? token->v.end_tag : GUMBO_TAG_TR;
3105
- if (!has_an_element_in_table_scope(parser, desired_tag)) {
3106
- gumbo_debug("Bailing because there is no tag %s in table scope.\nOpen elements:",
3107
- gumbo_normalized_tagname(desired_tag));
3108
- for (int i = 0; i < parser->_parser_state->_open_elements.length; ++i) {
3109
- const GumboNode* node = parser->_parser_state->_open_elements.data[i];
3110
- gumbo_debug("%s\n", gumbo_normalized_tagname(node->v.element.tag));
3111
- }
3357
+ } else if (tag_is(token, kEndTag, GUMBO_TAG_TR)) {
3358
+ if (!has_an_element_in_table_scope(parser, GUMBO_TAG_TR)) {
3112
3359
  parser_add_parse_error(parser, token);
3113
3360
  ignore_token(parser);
3114
3361
  return false;
3362
+ } else {
3363
+ clear_stack_to_table_row_context(parser);
3364
+ pop_current_node(parser);
3365
+ set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
3366
+ return true;
3115
3367
  }
3116
- clear_stack_to_table_row_context(parser);
3117
- GumboNode* last_element = pop_current_node(parser);
3118
- assert(node_html_tag_is(last_element, GUMBO_TAG_TR));
3119
- AVOID_UNUSED_VARIABLE_WARNING(last_element);
3120
- set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
3121
- if (!tag_is(token, kEndTag, GUMBO_TAG_TR)) {
3368
+ } else if (tag_in(token, kStartTag,
3369
+ (gumbo_tagset){TAG(CAPTION), TAG(COL), TAG(COLGROUP),
3370
+ TAG(TBODY), TAG(TFOOT), TAG(THEAD), TAG(TR)}) ||
3371
+ tag_is(token, kEndTag, GUMBO_TAG_TABLE)) {
3372
+ if (!has_an_element_in_table_scope(parser, GUMBO_TAG_TR)) {
3373
+ parser_add_parse_error(parser, token);
3374
+ ignore_token(parser);
3375
+ return false;
3376
+ } else {
3377
+ clear_stack_to_table_row_context(parser);
3378
+ pop_current_node(parser);
3379
+ set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
3122
3380
  parser->_parser_state->_reprocess_current_token = true;
3381
+ return true;
3123
3382
  }
3124
- return true;
3125
- } else if (tag_in(token, kEndTag, (gumbo_tagset) { TAG(BODY), TAG(CAPTION),
3126
- TAG(COL), TAG(COLGROUP), TAG(HTML), TAG(TD), TAG(TH) })) {
3383
+ } else if (tag_in(token, kEndTag,
3384
+ (gumbo_tagset){TAG(TBODY), TAG(TFOOT), TAG(THEAD)})) {
3385
+ if (!has_an_element_in_table_scope(parser, token->v.end_tag) ||
3386
+ (!has_an_element_in_table_scope(parser, GUMBO_TAG_TR))) {
3387
+ parser_add_parse_error(parser, token);
3388
+ ignore_token(parser);
3389
+ return false;
3390
+ } else {
3391
+ clear_stack_to_table_row_context(parser);
3392
+ pop_current_node(parser);
3393
+ set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
3394
+ parser->_parser_state->_reprocess_current_token = true;
3395
+ return true;
3396
+ }
3397
+ } else if (tag_in(token, kEndTag,
3398
+ (gumbo_tagset){TAG(BODY), TAG(CAPTION), TAG(COL),
3399
+ TAG(COLGROUP), TAG(HTML), TAG(TD), TAG(TH)})) {
3127
3400
  parser_add_parse_error(parser, token);
3128
3401
  ignore_token(parser);
3129
3402
  return false;
@@ -3134,16 +3407,18 @@ static bool handle_in_row(GumboParser* parser, GumboToken* token) {
3134
3407
 
3135
3408
  // http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-intd
3136
3409
  static bool handle_in_cell(GumboParser* parser, GumboToken* token) {
3137
- if (tag_in(token, kEndTag, (gumbo_tagset) { TAG(TD), TAG(TH) })) {
3410
+ if (tag_in(token, kEndTag, (gumbo_tagset){TAG(TD), TAG(TH)})) {
3138
3411
  GumboTag token_tag = token->v.end_tag;
3139
3412
  if (!has_an_element_in_table_scope(parser, token_tag)) {
3140
3413
  parser_add_parse_error(parser, token);
3414
+ ignore_token(parser);
3141
3415
  return false;
3142
3416
  }
3143
3417
  return close_table_cell(parser, token, token_tag);
3144
- } else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(CAPTION), TAG(COL),
3145
- TAG(COLGROUP), TAG(TBODY), TAG(TD),
3146
- TAG(TFOOT), TAG(TH), TAG(THEAD), TAG(TR) })) {
3418
+ } else if (tag_in(token, kStartTag,
3419
+ (gumbo_tagset){TAG(CAPTION), TAG(COL), TAG(COLGROUP),
3420
+ TAG(TBODY), TAG(TD), TAG(TFOOT), TAG(TH), TAG(THEAD),
3421
+ TAG(TR)})) {
3147
3422
  gumbo_debug("Handling <td> in cell.\n");
3148
3423
  if (!has_an_element_in_table_scope(parser, GUMBO_TAG_TH) &&
3149
3424
  !has_an_element_in_table_scope(parser, GUMBO_TAG_TD)) {
@@ -3154,13 +3429,13 @@ static bool handle_in_cell(GumboParser* parser, GumboToken* token) {
3154
3429
  }
3155
3430
  parser->_parser_state->_reprocess_current_token = true;
3156
3431
  return close_current_cell(parser, token);
3157
- } else if (tag_in(token, kEndTag, (gumbo_tagset) { TAG(BODY), TAG(CAPTION),
3158
- TAG(COL), TAG(COLGROUP), TAG(HTML) })) {
3432
+ } else if (tag_in(token, kEndTag, (gumbo_tagset){TAG(BODY), TAG(CAPTION),
3433
+ TAG(COL), TAG(COLGROUP), TAG(HTML)})) {
3159
3434
  parser_add_parse_error(parser, token);
3160
3435
  ignore_token(parser);
3161
3436
  return false;
3162
- } else if (tag_in(token, kEndTag, (gumbo_tagset) { TAG(TABLE), TAG(TBODY),
3163
- TAG(TFOOT), TAG(THEAD), TAG(TR) })) {
3437
+ } else if (tag_in(token, kEndTag, (gumbo_tagset){TAG(TABLE), TAG(TBODY),
3438
+ TAG(TFOOT), TAG(THEAD), TAG(TR)})) {
3164
3439
  if (!has_an_element_in_table_scope(parser, token->v.end_tag)) {
3165
3440
  parser_add_parse_error(parser, token);
3166
3441
  ignore_token(parser);
@@ -3211,7 +3486,7 @@ static bool handle_in_select(GumboParser* parser, GumboToken* token) {
3211
3486
  GumboVector* open_elements = &parser->_parser_state->_open_elements;
3212
3487
  if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTION) &&
3213
3488
  node_html_tag_is(open_elements->data[open_elements->length - 2],
3214
- GUMBO_TAG_OPTGROUP)) {
3489
+ GUMBO_TAG_OPTGROUP)) {
3215
3490
  pop_current_node(parser);
3216
3491
  }
3217
3492
  if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTGROUP)) {
@@ -3242,9 +3517,12 @@ static bool handle_in_select(GumboParser* parser, GumboToken* token) {
3242
3517
  } else if (tag_is(token, kStartTag, GUMBO_TAG_SELECT)) {
3243
3518
  parser_add_parse_error(parser, token);
3244
3519
  ignore_token(parser);
3245
- close_current_select(parser);
3520
+ if (has_an_element_in_select_scope(parser, GUMBO_TAG_SELECT)) {
3521
+ close_current_select(parser);
3522
+ }
3246
3523
  return false;
3247
- } else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(INPUT), TAG(KEYGEN), TAG(TEXTAREA) })) {
3524
+ } else if (tag_in(token, kStartTag,
3525
+ (gumbo_tagset){TAG(INPUT), TAG(KEYGEN), TAG(TEXTAREA)})) {
3248
3526
  parser_add_parse_error(parser, token);
3249
3527
  if (!has_an_element_in_select_scope(parser, GUMBO_TAG_SELECT)) {
3250
3528
  ignore_token(parser);
@@ -3253,14 +3531,12 @@ static bool handle_in_select(GumboParser* parser, GumboToken* token) {
3253
3531
  parser->_parser_state->_reprocess_current_token = true;
3254
3532
  }
3255
3533
  return false;
3256
- } else if (tag_is(token, kStartTag, GUMBO_TAG_SCRIPT)) {
3534
+ } else if (tag_in(token, kStartTag,
3535
+ (gumbo_tagset){TAG(SCRIPT), TAG(TEMPLATE)}) ||
3536
+ tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
3257
3537
  return handle_in_head(parser, token);
3258
3538
  } else if (token->type == GUMBO_TOKEN_EOF) {
3259
- if (get_current_node(parser) != parser->_output->root) {
3260
- parser_add_parse_error(parser, token);
3261
- return false;
3262
- }
3263
- return true;
3539
+ return handle_in_body(parser, token);
3264
3540
  } else {
3265
3541
  parser_add_parse_error(parser, token);
3266
3542
  ignore_token(parser);
@@ -3270,23 +3546,28 @@ static bool handle_in_select(GumboParser* parser, GumboToken* token) {
3270
3546
 
3271
3547
  // http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-inselectintable
3272
3548
  static bool handle_in_select_in_table(GumboParser* parser, GumboToken* token) {
3273
- if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(CAPTION), TAG(TABLE),
3274
- TAG(TBODY), TAG(TFOOT), TAG(THEAD), TAG(TR), TAG(TD), TAG(TH) })) {
3549
+ if (tag_in(token, kStartTag,
3550
+ (gumbo_tagset){TAG(CAPTION), TAG(TABLE), TAG(TBODY), TAG(TFOOT),
3551
+ TAG(THEAD), TAG(TR), TAG(TD), TAG(TH)})) {
3275
3552
  parser_add_parse_error(parser, token);
3276
3553
  close_current_select(parser);
3277
3554
  parser->_parser_state->_reprocess_current_token = true;
3278
3555
  return false;
3279
- } else if (tag_in(token, kEndTag, (gumbo_tagset) { TAG(CAPTION), TAG(TABLE),
3280
- TAG(TBODY), TAG(TFOOT), TAG(THEAD), TAG(TR), TAG(TD), TAG(TH) })) {
3556
+ } else if (tag_in(token, kEndTag,
3557
+ (gumbo_tagset){TAG(CAPTION), TAG(TABLE), TAG(TBODY),
3558
+ TAG(TFOOT), TAG(THEAD), TAG(TR), TAG(TD), TAG(TH)})) {
3281
3559
  parser_add_parse_error(parser, token);
3282
- if (has_an_element_in_table_scope(parser, token->v.end_tag)) {
3560
+ if (!has_an_element_in_table_scope(parser, token->v.end_tag)) {
3561
+ ignore_token(parser);
3562
+ return false;
3563
+ } else {
3283
3564
  close_current_select(parser);
3284
- reset_insertion_mode_appropriately(parser);
3565
+ // close_current_select already does the
3566
+ // reset_insertion_mode_appropriately
3567
+ // reset_insertion_mode_appropriately(parser);
3285
3568
  parser->_parser_state->_reprocess_current_token = true;
3286
- } else {
3287
- ignore_token(parser);
3569
+ return false;
3288
3570
  }
3289
- return false;
3290
3571
  } else {
3291
3572
  return handle_in_select(parser, token);
3292
3573
  }
@@ -3294,8 +3575,71 @@ static bool handle_in_select_in_table(GumboParser* parser, GumboToken* token) {
3294
3575
 
3295
3576
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#parsing-main-intemplate
3296
3577
  static bool handle_in_template(GumboParser* parser, GumboToken* token) {
3297
- // TODO(jdtang): Implement this.
3298
- return true;
3578
+ GumboParserState* state = parser->_parser_state;
3579
+ if (token->type == GUMBO_TOKEN_WHITESPACE ||
3580
+ token->type == GUMBO_TOKEN_CHARACTER ||
3581
+ token->type == GUMBO_TOKEN_COMMENT || token->type == GUMBO_TOKEN_NULL ||
3582
+ token->type == GUMBO_TOKEN_DOCTYPE) {
3583
+ return handle_in_body(parser, token);
3584
+ } else if (tag_in(token, kStartTag,
3585
+ (gumbo_tagset){TAG(BASE), TAG(BASEFONT), TAG(BGSOUND),
3586
+ TAG(LINK), TAG(META), TAG(NOFRAMES), TAG(SCRIPT),
3587
+ TAG(STYLE), TAG(TEMPLATE), TAG(TITLE)}) ||
3588
+ tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
3589
+ return handle_in_head(parser, token);
3590
+ } else if (tag_in(
3591
+ token, kStartTag, (gumbo_tagset){TAG(CAPTION), TAG(COLGROUP),
3592
+ TAG(TBODY), TAG(TFOOT), TAG(THEAD)})) {
3593
+ pop_template_insertion_mode(parser);
3594
+ push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
3595
+ set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
3596
+ state->_reprocess_current_token = true;
3597
+ return true;
3598
+ } else if (tag_is(token, kStartTag, GUMBO_TAG_COL)) {
3599
+ pop_template_insertion_mode(parser);
3600
+ push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_COLUMN_GROUP);
3601
+ set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_COLUMN_GROUP);
3602
+ state->_reprocess_current_token = true;
3603
+ return true;
3604
+ } else if (tag_is(token, kStartTag, GUMBO_TAG_TR)) {
3605
+ pop_template_insertion_mode(parser);
3606
+ push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
3607
+ set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
3608
+ state->_reprocess_current_token = true;
3609
+ return true;
3610
+ } else if (tag_in(token, kStartTag, (gumbo_tagset){TAG(TD), TAG(TH)})) {
3611
+ pop_template_insertion_mode(parser);
3612
+ push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_ROW);
3613
+ set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_ROW);
3614
+ state->_reprocess_current_token = true;
3615
+ return true;
3616
+ } else if (token->type == GUMBO_TOKEN_START_TAG) {
3617
+ pop_template_insertion_mode(parser);
3618
+ push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_BODY);
3619
+ set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_BODY);
3620
+ state->_reprocess_current_token = true;
3621
+ return true;
3622
+ } else if (token->type == GUMBO_TOKEN_END_TAG) {
3623
+ parser_add_parse_error(parser, token);
3624
+ ignore_token(parser);
3625
+ return false;
3626
+ } else if (token->type == GUMBO_TOKEN_EOF) {
3627
+ if (!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
3628
+ // Stop parsing.
3629
+ return true;
3630
+ }
3631
+ parser_add_parse_error(parser, token);
3632
+ while (!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_TEMPLATE))
3633
+ ;
3634
+ clear_active_formatting_elements(parser);
3635
+ pop_template_insertion_mode(parser);
3636
+ reset_insertion_mode_appropriately(parser);
3637
+ state->_reprocess_current_token = true;
3638
+ return false;
3639
+ } else {
3640
+ assert(0);
3641
+ return false;
3642
+ }
3299
3643
  }
3300
3644
 
3301
3645
  // http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-afterbody
@@ -3313,7 +3657,12 @@ static bool handle_after_body(GumboParser* parser, GumboToken* token) {
3313
3657
  ignore_token(parser);
3314
3658
  return false;
3315
3659
  } else if (tag_is(token, kEndTag, GUMBO_TAG_HTML)) {
3316
- // TODO(jdtang): Handle fragment parsing algorithm case.
3660
+ /* fragment case: ignore the closing HTML token */
3661
+ if (is_fragment_parser(parser)) {
3662
+ parser_add_parse_error(parser, token);
3663
+ ignore_token(parser);
3664
+ return false;
3665
+ }
3317
3666
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_AFTER_BODY);
3318
3667
  GumboNode* html = parser->_parser_state->_open_elements.data[0];
3319
3668
  assert(node_html_tag_is(html, GUMBO_TAG_HTML));
@@ -3354,9 +3703,8 @@ static bool handle_in_frameset(GumboParser* parser, GumboToken* token) {
3354
3703
  return false;
3355
3704
  }
3356
3705
  pop_current_node(parser);
3357
- // TODO(jdtang): Add a condition to ignore this for the fragment parsing
3358
- // algorithm.
3359
- if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_FRAMESET)) {
3706
+ if (!is_fragment_parser(parser) &&
3707
+ !node_html_tag_is(get_current_node(parser), GUMBO_TAG_FRAMESET)) {
3360
3708
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_FRAMESET);
3361
3709
  }
3362
3710
  return true;
@@ -3455,31 +3803,14 @@ static bool handle_after_after_frameset(
3455
3803
  // Function pointers for each insertion mode. Keep in sync with
3456
3804
  // insertion_mode.h.
3457
3805
  typedef bool (*TokenHandler)(GumboParser* parser, GumboToken* token);
3458
- static const TokenHandler kTokenHandlers[] = {
3459
- handle_initial,
3460
- handle_before_html,
3461
- handle_before_head,
3462
- handle_in_head,
3463
- handle_in_head_noscript,
3464
- handle_after_head,
3465
- handle_in_body,
3466
- handle_text,
3467
- handle_in_table,
3468
- handle_in_table_text,
3469
- handle_in_caption,
3470
- handle_in_column_group,
3471
- handle_in_table_body,
3472
- handle_in_row,
3473
- handle_in_cell,
3474
- handle_in_select,
3475
- handle_in_select_in_table,
3476
- handle_in_template,
3477
- handle_after_body,
3478
- handle_in_frameset,
3479
- handle_after_frameset,
3480
- handle_after_after_body,
3481
- handle_after_after_frameset
3482
- };
3806
+ static const TokenHandler kTokenHandlers[] = {handle_initial,
3807
+ handle_before_html, handle_before_head, handle_in_head,
3808
+ handle_in_head_noscript, handle_after_head, handle_in_body, handle_text,
3809
+ handle_in_table, handle_in_table_text, handle_in_caption,
3810
+ handle_in_column_group, handle_in_table_body, handle_in_row, handle_in_cell,
3811
+ handle_in_select, handle_in_select_in_table, handle_in_template,
3812
+ handle_after_body, handle_in_frameset, handle_after_frameset,
3813
+ handle_after_after_body, handle_after_after_frameset};
3483
3814
 
3484
3815
  static bool handle_html_content(GumboParser* parser, GumboToken* token) {
3485
3816
  return kTokenHandlers[(unsigned int) parser->_parser_state->_insertion_mode](
@@ -3488,6 +3819,7 @@ static bool handle_html_content(GumboParser* parser, GumboToken* token) {
3488
3819
 
3489
3820
  // http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-inforeign
3490
3821
  static bool handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
3822
+ gumbo_debug("Handling foreign content");
3491
3823
  switch (token->type) {
3492
3824
  case GUMBO_TOKEN_NULL:
3493
3825
  parser_add_parse_error(parser, token);
@@ -3514,34 +3846,44 @@ static bool handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
3514
3846
  break;
3515
3847
  }
3516
3848
  // Order matters for these clauses.
3517
- if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(B), TAG(BIG),
3518
- TAG(BLOCKQUOTE), TAG(BODY), TAG(BR),
3519
- TAG(CENTER), TAG(CODE), TAG(DD), TAG(DIV),
3520
- TAG(DL), TAG(DT), TAG(EM), TAG(EMBED),
3521
- TAG(H1), TAG(H2), TAG(H3), TAG(H4),
3522
- TAG(H5), TAG(H6), TAG(HEAD), TAG(HR),
3523
- TAG(I), TAG(IMG), TAG(LI), TAG(LISTING),
3524
- TAG(MENU), TAG(META), TAG(NOBR), TAG(OL),
3525
- TAG(P), TAG(PRE), TAG(RUBY), TAG(S),
3526
- TAG(SMALL), TAG(SPAN), TAG(STRONG),
3527
- TAG(STRIKE), TAG(SUB), TAG(SUP),
3528
- TAG(TABLE), TAG(TT), TAG(U), TAG(UL), TAG(VAR) }) ||
3529
- (tag_is(token, kStartTag, GUMBO_TAG_FONT) && (
3530
- token_has_attribute(token, "color") ||
3531
- token_has_attribute(token, "face") ||
3532
- token_has_attribute(token, "size")))) {
3849
+ if (tag_in(token, kStartTag,
3850
+ (gumbo_tagset){TAG(B), TAG(BIG), TAG(BLOCKQUOTE), TAG(BODY), TAG(BR),
3851
+ TAG(CENTER), TAG(CODE), TAG(DD), TAG(DIV), TAG(DL), TAG(DT),
3852
+ TAG(EM), TAG(EMBED), TAG(H1), TAG(H2), TAG(H3), TAG(H4), TAG(H5),
3853
+ TAG(H6), TAG(HEAD), TAG(HR), TAG(I), TAG(IMG), TAG(LI),
3854
+ TAG(LISTING), TAG(MENU), TAG(META), TAG(NOBR), TAG(OL), TAG(P),
3855
+ TAG(PRE), TAG(RUBY), TAG(S), TAG(SMALL), TAG(SPAN), TAG(STRONG),
3856
+ TAG(STRIKE), TAG(SUB), TAG(SUP), TAG(TABLE), TAG(TT), TAG(U),
3857
+ TAG(UL), TAG(VAR)}) ||
3858
+ (tag_is(token, kStartTag, GUMBO_TAG_FONT) &&
3859
+ (token_has_attribute(token, "color") ||
3860
+ token_has_attribute(token, "face") ||
3861
+ token_has_attribute(token, "size")))) {
3862
+ /* Parse error */
3533
3863
  parser_add_parse_error(parser, token);
3534
- do {
3535
- pop_current_node(parser);
3536
- } while(!(is_mathml_integration_point(get_current_node(parser)) ||
3537
- is_html_integration_point(get_current_node(parser)) ||
3538
- get_current_node(parser)->v.element.tag_namespace ==
3539
- GUMBO_NAMESPACE_HTML));
3540
- parser->_parser_state->_reprocess_current_token = true;
3541
- return false;
3542
- } else if (token->type == GUMBO_TOKEN_START_TAG) {
3864
+
3865
+ /*
3866
+ * Fragment case: If the parser was originally created for the HTML
3867
+ * fragment parsing algorithm, then act as described in the "any other
3868
+ * start tag" entry below.
3869
+ */
3870
+ if (!is_fragment_parser(parser)) {
3871
+ do {
3872
+ pop_current_node(parser);
3873
+ } while (!(is_mathml_integration_point(get_current_node(parser)) ||
3874
+ is_html_integration_point(get_current_node(parser)) ||
3875
+ get_current_node(parser)->v.element.tag_namespace ==
3876
+ GUMBO_NAMESPACE_HTML));
3877
+ parser->_parser_state->_reprocess_current_token = true;
3878
+ return false;
3879
+ }
3880
+
3881
+ assert(token->type == GUMBO_TOKEN_START_TAG);
3882
+ }
3883
+
3884
+ if (token->type == GUMBO_TOKEN_START_TAG) {
3543
3885
  const GumboNamespaceEnum current_namespace =
3544
- get_current_node(parser)->v.element.tag_namespace;
3886
+ get_adjusted_current_node(parser)->v.element.tag_namespace;
3545
3887
  if (current_namespace == GUMBO_NAMESPACE_MATHML) {
3546
3888
  adjust_mathml_attributes(parser, token);
3547
3889
  }
@@ -3557,8 +3899,8 @@ static bool handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
3557
3899
  acknowledge_self_closing_tag(parser);
3558
3900
  }
3559
3901
  return true;
3560
- // </script> tags are handled like any other end tag, putting the script's
3561
- // text into a text node child and closing the current node.
3902
+ // </script> tags are handled like any other end tag, putting the script's
3903
+ // text into a text node child and closing the current node.
3562
3904
  } else {
3563
3905
  assert(token->type == GUMBO_TOKEN_END_TAG);
3564
3906
  GumboNode* node = get_current_node(parser);
@@ -3574,13 +3916,13 @@ static bool handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
3574
3916
  is_success = false;
3575
3917
  }
3576
3918
  int i = parser->_parser_state->_open_elements.length;
3577
- for( --i; i > 0; ) {
3919
+ for (--i; i > 0;) {
3578
3920
  // Here we move up the stack until we find an HTML element (in which
3579
3921
  // case we do nothing) or we find the element that we're about to
3580
3922
  // close (in which case we pop everything we've seen until that
3581
3923
  // point.)
3582
3924
  gumbo_debug("Foreign %.*s node at %d.\n", node_tagname.length,
3583
- node_tagname.data, i);
3925
+ node_tagname.data, i);
3584
3926
  if (gumbo_string_equals_ignore_case(&node_tagname, &token_tagname)) {
3585
3927
  gumbo_debug("Matches.\n");
3586
3928
  while (pop_current_node(parser) != node) {
@@ -3608,7 +3950,6 @@ static bool handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
3608
3950
  }
3609
3951
  }
3610
3952
 
3611
-
3612
3953
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#tree-construction
3613
3954
  static bool handle_token(GumboParser* parser, GumboToken* token) {
3614
3955
  if (parser->_parser_state->_ignore_next_linefeed &&
@@ -3630,28 +3971,31 @@ static bool handle_token(GumboParser* parser, GumboToken* token) {
3630
3971
  parser->_parser_state->_closed_html_tag = true;
3631
3972
  }
3632
3973
 
3633
- const GumboNode* current_node = get_current_node(parser);
3634
- assert(!current_node || current_node->type == GUMBO_NODE_ELEMENT);
3974
+ const GumboNode* current_node = get_adjusted_current_node(parser);
3975
+ assert(!current_node || current_node->type == GUMBO_NODE_ELEMENT ||
3976
+ current_node->type == GUMBO_NODE_TEMPLATE);
3635
3977
  if (current_node) {
3636
3978
  gumbo_debug("Current node: <%s>.\n",
3637
- gumbo_normalized_tagname(current_node->v.element.tag));
3979
+ gumbo_normalized_tagname(current_node->v.element.tag));
3638
3980
  }
3639
3981
  if (!current_node ||
3640
3982
  current_node->v.element.tag_namespace == GUMBO_NAMESPACE_HTML ||
3641
3983
  (is_mathml_integration_point(current_node) &&
3642
- (token->type == GUMBO_TOKEN_CHARACTER ||
3643
- token->type == GUMBO_TOKEN_WHITESPACE ||
3644
- token->type == GUMBO_TOKEN_NULL ||
3645
- (token->type == GUMBO_TOKEN_START_TAG &&
3646
- !tag_in(token, kStartTag, (gumbo_tagset) { TAG(MGLYPH), TAG(MALIGNMARK) })))) ||
3984
+ (token->type == GUMBO_TOKEN_CHARACTER ||
3985
+ token->type == GUMBO_TOKEN_WHITESPACE ||
3986
+ token->type == GUMBO_TOKEN_NULL ||
3987
+ (token->type == GUMBO_TOKEN_START_TAG &&
3988
+ !tag_in(token, kStartTag,
3989
+ (gumbo_tagset){TAG(MGLYPH), TAG(MALIGNMARK)})))) ||
3647
3990
  (current_node->v.element.tag_namespace == GUMBO_NAMESPACE_MATHML &&
3648
- node_qualified_tag_is(current_node, GUMBO_NAMESPACE_MATHML, GUMBO_TAG_ANNOTATION_XML) &&
3649
- tag_is(token, kStartTag, GUMBO_TAG_SVG)) ||
3650
- (is_html_integration_point(current_node) && (
3651
- token->type == GUMBO_TOKEN_START_TAG ||
3652
- token->type == GUMBO_TOKEN_CHARACTER ||
3653
- token->type == GUMBO_TOKEN_NULL ||
3654
- token->type == GUMBO_TOKEN_WHITESPACE)) ||
3991
+ node_qualified_tag_is(
3992
+ current_node, GUMBO_NAMESPACE_MATHML, GUMBO_TAG_ANNOTATION_XML) &&
3993
+ tag_is(token, kStartTag, GUMBO_TAG_SVG)) ||
3994
+ (is_html_integration_point(current_node) &&
3995
+ (token->type == GUMBO_TOKEN_START_TAG ||
3996
+ token->type == GUMBO_TOKEN_CHARACTER ||
3997
+ token->type == GUMBO_TOKEN_NULL ||
3998
+ token->type == GUMBO_TOKEN_WHITESPACE)) ||
3655
3999
  token->type == GUMBO_TOKEN_EOF) {
3656
4000
  return handle_html_content(parser, token);
3657
4001
  } else {
@@ -3659,6 +4003,66 @@ static bool handle_token(GumboParser* parser, GumboToken* token) {
3659
4003
  }
3660
4004
  }
3661
4005
 
4006
+ static void fragment_parser_init(GumboParser* parser, GumboTag fragment_ctx,
4007
+ GumboNamespaceEnum fragment_namespace) {  // Fragment-parsing setup. NOTE(review): the numbered comments presumably track the HTML spec's fragment parsing algorithm steps - confirm.
4008
+ GumboNode* root;
4009
+ assert(fragment_ctx != GUMBO_TAG_LAST);  // gumbo_parse_with_options only calls this when fragment_context != GUMBO_TAG_LAST.
4010
+
4011
+ // 3. Create the context element for fragment_ctx and record its namespace.
4012
+ parser->_parser_state->_fragment_ctx = create_element(parser, fragment_ctx);
4013
+ parser->_parser_state->_fragment_ctx->v.element.tag_namespace =
4014
+ fragment_namespace;
4015
+
4016
+ // 4. Choose the tokenizer's initial state from the context element.
4017
+ if (fragment_namespace == GUMBO_NAMESPACE_HTML) {
4018
+ // Only HTML-namespace contexts reach this switch; for non-HTML namespaces the tokenizer state is never changed here, so they always start in the DATA state.
4019
+ switch (fragment_ctx) {
4020
+ case GUMBO_TAG_TITLE:
4021
+ case GUMBO_TAG_TEXTAREA:
4022
+ gumbo_tokenizer_set_state(parser, GUMBO_LEX_RCDATA);
4023
+ break;
4024
+
4025
+ case GUMBO_TAG_STYLE:
4026
+ case GUMBO_TAG_XMP:
4027
+ case GUMBO_TAG_IFRAME:
4028
+ case GUMBO_TAG_NOEMBED:
4029
+ case GUMBO_TAG_NOFRAMES:
4030
+ gumbo_tokenizer_set_state(parser, GUMBO_LEX_RAWTEXT);
4031
+ break;
4032
+
4033
+ case GUMBO_TAG_SCRIPT:
4034
+ gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT);
4035
+ break;
4036
+
4037
+ case GUMBO_TAG_NOSCRIPT:
4038
+ /* scripting is disabled in Gumbo, so leave the tokenizer
4039
+ * in the default data state */
4040
+ break;
4041
+
4042
+ case GUMBO_TAG_PLAINTEXT:
4043
+ gumbo_tokenizer_set_state(parser, GUMBO_LEX_PLAINTEXT);
4044
+ break;
4045
+
4046
+ default:
4047
+ /* default data state */
4048
+ break;
4049
+ }
4050
+ }
4051
+
4052
+ // 5. 6. 7. Insert an implied <html> element and record it as the output root.
4053
+ root = insert_element_of_tag_type(
4054
+ parser, GUMBO_TAG_HTML, GUMBO_INSERTION_IMPLIED);
4055
+ parser->_output->root = root;
4056
+
4057
+ // 8. A <template> context pushes the "in template" insertion mode.
4058
+ if (fragment_ctx == GUMBO_TAG_TEMPLATE) {
4059
+ push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TEMPLATE);
4060
+ }
4061
+
4062
+ // 10. Reset the insertion mode now that the context element and root are set up.
4063
+ reset_insertion_mode_appropriately(parser);
4064
+ }
4065
+
3662
4066
  GumboOutput* gumbo_parse(const char* buffer) {
3663
4067
  return gumbo_parse_with_options(
3664
4068
  &kGumboDefaultOptions, buffer, strlen(buffer));
@@ -3672,6 +4076,11 @@ GumboOutput* gumbo_parse_with_options(
3672
4076
  gumbo_tokenizer_state_init(&parser, buffer, length);
3673
4077
  parser_state_init(&parser);
3674
4078
 
4079
+ if (options->fragment_context != GUMBO_TAG_LAST) {
4080
+ fragment_parser_init(
4081
+ &parser, options->fragment_context, options->fragment_namespace);
4082
+ }
4083
+
3675
4084
  GumboParserState* state = parser._parser_state;
3676
4085
  gumbo_debug("Parsing %.*s.\n", length, buffer);
3677
4086
 
@@ -3687,9 +4096,9 @@ GumboOutput* gumbo_parse_with_options(
3687
4096
  state->_reprocess_current_token = false;
3688
4097
  } else {
3689
4098
  GumboNode* current_node = get_current_node(&parser);
3690
- gumbo_tokenizer_set_is_current_node_foreign(
3691
- &parser, current_node &&
3692
- current_node->v.element.tag_namespace != GUMBO_NAMESPACE_HTML);
4099
+ gumbo_tokenizer_set_is_current_node_foreign(&parser,
4100
+ current_node &&
4101
+ current_node->v.element.tag_namespace != GUMBO_NAMESPACE_HTML);
3693
4102
  has_error = !gumbo_lex(&parser, &token) || has_error;
3694
4103
  }
3695
4104
  const char* token_type = "text";
@@ -3709,14 +4118,13 @@ GumboOutput* gumbo_parse_with_options(
3709
4118
  default:
3710
4119
  break;
3711
4120
  }
3712
- gumbo_debug("Handling %s token @%d:%d in state %d.\n",
3713
- (char*) token_type, token.position.line, token.position.column,
3714
- state->_insertion_mode);
4121
+ gumbo_debug("Handling %s token @%d:%d in state %d.\n", (char*) token_type,
4122
+ token.position.line, token.position.column, state->_insertion_mode);
3715
4123
 
3716
4124
  state->_current_token = &token;
3717
4125
  state->_self_closing_flag_acknowledged =
3718
4126
  !(token.type == GUMBO_TOKEN_START_TAG &&
3719
- token.v.start_tag.is_self_closing);
4127
+ token.v.start_tag.is_self_closing);
3720
4128
 
3721
4129
  has_error = !handle_token(&parser, &token) || has_error;
3722
4130
 
@@ -3772,7 +4180,7 @@ void gumbo_destroy_output(const GumboOptions* options, GumboOutput* output) {
3772
4180
  GumboParser parser;
3773
4181
  parser._options = options;
3774
4182
  destroy_node(&parser, output->document);
3775
- for (int i = 0; i < output->errors.length; ++i) {
4183
+ for (unsigned int i = 0; i < output->errors.length; ++i) {
3776
4184
  gumbo_error_destroy(&parser, output->errors.data[i]);
3777
4185
  }
3778
4186
  gumbo_vector_destroy(&parser, &output->errors);