nokogumbo 1.4.2 → 1.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -201,24 +201,22 @@ void gumbo_error_destroy(struct GumboInternalParser* parser, GumboError* error);
201
201
  // responsible for deleting the buffer. (Note that the buffer is allocated with
202
202
  // the allocator specified in the GumboParser config and hence should be freed
203
203
  // by gumbo_parser_deallocate().)
204
- void gumbo_error_to_string(
205
- struct GumboInternalParser* parser, const GumboError* error,
206
- GumboStringBuffer* output);
204
+ void gumbo_error_to_string(struct GumboInternalParser* parser,
205
+ const GumboError* error, GumboStringBuffer* output);
207
206
 
208
207
  // Prints a caret diagnostic to a string. This fills an empty GumboStringBuffer
209
208
  // with a freshly-allocated buffer containing the error message text. The
210
209
  // caller is responsible for deleting the buffer. (Note that the buffer is
211
210
  // allocated with the allocator specified in the GumboParser config and hence
212
211
  // should be freed by gumbo_parser_deallocate().)
213
- void gumbo_caret_diagnostic_to_string(
214
- struct GumboInternalParser* parser, const GumboError* error,
215
- const char* source_text, GumboStringBuffer* output);
212
+ void gumbo_caret_diagnostic_to_string(struct GumboInternalParser* parser,
213
+ const GumboError* error, const char* source_text,
214
+ GumboStringBuffer* output);
216
215
 
217
216
  // Like gumbo_caret_diagnostic_to_string, but prints the text to stdout instead
218
217
  // of writing to a string.
219
- void gumbo_print_caret_diagnostic(
220
- struct GumboInternalParser* parser, const GumboError* error,
221
- const char* source_text);
218
+ void gumbo_print_caret_diagnostic(struct GumboInternalParser* parser,
219
+ const GumboError* error, const char* source_text);
222
220
 
223
221
  #ifdef __cplusplus
224
222
  }
@@ -76,7 +76,6 @@ typedef struct {
76
76
  */
77
77
  extern const GumboSourcePosition kGumboEmptySourcePosition;
78
78
 
79
-
80
79
  /**
81
80
  * A struct representing a string or part of a string. Strings within the
82
81
  * parser are represented by a char* and a length; the char* points into
@@ -111,7 +110,6 @@ bool gumbo_string_equals(
111
110
  bool gumbo_string_equals_ignore_case(
112
111
  const GumboStringPiece* str1, const GumboStringPiece* str2);
113
112
 
114
-
115
113
  /**
116
114
  * A simple vector implementation. This stores a pointer to a data array and a
117
115
  * length. All elements are stored as void*; client code must cast to the
@@ -141,8 +139,7 @@ extern const GumboVector kGumboEmptyVector;
141
139
  * Returns the first index at which an element appears in this vector (testing
142
140
  * by pointer equality), or -1 if it never does.
143
141
  */
144
- int gumbo_vector_index_of(GumboVector* vector, void* element);
145
-
142
+ int gumbo_vector_index_of(GumboVector* vector, const void* element);
146
143
 
147
144
  /**
148
145
  * An enum for all the tags defined in the HTML5 standard. These correspond to
@@ -157,172 +154,10 @@ int gumbo_vector_index_of(GumboVector* vector, void* element);
157
154
  * strings.
158
155
  */
159
156
  typedef enum {
160
- // http://www.whatwg.org/specs/web-apps/current-work/multipage/semantics.html#the-root-element
161
- GUMBO_TAG_HTML,
162
- // http://www.whatwg.org/specs/web-apps/current-work/multipage/semantics.html#document-metadata
163
- GUMBO_TAG_HEAD,
164
- GUMBO_TAG_TITLE,
165
- GUMBO_TAG_BASE,
166
- GUMBO_TAG_LINK,
167
- GUMBO_TAG_META,
168
- GUMBO_TAG_STYLE,
169
- // http://www.whatwg.org/specs/web-apps/current-work/multipage/scripting-1.html#scripting-1
170
- GUMBO_TAG_SCRIPT,
171
- GUMBO_TAG_NOSCRIPT,
172
- GUMBO_TAG_TEMPLATE,
173
- // http://www.whatwg.org/specs/web-apps/current-work/multipage/sections.html#sections
174
- GUMBO_TAG_BODY,
175
- GUMBO_TAG_ARTICLE,
176
- GUMBO_TAG_SECTION,
177
- GUMBO_TAG_NAV,
178
- GUMBO_TAG_ASIDE,
179
- GUMBO_TAG_H1,
180
- GUMBO_TAG_H2,
181
- GUMBO_TAG_H3,
182
- GUMBO_TAG_H4,
183
- GUMBO_TAG_H5,
184
- GUMBO_TAG_H6,
185
- GUMBO_TAG_HGROUP,
186
- GUMBO_TAG_HEADER,
187
- GUMBO_TAG_FOOTER,
188
- GUMBO_TAG_ADDRESS,
189
- // http://www.whatwg.org/specs/web-apps/current-work/multipage/grouping-content.html#grouping-content
190
- GUMBO_TAG_P,
191
- GUMBO_TAG_HR,
192
- GUMBO_TAG_PRE,
193
- GUMBO_TAG_BLOCKQUOTE,
194
- GUMBO_TAG_OL,
195
- GUMBO_TAG_UL,
196
- GUMBO_TAG_LI,
197
- GUMBO_TAG_DL,
198
- GUMBO_TAG_DT,
199
- GUMBO_TAG_DD,
200
- GUMBO_TAG_FIGURE,
201
- GUMBO_TAG_FIGCAPTION,
202
- GUMBO_TAG_MAIN,
203
- GUMBO_TAG_DIV,
204
- // http://www.whatwg.org/specs/web-apps/current-work/multipage/text-level-semantics.html#text-level-semantics
205
- GUMBO_TAG_A,
206
- GUMBO_TAG_EM,
207
- GUMBO_TAG_STRONG,
208
- GUMBO_TAG_SMALL,
209
- GUMBO_TAG_S,
210
- GUMBO_TAG_CITE,
211
- GUMBO_TAG_Q,
212
- GUMBO_TAG_DFN,
213
- GUMBO_TAG_ABBR,
214
- GUMBO_TAG_DATA,
215
- GUMBO_TAG_TIME,
216
- GUMBO_TAG_CODE,
217
- GUMBO_TAG_VAR,
218
- GUMBO_TAG_SAMP,
219
- GUMBO_TAG_KBD,
220
- GUMBO_TAG_SUB,
221
- GUMBO_TAG_SUP,
222
- GUMBO_TAG_I,
223
- GUMBO_TAG_B,
224
- GUMBO_TAG_U,
225
- GUMBO_TAG_MARK,
226
- GUMBO_TAG_RUBY,
227
- GUMBO_TAG_RT,
228
- GUMBO_TAG_RP,
229
- GUMBO_TAG_BDI,
230
- GUMBO_TAG_BDO,
231
- GUMBO_TAG_SPAN,
232
- GUMBO_TAG_BR,
233
- GUMBO_TAG_WBR,
234
- // http://www.whatwg.org/specs/web-apps/current-work/multipage/edits.html#edits
235
- GUMBO_TAG_INS,
236
- GUMBO_TAG_DEL,
237
- // http://www.whatwg.org/specs/web-apps/current-work/multipage/embedded-content-1.html#embedded-content-1
238
- GUMBO_TAG_IMAGE,
239
- GUMBO_TAG_IMG,
240
- GUMBO_TAG_IFRAME,
241
- GUMBO_TAG_EMBED,
242
- GUMBO_TAG_OBJECT,
243
- GUMBO_TAG_PARAM,
244
- GUMBO_TAG_VIDEO,
245
- GUMBO_TAG_AUDIO,
246
- GUMBO_TAG_SOURCE,
247
- GUMBO_TAG_TRACK,
248
- GUMBO_TAG_CANVAS,
249
- GUMBO_TAG_MAP,
250
- GUMBO_TAG_AREA,
251
- // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-map-element.html#mathml
252
- GUMBO_TAG_MATH,
253
- GUMBO_TAG_MI,
254
- GUMBO_TAG_MO,
255
- GUMBO_TAG_MN,
256
- GUMBO_TAG_MS,
257
- GUMBO_TAG_MTEXT,
258
- GUMBO_TAG_MGLYPH,
259
- GUMBO_TAG_MALIGNMARK,
260
- GUMBO_TAG_ANNOTATION_XML,
261
- // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-map-element.html#svg-0
262
- GUMBO_TAG_SVG,
263
- GUMBO_TAG_FOREIGNOBJECT,
264
- GUMBO_TAG_DESC,
265
- // SVG title tags will have GUMBO_TAG_TITLE as with HTML.
266
- // http://www.whatwg.org/specs/web-apps/current-work/multipage/tabular-data.html#tabular-data
267
- GUMBO_TAG_TABLE,
268
- GUMBO_TAG_CAPTION,
269
- GUMBO_TAG_COLGROUP,
270
- GUMBO_TAG_COL,
271
- GUMBO_TAG_TBODY,
272
- GUMBO_TAG_THEAD,
273
- GUMBO_TAG_TFOOT,
274
- GUMBO_TAG_TR,
275
- GUMBO_TAG_TD,
276
- GUMBO_TAG_TH,
277
- // http://www.whatwg.org/specs/web-apps/current-work/multipage/forms.html#forms
278
- GUMBO_TAG_FORM,
279
- GUMBO_TAG_FIELDSET,
280
- GUMBO_TAG_LEGEND,
281
- GUMBO_TAG_LABEL,
282
- GUMBO_TAG_INPUT,
283
- GUMBO_TAG_BUTTON,
284
- GUMBO_TAG_SELECT,
285
- GUMBO_TAG_DATALIST,
286
- GUMBO_TAG_OPTGROUP,
287
- GUMBO_TAG_OPTION,
288
- GUMBO_TAG_TEXTAREA,
289
- GUMBO_TAG_KEYGEN,
290
- GUMBO_TAG_OUTPUT,
291
- GUMBO_TAG_PROGRESS,
292
- GUMBO_TAG_METER,
293
- // http://www.whatwg.org/specs/web-apps/current-work/multipage/interactive-elements.html#interactive-elements
294
- GUMBO_TAG_DETAILS,
295
- GUMBO_TAG_SUMMARY,
296
- GUMBO_TAG_MENU,
297
- GUMBO_TAG_MENUITEM,
298
- // Non-conforming elements that nonetheless appear in the HTML5 spec.
299
- // http://www.whatwg.org/specs/web-apps/current-work/multipage/obsolete.html#non-conforming-features
300
- GUMBO_TAG_APPLET,
301
- GUMBO_TAG_ACRONYM,
302
- GUMBO_TAG_BGSOUND,
303
- GUMBO_TAG_DIR,
304
- GUMBO_TAG_FRAME,
305
- GUMBO_TAG_FRAMESET,
306
- GUMBO_TAG_NOFRAMES,
307
- GUMBO_TAG_ISINDEX,
308
- GUMBO_TAG_LISTING,
309
- GUMBO_TAG_XMP,
310
- GUMBO_TAG_NEXTID,
311
- GUMBO_TAG_NOEMBED,
312
- GUMBO_TAG_PLAINTEXT,
313
- GUMBO_TAG_RB,
314
- GUMBO_TAG_STRIKE,
315
- GUMBO_TAG_BASEFONT,
316
- GUMBO_TAG_BIG,
317
- GUMBO_TAG_BLINK,
318
- GUMBO_TAG_CENTER,
319
- GUMBO_TAG_FONT,
320
- GUMBO_TAG_MARQUEE,
321
- GUMBO_TAG_MULTICOL,
322
- GUMBO_TAG_NOBR,
323
- GUMBO_TAG_SPACER,
324
- GUMBO_TAG_TT,
325
- // Used for all tags that don't have special handling in HTML.
157
+ // Load all the tags from an external source, generated from tag.in.
158
+ #include "tag_enum.h"
159
+ // Used for all tags that don't have special handling in HTML. Add new tags
160
+ // to the end of tag.in so as to preserve backwards-compatibility.
326
161
  GUMBO_TAG_UNKNOWN,
327
162
  // A marker value to indicate the end of the enum, for iterating over it.
328
163
  // Also used as the terminator for varargs functions that take tags.
@@ -364,9 +199,10 @@ const char* gumbo_normalize_svg_tagname(const GumboStringPiece* tagname);
364
199
 
365
200
  /**
366
201
  * Converts a tag name string (which may be in upper or mixed case) to a tag
367
- * enum.
202
+ * enum. The `tag` version expects `tagname` to be NUL-terminated; the `tagn` version takes an explicit length.
368
203
  */
369
204
  GumboTag gumbo_tag_enum(const char* tagname);
205
+ GumboTag gumbo_tagn_enum(const char* tagname, unsigned int length);
370
206
 
371
207
  /**
372
208
  * Attribute namespaces.
@@ -461,10 +297,16 @@ typedef enum {
461
297
  GUMBO_NODE_TEXT,
462
298
  /** CDATA node. v will be a GumboText. */
463
299
  GUMBO_NODE_CDATA,
464
- /** Comment node. v. will be a GumboText, excluding comment delimiters. */
300
+ /** Comment node. v will be a GumboText, excluding comment delimiters. */
465
301
  GUMBO_NODE_COMMENT,
466
302
  /** Text node whose contents are all whitespace. v will be a GumboText. */
467
- GUMBO_NODE_WHITESPACE
303
+ GUMBO_NODE_WHITESPACE,
304
+ /** Template node. This is separate from GUMBO_NODE_ELEMENT because many
305
+ * client libraries will want to ignore the contents of template nodes, as
306
+ * the spec suggests. Recursing on GUMBO_NODE_ELEMENT will do the right thing
307
+ * here, while clients that want to include template contents should also
308
+ * check for GUMBO_NODE_TEMPLATE. v will be a GumboElement. */
309
+ GUMBO_NODE_TEMPLATE
468
310
  } GumboNodeType;
469
311
 
470
312
  /**
@@ -473,7 +315,9 @@ typedef enum {
473
315
  */
474
316
  typedef struct GumboInternalNode GumboNode;
475
317
 
476
- /** http://www.whatwg.org/specs/web-apps/current-work/complete/dom.html#quirks-mode */
318
+ /**
319
+ * http://www.whatwg.org/specs/web-apps/current-work/complete/dom.html#quirks-mode
320
+ */
477
321
  typedef enum {
478
322
  GUMBO_DOCTYPE_NO_QUIRKS,
479
323
  GUMBO_DOCTYPE_QUIRKS,
@@ -571,7 +415,6 @@ typedef enum {
571
415
  GUMBO_INSERTION_FOSTER_PARENTED = 1 << 10,
572
416
  } GumboParseFlags;
573
417
 
574
-
575
418
  /**
576
419
  * Information specific to document nodes.
577
420
  */
@@ -690,9 +533,9 @@ struct GumboInternalNode {
690
533
 
691
534
  /** The actual node data. */
692
535
  union {
693
- GumboDocument document; // For GUMBO_NODE_DOCUMENT.
694
- GumboElement element; // For GUMBO_NODE_ELEMENT.
695
- GumboText text; // For everything else.
536
+ GumboDocument document; // For GUMBO_NODE_DOCUMENT.
537
+ GumboElement element; // For GUMBO_NODE_ELEMENT.
538
+ GumboText text; // For everything else.
696
539
  } v;
697
540
  };
698
541
 
@@ -750,6 +593,29 @@ typedef struct GumboInternalOptions {
750
593
  * Default: -1
751
594
  */
752
595
  int max_errors;
596
+
597
+ /**
598
+ * The fragment context for parsing:
599
+ * https://html.spec.whatwg.org/multipage/syntax.html#parsing-html-fragments
600
+ *
601
+ * If GUMBO_TAG_LAST is passed here, it is assumed to be "no fragment", i.e.
602
+ * the regular parsing algorithm. Otherwise, pass the tag enum for the
603
+ * intended parent of the parsed fragment. We use just the tag enum rather
604
+ * than a full node because that's enough to set all the parsing context we
605
+ * need, and it provides some additional flexibility for client code to act as
606
+ * if parsing a fragment even when a full HTML tree isn't available.
607
+ *
608
+ * Default: GUMBO_TAG_LAST
609
+ */
610
+ GumboTag fragment_context;
611
+
612
+ /**
613
+ * The namespace for the fragment context. This lets client code
614
+ * differentiate between, say, parsing a <title> tag in SVG vs. parsing it in
615
+ * HTML.
616
+ * Default: GUMBO_NAMESPACE_HTML
617
+ */
618
+ GumboNamespaceEnum fragment_namespace;
753
619
  } GumboOptions;
754
620
 
755
621
  /** Default options struct; use this with gumbo_parse_with_options. */
@@ -796,9 +662,7 @@ GumboOutput* gumbo_parse_with_options(
796
662
  const GumboOptions* options, const char* buffer, size_t buffer_length);
797
663
 
798
664
  /** Release the memory used for the parse tree & parse errors. */
799
- void gumbo_destroy_output(
800
- const GumboOptions* options, GumboOutput* output);
801
-
665
+ void gumbo_destroy_output(const GumboOptions* options, GumboOutput* output);
802
666
 
803
667
  #ifdef __cplusplus
804
668
  }
@@ -34,8 +34,10 @@
34
34
 
35
35
  #define AVOID_UNUSED_VARIABLE_WARNING(i) (void)(i)
36
36
 
37
- #define GUMBO_STRING(literal) { literal, sizeof(literal) - 1 }
38
- #define TERMINATOR { "", 0 }
37
+ #define GUMBO_STRING(literal) \
38
+ { literal, sizeof(literal) - 1 }
39
+ #define TERMINATOR \
40
+ { "", 0 }
39
41
 
40
42
  typedef char gumbo_tagset[GUMBO_TAG_LAST];
41
43
  #define TAG(tag) [GUMBO_TAG_##tag] = (1 << GUMBO_NAMESPACE_HTML)
@@ -43,46 +45,42 @@ typedef char gumbo_tagset[GUMBO_TAG_LAST];
43
45
  #define TAG_MATHML(tag) [GUMBO_TAG_##tag] = (1 << GUMBO_NAMESPACE_MATHML)
44
46
 
45
47
  #define TAGSET_INCLUDES(tagset, namespace, tag) \
46
- (tag < GUMBO_TAG_LAST && \
47
- tagset[(int)tag] == (1 << (int)namespace))
48
+ (tag < GUMBO_TAG_LAST && tagset[(int) tag] == (1 << (int) namespace))
48
49
 
50
+ // Selected forward declarations, since it is getting hard to find
51
+ // an appropriate definition order.
52
+ static bool node_html_tag_is(const GumboNode*, GumboTag);
53
+ static GumboInsertionMode get_current_template_insertion_mode(
54
+ const GumboParser*);
55
+ static bool handle_in_template(GumboParser*, GumboToken*);
56
+ static void destroy_node(GumboParser*, GumboNode*);
49
57
 
50
- static void* malloc_wrapper(void* unused, size_t size) {
51
- return malloc(size);
52
- }
58
+ static void* malloc_wrapper(void* unused, size_t size) { return malloc(size); }
53
59
 
54
- static void free_wrapper(void* unused, void* ptr) {
55
- free(ptr);
56
- }
60
+ static void free_wrapper(void* unused, void* ptr) { free(ptr); }
57
61
 
58
- const GumboOptions kGumboDefaultOptions = {
59
- &malloc_wrapper,
60
- &free_wrapper,
61
- NULL,
62
- 8,
63
- false,
64
- -1,
65
- };
62
+ const GumboOptions kGumboDefaultOptions = {&malloc_wrapper, &free_wrapper, NULL,
63
+ 8, false, -1, GUMBO_TAG_LAST, GUMBO_NAMESPACE_HTML};
66
64
 
67
65
  static const GumboStringPiece kDoctypeHtml = GUMBO_STRING("html");
68
- static const GumboStringPiece kPublicIdHtml4_0 = GUMBO_STRING(
69
- "-//W3C//DTD HTML 4.0//EN");
70
- static const GumboStringPiece kPublicIdHtml4_01 = GUMBO_STRING(
71
- "-//W3C//DTD HTML 4.01//EN");
72
- static const GumboStringPiece kPublicIdXhtml1_0 = GUMBO_STRING(
73
- "-//W3C//DTD XHTML 1.0 Strict//EN");
74
- static const GumboStringPiece kPublicIdXhtml1_1 = GUMBO_STRING(
75
- "-//W3C//DTD XHTML 1.1//EN");
76
- static const GumboStringPiece kSystemIdRecHtml4_0 = GUMBO_STRING(
77
- "http://www.w3.org/TR/REC-html40/strict.dtd");
78
- static const GumboStringPiece kSystemIdHtml4 = GUMBO_STRING(
79
- "http://www.w3.org/TR/html4/strict.dtd");
80
- static const GumboStringPiece kSystemIdXhtmlStrict1_1 = GUMBO_STRING(
81
- "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd");
82
- static const GumboStringPiece kSystemIdXhtml1_1 = GUMBO_STRING(
83
- "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd");
84
- static const GumboStringPiece kSystemIdLegacyCompat = GUMBO_STRING(
85
- "about:legacy-compat");
66
+ static const GumboStringPiece kPublicIdHtml4_0 =
67
+ GUMBO_STRING("-//W3C//DTD HTML 4.0//EN");
68
+ static const GumboStringPiece kPublicIdHtml4_01 =
69
+ GUMBO_STRING("-//W3C//DTD HTML 4.01//EN");
70
+ static const GumboStringPiece kPublicIdXhtml1_0 =
71
+ GUMBO_STRING("-//W3C//DTD XHTML 1.0 Strict//EN");
72
+ static const GumboStringPiece kPublicIdXhtml1_1 =
73
+ GUMBO_STRING("-//W3C//DTD XHTML 1.1//EN");
74
+ static const GumboStringPiece kSystemIdRecHtml4_0 =
75
+ GUMBO_STRING("http://www.w3.org/TR/REC-html40/strict.dtd");
76
+ static const GumboStringPiece kSystemIdHtml4 =
77
+ GUMBO_STRING("http://www.w3.org/TR/html4/strict.dtd");
78
+ static const GumboStringPiece kSystemIdXhtmlStrict1_1 =
79
+ GUMBO_STRING("http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd");
80
+ static const GumboStringPiece kSystemIdXhtml1_1 =
81
+ GUMBO_STRING("http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd");
82
+ static const GumboStringPiece kSystemIdLegacyCompat =
83
+ GUMBO_STRING("about:legacy-compat");
86
84
 
87
85
  // The doctype arrays have an explicit terminator because we want to pass them
88
86
  // to a helper function, and passing them as a pointer discards sizeof
@@ -90,96 +88,86 @@ static const GumboStringPiece kSystemIdLegacyCompat = GUMBO_STRING(
90
88
  // over them use sizeof directly instead of a terminator.
91
89
 
92
90
  static const GumboStringPiece kQuirksModePublicIdPrefixes[] = {
93
- GUMBO_STRING("+//Silmaril//dtd html Pro v0r11 19970101//"),
94
- GUMBO_STRING("-//AdvaSoft Ltd//DTD HTML 3.0 asWedit + extensions//"),
95
- GUMBO_STRING("-//AS//DTD HTML 3.0 asWedit + extensions//"),
96
- GUMBO_STRING("-//IETF//DTD HTML 2.0 Level 1//"),
97
- GUMBO_STRING("-//IETF//DTD HTML 2.0 Level 2//"),
98
- GUMBO_STRING("-//IETF//DTD HTML 2.0 Strict Level 1//"),
99
- GUMBO_STRING("-//IETF//DTD HTML 2.0 Strict Level 2//"),
100
- GUMBO_STRING("-//IETF//DTD HTML 2.0 Strict//"),
101
- GUMBO_STRING("-//IETF//DTD HTML 2.0//"),
102
- GUMBO_STRING("-//IETF//DTD HTML 2.1E//"),
103
- GUMBO_STRING("-//IETF//DTD HTML 3.0//"),
104
- GUMBO_STRING("-//IETF//DTD HTML 3.2 Final//"),
105
- GUMBO_STRING("-//IETF//DTD HTML 3.2//"),
106
- GUMBO_STRING("-//IETF//DTD HTML 3//"),
107
- GUMBO_STRING("-//IETF//DTD HTML Level 0//"),
108
- GUMBO_STRING("-//IETF//DTD HTML Level 1//"),
109
- GUMBO_STRING("-//IETF//DTD HTML Level 2//"),
110
- GUMBO_STRING("-//IETF//DTD HTML Level 3//"),
111
- GUMBO_STRING("-//IETF//DTD HTML Strict Level 0//"),
112
- GUMBO_STRING("-//IETF//DTD HTML Strict Level 1//"),
113
- GUMBO_STRING("-//IETF//DTD HTML Strict Level 2//"),
114
- GUMBO_STRING("-//IETF//DTD HTML Strict Level 3//"),
115
- GUMBO_STRING("-//IETF//DTD HTML Strict//"),
116
- GUMBO_STRING("-//IETF//DTD HTML//"),
117
- GUMBO_STRING("-//Metrius//DTD Metrius Presentational//"),
118
- GUMBO_STRING("-//Microsoft//DTD Internet Explorer 2.0 HTML Strict//"),
119
- GUMBO_STRING("-//Microsoft//DTD Internet Explorer 2.0 HTML//"),
120
- GUMBO_STRING("-//Microsoft//DTD Internet Explorer 2.0 Tables//"),
121
- GUMBO_STRING("-//Microsoft//DTD Internet Explorer 3.0 HTML Strict//"),
122
- GUMBO_STRING("-//Microsoft//DTD Internet Explorer 3.0 HTML//"),
123
- GUMBO_STRING("-//Microsoft//DTD Internet Explorer 3.0 Tables//"),
124
- GUMBO_STRING("-//Netscape Comm. Corp.//DTD HTML//"),
125
- GUMBO_STRING("-//Netscape Comm. Corp.//DTD Strict HTML//"),
126
- GUMBO_STRING("-//O'Reilly and Associates//DTD HTML 2.0//"),
127
- GUMBO_STRING("-//O'Reilly and Associates//DTD HTML Extended 1.0//"),
128
- GUMBO_STRING("-//O'Reilly and Associates//DTD HTML Extended Relaxed 1.0//"),
129
- GUMBO_STRING("-//SoftQuad Software//DTD HoTMetaL PRO 6.0::19990601::)"
130
- "extensions to HTML 4.0//"),
131
- GUMBO_STRING("-//SoftQuad//DTD HoTMetaL PRO 4.0::19971010::"
132
- "extensions to HTML 4.0//"),
133
- GUMBO_STRING("-//Spyglass//DTD HTML 2.0 Extended//"),
134
- GUMBO_STRING("-//SQ//DTD HTML 2.0 HoTMetaL + extensions//"),
135
- GUMBO_STRING("-//Sun Microsystems Corp.//DTD HotJava HTML//"),
136
- GUMBO_STRING("-//Sun Microsystems Corp.//DTD HotJava Strict HTML//"),
137
- GUMBO_STRING("-//W3C//DTD HTML 3 1995-03-24//"),
138
- GUMBO_STRING("-//W3C//DTD HTML 3.2 Draft//"),
139
- GUMBO_STRING("-//W3C//DTD HTML 3.2 Final//"),
140
- GUMBO_STRING("-//W3C//DTD HTML 3.2//"),
141
- GUMBO_STRING("-//W3C//DTD HTML 3.2S Draft//"),
142
- GUMBO_STRING("-//W3C//DTD HTML 4.0 Frameset//"),
143
- GUMBO_STRING("-//W3C//DTD HTML 4.0 Transitional//"),
144
- GUMBO_STRING("-//W3C//DTD HTML Experimental 19960712//"),
145
- GUMBO_STRING("-//W3C//DTD HTML Experimental 970421//"),
146
- GUMBO_STRING("-//W3C//DTD W3 HTML//"),
147
- GUMBO_STRING("-//W3O//DTD W3 HTML 3.0//"),
148
- GUMBO_STRING("-//WebTechs//DTD Mozilla HTML 2.0//"),
149
- GUMBO_STRING("-//WebTechs//DTD Mozilla HTML//"),
150
- TERMINATOR
151
- };
91
+ GUMBO_STRING("+//Silmaril//dtd html Pro v0r11 19970101//"),
92
+ GUMBO_STRING("-//AdvaSoft Ltd//DTD HTML 3.0 asWedit + extensions//"),
93
+ GUMBO_STRING("-//AS//DTD HTML 3.0 asWedit + extensions//"),
94
+ GUMBO_STRING("-//IETF//DTD HTML 2.0 Level 1//"),
95
+ GUMBO_STRING("-//IETF//DTD HTML 2.0 Level 2//"),
96
+ GUMBO_STRING("-//IETF//DTD HTML 2.0 Strict Level 1//"),
97
+ GUMBO_STRING("-//IETF//DTD HTML 2.0 Strict Level 2//"),
98
+ GUMBO_STRING("-//IETF//DTD HTML 2.0 Strict//"),
99
+ GUMBO_STRING("-//IETF//DTD HTML 2.0//"),
100
+ GUMBO_STRING("-//IETF//DTD HTML 2.1E//"),
101
+ GUMBO_STRING("-//IETF//DTD HTML 3.0//"),
102
+ GUMBO_STRING("-//IETF//DTD HTML 3.2 Final//"),
103
+ GUMBO_STRING("-//IETF//DTD HTML 3.2//"),
104
+ GUMBO_STRING("-//IETF//DTD HTML 3//"),
105
+ GUMBO_STRING("-//IETF//DTD HTML Level 0//"),
106
+ GUMBO_STRING("-//IETF//DTD HTML Level 1//"),
107
+ GUMBO_STRING("-//IETF//DTD HTML Level 2//"),
108
+ GUMBO_STRING("-//IETF//DTD HTML Level 3//"),
109
+ GUMBO_STRING("-//IETF//DTD HTML Strict Level 0//"),
110
+ GUMBO_STRING("-//IETF//DTD HTML Strict Level 1//"),
111
+ GUMBO_STRING("-//IETF//DTD HTML Strict Level 2//"),
112
+ GUMBO_STRING("-//IETF//DTD HTML Strict Level 3//"),
113
+ GUMBO_STRING("-//IETF//DTD HTML Strict//"),
114
+ GUMBO_STRING("-//IETF//DTD HTML//"),
115
+ GUMBO_STRING("-//Metrius//DTD Metrius Presentational//"),
116
+ GUMBO_STRING("-//Microsoft//DTD Internet Explorer 2.0 HTML Strict//"),
117
+ GUMBO_STRING("-//Microsoft//DTD Internet Explorer 2.0 HTML//"),
118
+ GUMBO_STRING("-//Microsoft//DTD Internet Explorer 2.0 Tables//"),
119
+ GUMBO_STRING("-//Microsoft//DTD Internet Explorer 3.0 HTML Strict//"),
120
+ GUMBO_STRING("-//Microsoft//DTD Internet Explorer 3.0 HTML//"),
121
+ GUMBO_STRING("-//Microsoft//DTD Internet Explorer 3.0 Tables//"),
122
+ GUMBO_STRING("-//Netscape Comm. Corp.//DTD HTML//"),
123
+ GUMBO_STRING("-//Netscape Comm. Corp.//DTD Strict HTML//"),
124
+ GUMBO_STRING("-//O'Reilly and Associates//DTD HTML 2.0//"),
125
+ GUMBO_STRING("-//O'Reilly and Associates//DTD HTML Extended 1.0//"),
126
+ GUMBO_STRING("-//O'Reilly and Associates//DTD HTML Extended Relaxed 1.0//"),
127
+ GUMBO_STRING(
128
+ "-//SoftQuad Software//DTD HoTMetaL PRO 6.0::19990601::)"
129
+ "extensions to HTML 4.0//"),
130
+ GUMBO_STRING(
131
+ "-//SoftQuad//DTD HoTMetaL PRO 4.0::19971010::"
132
+ "extensions to HTML 4.0//"),
133
+ GUMBO_STRING("-//Spyglass//DTD HTML 2.0 Extended//"),
134
+ GUMBO_STRING("-//SQ//DTD HTML 2.0 HoTMetaL + extensions//"),
135
+ GUMBO_STRING("-//Sun Microsystems Corp.//DTD HotJava HTML//"),
136
+ GUMBO_STRING("-//Sun Microsystems Corp.//DTD HotJava Strict HTML//"),
137
+ GUMBO_STRING("-//W3C//DTD HTML 3 1995-03-24//"),
138
+ GUMBO_STRING("-//W3C//DTD HTML 3.2 Draft//"),
139
+ GUMBO_STRING("-//W3C//DTD HTML 3.2 Final//"),
140
+ GUMBO_STRING("-//W3C//DTD HTML 3.2//"),
141
+ GUMBO_STRING("-//W3C//DTD HTML 3.2S Draft//"),
142
+ GUMBO_STRING("-//W3C//DTD HTML 4.0 Frameset//"),
143
+ GUMBO_STRING("-//W3C//DTD HTML 4.0 Transitional//"),
144
+ GUMBO_STRING("-//W3C//DTD HTML Experimental 19960712//"),
145
+ GUMBO_STRING("-//W3C//DTD HTML Experimental 970421//"),
146
+ GUMBO_STRING("-//W3C//DTD W3 HTML//"),
147
+ GUMBO_STRING("-//W3O//DTD W3 HTML 3.0//"),
148
+ GUMBO_STRING("-//WebTechs//DTD Mozilla HTML 2.0//"),
149
+ GUMBO_STRING("-//WebTechs//DTD Mozilla HTML//"), TERMINATOR};
152
150
 
153
151
  static const GumboStringPiece kQuirksModePublicIdExactMatches[] = {
154
- GUMBO_STRING("-//W3O//DTD W3 HTML Strict 3.0//EN//"),
155
- GUMBO_STRING("-/W3C/DTD HTML 4.0 Transitional/EN"),
156
- GUMBO_STRING("HTML"),
157
- TERMINATOR
158
- };
152
+ GUMBO_STRING("-//W3O//DTD W3 HTML Strict 3.0//EN//"),
153
+ GUMBO_STRING("-/W3C/DTD HTML 4.0 Transitional/EN"), GUMBO_STRING("HTML"),
154
+ TERMINATOR};
159
155
 
160
156
  static const GumboStringPiece kQuirksModeSystemIdExactMatches[] = {
161
- GUMBO_STRING("http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"),
162
- TERMINATOR
163
- };
157
+ GUMBO_STRING("http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"),
158
+ TERMINATOR};
164
159
 
165
160
  static const GumboStringPiece kLimitedQuirksPublicIdPrefixes[] = {
166
- GUMBO_STRING("-//W3C//DTD XHTML 1.0 Frameset//"),
167
- GUMBO_STRING("-//W3C//DTD XHTML 1.0 Transitional//"),
168
- TERMINATOR
169
- };
161
+ GUMBO_STRING("-//W3C//DTD XHTML 1.0 Frameset//"),
162
+ GUMBO_STRING("-//W3C//DTD XHTML 1.0 Transitional//"), TERMINATOR};
170
163
 
171
- static const GumboStringPiece kLimitedQuirksRequiresSystemIdPublicIdPrefixes[] = {
172
- GUMBO_STRING("-//W3C//DTD HTML 4.01 Frameset//"),
173
- GUMBO_STRING("-//W3C//DTD HTML 4.01 Transitional//"),
174
- TERMINATOR
175
- };
164
+ static const GumboStringPiece kLimitedQuirksRequiresSystemIdPublicIdPrefixes[] =
165
+ {GUMBO_STRING("-//W3C//DTD HTML 4.01 Frameset//"),
166
+ GUMBO_STRING("-//W3C//DTD HTML 4.01 Transitional//"), TERMINATOR};
176
167
 
177
168
  // Indexed by GumboNamespaceEnum; keep in sync with that.
178
- static const char* kLegalXmlns[] = {
179
- "http://www.w3.org/1999/xhtml",
180
- "http://www.w3.org/2000/svg",
181
- "http://www.w3.org/1998/Math/MathML"
182
- };
169
+ static const char* kLegalXmlns[] = {"http://www.w3.org/1999/xhtml",
170
+ "http://www.w3.org/2000/svg", "http://www.w3.org/1998/Math/MathML"};
183
171
 
184
172
  typedef struct _ReplacementEntry {
185
173
  const GumboStringPiece from;
@@ -187,112 +175,112 @@ typedef struct _ReplacementEntry {
187
175
  } ReplacementEntry;
188
176
 
189
177
  #define REPLACEMENT_ENTRY(from, to) \
190
- { GUMBO_STRING(from), GUMBO_STRING(to) }
178
+ { GUMBO_STRING(from), GUMBO_STRING(to) }
191
179
 
192
180
  // Static data for SVG attribute replacements.
193
- // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#adjust-svg-attributes
181
+ // https://html.spec.whatwg.org/multipage/syntax.html#creating-and-inserting-nodes
194
182
  static const ReplacementEntry kSvgAttributeReplacements[] = {
195
- REPLACEMENT_ENTRY("attributename", "attributeName"),
196
- REPLACEMENT_ENTRY("attributetype", "attributeType"),
197
- REPLACEMENT_ENTRY("basefrequency", "baseFrequency"),
198
- REPLACEMENT_ENTRY("baseprofile", "baseProfile"),
199
- REPLACEMENT_ENTRY("calcmode", "calcMode"),
200
- REPLACEMENT_ENTRY("clippathunits", "clipPathUnits"),
201
- REPLACEMENT_ENTRY("contentscripttype", "contentScriptType"),
202
- REPLACEMENT_ENTRY("contentstyletype", "contentStyleType"),
203
- REPLACEMENT_ENTRY("diffuseconstant", "diffuseConstant"),
204
- REPLACEMENT_ENTRY("edgemode", "edgeMode"),
205
- REPLACEMENT_ENTRY("externalresourcesrequired", "externalResourcesRequired"),
206
- REPLACEMENT_ENTRY("filterres", "filterRes"),
207
- REPLACEMENT_ENTRY("filterunits", "filterUnits"),
208
- REPLACEMENT_ENTRY("glyphref", "glyphRef"),
209
- REPLACEMENT_ENTRY("gradienttransform", "gradientTransform"),
210
- REPLACEMENT_ENTRY("gradientunits", "gradientUnits"),
211
- REPLACEMENT_ENTRY("kernelmatrix", "kernelMatrix"),
212
- REPLACEMENT_ENTRY("kernelunitlength", "kernelUnitLength"),
213
- REPLACEMENT_ENTRY("keypoints", "keyPoints"),
214
- REPLACEMENT_ENTRY("keysplines", "keySplines"),
215
- REPLACEMENT_ENTRY("keytimes", "keyTimes"),
216
- REPLACEMENT_ENTRY("lengthadjust", "lengthAdjust"),
217
- REPLACEMENT_ENTRY("limitingconeangle", "limitingConeAngle"),
218
- REPLACEMENT_ENTRY("markerheight", "markerHeight"),
219
- REPLACEMENT_ENTRY("markerunits", "markerUnits"),
220
- REPLACEMENT_ENTRY("markerwidth", "markerWidth"),
221
- REPLACEMENT_ENTRY("maskcontentunits", "maskContentUnits"),
222
- REPLACEMENT_ENTRY("maskunits", "maskUnits"),
223
- REPLACEMENT_ENTRY("numoctaves", "numOctaves"),
224
- REPLACEMENT_ENTRY("pathlength", "pathLength"),
225
- REPLACEMENT_ENTRY("patterncontentunits", "patternContentUnits"),
226
- REPLACEMENT_ENTRY("patterntransform", "patternTransform"),
227
- REPLACEMENT_ENTRY("patternunits", "patternUnits"),
228
- REPLACEMENT_ENTRY("pointsatx", "pointsAtX"),
229
- REPLACEMENT_ENTRY("pointsaty", "pointsAtY"),
230
- REPLACEMENT_ENTRY("pointsatz", "pointsAtZ"),
231
- REPLACEMENT_ENTRY("preservealpha", "preserveAlpha"),
232
- REPLACEMENT_ENTRY("preserveaspectratio", "preserveAspectRatio"),
233
- REPLACEMENT_ENTRY("primitiveunits", "primitiveUnits"),
234
- REPLACEMENT_ENTRY("refx", "refX"),
235
- REPLACEMENT_ENTRY("refy", "refY"),
236
- REPLACEMENT_ENTRY("repeatcount", "repeatCount"),
237
- REPLACEMENT_ENTRY("repeatdur", "repeatDur"),
238
- REPLACEMENT_ENTRY("requiredextensions", "requiredExtensions"),
239
- REPLACEMENT_ENTRY("requiredfeatures", "requiredFeatures"),
240
- REPLACEMENT_ENTRY("specularconstant", "specularConstant"),
241
- REPLACEMENT_ENTRY("specularexponent", "specularExponent"),
242
- REPLACEMENT_ENTRY("spreadmethod", "spreadMethod"),
243
- REPLACEMENT_ENTRY("startoffset", "startOffset"),
244
- REPLACEMENT_ENTRY("stddeviation", "stdDeviation"),
245
- REPLACEMENT_ENTRY("stitchtiles", "stitchTiles"),
246
- REPLACEMENT_ENTRY("surfacescale", "surfaceScale"),
247
- REPLACEMENT_ENTRY("systemlanguage", "systemLanguage"),
248
- REPLACEMENT_ENTRY("tablevalues", "tableValues"),
249
- REPLACEMENT_ENTRY("targetx", "targetX"),
250
- REPLACEMENT_ENTRY("targety", "targetY"),
251
- REPLACEMENT_ENTRY("textlength", "textLength"),
252
- REPLACEMENT_ENTRY("viewbox", "viewBox"),
253
- REPLACEMENT_ENTRY("viewtarget", "viewTarget"),
254
- REPLACEMENT_ENTRY("xchannelselector", "xChannelSelector"),
255
- REPLACEMENT_ENTRY("ychannelselector", "yChannelSelector"),
256
- REPLACEMENT_ENTRY("zoomandpan", "zoomAndPan"),
183
+ REPLACEMENT_ENTRY("attributename", "attributeName"),
184
+ REPLACEMENT_ENTRY("attributetype", "attributeType"),
185
+ REPLACEMENT_ENTRY("basefrequency", "baseFrequency"),
186
+ REPLACEMENT_ENTRY("baseprofile", "baseProfile"),
187
+ REPLACEMENT_ENTRY("calcmode", "calcMode"),
188
+ REPLACEMENT_ENTRY("clippathunits", "clipPathUnits"),
189
+ // REPLACEMENT_ENTRY("contentscripttype", "contentScriptType"),
190
+ // REPLACEMENT_ENTRY("contentstyletype", "contentStyleType"),
191
+ REPLACEMENT_ENTRY("diffuseconstant", "diffuseConstant"),
192
+ REPLACEMENT_ENTRY("edgemode", "edgeMode"),
193
+ // REPLACEMENT_ENTRY("externalresourcesrequired",
194
+ // "externalResourcesRequired"),
195
+ // REPLACEMENT_ENTRY("filterres", "filterRes"),
196
+ REPLACEMENT_ENTRY("filterunits", "filterUnits"),
197
+ REPLACEMENT_ENTRY("glyphref", "glyphRef"),
198
+ REPLACEMENT_ENTRY("gradienttransform", "gradientTransform"),
199
+ REPLACEMENT_ENTRY("gradientunits", "gradientUnits"),
200
+ REPLACEMENT_ENTRY("kernelmatrix", "kernelMatrix"),
201
+ REPLACEMENT_ENTRY("kernelunitlength", "kernelUnitLength"),
202
+ REPLACEMENT_ENTRY("keypoints", "keyPoints"),
203
+ REPLACEMENT_ENTRY("keysplines", "keySplines"),
204
+ REPLACEMENT_ENTRY("keytimes", "keyTimes"),
205
+ REPLACEMENT_ENTRY("lengthadjust", "lengthAdjust"),
206
+ REPLACEMENT_ENTRY("limitingconeangle", "limitingConeAngle"),
207
+ REPLACEMENT_ENTRY("markerheight", "markerHeight"),
208
+ REPLACEMENT_ENTRY("markerunits", "markerUnits"),
209
+ REPLACEMENT_ENTRY("markerwidth", "markerWidth"),
210
+ REPLACEMENT_ENTRY("maskcontentunits", "maskContentUnits"),
211
+ REPLACEMENT_ENTRY("maskunits", "maskUnits"),
212
+ REPLACEMENT_ENTRY("numoctaves", "numOctaves"),
213
+ REPLACEMENT_ENTRY("pathlength", "pathLength"),
214
+ REPLACEMENT_ENTRY("patterncontentunits", "patternContentUnits"),
215
+ REPLACEMENT_ENTRY("patterntransform", "patternTransform"),
216
+ REPLACEMENT_ENTRY("patternunits", "patternUnits"),
217
+ REPLACEMENT_ENTRY("pointsatx", "pointsAtX"),
218
+ REPLACEMENT_ENTRY("pointsaty", "pointsAtY"),
219
+ REPLACEMENT_ENTRY("pointsatz", "pointsAtZ"),
220
+ REPLACEMENT_ENTRY("preservealpha", "preserveAlpha"),
221
+ REPLACEMENT_ENTRY("preserveaspectratio", "preserveAspectRatio"),
222
+ REPLACEMENT_ENTRY("primitiveunits", "primitiveUnits"),
223
+ REPLACEMENT_ENTRY("refx", "refX"), REPLACEMENT_ENTRY("refy", "refY"),
224
+ REPLACEMENT_ENTRY("repeatcount", "repeatCount"),
225
+ REPLACEMENT_ENTRY("repeatdur", "repeatDur"),
226
+ REPLACEMENT_ENTRY("requiredextensions", "requiredExtensions"),
227
+ REPLACEMENT_ENTRY("requiredfeatures", "requiredFeatures"),
228
+ REPLACEMENT_ENTRY("specularconstant", "specularConstant"),
229
+ REPLACEMENT_ENTRY("specularexponent", "specularExponent"),
230
+ REPLACEMENT_ENTRY("spreadmethod", "spreadMethod"),
231
+ REPLACEMENT_ENTRY("startoffset", "startOffset"),
232
+ REPLACEMENT_ENTRY("stddeviation", "stdDeviation"),
233
+ REPLACEMENT_ENTRY("stitchtiles", "stitchTiles"),
234
+ REPLACEMENT_ENTRY("surfacescale", "surfaceScale"),
235
+ REPLACEMENT_ENTRY("systemlanguage", "systemLanguage"),
236
+ REPLACEMENT_ENTRY("tablevalues", "tableValues"),
237
+ REPLACEMENT_ENTRY("targetx", "targetX"),
238
+ REPLACEMENT_ENTRY("targety", "targetY"),
239
+ REPLACEMENT_ENTRY("textlength", "textLength"),
240
+ REPLACEMENT_ENTRY("viewbox", "viewBox"),
241
+ REPLACEMENT_ENTRY("viewtarget", "viewTarget"),
242
+ REPLACEMENT_ENTRY("xchannelselector", "xChannelSelector"),
243
+ REPLACEMENT_ENTRY("ychannelselector", "yChannelSelector"),
244
+ REPLACEMENT_ENTRY("zoomandpan", "zoomAndPan"),
257
245
  };
258
246
 
259
247
  static const ReplacementEntry kSvgTagReplacements[] = {
260
- REPLACEMENT_ENTRY("altglyph", "altGlyph"),
261
- REPLACEMENT_ENTRY("altglyphdef", "altGlyphDef"),
262
- REPLACEMENT_ENTRY("altglyphitem", "altGlyphItem"),
263
- REPLACEMENT_ENTRY("animatecolor", "animateColor"),
264
- REPLACEMENT_ENTRY("animatemotion", "animateMotion"),
265
- REPLACEMENT_ENTRY("animatetransform", "animateTransform"),
266
- REPLACEMENT_ENTRY("clippath", "clipPath"),
267
- REPLACEMENT_ENTRY("feblend", "feBlend"),
268
- REPLACEMENT_ENTRY("fecolormatrix", "feColorMatrix"),
269
- REPLACEMENT_ENTRY("fecomponenttransfer", "feComponentTransfer"),
270
- REPLACEMENT_ENTRY("fecomposite", "feComposite"),
271
- REPLACEMENT_ENTRY("feconvolvematrix", "feConvolveMatrix"),
272
- REPLACEMENT_ENTRY("fediffuselighting", "feDiffuseLighting"),
273
- REPLACEMENT_ENTRY("fedisplacementmap", "feDisplacementMap"),
274
- REPLACEMENT_ENTRY("fedistantlight", "feDistantLight"),
275
- REPLACEMENT_ENTRY("feflood", "feFlood"),
276
- REPLACEMENT_ENTRY("fefunca", "feFuncA"),
277
- REPLACEMENT_ENTRY("fefuncb", "feFuncB"),
278
- REPLACEMENT_ENTRY("fefuncg", "feFuncG"),
279
- REPLACEMENT_ENTRY("fefuncr", "feFuncR"),
280
- REPLACEMENT_ENTRY("fegaussianblur", "feGaussianBlur"),
281
- REPLACEMENT_ENTRY("feimage", "feImage"),
282
- REPLACEMENT_ENTRY("femerge", "feMerge"),
283
- REPLACEMENT_ENTRY("femergenode", "feMergeNode"),
284
- REPLACEMENT_ENTRY("femorphology", "feMorphology"),
285
- REPLACEMENT_ENTRY("feoffset", "feOffset"),
286
- REPLACEMENT_ENTRY("fepointlight", "fePointLight"),
287
- REPLACEMENT_ENTRY("fespecularlighting", "feSpecularLighting"),
288
- REPLACEMENT_ENTRY("fespotlight", "feSpotLight"),
289
- REPLACEMENT_ENTRY("fetile", "feTile"),
290
- REPLACEMENT_ENTRY("feturbulence", "feTurbulence"),
291
- REPLACEMENT_ENTRY("foreignobject", "foreignObject"),
292
- REPLACEMENT_ENTRY("glyphref", "glyphRef"),
293
- REPLACEMENT_ENTRY("lineargradient", "linearGradient"),
294
- REPLACEMENT_ENTRY("radialgradient", "radialGradient"),
295
- REPLACEMENT_ENTRY("textpath", "textPath"),
248
+ REPLACEMENT_ENTRY("altglyph", "altGlyph"),
249
+ REPLACEMENT_ENTRY("altglyphdef", "altGlyphDef"),
250
+ REPLACEMENT_ENTRY("altglyphitem", "altGlyphItem"),
251
+ REPLACEMENT_ENTRY("animatecolor", "animateColor"),
252
+ REPLACEMENT_ENTRY("animatemotion", "animateMotion"),
253
+ REPLACEMENT_ENTRY("animatetransform", "animateTransform"),
254
+ REPLACEMENT_ENTRY("clippath", "clipPath"),
255
+ REPLACEMENT_ENTRY("feblend", "feBlend"),
256
+ REPLACEMENT_ENTRY("fecolormatrix", "feColorMatrix"),
257
+ REPLACEMENT_ENTRY("fecomponenttransfer", "feComponentTransfer"),
258
+ REPLACEMENT_ENTRY("fecomposite", "feComposite"),
259
+ REPLACEMENT_ENTRY("feconvolvematrix", "feConvolveMatrix"),
260
+ REPLACEMENT_ENTRY("fediffuselighting", "feDiffuseLighting"),
261
+ REPLACEMENT_ENTRY("fedisplacementmap", "feDisplacementMap"),
262
+ REPLACEMENT_ENTRY("fedistantlight", "feDistantLight"),
263
+ REPLACEMENT_ENTRY("feflood", "feFlood"),
264
+ REPLACEMENT_ENTRY("fefunca", "feFuncA"),
265
+ REPLACEMENT_ENTRY("fefuncb", "feFuncB"),
266
+ REPLACEMENT_ENTRY("fefuncg", "feFuncG"),
267
+ REPLACEMENT_ENTRY("fefuncr", "feFuncR"),
268
+ REPLACEMENT_ENTRY("fegaussianblur", "feGaussianBlur"),
269
+ REPLACEMENT_ENTRY("feimage", "feImage"),
270
+ REPLACEMENT_ENTRY("femerge", "feMerge"),
271
+ REPLACEMENT_ENTRY("femergenode", "feMergeNode"),
272
+ REPLACEMENT_ENTRY("femorphology", "feMorphology"),
273
+ REPLACEMENT_ENTRY("feoffset", "feOffset"),
274
+ REPLACEMENT_ENTRY("fepointlight", "fePointLight"),
275
+ REPLACEMENT_ENTRY("fespecularlighting", "feSpecularLighting"),
276
+ REPLACEMENT_ENTRY("fespotlight", "feSpotLight"),
277
+ REPLACEMENT_ENTRY("fetile", "feTile"),
278
+ REPLACEMENT_ENTRY("feturbulence", "feTurbulence"),
279
+ REPLACEMENT_ENTRY("foreignobject", "foreignObject"),
280
+ REPLACEMENT_ENTRY("glyphref", "glyphRef"),
281
+ REPLACEMENT_ENTRY("lineargradient", "linearGradient"),
282
+ REPLACEMENT_ENTRY("radialgradient", "radialGradient"),
283
+ REPLACEMENT_ENTRY("textpath", "textPath"),
296
284
  };
297
285
 
298
286
  typedef struct _NamespacedAttributeReplacement {
@@ -302,18 +290,18 @@ typedef struct _NamespacedAttributeReplacement {
302
290
  } NamespacedAttributeReplacement;
303
291
 
304
292
  static const NamespacedAttributeReplacement kForeignAttributeReplacements[] = {
305
- { "xlink:actuate", "actuate", GUMBO_ATTR_NAMESPACE_XLINK },
306
- { "xlink:actuate", "actuate", GUMBO_ATTR_NAMESPACE_XLINK },
307
- { "xlink:href", "href", GUMBO_ATTR_NAMESPACE_XLINK },
308
- { "xlink:role", "role", GUMBO_ATTR_NAMESPACE_XLINK },
309
- { "xlink:show", "show", GUMBO_ATTR_NAMESPACE_XLINK },
310
- { "xlink:title", "title", GUMBO_ATTR_NAMESPACE_XLINK },
311
- { "xlink:type", "type", GUMBO_ATTR_NAMESPACE_XLINK },
312
- { "xml:base", "base", GUMBO_ATTR_NAMESPACE_XML },
313
- { "xml:lang", "lang", GUMBO_ATTR_NAMESPACE_XML },
314
- { "xml:space", "space", GUMBO_ATTR_NAMESPACE_XML },
315
- { "xmlns", "xmlns", GUMBO_ATTR_NAMESPACE_XMLNS },
316
- { "xmlns:xlink", "xlink", GUMBO_ATTR_NAMESPACE_XMLNS },
293
+ {"xlink:actuate", "actuate", GUMBO_ATTR_NAMESPACE_XLINK},
294
+ {"xlink:actuate", "actuate", GUMBO_ATTR_NAMESPACE_XLINK},
295
+ {"xlink:href", "href", GUMBO_ATTR_NAMESPACE_XLINK},
296
+ {"xlink:role", "role", GUMBO_ATTR_NAMESPACE_XLINK},
297
+ {"xlink:show", "show", GUMBO_ATTR_NAMESPACE_XLINK},
298
+ {"xlink:title", "title", GUMBO_ATTR_NAMESPACE_XLINK},
299
+ {"xlink:type", "type", GUMBO_ATTR_NAMESPACE_XLINK},
300
+ {"xml:base", "base", GUMBO_ATTR_NAMESPACE_XML},
301
+ {"xml:lang", "lang", GUMBO_ATTR_NAMESPACE_XML},
302
+ {"xml:space", "space", GUMBO_ATTR_NAMESPACE_XML},
303
+ {"xmlns", "xmlns", GUMBO_ATTR_NAMESPACE_XMLNS},
304
+ {"xmlns:xlink", "xlink", GUMBO_ATTR_NAMESPACE_XMLNS},
317
305
  };
318
306
 
319
307
  // The "scope marker" for the list of active formatting elements. We use a
@@ -371,6 +359,9 @@ typedef struct GumboInternalParserState {
371
359
  GumboNode* _head_element;
372
360
  GumboNode* _form_element;
373
361
 
362
+ // The element used as fragment context when parsing in fragment mode
363
+ GumboNode* _fragment_ctx;
364
+
374
365
  // The flag for when the spec says "Reprocess the current token in..."
375
366
  bool _reprocess_current_token;
376
367
 
@@ -427,14 +418,14 @@ static bool attribute_matches(
427
418
  static bool attribute_matches_case_sensitive(
428
419
  const GumboVector* attributes, const char* name, const char* value) {
429
420
  const GumboAttribute* attr = gumbo_get_attribute(attributes, name);
430
- return attr ? strcmp(value, attr->value) == 0 : false;
421
+ return attr ? strcmp(value, attr->value) == 0 : false;
431
422
  }
432
423
 
433
424
  // Checks if the specified attribute vectors are identical.
434
425
  static bool all_attributes_match(
435
426
  const GumboVector* attr1, const GumboVector* attr2) {
436
- int num_unmatched_attr2_elements = attr2->length;
437
- for (int i = 0; i < attr1->length; ++i) {
427
+ unsigned int num_unmatched_attr2_elements = attr2->length;
428
+ for (unsigned int i = 0; i < attr1->length; ++i) {
438
429
  const GumboAttribute* attr = attr1->data[i];
439
430
  if (attribute_matches_case_sensitive(attr2, attr->name, attr->value)) {
440
431
  --num_unmatched_attr2_elements;
@@ -462,8 +453,7 @@ static GumboNode* create_node(GumboParser* parser, GumboNodeType type) {
462
453
  static GumboNode* new_document_node(GumboParser* parser) {
463
454
  GumboNode* document_node = create_node(parser, GUMBO_NODE_DOCUMENT);
464
455
  document_node->parse_flags = GUMBO_INSERTION_BY_PARSER;
465
- gumbo_vector_init(
466
- parser, 1, &document_node->v.document.children);
456
+ gumbo_vector_init(parser, 1, &document_node->v.document.children);
467
457
 
468
458
  // Must be initialized explicitly, as there's no guarantee that we'll see a
469
459
  // doc type token.
@@ -498,6 +488,7 @@ static void parser_state_init(GumboParser* parser) {
498
488
  gumbo_vector_init(parser, 5, &parser_state->_template_insertion_modes);
499
489
  parser_state->_head_element = NULL;
500
490
  parser_state->_form_element = NULL;
491
+ parser_state->_fragment_ctx = NULL;
501
492
  parser_state->_current_token = NULL;
502
493
  parser_state->_closed_body_tag = false;
503
494
  parser_state->_closed_html_tag = false;
@@ -506,6 +497,9 @@ static void parser_state_init(GumboParser* parser) {
506
497
 
507
498
  static void parser_state_destroy(GumboParser* parser) {
508
499
  GumboParserState* state = parser->_parser_state;
500
+ if (state->_fragment_ctx) {
501
+ destroy_node(parser, state->_fragment_ctx);
502
+ }
509
503
  gumbo_vector_destroy(parser, &state->_active_formatting_elements);
510
504
  gumbo_vector_destroy(parser, &state->_open_elements);
511
505
  gumbo_vector_destroy(parser, &state->_template_insertion_modes);
@@ -517,6 +511,10 @@ static GumboNode* get_document_node(GumboParser* parser) {
517
511
  return parser->_output->document;
518
512
  }
519
513
 
514
+ static bool is_fragment_parser(const GumboParser* parser) {
515
+ return !!parser->_parser_state->_fragment_ctx;
516
+ }
517
+
520
518
  // Returns the node at the bottom of the stack of open elements, or NULL if no
521
519
  // elements have been added yet.
522
520
  static GumboNode* get_current_node(GumboParser* parser) {
@@ -530,6 +528,14 @@ static GumboNode* get_current_node(GumboParser* parser) {
530
528
  return open_elements->data[open_elements->length - 1];
531
529
  }
532
530
 
531
+ static GumboNode* get_adjusted_current_node(GumboParser* parser) {
532
+ GumboParserState* state = parser->_parser_state;
533
+ if (state->_open_elements.length == 1 && state->_fragment_ctx) {
534
+ return state->_fragment_ctx;
535
+ }
536
+ return get_current_node(parser);
537
+ }
538
+
533
539
  // Returns true if the given needle is in the given array of literal
534
540
  // GumboStringPieces. If exact_match is true, this requires that they match
535
541
  // exactly; otherwise, this performs a prefix match to check if any of the
@@ -537,7 +543,7 @@ static GumboNode* get_current_node(GumboParser* parser) {
537
543
  // case-insensitive match.
538
544
  static bool is_in_static_list(
539
545
  const char* needle, const GumboStringPiece* haystack, bool exact_match) {
540
- for (int i = 0; haystack[i].length > 0; ++i) {
546
+ for (unsigned int i = 0; haystack[i].length > 0; ++i) {
541
547
  if ((exact_match && !strcmp(needle, haystack[i].data)) ||
542
548
  (!exact_match && !strcasecmp(needle, haystack[i].data))) {
543
549
  return true;
@@ -556,39 +562,63 @@ static void set_insertion_mode(GumboParser* parser, GumboInsertionMode mode) {
556
562
  // indicate that there is no appropriate insertion mode, and the loop should
557
563
  // continue.
558
564
  static GumboInsertionMode get_appropriate_insertion_mode(
559
- const GumboNode* node, bool is_last) {
560
- assert(node->type == GUMBO_NODE_ELEMENT);
565
+ const GumboParser* parser, int index) {
566
+ const GumboVector* open_elements = &parser->_parser_state->_open_elements;
567
+ const GumboNode* node = open_elements->data[index];
568
+ const bool is_last = index == 0;
561
569
 
562
- if (node->v.element.tag_namespace == GUMBO_NAMESPACE_HTML) {
563
- switch (node->v.element.tag) {
564
- case GUMBO_TAG_SELECT:
570
+ if (is_last && is_fragment_parser(parser)) {
571
+ node = parser->_parser_state->_fragment_ctx;
572
+ }
573
+
574
+ assert(node->type == GUMBO_NODE_ELEMENT || node->type == GUMBO_NODE_TEMPLATE);
575
+ switch (node->v.element.tag) {
576
+ case GUMBO_TAG_SELECT: {
577
+ if (is_last) {
578
+ return GUMBO_INSERTION_MODE_IN_SELECT;
579
+ }
580
+ for (int i = index; i > 0; --i) {
581
+ const GumboNode* ancestor = open_elements->data[i];
582
+ if (node_html_tag_is(ancestor, GUMBO_TAG_TEMPLATE)) {
565
583
  return GUMBO_INSERTION_MODE_IN_SELECT;
566
- case GUMBO_TAG_TD:
567
- case GUMBO_TAG_TH:
568
- return is_last ?
569
- GUMBO_INSERTION_MODE_IN_BODY : GUMBO_INSERTION_MODE_IN_CELL;
570
- case GUMBO_TAG_TR:
571
- return GUMBO_INSERTION_MODE_IN_ROW;
572
- case GUMBO_TAG_TBODY:
573
- case GUMBO_TAG_THEAD:
574
- case GUMBO_TAG_TFOOT:
575
- return GUMBO_INSERTION_MODE_IN_TABLE_BODY;
576
- case GUMBO_TAG_CAPTION:
577
- return GUMBO_INSERTION_MODE_IN_CAPTION;
578
- case GUMBO_TAG_COLGROUP:
579
- return GUMBO_INSERTION_MODE_IN_COLUMN_GROUP;
580
- case GUMBO_TAG_TABLE:
581
- return GUMBO_INSERTION_MODE_IN_TABLE;
582
- case GUMBO_TAG_HEAD:
583
- case GUMBO_TAG_BODY:
584
- return GUMBO_INSERTION_MODE_IN_BODY;
585
- case GUMBO_TAG_FRAMESET:
586
- return GUMBO_INSERTION_MODE_IN_FRAMESET;
587
- case GUMBO_TAG_HTML:
588
- return GUMBO_INSERTION_MODE_BEFORE_HEAD;
589
- default:
590
- break;
584
+ }
585
+ if (node_html_tag_is(ancestor, GUMBO_TAG_TABLE)) {
586
+ return GUMBO_INSERTION_MODE_IN_SELECT_IN_TABLE;
587
+ }
588
+ }
589
+ return GUMBO_INSERTION_MODE_IN_SELECT;
591
590
  }
591
+ case GUMBO_TAG_TD:
592
+ case GUMBO_TAG_TH:
593
+ if (!is_last) return GUMBO_INSERTION_MODE_IN_CELL;
594
+ break;
595
+ case GUMBO_TAG_TR:
596
+ return GUMBO_INSERTION_MODE_IN_ROW;
597
+ case GUMBO_TAG_TBODY:
598
+ case GUMBO_TAG_THEAD:
599
+ case GUMBO_TAG_TFOOT:
600
+ return GUMBO_INSERTION_MODE_IN_TABLE_BODY;
601
+ case GUMBO_TAG_CAPTION:
602
+ return GUMBO_INSERTION_MODE_IN_CAPTION;
603
+ case GUMBO_TAG_COLGROUP:
604
+ return GUMBO_INSERTION_MODE_IN_COLUMN_GROUP;
605
+ case GUMBO_TAG_TABLE:
606
+ return GUMBO_INSERTION_MODE_IN_TABLE;
607
+ case GUMBO_TAG_TEMPLATE:
608
+ return get_current_template_insertion_mode(parser);
609
+ case GUMBO_TAG_HEAD:
610
+ if (!is_last) return GUMBO_INSERTION_MODE_IN_HEAD;
611
+ break;
612
+ case GUMBO_TAG_BODY:
613
+ return GUMBO_INSERTION_MODE_IN_BODY;
614
+ case GUMBO_TAG_FRAMESET:
615
+ return GUMBO_INSERTION_MODE_IN_FRAMESET;
616
+ case GUMBO_TAG_HTML:
617
+ return parser->_parser_state->_head_element
618
+ ? GUMBO_INSERTION_MODE_AFTER_HEAD
619
+ : GUMBO_INSERTION_MODE_BEFORE_HEAD;
620
+ default:
621
+ break;
592
622
  }
593
623
  return is_last ? GUMBO_INSERTION_MODE_IN_BODY : GUMBO_INSERTION_MODE_INITIAL;
594
624
  }
@@ -596,9 +626,8 @@ static GumboInsertionMode get_appropriate_insertion_mode(
596
626
  // This performs the actual "reset the insertion mode" loop.
597
627
  static void reset_insertion_mode_appropriately(GumboParser* parser) {
598
628
  const GumboVector* open_elements = &parser->_parser_state->_open_elements;
599
- for (int i = open_elements->length; --i >= 0; ) {
600
- GumboInsertionMode mode =
601
- get_appropriate_insertion_mode(open_elements->data[i], i == 0);
629
+ for (int i = open_elements->length; --i >= 0;) {
630
+ GumboInsertionMode mode = get_appropriate_insertion_mode(parser, i);
602
631
  if (mode != GUMBO_INSERTION_MODE_INITIAL) {
603
632
  set_insertion_mode(parser, mode);
604
633
  return;
@@ -609,7 +638,8 @@ static void reset_insertion_mode_appropriately(GumboParser* parser) {
609
638
  assert(0);
610
639
  }
611
640
 
612
- static GumboError* parser_add_parse_error(GumboParser* parser, const GumboToken* token) {
641
+ static GumboError* parser_add_parse_error(
642
+ GumboParser* parser, const GumboToken* token) {
613
643
  gumbo_debug("Adding parse error.\n");
614
644
  GumboError* error = gumbo_add_error(parser);
615
645
  if (!error) {
@@ -628,13 +658,14 @@ static GumboError* parser_add_parse_error(GumboParser* parser, const GumboToken*
628
658
  }
629
659
  GumboParserState* state = parser->_parser_state;
630
660
  extra_data->parser_state = state->_insertion_mode;
631
- gumbo_vector_init(parser, state->_open_elements.length,
632
- &extra_data->tag_stack);
633
- for (int i = 0; i < state->_open_elements.length; ++i) {
661
+ gumbo_vector_init(
662
+ parser, state->_open_elements.length, &extra_data->tag_stack);
663
+ for (unsigned int i = 0; i < state->_open_elements.length; ++i) {
634
664
  const GumboNode* node = state->_open_elements.data[i];
635
- assert(node->type == GUMBO_NODE_ELEMENT);
636
- gumbo_vector_add(parser, (void*) node->v.element.tag,
637
- &extra_data->tag_stack);
665
+ assert(
666
+ node->type == GUMBO_NODE_ELEMENT || node->type == GUMBO_NODE_TEMPLATE);
667
+ gumbo_vector_add(
668
+ parser, (void*) node->v.element.tag, &extra_data->tag_stack);
638
669
  }
639
670
  return error;
640
671
  }
@@ -643,7 +674,8 @@ static GumboError* parser_add_parse_error(GumboParser* parser, const GumboToken*
643
674
  // by is_start) with one of the tag types in the varargs list. Terminate the
644
675
  // list with GUMBO_TAG_LAST; this functions as a sentinel since no portion of
645
676
  // the spec references tags that are not in the spec.
646
- static bool tag_in(const GumboToken* token, bool is_start, const gumbo_tagset tags) {
677
+ static bool tag_in(
678
+ const GumboToken* token, bool is_start, const gumbo_tagset tags) {
647
679
  GumboTag token_tag;
648
680
  if (is_start && token->type == GUMBO_TOKEN_START_TAG) {
649
681
  token_tag = token->v.start_tag.tag;
@@ -652,7 +684,7 @@ static bool tag_in(const GumboToken* token, bool is_start, const gumbo_tagset ta
652
684
  } else {
653
685
  return false;
654
686
  }
655
- return (token_tag < GUMBO_TAG_LAST && tags[(int)token_tag] != 0);
687
+ return (token_tag < GUMBO_TAG_LAST && tags[(int) token_tag] != 0);
656
688
  }
657
689
 
658
690
  // Like tag_in, but for the single-tag case.
@@ -669,41 +701,123 @@ static bool tag_is(const GumboToken* token, bool is_start, GumboTag tag) {
669
701
  // Like tag_in, but checks for the tag of a node, rather than a token.
670
702
  static bool node_tag_in_set(const GumboNode* node, const gumbo_tagset tags) {
671
703
  assert(node != NULL);
672
- if (node->type != GUMBO_NODE_ELEMENT) {
704
+ if (node->type != GUMBO_NODE_ELEMENT && node->type != GUMBO_NODE_TEMPLATE) {
673
705
  return false;
674
706
  }
675
- return TAGSET_INCLUDES(tags, node->v.element.tag_namespace, node->v.element.tag);
707
+ return TAGSET_INCLUDES(
708
+ tags, node->v.element.tag_namespace, node->v.element.tag);
676
709
  }
677
710
 
678
-
679
711
  // Like node_tag_in_set, but for the single-tag case.
680
- static bool node_qualified_tag_is(const GumboNode* node, GumboNamespaceEnum ns, GumboTag tag) {
681
- return node->type == GUMBO_NODE_ELEMENT &&
682
- node->v.element.tag == tag &&
683
- node->v.element.tag_namespace == ns;
712
+ static bool node_qualified_tag_is(
713
+ const GumboNode* node, GumboNamespaceEnum ns, GumboTag tag) {
714
+ assert(node);
715
+ return (node->type == GUMBO_NODE_ELEMENT ||
716
+ node->type == GUMBO_NODE_TEMPLATE) &&
717
+ node->v.element.tag == tag && node->v.element.tag_namespace == ns;
684
718
  }
685
719
 
686
720
  // Like node_tag_in_set, but for the single-tag case in the HTML namespace
687
- static bool node_html_tag_is(const GumboNode* node, GumboTag tag)
688
- {
721
+ static bool node_html_tag_is(const GumboNode* node, GumboTag tag) {
689
722
  return node_qualified_tag_is(node, GUMBO_NAMESPACE_HTML, tag);
690
723
  }
691
724
 
725
+ static void push_template_insertion_mode(
726
+ GumboParser* parser, GumboInsertionMode mode) {
727
+ gumbo_vector_add(
728
+ parser, (void*) mode, &parser->_parser_state->_template_insertion_modes);
729
+ }
730
+
731
+ static void pop_template_insertion_mode(GumboParser* parser) {
732
+ gumbo_vector_pop(parser, &parser->_parser_state->_template_insertion_modes);
733
+ }
734
+
735
+ // Returns the current template insertion mode. If the stack of template
736
+ // insertion modes is empty, this returns GUMBO_INSERTION_MODE_INITIAL.
737
+ static GumboInsertionMode get_current_template_insertion_mode(
738
+ const GumboParser* parser) {
739
+ GumboVector* template_insertion_modes =
740
+ &parser->_parser_state->_template_insertion_modes;
741
+ if (template_insertion_modes->length == 0) {
742
+ return GUMBO_INSERTION_MODE_INITIAL;
743
+ }
744
+ return (GumboInsertionMode)
745
+ template_insertion_modes->data[(template_insertion_modes->length - 1)];
746
+ }
692
747
 
693
748
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#mathml-text-integration-point
694
749
  static bool is_mathml_integration_point(const GumboNode* node) {
695
- return node_tag_in_set(node, (gumbo_tagset) { TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN),
696
- TAG_MATHML(MS), TAG_MATHML(MTEXT) });
750
+ return node_tag_in_set(
751
+ node, (gumbo_tagset){TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN),
752
+ TAG_MATHML(MS), TAG_MATHML(MTEXT)});
697
753
  }
698
754
 
699
755
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#html-integration-point
700
756
  static bool is_html_integration_point(const GumboNode* node) {
701
- return node_tag_in_set(node, (gumbo_tagset) { TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC), TAG_SVG(TITLE) }) ||
702
- (node_qualified_tag_is(node, GUMBO_NAMESPACE_MATHML, GUMBO_TAG_ANNOTATION_XML) && (
703
- attribute_matches(&node->v.element.attributes,
704
- "encoding", "text/html") ||
705
- attribute_matches(&node->v.element.attributes,
706
- "encoding", "application/xhtml+xml")));
757
+ return node_tag_in_set(node, (gumbo_tagset){TAG_SVG(FOREIGNOBJECT),
758
+ TAG_SVG(DESC), TAG_SVG(TITLE)}) ||
759
+ (node_qualified_tag_is(
760
+ node, GUMBO_NAMESPACE_MATHML, GUMBO_TAG_ANNOTATION_XML) &&
761
+ (attribute_matches(
762
+ &node->v.element.attributes, "encoding", "text/html") ||
763
+ attribute_matches(&node->v.element.attributes, "encoding",
764
+ "application/xhtml+xml")));
765
+ }
766
+
767
+ // This represents a place to insert a node, consisting of a target parent and a
768
+ // child index within that parent. If the node should be inserted at the end of
769
+ // the parent's children, index will be -1.
770
+ typedef struct {
771
+ GumboNode* target;
772
+ int index;
773
+ } InsertionLocation;
774
+
775
+ InsertionLocation get_appropriate_insertion_location(
776
+ GumboParser* parser, GumboNode* override_target) {
777
+ InsertionLocation retval = {override_target, -1};
778
+ if (retval.target == NULL) {
779
+ // No override target; default to the current node, but special-case the
780
+ // root node since get_current_node() assumes the stack of open elements is
781
+ // non-empty.
782
+ retval.target = parser->_output->root != NULL ? get_current_node(parser)
783
+ : get_document_node(parser);
784
+ }
785
+ if (!parser->_parser_state->_foster_parent_insertions ||
786
+ !node_tag_in_set(retval.target, (gumbo_tagset){TAG(TABLE), TAG(TBODY),
787
+ TAG(TFOOT), TAG(THEAD), TAG(TR)})) {
788
+ return retval;
789
+ }
790
+
791
+ // Foster-parenting case.
792
+ int last_template_index = -1;
793
+ int last_table_index = -1;
794
+ GumboVector* open_elements = &parser->_parser_state->_open_elements;
795
+ for (unsigned int i = 0; i < open_elements->length; ++i) {
796
+ if (node_html_tag_is(open_elements->data[i], GUMBO_TAG_TEMPLATE)) {
797
+ last_template_index = i;
798
+ }
799
+ if (node_html_tag_is(open_elements->data[i], GUMBO_TAG_TABLE)) {
800
+ last_table_index = i;
801
+ }
802
+ }
803
+ if (last_template_index != -1 &&
804
+ (last_table_index == -1 || last_template_index > last_table_index)) {
805
+ retval.target = open_elements->data[last_template_index];
806
+ return retval;
807
+ }
808
+ if (last_table_index == -1) {
809
+ retval.target = open_elements->data[0];
810
+ return retval;
811
+ }
812
+ GumboNode* last_table = open_elements->data[last_table_index];
813
+ if (last_table->parent != NULL) {
814
+ retval.target = last_table->parent;
815
+ retval.index = last_table->index_within_parent;
816
+ return retval;
817
+ }
818
+
819
+ retval.target = open_elements->data[last_table_index - 1];
820
+ return retval;
707
821
  }
708
822
 
709
823
  // Appends a node to the end of its parent, setting the "parent" and
@@ -713,7 +827,8 @@ static void append_node(
713
827
  assert(node->parent == NULL);
714
828
  assert(node->index_within_parent == -1);
715
829
  GumboVector* children;
716
- if (parent->type == GUMBO_NODE_ELEMENT) {
830
+ if (parent->type == GUMBO_NODE_ELEMENT ||
831
+ parent->type == GUMBO_NODE_TEMPLATE) {
717
832
  children = &parent->v.element.children;
718
833
  } else {
719
834
  assert(parent->type == GUMBO_NODE_DOCUMENT);
@@ -725,64 +840,41 @@ static void append_node(
725
840
  assert(node->index_within_parent < children->length);
726
841
  }
727
842
 
728
- // Inserts a node at the specified index within its parent, updating the
843
+ // Inserts a node at the specified InsertionLocation, updating the
729
844
  // "parent" and "index_within_parent" fields of it and all its siblings.
845
+ // If the index of the location is -1, this calls append_node.
730
846
  static void insert_node(
731
- GumboParser* parser, GumboNode* parent, int index, GumboNode* node) {
847
+ GumboParser* parser, GumboNode* node, InsertionLocation location) {
732
848
  assert(node->parent == NULL);
733
849
  assert(node->index_within_parent == -1);
734
- assert(parent->type == GUMBO_NODE_ELEMENT);
735
- GumboVector* children = &parent->v.element.children;
736
- assert(index >= 0);
737
- assert(index < children->length);
738
- node->parent = parent;
739
- node->index_within_parent = index;
740
- gumbo_vector_insert_at(parser, (void*) node, index, children);
741
- assert(node->index_within_parent < children->length);
742
- for (int i = index + 1; i < children->length; ++i) {
743
- GumboNode* sibling = children->data[i];
744
- sibling->index_within_parent = i;
745
- assert(sibling->index_within_parent < children->length);
746
- }
747
- }
850
+ GumboNode* parent = location.target;
851
+ int index = location.index;
852
+ if (index != -1) {
853
+ GumboVector* children = NULL;
854
+ if (parent->type == GUMBO_NODE_ELEMENT ||
855
+ parent->type == GUMBO_NODE_TEMPLATE) {
856
+ children = &parent->v.element.children;
857
+ } else if (parent->type == GUMBO_NODE_DOCUMENT) {
858
+ children = &parent->v.document.children;
859
+ assert(children->length == 0);
860
+ } else {
861
+ assert(0);
862
+ }
748
863
 
749
- // http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#foster-parenting
750
- static void foster_parent_element(GumboParser* parser, GumboNode* node) {
751
- GumboVector* open_elements = &parser->_parser_state->_open_elements;
752
- assert(open_elements->length > 2);
753
-
754
- node->parse_flags |= GUMBO_INSERTION_FOSTER_PARENTED;
755
- GumboNode* foster_parent_element = open_elements->data[0];
756
- assert(foster_parent_element->type == GUMBO_NODE_ELEMENT);
757
- assert(node_html_tag_is(foster_parent_element, GUMBO_TAG_HTML));
758
- for (int i = open_elements->length; --i > 1; ) {
759
- GumboNode* table_element = open_elements->data[i];
760
- if (node_html_tag_is(table_element, GUMBO_TAG_TABLE)) {
761
- foster_parent_element = table_element->parent;
762
- if (!foster_parent_element ||
763
- foster_parent_element->type != GUMBO_NODE_ELEMENT) {
764
- // Table has no parent; spec says it's possible if a script manipulated
765
- // the DOM, although I don't think we have to worry about this case.
766
- gumbo_debug("Table has no parent.\n");
767
- foster_parent_element = open_elements->data[i - 1];
768
- break;
769
- }
770
- assert(foster_parent_element->type == GUMBO_NODE_ELEMENT);
771
- gumbo_debug("Found enclosing table (%x) at %d; parent=%s, index=%d.\n",
772
- table_element, i, gumbo_normalized_tagname(
773
- foster_parent_element->v.element.tag),
774
- table_element->index_within_parent);
775
- assert(foster_parent_element->v.element.children.data[
776
- table_element->index_within_parent] == table_element);
777
- insert_node(parser, foster_parent_element,
778
- table_element->index_within_parent, node);
779
- return;
864
+ assert(index >= 0);
865
+ assert((unsigned int) index < children->length);
866
+ node->parent = parent;
867
+ node->index_within_parent = index;
868
+ gumbo_vector_insert_at(parser, (void*) node, index, children);
869
+ assert(node->index_within_parent < children->length);
870
+ for (unsigned int i = index + 1; i < children->length; ++i) {
871
+ GumboNode* sibling = children->data[i];
872
+ sibling->index_within_parent = i;
873
+ assert(sibling->index_within_parent < children->length);
780
874
  }
875
+ } else {
876
+ append_node(parser, parent, node);
781
877
  }
782
- if (node->type == GUMBO_NODE_ELEMENT) {
783
- gumbo_vector_add(parser, (void*) node, open_elements);
784
- }
785
- append_node(parser, foster_parent_element, node);
786
878
  }
787
879
 
788
880
  static void maybe_flush_text_node_buffer(GumboParser* parser) {
@@ -797,27 +889,27 @@ static void maybe_flush_text_node_buffer(GumboParser* parser) {
797
889
  buffer_state->_type == GUMBO_NODE_CDATA);
798
890
  GumboNode* text_node = create_node(parser, buffer_state->_type);
799
891
  GumboText* text_node_data = &text_node->v.text;
800
- text_node_data->text = gumbo_string_buffer_to_string(
801
- parser, &buffer_state->_buffer);
892
+ text_node_data->text =
893
+ gumbo_string_buffer_to_string(parser, &buffer_state->_buffer);
802
894
  text_node_data->original_text.data = buffer_state->_start_original_text;
803
895
  text_node_data->original_text.length =
804
896
  state->_current_token->original_text.data -
805
897
  buffer_state->_start_original_text;
806
898
  text_node_data->start_pos = buffer_state->_start_position;
807
- if (state->_foster_parent_insertions &&
808
- node_tag_in_set(get_current_node(parser), (gumbo_tagset) { TAG(TABLE), TAG(TBODY), TAG(TFOOT),
809
- TAG(THEAD), TAG(TR) })) {
810
- foster_parent_element(parser, text_node);
899
+
900
+ gumbo_debug("Flushing text node buffer of %.*s.\n",
901
+ (int) buffer_state->_buffer.length, buffer_state->_buffer.data);
902
+
903
+ InsertionLocation location = get_appropriate_insertion_location(parser, NULL);
904
+ if (location.target->type == GUMBO_NODE_DOCUMENT) {
905
+ // The DOM does not allow Document nodes to have Text children, so per the
906
+ // spec, they are dropped on the floor.
907
+ destroy_node(parser, text_node);
811
908
  } else {
812
- append_node(
813
- parser, parser->_output->root ?
814
- get_current_node(parser) : parser->_output->document, text_node);
909
+ insert_node(parser, text_node, location);
815
910
  }
816
- gumbo_debug("Flushing text node buffer of %.*s.\n",
817
- (int) buffer_state->_buffer.length, buffer_state->_buffer.data);
818
911
 
819
- gumbo_string_buffer_destroy(parser, &buffer_state->_buffer);
820
- gumbo_string_buffer_init(parser, &buffer_state->_buffer);
912
+ gumbo_string_buffer_clear(parser, &buffer_state->_buffer);
821
913
  buffer_state->_type = GUMBO_NODE_WHITESPACE;
822
914
  assert(buffer_state->_buffer.length == 0);
823
915
  }
@@ -825,9 +917,9 @@ static void maybe_flush_text_node_buffer(GumboParser* parser) {
825
917
  static void record_end_of_element(
826
918
  GumboToken* current_token, GumboElement* element) {
827
919
  element->end_pos = current_token->position;
828
- element->original_end_tag =
829
- current_token->type == GUMBO_TOKEN_END_TAG ?
830
- current_token->original_text : kGumboEmptyString;
920
+ element->original_end_tag = current_token->type == GUMBO_TOKEN_END_TAG
921
+ ? current_token->original_text
922
+ : kGumboEmptyString;
831
923
  }
832
924
 
833
925
  static GumboNode* pop_current_node(GumboParser* parser) {
@@ -835,8 +927,7 @@ static GumboNode* pop_current_node(GumboParser* parser) {
835
927
  maybe_flush_text_node_buffer(parser);
836
928
  if (state->_open_elements.length > 0) {
837
929
  assert(node_html_tag_is(state->_open_elements.data[0], GUMBO_TAG_HTML));
838
- gumbo_debug(
839
- "Popping %s node.\n",
930
+ gumbo_debug("Popping %s node.\n",
840
931
  gumbo_normalized_tagname(get_current_node(parser)->v.element.tag));
841
932
  }
842
933
  GumboNode* current_node = gumbo_vector_pop(parser, &state->_open_elements);
@@ -844,13 +935,16 @@ static GumboNode* pop_current_node(GumboParser* parser) {
844
935
  assert(state->_open_elements.length == 0);
845
936
  return NULL;
846
937
  }
847
- assert(current_node->type == GUMBO_NODE_ELEMENT);
938
+ assert(current_node->type == GUMBO_NODE_ELEMENT ||
939
+ current_node->type == GUMBO_NODE_TEMPLATE);
848
940
  bool is_closed_body_or_html_tag =
849
- (node_html_tag_is(current_node, GUMBO_TAG_BODY) && state->_closed_body_tag) ||
850
- (node_html_tag_is(current_node, GUMBO_TAG_HTML) && state->_closed_html_tag);
941
+ (node_html_tag_is(current_node, GUMBO_TAG_BODY) &&
942
+ state->_closed_body_tag) ||
943
+ (node_html_tag_is(current_node, GUMBO_TAG_HTML) &&
944
+ state->_closed_html_tag);
851
945
  if ((state->_current_token->type != GUMBO_TOKEN_END_TAG ||
852
- !node_html_tag_is(current_node, state->_current_token->v.end_tag)) &&
853
- !is_closed_body_or_html_tag) {
946
+ !node_html_tag_is(current_node, state->_current_token->v.end_tag)) &&
947
+ !is_closed_body_or_html_tag) {
854
948
  current_node->parse_flags |= GUMBO_INSERTION_IMPLICIT_END_TAG;
855
949
  }
856
950
  if (!is_closed_body_or_html_tag) {
@@ -873,22 +967,25 @@ static void append_comment_node(
873
967
 
874
968
  // http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#clear-the-stack-back-to-a-table-row-context
875
969
  static void clear_stack_to_table_row_context(GumboParser* parser) {
876
- while (!node_tag_in_set(get_current_node(parser), (gumbo_tagset) { TAG(HTML), TAG(TR) })) {
970
+ while (!node_tag_in_set(get_current_node(parser),
971
+ (gumbo_tagset){TAG(HTML), TAG(TR), TAG(TEMPLATE)})) {
877
972
  pop_current_node(parser);
878
973
  }
879
974
  }
880
975
 
881
976
  // http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#clear-the-stack-back-to-a-table-context
882
977
  static void clear_stack_to_table_context(GumboParser* parser) {
883
- while (!node_tag_in_set(get_current_node(parser), (gumbo_tagset) { TAG(HTML), TAG(TABLE) } )) {
978
+ while (!node_tag_in_set(get_current_node(parser),
979
+ (gumbo_tagset){TAG(HTML), TAG(TABLE), TAG(TEMPLATE)})) {
884
980
  pop_current_node(parser);
885
981
  }
886
982
  }
887
983
 
888
984
  // http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#clear-the-stack-back-to-a-table-body-context
889
985
  void clear_stack_to_table_body_context(GumboParser* parser) {
890
- while (!node_tag_in_set(get_current_node(parser), (gumbo_tagset) { TAG(HTML), TAG(TBODY),
891
- TAG(TFOOT), TAG(THEAD) })) {
986
+ while (!node_tag_in_set(get_current_node(parser),
987
+ (gumbo_tagset){TAG(HTML), TAG(TBODY), TAG(TFOOT), TAG(THEAD),
988
+ TAG(TEMPLATE)})) {
892
989
  pop_current_node(parser);
893
990
  }
894
991
  }
@@ -903,7 +1000,9 @@ static GumboNode* create_element(GumboParser* parser, GumboTag tag) {
903
1000
  element->tag_namespace = GUMBO_NAMESPACE_HTML;
904
1001
  element->original_tag = kGumboEmptyString;
905
1002
  element->original_end_tag = kGumboEmptyString;
906
- element->start_pos = parser->_parser_state->_current_token->position;
1003
+ element->start_pos = (parser->_parser_state->_current_token)
1004
+ ? parser->_parser_state->_current_token->position
1005
+ : kGumboEmptySourcePosition;
907
1006
  element->end_pos = kGumboEmptySourcePosition;
908
1007
  return node;
909
1008
  }
@@ -914,7 +1013,12 @@ static GumboNode* create_element_from_token(
914
1013
  assert(token->type == GUMBO_TOKEN_START_TAG);
915
1014
  GumboTokenStartTag* start_tag = &token->v.start_tag;
916
1015
 
917
- GumboNode* node = create_node(parser, GUMBO_NODE_ELEMENT);
1016
+ GumboNodeType type = (tag_namespace == GUMBO_NAMESPACE_HTML &&
1017
+ start_tag->tag == GUMBO_TAG_TEMPLATE)
1018
+ ? GUMBO_NODE_TEMPLATE
1019
+ : GUMBO_NODE_ELEMENT;
1020
+
1021
+ GumboNode* node = create_node(parser, type);
918
1022
  GumboElement* element = &node->v.element;
919
1023
  gumbo_vector_init(parser, 1, &element->children);
920
1024
  element->attributes = start_tag->attributes;
@@ -937,7 +1041,7 @@ static GumboNode* create_element_from_token(
937
1041
 
938
1042
  // http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#insert-an-html-element
939
1043
  static void insert_element(GumboParser* parser, GumboNode* node,
940
- bool is_reconstructing_formatting_elements) {
1044
+ bool is_reconstructing_formatting_elements) {
941
1045
  GumboParserState* state = parser->_parser_state;
942
1046
  // NOTE(jdtang): The text node buffer must always be flushed before inserting
943
1047
  // a node, otherwise we're handling nodes in a different order than the spec
@@ -951,20 +1055,8 @@ static void insert_element(GumboParser* parser, GumboNode* node,
951
1055
  if (!is_reconstructing_formatting_elements) {
952
1056
  maybe_flush_text_node_buffer(parser);
953
1057
  }
954
- if (state->_foster_parent_insertions &&
955
- node_tag_in_set(get_current_node(parser), (gumbo_tagset) { TAG(TABLE), TAG(TBODY), TAG(TFOOT),
956
- TAG(THEAD), TAG(TR) } )) {
957
- foster_parent_element(parser, node);
958
- gumbo_vector_add(parser, (void*) node, &state->_open_elements);
959
- return;
960
- }
961
-
962
- // This is called to insert the root HTML element, but get_current_node
963
- // assumes the stack of open elements is non-empty, so we need special
964
- // handling for this case.
965
- append_node(
966
- parser, parser->_output->root ?
967
- get_current_node(parser) : parser->_output->document, node);
1058
+ InsertionLocation location = get_appropriate_insertion_location(parser, NULL);
1059
+ insert_node(parser, node, location);
968
1060
  gumbo_vector_add(parser, (void*) node, &state->_open_elements);
969
1061
  }
970
1062
 
@@ -977,7 +1069,7 @@ static GumboNode* insert_element_from_token(
977
1069
  create_element_from_token(parser, token, GUMBO_NAMESPACE_HTML);
978
1070
  insert_element(parser, element, false);
979
1071
  gumbo_debug("Inserting <%s> element (@%x) from token.\n",
980
- gumbo_normalized_tagname(element->v.element.tag), element);
1072
+ gumbo_normalized_tagname(element->v.element.tag), element);
981
1073
  return element;
982
1074
  }
983
1075
 
@@ -990,7 +1082,7 @@ static GumboNode* insert_element_of_tag_type(
990
1082
  element->parse_flags |= GUMBO_INSERTION_BY_PARSER | reason;
991
1083
  insert_element(parser, element, false);
992
1084
  gumbo_debug("Inserting %s element (@%x) from tag type.\n",
993
- gumbo_normalized_tagname(tag), element);
1085
+ gumbo_normalized_tagname(tag), element);
994
1086
  return element;
995
1087
  }
996
1088
 
@@ -1002,16 +1094,14 @@ static GumboNode* insert_foreign_element(
1002
1094
  GumboNode* element = create_element_from_token(parser, token, tag_namespace);
1003
1095
  insert_element(parser, element, false);
1004
1096
  if (token_has_attribute(token, "xmlns") &&
1005
- !attribute_matches_case_sensitive(
1006
- &token->v.start_tag.attributes, "xmlns",
1097
+ !attribute_matches_case_sensitive(&token->v.start_tag.attributes, "xmlns",
1007
1098
  kLegalXmlns[tag_namespace])) {
1008
1099
  // TODO(jdtang): Since there're multiple possible error codes here, we
1009
1100
  // eventually need reason codes to differentiate them.
1010
1101
  parser_add_parse_error(parser, token);
1011
1102
  }
1012
1103
  if (token_has_attribute(token, "xmlns:xlink") &&
1013
- !attribute_matches_case_sensitive(
1014
- &token->v.start_tag.attributes,
1104
+ !attribute_matches_case_sensitive(&token->v.start_tag.attributes,
1015
1105
  "xmlns:xlink", "http://www.w3.org/1999/xlink")) {
1016
1106
  parser_add_parse_error(parser, token);
1017
1107
  }
@@ -1021,8 +1111,7 @@ static GumboNode* insert_foreign_element(
1021
1111
  static void insert_text_token(GumboParser* parser, GumboToken* token) {
1022
1112
  assert(token->type == GUMBO_TOKEN_WHITESPACE ||
1023
1113
  token->type == GUMBO_TOKEN_CHARACTER ||
1024
- token->type == GUMBO_TOKEN_NULL ||
1025
- token->type == GUMBO_TOKEN_CDATA);
1114
+ token->type == GUMBO_TOKEN_NULL || token->type == GUMBO_TOKEN_CDATA);
1026
1115
  TextNodeBufferState* buffer_state = &parser->_parser_state->_text_node;
1027
1116
  if (buffer_state->_buffer.length == 0) {
1028
1117
  // Initialize position fields.
@@ -1057,7 +1146,7 @@ static void acknowledge_self_closing_tag(GumboParser* parser) {
1057
1146
  // elements, and fills in its index if so.
1058
1147
  static bool find_last_anchor_index(GumboParser* parser, int* anchor_index) {
1059
1148
  GumboVector* elements = &parser->_parser_state->_active_formatting_elements;
1060
- for (int i = elements->length; --i >= 0; ) {
1149
+ for (int i = elements->length; --i >= 0;) {
1061
1150
  GumboNode* node = elements->data[i];
1062
1151
  if (node == &kActiveFormattingScopeMarker) {
1063
1152
  return false;
@@ -1074,21 +1163,21 @@ static bool find_last_anchor_index(GumboParser* parser, int* anchor_index) {
1074
1163
  // formatting elements (after the last active scope marker) that have a specific
1075
1164
  // tag. If this is > 0, then earliest_matching_index will be filled in with the
1076
1165
  // index of the first such element.
1077
- static int count_formatting_elements_of_tag(
1078
- GumboParser* parser, const GumboNode* desired_node,
1079
- int* earliest_matching_index) {
1166
+ static int count_formatting_elements_of_tag(GumboParser* parser,
1167
+ const GumboNode* desired_node, int* earliest_matching_index) {
1080
1168
  const GumboElement* desired_element = &desired_node->v.element;
1081
1169
  GumboVector* elements = &parser->_parser_state->_active_formatting_elements;
1082
1170
  int num_identical_elements = 0;
1083
- for (int i = elements->length; --i >= 0; ) {
1171
+ for (int i = elements->length; --i >= 0;) {
1084
1172
  GumboNode* node = elements->data[i];
1085
1173
  if (node == &kActiveFormattingScopeMarker) {
1086
1174
  break;
1087
1175
  }
1088
1176
  assert(node->type == GUMBO_NODE_ELEMENT);
1089
- if (node_qualified_tag_is(node, desired_element->tag_namespace, desired_element->tag) &&
1090
- all_attributes_match(&node->v.element.attributes,
1091
- &desired_element->attributes)) {
1177
+ if (node_qualified_tag_is(
1178
+ node, desired_element->tag_namespace, desired_element->tag) &&
1179
+ all_attributes_match(
1180
+ &node->v.element.attributes, &desired_element->attributes)) {
1092
1181
  num_identical_elements++;
1093
1182
  *earliest_matching_index = i;
1094
1183
  }
@@ -1115,7 +1204,7 @@ static void add_formatting_element(GumboParser* parser, const GumboNode* node) {
1115
1204
  // Noah's Ark clause: if there're at least 3, remove the earliest.
1116
1205
  if (num_identical_elements >= 3) {
1117
1206
  gumbo_debug("Noah's ark clause: removing element at %d.\n",
1118
- earliest_identical_element);
1207
+ earliest_identical_element);
1119
1208
  gumbo_vector_remove_at(parser, earliest_identical_element, elements);
1120
1209
  }
1121
1210
 
@@ -1124,7 +1213,7 @@ static void add_formatting_element(GumboParser* parser, const GumboNode* node) {
1124
1213
 
1125
1214
  static bool is_open_element(GumboParser* parser, const GumboNode* node) {
1126
1215
  GumboVector* open_elements = &parser->_parser_state->_open_elements;
1127
- for (int i = 0; i < open_elements->length; ++i) {
1216
+ for (unsigned int i = 0; i < open_elements->length; ++i) {
1128
1217
  if (open_elements->data[i] == node) {
1129
1218
  return true;
1130
1219
  }
@@ -1136,8 +1225,8 @@ static bool is_open_element(GumboParser* parser, const GumboNode* node) {
1136
1225
  // clone shares no structure with the original node: all owned strings and
1137
1226
  // values are fresh copies.
1138
1227
  GumboNode* clone_node(
1139
- GumboParser* parser, const GumboNode* node, GumboParseFlags reason) {
1140
- assert(node->type == GUMBO_NODE_ELEMENT);
1228
+ GumboParser* parser, GumboNode* node, GumboParseFlags reason) {
1229
+ assert(node->type == GUMBO_NODE_ELEMENT || node->type == GUMBO_NODE_TEMPLATE);
1141
1230
  GumboNode* new_node = gumbo_parser_allocate(parser, sizeof(GumboNode));
1142
1231
  *new_node = *node;
1143
1232
  new_node->parent = NULL;
@@ -1151,7 +1240,7 @@ GumboNode* clone_node(
1151
1240
 
1152
1241
  const GumboVector* old_attributes = &node->v.element.attributes;
1153
1242
  gumbo_vector_init(parser, old_attributes->length, &element->attributes);
1154
- for (int i = 0; i < old_attributes->length; ++i) {
1243
+ for (unsigned int i = 0; i < old_attributes->length; ++i) {
1155
1244
  const GumboAttribute* old_attr = old_attributes->data[i];
1156
1245
  GumboAttribute* attr =
1157
1246
  gumbo_parser_allocate(parser, sizeof(GumboAttribute));
@@ -1175,8 +1264,8 @@ static void reconstruct_active_formatting_elements(GumboParser* parser) {
1175
1264
  }
1176
1265
 
1177
1266
  // Step 2 & 3
1178
- int i = elements->length - 1;
1179
- const GumboNode* element = elements->data[i];
1267
+ unsigned int i = elements->length - 1;
1268
+ GumboNode* element = elements->data[i];
1180
1269
  if (element == &kActiveFormattingScopeMarker ||
1181
1270
  is_open_element(parser, element)) {
1182
1271
  return;
@@ -1186,7 +1275,7 @@ static void reconstruct_active_formatting_elements(GumboParser* parser) {
1186
1275
  do {
1187
1276
  if (i == 0) {
1188
1277
  // Step 4
1189
- i = -1; // Incremented to 0 below.
1278
+ i = -1; // Incremented to 0 below.
1190
1279
  break;
1191
1280
  }
1192
1281
  // Step 5
@@ -1196,9 +1285,8 @@ static void reconstruct_active_formatting_elements(GumboParser* parser) {
1196
1285
 
1197
1286
  ++i;
1198
1287
  gumbo_debug("Reconstructing elements from %d on %s parent.\n", i,
1199
- gumbo_normalized_tagname(
1200
- get_current_node(parser)->v.element.tag));
1201
- for(; i < elements->length; ++i) {
1288
+ gumbo_normalized_tagname(get_current_node(parser)->v.element.tag));
1289
+ for (; i < elements->length; ++i) {
1202
1290
  // Step 7 & 8.
1203
1291
  assert(elements->length > 0);
1204
1292
  assert(i < elements->length);
@@ -1207,11 +1295,16 @@ static void reconstruct_active_formatting_elements(GumboParser* parser) {
1207
1295
  GumboNode* clone = clone_node(
1208
1296
  parser, element, GUMBO_INSERTION_RECONSTRUCTED_FORMATTING_ELEMENT);
1209
1297
  // Step 9.
1210
- insert_element(parser, clone, true);
1298
+ InsertionLocation location =
1299
+ get_appropriate_insertion_location(parser, NULL);
1300
+ insert_node(parser, clone, location);
1301
+ gumbo_vector_add(
1302
+ parser, (void*) clone, &parser->_parser_state->_open_elements);
1303
+
1211
1304
  // Step 10.
1212
1305
  elements->data[i] = clone;
1213
1306
  gumbo_debug("Reconstructed %s element at %d.\n",
1214
- gumbo_normalized_tagname(clone->v.element.tag), i);
1307
+ gumbo_normalized_tagname(clone->v.element.tag), i);
1215
1308
  }
1216
1309
  }
1217
1310
 
@@ -1222,32 +1315,30 @@ static void clear_active_formatting_elements(GumboParser* parser) {
1222
1315
  do {
1223
1316
  node = gumbo_vector_pop(parser, elements);
1224
1317
  ++num_elements_cleared;
1225
- } while(node && node != &kActiveFormattingScopeMarker);
1318
+ } while (node && node != &kActiveFormattingScopeMarker);
1226
1319
  gumbo_debug("Cleared %d elements from active formatting list.\n",
1227
- num_elements_cleared);
1320
+ num_elements_cleared);
1228
1321
  }
1229
1322
 
1230
1323
  // http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#the-initial-insertion-mode
1231
1324
  static GumboQuirksModeEnum compute_quirks_mode(
1232
1325
  const GumboTokenDocType* doctype) {
1233
- if (doctype->force_quirks ||
1234
- strcmp(doctype->name, kDoctypeHtml.data) ||
1235
- is_in_static_list(doctype->public_identifier,
1236
- kQuirksModePublicIdPrefixes, false) ||
1237
- is_in_static_list(doctype->public_identifier,
1238
- kQuirksModePublicIdExactMatches, true) ||
1239
- is_in_static_list(doctype->system_identifier,
1240
- kQuirksModeSystemIdExactMatches, true) ||
1326
+ if (doctype->force_quirks || strcmp(doctype->name, kDoctypeHtml.data) ||
1327
+ is_in_static_list(
1328
+ doctype->public_identifier, kQuirksModePublicIdPrefixes, false) ||
1329
+ is_in_static_list(
1330
+ doctype->public_identifier, kQuirksModePublicIdExactMatches, true) ||
1331
+ is_in_static_list(
1332
+ doctype->system_identifier, kQuirksModeSystemIdExactMatches, true) ||
1241
1333
  (is_in_static_list(doctype->public_identifier,
1242
- kLimitedQuirksRequiresSystemIdPublicIdPrefixes, false)
1243
- && !doctype->has_system_identifier)) {
1334
+ kLimitedQuirksRequiresSystemIdPublicIdPrefixes, false) &&
1335
+ !doctype->has_system_identifier)) {
1244
1336
  return GUMBO_DOCTYPE_QUIRKS;
1245
- } else if (
1246
- is_in_static_list(doctype->public_identifier,
1247
- kLimitedQuirksPublicIdPrefixes, false) ||
1248
- (is_in_static_list(doctype->public_identifier,
1249
- kLimitedQuirksRequiresSystemIdPublicIdPrefixes, false)
1250
- && doctype->has_system_identifier)) {
1337
+ } else if (is_in_static_list(doctype->public_identifier,
1338
+ kLimitedQuirksPublicIdPrefixes, false) ||
1339
+ (is_in_static_list(doctype->public_identifier,
1340
+ kLimitedQuirksRequiresSystemIdPublicIdPrefixes, false) &&
1341
+ doctype->has_system_identifier)) {
1251
1342
  return GUMBO_DOCTYPE_LIMITED_QUIRKS;
1252
1343
  }
1253
1344
  return GUMBO_DOCTYPE_NO_QUIRKS;
@@ -1261,39 +1352,45 @@ static GumboQuirksModeEnum compute_quirks_mode(
1261
1352
  // names. For example, "has an element in list scope" looks for an element of
1262
1353
  // the given qualified name within the nearest enclosing <ol> or <ul>, along
1263
1354
  // with a bunch of generic element types that serve to "firewall" their content
1264
- // from the rest of the document.
1265
- static bool has_an_element_in_specific_scope(GumboParser* parser, gumbo_tagset expected, bool negate, const gumbo_tagset tags) {
1355
+ // from the rest of the document. Note that because of the way the spec is
1356
+ // written,
1357
+ // all elements are expected to be in the HTML namespace
1358
+ static bool has_an_element_in_specific_scope(GumboParser* parser,
1359
+ int expected_size, const GumboTag* expected, bool negate,
1360
+ const gumbo_tagset tags) {
1266
1361
  GumboVector* open_elements = &parser->_parser_state->_open_elements;
1267
- bool result = false;
1268
- for (int i = open_elements->length; --i >= 0; ) {
1362
+ for (int i = open_elements->length; --i >= 0;) {
1269
1363
  const GumboNode* node = open_elements->data[i];
1270
- if (node->type != GUMBO_NODE_ELEMENT) {
1364
+ if (node->type != GUMBO_NODE_ELEMENT && node->type != GUMBO_NODE_TEMPLATE)
1271
1365
  continue;
1366
+
1367
+ GumboTag node_tag = node->v.element.tag;
1368
+ GumboNamespaceEnum node_ns = node->v.element.tag_namespace;
1369
+ for (int j = 0; j < expected_size; ++j) {
1370
+ if (node_tag == expected[j] && node_ns == GUMBO_NAMESPACE_HTML)
1371
+ return true;
1272
1372
  }
1273
- if (TAGSET_INCLUDES(expected, node->v.element.tag_namespace, node->v.element.tag)) {
1274
- return true;
1275
- }
1276
- bool found_qualname = false;
1277
- if (TAGSET_INCLUDES(tags, node->v.element.tag_namespace, node->v.element.tag)) {
1278
- found_qualname = true;
1279
- }
1280
- if (negate != found_qualname) {
1281
- result = false;
1282
- return result;
1283
- }
1373
+
1374
+ bool found = TAGSET_INCLUDES(tags, node_ns, node_tag);
1375
+ if (negate != found) return false;
1284
1376
  }
1285
- return result;
1377
+ return false;
1378
+ }
1379
+
1380
+ // Checks for the presence of an open element of the specified tag type.
1381
+ static bool has_open_element(GumboParser* parser, GumboTag tag) {
1382
+ return has_an_element_in_specific_scope(
1383
+ parser, 1, &tag, false, (gumbo_tagset){TAG(HTML)});
1286
1384
  }
1287
1385
 
1288
1386
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-scope
1289
1387
  static bool has_an_element_in_scope(GumboParser* parser, GumboTag tag) {
1290
- gumbo_tagset qualset = {0};
1291
- qualset[(int) tag] = (1 << (int) GUMBO_NAMESPACE_HTML);
1292
- return has_an_element_in_specific_scope(parser, qualset, false, (gumbo_tagset) { TAG(APPLET),
1293
- TAG(CAPTION), TAG(HTML), TAG(TABLE), TAG(TD), TAG(TH), TAG(MARQUEE),
1294
- TAG(OBJECT), TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN),
1295
- TAG_MATHML(MS), TAG_MATHML(MTEXT), TAG_MATHML(ANNOTATION_XML),
1296
- TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC), TAG_SVG(TITLE) });
1388
+ return has_an_element_in_specific_scope(parser, 1, &tag, false,
1389
+ (gumbo_tagset){TAG(APPLET), TAG(CAPTION), TAG(HTML), TAG(TABLE), TAG(TD),
1390
+ TAG(TH), TAG(MARQUEE), TAG(OBJECT), TAG(TEMPLATE), TAG_MATHML(MI),
1391
+ TAG_MATHML(MO), TAG_MATHML(MN), TAG_MATHML(MS), TAG_MATHML(MTEXT),
1392
+ TAG_MATHML(ANNOTATION_XML), TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC),
1393
+ TAG_SVG(TITLE)});
1297
1394
  }
1298
1395
 
1299
1396
  // Like "has an element in scope", but for the specific case of looking for a
@@ -1304,19 +1401,21 @@ static bool has_an_element_in_scope(GumboParser* parser, GumboTag tag) {
1304
1401
  // parameterize it.
1305
1402
  static bool has_node_in_scope(GumboParser* parser, const GumboNode* node) {
1306
1403
  GumboVector* open_elements = &parser->_parser_state->_open_elements;
1307
- for (int i = open_elements->length; --i >= 0; ) {
1404
+ for (int i = open_elements->length; --i >= 0;) {
1308
1405
  const GumboNode* current = open_elements->data[i];
1309
1406
  if (current == node) {
1310
1407
  return true;
1311
1408
  }
1312
- if (current->type != GUMBO_NODE_ELEMENT) {
1409
+ if (current->type != GUMBO_NODE_ELEMENT &&
1410
+ current->type != GUMBO_NODE_TEMPLATE) {
1313
1411
  continue;
1314
1412
  }
1315
- if (node_tag_in_set(current, (gumbo_tagset) { TAG(APPLET), TAG(CAPTION), TAG(HTML),
1316
- TAG(TABLE), TAG(TD), TAG(TH), TAG(MARQUEE), TAG(OBJECT),
1317
- TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN), TAG_MATHML(MS),
1318
- TAG_MATHML(MTEXT), TAG_MATHML(ANNOTATION_XML), TAG_SVG(FOREIGNOBJECT),
1319
- TAG_SVG(DESC), TAG_SVG(TITLE) } )) {
1413
+ if (node_tag_in_set(current,
1414
+ (gumbo_tagset){TAG(APPLET), TAG(CAPTION), TAG(HTML), TAG(TABLE),
1415
+ TAG(TD), TAG(TH), TAG(MARQUEE), TAG(OBJECT), TAG(TEMPLATE),
1416
+ TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN), TAG_MATHML(MS),
1417
+ TAG_MATHML(MTEXT), TAG_MATHML(ANNOTATION_XML),
1418
+ TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC), TAG_SVG(TITLE)})) {
1320
1419
  return false;
1321
1420
  }
1322
1421
  }
@@ -1326,60 +1425,70 @@ static bool has_node_in_scope(GumboParser* parser, const GumboNode* node) {
1326
1425
 
1327
1426
  // Like has_an_element_in_scope, but restricts the expected qualified name to a
1328
1427
  // range of possible qualified names instead of just a single one.
1329
- static bool has_an_element_in_scope_with_tagname(GumboParser* parser, gumbo_tagset qualset) {
1330
- return has_an_element_in_specific_scope(parser, qualset, false, (gumbo_tagset) { TAG(APPLET),
1331
- TAG(CAPTION), TAG(HTML), TAG(TABLE), TAG(TD), TAG(TH), TAG(MARQUEE),
1332
- TAG(OBJECT), TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN),
1333
- TAG_MATHML(MS), TAG_MATHML(MTEXT), TAG_MATHML(ANNOTATION_XML),
1334
- TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC), TAG_SVG(TITLE) });
1428
+ static bool has_an_element_in_scope_with_tagname(
1429
+ GumboParser* parser, int expected_len, const GumboTag expected[]) {
1430
+ return has_an_element_in_specific_scope(parser, expected_len, expected, false,
1431
+ (gumbo_tagset){TAG(APPLET), TAG(CAPTION), TAG(HTML), TAG(TABLE), TAG(TD),
1432
+ TAG(TH), TAG(MARQUEE), TAG(OBJECT), TAG(TEMPLATE), TAG_MATHML(MI),
1433
+ TAG_MATHML(MO), TAG_MATHML(MN), TAG_MATHML(MS), TAG_MATHML(MTEXT),
1434
+ TAG_MATHML(ANNOTATION_XML), TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC),
1435
+ TAG_SVG(TITLE)});
1335
1436
  }
1336
1437
 
1337
1438
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-list-item-scope
1338
1439
  static bool has_an_element_in_list_scope(GumboParser* parser, GumboTag tag) {
1339
- gumbo_tagset qualset = {0};
1340
- qualset[(int)tag] = (1 << (int)(GUMBO_NAMESPACE_HTML));
1341
- return has_an_element_in_specific_scope(parser, qualset, false, (gumbo_tagset) { TAG(APPLET),
1342
- TAG(CAPTION), TAG(HTML), TAG(TABLE), TAG(TD), TAG(TH), TAG(MARQUEE),
1343
- TAG(OBJECT), TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN),
1344
- TAG_MATHML(MS), TAG_MATHML(MTEXT), TAG_MATHML(ANNOTATION_XML),
1345
- TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC), TAG_SVG(TITLE), TAG(OL),
1346
- TAG(UL) });
1440
+ return has_an_element_in_specific_scope(parser, 1, &tag, false,
1441
+ (gumbo_tagset){TAG(APPLET), TAG(CAPTION), TAG(HTML), TAG(TABLE), TAG(TD),
1442
+ TAG(TH), TAG(MARQUEE), TAG(OBJECT), TAG(TEMPLATE), TAG_MATHML(MI),
1443
+ TAG_MATHML(MO), TAG_MATHML(MN), TAG_MATHML(MS), TAG_MATHML(MTEXT),
1444
+ TAG_MATHML(ANNOTATION_XML), TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC),
1445
+ TAG_SVG(TITLE), TAG(OL), TAG(UL)});
1347
1446
  }
1348
1447
 
1349
1448
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-button-scope
1350
1449
  static bool has_an_element_in_button_scope(GumboParser* parser, GumboTag tag) {
1351
- gumbo_tagset qualset = {0};
1352
- qualset[(int) tag] = (1 << (int)(GUMBO_NAMESPACE_HTML));
1353
- return has_an_element_in_specific_scope(parser, qualset, false, (gumbo_tagset) { TAG(APPLET),
1354
- TAG(CAPTION), TAG(HTML), TAG(TABLE), TAG(TD), TAG(TH), TAG(MARQUEE),
1355
- TAG(OBJECT), TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN),
1356
- TAG_MATHML(MS), TAG_MATHML(MTEXT), TAG_MATHML(ANNOTATION_XML),
1357
- TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC), TAG_SVG(TITLE), TAG(BUTTON) });
1450
+ return has_an_element_in_specific_scope(parser, 1, &tag, false,
1451
+ (gumbo_tagset){TAG(APPLET), TAG(CAPTION), TAG(HTML), TAG(TABLE), TAG(TD),
1452
+ TAG(TH), TAG(MARQUEE), TAG(OBJECT), TAG(TEMPLATE), TAG_MATHML(MI),
1453
+ TAG_MATHML(MO), TAG_MATHML(MN), TAG_MATHML(MS), TAG_MATHML(MTEXT),
1454
+ TAG_MATHML(ANNOTATION_XML), TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC),
1455
+ TAG_SVG(TITLE), TAG(BUTTON)});
1358
1456
  }
1359
1457
 
1360
1458
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-table-scope
1361
1459
  static bool has_an_element_in_table_scope(GumboParser* parser, GumboTag tag) {
1362
- gumbo_tagset qualset = {0};
1363
- qualset[(int) tag] = (1 << (int)(GUMBO_NAMESPACE_HTML));
1364
- return has_an_element_in_specific_scope(parser, qualset, false, (gumbo_tagset) { TAG(HTML), TAG(TABLE) });
1460
+ return has_an_element_in_specific_scope(parser, 1, &tag, false,
1461
+ (gumbo_tagset){TAG(HTML), TAG(TABLE), TAG(TEMPLATE)});
1365
1462
  }
1366
1463
 
1367
1464
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-select-scope
1368
1465
  static bool has_an_element_in_select_scope(GumboParser* parser, GumboTag tag) {
1369
- gumbo_tagset qualset = {0};
1370
- qualset[(int) tag] = (1 << (int)(GUMBO_NAMESPACE_HTML));
1371
- return has_an_element_in_specific_scope(parser, qualset, true, (gumbo_tagset) { TAG(OPTGROUP), TAG(OPTION) });
1466
+ return has_an_element_in_specific_scope(
1467
+ parser, 1, &tag, true, (gumbo_tagset){TAG(OPTGROUP), TAG(OPTION)});
1372
1468
  }
1373
1469
 
1374
1470
  // http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#generate-implied-end-tags
1375
1471
  // "exception" is the "element to exclude from the process" listed in the spec.
1376
1472
  // Pass GUMBO_TAG_LAST to not exclude any of them.
1377
1473
  static void generate_implied_end_tags(GumboParser* parser, GumboTag exception) {
1378
- for (;
1379
- node_tag_in_set(get_current_node(parser), (gumbo_tagset) { TAG(DD), TAG(DT),
1380
- TAG(LI), TAG(OPTION), TAG(OPTGROUP), TAG(P), TAG(RP), TAG(RT) }) &&
1381
- !node_html_tag_is(get_current_node(parser), exception);
1382
- pop_current_node(parser));
1474
+ for (; node_tag_in_set(get_current_node(parser),
1475
+ (gumbo_tagset){TAG(DD), TAG(DT), TAG(LI), TAG(OPTION),
1476
+ TAG(OPTGROUP), TAG(P), TAG(RP), TAG(RB), TAG(RT), TAG(RTC)}) &&
1477
+ !node_html_tag_is(get_current_node(parser), exception);
1478
+ pop_current_node(parser))
1479
+ ;
1480
+ }
1481
+
1482
+ // This is the "generate all implied end tags thoroughly" clause of the spec.
1483
+ // https://html.spec.whatwg.org/multipage/syntax.html#closing-elements-that-have-implied-end-tags
1484
+ static void generate_all_implied_end_tags_thoroughly(GumboParser* parser) {
1485
+ for (
1486
+ ; node_tag_in_set(get_current_node(parser),
1487
+ (gumbo_tagset){TAG(CAPTION), TAG(COLGROUP), TAG(DD), TAG(DT), TAG(LI),
1488
+ TAG(OPTION), TAG(OPTGROUP), TAG(P), TAG(RP), TAG(RT), TAG(RTC),
1489
+ TAG(TBODY), TAG(TD), TAG(TFOOT), TAG(TH), TAG(HEAD), TAG(TR)});
1490
+ pop_current_node(parser))
1491
+ ;
1383
1492
  }
1384
1493
 
1385
1494
  // This factors out the clauses relating to "act as if an end tag token with tag
@@ -1401,8 +1510,8 @@ static bool close_table(GumboParser* parser) {
1401
1510
 
1402
1511
  // This factors out the clauses relating to "act as if an end tag token with tag
1403
1512
  // name `cell_tag` had been seen".
1404
- static bool close_table_cell(GumboParser* parser, const GumboToken* token,
1405
- GumboTag cell_tag) {
1513
+ static bool close_table_cell(
1514
+ GumboParser* parser, const GumboToken* token, GumboTag cell_tag) {
1406
1515
  bool result = true;
1407
1516
  generate_implied_end_tags(parser, GUMBO_TAG_LAST);
1408
1517
  const GumboNode* node = get_current_node(parser);
@@ -1446,38 +1555,43 @@ static void close_current_select(GumboParser* parser) {
1446
1555
  // The list of nodes in the "special" category:
1447
1556
  // http://www.whatwg.org/specs/web-apps/current-work/complete/parsing.html#special
1448
1557
  static bool is_special_node(const GumboNode* node) {
1449
- assert(node->type == GUMBO_NODE_ELEMENT);
1450
- return node_tag_in_set(node, (gumbo_tagset) { TAG(ADDRESS), TAG(APPLET), TAG(AREA),
1451
- TAG(ARTICLE), TAG(ASIDE), TAG(BASE), TAG(BASEFONT), TAG(BGSOUND), TAG(BLOCKQUOTE),
1452
- TAG(BODY), TAG(BR), TAG(BUTTON), TAG(CAPTION), TAG(CENTER), TAG(COL),
1453
- TAG(COLGROUP), TAG(MENUITEM), TAG(DD), TAG(DETAILS), TAG(DIR), TAG(DIV), TAG(DL),
1454
- TAG(DT), TAG(EMBED), TAG(FIELDSET), TAG(FIGCAPTION), TAG(FIGURE), TAG(FOOTER),
1455
- TAG(FORM), TAG(FRAME), TAG(FRAMESET), TAG(H1), TAG(H2), TAG(H3), TAG(H4),
1456
- TAG(H5), TAG(H6), TAG(HEAD), TAG(HEADER), TAG(HGROUP), TAG(HR), TAG(HTML),
1457
- TAG(IFRAME), TAG(IMG), TAG(INPUT), TAG(ISINDEX), TAG(LI), TAG(LINK),
1458
- TAG(LISTING), TAG(MARQUEE), TAG(MENU), TAG(META), TAG(NAV), TAG(NOEMBED),
1459
- TAG(NOFRAMES), TAG(NOSCRIPT), TAG(OBJECT), TAG(OL), TAG(P), TAG(PARAM),
1460
- TAG(PLAINTEXT), TAG(PRE), TAG(SCRIPT), TAG(SECTION), TAG(SELECT), TAG(STYLE),
1461
- TAG(SUMMARY), TAG(TABLE), TAG(TBODY), TAG(TD), TAG(TEXTAREA), TAG(TFOOT),
1462
- TAG(TH), TAG(THEAD), TAG(TITLE), TAG(TR), TAG(UL), TAG(WBR), TAG(XMP),
1463
-
1464
- TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN), TAG_MATHML(MS),
1465
- TAG_MATHML(MTEXT), TAG_MATHML(ANNOTATION_XML),
1466
-
1467
- TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC) });
1468
- }
1469
-
1470
- // Implicitly closes currently open elements until it reaches an element with the
1558
+ assert(node->type == GUMBO_NODE_ELEMENT || node->type == GUMBO_NODE_TEMPLATE);
1559
+ return node_tag_in_set(node,
1560
+ (gumbo_tagset){TAG(ADDRESS), TAG(APPLET), TAG(AREA), TAG(ARTICLE),
1561
+ TAG(ASIDE), TAG(BASE), TAG(BASEFONT), TAG(BGSOUND), TAG(BLOCKQUOTE),
1562
+ TAG(BODY), TAG(BR), TAG(BUTTON), TAG(CAPTION), TAG(CENTER), TAG(COL),
1563
+ TAG(COLGROUP), TAG(MENUITEM), TAG(DD), TAG(DETAILS), TAG(DIR),
1564
+ TAG(DIV), TAG(DL), TAG(DT), TAG(EMBED), TAG(FIELDSET),
1565
+ TAG(FIGCAPTION), TAG(FIGURE), TAG(FOOTER), TAG(FORM), TAG(FRAME),
1566
+ TAG(FRAMESET), TAG(H1), TAG(H2), TAG(H3), TAG(H4), TAG(H5), TAG(H6),
1567
+ TAG(HEAD), TAG(HEADER), TAG(HGROUP), TAG(HR), TAG(HTML), TAG(IFRAME),
1568
+ TAG(IMG), TAG(INPUT), TAG(ISINDEX), TAG(LI), TAG(LINK), TAG(LISTING),
1569
+ TAG(MARQUEE), TAG(MENU), TAG(META), TAG(NAV), TAG(NOEMBED),
1570
+ TAG(NOFRAMES), TAG(NOSCRIPT), TAG(OBJECT), TAG(OL), TAG(P),
1571
+ TAG(PARAM), TAG(PLAINTEXT), TAG(PRE), TAG(SCRIPT), TAG(SECTION),
1572
+ TAG(SELECT), TAG(STYLE), TAG(SUMMARY), TAG(TABLE), TAG(TBODY),
1573
+ TAG(TD), TAG(TEMPLATE), TAG(TEXTAREA), TAG(TFOOT), TAG(TH),
1574
+ TAG(THEAD), TAG(TITLE), TAG(TR), TAG(UL), TAG(WBR), TAG(XMP),
1575
+
1576
+ TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN), TAG_MATHML(MS),
1577
+ TAG_MATHML(MTEXT), TAG_MATHML(ANNOTATION_XML),
1578
+
1579
+ TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC)});
1580
+ }
1581
+
1582
+ // Implicitly closes currently open elements until it reaches an element with
1583
+ // the
1471
1584
  // specified qualified name. If the elements closed are in the set handled by
1472
1585
  // generate_implied_end_tags, this is normal operation and this function returns
1473
1586
  // true. Otherwise, a parse error is recorded and this function returns false.
1474
- static bool implicitly_close_tags(
1475
- GumboParser* parser, GumboToken* token, GumboNamespaceEnum target_ns, GumboTag target) {
1587
+ static bool implicitly_close_tags(GumboParser* parser, GumboToken* token,
1588
+ GumboNamespaceEnum target_ns, GumboTag target) {
1476
1589
  bool result = true;
1477
1590
  generate_implied_end_tags(parser, target);
1478
1591
  if (!node_qualified_tag_is(get_current_node(parser), target_ns, target)) {
1479
1592
  parser_add_parse_error(parser, token);
1480
- while (!node_qualified_tag_is(get_current_node(parser), target_ns, target)) {
1593
+ while (
1594
+ !node_qualified_tag_is(get_current_node(parser), target_ns, target)) {
1481
1595
  pop_current_node(parser);
1482
1596
  }
1483
1597
  result = false;
@@ -1491,9 +1605,11 @@ static bool implicitly_close_tags(
1491
1605
  // a </p> tag was encountered, implicitly closing tags. Returns false if a
1492
1606
  // parse error occurs. This is a convenience function because this particular
1493
1607
  // clause appears several times in the spec.
1494
- static bool maybe_implicitly_close_p_tag(GumboParser* parser, GumboToken* token) {
1608
+ static bool maybe_implicitly_close_p_tag(
1609
+ GumboParser* parser, GumboToken* token) {
1495
1610
  if (has_an_element_in_button_scope(parser, GUMBO_TAG_P)) {
1496
- return implicitly_close_tags(parser, token, GUMBO_NAMESPACE_HTML, GUMBO_TAG_P);
1611
+ return implicitly_close_tags(
1612
+ parser, token, GUMBO_NAMESPACE_HTML, GUMBO_TAG_P);
1497
1613
  }
1498
1614
  return true;
1499
1615
  }
@@ -1504,17 +1620,19 @@ static void maybe_implicitly_close_list_tag(
1504
1620
  GumboParser* parser, GumboToken* token, bool is_li) {
1505
1621
  GumboParserState* state = parser->_parser_state;
1506
1622
  state->_frameset_ok = false;
1507
- for (int i = state->_open_elements.length; --i >= 0; ) {
1623
+ for (int i = state->_open_elements.length; --i >= 0;) {
1508
1624
  const GumboNode* node = state->_open_elements.data[i];
1509
- bool is_list_tag = is_li ?
1510
- node_html_tag_is(node, GUMBO_TAG_LI) :
1511
- node_tag_in_set(node, (gumbo_tagset) { TAG(DD), TAG(DT) } );
1625
+ bool is_list_tag =
1626
+ is_li ? node_html_tag_is(node, GUMBO_TAG_LI)
1627
+ : node_tag_in_set(node, (gumbo_tagset){TAG(DD), TAG(DT)});
1512
1628
  if (is_list_tag) {
1513
- implicitly_close_tags(parser, token, node->v.element.tag_namespace, node->v.element.tag);
1629
+ implicitly_close_tags(
1630
+ parser, token, node->v.element.tag_namespace, node->v.element.tag);
1514
1631
  return;
1515
1632
  }
1516
1633
  if (is_special_node(node) &&
1517
- !node_tag_in_set(node, (gumbo_tagset) { TAG(ADDRESS), TAG(DIV), TAG(P) })) {
1634
+ !node_tag_in_set(
1635
+ node, (gumbo_tagset){TAG(ADDRESS), TAG(DIV), TAG(P)})) {
1518
1636
  return;
1519
1637
  }
1520
1638
  }
@@ -1527,7 +1645,7 @@ static void merge_attributes(
1527
1645
  const GumboVector* token_attr = &token->v.start_tag.attributes;
1528
1646
  GumboVector* node_attr = &node->v.element.attributes;
1529
1647
 
1530
- for (int i = 0; i < token_attr->length; ++i) {
1648
+ for (unsigned int i = 0; i < token_attr->length; ++i) {
1531
1649
  GumboAttribute* attr = token_attr->data[i];
1532
1650
  if (!gumbo_get_attribute(node_attr, attr->name)) {
1533
1651
  // Ownership of the attribute is transferred by this gumbo_vector_add,
@@ -1551,8 +1669,8 @@ static void merge_attributes(
1551
1669
  }
1552
1670
 
1553
1671
  const char* gumbo_normalize_svg_tagname(const GumboStringPiece* tag) {
1554
- for (int i = 0;
1555
- i < sizeof(kSvgTagReplacements) / sizeof(ReplacementEntry); ++i) {
1672
+ for (size_t i = 0; i < sizeof(kSvgTagReplacements) / sizeof(ReplacementEntry);
1673
+ ++i) {
1556
1674
  const ReplacementEntry* entry = &kSvgTagReplacements[i];
1557
1675
  if (gumbo_string_equals_ignore_case(tag, &entry->from)) {
1558
1676
  return entry->to.data;
@@ -1567,9 +1685,9 @@ const char* gumbo_normalize_svg_tagname(const GumboStringPiece* tag) {
1567
1685
  static void adjust_foreign_attributes(GumboParser* parser, GumboToken* token) {
1568
1686
  assert(token->type == GUMBO_TOKEN_START_TAG);
1569
1687
  const GumboVector* attributes = &token->v.start_tag.attributes;
1570
- for (int i = 0;
1571
- i < sizeof(kForeignAttributeReplacements) /
1572
- sizeof(NamespacedAttributeReplacement); ++i) {
1688
+ for (size_t i = 0; i < sizeof(kForeignAttributeReplacements) /
1689
+ sizeof(NamespacedAttributeReplacement);
1690
+ ++i) {
1573
1691
  const NamespacedAttributeReplacement* entry =
1574
1692
  &kForeignAttributeReplacements[i];
1575
1693
  GumboAttribute* attr = gumbo_get_attribute(attributes, entry->from);
@@ -1587,7 +1705,7 @@ static void adjust_foreign_attributes(GumboParser* parser, GumboToken* token) {
1587
1705
  static void adjust_svg_attributes(GumboParser* parser, GumboToken* token) {
1588
1706
  assert(token->type == GUMBO_TOKEN_START_TAG);
1589
1707
  const GumboVector* attributes = &token->v.start_tag.attributes;
1590
- for (int i = 0;
1708
+ for (size_t i = 0;
1591
1709
  i < sizeof(kSvgAttributeReplacements) / sizeof(ReplacementEntry); ++i) {
1592
1710
  const ReplacementEntry* entry = &kSvgAttributeReplacements[i];
1593
1711
  GumboAttribute* attr = gumbo_get_attribute(attributes, entry->from.data);
@@ -1604,8 +1722,8 @@ static void adjust_svg_attributes(GumboParser* parser, GumboToken* token) {
1604
1722
  // value.
1605
1723
  static void adjust_mathml_attributes(GumboParser* parser, GumboToken* token) {
1606
1724
  assert(token->type == GUMBO_TOKEN_START_TAG);
1607
- GumboAttribute* attr = gumbo_get_attribute(
1608
- &token->v.start_tag.attributes, "definitionurl");
1725
+ GumboAttribute* attr =
1726
+ gumbo_get_attribute(&token->v.start_tag.attributes, "definitionurl");
1609
1727
  if (!attr) {
1610
1728
  return;
1611
1729
  }
@@ -1613,32 +1731,30 @@ static void adjust_mathml_attributes(GumboParser* parser, GumboToken* token) {
1613
1731
  attr->name = gumbo_copy_stringz(parser, "definitionURL");
1614
1732
  }
1615
1733
 
1616
- static bool doctype_matches(
1617
- const GumboTokenDocType* doctype,
1618
- const GumboStringPiece* public_id,
1619
- const GumboStringPiece* system_id,
1734
+ static bool doctype_matches(const GumboTokenDocType* doctype,
1735
+ const GumboStringPiece* public_id, const GumboStringPiece* system_id,
1620
1736
  bool allow_missing_system_id) {
1621
1737
  return !strcmp(doctype->public_identifier, public_id->data) &&
1622
- (allow_missing_system_id || doctype->has_system_identifier) &&
1623
- !strcmp(doctype->system_identifier, system_id->data);
1738
+ (allow_missing_system_id || doctype->has_system_identifier) &&
1739
+ !strcmp(doctype->system_identifier, system_id->data);
1624
1740
  }
1625
1741
 
1626
1742
  static bool maybe_add_doctype_error(
1627
1743
  GumboParser* parser, const GumboToken* token) {
1628
1744
  const GumboTokenDocType* doctype = &token->v.doc_type;
1629
1745
  bool html_doctype = !strcmp(doctype->name, kDoctypeHtml.data);
1630
- if ((!html_doctype ||
1631
- doctype->has_public_identifier ||
1632
- (doctype->has_system_identifier && !strcmp(
1633
- doctype->system_identifier, kSystemIdLegacyCompat.data))) &&
1634
- !(html_doctype && (
1635
- doctype_matches(doctype, &kPublicIdHtml4_0,
1636
- &kSystemIdRecHtml4_0, true) ||
1637
- doctype_matches(doctype, &kPublicIdHtml4_01, &kSystemIdHtml4, true) ||
1638
- doctype_matches(doctype, &kPublicIdXhtml1_0,
1639
- &kSystemIdXhtmlStrict1_1, false) ||
1640
- doctype_matches(doctype, &kPublicIdXhtml1_1,
1641
- &kSystemIdXhtml1_1, false)))) {
1746
+ if ((!html_doctype || doctype->has_public_identifier ||
1747
+ (doctype->has_system_identifier &&
1748
+ !strcmp(
1749
+ doctype->system_identifier, kSystemIdLegacyCompat.data))) &&
1750
+ !(html_doctype && (doctype_matches(doctype, &kPublicIdHtml4_0,
1751
+ &kSystemIdRecHtml4_0, true) ||
1752
+ doctype_matches(doctype, &kPublicIdHtml4_01,
1753
+ &kSystemIdHtml4, true) ||
1754
+ doctype_matches(doctype, &kPublicIdXhtml1_0,
1755
+ &kSystemIdXhtmlStrict1_1, false) ||
1756
+ doctype_matches(doctype, &kPublicIdXhtml1_1,
1757
+ &kSystemIdXhtml1_1, false)))) {
1642
1758
  parser_add_parse_error(parser, token);
1643
1759
  return false;
1644
1760
  }
@@ -1661,7 +1777,7 @@ static void remove_from_parent(GumboParser* parser, GumboNode* node) {
1661
1777
  gumbo_vector_remove_at(parser, index, children);
1662
1778
  node->parent = NULL;
1663
1779
  node->index_within_parent = -1;
1664
- for (int i = index; i < children->length; ++i) {
1780
+ for (unsigned int i = index; i < children->length; ++i) {
1665
1781
  GumboNode* child = children->data[i];
1666
1782
  child->index_within_parent = i;
1667
1783
  }
@@ -1670,29 +1786,38 @@ static void remove_from_parent(GumboParser* parser, GumboNode* node) {
1670
1786
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#an-introduction-to-error-handling-and-strange-cases-in-the-parser
1671
1787
  // Also described in the "in body" handling for end formatting tags.
1672
1788
  static bool adoption_agency_algorithm(
1673
- GumboParser* parser, GumboToken* token, GumboTag closing_tag) {
1789
+ GumboParser* parser, GumboToken* token, GumboTag subject) {
1674
1790
  GumboParserState* state = parser->_parser_state;
1675
1791
  gumbo_debug("Entering adoption agency algorithm.\n");
1676
- // Steps 1-3 & 16:
1677
- for (int i = 0; i < 8; ++i) {
1678
- // Step 4.
1792
+ // Step 1.
1793
+ GumboNode* current_node = get_current_node(parser);
1794
+ if (current_node->v.element.tag_namespace == GUMBO_NAMESPACE_HTML &&
1795
+ current_node->v.element.tag == subject &&
1796
+ gumbo_vector_index_of(
1797
+ &state->_active_formatting_elements, current_node) == -1) {
1798
+ pop_current_node(parser);
1799
+ return false;
1800
+ }
1801
+ // Steps 2-4 & 20:
1802
+ for (unsigned int i = 0; i < 8; ++i) {
1803
+ // Step 5.
1679
1804
  GumboNode* formatting_node = NULL;
1680
1805
  int formatting_node_in_open_elements = -1;
1681
- for (int j = state->_active_formatting_elements.length; --j >= 0; ) {
1806
+ for (int j = state->_active_formatting_elements.length; --j >= 0;) {
1682
1807
  GumboNode* current_node = state->_active_formatting_elements.data[j];
1683
1808
  if (current_node == &kActiveFormattingScopeMarker) {
1684
1809
  gumbo_debug("Broke on scope marker; aborting.\n");
1685
1810
  // Last scope marker; abort the algorithm.
1686
1811
  return false;
1687
1812
  }
1688
- if (current_node->type == GUMBO_NODE_ELEMENT && current_node->v.element.tag == closing_tag) {
1813
+ if (node_html_tag_is(current_node, subject)) {
1689
1814
  // Found it.
1690
1815
  formatting_node = current_node;
1691
- formatting_node_in_open_elements = gumbo_vector_index_of(
1692
- &state->_open_elements, formatting_node);
1816
+ formatting_node_in_open_elements =
1817
+ gumbo_vector_index_of(&state->_open_elements, formatting_node);
1693
1818
  gumbo_debug("Formatting element of tag %s at %d.\n",
1694
- gumbo_normalized_tagname(closing_tag),
1695
- formatting_node_in_open_elements);
1819
+ gumbo_normalized_tagname(subject),
1820
+ formatting_node_in_open_elements);
1696
1821
  break;
1697
1822
  }
1698
1823
  }
@@ -1704,18 +1829,23 @@ static bool adoption_agency_algorithm(
1704
1829
  return false;
1705
1830
  }
1706
1831
 
1832
+ // Step 6
1707
1833
  if (formatting_node_in_open_elements == -1) {
1708
1834
  gumbo_debug("Formatting node not on stack of open elements.\n");
1709
- gumbo_vector_remove(parser, formatting_node,
1710
- &state->_active_formatting_elements);
1835
+ parser_add_parse_error(parser, token);
1836
+ gumbo_vector_remove(
1837
+ parser, formatting_node, &state->_active_formatting_elements);
1711
1838
  return false;
1712
1839
  }
1713
1840
 
1841
+ // Step 7
1714
1842
  if (!has_an_element_in_scope(parser, formatting_node->v.element.tag)) {
1715
1843
  parser_add_parse_error(parser, token);
1716
1844
  gumbo_debug("Element not in scope.\n");
1717
1845
  return false;
1718
1846
  }
1847
+
1848
+ // Step 8
1719
1849
  if (formatting_node != get_current_node(parser)) {
1720
1850
  parser_add_parse_error(parser, token); // But continue onwards.
1721
1851
  }
@@ -1723,55 +1853,60 @@ static bool adoption_agency_algorithm(
1723
1853
  assert(!node_html_tag_is(formatting_node, GUMBO_TAG_HTML));
1724
1854
  assert(!node_html_tag_is(formatting_node, GUMBO_TAG_BODY));
1725
1855
 
1726
- // Step 5 & 6.
1856
+ // Step 9 & 10
1727
1857
  GumboNode* furthest_block = NULL;
1728
- for (int j = formatting_node_in_open_elements;
1858
+ for (unsigned int j = formatting_node_in_open_elements;
1729
1859
  j < state->_open_elements.length; ++j) {
1730
1860
  assert(j > 0);
1731
1861
  GumboNode* current = state->_open_elements.data[j];
1732
1862
  if (is_special_node(current)) {
1733
- // Step 5.
1863
+ // Step 9.
1734
1864
  furthest_block = current;
1735
1865
  break;
1736
1866
  }
1737
1867
  }
1738
1868
  if (!furthest_block) {
1739
- // Step 6.
1869
+ // Step 10.
1740
1870
  while (get_current_node(parser) != formatting_node) {
1741
1871
  pop_current_node(parser);
1742
1872
  }
1743
1873
  // And the formatting element itself.
1744
1874
  pop_current_node(parser);
1745
- gumbo_vector_remove(parser, formatting_node,
1746
- &state->_active_formatting_elements);
1875
+ gumbo_vector_remove(
1876
+ parser, formatting_node, &state->_active_formatting_elements);
1747
1877
  return false;
1748
1878
  }
1749
1879
  assert(!node_html_tag_is(furthest_block, GUMBO_TAG_HTML));
1750
1880
  assert(furthest_block);
1751
1881
 
1752
- // Step 7.
1882
+ // Step 11.
1753
1883
  // Elements may be moved and reparented by this algorithm, so
1754
1884
  // common_ancestor is not necessarily the same as formatting_node->parent.
1755
1885
  GumboNode* common_ancestor =
1756
- state->_open_elements.data[gumbo_vector_index_of(
1757
- &state->_open_elements, formatting_node) - 1];
1886
+ state->_open_elements.data[gumbo_vector_index_of(&state->_open_elements,
1887
+ formatting_node) -
1888
+ 1];
1758
1889
  gumbo_debug("Common ancestor tag = %s, furthest block tag = %s.\n",
1759
- gumbo_normalized_tagname(common_ancestor->v.element.tag),
1760
- gumbo_normalized_tagname(furthest_block->v.element.tag));
1890
+ gumbo_normalized_tagname(common_ancestor->v.element.tag),
1891
+ gumbo_normalized_tagname(furthest_block->v.element.tag));
1761
1892
 
1762
- // Step 8.
1893
+ // Step 12.
1763
1894
  int bookmark = gumbo_vector_index_of(
1764
- &state->_active_formatting_elements, formatting_node);;
1765
- // Step 9.
1895
+ &state->_active_formatting_elements, formatting_node) +
1896
+ 1;
1897
+ gumbo_debug("Bookmark at %d.\n", bookmark);
1898
+ // Step 13.
1766
1899
  GumboNode* node = furthest_block;
1767
1900
  GumboNode* last_node = furthest_block;
1768
1901
  // Must be stored explicitly, in case node is removed from the stack of open
1769
1902
  // elements, to handle step 9.4.
1770
1903
  int saved_node_index = gumbo_vector_index_of(&state->_open_elements, node);
1771
1904
  assert(saved_node_index > 0);
1772
- // Step 9.1-9.3 & 9.11.
1773
- for (int j = 0; j < 3; ++j) {
1774
- // Step 9.4.
1905
+ // Step 13.1.
1906
+ for (int j = 0;;) {
1907
+ // Step 13.2.
1908
+ ++j;
1909
+ // Step 13.3.
1775
1910
  int node_index = gumbo_vector_index_of(&state->_open_elements, node);
1776
1911
  gumbo_debug(
1777
1912
  "Current index: %d, last index: %d.\n", node_index, saved_node_index);
@@ -1780,58 +1915,72 @@ static bool adoption_agency_algorithm(
1780
1915
  }
1781
1916
  saved_node_index = --node_index;
1782
1917
  assert(node_index > 0);
1783
- assert(node_index < state->_open_elements.capacity);
1918
+ assert((unsigned int) node_index < state->_open_elements.capacity);
1784
1919
  node = state->_open_elements.data[node_index];
1785
1920
  assert(node->parent);
1786
- // Step 9.5.
1787
- if (gumbo_vector_index_of(
1788
- &state->_active_formatting_elements, node) == -1) {
1921
+ if (node == formatting_node) {
1922
+ // Step 13.4.
1923
+ break;
1924
+ }
1925
+ int formatting_index =
1926
+ gumbo_vector_index_of(&state->_active_formatting_elements, node);
1927
+ if (j > 3 && formatting_index != -1) {
1928
+ // Step 13.5.
1929
+ gumbo_debug("Removing formatting element at %d.\n", formatting_index);
1930
+ gumbo_vector_remove_at(
1931
+ parser, formatting_index, &state->_active_formatting_elements);
1932
+ // Removing the element shifts all indices over by one, so we may need
1933
+ // to move the bookmark.
1934
+ if (formatting_index < bookmark) {
1935
+ --bookmark;
1936
+ gumbo_debug("Moving bookmark to %d.\n", bookmark);
1937
+ }
1938
+ continue;
1939
+ }
1940
+ if (formatting_index == -1) {
1941
+ // Step 13.6.
1789
1942
  gumbo_vector_remove_at(parser, node_index, &state->_open_elements);
1790
1943
  continue;
1791
- } else if (node == formatting_node) {
1792
- // Step 9.6.
1793
- break;
1794
1944
  }
1795
- // Step 9.7.
1796
- int formatting_index = gumbo_vector_index_of(
1797
- &state->_active_formatting_elements, node);
1945
+ // Step 13.7.
1946
+ // "common ancestor as the intended parent" doesn't actually mean insert
1947
+ // it into the common ancestor; that happens below.
1798
1948
  node = clone_node(parser, node, GUMBO_INSERTION_ADOPTION_AGENCY_CLONED);
1949
+ assert(formatting_index >= 0);
1799
1950
  state->_active_formatting_elements.data[formatting_index] = node;
1951
+ assert(node_index >= 0);
1800
1952
  state->_open_elements.data[node_index] = node;
1801
- // Step 9.8.
1953
+ // Step 13.8.
1802
1954
  if (last_node == furthest_block) {
1803
1955
  bookmark = formatting_index + 1;
1804
- assert(bookmark <= state->_active_formatting_elements.length);
1956
+ gumbo_debug("Bookmark moved to %d.\n", bookmark);
1957
+ assert((unsigned int) bookmark <= state->_active_formatting_elements.length);
1805
1958
  }
1806
- // Step 9.9.
1959
+ // Step 13.9.
1807
1960
  last_node->parse_flags |= GUMBO_INSERTION_ADOPTION_AGENCY_MOVED;
1808
1961
  remove_from_parent(parser, last_node);
1809
1962
  append_node(parser, node, last_node);
1810
- // Step 9.10.
1963
+ // Step 13.10.
1811
1964
  last_node = node;
1812
- }
1965
+ } // Step 13.11.
1813
1966
 
1814
- // Step 10.
1967
+ // Step 14.
1815
1968
  gumbo_debug("Removing %s node from parent ",
1816
- gumbo_normalized_tagname(last_node->v.element.tag));
1969
+ gumbo_normalized_tagname(last_node->v.element.tag));
1817
1970
  remove_from_parent(parser, last_node);
1818
1971
  last_node->parse_flags |= GUMBO_INSERTION_ADOPTION_AGENCY_MOVED;
1819
- if (node_tag_in_set(common_ancestor, (gumbo_tagset) { TAG(TABLE), TAG(TBODY),
1820
- TAG(TFOOT), TAG(THEAD), TAG(TR) })) {
1821
- gumbo_debug("and foster-parenting it.\n");
1822
- foster_parent_element(parser, last_node);
1823
- } else {
1824
- gumbo_debug("and inserting it into %s.\n",
1825
- gumbo_normalized_tagname(common_ancestor->v.element.tag));
1826
- append_node(parser, common_ancestor, last_node);
1827
- }
1972
+ InsertionLocation location =
1973
+ get_appropriate_insertion_location(parser, common_ancestor);
1974
+ gumbo_debug("and inserting it into %s.\n",
1975
+ gumbo_normalized_tagname(location.target->v.element.tag));
1976
+ insert_node(parser, last_node, location);
1828
1977
 
1829
- // Step 11.
1978
+ // Step 15.
1830
1979
  GumboNode* new_formatting_node = clone_node(
1831
1980
  parser, formatting_node, GUMBO_INSERTION_ADOPTION_AGENCY_CLONED);
1832
1981
  formatting_node->parse_flags |= GUMBO_INSERTION_IMPLICIT_END_TAG;
1833
1982
 
1834
- // Step 12. Instead of appending nodes one-by-one, we swap the children
1983
+ // Step 16. Instead of appending nodes one-by-one, we swap the children
1835
1984
  // vector of furthest_block with the empty children of new_formatting_node,
1836
1985
  // reducing memory traffic and allocations. We still have to reset their
1837
1986
  // parent pointers, though.
@@ -1841,15 +1990,15 @@ static bool adoption_agency_algorithm(
1841
1990
  furthest_block->v.element.children = temp;
1842
1991
 
1843
1992
  temp = new_formatting_node->v.element.children;
1844
- for (int i = 0; i < temp.length; ++i) {
1993
+ for (unsigned int i = 0; i < temp.length; ++i) {
1845
1994
  GumboNode* child = temp.data[i];
1846
1995
  child->parent = new_formatting_node;
1847
1996
  }
1848
1997
 
1849
- // Step 13.
1998
+ // Step 17.
1850
1999
  append_node(parser, furthest_block, new_formatting_node);
1851
2000
 
1852
- // Step 14.
2001
+ // Step 18.
1853
2002
  // If the formatting node was before the bookmark, it may shift over all
1854
2003
  // indices after it, so we need to explicitly find the index and possibly
1855
2004
  // adjust the bookmark.
@@ -1857,25 +2006,27 @@ static bool adoption_agency_algorithm(
1857
2006
  &state->_active_formatting_elements, formatting_node);
1858
2007
  assert(formatting_node_index != -1);
1859
2008
  if (formatting_node_index < bookmark) {
2009
+ gumbo_debug(
2010
+ "Formatting node at %d is before bookmark at %d; decrementing.\n",
2011
+ formatting_node_index, bookmark);
1860
2012
  --bookmark;
1861
2013
  }
1862
2014
  gumbo_vector_remove_at(
1863
2015
  parser, formatting_node_index, &state->_active_formatting_elements);
1864
2016
  assert(bookmark >= 0);
1865
- assert(bookmark <= state->_active_formatting_elements.length);
2017
+ assert((unsigned int) bookmark <= state->_active_formatting_elements.length);
1866
2018
  gumbo_vector_insert_at(parser, new_formatting_node, bookmark,
1867
- &state->_active_formatting_elements);
2019
+ &state->_active_formatting_elements);
1868
2020
 
1869
- // Step 15.
1870
- gumbo_vector_remove(
1871
- parser, formatting_node, &state->_open_elements);
1872
- int insert_at = gumbo_vector_index_of(
1873
- &state->_open_elements, furthest_block) + 1;
2021
+ // Step 19.
2022
+ gumbo_vector_remove(parser, formatting_node, &state->_open_elements);
2023
+ int insert_at =
2024
+ gumbo_vector_index_of(&state->_open_elements, furthest_block) + 1;
1874
2025
  assert(insert_at >= 0);
1875
- assert(insert_at <= state->_open_elements.length);
2026
+ assert((unsigned int) insert_at <= state->_open_elements.length);
1876
2027
  gumbo_vector_insert_at(
1877
2028
  parser, new_formatting_node, insert_at, &state->_open_elements);
1878
- }
2029
+ } // Step 20.
1879
2030
  return true;
1880
2031
  }
1881
2032
 
@@ -1898,6 +2049,7 @@ static void ignore_token(GumboParser* parser) {
1898
2049
 
1899
2050
  // http://www.whatwg.org/specs/web-apps/current-work/complete/the-end.html
1900
2051
  static void finish_parsing(GumboParser* parser) {
2052
+ gumbo_debug("Finishing parsing");
1901
2053
  maybe_flush_text_node_buffer(parser);
1902
2054
  GumboParserState* state = parser->_parser_state;
1903
2055
  for (GumboNode* node = pop_current_node(parser); node;
@@ -1908,7 +2060,8 @@ static void finish_parsing(GumboParser* parser) {
1908
2060
  }
1909
2061
  node->parse_flags |= GUMBO_INSERTION_IMPLICIT_END_TAG;
1910
2062
  }
1911
- while (pop_current_node(parser)); // Pop them all.
2063
+ while (pop_current_node(parser))
2064
+ ; // Pop them all.
1912
2065
  }
1913
2066
 
1914
2067
  static bool handle_initial(GumboParser* parser, GumboToken* token) {
@@ -1952,9 +2105,9 @@ static bool handle_before_html(GumboParser* parser, GumboToken* token) {
1952
2105
  parser->_output->root = html_node;
1953
2106
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_BEFORE_HEAD);
1954
2107
  return true;
1955
- } else if (token->type == GUMBO_TOKEN_END_TAG &&
1956
- !tag_in(token, false, (gumbo_tagset) { TAG(HEAD), TAG(BODY), TAG(HTML),
1957
- TAG(BR) } )) {
2108
+ } else if (token->type == GUMBO_TOKEN_END_TAG &&
2109
+ !tag_in(token, false,
2110
+ (gumbo_tagset){TAG(HEAD), TAG(BODY), TAG(HTML), TAG(BR)})) {
1958
2111
  parser_add_parse_error(parser, token);
1959
2112
  ignore_token(parser);
1960
2113
  return false;
@@ -1986,9 +2139,9 @@ static bool handle_before_head(GumboParser* parser, GumboToken* token) {
1986
2139
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_HEAD);
1987
2140
  parser->_parser_state->_head_element = node;
1988
2141
  return true;
1989
- } else if (token->type == GUMBO_TOKEN_END_TAG &&
1990
- !tag_in(token, false, (gumbo_tagset) { TAG(HEAD), TAG(BODY), TAG(HTML),
1991
- TAG(BR) })) {
2142
+ } else if (token->type == GUMBO_TOKEN_END_TAG &&
2143
+ !tag_in(token, false,
2144
+ (gumbo_tagset){TAG(HEAD), TAG(BODY), TAG(HTML), TAG(BR)})) {
1992
2145
  parser_add_parse_error(parser, token);
1993
2146
  ignore_token(parser);
1994
2147
  return false;
@@ -2020,8 +2173,9 @@ static bool handle_in_head(GumboParser* parser, GumboToken* token) {
2020
2173
  return true;
2021
2174
  } else if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
2022
2175
  return handle_in_body(parser, token);
2023
- } else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(BASE), TAG(BASEFONT),
2024
- TAG(BGSOUND), TAG(MENUITEM), TAG(LINK) })) {
2176
+ } else if (tag_in(token, kStartTag,
2177
+ (gumbo_tagset){TAG(BASE), TAG(BASEFONT), TAG(BGSOUND),
2178
+ TAG(MENUITEM), TAG(LINK)})) {
2025
2179
  insert_element_from_token(parser, token);
2026
2180
  pop_current_node(parser);
2027
2181
  acknowledge_self_closing_tag(parser);
@@ -2038,7 +2192,8 @@ static bool handle_in_head(GumboParser* parser, GumboToken* token) {
2038
2192
  } else if (tag_is(token, kStartTag, GUMBO_TAG_TITLE)) {
2039
2193
  run_generic_parsing_algorithm(parser, token, GUMBO_LEX_RCDATA);
2040
2194
  return true;
2041
- } else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(NOFRAMES), TAG(STYLE) })) {
2195
+ } else if (tag_in(
2196
+ token, kStartTag, (gumbo_tagset){TAG(NOFRAMES), TAG(STYLE)})) {
2042
2197
  run_generic_parsing_algorithm(parser, token, GUMBO_LEX_RAWTEXT);
2043
2198
  return true;
2044
2199
  } else if (tag_is(token, kStartTag, GUMBO_TAG_NOSCRIPT)) {
@@ -2054,29 +2209,48 @@ static bool handle_in_head(GumboParser* parser, GumboToken* token) {
2054
2209
  assert(node_html_tag_is(head, GUMBO_TAG_HEAD));
2055
2210
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_HEAD);
2056
2211
  return true;
2057
- } else if (tag_is(token, kStartTag, GUMBO_TAG_HEAD)) {
2058
- parser_add_parse_error(parser, token);
2059
- ignore_token(parser);
2060
- return false;
2212
+ } else if (tag_in(token, kEndTag,
2213
+ (gumbo_tagset){TAG(BODY), TAG(HTML), TAG(BR)})) {
2214
+ pop_current_node(parser);
2215
+ set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_HEAD);
2216
+ parser->_parser_state->_reprocess_current_token = true;
2217
+ return true;
2218
+ } else if (tag_is(token, kStartTag, GUMBO_TAG_TEMPLATE)) {
2219
+ insert_element_from_token(parser, token);
2220
+ add_formatting_element(parser, &kActiveFormattingScopeMarker);
2221
+ parser->_parser_state->_frameset_ok = false;
2222
+ set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TEMPLATE);
2223
+ push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TEMPLATE);
2224
+ return true;
2225
+ } else if (tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
2226
+ if (!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
2227
+ parser_add_parse_error(parser, token);
2228
+ ignore_token(parser);
2229
+ return false;
2230
+ }
2231
+ generate_all_implied_end_tags_thoroughly(parser);
2232
+ bool success = true;
2233
+ if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_TEMPLATE)) {
2234
+ parser_add_parse_error(parser, token);
2235
+ success = false;
2236
+ }
2237
+ while (!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_TEMPLATE))
2238
+ ;
2239
+ clear_active_formatting_elements(parser);
2240
+ pop_template_insertion_mode(parser);
2241
+ reset_insertion_mode_appropriately(parser);
2242
+ return success;
2061
2243
  } else if (tag_is(token, kStartTag, GUMBO_TAG_HEAD) ||
2062
- (token->type == GUMBO_TOKEN_END_TAG &&
2063
- !tag_in(token, kEndTag, (gumbo_tagset) { TAG(BODY), TAG(HTML),
2064
- TAG(BR) }))) {
2065
- parser_add_parse_error(parser, token);
2066
- return false;
2067
- } else if (tag_is(token, kStartTag, GUMBO_TAG_UNKNOWN) && token->v.start_tag.is_self_closing) {
2244
+ (token->type == GUMBO_TOKEN_END_TAG)) {
2068
2245
  parser_add_parse_error(parser, token);
2069
2246
  ignore_token(parser);
2070
2247
  return false;
2071
2248
  } else {
2072
- const GumboNode* node = pop_current_node(parser);
2073
- assert(node_html_tag_is(node, GUMBO_TAG_HEAD));
2074
- AVOID_UNUSED_VARIABLE_WARNING(node);
2249
+ pop_current_node(parser);
2075
2250
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_HEAD);
2076
2251
  parser->_parser_state->_reprocess_current_token = true;
2077
2252
  return true;
2078
2253
  }
2079
-
2080
2254
  return true;
2081
2255
  }
2082
2256
 
@@ -2095,12 +2269,14 @@ static bool handle_in_head_noscript(GumboParser* parser, GumboToken* token) {
2095
2269
  return true;
2096
2270
  } else if (token->type == GUMBO_TOKEN_WHITESPACE ||
2097
2271
  token->type == GUMBO_TOKEN_COMMENT ||
2098
- tag_in(token, kStartTag, (gumbo_tagset) { TAG(BASEFONT), TAG(BGSOUND),
2099
- TAG(LINK), TAG(META), TAG(NOFRAMES), TAG(STYLE) })) {
2100
- return handle_in_head(parser, token);
2101
- } else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(HEAD), TAG(NOSCRIPT) }) ||
2102
- (token->type == GUMBO_TOKEN_END_TAG &&
2103
- !tag_is(token, kEndTag, GUMBO_TAG_BR))) {
2272
+ tag_in(token, kStartTag,
2273
+ (gumbo_tagset){TAG(BASEFONT), TAG(BGSOUND), TAG(LINK),
2274
+ TAG(META), TAG(NOFRAMES), TAG(STYLE)})) {
2275
+ return handle_in_head(parser, token);
2276
+ } else if (tag_in(
2277
+ token, kStartTag, (gumbo_tagset){TAG(HEAD), TAG(NOSCRIPT)}) ||
2278
+ (token->type == GUMBO_TOKEN_END_TAG &&
2279
+ !tag_is(token, kEndTag, GUMBO_TAG_BR))) {
2104
2280
  parser_add_parse_error(parser, token);
2105
2281
  ignore_token(parser);
2106
2282
  return false;
@@ -2139,10 +2315,10 @@ static bool handle_after_head(GumboParser* parser, GumboToken* token) {
2139
2315
  insert_element_from_token(parser, token);
2140
2316
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_FRAMESET);
2141
2317
  return true;
2142
- } else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(BASE), TAG(BASEFONT),
2143
- TAG(BGSOUND), TAG(LINK), TAG(META),
2144
- TAG(NOFRAMES), TAG(SCRIPT), TAG(STYLE),
2145
- TAG(TITLE) })) {
2318
+ } else if (tag_in(token, kStartTag,
2319
+ (gumbo_tagset){TAG(BASE), TAG(BASEFONT), TAG(BGSOUND),
2320
+ TAG(LINK), TAG(META), TAG(NOFRAMES), TAG(SCRIPT),
2321
+ TAG(STYLE), TAG(TEMPLATE), TAG(TITLE)})) {
2146
2322
  parser_add_parse_error(parser, token);
2147
2323
  assert(state->_head_element != NULL);
2148
2324
  // This must be flushed before we push the head element on, as there may be
@@ -2152,9 +2328,12 @@ static bool handle_after_head(GumboParser* parser, GumboToken* token) {
2152
2328
  bool result = handle_in_head(parser, token);
2153
2329
  gumbo_vector_remove(parser, state->_head_element, &state->_open_elements);
2154
2330
  return result;
2331
+ } else if (tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
2332
+ return handle_in_head(parser, token);
2155
2333
  } else if (tag_is(token, kStartTag, GUMBO_TAG_HEAD) ||
2156
- (token->type == GUMBO_TOKEN_END_TAG &&
2157
- !tag_in(token, kEndTag, (gumbo_tagset) { TAG(BODY), TAG(HTML), TAG(BR) }))) {
2334
+ (token->type == GUMBO_TOKEN_END_TAG &&
2335
+ !tag_in(token, kEndTag,
2336
+ (gumbo_tagset){TAG(BODY), TAG(HTML), TAG(BR)}))) {
2158
2337
  parser_add_parse_error(parser, token);
2159
2338
  ignore_token(parser);
2160
2339
  return false;
@@ -2168,24 +2347,23 @@ static bool handle_after_head(GumboParser* parser, GumboToken* token) {
2168
2347
 
2169
2348
  static void destroy_node(GumboParser* parser, GumboNode* node) {
2170
2349
  switch (node->type) {
2171
- case GUMBO_NODE_DOCUMENT:
2172
- {
2173
- GumboDocument* doc = &node->v.document;
2174
- for (int i = 0; i < doc->children.length; ++i) {
2175
- destroy_node(parser, doc->children.data[i]);
2176
- }
2177
- gumbo_parser_deallocate(parser, (void*) doc->children.data);
2178
- gumbo_parser_deallocate(parser, (void*) doc->name);
2179
- gumbo_parser_deallocate(parser, (void*) doc->public_identifier);
2180
- gumbo_parser_deallocate(parser, (void*) doc->system_identifier);
2350
+ case GUMBO_NODE_DOCUMENT: {
2351
+ GumboDocument* doc = &node->v.document;
2352
+ for (unsigned int i = 0; i < doc->children.length; ++i) {
2353
+ destroy_node(parser, doc->children.data[i]);
2181
2354
  }
2182
- break;
2355
+ gumbo_parser_deallocate(parser, (void*) doc->children.data);
2356
+ gumbo_parser_deallocate(parser, (void*) doc->name);
2357
+ gumbo_parser_deallocate(parser, (void*) doc->public_identifier);
2358
+ gumbo_parser_deallocate(parser, (void*) doc->system_identifier);
2359
+ } break;
2360
+ case GUMBO_NODE_TEMPLATE:
2183
2361
  case GUMBO_NODE_ELEMENT:
2184
- for (int i = 0; i < node->v.element.attributes.length; ++i) {
2362
+ for (unsigned int i = 0; i < node->v.element.attributes.length; ++i) {
2185
2363
  gumbo_destroy_attribute(parser, node->v.element.attributes.data[i]);
2186
2364
  }
2187
2365
  gumbo_parser_deallocate(parser, node->v.element.attributes.data);
2188
- for (int i = 0; i < node->v.element.children.length; ++i) {
2366
+ for (unsigned int i = 0; i < node->v.element.children.length; ++i) {
2189
2367
  destroy_node(parser, node->v.element.children.data[i]);
2190
2368
  }
2191
2369
  gumbo_parser_deallocate(parser, node->v.element.children.data);
@@ -2226,20 +2404,26 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2226
2404
  ignore_token(parser);
2227
2405
  return false;
2228
2406
  } else if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
2407
+ parser_add_parse_error(parser, token);
2408
+ if (has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
2409
+ ignore_token(parser);
2410
+ return false;
2411
+ }
2229
2412
  assert(parser->_output->root != NULL);
2230
2413
  assert(parser->_output->root->type == GUMBO_NODE_ELEMENT);
2231
- parser_add_parse_error(parser, token);
2232
2414
  merge_attributes(parser, token, parser->_output->root);
2233
2415
  return false;
2234
- } else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(BASE), TAG(BASEFONT),
2235
- TAG(BGSOUND), TAG(MENUITEM), TAG(LINK),
2236
- TAG(META), TAG(NOFRAMES), TAG(SCRIPT),
2237
- TAG(STYLE), TAG(TITLE) } )) {
2416
+ } else if (tag_in(token, kStartTag,
2417
+ (gumbo_tagset){TAG(BASE), TAG(BASEFONT), TAG(BGSOUND),
2418
+ TAG(MENUITEM), TAG(LINK), TAG(META), TAG(NOFRAMES),
2419
+ TAG(SCRIPT), TAG(STYLE), TAG(TEMPLATE), TAG(TITLE)}) ||
2420
+ tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
2238
2421
  return handle_in_head(parser, token);
2239
2422
  } else if (tag_is(token, kStartTag, GUMBO_TAG_BODY)) {
2240
2423
  parser_add_parse_error(parser, token);
2241
2424
  if (state->_open_elements.length < 2 ||
2242
- !node_html_tag_is(state->_open_elements.data[1], GUMBO_TAG_BODY)) {
2425
+ !node_html_tag_is(state->_open_elements.data[1], GUMBO_TAG_BODY) ||
2426
+ has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
2243
2427
  ignore_token(parser);
2244
2428
  return false;
2245
2429
  }
@@ -2273,7 +2457,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2273
2457
  // Remove the body node. We may want to factor this out into a generic
2274
2458
  // helper, but right now this is the only code that needs to do this.
2275
2459
  GumboVector* children = &parser->_output->root->v.element.children;
2276
- for (int i = 0; i < children->length; ++i) {
2460
+ for (unsigned int i = 0; i < children->length; ++i) {
2277
2461
  if (children->data[i] == body_node) {
2278
2462
  gumbo_vector_remove_at(parser, i, children);
2279
2463
  break;
@@ -2286,27 +2470,32 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2286
2470
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_FRAMESET);
2287
2471
  return true;
2288
2472
  } else if (token->type == GUMBO_TOKEN_EOF) {
2289
- for (int i = 0; i < state->_open_elements.length; ++i) {
2290
- if (!node_tag_in_set(state->_open_elements.data[i], (gumbo_tagset) { TAG(DD),
2291
- TAG(DT), TAG(LI), TAG(P), TAG(TBODY), TAG(TD), TAG(TFOOT), TAG(TH),
2292
- TAG(THEAD), TAG(TR), TAG(BODY), TAG(HTML) } )) {
2473
+ for (unsigned int i = 0; i < state->_open_elements.length; ++i) {
2474
+ if (!node_tag_in_set(state->_open_elements.data[i],
2475
+ (gumbo_tagset){TAG(DD), TAG(DT), TAG(LI), TAG(P), TAG(TBODY),
2476
+ TAG(TD), TAG(TFOOT), TAG(TH), TAG(THEAD), TAG(TR), TAG(BODY),
2477
+ TAG(HTML)})) {
2293
2478
  parser_add_parse_error(parser, token);
2294
- return false;
2295
2479
  }
2296
2480
  }
2481
+ if (get_current_template_insertion_mode(parser) !=
2482
+ GUMBO_INSERTION_MODE_INITIAL) {
2483
+ return handle_in_template(parser, token);
2484
+ }
2297
2485
  return true;
2298
- } else if (tag_in(token, kEndTag, (gumbo_tagset) { TAG(BODY), TAG(HTML) })) {
2486
+ } else if (tag_in(token, kEndTag, (gumbo_tagset){TAG(BODY), TAG(HTML)})) {
2299
2487
  if (!has_an_element_in_scope(parser, GUMBO_TAG_BODY)) {
2300
2488
  parser_add_parse_error(parser, token);
2301
2489
  ignore_token(parser);
2302
2490
  return false;
2303
2491
  }
2304
2492
  bool success = true;
2305
- for (int i = 0; i < state->_open_elements.length; ++i) {
2306
- if (!node_tag_in_set(state->_open_elements.data[i], (gumbo_tagset) { TAG(DD),
2307
- TAG(DT), TAG(LI), TAG(OPTGROUP), TAG(OPTION), TAG(P), TAG(RP),
2308
- TAG(RT), TAG(TBODY), TAG(TD), TAG(TFOOT), TAG(TH), TAG(THEAD),
2309
- TAG(TR), TAG(BODY), TAG(HTML) })) {
2493
+ for (unsigned int i = 0; i < state->_open_elements.length; ++i) {
2494
+ if (!node_tag_in_set(state->_open_elements.data[i],
2495
+ (gumbo_tagset){TAG(DD), TAG(DT), TAG(LI), TAG(OPTGROUP),
2496
+ TAG(OPTION), TAG(P), TAG(RB), TAG(RP), TAG(RT), TAG(RTC),
2497
+ TAG(TBODY), TAG(TD), TAG(TFOOT), TAG(TH), TAG(THEAD), TAG(TR),
2498
+ TAG(BODY), TAG(HTML)})) {
2310
2499
  parser_add_parse_error(parser, token);
2311
2500
  success = false;
2312
2501
  break;
@@ -2321,48 +2510,54 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2321
2510
  record_end_of_element(state->_current_token, &body->v.element);
2322
2511
  }
2323
2512
  return success;
2324
- } else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(ADDRESS), TAG(ARTICLE),
2325
- TAG(ASIDE), TAG(BLOCKQUOTE), TAG(CENTER), TAG(DETAILS),
2326
- TAG(DIR), TAG(DIV), TAG(DL), TAG(FIELDSET), TAG(FIGCAPTION),
2327
- TAG(FIGURE), TAG(FOOTER), TAG(HEADER), TAG(HGROUP), TAG(MENU),
2328
- TAG(NAV), TAG(OL), TAG(P), TAG(SECTION), TAG(SUMMARY), TAG(UL) })) {
2513
+ } else if (tag_in(token, kStartTag,
2514
+ (gumbo_tagset){TAG(ADDRESS), TAG(ARTICLE), TAG(ASIDE),
2515
+ TAG(BLOCKQUOTE), TAG(CENTER), TAG(DETAILS), TAG(DIR),
2516
+ TAG(DIV), TAG(DL), TAG(FIELDSET), TAG(FIGCAPTION),
2517
+ TAG(FIGURE), TAG(FOOTER), TAG(HEADER), TAG(HGROUP),
2518
+ TAG(MENU), TAG(MAIN), TAG(NAV), TAG(OL), TAG(P),
2519
+ TAG(SECTION), TAG(SUMMARY), TAG(UL)})) {
2329
2520
  bool result = maybe_implicitly_close_p_tag(parser, token);
2330
2521
  insert_element_from_token(parser, token);
2331
2522
  return result;
2332
- } else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(H1), TAG(H2), TAG(H3),
2333
- TAG(H4), TAG(H5), TAG(H6) })) {
2523
+ } else if (tag_in(token, kStartTag, (gumbo_tagset){TAG(H1), TAG(H2), TAG(H3),
2524
+ TAG(H4), TAG(H5), TAG(H6)})) {
2334
2525
  bool result = maybe_implicitly_close_p_tag(parser, token);
2335
- if (node_tag_in_set(get_current_node(parser), (gumbo_tagset) { TAG(H1), TAG(H2),
2336
- TAG(H3), TAG(H4), TAG(H5), TAG(H6) })) {
2526
+ if (node_tag_in_set(
2527
+ get_current_node(parser), (gumbo_tagset){TAG(H1), TAG(H2), TAG(H3),
2528
+ TAG(H4), TAG(H5), TAG(H6)})) {
2337
2529
  parser_add_parse_error(parser, token);
2338
2530
  pop_current_node(parser);
2339
2531
  result = false;
2340
2532
  }
2341
2533
  insert_element_from_token(parser, token);
2342
2534
  return result;
2343
- } else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(PRE), TAG(LISTING) })) {
2535
+ } else if (tag_in(token, kStartTag, (gumbo_tagset){TAG(PRE), TAG(LISTING)})) {
2344
2536
  bool result = maybe_implicitly_close_p_tag(parser, token);
2345
2537
  insert_element_from_token(parser, token);
2346
2538
  state->_ignore_next_linefeed = true;
2347
2539
  state->_frameset_ok = false;
2348
2540
  return result;
2349
2541
  } else if (tag_is(token, kStartTag, GUMBO_TAG_FORM)) {
2350
- if (state->_form_element != NULL) {
2542
+ if (state->_form_element != NULL &&
2543
+ !has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
2351
2544
  gumbo_debug("Ignoring nested form.\n");
2352
2545
  parser_add_parse_error(parser, token);
2353
2546
  ignore_token(parser);
2354
2547
  return false;
2355
2548
  }
2356
2549
  bool result = maybe_implicitly_close_p_tag(parser, token);
2357
- state->_form_element =
2358
- insert_element_from_token(parser, token);
2550
+ GumboNode* form_element = insert_element_from_token(parser, token);
2551
+ if (!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
2552
+ state->_form_element = form_element;
2553
+ }
2359
2554
  return result;
2360
2555
  } else if (tag_is(token, kStartTag, GUMBO_TAG_LI)) {
2361
2556
  maybe_implicitly_close_list_tag(parser, token, true);
2362
2557
  bool result = maybe_implicitly_close_p_tag(parser, token);
2363
2558
  insert_element_from_token(parser, token);
2364
2559
  return result;
2365
- } else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(DD), TAG(DT) })) {
2560
+ } else if (tag_in(token, kStartTag, (gumbo_tagset){TAG(DD), TAG(DT)})) {
2366
2561
  maybe_implicitly_close_list_tag(parser, token, false);
2367
2562
  bool result = maybe_implicitly_close_p_tag(parser, token);
2368
2563
  insert_element_from_token(parser, token);
@@ -2375,7 +2570,8 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2375
2570
  } else if (tag_is(token, kStartTag, GUMBO_TAG_BUTTON)) {
2376
2571
  if (has_an_element_in_scope(parser, GUMBO_TAG_BUTTON)) {
2377
2572
  parser_add_parse_error(parser, token);
2378
- implicitly_close_tags(parser, token, GUMBO_NAMESPACE_HTML, GUMBO_TAG_BUTTON);
2573
+ implicitly_close_tags(
2574
+ parser, token, GUMBO_NAMESPACE_HTML, GUMBO_TAG_BUTTON);
2379
2575
  state->_reprocess_current_token = true;
2380
2576
  return false;
2381
2577
  }
@@ -2383,45 +2579,63 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2383
2579
  insert_element_from_token(parser, token);
2384
2580
  state->_frameset_ok = false;
2385
2581
  return true;
2386
- } else if (tag_in(token, kEndTag, (gumbo_tagset) { TAG(ADDRESS), TAG(ARTICLE),
2387
- TAG(ASIDE), TAG(BLOCKQUOTE), TAG(BUTTON), TAG(CENTER), TAG(DETAILS),
2388
- TAG(DIR), TAG(DIV), TAG(DL), TAG(FIELDSET), TAG(FIGCAPTION),
2389
- TAG(FIGURE), TAG(FOOTER), TAG(HEADER), TAG(HGROUP), TAG(LISTING),
2390
- TAG(MENU), TAG(NAV), TAG(OL), TAG(PRE),
2391
- TAG(SECTION), TAG(SUMMARY), TAG(UL) })) {
2582
+ } else if (tag_in(token, kEndTag,
2583
+ (gumbo_tagset){TAG(ADDRESS), TAG(ARTICLE), TAG(ASIDE),
2584
+ TAG(BLOCKQUOTE), TAG(BUTTON), TAG(CENTER), TAG(DETAILS),
2585
+ TAG(DIR), TAG(DIV), TAG(DL), TAG(FIELDSET),
2586
+ TAG(FIGCAPTION), TAG(FIGURE), TAG(FOOTER), TAG(HEADER),
2587
+ TAG(HGROUP), TAG(LISTING), TAG(MAIN), TAG(MENU), TAG(NAV),
2588
+ TAG(OL), TAG(PRE), TAG(SECTION), TAG(SUMMARY), TAG(UL)})) {
2392
2589
  GumboTag tag = token->v.end_tag;
2393
2590
  if (!has_an_element_in_scope(parser, tag)) {
2394
2591
  parser_add_parse_error(parser, token);
2395
2592
  ignore_token(parser);
2396
2593
  return false;
2397
2594
  }
2398
- implicitly_close_tags(parser, token, GUMBO_NAMESPACE_HTML, token->v.end_tag);
2595
+ implicitly_close_tags(
2596
+ parser, token, GUMBO_NAMESPACE_HTML, token->v.end_tag);
2399
2597
  return true;
2400
2598
  } else if (tag_is(token, kEndTag, GUMBO_TAG_FORM)) {
2401
- bool result = true;
2402
- const GumboNode* node = state->_form_element;
2403
- assert(!node || node->type == GUMBO_NODE_ELEMENT);
2404
- state->_form_element = NULL;
2405
- if (!node || !has_node_in_scope(parser, node)) {
2406
- gumbo_debug("Closing an unopened form.\n");
2407
- parser_add_parse_error(parser, token);
2408
- ignore_token(parser);
2409
- return false;
2410
- }
2411
- // This differs from implicitly_close_tags because we remove *only* the
2412
- // <form> element; other nodes are left in scope.
2413
- generate_implied_end_tags(parser, GUMBO_TAG_LAST);
2414
- if (get_current_node(parser) != node) {
2415
- parser_add_parse_error(parser, token);
2416
- result = false;
2417
- }
2599
+ if (has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
2600
+ if (!has_an_element_in_scope(parser, GUMBO_TAG_FORM)) {
2601
+ parser_add_parse_error(parser, token);
2602
+ ignore_token(parser);
2603
+ return false;
2604
+ }
2605
+ bool success = true;
2606
+ generate_implied_end_tags(parser, GUMBO_TAG_LAST);
2607
+ if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_FORM)) {
2608
+ parser_add_parse_error(parser, token);
2609
+ return false;
2610
+ }
2611
+ while (!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_FORM))
2612
+ ;
2613
+ return success;
2614
+ } else {
2615
+ bool result = true;
2616
+ const GumboNode* node = state->_form_element;
2617
+ assert(!node || node->type == GUMBO_NODE_ELEMENT);
2618
+ state->_form_element = NULL;
2619
+ if (!node || !has_node_in_scope(parser, node)) {
2620
+ gumbo_debug("Closing an unopened form.\n");
2621
+ parser_add_parse_error(parser, token);
2622
+ ignore_token(parser);
2623
+ return false;
2624
+ }
2625
+ // This differs from implicitly_close_tags because we remove *only* the
2626
+ // <form> element; other nodes are left in scope.
2627
+ generate_implied_end_tags(parser, GUMBO_TAG_LAST);
2628
+ if (get_current_node(parser) != node) {
2629
+ parser_add_parse_error(parser, token);
2630
+ result = false;
2631
+ }
2418
2632
 
2419
- GumboVector* open_elements = &state->_open_elements;
2420
- int index = open_elements->length - 1;
2421
- for (; index >= 0 && open_elements->data[index] != node; --index);
2422
- assert(index >= 0);
2423
- gumbo_vector_remove_at(parser, index, open_elements);
2424
- return result;
2633
+ GumboVector* open_elements = &state->_open_elements;
2634
+ int index = gumbo_vector_index_of(open_elements, node);
2635
+ assert(index >= 0);
2636
+ gumbo_vector_remove_at(parser, index, open_elements);
2637
+ return result;
2638
+ }
2425
2639
  } else if (tag_is(token, kEndTag, GUMBO_TAG_P)) {
2426
2640
  if (!has_an_element_in_button_scope(parser, GUMBO_TAG_P)) {
2427
2641
  parser_add_parse_error(parser, token);
@@ -2431,15 +2645,17 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2431
2645
  state->_reprocess_current_token = true;
2432
2646
  return false;
2433
2647
  }
2434
- return implicitly_close_tags(parser, token, GUMBO_NAMESPACE_HTML, GUMBO_TAG_P);
2648
+ return implicitly_close_tags(
2649
+ parser, token, GUMBO_NAMESPACE_HTML, GUMBO_TAG_P);
2435
2650
  } else if (tag_is(token, kEndTag, GUMBO_TAG_LI)) {
2436
2651
  if (!has_an_element_in_list_scope(parser, GUMBO_TAG_LI)) {
2437
2652
  parser_add_parse_error(parser, token);
2438
2653
  ignore_token(parser);
2439
2654
  return false;
2440
2655
  }
2441
- return implicitly_close_tags(parser, token, GUMBO_NAMESPACE_HTML, GUMBO_TAG_LI);
2442
- } else if (tag_in(token, kEndTag, (gumbo_tagset) { TAG(DD), TAG(DT) })) {
2656
+ return implicitly_close_tags(
2657
+ parser, token, GUMBO_NAMESPACE_HTML, GUMBO_TAG_LI);
2658
+ } else if (tag_in(token, kEndTag, (gumbo_tagset){TAG(DD), TAG(DT)})) {
2443
2659
  assert(token->type == GUMBO_TOKEN_END_TAG);
2444
2660
  GumboTag token_tag = token->v.end_tag;
2445
2661
  if (!has_an_element_in_scope(parser, token_tag)) {
@@ -2447,11 +2663,13 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2447
2663
  ignore_token(parser);
2448
2664
  return false;
2449
2665
  }
2450
- return implicitly_close_tags(parser, token, GUMBO_NAMESPACE_HTML, token_tag);
2451
- } else if (tag_in(token, kEndTag, (gumbo_tagset) { TAG(H1), TAG(H2), TAG(H3),
2452
- TAG(H4), TAG(H5), TAG(H6) })) {
2453
- if (!has_an_element_in_scope_with_tagname(parser, (gumbo_tagset) { TAG(H1), TAG(H2), TAG(H3), TAG(H4),
2454
- TAG(H5), TAG(H6) })) {
2666
+ return implicitly_close_tags(
2667
+ parser, token, GUMBO_NAMESPACE_HTML, token_tag);
2668
+ } else if (tag_in(token, kEndTag, (gumbo_tagset){TAG(H1), TAG(H2), TAG(H3),
2669
+ TAG(H4), TAG(H5), TAG(H6)})) {
2670
+ if (!has_an_element_in_scope_with_tagname(
2671
+ parser, 6, (GumboTag[]){GUMBO_TAG_H1, GUMBO_TAG_H2, GUMBO_TAG_H3,
2672
+ GUMBO_TAG_H4, GUMBO_TAG_H5, GUMBO_TAG_H6})) {
2455
2673
  // No heading open; ignore the token entirely.
2456
2674
  parser_add_parse_error(parser, token);
2457
2675
  ignore_token(parser);
@@ -2469,8 +2687,9 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2469
2687
  }
2470
2688
  do {
2471
2689
  current_node = pop_current_node(parser);
2472
- } while (!node_tag_in_set(current_node, (gumbo_tagset) { TAG(H1), TAG(H2),
2473
- TAG(H3), TAG(H4), TAG(H5), TAG(H6) } ));
2690
+ } while (!node_tag_in_set(
2691
+ current_node, (gumbo_tagset){TAG(H1), TAG(H2), TAG(H3),
2692
+ TAG(H4), TAG(H5), TAG(H6)}));
2474
2693
  return success;
2475
2694
  }
2476
2695
  } else if (tag_is(token, kStartTag, GUMBO_TAG_A)) {
@@ -2488,18 +2707,17 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2488
2707
  if (find_last_anchor_index(parser, &last_a)) {
2489
2708
  void* last_element = gumbo_vector_remove_at(
2490
2709
  parser, last_a, &state->_active_formatting_elements);
2491
- gumbo_vector_remove(
2492
- parser, last_element, &state->_open_elements);
2710
+ gumbo_vector_remove(parser, last_element, &state->_open_elements);
2493
2711
  }
2494
2712
  success = false;
2495
2713
  }
2496
2714
  reconstruct_active_formatting_elements(parser);
2497
2715
  add_formatting_element(parser, insert_element_from_token(parser, token));
2498
2716
  return success;
2499
- } else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(B), TAG(BIG),
2500
- TAG(CODE), TAG(EM), TAG(FONT), TAG(I),
2501
- TAG(S), TAG(SMALL), TAG(STRIKE),
2502
- TAG(STRONG), TAG(TT), TAG(U) })) {
2717
+ } else if (tag_in(token, kStartTag,
2718
+ (gumbo_tagset){TAG(B), TAG(BIG), TAG(CODE), TAG(EM), TAG(FONT),
2719
+ TAG(I), TAG(S), TAG(SMALL), TAG(STRIKE), TAG(STRONG),
2720
+ TAG(TT), TAG(U)})) {
2503
2721
  reconstruct_active_formatting_elements(parser);
2504
2722
  add_formatting_element(parser, insert_element_from_token(parser, token));
2505
2723
  return true;
@@ -2515,20 +2733,20 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2515
2733
  insert_element_from_token(parser, token);
2516
2734
  add_formatting_element(parser, get_current_node(parser));
2517
2735
  return result;
2518
- } else if (tag_in(token, kEndTag, (gumbo_tagset) { TAG(A), TAG(B), TAG(BIG),
2519
- TAG(CODE), TAG(EM), TAG(FONT), TAG(I),
2520
- TAG(NOBR), TAG(S), TAG(SMALL),
2521
- TAG(STRIKE), TAG(STRONG), TAG(TT), TAG(U) })) {
2736
+ } else if (tag_in(token, kEndTag,
2737
+ (gumbo_tagset){TAG(A), TAG(B), TAG(BIG), TAG(CODE), TAG(EM),
2738
+ TAG(FONT), TAG(I), TAG(NOBR), TAG(S), TAG(SMALL),
2739
+ TAG(STRIKE), TAG(STRONG), TAG(TT), TAG(U)})) {
2522
2740
  return adoption_agency_algorithm(parser, token, token->v.end_tag);
2523
- } else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(APPLET), TAG(MARQUEE),
2524
- TAG(OBJECT) })) {
2741
+ } else if (tag_in(token, kStartTag,
2742
+ (gumbo_tagset){TAG(APPLET), TAG(MARQUEE), TAG(OBJECT)})) {
2525
2743
  reconstruct_active_formatting_elements(parser);
2526
2744
  insert_element_from_token(parser, token);
2527
2745
  add_formatting_element(parser, &kActiveFormattingScopeMarker);
2528
2746
  set_frameset_not_ok(parser);
2529
2747
  return true;
2530
- } else if (tag_in(token, kEndTag, (gumbo_tagset) { TAG(APPLET), TAG(MARQUEE),
2531
- TAG(OBJECT) })) {
2748
+ } else if (tag_in(token, kEndTag,
2749
+ (gumbo_tagset){TAG(APPLET), TAG(MARQUEE), TAG(OBJECT)})) {
2532
2750
  GumboTag token_tag = token->v.end_tag;
2533
2751
  if (!has_an_element_in_table_scope(parser, token_tag)) {
2534
2752
  parser_add_parse_error(parser, token);
@@ -2547,8 +2765,9 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2547
2765
  set_frameset_not_ok(parser);
2548
2766
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
2549
2767
  return true;
2550
- } else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(AREA), TAG(BR),
2551
- TAG(EMBED), TAG(IMG), TAG(IMAGE), TAG(KEYGEN), TAG(WBR) })) {
2768
+ } else if (tag_in(token, kStartTag,
2769
+ (gumbo_tagset){TAG(AREA), TAG(BR), TAG(EMBED), TAG(IMG),
2770
+ TAG(IMAGE), TAG(KEYGEN), TAG(WBR)})) {
2552
2771
  bool success = true;
2553
2772
  if (tag_is(token, kStartTag, GUMBO_TAG_IMAGE)) {
2554
2773
  success = false;
@@ -2578,7 +2797,8 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2578
2797
  pop_current_node(parser);
2579
2798
  acknowledge_self_closing_tag(parser);
2580
2799
  return true;
2581
- } else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(PARAM), TAG(SOURCE), TAG(TRACK) })) {
2800
+ } else if (tag_in(token, kStartTag,
2801
+ (gumbo_tagset){TAG(PARAM), TAG(SOURCE), TAG(TRACK)})) {
2582
2802
  insert_element_from_token(parser, token);
2583
2803
  pop_current_node(parser);
2584
2804
  acknowledge_self_closing_tag(parser);
@@ -2592,7 +2812,8 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2592
2812
  return result;
2593
2813
  } else if (tag_is(token, kStartTag, GUMBO_TAG_ISINDEX)) {
2594
2814
  parser_add_parse_error(parser, token);
2595
- if (parser->_parser_state->_form_element != NULL) {
2815
+ if (parser->_parser_state->_form_element != NULL &&
2816
+ !has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
2596
2817
  ignore_token(parser);
2597
2818
  return false;
2598
2819
  }
@@ -2607,15 +2828,18 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2607
2828
 
2608
2829
  GumboNode* form = insert_element_of_tag_type(
2609
2830
  parser, GUMBO_TAG_FORM, GUMBO_INSERTION_FROM_ISINDEX);
2831
+ if (!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
2832
+ parser->_parser_state->_form_element = form;
2833
+ }
2610
2834
  if (action_attr) {
2611
2835
  gumbo_vector_add(parser, action_attr, &form->v.element.attributes);
2612
2836
  }
2613
- insert_element_of_tag_type(parser, GUMBO_TAG_HR,
2614
- GUMBO_INSERTION_FROM_ISINDEX);
2615
- pop_current_node(parser); // <hr>
2837
+ insert_element_of_tag_type(
2838
+ parser, GUMBO_TAG_HR, GUMBO_INSERTION_FROM_ISINDEX);
2839
+ pop_current_node(parser); // <hr>
2616
2840
 
2617
- insert_element_of_tag_type(parser, GUMBO_TAG_LABEL,
2618
- GUMBO_INSERTION_FROM_ISINDEX);
2841
+ insert_element_of_tag_type(
2842
+ parser, GUMBO_TAG_LABEL, GUMBO_INSERTION_FROM_ISINDEX);
2619
2843
  TextNodeBufferState* text_state = &parser->_parser_state->_text_node;
2620
2844
  text_state->_start_original_text = token->original_text.data;
2621
2845
  text_state->_start_position = token->position;
@@ -2628,15 +2852,15 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2628
2852
  text_state->_buffer.capacity = prompt_attr_length + 1;
2629
2853
  gumbo_destroy_attribute(parser, prompt_attr);
2630
2854
  } else {
2631
- GumboStringPiece prompt_text = GUMBO_STRING(
2632
- "This is a searchable index. Enter search keywords: ");
2855
+ GumboStringPiece prompt_text =
2856
+ GUMBO_STRING("This is a searchable index. Enter search keywords: ");
2633
2857
  gumbo_string_buffer_append_string(
2634
2858
  parser, &prompt_text, &text_state->_buffer);
2635
2859
  }
2636
2860
 
2637
2861
  GumboNode* input = insert_element_of_tag_type(
2638
2862
  parser, GUMBO_TAG_INPUT, GUMBO_INSERTION_FROM_ISINDEX);
2639
- for (int i = 0; i < token_attrs->length; ++i) {
2863
+ for (unsigned int i = 0; i < token_attrs->length; ++i) {
2640
2864
  GumboAttribute* attr = token_attrs->data[i];
2641
2865
  if (attr != prompt_attr && attr != action_attr && attr != name_attr) {
2642
2866
  gumbo_vector_add(parser, attr, &input->v.element.attributes);
@@ -2649,6 +2873,13 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2649
2873
  // touching the attributes.
2650
2874
  ignore_token(parser);
2651
2875
 
2876
+ // The name attribute, if present, should be destroyed since it's ignored
2877
+ // when copying over. The action attribute should be kept since it's moved
2878
+ // to the form.
2879
+ if (name_attr) {
2880
+ gumbo_destroy_attribute(parser, name_attr);
2881
+ }
2882
+
2652
2883
  GumboAttribute* name =
2653
2884
  gumbo_parser_allocate(parser, sizeof(GumboAttribute));
2654
2885
  GumboStringPiece name_str = GUMBO_STRING("name");
@@ -2664,12 +2895,15 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2664
2895
  name->value_end = kGumboEmptySourcePosition;
2665
2896
  gumbo_vector_add(parser, name, &input->v.element.attributes);
2666
2897
 
2667
- pop_current_node(parser); // <input>
2668
- pop_current_node(parser); // <label>
2898
+ pop_current_node(parser); // <input>
2899
+ pop_current_node(parser); // <label>
2669
2900
  insert_element_of_tag_type(
2670
2901
  parser, GUMBO_TAG_HR, GUMBO_INSERTION_FROM_ISINDEX);
2671
- pop_current_node(parser); // <hr>
2672
- pop_current_node(parser); // <form>
2902
+ pop_current_node(parser); // <hr>
2903
+ pop_current_node(parser); // <form>
2904
+ if (!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
2905
+ parser->_parser_state->_form_element = NULL;
2906
+ }
2673
2907
  return false;
2674
2908
  } else if (tag_is(token, kStartTag, GUMBO_TAG_TEXTAREA)) {
2675
2909
  run_generic_parsing_algorithm(parser, token, GUMBO_LEX_RCDATA);
@@ -2704,19 +2938,27 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2704
2938
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_SELECT);
2705
2939
  }
2706
2940
  return true;
2707
- } else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(OPTION), TAG(OPTGROUP) })) {
2941
+ } else if (tag_in(token, kStartTag,
2942
+ (gumbo_tagset){TAG(OPTION), TAG(OPTGROUP)})) {
2708
2943
  if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTION)) {
2709
2944
  pop_current_node(parser);
2710
2945
  }
2711
2946
  reconstruct_active_formatting_elements(parser);
2712
2947
  insert_element_from_token(parser, token);
2713
2948
  return true;
2714
- } else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(RP), TAG(RT) })) {
2949
+ } else if (tag_in(token, kStartTag,
2950
+ (gumbo_tagset){TAG(RB), TAG(RP), TAG(RT), TAG(RTC)})) {
2715
2951
  bool success = true;
2952
+ GumboTag exception =
2953
+ tag_in(token, kStartTag, (gumbo_tagset){TAG(RT), TAG(RP)})
2954
+ ? GUMBO_TAG_RTC
2955
+ : GUMBO_TAG_LAST;
2716
2956
  if (has_an_element_in_scope(parser, GUMBO_TAG_RUBY)) {
2717
- generate_implied_end_tags(parser, GUMBO_TAG_LAST);
2957
+ generate_implied_end_tags(parser, exception);
2718
2958
  }
2719
- if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_RUBY)) {
2959
+ if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_RUBY) &&
2960
+ !(exception == GUMBO_TAG_LAST ||
2961
+ node_html_tag_is(get_current_node(parser), GUMBO_TAG_RTC))) {
2720
2962
  parser_add_parse_error(parser, token);
2721
2963
  success = false;
2722
2964
  }
@@ -2749,10 +2991,10 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2749
2991
  acknowledge_self_closing_tag(parser);
2750
2992
  }
2751
2993
  return true;
2752
- } else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(CAPTION), TAG(COL),
2753
- TAG(COLGROUP), TAG(FRAME), TAG(HEAD),
2754
- TAG(TBODY), TAG(TD), TAG(TFOOT),
2755
- TAG(TH), TAG(THEAD), TAG(TR) })) {
2994
+ } else if (tag_in(token, kStartTag,
2995
+ (gumbo_tagset){TAG(CAPTION), TAG(COL), TAG(COLGROUP),
2996
+ TAG(FRAME), TAG(HEAD), TAG(TBODY), TAG(TD), TAG(TFOOT),
2997
+ TAG(TH), TAG(THEAD), TAG(TR)})) {
2756
2998
  parser_add_parse_error(parser, token);
2757
2999
  ignore_token(parser);
2758
3000
  return false;
@@ -2771,14 +3013,15 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2771
3013
  // If we see a), implicitly close everything up to and including it. If we
2772
3014
  // see b), then record a parse error, don't close anything (except the
2773
3015
  // implied end tags) and ignore the end tag token.
2774
- for (int i = state->_open_elements.length; --i >= 0; ) {
3016
+ for (int i = state->_open_elements.length; --i >= 0;) {
2775
3017
  const GumboNode* node = state->_open_elements.data[i];
2776
3018
  if (node_html_tag_is(node, end_tag)) {
2777
3019
  generate_implied_end_tags(parser, end_tag);
2778
3020
  // TODO(jdtang): Do I need to add a parse error here? The condition in
2779
3021
  // the spec seems like it's the inverse of the loop condition above, and
2780
3022
  // so would never fire.
2781
- while (node != pop_current_node(parser)); // Pop everything.
3023
+ while (node != pop_current_node(parser))
3024
+ ; // Pop everything.
2782
3025
  return true;
2783
3026
  } else if (is_special_node(node)) {
2784
3027
  parser_add_parse_error(parser, token);
@@ -2794,7 +3037,8 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2794
3037
 
2795
3038
  // http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-incdata
2796
3039
  static bool handle_text(GumboParser* parser, GumboToken* token) {
2797
- if (token->type == GUMBO_TOKEN_CHARACTER || token->type == GUMBO_TOKEN_WHITESPACE) {
3040
+ if (token->type == GUMBO_TOKEN_CHARACTER ||
3041
+ token->type == GUMBO_TOKEN_WHITESPACE) {
2798
3042
  insert_text_token(parser, token);
2799
3043
  } else {
2800
3044
  // We provide only bare-bones script handling that doesn't involve any of
@@ -2854,11 +3098,12 @@ static bool handle_in_table(GumboParser* parser, GumboToken* token) {
2854
3098
  parser->_parser_state->_reprocess_current_token = true;
2855
3099
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_COLUMN_GROUP);
2856
3100
  return true;
2857
- } else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(TBODY), TAG(TFOOT),
2858
- TAG(THEAD), TAG(TD), TAG(TH), TAG(TR) })) {
3101
+ } else if (tag_in(token, kStartTag,
3102
+ (gumbo_tagset){TAG(TBODY), TAG(TFOOT), TAG(THEAD), TAG(TD),
3103
+ TAG(TH), TAG(TR)})) {
2859
3104
  clear_stack_to_table_context(parser);
2860
3105
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
2861
- if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(TD), TAG(TH), TAG(TR) })) {
3106
+ if (tag_in(token, kStartTag, (gumbo_tagset){TAG(TD), TAG(TH), TAG(TR)})) {
2862
3107
  insert_element_of_tag_type(
2863
3108
  parser, GUMBO_TAG_TBODY, GUMBO_INSERTION_IMPLIED);
2864
3109
  state->_reprocess_current_token = true;
@@ -2880,25 +3125,27 @@ static bool handle_in_table(GumboParser* parser, GumboToken* token) {
2880
3125
  return false;
2881
3126
  }
2882
3127
  return true;
2883
- } else if (tag_in(token, kEndTag, (gumbo_tagset) { TAG(BODY), TAG(CAPTION),
2884
- TAG(COL), TAG(COLGROUP), TAG(HTML),
2885
- TAG(TBODY), TAG(TD), TAG(TFOOT),
2886
- TAG(TH), TAG(THEAD), TAG(TR) })) {
3128
+ } else if (tag_in(token, kEndTag,
3129
+ (gumbo_tagset){TAG(BODY), TAG(CAPTION), TAG(COL),
3130
+ TAG(COLGROUP), TAG(HTML), TAG(TBODY), TAG(TD), TAG(TFOOT),
3131
+ TAG(TH), TAG(THEAD), TAG(TR)})) {
2887
3132
  parser_add_parse_error(parser, token);
2888
3133
  ignore_token(parser);
2889
3134
  return false;
2890
- } else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(STYLE), TAG(SCRIPT) })) {
3135
+ } else if (tag_in(token, kStartTag,
3136
+ (gumbo_tagset){TAG(STYLE), TAG(SCRIPT), TAG(TEMPLATE)}) ||
3137
+ (tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE))) {
2891
3138
  return handle_in_head(parser, token);
2892
3139
  } else if (tag_is(token, kStartTag, GUMBO_TAG_INPUT) &&
2893
- attribute_matches(&token->v.start_tag.attributes,
2894
- "type", "hidden")) {
3140
+ attribute_matches(
3141
+ &token->v.start_tag.attributes, "type", "hidden")) {
2895
3142
  parser_add_parse_error(parser, token);
2896
3143
  insert_element_from_token(parser, token);
2897
3144
  pop_current_node(parser);
2898
3145
  return false;
2899
3146
  } else if (tag_is(token, kStartTag, GUMBO_TAG_FORM)) {
2900
3147
  parser_add_parse_error(parser, token);
2901
- if (state->_form_element) {
3148
+ if (state->_form_element || has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
2902
3149
  ignore_token(parser);
2903
3150
  return false;
2904
3151
  }
@@ -2906,11 +3153,7 @@ static bool handle_in_table(GumboParser* parser, GumboToken* token) {
2906
3153
  pop_current_node(parser);
2907
3154
  return false;
2908
3155
  } else if (token->type == GUMBO_TOKEN_EOF) {
2909
- if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_HTML)) {
2910
- parser_add_parse_error(parser, token);
2911
- return false;
2912
- }
2913
- return true;
3156
+ return handle_in_body(parser, token);
2914
3157
  } else {
2915
3158
  parser_add_parse_error(parser, token);
2916
3159
  state->_foster_parent_insertions = true;
@@ -2938,8 +3181,9 @@ static bool handle_in_table_text(GumboParser* parser, GumboToken* token) {
2938
3181
  // Note that TextNodeBuffer may contain UTF-8 characters, but the presence
2939
3182
  // of any one byte that is not whitespace means we flip the flag, so this
2940
3183
  // loop is still valid.
2941
- for (int i = 0; i < buffer->length; ++i) {
2942
- if (!isspace((unsigned char)buffer->data[i]) || buffer->data[i] == '\v') {
3184
+ for (unsigned int i = 0; i < buffer->length; ++i) {
3185
+ if (!isspace((unsigned char) buffer->data[i]) ||
3186
+ buffer->data[i] == '\v') {
2943
3187
  state->_foster_parent_insertions = true;
2944
3188
  reconstruct_active_formatting_elements(parser);
2945
3189
  break;
@@ -2955,35 +3199,43 @@ static bool handle_in_table_text(GumboParser* parser, GumboToken* token) {
2955
3199
 
2956
3200
  // http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-incaption
2957
3201
  static bool handle_in_caption(GumboParser* parser, GumboToken* token) {
2958
- if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(CAPTION), TAG(COL),
2959
- TAG(COLGROUP), TAG(TBODY), TAG(TD),
2960
- TAG(TFOOT), TAG(TH), TAG(THEAD), TAG(TR) }) ||
2961
- tag_in(token, kEndTag, (gumbo_tagset) { TAG(CAPTION), TAG(TABLE) })) {
3202
+ if (tag_is(token, kEndTag, GUMBO_TAG_CAPTION)) {
2962
3203
  if (!has_an_element_in_table_scope(parser, GUMBO_TAG_CAPTION)) {
2963
3204
  parser_add_parse_error(parser, token);
2964
3205
  ignore_token(parser);
2965
3206
  return false;
3207
+ } else {
3208
+ generate_implied_end_tags(parser, GUMBO_TAG_LAST);
3209
+ bool result = true;
3210
+ if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_CAPTION)) {
3211
+ parser_add_parse_error(parser, token);
3212
+ }
3213
+ while (!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_CAPTION))
3214
+ ;
3215
+ clear_active_formatting_elements(parser);
3216
+ set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
3217
+ return result;
2966
3218
  }
2967
- if (!tag_is(token, kEndTag, GUMBO_TAG_CAPTION)) {
2968
- parser_add_parse_error(parser, token);
2969
- parser->_parser_state->_reprocess_current_token = true;
2970
- }
2971
- generate_implied_end_tags(parser, GUMBO_TAG_LAST);
2972
- bool result = true;
2973
- if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_CAPTION)) {
3219
+ } else if (tag_in(token, kStartTag,
3220
+ (gumbo_tagset){TAG(CAPTION), TAG(COL), TAG(COLGROUP),
3221
+ TAG(TBODY), TAG(TD), TAG(TFOOT), TAG(TH), TAG(THEAD),
3222
+ TAG(TR)}) ||
3223
+ (tag_is(token, kEndTag, GUMBO_TAG_TABLE))) {
3224
+ if (!has_an_element_in_table_scope(parser, GUMBO_TAG_CAPTION)) {
2974
3225
  parser_add_parse_error(parser, token);
2975
- while (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_CAPTION)) {
2976
- pop_current_node(parser);
2977
- }
2978
- result = false;
3226
+ ignore_token(parser);
3227
+ return false;
2979
3228
  }
2980
- pop_current_node(parser); // The <caption> itself.
3229
+ while (!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_CAPTION))
3230
+ ;
2981
3231
  clear_active_formatting_elements(parser);
2982
3232
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
2983
- return result;
2984
- } else if (tag_in(token, kEndTag, (gumbo_tagset) { TAG(BODY), TAG(COL),
2985
- TAG(COLGROUP), TAG(HTML), TAG(TBODY), TAG(TD), TAG(TFOOT),
2986
- TAG(TH), TAG(THEAD), TAG(TR) })) {
3233
+ parser->_parser_state->_reprocess_current_token = true;
3234
+ return true;
3235
+ } else if (tag_in(token, kEndTag,
3236
+ (gumbo_tagset){TAG(BODY), TAG(COL), TAG(COLGROUP), TAG(HTML),
3237
+ TAG(TBODY), TAG(TD), TAG(TFOOT), TAG(TH), TAG(THEAD),
3238
+ TAG(TR)})) {
2987
3239
  parser_add_parse_error(parser, token);
2988
3240
  ignore_token(parser);
2989
3241
  return false;
@@ -3011,24 +3263,33 @@ static bool handle_in_column_group(GumboParser* parser, GumboToken* token) {
3011
3263
  pop_current_node(parser);
3012
3264
  acknowledge_self_closing_tag(parser);
3013
3265
  return true;
3266
+ } else if (tag_is(token, kEndTag, GUMBO_TAG_COLGROUP)) {
3267
+ if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_COLGROUP)) {
3268
+ parser_add_parse_error(parser, token);
3269
+ ignore_token(parser);
3270
+ return false;
3271
+ }
3272
+ pop_current_node(parser);
3273
+ set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
3274
+ return false;
3014
3275
  } else if (tag_is(token, kEndTag, GUMBO_TAG_COL)) {
3015
3276
  parser_add_parse_error(parser, token);
3016
3277
  ignore_token(parser);
3017
3278
  return false;
3018
- } else if (token->type == GUMBO_TOKEN_EOF &&
3019
- get_current_node(parser) == parser->_output->root) {
3020
- return true;
3279
+ } else if (tag_is(token, kStartTag, GUMBO_TAG_TEMPLATE) ||
3280
+ tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
3281
+ return handle_in_head(parser, token);
3282
+ } else if (token->type == GUMBO_TOKEN_EOF) {
3283
+ return handle_in_body(parser, token);
3021
3284
  } else {
3022
- if (get_current_node(parser) == parser->_output->root) {
3285
+ if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_COLGROUP)) {
3023
3286
  parser_add_parse_error(parser, token);
3287
+ ignore_token(parser);
3024
3288
  return false;
3025
3289
  }
3026
- assert(node_html_tag_is(get_current_node(parser), GUMBO_TAG_COLGROUP));
3027
3290
  pop_current_node(parser);
3028
3291
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
3029
- if (!tag_is(token, kEndTag, GUMBO_TAG_COLGROUP)) {
3030
- parser->_parser_state->_reprocess_current_token = true;
3031
- }
3292
+ parser->_parser_state->_reprocess_current_token = true;
3032
3293
  return true;
3033
3294
  }
3034
3295
  }
@@ -3040,14 +3301,15 @@ static bool handle_in_table_body(GumboParser* parser, GumboToken* token) {
3040
3301
  insert_element_from_token(parser, token);
3041
3302
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_ROW);
3042
3303
  return true;
3043
- } else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(TD), TAG(TH) })) {
3304
+ } else if (tag_in(token, kStartTag, (gumbo_tagset){TAG(TD), TAG(TH)})) {
3044
3305
  parser_add_parse_error(parser, token);
3045
3306
  clear_stack_to_table_body_context(parser);
3046
3307
  insert_element_of_tag_type(parser, GUMBO_TAG_TR, GUMBO_INSERTION_IMPLIED);
3047
3308
  parser->_parser_state->_reprocess_current_token = true;
3048
3309
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_ROW);
3049
3310
  return false;
3050
- } else if (tag_in(token, kEndTag, (gumbo_tagset) { TAG(TBODY), TAG(TFOOT), TAG(THEAD) })) {
3311
+ } else if (tag_in(token, kEndTag,
3312
+ (gumbo_tagset){TAG(TBODY), TAG(TFOOT), TAG(THEAD)})) {
3051
3313
  if (!has_an_element_in_table_scope(parser, token->v.end_tag)) {
3052
3314
  parser_add_parse_error(parser, token);
3053
3315
  ignore_token(parser);
@@ -3057,12 +3319,13 @@ static bool handle_in_table_body(GumboParser* parser, GumboToken* token) {
3057
3319
  pop_current_node(parser);
3058
3320
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
3059
3321
  return true;
3060
- } else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(CAPTION), TAG(COL),
3061
- TAG(COLGROUP), TAG(TBODY), TAG(TFOOT), TAG(THEAD) }) ||
3322
+ } else if (tag_in(token, kStartTag,
3323
+ (gumbo_tagset){TAG(CAPTION), TAG(COL), TAG(COLGROUP),
3324
+ TAG(TBODY), TAG(TFOOT), TAG(THEAD)}) ||
3062
3325
  tag_is(token, kEndTag, GUMBO_TAG_TABLE)) {
3063
3326
  if (!(has_an_element_in_table_scope(parser, GUMBO_TAG_TBODY) ||
3064
- has_an_element_in_table_scope(parser, GUMBO_TAG_THEAD) ||
3065
- has_an_element_in_table_scope(parser, GUMBO_TAG_TFOOT))) {
3327
+ has_an_element_in_table_scope(parser, GUMBO_TAG_THEAD) ||
3328
+ has_an_element_in_table_scope(parser, GUMBO_TAG_TFOOT))) {
3066
3329
  parser_add_parse_error(parser, token);
3067
3330
  ignore_token(parser);
3068
3331
  return false;
@@ -3072,9 +3335,9 @@ static bool handle_in_table_body(GumboParser* parser, GumboToken* token) {
3072
3335
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
3073
3336
  parser->_parser_state->_reprocess_current_token = true;
3074
3337
  return true;
3075
- } else if (tag_in(token, kEndTag, (gumbo_tagset) { TAG(BODY), TAG(CAPTION),
3076
- TAG(COL), TAG(TR), TAG(COLGROUP), TAG(HTML), TAG(TD), TAG(TH) }))
3077
- {
3338
+ } else if (tag_in(token, kEndTag,
3339
+ (gumbo_tagset){TAG(BODY), TAG(CAPTION), TAG(COL), TAG(TR),
3340
+ TAG(COLGROUP), TAG(HTML), TAG(TD), TAG(TH)})) {
3078
3341
  parser_add_parse_error(parser, token);
3079
3342
  ignore_token(parser);
3080
3343
  return false;
@@ -3085,45 +3348,55 @@ static bool handle_in_table_body(GumboParser* parser, GumboToken* token) {
3085
3348
 
3086
3349
  // http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-intr
3087
3350
  static bool handle_in_row(GumboParser* parser, GumboToken* token) {
3088
- if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(TH), TAG(TD) })) {
3351
+ if (tag_in(token, kStartTag, (gumbo_tagset){TAG(TH), TAG(TD)})) {
3089
3352
  clear_stack_to_table_row_context(parser);
3090
3353
  insert_element_from_token(parser, token);
3091
3354
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_CELL);
3092
3355
  add_formatting_element(parser, &kActiveFormattingScopeMarker);
3093
3356
  return true;
3094
- } else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(CAPTION), TAG(COLGROUP),
3095
- TAG(TBODY), TAG(TFOOT), TAG(THEAD), TAG(TR) }) ||
3096
- tag_in(token, kEndTag, (gumbo_tagset) { TAG(TR), TAG(TABLE),
3097
- TAG(TBODY), TAG(TFOOT), TAG(THEAD) })) {
3098
- // This case covers 4 clauses of the spec, each of which say "Otherwise, act
3099
- // as if an end tag with the tag name "tr" had been seen." The differences
3100
- // are in error handling and whether the current token is reprocessed.
3101
- GumboTag desired_tag =
3102
- tag_in(token, kEndTag, (gumbo_tagset) { TAG(TBODY), TAG(TFOOT),
3103
- TAG(THEAD) })
3104
- ? token->v.end_tag : GUMBO_TAG_TR;
3105
- if (!has_an_element_in_table_scope(parser, desired_tag)) {
3106
- gumbo_debug("Bailing because there is no tag %s in table scope.\nOpen elements:",
3107
- gumbo_normalized_tagname(desired_tag));
3108
- for (int i = 0; i < parser->_parser_state->_open_elements.length; ++i) {
3109
- const GumboNode* node = parser->_parser_state->_open_elements.data[i];
3110
- gumbo_debug("%s\n", gumbo_normalized_tagname(node->v.element.tag));
3111
- }
3357
+ } else if (tag_is(token, kEndTag, GUMBO_TAG_TR)) {
3358
+ if (!has_an_element_in_table_scope(parser, GUMBO_TAG_TR)) {
3112
3359
  parser_add_parse_error(parser, token);
3113
3360
  ignore_token(parser);
3114
3361
  return false;
3362
+ } else {
3363
+ clear_stack_to_table_row_context(parser);
3364
+ pop_current_node(parser);
3365
+ set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
3366
+ return true;
3115
3367
  }
3116
- clear_stack_to_table_row_context(parser);
3117
- GumboNode* last_element = pop_current_node(parser);
3118
- assert(node_html_tag_is(last_element, GUMBO_TAG_TR));
3119
- AVOID_UNUSED_VARIABLE_WARNING(last_element);
3120
- set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
3121
- if (!tag_is(token, kEndTag, GUMBO_TAG_TR)) {
3368
+ } else if (tag_in(token, kStartTag,
3369
+ (gumbo_tagset){TAG(CAPTION), TAG(COL), TAG(COLGROUP),
3370
+ TAG(TBODY), TAG(TFOOT), TAG(THEAD), TAG(TR)}) ||
3371
+ tag_is(token, kEndTag, GUMBO_TAG_TABLE)) {
3372
+ if (!has_an_element_in_table_scope(parser, GUMBO_TAG_TR)) {
3373
+ parser_add_parse_error(parser, token);
3374
+ ignore_token(parser);
3375
+ return false;
3376
+ } else {
3377
+ clear_stack_to_table_row_context(parser);
3378
+ pop_current_node(parser);
3379
+ set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
3122
3380
  parser->_parser_state->_reprocess_current_token = true;
3381
+ return true;
3123
3382
  }
3124
- return true;
3125
- } else if (tag_in(token, kEndTag, (gumbo_tagset) { TAG(BODY), TAG(CAPTION),
3126
- TAG(COL), TAG(COLGROUP), TAG(HTML), TAG(TD), TAG(TH) })) {
3383
+ } else if (tag_in(token, kEndTag,
3384
+ (gumbo_tagset){TAG(TBODY), TAG(TFOOT), TAG(THEAD)})) {
3385
+ if (!has_an_element_in_table_scope(parser, token->v.end_tag) ||
3386
+ (!has_an_element_in_table_scope(parser, GUMBO_TAG_TR))) {
3387
+ parser_add_parse_error(parser, token);
3388
+ ignore_token(parser);
3389
+ return false;
3390
+ } else {
3391
+ clear_stack_to_table_row_context(parser);
3392
+ pop_current_node(parser);
3393
+ set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
3394
+ parser->_parser_state->_reprocess_current_token = true;
3395
+ return true;
3396
+ }
3397
+ } else if (tag_in(token, kEndTag,
3398
+ (gumbo_tagset){TAG(BODY), TAG(CAPTION), TAG(COL),
3399
+ TAG(COLGROUP), TAG(HTML), TAG(TD), TAG(TH)})) {
3127
3400
  parser_add_parse_error(parser, token);
3128
3401
  ignore_token(parser);
3129
3402
  return false;
@@ -3134,16 +3407,18 @@ static bool handle_in_row(GumboParser* parser, GumboToken* token) {
3134
3407
 
3135
3408
  // http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-intd
3136
3409
  static bool handle_in_cell(GumboParser* parser, GumboToken* token) {
3137
- if (tag_in(token, kEndTag, (gumbo_tagset) { TAG(TD), TAG(TH) })) {
3410
+ if (tag_in(token, kEndTag, (gumbo_tagset){TAG(TD), TAG(TH)})) {
3138
3411
  GumboTag token_tag = token->v.end_tag;
3139
3412
  if (!has_an_element_in_table_scope(parser, token_tag)) {
3140
3413
  parser_add_parse_error(parser, token);
3414
+ ignore_token(parser);
3141
3415
  return false;
3142
3416
  }
3143
3417
  return close_table_cell(parser, token, token_tag);
3144
- } else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(CAPTION), TAG(COL),
3145
- TAG(COLGROUP), TAG(TBODY), TAG(TD),
3146
- TAG(TFOOT), TAG(TH), TAG(THEAD), TAG(TR) })) {
3418
+ } else if (tag_in(token, kStartTag,
3419
+ (gumbo_tagset){TAG(CAPTION), TAG(COL), TAG(COLGROUP),
3420
+ TAG(TBODY), TAG(TD), TAG(TFOOT), TAG(TH), TAG(THEAD),
3421
+ TAG(TR)})) {
3147
3422
  gumbo_debug("Handling <td> in cell.\n");
3148
3423
  if (!has_an_element_in_table_scope(parser, GUMBO_TAG_TH) &&
3149
3424
  !has_an_element_in_table_scope(parser, GUMBO_TAG_TD)) {
@@ -3154,13 +3429,13 @@ static bool handle_in_cell(GumboParser* parser, GumboToken* token) {
3154
3429
  }
3155
3430
  parser->_parser_state->_reprocess_current_token = true;
3156
3431
  return close_current_cell(parser, token);
3157
- } else if (tag_in(token, kEndTag, (gumbo_tagset) { TAG(BODY), TAG(CAPTION),
3158
- TAG(COL), TAG(COLGROUP), TAG(HTML) })) {
3432
+ } else if (tag_in(token, kEndTag, (gumbo_tagset){TAG(BODY), TAG(CAPTION),
3433
+ TAG(COL), TAG(COLGROUP), TAG(HTML)})) {
3159
3434
  parser_add_parse_error(parser, token);
3160
3435
  ignore_token(parser);
3161
3436
  return false;
3162
- } else if (tag_in(token, kEndTag, (gumbo_tagset) { TAG(TABLE), TAG(TBODY),
3163
- TAG(TFOOT), TAG(THEAD), TAG(TR) })) {
3437
+ } else if (tag_in(token, kEndTag, (gumbo_tagset){TAG(TABLE), TAG(TBODY),
3438
+ TAG(TFOOT), TAG(THEAD), TAG(TR)})) {
3164
3439
  if (!has_an_element_in_table_scope(parser, token->v.end_tag)) {
3165
3440
  parser_add_parse_error(parser, token);
3166
3441
  ignore_token(parser);
@@ -3211,7 +3486,7 @@ static bool handle_in_select(GumboParser* parser, GumboToken* token) {
3211
3486
  GumboVector* open_elements = &parser->_parser_state->_open_elements;
3212
3487
  if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTION) &&
3213
3488
  node_html_tag_is(open_elements->data[open_elements->length - 2],
3214
- GUMBO_TAG_OPTGROUP)) {
3489
+ GUMBO_TAG_OPTGROUP)) {
3215
3490
  pop_current_node(parser);
3216
3491
  }
3217
3492
  if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTGROUP)) {
@@ -3242,9 +3517,12 @@ static bool handle_in_select(GumboParser* parser, GumboToken* token) {
3242
3517
  } else if (tag_is(token, kStartTag, GUMBO_TAG_SELECT)) {
3243
3518
  parser_add_parse_error(parser, token);
3244
3519
  ignore_token(parser);
3245
- close_current_select(parser);
3520
+ if (has_an_element_in_select_scope(parser, GUMBO_TAG_SELECT)) {
3521
+ close_current_select(parser);
3522
+ }
3246
3523
  return false;
3247
- } else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(INPUT), TAG(KEYGEN), TAG(TEXTAREA) })) {
3524
+ } else if (tag_in(token, kStartTag,
3525
+ (gumbo_tagset){TAG(INPUT), TAG(KEYGEN), TAG(TEXTAREA)})) {
3248
3526
  parser_add_parse_error(parser, token);
3249
3527
  if (!has_an_element_in_select_scope(parser, GUMBO_TAG_SELECT)) {
3250
3528
  ignore_token(parser);
@@ -3253,14 +3531,12 @@ static bool handle_in_select(GumboParser* parser, GumboToken* token) {
3253
3531
  parser->_parser_state->_reprocess_current_token = true;
3254
3532
  }
3255
3533
  return false;
3256
- } else if (tag_is(token, kStartTag, GUMBO_TAG_SCRIPT)) {
3534
+ } else if (tag_in(token, kStartTag,
3535
+ (gumbo_tagset){TAG(SCRIPT), TAG(TEMPLATE)}) ||
3536
+ tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
3257
3537
  return handle_in_head(parser, token);
3258
3538
  } else if (token->type == GUMBO_TOKEN_EOF) {
3259
- if (get_current_node(parser) != parser->_output->root) {
3260
- parser_add_parse_error(parser, token);
3261
- return false;
3262
- }
3263
- return true;
3539
+ return handle_in_body(parser, token);
3264
3540
  } else {
3265
3541
  parser_add_parse_error(parser, token);
3266
3542
  ignore_token(parser);
@@ -3270,23 +3546,28 @@ static bool handle_in_select(GumboParser* parser, GumboToken* token) {
3270
3546
 
3271
3547
  // http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-inselectintable
3272
3548
  static bool handle_in_select_in_table(GumboParser* parser, GumboToken* token) {
3273
- if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(CAPTION), TAG(TABLE),
3274
- TAG(TBODY), TAG(TFOOT), TAG(THEAD), TAG(TR), TAG(TD), TAG(TH) })) {
3549
+ if (tag_in(token, kStartTag,
3550
+ (gumbo_tagset){TAG(CAPTION), TAG(TABLE), TAG(TBODY), TAG(TFOOT),
3551
+ TAG(THEAD), TAG(TR), TAG(TD), TAG(TH)})) {
3275
3552
  parser_add_parse_error(parser, token);
3276
3553
  close_current_select(parser);
3277
3554
  parser->_parser_state->_reprocess_current_token = true;
3278
3555
  return false;
3279
- } else if (tag_in(token, kEndTag, (gumbo_tagset) { TAG(CAPTION), TAG(TABLE),
3280
- TAG(TBODY), TAG(TFOOT), TAG(THEAD), TAG(TR), TAG(TD), TAG(TH) })) {
3556
+ } else if (tag_in(token, kEndTag,
3557
+ (gumbo_tagset){TAG(CAPTION), TAG(TABLE), TAG(TBODY),
3558
+ TAG(TFOOT), TAG(THEAD), TAG(TR), TAG(TD), TAG(TH)})) {
3281
3559
  parser_add_parse_error(parser, token);
3282
- if (has_an_element_in_table_scope(parser, token->v.end_tag)) {
3560
+ if (!has_an_element_in_table_scope(parser, token->v.end_tag)) {
3561
+ ignore_token(parser);
3562
+ return false;
3563
+ } else {
3283
3564
  close_current_select(parser);
3284
- reset_insertion_mode_appropriately(parser);
3565
+ // close_current_select already does the
3566
+ // reset_insertion_mode_appropriately
3567
+ // reset_insertion_mode_appropriately(parser);
3285
3568
  parser->_parser_state->_reprocess_current_token = true;
3286
- } else {
3287
- ignore_token(parser);
3569
+ return false;
3288
3570
  }
3289
- return false;
3290
3571
  } else {
3291
3572
  return handle_in_select(parser, token);
3292
3573
  }
@@ -3294,8 +3575,71 @@ static bool handle_in_select_in_table(GumboParser* parser, GumboToken* token) {
3294
3575
 
3295
3576
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#parsing-main-intemplate
3296
3577
  static bool handle_in_template(GumboParser* parser, GumboToken* token) {
3297
- // TODO(jdtang): Implement this.
3298
- return true;
3578
+ GumboParserState* state = parser->_parser_state;
3579
+ if (token->type == GUMBO_TOKEN_WHITESPACE ||
3580
+ token->type == GUMBO_TOKEN_CHARACTER ||
3581
+ token->type == GUMBO_TOKEN_COMMENT || token->type == GUMBO_TOKEN_NULL ||
3582
+ token->type == GUMBO_TOKEN_DOCTYPE) {
3583
+ return handle_in_body(parser, token);
3584
+ } else if (tag_in(token, kStartTag,
3585
+ (gumbo_tagset){TAG(BASE), TAG(BASEFONT), TAG(BGSOUND),
3586
+ TAG(LINK), TAG(META), TAG(NOFRAMES), TAG(SCRIPT),
3587
+ TAG(STYLE), TAG(TEMPLATE), TAG(TITLE)}) ||
3588
+ tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
3589
+ return handle_in_head(parser, token);
3590
+ } else if (tag_in(
3591
+ token, kStartTag, (gumbo_tagset){TAG(CAPTION), TAG(COLGROUP),
3592
+ TAG(TBODY), TAG(TFOOT), TAG(THEAD)})) {
3593
+ pop_template_insertion_mode(parser);
3594
+ push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
3595
+ set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
3596
+ state->_reprocess_current_token = true;
3597
+ return true;
3598
+ } else if (tag_is(token, kStartTag, GUMBO_TAG_COL)) {
3599
+ pop_template_insertion_mode(parser);
3600
+ push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_COLUMN_GROUP);
3601
+ set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_COLUMN_GROUP);
3602
+ state->_reprocess_current_token = true;
3603
+ return true;
3604
+ } else if (tag_is(token, kStartTag, GUMBO_TAG_TR)) {
3605
+ pop_template_insertion_mode(parser);
3606
+ push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
3607
+ set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
3608
+ state->_reprocess_current_token = true;
3609
+ return true;
3610
+ } else if (tag_in(token, kStartTag, (gumbo_tagset){TAG(TD), TAG(TH)})) {
3611
+ pop_template_insertion_mode(parser);
3612
+ push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_ROW);
3613
+ set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_ROW);
3614
+ state->_reprocess_current_token = true;
3615
+ return true;
3616
+ } else if (token->type == GUMBO_TOKEN_START_TAG) {
3617
+ pop_template_insertion_mode(parser);
3618
+ push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_BODY);
3619
+ set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_BODY);
3620
+ state->_reprocess_current_token = true;
3621
+ return true;
3622
+ } else if (token->type == GUMBO_TOKEN_END_TAG) {
3623
+ parser_add_parse_error(parser, token);
3624
+ ignore_token(parser);
3625
+ return false;
3626
+ } else if (token->type == GUMBO_TOKEN_EOF) {
3627
+ if (!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
3628
+ // Stop parsing.
3629
+ return true;
3630
+ }
3631
+ parser_add_parse_error(parser, token);
3632
+ while (!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_TEMPLATE))
3633
+ ;
3634
+ clear_active_formatting_elements(parser);
3635
+ pop_template_insertion_mode(parser);
3636
+ reset_insertion_mode_appropriately(parser);
3637
+ state->_reprocess_current_token = true;
3638
+ return false;
3639
+ } else {
3640
+ assert(0);
3641
+ return false;
3642
+ }
3299
3643
  }
3300
3644
 
3301
3645
  // http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-afterbody
@@ -3313,7 +3657,12 @@ static bool handle_after_body(GumboParser* parser, GumboToken* token) {
3313
3657
  ignore_token(parser);
3314
3658
  return false;
3315
3659
  } else if (tag_is(token, kEndTag, GUMBO_TAG_HTML)) {
3316
- // TODO(jdtang): Handle fragment parsing algorithm case.
3660
+ /* fragment case: ignore the closing HTML token */
3661
+ if (is_fragment_parser(parser)) {
3662
+ parser_add_parse_error(parser, token);
3663
+ ignore_token(parser);
3664
+ return false;
3665
+ }
3317
3666
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_AFTER_BODY);
3318
3667
  GumboNode* html = parser->_parser_state->_open_elements.data[0];
3319
3668
  assert(node_html_tag_is(html, GUMBO_TAG_HTML));
@@ -3354,9 +3703,8 @@ static bool handle_in_frameset(GumboParser* parser, GumboToken* token) {
3354
3703
  return false;
3355
3704
  }
3356
3705
  pop_current_node(parser);
3357
- // TODO(jdtang): Add a condition to ignore this for the fragment parsing
3358
- // algorithm.
3359
- if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_FRAMESET)) {
3706
+ if (!is_fragment_parser(parser) &&
3707
+ !node_html_tag_is(get_current_node(parser), GUMBO_TAG_FRAMESET)) {
3360
3708
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_FRAMESET);
3361
3709
  }
3362
3710
  return true;
@@ -3455,31 +3803,14 @@ static bool handle_after_after_frameset(
3455
3803
  // Function pointers for each insertion mode. Keep in sync with
3456
3804
  // insertion_mode.h.
3457
3805
  typedef bool (*TokenHandler)(GumboParser* parser, GumboToken* token);
3458
- static const TokenHandler kTokenHandlers[] = {
3459
- handle_initial,
3460
- handle_before_html,
3461
- handle_before_head,
3462
- handle_in_head,
3463
- handle_in_head_noscript,
3464
- handle_after_head,
3465
- handle_in_body,
3466
- handle_text,
3467
- handle_in_table,
3468
- handle_in_table_text,
3469
- handle_in_caption,
3470
- handle_in_column_group,
3471
- handle_in_table_body,
3472
- handle_in_row,
3473
- handle_in_cell,
3474
- handle_in_select,
3475
- handle_in_select_in_table,
3476
- handle_in_template,
3477
- handle_after_body,
3478
- handle_in_frameset,
3479
- handle_after_frameset,
3480
- handle_after_after_body,
3481
- handle_after_after_frameset
3482
- };
3806
+ static const TokenHandler kTokenHandlers[] = {handle_initial,
3807
+ handle_before_html, handle_before_head, handle_in_head,
3808
+ handle_in_head_noscript, handle_after_head, handle_in_body, handle_text,
3809
+ handle_in_table, handle_in_table_text, handle_in_caption,
3810
+ handle_in_column_group, handle_in_table_body, handle_in_row, handle_in_cell,
3811
+ handle_in_select, handle_in_select_in_table, handle_in_template,
3812
+ handle_after_body, handle_in_frameset, handle_after_frameset,
3813
+ handle_after_after_body, handle_after_after_frameset};
3483
3814
 
3484
3815
  static bool handle_html_content(GumboParser* parser, GumboToken* token) {
3485
3816
  return kTokenHandlers[(unsigned int) parser->_parser_state->_insertion_mode](
@@ -3488,6 +3819,7 @@ static bool handle_html_content(GumboParser* parser, GumboToken* token) {
3488
3819
 
3489
3820
  // http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-inforeign
3490
3821
  static bool handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
3822
+ gumbo_debug("Handling foreign content");
3491
3823
  switch (token->type) {
3492
3824
  case GUMBO_TOKEN_NULL:
3493
3825
  parser_add_parse_error(parser, token);
@@ -3514,34 +3846,44 @@ static bool handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
3514
3846
  break;
3515
3847
  }
3516
3848
  // Order matters for these clauses.
3517
- if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(B), TAG(BIG),
3518
- TAG(BLOCKQUOTE), TAG(BODY), TAG(BR),
3519
- TAG(CENTER), TAG(CODE), TAG(DD), TAG(DIV),
3520
- TAG(DL), TAG(DT), TAG(EM), TAG(EMBED),
3521
- TAG(H1), TAG(H2), TAG(H3), TAG(H4),
3522
- TAG(H5), TAG(H6), TAG(HEAD), TAG(HR),
3523
- TAG(I), TAG(IMG), TAG(LI), TAG(LISTING),
3524
- TAG(MENU), TAG(META), TAG(NOBR), TAG(OL),
3525
- TAG(P), TAG(PRE), TAG(RUBY), TAG(S),
3526
- TAG(SMALL), TAG(SPAN), TAG(STRONG),
3527
- TAG(STRIKE), TAG(SUB), TAG(SUP),
3528
- TAG(TABLE), TAG(TT), TAG(U), TAG(UL), TAG(VAR) }) ||
3529
- (tag_is(token, kStartTag, GUMBO_TAG_FONT) && (
3530
- token_has_attribute(token, "color") ||
3531
- token_has_attribute(token, "face") ||
3532
- token_has_attribute(token, "size")))) {
3849
+ if (tag_in(token, kStartTag,
3850
+ (gumbo_tagset){TAG(B), TAG(BIG), TAG(BLOCKQUOTE), TAG(BODY), TAG(BR),
3851
+ TAG(CENTER), TAG(CODE), TAG(DD), TAG(DIV), TAG(DL), TAG(DT),
3852
+ TAG(EM), TAG(EMBED), TAG(H1), TAG(H2), TAG(H3), TAG(H4), TAG(H5),
3853
+ TAG(H6), TAG(HEAD), TAG(HR), TAG(I), TAG(IMG), TAG(LI),
3854
+ TAG(LISTING), TAG(MENU), TAG(META), TAG(NOBR), TAG(OL), TAG(P),
3855
+ TAG(PRE), TAG(RUBY), TAG(S), TAG(SMALL), TAG(SPAN), TAG(STRONG),
3856
+ TAG(STRIKE), TAG(SUB), TAG(SUP), TAG(TABLE), TAG(TT), TAG(U),
3857
+ TAG(UL), TAG(VAR)}) ||
3858
+ (tag_is(token, kStartTag, GUMBO_TAG_FONT) &&
3859
+ (token_has_attribute(token, "color") ||
3860
+ token_has_attribute(token, "face") ||
3861
+ token_has_attribute(token, "size")))) {
3862
+ /* Parse error */
3533
3863
  parser_add_parse_error(parser, token);
3534
- do {
3535
- pop_current_node(parser);
3536
- } while(!(is_mathml_integration_point(get_current_node(parser)) ||
3537
- is_html_integration_point(get_current_node(parser)) ||
3538
- get_current_node(parser)->v.element.tag_namespace ==
3539
- GUMBO_NAMESPACE_HTML));
3540
- parser->_parser_state->_reprocess_current_token = true;
3541
- return false;
3542
- } else if (token->type == GUMBO_TOKEN_START_TAG) {
3864
+
3865
+ /*
3866
+ * Fragment case: If the parser was originally created for the HTML
3867
+ * fragment parsing algorithm, then act as described in the "any other
3868
+ * start tag" entry below.
3869
+ */
3870
+ if (!is_fragment_parser(parser)) {
3871
+ do {
3872
+ pop_current_node(parser);
3873
+ } while (!(is_mathml_integration_point(get_current_node(parser)) ||
3874
+ is_html_integration_point(get_current_node(parser)) ||
3875
+ get_current_node(parser)->v.element.tag_namespace ==
3876
+ GUMBO_NAMESPACE_HTML));
3877
+ parser->_parser_state->_reprocess_current_token = true;
3878
+ return false;
3879
+ }
3880
+
3881
+ assert(token->type == GUMBO_TOKEN_START_TAG);
3882
+ }
3883
+
3884
+ if (token->type == GUMBO_TOKEN_START_TAG) {
3543
3885
  const GumboNamespaceEnum current_namespace =
3544
- get_current_node(parser)->v.element.tag_namespace;
3886
+ get_adjusted_current_node(parser)->v.element.tag_namespace;
3545
3887
  if (current_namespace == GUMBO_NAMESPACE_MATHML) {
3546
3888
  adjust_mathml_attributes(parser, token);
3547
3889
  }
@@ -3557,8 +3899,8 @@ static bool handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
3557
3899
  acknowledge_self_closing_tag(parser);
3558
3900
  }
3559
3901
  return true;
3560
- // </script> tags are handled like any other end tag, putting the script's
3561
- // text into a text node child and closing the current node.
3902
+ // </script> tags are handled like any other end tag, putting the script's
3903
+ // text into a text node child and closing the current node.
3562
3904
  } else {
3563
3905
  assert(token->type == GUMBO_TOKEN_END_TAG);
3564
3906
  GumboNode* node = get_current_node(parser);
@@ -3574,13 +3916,13 @@ static bool handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
3574
3916
  is_success = false;
3575
3917
  }
3576
3918
  int i = parser->_parser_state->_open_elements.length;
3577
- for( --i; i > 0; ) {
3919
+ for (--i; i > 0;) {
3578
3920
  // Here we move up the stack until we find an HTML element (in which
3579
3921
  // case we do nothing) or we find the element that we're about to
3580
3922
  // close (in which case we pop everything we've seen until that
3581
3923
  // point.)
3582
3924
  gumbo_debug("Foreign %.*s node at %d.\n", node_tagname.length,
3583
- node_tagname.data, i);
3925
+ node_tagname.data, i);
3584
3926
  if (gumbo_string_equals_ignore_case(&node_tagname, &token_tagname)) {
3585
3927
  gumbo_debug("Matches.\n");
3586
3928
  while (pop_current_node(parser) != node) {
@@ -3608,7 +3950,6 @@ static bool handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
3608
3950
  }
3609
3951
  }
3610
3952
 
3611
-
3612
3953
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#tree-construction
3613
3954
  static bool handle_token(GumboParser* parser, GumboToken* token) {
3614
3955
  if (parser->_parser_state->_ignore_next_linefeed &&
@@ -3630,28 +3971,31 @@ static bool handle_token(GumboParser* parser, GumboToken* token) {
3630
3971
  parser->_parser_state->_closed_html_tag = true;
3631
3972
  }
3632
3973
 
3633
- const GumboNode* current_node = get_current_node(parser);
3634
- assert(!current_node || current_node->type == GUMBO_NODE_ELEMENT);
3974
+ const GumboNode* current_node = get_adjusted_current_node(parser);
3975
+ assert(!current_node || current_node->type == GUMBO_NODE_ELEMENT ||
3976
+ current_node->type == GUMBO_NODE_TEMPLATE);
3635
3977
  if (current_node) {
3636
3978
  gumbo_debug("Current node: <%s>.\n",
3637
- gumbo_normalized_tagname(current_node->v.element.tag));
3979
+ gumbo_normalized_tagname(current_node->v.element.tag));
3638
3980
  }
3639
3981
  if (!current_node ||
3640
3982
  current_node->v.element.tag_namespace == GUMBO_NAMESPACE_HTML ||
3641
3983
  (is_mathml_integration_point(current_node) &&
3642
- (token->type == GUMBO_TOKEN_CHARACTER ||
3643
- token->type == GUMBO_TOKEN_WHITESPACE ||
3644
- token->type == GUMBO_TOKEN_NULL ||
3645
- (token->type == GUMBO_TOKEN_START_TAG &&
3646
- !tag_in(token, kStartTag, (gumbo_tagset) { TAG(MGLYPH), TAG(MALIGNMARK) })))) ||
3984
+ (token->type == GUMBO_TOKEN_CHARACTER ||
3985
+ token->type == GUMBO_TOKEN_WHITESPACE ||
3986
+ token->type == GUMBO_TOKEN_NULL ||
3987
+ (token->type == GUMBO_TOKEN_START_TAG &&
3988
+ !tag_in(token, kStartTag,
3989
+ (gumbo_tagset){TAG(MGLYPH), TAG(MALIGNMARK)})))) ||
3647
3990
  (current_node->v.element.tag_namespace == GUMBO_NAMESPACE_MATHML &&
3648
- node_qualified_tag_is(current_node, GUMBO_NAMESPACE_MATHML, GUMBO_TAG_ANNOTATION_XML) &&
3649
- tag_is(token, kStartTag, GUMBO_TAG_SVG)) ||
3650
- (is_html_integration_point(current_node) && (
3651
- token->type == GUMBO_TOKEN_START_TAG ||
3652
- token->type == GUMBO_TOKEN_CHARACTER ||
3653
- token->type == GUMBO_TOKEN_NULL ||
3654
- token->type == GUMBO_TOKEN_WHITESPACE)) ||
3991
+ node_qualified_tag_is(
3992
+ current_node, GUMBO_NAMESPACE_MATHML, GUMBO_TAG_ANNOTATION_XML) &&
3993
+ tag_is(token, kStartTag, GUMBO_TAG_SVG)) ||
3994
+ (is_html_integration_point(current_node) &&
3995
+ (token->type == GUMBO_TOKEN_START_TAG ||
3996
+ token->type == GUMBO_TOKEN_CHARACTER ||
3997
+ token->type == GUMBO_TOKEN_NULL ||
3998
+ token->type == GUMBO_TOKEN_WHITESPACE)) ||
3655
3999
  token->type == GUMBO_TOKEN_EOF) {
3656
4000
  return handle_html_content(parser, token);
3657
4001
  } else {
@@ -3659,6 +4003,66 @@ static bool handle_token(GumboParser* parser, GumboToken* token) {
3659
4003
  }
3660
4004
  }
3661
4005
 
4006
+ static void fragment_parser_init(GumboParser* parser, GumboTag fragment_ctx,
4007
+ GumboNamespaceEnum fragment_namespace) {  // Prepares parser and tokenizer state for parsing a fragment inside a context element of tag fragment_ctx in namespace fragment_namespace.
4008
+ GumboNode* root;
4009
+ assert(fragment_ctx != GUMBO_TAG_LAST);  // gumbo_parse_with_options only calls this when options->fragment_context != GUMBO_TAG_LAST.
4010
+
4011
+ // Step 3 (the step numbers presumably follow the HTML fragment parsing algorithm -- confirm against the spec): create the context element and record the requested namespace on it.
4012
+ parser->_parser_state->_fragment_ctx = create_element(parser, fragment_ctx);
4013
+ parser->_parser_state->_fragment_ctx->v.element.tag_namespace =
4014
+ fragment_namespace;
4015
+
4016
+ // Step 4: choose the tokenizer's starting state from the context element's tag.
4017
+ if (fragment_namespace == GUMBO_NAMESPACE_HTML) {
4018
+ // Only an HTML-namespace context may switch the tokenizer state here; non-HTML namespaces always start in the DATA state.
4019
+ switch (fragment_ctx) {
4020
+ case GUMBO_TAG_TITLE:
4021
+ case GUMBO_TAG_TEXTAREA:
4022
+ gumbo_tokenizer_set_state(parser, GUMBO_LEX_RCDATA);
4023
+ break;
4024
+
4025
+ case GUMBO_TAG_STYLE:
4026
+ case GUMBO_TAG_XMP:
4027
+ case GUMBO_TAG_IFRAME:
4028
+ case GUMBO_TAG_NOEMBED:
4029
+ case GUMBO_TAG_NOFRAMES:
4030
+ gumbo_tokenizer_set_state(parser, GUMBO_LEX_RAWTEXT);
4031
+ break;
4032
+
4033
+ case GUMBO_TAG_SCRIPT:
4034
+ gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT);
4035
+ break;
4036
+
4037
+ case GUMBO_TAG_NOSCRIPT:
4038
+ /* scripting is disabled in Gumbo, so leave the tokenizer
4039
+ * in the default data state */
4040
+ break;
4041
+
4042
+ case GUMBO_TAG_PLAINTEXT:
4043
+ gumbo_tokenizer_set_state(parser, GUMBO_LEX_PLAINTEXT);
4044
+ break;
4045
+
4046
+ default:
4047
+ /* every other context tag: tokenizer state is left untouched (default data state) */
4048
+ break;
4049
+ }
4050
+ }
4051
+
4052
+ // Steps 5-7: insert an HTML element (insertion reason GUMBO_INSERTION_IMPLIED) and publish it as the root of the parse output.
4053
+ root = insert_element_of_tag_type(
4054
+ parser, GUMBO_TAG_HTML, GUMBO_INSERTION_IMPLIED);
4055
+ parser->_output->root = root;
4056
+
4057
+ // Step 8: a <template> context starts with IN_TEMPLATE pushed as a template insertion mode.
4058
+ if (fragment_ctx == GUMBO_TAG_TEMPLATE) {
4059
+ push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TEMPLATE);
4060
+ }
4061
+
4062
+ // Step 10: let reset_insertion_mode_appropriately() pick the starting insertion mode. NOTE(review): no separate code for step 9 appears here -- confirm that is intentional.
4063
+ reset_insertion_mode_appropriately(parser);
4064
+ }
4065
+
3662
4066
  GumboOutput* gumbo_parse(const char* buffer) {
3663
4067
  return gumbo_parse_with_options(
3664
4068
  &kGumboDefaultOptions, buffer, strlen(buffer));
@@ -3672,6 +4076,11 @@ GumboOutput* gumbo_parse_with_options(
3672
4076
  gumbo_tokenizer_state_init(&parser, buffer, length);
3673
4077
  parser_state_init(&parser);
3674
4078
 
4079
+ if (options->fragment_context != GUMBO_TAG_LAST) {
4080
+ fragment_parser_init(
4081
+ &parser, options->fragment_context, options->fragment_namespace);
4082
+ }
4083
+
3675
4084
  GumboParserState* state = parser._parser_state;
3676
4085
  gumbo_debug("Parsing %.*s.\n", length, buffer);
3677
4086
 
@@ -3687,9 +4096,9 @@ GumboOutput* gumbo_parse_with_options(
3687
4096
  state->_reprocess_current_token = false;
3688
4097
  } else {
3689
4098
  GumboNode* current_node = get_current_node(&parser);
3690
- gumbo_tokenizer_set_is_current_node_foreign(
3691
- &parser, current_node &&
3692
- current_node->v.element.tag_namespace != GUMBO_NAMESPACE_HTML);
4099
+ gumbo_tokenizer_set_is_current_node_foreign(&parser,
4100
+ current_node &&
4101
+ current_node->v.element.tag_namespace != GUMBO_NAMESPACE_HTML);
3693
4102
  has_error = !gumbo_lex(&parser, &token) || has_error;
3694
4103
  }
3695
4104
  const char* token_type = "text";
@@ -3709,14 +4118,13 @@ GumboOutput* gumbo_parse_with_options(
3709
4118
  default:
3710
4119
  break;
3711
4120
  }
3712
- gumbo_debug("Handling %s token @%d:%d in state %d.\n",
3713
- (char*) token_type, token.position.line, token.position.column,
3714
- state->_insertion_mode);
4121
+ gumbo_debug("Handling %s token @%d:%d in state %d.\n", (char*) token_type,
4122
+ token.position.line, token.position.column, state->_insertion_mode);
3715
4123
 
3716
4124
  state->_current_token = &token;
3717
4125
  state->_self_closing_flag_acknowledged =
3718
4126
  !(token.type == GUMBO_TOKEN_START_TAG &&
3719
- token.v.start_tag.is_self_closing);
4127
+ token.v.start_tag.is_self_closing);
3720
4128
 
3721
4129
  has_error = !handle_token(&parser, &token) || has_error;
3722
4130
 
@@ -3772,7 +4180,7 @@ void gumbo_destroy_output(const GumboOptions* options, GumboOutput* output) {
3772
4180
  GumboParser parser;
3773
4181
  parser._options = options;
3774
4182
  destroy_node(&parser, output->document);
3775
- for (int i = 0; i < output->errors.length; ++i) {
4183
+ for (unsigned int i = 0; i < output->errors.length; ++i) {
3776
4184
  gumbo_error_destroy(&parser, output->errors.data[i]);
3777
4185
  }
3778
4186
  gumbo_vector_destroy(&parser, &output->errors);