nokogumbo 1.4.1 → 1.4.2

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: cf20dd502d8ec6022f2c72193bb0c9a908251088
4
+ data.tar.gz: 326f85766d0e4f97683f5df026f08f4dc33806e8
5
+ SHA512:
6
+ metadata.gz: 800800652a5260bf54399e8cca1fc6e63f7ef53aea489245c5315b6e955b38aa4dfc6d7272b99898ab78150464640ac14c995aa38b9c77644dab5d73fc0e46a5
7
+ data.tar.gz: 18ba647671103cfc2853a88935fe91eb965d1e6fbe1aad981438297a5035ec222b5ae6c5ed3ef127429c8b58edd02a6a5a877ba7e7ec3390d05779f7420f1521
@@ -157,7 +157,7 @@ static xmlNodePtr walk_tree(xmlDocPtr document, GumboElement *node) {
157
157
 
158
158
  switch (child->type) {
159
159
  case GUMBO_NODE_ELEMENT:
160
- case GUMBO_NODE_TEMPLATE:
160
+ // case GUMBO_NODE_TEMPLATE: /* future */
161
161
  node = walk_tree(document, &child->v.element);
162
162
  break;
163
163
  case GUMBO_NODE_WHITESPACE:
@@ -35,11 +35,10 @@ static const size_t kMessageBufferSize = 256;
35
35
  static int print_message(GumboParser* parser, GumboStringBuffer* output,
36
36
  const char* format, ...) {
37
37
  va_list args;
38
- int remaining_capacity = output->capacity - output->length;
39
38
  va_start(args, format);
39
+ int remaining_capacity = output->capacity - output->length;
40
40
  int bytes_written = vsnprintf(output->data + output->length,
41
41
  remaining_capacity, format, args);
42
- va_end(args);
43
42
  #ifdef _MSC_VER
44
43
  if (bytes_written == -1) {
45
44
  // vsnprintf returns -1 on MSVC++ if there's not enough capacity, instead of
@@ -48,7 +47,6 @@ static int print_message(GumboParser* parser, GumboStringBuffer* output,
48
47
  // we retry (letting it fail and returning 0 if it doesn't), since there's
49
48
  // no way to smartly resize the buffer.
50
49
  gumbo_string_buffer_reserve(parser, output->capacity * 2, output);
51
- va_start(args, format);
52
50
  int result = vsnprintf(output->data + output->length,
53
51
  remaining_capacity, format, args);
54
52
  va_end(args);
@@ -57,6 +55,7 @@ static int print_message(GumboParser* parser, GumboStringBuffer* output,
57
55
  #else
58
56
  // -1 in standard C99 indicates an encoding error. Return 0 and do nothing.
59
57
  if (bytes_written == -1) {
58
+ va_end(args);
60
59
  return 0;
61
60
  }
62
61
  #endif
@@ -65,12 +64,11 @@ static int print_message(GumboParser* parser, GumboStringBuffer* output,
65
64
  gumbo_string_buffer_reserve(
66
65
  parser, output->capacity + bytes_written, output);
67
66
  remaining_capacity = output->capacity - output->length;
68
- va_start(args, format);
69
67
  bytes_written = vsnprintf(output->data + output->length,
70
68
  remaining_capacity, format, args);
71
- va_end(args);
72
69
  }
73
70
  output->length += bytes_written;
71
+ va_end(args);
74
72
  return bytes_written;
75
73
  }
76
74
 
@@ -141,7 +141,7 @@ extern const GumboVector kGumboEmptyVector;
141
141
  * Returns the first index at which an element appears in this vector (testing
142
142
  * by pointer equality), or -1 if it never does.
143
143
  */
144
- int gumbo_vector_index_of(GumboVector* vector, const void* element);
144
+ int gumbo_vector_index_of(GumboVector* vector, void* element);
145
145
 
146
146
 
147
147
  /**
@@ -157,10 +157,172 @@ int gumbo_vector_index_of(GumboVector* vector, const void* element);
157
157
  * strings.
158
158
  */
159
159
  typedef enum {
160
- // Load all the tags from an external source, generated from tag.in.
161
- # include "tag_enum.h"
162
- // Used for all tags that don't have special handling in HTML. Add new tags
163
- // to the end of tag.in so as to preserve backwards-compatibility.
160
+ // http://www.whatwg.org/specs/web-apps/current-work/multipage/semantics.html#the-root-element
161
+ GUMBO_TAG_HTML,
162
+ // http://www.whatwg.org/specs/web-apps/current-work/multipage/semantics.html#document-metadata
163
+ GUMBO_TAG_HEAD,
164
+ GUMBO_TAG_TITLE,
165
+ GUMBO_TAG_BASE,
166
+ GUMBO_TAG_LINK,
167
+ GUMBO_TAG_META,
168
+ GUMBO_TAG_STYLE,
169
+ // http://www.whatwg.org/specs/web-apps/current-work/multipage/scripting-1.html#scripting-1
170
+ GUMBO_TAG_SCRIPT,
171
+ GUMBO_TAG_NOSCRIPT,
172
+ GUMBO_TAG_TEMPLATE,
173
+ // http://www.whatwg.org/specs/web-apps/current-work/multipage/sections.html#sections
174
+ GUMBO_TAG_BODY,
175
+ GUMBO_TAG_ARTICLE,
176
+ GUMBO_TAG_SECTION,
177
+ GUMBO_TAG_NAV,
178
+ GUMBO_TAG_ASIDE,
179
+ GUMBO_TAG_H1,
180
+ GUMBO_TAG_H2,
181
+ GUMBO_TAG_H3,
182
+ GUMBO_TAG_H4,
183
+ GUMBO_TAG_H5,
184
+ GUMBO_TAG_H6,
185
+ GUMBO_TAG_HGROUP,
186
+ GUMBO_TAG_HEADER,
187
+ GUMBO_TAG_FOOTER,
188
+ GUMBO_TAG_ADDRESS,
189
+ // http://www.whatwg.org/specs/web-apps/current-work/multipage/grouping-content.html#grouping-content
190
+ GUMBO_TAG_P,
191
+ GUMBO_TAG_HR,
192
+ GUMBO_TAG_PRE,
193
+ GUMBO_TAG_BLOCKQUOTE,
194
+ GUMBO_TAG_OL,
195
+ GUMBO_TAG_UL,
196
+ GUMBO_TAG_LI,
197
+ GUMBO_TAG_DL,
198
+ GUMBO_TAG_DT,
199
+ GUMBO_TAG_DD,
200
+ GUMBO_TAG_FIGURE,
201
+ GUMBO_TAG_FIGCAPTION,
202
+ GUMBO_TAG_MAIN,
203
+ GUMBO_TAG_DIV,
204
+ // http://www.whatwg.org/specs/web-apps/current-work/multipage/text-level-semantics.html#text-level-semantics
205
+ GUMBO_TAG_A,
206
+ GUMBO_TAG_EM,
207
+ GUMBO_TAG_STRONG,
208
+ GUMBO_TAG_SMALL,
209
+ GUMBO_TAG_S,
210
+ GUMBO_TAG_CITE,
211
+ GUMBO_TAG_Q,
212
+ GUMBO_TAG_DFN,
213
+ GUMBO_TAG_ABBR,
214
+ GUMBO_TAG_DATA,
215
+ GUMBO_TAG_TIME,
216
+ GUMBO_TAG_CODE,
217
+ GUMBO_TAG_VAR,
218
+ GUMBO_TAG_SAMP,
219
+ GUMBO_TAG_KBD,
220
+ GUMBO_TAG_SUB,
221
+ GUMBO_TAG_SUP,
222
+ GUMBO_TAG_I,
223
+ GUMBO_TAG_B,
224
+ GUMBO_TAG_U,
225
+ GUMBO_TAG_MARK,
226
+ GUMBO_TAG_RUBY,
227
+ GUMBO_TAG_RT,
228
+ GUMBO_TAG_RP,
229
+ GUMBO_TAG_BDI,
230
+ GUMBO_TAG_BDO,
231
+ GUMBO_TAG_SPAN,
232
+ GUMBO_TAG_BR,
233
+ GUMBO_TAG_WBR,
234
+ // http://www.whatwg.org/specs/web-apps/current-work/multipage/edits.html#edits
235
+ GUMBO_TAG_INS,
236
+ GUMBO_TAG_DEL,
237
+ // http://www.whatwg.org/specs/web-apps/current-work/multipage/embedded-content-1.html#embedded-content-1
238
+ GUMBO_TAG_IMAGE,
239
+ GUMBO_TAG_IMG,
240
+ GUMBO_TAG_IFRAME,
241
+ GUMBO_TAG_EMBED,
242
+ GUMBO_TAG_OBJECT,
243
+ GUMBO_TAG_PARAM,
244
+ GUMBO_TAG_VIDEO,
245
+ GUMBO_TAG_AUDIO,
246
+ GUMBO_TAG_SOURCE,
247
+ GUMBO_TAG_TRACK,
248
+ GUMBO_TAG_CANVAS,
249
+ GUMBO_TAG_MAP,
250
+ GUMBO_TAG_AREA,
251
+ // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-map-element.html#mathml
252
+ GUMBO_TAG_MATH,
253
+ GUMBO_TAG_MI,
254
+ GUMBO_TAG_MO,
255
+ GUMBO_TAG_MN,
256
+ GUMBO_TAG_MS,
257
+ GUMBO_TAG_MTEXT,
258
+ GUMBO_TAG_MGLYPH,
259
+ GUMBO_TAG_MALIGNMARK,
260
+ GUMBO_TAG_ANNOTATION_XML,
261
+ // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-map-element.html#svg-0
262
+ GUMBO_TAG_SVG,
263
+ GUMBO_TAG_FOREIGNOBJECT,
264
+ GUMBO_TAG_DESC,
265
+ // SVG title tags will have GUMBO_TAG_TITLE as with HTML.
266
+ // http://www.whatwg.org/specs/web-apps/current-work/multipage/tabular-data.html#tabular-data
267
+ GUMBO_TAG_TABLE,
268
+ GUMBO_TAG_CAPTION,
269
+ GUMBO_TAG_COLGROUP,
270
+ GUMBO_TAG_COL,
271
+ GUMBO_TAG_TBODY,
272
+ GUMBO_TAG_THEAD,
273
+ GUMBO_TAG_TFOOT,
274
+ GUMBO_TAG_TR,
275
+ GUMBO_TAG_TD,
276
+ GUMBO_TAG_TH,
277
+ // http://www.whatwg.org/specs/web-apps/current-work/multipage/forms.html#forms
278
+ GUMBO_TAG_FORM,
279
+ GUMBO_TAG_FIELDSET,
280
+ GUMBO_TAG_LEGEND,
281
+ GUMBO_TAG_LABEL,
282
+ GUMBO_TAG_INPUT,
283
+ GUMBO_TAG_BUTTON,
284
+ GUMBO_TAG_SELECT,
285
+ GUMBO_TAG_DATALIST,
286
+ GUMBO_TAG_OPTGROUP,
287
+ GUMBO_TAG_OPTION,
288
+ GUMBO_TAG_TEXTAREA,
289
+ GUMBO_TAG_KEYGEN,
290
+ GUMBO_TAG_OUTPUT,
291
+ GUMBO_TAG_PROGRESS,
292
+ GUMBO_TAG_METER,
293
+ // http://www.whatwg.org/specs/web-apps/current-work/multipage/interactive-elements.html#interactive-elements
294
+ GUMBO_TAG_DETAILS,
295
+ GUMBO_TAG_SUMMARY,
296
+ GUMBO_TAG_MENU,
297
+ GUMBO_TAG_MENUITEM,
298
+ // Non-conforming elements that nonetheless appear in the HTML5 spec.
299
+ // http://www.whatwg.org/specs/web-apps/current-work/multipage/obsolete.html#non-conforming-features
300
+ GUMBO_TAG_APPLET,
301
+ GUMBO_TAG_ACRONYM,
302
+ GUMBO_TAG_BGSOUND,
303
+ GUMBO_TAG_DIR,
304
+ GUMBO_TAG_FRAME,
305
+ GUMBO_TAG_FRAMESET,
306
+ GUMBO_TAG_NOFRAMES,
307
+ GUMBO_TAG_ISINDEX,
308
+ GUMBO_TAG_LISTING,
309
+ GUMBO_TAG_XMP,
310
+ GUMBO_TAG_NEXTID,
311
+ GUMBO_TAG_NOEMBED,
312
+ GUMBO_TAG_PLAINTEXT,
313
+ GUMBO_TAG_RB,
314
+ GUMBO_TAG_STRIKE,
315
+ GUMBO_TAG_BASEFONT,
316
+ GUMBO_TAG_BIG,
317
+ GUMBO_TAG_BLINK,
318
+ GUMBO_TAG_CENTER,
319
+ GUMBO_TAG_FONT,
320
+ GUMBO_TAG_MARQUEE,
321
+ GUMBO_TAG_MULTICOL,
322
+ GUMBO_TAG_NOBR,
323
+ GUMBO_TAG_SPACER,
324
+ GUMBO_TAG_TT,
325
+ // Used for all tags that don't have special handling in HTML.
164
326
  GUMBO_TAG_UNKNOWN,
165
327
  // A marker value to indicate the end of the enum, for iterating over it.
166
328
  // Also used as the terminator for varargs functions that take tags.
@@ -202,10 +364,9 @@ const char* gumbo_normalize_svg_tagname(const GumboStringPiece* tagname);
202
364
 
203
365
  /**
204
366
  * Converts a tag name string (which may be in upper or mixed case) to a tag
205
- * enum. The `tag` version expects `tagname` to be NULL-terminated
367
+ * enum.
206
368
  */
207
369
  GumboTag gumbo_tag_enum(const char* tagname);
208
- GumboTag gumbo_tagn_enum(const char* tagname, unsigned int length);
209
370
 
210
371
  /**
211
372
  * Attribute namespaces.
@@ -300,16 +461,10 @@ typedef enum {
300
461
  GUMBO_NODE_TEXT,
301
462
  /** CDATA node. v will be a GumboText. */
302
463
  GUMBO_NODE_CDATA,
303
- /** Comment node. v will be a GumboText, excluding comment delimiters. */
464
+ /** Comment node. v. will be a GumboText, excluding comment delimiters. */
304
465
  GUMBO_NODE_COMMENT,
305
466
  /** Text node, where all contents is whitespace. v will be a GumboText. */
306
- GUMBO_NODE_WHITESPACE,
307
- /** Template node. This is separate from GUMBO_NODE_ELEMENT because many
308
- * client libraries will want to ignore the contents of template nodes, as
309
- * the spec suggests. Recursing on GUMBO_NODE_ELEMENT will do the right thing
310
- * here, while clients that want to include template contents should also
311
- * check for GUMBO_NODE_TEMPLATE. v will be a GumboElement. */
312
- GUMBO_NODE_TEMPLATE
467
+ GUMBO_NODE_WHITESPACE
313
468
  } GumboNodeType;
314
469
 
315
470
  /**
@@ -523,19 +678,6 @@ struct GumboInternalNode {
523
678
  /** Pointer back to parent node. Not owned. */
524
679
  GumboNode* parent;
525
680
 
526
- /**
527
- * Pointer to next node in document order. This is the next node by start tag
528
- * position in the document, or by position of the tag that forces the parser
529
- * to insert it for parser-inserted nodes. It's necessary to maintain API
530
- * compatibility with some other libraries, eg. BeautifulSoup. Not owned.
531
- */
532
- GumboNode* next;
533
-
534
- /**
535
- * Pointer to previous node in document order.
536
- */
537
- GumboNode* prev;
538
-
539
681
  /** The index within the parent's children vector of this node. */
540
682
  size_t index_within_parent;
541
683
 
@@ -653,14 +795,6 @@ GumboOutput* gumbo_parse(const char* buffer);
653
795
  GumboOutput* gumbo_parse_with_options(
654
796
  const GumboOptions* options, const char* buffer, size_t buffer_length);
655
797
 
656
- /**
657
- * Parse a chunk of HTML with the given fragment context. If `fragment_ctx`
658
- * is `GUMBO_TAG_LAST`, the fragment will be parsed as a full document.
659
- */
660
- GumboOutput* gumbo_parse_fragment(
661
- const GumboOptions* options, const char* buffer, size_t length,
662
- const GumboTag fragment_ctx, const GumboNamespaceEnum fragment_namespace);
663
-
664
798
  /** Release the memory used for the parse tree & parse errors. */
665
799
  void gumbo_destroy_output(
666
800
  const GumboOptions* options, GumboOutput* output);
@@ -47,15 +47,6 @@ typedef char gumbo_tagset[GUMBO_TAG_LAST];
47
47
  tagset[(int)tag] == (1 << (int)namespace))
48
48
 
49
49
 
50
-
51
- // selected forward declarations as it is getting hard to find
52
- // an appropriate order
53
- static bool node_html_tag_is(const GumboNode*, GumboTag);
54
- static GumboInsertionMode get_current_template_insertion_mode(const GumboParser*);
55
- static bool handle_in_template(GumboParser*, GumboToken*);
56
- static GumboNode* destroy_node(GumboParser*, GumboNode*);
57
-
58
-
59
50
  static void* malloc_wrapper(void* unused, size_t size) {
60
51
  return malloc(size);
61
52
  }
@@ -199,7 +190,7 @@ typedef struct _ReplacementEntry {
199
190
  { GUMBO_STRING(from), GUMBO_STRING(to) }
200
191
 
201
192
  // Static data for SVG attribute replacements.
202
- // https://html.spec.whatwg.org/multipage/syntax.html#creating-and-inserting-nodes
193
+ // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#adjust-svg-attributes
203
194
  static const ReplacementEntry kSvgAttributeReplacements[] = {
204
195
  REPLACEMENT_ENTRY("attributename", "attributeName"),
205
196
  REPLACEMENT_ENTRY("attributetype", "attributeType"),
@@ -207,12 +198,12 @@ static const ReplacementEntry kSvgAttributeReplacements[] = {
207
198
  REPLACEMENT_ENTRY("baseprofile", "baseProfile"),
208
199
  REPLACEMENT_ENTRY("calcmode", "calcMode"),
209
200
  REPLACEMENT_ENTRY("clippathunits", "clipPathUnits"),
210
- // REPLACEMENT_ENTRY("contentscripttype", "contentScriptType"),
211
- // REPLACEMENT_ENTRY("contentstyletype", "contentStyleType"),
201
+ REPLACEMENT_ENTRY("contentscripttype", "contentScriptType"),
202
+ REPLACEMENT_ENTRY("contentstyletype", "contentStyleType"),
212
203
  REPLACEMENT_ENTRY("diffuseconstant", "diffuseConstant"),
213
204
  REPLACEMENT_ENTRY("edgemode", "edgeMode"),
214
- // REPLACEMENT_ENTRY("externalresourcesrequired", "externalResourcesRequired"),
215
- // REPLACEMENT_ENTRY("filterres", "filterRes"),
205
+ REPLACEMENT_ENTRY("externalresourcesrequired", "externalResourcesRequired"),
206
+ REPLACEMENT_ENTRY("filterres", "filterRes"),
216
207
  REPLACEMENT_ENTRY("filterunits", "filterUnits"),
217
208
  REPLACEMENT_ENTRY("glyphref", "glyphRef"),
218
209
  REPLACEMENT_ENTRY("gradienttransform", "gradientTransform"),
@@ -380,9 +371,6 @@ typedef struct GumboInternalParserState {
380
371
  GumboNode* _head_element;
381
372
  GumboNode* _form_element;
382
373
 
383
- // The element used as fragment context when parsing in fragment mode
384
- GumboNode* _fragment_ctx;
385
-
386
374
  // The flag for when the spec says "Reprocess the current token in..."
387
375
  bool _reprocess_current_token;
388
376
 
@@ -411,10 +399,6 @@ typedef struct GumboInternalParserState {
411
399
  // The current token.
412
400
  GumboToken* _current_token;
413
401
 
414
- // The current (most recently inserted) node. This is used to link together
415
- // nodes in document order.
416
- GumboNode* _current_node;
417
-
418
402
  // The way that the spec is written, the </body> and </html> tags are *always*
419
403
  // implicit, because encountering one of those tokens merely switches the
420
404
  // insertion mode out of "in body". So we have individual state flags for
@@ -467,17 +451,7 @@ static void set_frameset_not_ok(GumboParser* parser) {
467
451
  }
468
452
 
469
453
  static GumboNode* create_node(GumboParser* parser, GumboNodeType type) {
470
- GumboParserState* state = parser->_parser_state;
471
454
  GumboNode* node = gumbo_parser_allocate(parser, sizeof(GumboNode));
472
-
473
- node->next = NULL;
474
- node->prev = state->_current_node;
475
- if (state->_current_node != NULL) {
476
- // May be null for the initial document node.
477
- state->_current_node->next = node;
478
- }
479
- state->_current_node = node;
480
-
481
455
  node->parent = NULL;
482
456
  node->index_within_parent = -1;
483
457
  node->type = type;
@@ -524,9 +498,7 @@ static void parser_state_init(GumboParser* parser) {
524
498
  gumbo_vector_init(parser, 5, &parser_state->_template_insertion_modes);
525
499
  parser_state->_head_element = NULL;
526
500
  parser_state->_form_element = NULL;
527
- parser_state->_fragment_ctx = NULL;
528
501
  parser_state->_current_token = NULL;
529
- parser_state->_current_node = NULL;
530
502
  parser_state->_closed_body_tag = false;
531
503
  parser_state->_closed_html_tag = false;
532
504
  parser->_parser_state = parser_state;
@@ -534,25 +506,17 @@ static void parser_state_init(GumboParser* parser) {
534
506
 
535
507
  static void parser_state_destroy(GumboParser* parser) {
536
508
  GumboParserState* state = parser->_parser_state;
537
- if (state->_fragment_ctx) {
538
- destroy_node(parser, state->_fragment_ctx);
539
- }
540
509
  gumbo_vector_destroy(parser, &state->_active_formatting_elements);
541
510
  gumbo_vector_destroy(parser, &state->_open_elements);
542
511
  gumbo_vector_destroy(parser, &state->_template_insertion_modes);
543
512
  gumbo_string_buffer_destroy(parser, &state->_text_node._buffer);
544
513
  gumbo_parser_deallocate(parser, state);
545
- parser->_parser_state = NULL;
546
514
  }
547
515
 
548
516
  static GumboNode* get_document_node(GumboParser* parser) {
549
517
  return parser->_output->document;
550
518
  }
551
519
 
552
- static bool is_fragment_parser(const GumboParser *parser) {
553
- return !!parser->_parser_state->_fragment_ctx;
554
- }
555
-
556
520
  // Returns the node at the bottom of the stack of open elements, or NULL if no
557
521
  // elements have been added yet.
558
522
  static GumboNode* get_current_node(GumboParser* parser) {
@@ -566,14 +530,6 @@ static GumboNode* get_current_node(GumboParser* parser) {
566
530
  return open_elements->data[open_elements->length - 1];
567
531
  }
568
532
 
569
- static GumboNode* get_adjusted_current_node(GumboParser* parser) {
570
- GumboParserState *state = parser->_parser_state;
571
- if (state->_open_elements.length == 1 && state->_fragment_ctx) {
572
- return state->_fragment_ctx;
573
- }
574
- return get_current_node(parser);
575
- }
576
-
577
533
  // Returns true if the given needle is in the given array of literal
578
534
  // GumboStringPieces. If exact_match is true, this requires that they match
579
535
  // exactly; otherwise, this performs a prefix match to check if any of the
@@ -594,80 +550,55 @@ static void set_insertion_mode(GumboParser* parser, GumboInsertionMode mode) {
594
550
  parser->_parser_state->_insertion_mode = mode;
595
551
  }
596
552
 
597
-
598
553
  // http://www.whatwg.org/specs/web-apps/current-work/complete/parsing.html#reset-the-insertion-mode-appropriately
599
554
  // This is a helper function that returns the appropriate insertion mode instead
600
555
  // of setting it. Returns GUMBO_INSERTION_MODE_INITIAL as a sentinel value to
601
556
  // indicate that there is no appropriate insertion mode, and the loop should
602
557
  // continue.
603
- static GumboInsertionMode get_appropriate_insertion_mode(const GumboParser* parser, int index) {
604
- const GumboVector* open_elements = &parser->_parser_state->_open_elements;
605
- const GumboNode* node = open_elements->data[index];
606
- const bool is_last = index == 0;
607
-
608
- if (is_last && is_fragment_parser(parser)) {
609
- node = parser->_parser_state->_fragment_ctx;
610
- }
558
+ static GumboInsertionMode get_appropriate_insertion_mode(
559
+ const GumboNode* node, bool is_last) {
560
+ assert(node->type == GUMBO_NODE_ELEMENT);
611
561
 
612
- assert(node->type == GUMBO_NODE_ELEMENT || node->type == GUMBO_NODE_TEMPLATE);
613
- switch (node->v.element.tag) {
614
- case GUMBO_TAG_SELECT: {
615
- if (is_last) {
616
- return GUMBO_INSERTION_MODE_IN_SELECT;
617
- }
618
- for (int i = index; i > 0; --i) {
619
- const GumboNode* ancestor = open_elements->data[i];
620
- if (node_html_tag_is(ancestor, GUMBO_TAG_TEMPLATE)) {
621
- return GUMBO_INSERTION_MODE_IN_SELECT;
622
- }
623
- if (node_html_tag_is(ancestor, GUMBO_TAG_TABLE)) {
624
- return GUMBO_INSERTION_MODE_IN_SELECT_IN_TABLE;
625
- }
626
- }
627
- return GUMBO_INSERTION_MODE_IN_SELECT;
628
- }
629
- case GUMBO_TAG_TD:
630
- case GUMBO_TAG_TH:
631
- if (!is_last) return GUMBO_INSERTION_MODE_IN_CELL;
632
- break;
633
- case GUMBO_TAG_TR:
634
- return GUMBO_INSERTION_MODE_IN_ROW;
635
- case GUMBO_TAG_TBODY:
636
- case GUMBO_TAG_THEAD:
637
- case GUMBO_TAG_TFOOT:
638
- return GUMBO_INSERTION_MODE_IN_TABLE_BODY;
639
- case GUMBO_TAG_CAPTION:
640
- return GUMBO_INSERTION_MODE_IN_CAPTION;
641
- case GUMBO_TAG_COLGROUP:
642
- return GUMBO_INSERTION_MODE_IN_COLUMN_GROUP;
643
- case GUMBO_TAG_TABLE:
644
- return GUMBO_INSERTION_MODE_IN_TABLE;
645
- case GUMBO_TAG_TEMPLATE:
646
- return get_current_template_insertion_mode(parser);
647
- case GUMBO_TAG_HEAD:
648
- if (!is_last) return GUMBO_INSERTION_MODE_IN_HEAD;
649
- break;
650
- case GUMBO_TAG_BODY:
651
- return GUMBO_INSERTION_MODE_IN_BODY;
652
- case GUMBO_TAG_FRAMESET:
653
- return GUMBO_INSERTION_MODE_IN_FRAMESET;
654
- case GUMBO_TAG_HTML:
655
- return parser->_parser_state->_head_element ?
656
- GUMBO_INSERTION_MODE_AFTER_HEAD : GUMBO_INSERTION_MODE_BEFORE_HEAD;
657
- default:
658
- break;
659
- }
660
- return is_last ?
661
- GUMBO_INSERTION_MODE_IN_BODY : GUMBO_INSERTION_MODE_INITIAL;
562
+ if (node->v.element.tag_namespace == GUMBO_NAMESPACE_HTML) {
563
+ switch (node->v.element.tag) {
564
+ case GUMBO_TAG_SELECT:
565
+ return GUMBO_INSERTION_MODE_IN_SELECT;
566
+ case GUMBO_TAG_TD:
567
+ case GUMBO_TAG_TH:
568
+ return is_last ?
569
+ GUMBO_INSERTION_MODE_IN_BODY : GUMBO_INSERTION_MODE_IN_CELL;
570
+ case GUMBO_TAG_TR:
571
+ return GUMBO_INSERTION_MODE_IN_ROW;
572
+ case GUMBO_TAG_TBODY:
573
+ case GUMBO_TAG_THEAD:
574
+ case GUMBO_TAG_TFOOT:
575
+ return GUMBO_INSERTION_MODE_IN_TABLE_BODY;
576
+ case GUMBO_TAG_CAPTION:
577
+ return GUMBO_INSERTION_MODE_IN_CAPTION;
578
+ case GUMBO_TAG_COLGROUP:
579
+ return GUMBO_INSERTION_MODE_IN_COLUMN_GROUP;
580
+ case GUMBO_TAG_TABLE:
581
+ return GUMBO_INSERTION_MODE_IN_TABLE;
582
+ case GUMBO_TAG_HEAD:
583
+ case GUMBO_TAG_BODY:
584
+ return GUMBO_INSERTION_MODE_IN_BODY;
585
+ case GUMBO_TAG_FRAMESET:
586
+ return GUMBO_INSERTION_MODE_IN_FRAMESET;
587
+ case GUMBO_TAG_HTML:
588
+ return GUMBO_INSERTION_MODE_BEFORE_HEAD;
589
+ default:
590
+ break;
591
+ }
592
+ }
593
+ return is_last ? GUMBO_INSERTION_MODE_IN_BODY : GUMBO_INSERTION_MODE_INITIAL;
662
594
  }
663
595
 
664
-
665
596
  // This performs the actual "reset the insertion mode" loop.
666
597
  static void reset_insertion_mode_appropriately(GumboParser* parser) {
667
598
  const GumboVector* open_elements = &parser->_parser_state->_open_elements;
668
599
  for (int i = open_elements->length; --i >= 0; ) {
669
600
  GumboInsertionMode mode =
670
- get_appropriate_insertion_mode(parser, i);
601
+ get_appropriate_insertion_mode(open_elements->data[i], i == 0);
671
602
  if (mode != GUMBO_INSERTION_MODE_INITIAL) {
672
603
  set_insertion_mode(parser, mode);
673
604
  return;
@@ -701,7 +632,7 @@ static GumboError* parser_add_parse_error(GumboParser* parser, const GumboToken*
701
632
  &extra_data->tag_stack);
702
633
  for (int i = 0; i < state->_open_elements.length; ++i) {
703
634
  const GumboNode* node = state->_open_elements.data[i];
704
- assert(node->type == GUMBO_NODE_ELEMENT || node->type == GUMBO_NODE_TEMPLATE);
635
+ assert(node->type == GUMBO_NODE_ELEMENT);
705
636
  gumbo_vector_add(parser, (void*) node->v.element.tag,
706
637
  &extra_data->tag_stack);
707
638
  }
@@ -738,7 +669,7 @@ static bool tag_is(const GumboToken* token, bool is_start, GumboTag tag) {
738
669
  // Like tag_in, but checks for the tag of a node, rather than a token.
739
670
  static bool node_tag_in_set(const GumboNode* node, const gumbo_tagset tags) {
740
671
  assert(node != NULL);
741
- if (node->type != GUMBO_NODE_ELEMENT && node->type != GUMBO_NODE_TEMPLATE) {
672
+ if (node->type != GUMBO_NODE_ELEMENT) {
742
673
  return false;
743
674
  }
744
675
  return TAGSET_INCLUDES(tags, node->v.element.tag_namespace, node->v.element.tag);
@@ -747,7 +678,7 @@ static bool node_tag_in_set(const GumboNode* node, const gumbo_tagset tags) {
747
678
 
748
679
  // Like node_tag_in, but for the single-tag case.
749
680
  static bool node_qualified_tag_is(const GumboNode* node, GumboNamespaceEnum ns, GumboTag tag) {
750
- return (node->type == GUMBO_NODE_ELEMENT || node->type == GUMBO_NODE_TEMPLATE) &&
681
+ return node->type == GUMBO_NODE_ELEMENT &&
751
682
  node->v.element.tag == tag &&
752
683
  node->v.element.tag_namespace == ns;
753
684
  }
@@ -758,23 +689,6 @@ static bool node_html_tag_is(const GumboNode* node, GumboTag tag)
758
689
  return node_qualified_tag_is(node, GUMBO_NAMESPACE_HTML, tag);
759
690
  }
760
691
 
761
- static void push_template_insertion_mode(GumboParser* parser, GumboInsertionMode mode) {
762
- gumbo_vector_add(parser, (void*) mode, &parser->_parser_state->_template_insertion_modes);
763
- }
764
-
765
- static void pop_template_insertion_mode(GumboParser* parser) {
766
- gumbo_vector_pop(parser, &parser->_parser_state->_template_insertion_modes);
767
- }
768
-
769
- // Returns the current template insertion mode. If the stack of template
770
- // insertion modes is empty, this returns GUMBO_INSERTION_MODE_INITIAL.
771
- static GumboInsertionMode get_current_template_insertion_mode(const GumboParser* parser) {
772
- GumboVector* template_insertion_modes = &parser->_parser_state->_template_insertion_modes;
773
- if (template_insertion_modes->length == 0) {
774
- return GUMBO_INSERTION_MODE_INITIAL;
775
- }
776
- return (GumboInsertionMode) template_insertion_modes->data[(template_insertion_modes->length - 1)];
777
- }
778
692
 
779
693
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#mathml-text-integration-point
780
694
  static bool is_mathml_integration_point(const GumboNode* node) {
@@ -792,63 +706,6 @@ static bool is_html_integration_point(const GumboNode* node) {
792
706
  "encoding", "application/xhtml+xml")));
793
707
  }
794
708
 
795
-
796
- // This represents a place to insert a node, consisting of a target parent and a
797
- // child index within that parent. If the node should be inserted at the end of
798
- // the parent's child, index will be -1.
799
- typedef struct {
800
- GumboNode* target;
801
- int index;
802
- } InsertionLocation;
803
-
804
- InsertionLocation get_appropriate_insertion_location(GumboParser* parser, GumboNode* override_target) {
805
- InsertionLocation retval = { override_target, -1 };
806
- if (retval.target == NULL) {
807
- // No override target; default to the current node, but special-case the
808
- // root node since get_current_node() assumes the stack of open elements is
809
- // non-empty.
810
- retval.target = parser->_output->root != NULL ?
811
- get_current_node(parser) : get_document_node(parser);
812
- }
813
- if (!parser->_parser_state->_foster_parent_insertions ||
814
- !node_tag_in_set(retval.target, (gumbo_tagset) { TAG(TABLE), TAG(TBODY),
815
- TAG(TFOOT), TAG(THEAD), TAG(TR) })) {
816
- return retval;
817
- }
818
-
819
- // Foster-parenting case.
820
- int last_template_index = -1;
821
- int last_table_index = -1;
822
- GumboVector* open_elements = &parser->_parser_state->_open_elements;
823
- for (int i = 0; i < open_elements->length; ++i) {
824
- if (node_html_tag_is(open_elements->data[i], GUMBO_TAG_TEMPLATE)) {
825
- last_template_index = i;
826
- }
827
- if (node_html_tag_is(open_elements->data[i], GUMBO_TAG_TABLE)) {
828
- last_table_index = i;
829
- }
830
- }
831
- if (last_template_index != -1 &&
832
- (last_table_index == -1 || last_template_index > last_table_index)) {
833
- retval.target = open_elements->data[last_template_index];
834
- return retval;
835
- }
836
- if (last_table_index == -1) {
837
- retval.target = open_elements->data[0];
838
- return retval;
839
- }
840
- GumboNode* last_table = open_elements->data[last_table_index];
841
- if (last_table->parent != NULL) {
842
- retval.target = last_table->parent;
843
- retval.index = last_table->index_within_parent;
844
- return retval;
845
- }
846
-
847
- retval.target = open_elements->data[last_table_index - 1];
848
- return retval;
849
- }
850
-
851
-
852
709
  // Appends a node to the end of its parent, setting the "parent" and
853
710
  // "index_within_parent" fields appropriately.
854
711
  static void append_node(
@@ -856,7 +713,7 @@ static void append_node(
856
713
  assert(node->parent == NULL);
857
714
  assert(node->index_within_parent == -1);
858
715
  GumboVector* children;
859
- if (parent->type == GUMBO_NODE_ELEMENT || parent->type == GUMBO_NODE_TEMPLATE) {
716
+ if (parent->type == GUMBO_NODE_ELEMENT) {
860
717
  children = &parent->v.element.children;
861
718
  } else {
862
719
  assert(parent->type == GUMBO_NODE_DOCUMENT);
@@ -868,44 +725,66 @@ static void append_node(
868
725
  assert(node->index_within_parent < children->length);
869
726
  }
870
727
 
871
- // Inserts a node at the specified InsertionLocation, updating the
728
+ // Inserts a node at the specified index within its parent, updating the
872
729
  // "parent" and "index_within_parent" fields of it and all its siblings.
873
- // If the index of the location is -1, this calls append_node.
874
730
  static void insert_node(
875
- GumboParser* parser, GumboNode* node, InsertionLocation location) {
731
+ GumboParser* parser, GumboNode* parent, int index, GumboNode* node) {
876
732
  assert(node->parent == NULL);
877
733
  assert(node->index_within_parent == -1);
878
- GumboNode* parent = location.target;
879
- int index = location.index;
880
- if (index != -1) {
881
- GumboVector* children = NULL;
882
- if (parent->type == GUMBO_NODE_ELEMENT ||
883
- parent->type == GUMBO_NODE_TEMPLATE) {
884
- children = &parent->v.element.children;
885
- } else if (parent->type == GUMBO_NODE_DOCUMENT) {
886
- children = &parent->v.document.children;
887
- assert(children->length == 0);
888
- } else {
889
- assert(0);
890
- }
734
+ assert(parent->type == GUMBO_NODE_ELEMENT);
735
+ GumboVector* children = &parent->v.element.children;
736
+ assert(index >= 0);
737
+ assert(index < children->length);
738
+ node->parent = parent;
739
+ node->index_within_parent = index;
740
+ gumbo_vector_insert_at(parser, (void*) node, index, children);
741
+ assert(node->index_within_parent < children->length);
742
+ for (int i = index + 1; i < children->length; ++i) {
743
+ GumboNode* sibling = children->data[i];
744
+ sibling->index_within_parent = i;
745
+ assert(sibling->index_within_parent < children->length);
746
+ }
747
+ }
891
748
 
892
- assert(index >= 0);
893
- assert(index < children->length);
894
- node->parent = parent;
895
- node->index_within_parent = index;
896
- gumbo_vector_insert_at(parser, (void*) node, index, children);
897
- assert(node->index_within_parent < children->length);
898
- for (int i = index + 1; i < children->length; ++i) {
899
- GumboNode* sibling = children->data[i];
900
- sibling->index_within_parent = i;
901
- assert(sibling->index_within_parent < children->length);
749
+ // http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#foster-parenting
750
+ static void foster_parent_element(GumboParser* parser, GumboNode* node) {
751
+ GumboVector* open_elements = &parser->_parser_state->_open_elements;
752
+ assert(open_elements->length > 2);
753
+
754
+ node->parse_flags |= GUMBO_INSERTION_FOSTER_PARENTED;
755
+ GumboNode* foster_parent_element = open_elements->data[0];
756
+ assert(foster_parent_element->type == GUMBO_NODE_ELEMENT);
757
+ assert(node_html_tag_is(foster_parent_element, GUMBO_TAG_HTML));
758
+ for (int i = open_elements->length; --i > 1; ) {
759
+ GumboNode* table_element = open_elements->data[i];
760
+ if (node_html_tag_is(table_element, GUMBO_TAG_TABLE)) {
761
+ foster_parent_element = table_element->parent;
762
+ if (!foster_parent_element ||
763
+ foster_parent_element->type != GUMBO_NODE_ELEMENT) {
764
+ // Table has no parent; spec says it's possible if a script manipulated
765
+ // the DOM, although I don't think we have to worry about this case.
766
+ gumbo_debug("Table has no parent.\n");
767
+ foster_parent_element = open_elements->data[i - 1];
768
+ break;
769
+ }
770
+ assert(foster_parent_element->type == GUMBO_NODE_ELEMENT);
771
+ gumbo_debug("Found enclosing table (%x) at %d; parent=%s, index=%d.\n",
772
+ table_element, i, gumbo_normalized_tagname(
773
+ foster_parent_element->v.element.tag),
774
+ table_element->index_within_parent);
775
+ assert(foster_parent_element->v.element.children.data[
776
+ table_element->index_within_parent] == table_element);
777
+ insert_node(parser, foster_parent_element,
778
+ table_element->index_within_parent, node);
779
+ return;
902
780
  }
903
- } else {
904
- append_node(parser, parent, node);
905
781
  }
782
+ if (node->type == GUMBO_NODE_ELEMENT) {
783
+ gumbo_vector_add(parser, (void*) node, open_elements);
784
+ }
785
+ append_node(parser, foster_parent_element, node);
906
786
  }
907
787
 
908
-
909
788
  static void maybe_flush_text_node_buffer(GumboParser* parser) {
910
789
  GumboParserState* state = parser->_parser_state;
911
790
  TextNodeBufferState* buffer_state = &state->_text_node;
@@ -925,20 +804,20 @@ static void maybe_flush_text_node_buffer(GumboParser* parser) {
925
804
  state->_current_token->original_text.data -
926
805
  buffer_state->_start_original_text;
927
806
  text_node_data->start_pos = buffer_state->_start_position;
928
-
929
- gumbo_debug("Flushing text node buffer of %.*s.\n",
930
- (int) buffer_state->_buffer.length, buffer_state->_buffer.data);
931
-
932
- InsertionLocation location = get_appropriate_insertion_location(parser, NULL);
933
- if (location.target->type == GUMBO_NODE_DOCUMENT) {
934
- // The DOM does not allow Document nodes to have Text children, so per the
935
- // spec, they are dropped on the floor.
936
- destroy_node(parser, text_node);
807
+ if (state->_foster_parent_insertions &&
808
+ node_tag_in_set(get_current_node(parser), (gumbo_tagset) { TAG(TABLE), TAG(TBODY), TAG(TFOOT),
809
+ TAG(THEAD), TAG(TR) })) {
810
+ foster_parent_element(parser, text_node);
937
811
  } else {
938
- insert_node(parser, text_node, location);
812
+ append_node(
813
+ parser, parser->_output->root ?
814
+ get_current_node(parser) : parser->_output->document, text_node);
939
815
  }
816
+ gumbo_debug("Flushing text node buffer of %.*s.\n",
817
+ (int) buffer_state->_buffer.length, buffer_state->_buffer.data);
940
818
 
941
- gumbo_string_buffer_clear(parser, &buffer_state->_buffer);
819
+ gumbo_string_buffer_destroy(parser, &buffer_state->_buffer);
820
+ gumbo_string_buffer_init(parser, &buffer_state->_buffer);
942
821
  buffer_state->_type = GUMBO_NODE_WHITESPACE;
943
822
  assert(buffer_state->_buffer.length == 0);
944
823
  }
@@ -965,7 +844,7 @@ static GumboNode* pop_current_node(GumboParser* parser) {
965
844
  assert(state->_open_elements.length == 0);
966
845
  return NULL;
967
846
  }
968
- assert(current_node->type == GUMBO_NODE_ELEMENT || current_node->type == GUMBO_NODE_TEMPLATE);
847
+ assert(current_node->type == GUMBO_NODE_ELEMENT);
969
848
  bool is_closed_body_or_html_tag =
970
849
  (node_html_tag_is(current_node, GUMBO_TAG_BODY) && state->_closed_body_tag) ||
971
850
  (node_html_tag_is(current_node, GUMBO_TAG_HTML) && state->_closed_html_tag);
@@ -994,14 +873,14 @@ static void append_comment_node(
994
873
 
995
874
  // http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#clear-the-stack-back-to-a-table-row-context
996
875
  static void clear_stack_to_table_row_context(GumboParser* parser) {
997
- while (!node_tag_in_set(get_current_node(parser), (gumbo_tagset) { TAG(HTML), TAG(TR), TAG(TEMPLATE)})) {
876
+ while (!node_tag_in_set(get_current_node(parser), (gumbo_tagset) { TAG(HTML), TAG(TR) })) {
998
877
  pop_current_node(parser);
999
878
  }
1000
879
  }
1001
880
 
1002
881
  // http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#clear-the-stack-back-to-a-table-context
1003
882
  static void clear_stack_to_table_context(GumboParser* parser) {
1004
- while (!node_tag_in_set(get_current_node(parser), (gumbo_tagset) { TAG(HTML), TAG(TABLE), TAG(TEMPLATE) } )) {
883
+ while (!node_tag_in_set(get_current_node(parser), (gumbo_tagset) { TAG(HTML), TAG(TABLE) } )) {
1005
884
  pop_current_node(parser);
1006
885
  }
1007
886
  }
@@ -1009,7 +888,7 @@ static void clear_stack_to_table_context(GumboParser* parser) {
1009
888
  // http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#clear-the-stack-back-to-a-table-body-context
1010
889
  void clear_stack_to_table_body_context(GumboParser* parser) {
1011
890
  while (!node_tag_in_set(get_current_node(parser), (gumbo_tagset) { TAG(HTML), TAG(TBODY),
1012
- TAG(TFOOT), TAG(THEAD), TAG(TEMPLATE) })) {
891
+ TAG(TFOOT), TAG(THEAD) })) {
1013
892
  pop_current_node(parser);
1014
893
  }
1015
894
  }
@@ -1024,8 +903,7 @@ static GumboNode* create_element(GumboParser* parser, GumboTag tag) {
1024
903
  element->tag_namespace = GUMBO_NAMESPACE_HTML;
1025
904
  element->original_tag = kGumboEmptyString;
1026
905
  element->original_end_tag = kGumboEmptyString;
1027
- element->start_pos = (parser->_parser_state->_current_token) ?
1028
- parser->_parser_state->_current_token->position : kGumboEmptySourcePosition;
906
+ element->start_pos = parser->_parser_state->_current_token->position;
1029
907
  element->end_pos = kGumboEmptySourcePosition;
1030
908
  return node;
1031
909
  }
@@ -1036,12 +914,7 @@ static GumboNode* create_element_from_token(
1036
914
  assert(token->type == GUMBO_TOKEN_START_TAG);
1037
915
  GumboTokenStartTag* start_tag = &token->v.start_tag;
1038
916
 
1039
- GumboNodeType type = (
1040
- tag_namespace == GUMBO_NAMESPACE_HTML &&
1041
- start_tag->tag == GUMBO_TAG_TEMPLATE)
1042
- ? GUMBO_NODE_TEMPLATE : GUMBO_NODE_ELEMENT;
1043
-
1044
- GumboNode* node = create_node(parser, type);
917
+ GumboNode* node = create_node(parser, GUMBO_NODE_ELEMENT);
1045
918
  GumboElement* element = &node->v.element;
1046
919
  gumbo_vector_init(parser, 1, &element->children);
1047
920
  element->attributes = start_tag->attributes;
@@ -1078,9 +951,20 @@ static void insert_element(GumboParser* parser, GumboNode* node,
1078
951
  if (!is_reconstructing_formatting_elements) {
1079
952
  maybe_flush_text_node_buffer(parser);
1080
953
  }
1081
- InsertionLocation location =
1082
- get_appropriate_insertion_location(parser, NULL);
1083
- insert_node(parser, node, location);
954
+ if (state->_foster_parent_insertions &&
955
+ node_tag_in_set(get_current_node(parser), (gumbo_tagset) { TAG(TABLE), TAG(TBODY), TAG(TFOOT),
956
+ TAG(THEAD), TAG(TR) } )) {
957
+ foster_parent_element(parser, node);
958
+ gumbo_vector_add(parser, (void*) node, &state->_open_elements);
959
+ return;
960
+ }
961
+
962
+ // This is called to insert the root HTML element, but get_current_node
963
+ // assumes the stack of open elements is non-empty, so we need special
964
+ // handling for this case.
965
+ append_node(
966
+ parser, parser->_output->root ?
967
+ get_current_node(parser) : parser->_output->document, node);
1084
968
  gumbo_vector_add(parser, (void*) node, &state->_open_elements);
1085
969
  }
1086
970
 
@@ -1253,7 +1137,7 @@ static bool is_open_element(GumboParser* parser, const GumboNode* node) {
1253
1137
  // values are fresh copies.
1254
1138
  GumboNode* clone_node(
1255
1139
  GumboParser* parser, const GumboNode* node, GumboParseFlags reason) {
1256
- assert(node->type == GUMBO_NODE_ELEMENT || node->type == GUMBO_NODE_TEMPLATE);
1140
+ assert(node->type == GUMBO_NODE_ELEMENT);
1257
1141
  GumboNode* new_node = gumbo_parser_allocate(parser, sizeof(GumboNode));
1258
1142
  *new_node = *node;
1259
1143
  new_node->parent = NULL;
@@ -1323,10 +1207,7 @@ static void reconstruct_active_formatting_elements(GumboParser* parser) {
1323
1207
  GumboNode* clone = clone_node(
1324
1208
  parser, element, GUMBO_INSERTION_RECONSTRUCTED_FORMATTING_ELEMENT);
1325
1209
  // Step 9.
1326
- InsertionLocation location = get_appropriate_insertion_location(parser, NULL);
1327
- insert_node(parser, clone, location);
1328
- gumbo_vector_add(parser, (void*) clone, &parser->_parser_state->_open_elements);
1329
-
1210
+ insert_element(parser, clone, true);
1330
1211
  // Step 10.
1331
1212
  elements->data[i] = clone;
1332
1213
  gumbo_debug("Reconstructed %s element at %d.\n",
@@ -1380,40 +1261,37 @@ static GumboQuirksModeEnum compute_quirks_mode(
1380
1261
  // names. For example, "has an element in list scope" looks for an element of
1381
1262
  // the given qualified name within the nearest enclosing <ol> or <ul>, along
1382
1263
  // with a bunch of generic element types that serve to "firewall" their content
1383
- // from the rest of the document. Note that because of the way the spec is written,
1384
- // all elements are expected to be in the HTML namespace
1385
- static bool has_an_element_in_specific_scope(GumboParser* parser,
1386
- int expected_size, const GumboTag *expected, bool negate, const gumbo_tagset tags) {
1264
+ // from the rest of the document.
1265
+ static bool has_an_element_in_specific_scope(GumboParser* parser, gumbo_tagset expected, bool negate, const gumbo_tagset tags) {
1387
1266
  GumboVector* open_elements = &parser->_parser_state->_open_elements;
1267
+ bool result = false;
1388
1268
  for (int i = open_elements->length; --i >= 0; ) {
1389
1269
  const GumboNode* node = open_elements->data[i];
1390
- if (node->type != GUMBO_NODE_ELEMENT && node->type != GUMBO_NODE_TEMPLATE)
1270
+ if (node->type != GUMBO_NODE_ELEMENT) {
1391
1271
  continue;
1392
-
1393
- GumboTag node_tag = node->v.element.tag;
1394
- GumboNamespaceEnum node_ns = node->v.element.tag_namespace;
1395
- for (int j = 0; j < expected_size; ++j) {
1396
- if (node_tag == expected[j] && node_ns == GUMBO_NAMESPACE_HTML)
1397
- return true;
1398
1272
  }
1399
-
1400
- bool found = TAGSET_INCLUDES(tags, node_ns, node_tag);
1401
- if (negate != found)
1402
- return false;
1273
+ if (TAGSET_INCLUDES(expected, node->v.element.tag_namespace, node->v.element.tag)) {
1274
+ return true;
1275
+ }
1276
+ bool found_qualname = false;
1277
+ if (TAGSET_INCLUDES(tags, node->v.element.tag_namespace, node->v.element.tag)) {
1278
+ found_qualname = true;
1279
+ }
1280
+ if (negate != found_qualname) {
1281
+ result = false;
1282
+ return result;
1283
+ }
1403
1284
  }
1404
- return false;
1405
- }
1406
-
1407
- // Checks for the presence of an open element of the specified tag type.
1408
- static bool has_open_element(GumboParser* parser, GumboTag tag) {
1409
- return has_an_element_in_specific_scope(parser, 1, &tag, false, (gumbo_tagset) { TAG(HTML) } );
1285
+ return result;
1410
1286
  }
1411
1287
 
1412
1288
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-scope
1413
1289
  static bool has_an_element_in_scope(GumboParser* parser, GumboTag tag) {
1414
- return has_an_element_in_specific_scope(parser, 1, &tag, false, (gumbo_tagset) { TAG(APPLET),
1290
+ gumbo_tagset qualset = {0};
1291
+ qualset[(int) tag] = (1 << (int) GUMBO_NAMESPACE_HTML);
1292
+ return has_an_element_in_specific_scope(parser, qualset, false, (gumbo_tagset) { TAG(APPLET),
1415
1293
  TAG(CAPTION), TAG(HTML), TAG(TABLE), TAG(TD), TAG(TH), TAG(MARQUEE),
1416
- TAG(OBJECT), TAG(TEMPLATE), TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN),
1294
+ TAG(OBJECT), TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN),
1417
1295
  TAG_MATHML(MS), TAG_MATHML(MTEXT), TAG_MATHML(ANNOTATION_XML),
1418
1296
  TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC), TAG_SVG(TITLE) });
1419
1297
  }
@@ -1431,11 +1309,11 @@ static bool has_node_in_scope(GumboParser* parser, const GumboNode* node) {
1431
1309
  if (current == node) {
1432
1310
  return true;
1433
1311
  }
1434
- if (current->type != GUMBO_NODE_ELEMENT && current->type != GUMBO_NODE_TEMPLATE) {
1312
+ if (current->type != GUMBO_NODE_ELEMENT) {
1435
1313
  continue;
1436
1314
  }
1437
1315
  if (node_tag_in_set(current, (gumbo_tagset) { TAG(APPLET), TAG(CAPTION), TAG(HTML),
1438
- TAG(TABLE), TAG(TD), TAG(TH), TAG(MARQUEE), TAG(OBJECT), TAG(TEMPLATE),
1316
+ TAG(TABLE), TAG(TD), TAG(TH), TAG(MARQUEE), TAG(OBJECT),
1439
1317
  TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN), TAG_MATHML(MS),
1440
1318
  TAG_MATHML(MTEXT), TAG_MATHML(ANNOTATION_XML), TAG_SVG(FOREIGNOBJECT),
1441
1319
  TAG_SVG(DESC), TAG_SVG(TITLE) } )) {
@@ -1448,19 +1326,21 @@ static bool has_node_in_scope(GumboParser* parser, const GumboNode* node) {
1448
1326
 
1449
1327
  // Like has_an_element_in_scope, but restricts the expected qualified name to a
1450
1328
  // range of possible qualified names instead of just a single one.
1451
- static bool has_an_element_in_scope_with_tagname(GumboParser* parser, int expected_len, const GumboTag expected[]) {
1452
- return has_an_element_in_specific_scope(parser, expected_len, expected, false, (gumbo_tagset) {
1453
- TAG(APPLET), TAG(CAPTION), TAG(HTML), TAG(TABLE), TAG(TD), TAG(TH), TAG(MARQUEE),
1454
- TAG(OBJECT), TAG(TEMPLATE), TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN),
1455
- TAG_MATHML(MS), TAG_MATHML(MTEXT), TAG_MATHML(ANNOTATION_XML),
1456
- TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC), TAG_SVG(TITLE) });
1329
+ static bool has_an_element_in_scope_with_tagname(GumboParser* parser, gumbo_tagset qualset) {
1330
+ return has_an_element_in_specific_scope(parser, qualset, false, (gumbo_tagset) { TAG(APPLET),
1331
+ TAG(CAPTION), TAG(HTML), TAG(TABLE), TAG(TD), TAG(TH), TAG(MARQUEE),
1332
+ TAG(OBJECT), TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN),
1333
+ TAG_MATHML(MS), TAG_MATHML(MTEXT), TAG_MATHML(ANNOTATION_XML),
1334
+ TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC), TAG_SVG(TITLE) });
1457
1335
  }
1458
1336
 
1459
1337
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-list-item-scope
1460
1338
  static bool has_an_element_in_list_scope(GumboParser* parser, GumboTag tag) {
1461
- return has_an_element_in_specific_scope(parser, 1, &tag, false, (gumbo_tagset) { TAG(APPLET),
1339
+ gumbo_tagset qualset = {0};
1340
+ qualset[(int)tag] = (1 << (int)(GUMBO_NAMESPACE_HTML));
1341
+ return has_an_element_in_specific_scope(parser, qualset, false, (gumbo_tagset) { TAG(APPLET),
1462
1342
  TAG(CAPTION), TAG(HTML), TAG(TABLE), TAG(TD), TAG(TH), TAG(MARQUEE),
1463
- TAG(OBJECT), TAG(TEMPLATE), TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN),
1343
+ TAG(OBJECT), TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN),
1464
1344
  TAG_MATHML(MS), TAG_MATHML(MTEXT), TAG_MATHML(ANNOTATION_XML),
1465
1345
  TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC), TAG_SVG(TITLE), TAG(OL),
1466
1346
  TAG(UL) });
@@ -1468,22 +1348,27 @@ static bool has_an_element_in_list_scope(GumboParser* parser, GumboTag tag) {
1468
1348
 
1469
1349
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-button-scope
1470
1350
  static bool has_an_element_in_button_scope(GumboParser* parser, GumboTag tag) {
1471
- return has_an_element_in_specific_scope(parser, 1, &tag, false, (gumbo_tagset) { TAG(APPLET),
1351
+ gumbo_tagset qualset = {0};
1352
+ qualset[(int) tag] = (1 << (int)(GUMBO_NAMESPACE_HTML));
1353
+ return has_an_element_in_specific_scope(parser, qualset, false, (gumbo_tagset) { TAG(APPLET),
1472
1354
  TAG(CAPTION), TAG(HTML), TAG(TABLE), TAG(TD), TAG(TH), TAG(MARQUEE),
1473
- TAG(OBJECT), TAG(TEMPLATE), TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN),
1355
+ TAG(OBJECT), TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN),
1474
1356
  TAG_MATHML(MS), TAG_MATHML(MTEXT), TAG_MATHML(ANNOTATION_XML),
1475
1357
  TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC), TAG_SVG(TITLE), TAG(BUTTON) });
1476
1358
  }
1477
1359
 
1478
1360
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-table-scope
1479
1361
  static bool has_an_element_in_table_scope(GumboParser* parser, GumboTag tag) {
1480
- return has_an_element_in_specific_scope(parser, 1, &tag, false, (gumbo_tagset) { TAG(HTML),
1481
- TAG(TABLE), TAG(TEMPLATE) });
1362
+ gumbo_tagset qualset = {0};
1363
+ qualset[(int) tag] = (1 << (int)(GUMBO_NAMESPACE_HTML));
1364
+ return has_an_element_in_specific_scope(parser, qualset, false, (gumbo_tagset) { TAG(HTML), TAG(TABLE) });
1482
1365
  }
1483
1366
 
1484
1367
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-select-scope
1485
1368
  static bool has_an_element_in_select_scope(GumboParser* parser, GumboTag tag) {
1486
- return has_an_element_in_specific_scope(parser, 1, &tag, true, (gumbo_tagset) { TAG(OPTGROUP), TAG(OPTION) });
1369
+ gumbo_tagset qualset = {0};
1370
+ qualset[(int) tag] = (1 << (int)(GUMBO_NAMESPACE_HTML));
1371
+ return has_an_element_in_specific_scope(parser, qualset, true, (gumbo_tagset) { TAG(OPTGROUP), TAG(OPTION) });
1487
1372
  }
1488
1373
 
1489
1374
  // http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#generate-implied-end-tags
@@ -1491,24 +1376,12 @@ static bool has_an_element_in_select_scope(GumboParser* parser, GumboTag tag) {
1491
1376
  // Pass GUMBO_TAG_LAST to not exclude any of them.
1492
1377
  static void generate_implied_end_tags(GumboParser* parser, GumboTag exception) {
1493
1378
  for (;
1494
- node_tag_in_set(get_current_node(parser), (gumbo_tagset) { TAG(DD),
1495
- TAG(DT), TAG(LI), TAG(OPTION), TAG(OPTGROUP), TAG(P), TAG(RP), TAG(RB),
1496
- TAG(RT), TAG(RTC) }) &&
1379
+ node_tag_in_set(get_current_node(parser), (gumbo_tagset) { TAG(DD), TAG(DT),
1380
+ TAG(LI), TAG(OPTION), TAG(OPTGROUP), TAG(P), TAG(RP), TAG(RT) }) &&
1497
1381
  !node_html_tag_is(get_current_node(parser), exception);
1498
1382
  pop_current_node(parser));
1499
1383
  }
1500
1384
 
1501
- // This is the "generate all implied end tags thoroughly" clause of the spec.
1502
- // https://html.spec.whatwg.org/multipage/syntax.html#closing-elements-that-have-implied-end-tags
1503
- static void generate_all_implied_end_tags_thoroughly(GumboParser* parser) {
1504
- for (;
1505
- node_tag_in_set(get_current_node(parser), (gumbo_tagset) { TAG(CAPTION),
1506
- TAG(COLGROUP), TAG(DD), TAG(DT), TAG(LI), TAG(OPTION), TAG(OPTGROUP),
1507
- TAG(P), TAG(RP), TAG(RT), TAG(RTC), TAG(TBODY), TAG(TD),
1508
- TAG(TFOOT), TAG(TH), TAG(HEAD), TAG(TR) });
1509
- pop_current_node(parser));
1510
- }
1511
-
1512
1385
  // This factors out the clauses relating to "act as if an end tag token with tag
1513
1386
  // name "table" had been seen. Returns true if there's a table element in table
1514
1387
  // scope which was successfully closed, false if not and the token should be
@@ -1573,7 +1446,7 @@ static void close_current_select(GumboParser* parser) {
1573
1446
  // The list of nodes in the "special" category:
1574
1447
  // http://www.whatwg.org/specs/web-apps/current-work/complete/parsing.html#special
1575
1448
  static bool is_special_node(const GumboNode* node) {
1576
- assert(node->type == GUMBO_NODE_ELEMENT || node->type == GUMBO_NODE_TEMPLATE);
1449
+ assert(node->type == GUMBO_NODE_ELEMENT);
1577
1450
  return node_tag_in_set(node, (gumbo_tagset) { TAG(ADDRESS), TAG(APPLET), TAG(AREA),
1578
1451
  TAG(ARTICLE), TAG(ASIDE), TAG(BASE), TAG(BASEFONT), TAG(BGSOUND), TAG(BLOCKQUOTE),
1579
1452
  TAG(BODY), TAG(BR), TAG(BUTTON), TAG(CAPTION), TAG(CENTER), TAG(COL),
@@ -1585,8 +1458,8 @@ static bool is_special_node(const GumboNode* node) {
1585
1458
  TAG(LISTING), TAG(MARQUEE), TAG(MENU), TAG(META), TAG(NAV), TAG(NOEMBED),
1586
1459
  TAG(NOFRAMES), TAG(NOSCRIPT), TAG(OBJECT), TAG(OL), TAG(P), TAG(PARAM),
1587
1460
  TAG(PLAINTEXT), TAG(PRE), TAG(SCRIPT), TAG(SECTION), TAG(SELECT), TAG(STYLE),
1588
- TAG(SUMMARY), TAG(TABLE), TAG(TBODY), TAG(TD), TAG(TEMPLATE), TAG(TEXTAREA),
1589
- TAG(TFOOT), TAG(TH), TAG(THEAD), TAG(TITLE), TAG(TR), TAG(UL), TAG(WBR), TAG(XMP),
1461
+ TAG(SUMMARY), TAG(TABLE), TAG(TBODY), TAG(TD), TAG(TEXTAREA), TAG(TFOOT),
1462
+ TAG(TH), TAG(THEAD), TAG(TITLE), TAG(TR), TAG(UL), TAG(WBR), TAG(XMP),
1590
1463
 
1591
1464
  TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN), TAG_MATHML(MS),
1592
1465
  TAG_MATHML(MTEXT), TAG_MATHML(ANNOTATION_XML),
@@ -1796,20 +1669,13 @@ static void remove_from_parent(GumboParser* parser, GumboNode* node) {
1796
1669
 
1797
1670
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#an-introduction-to-error-handling-and-strange-cases-in-the-parser
1798
1671
  // Also described in the "in body" handling for end formatting tags.
1799
- static bool adoption_agency_algorithm(GumboParser* parser, GumboToken* token, GumboTag subject) {
1672
+ static bool adoption_agency_algorithm(
1673
+ GumboParser* parser, GumboToken* token, GumboTag closing_tag) {
1800
1674
  GumboParserState* state = parser->_parser_state;
1801
1675
  gumbo_debug("Entering adoption agency algorithm.\n");
1802
- // Step 1.
1803
- GumboNode* current_node = get_current_node(parser);
1804
- if (current_node->v.element.tag_namespace == GUMBO_NAMESPACE_HTML &&
1805
- current_node->v.element.tag == subject &&
1806
- gumbo_vector_index_of(&state->_active_formatting_elements, current_node) == -1) {
1807
- pop_current_node(parser);
1808
- return false;
1809
- }
1810
- // Steps 2-4 & 20:
1676
+ // Steps 1-3 & 16:
1811
1677
  for (int i = 0; i < 8; ++i) {
1812
- // Step 5.
1678
+ // Step 4.
1813
1679
  GumboNode* formatting_node = NULL;
1814
1680
  int formatting_node_in_open_elements = -1;
1815
1681
  for (int j = state->_active_formatting_elements.length; --j >= 0; ) {
@@ -1819,13 +1685,13 @@ static bool adoption_agency_algorithm(GumboParser* parser, GumboToken* token, Gu
1819
1685
  // Last scope marker; abort the algorithm.
1820
1686
  return false;
1821
1687
  }
1822
- if (node_html_tag_is(current_node, subject)) {
1688
+ if (current_node->type == GUMBO_NODE_ELEMENT && current_node->v.element.tag == closing_tag) {
1823
1689
  // Found it.
1824
1690
  formatting_node = current_node;
1825
1691
  formatting_node_in_open_elements = gumbo_vector_index_of(
1826
- &state->_open_elements, formatting_node);
1692
+ &state->_open_elements, formatting_node);
1827
1693
  gumbo_debug("Formatting element of tag %s at %d.\n",
1828
- gumbo_normalized_tagname(subject),
1694
+ gumbo_normalized_tagname(closing_tag),
1829
1695
  formatting_node_in_open_elements);
1830
1696
  break;
1831
1697
  }
@@ -1838,23 +1704,18 @@ static bool adoption_agency_algorithm(GumboParser* parser, GumboToken* token, Gu
1838
1704
  return false;
1839
1705
  }
1840
1706
 
1841
- // Step 6
1842
1707
  if (formatting_node_in_open_elements == -1) {
1843
1708
  gumbo_debug("Formatting node not on stack of open elements.\n");
1844
- parser_add_parse_error(parser, token);
1845
1709
  gumbo_vector_remove(parser, formatting_node,
1846
1710
  &state->_active_formatting_elements);
1847
1711
  return false;
1848
1712
  }
1849
1713
 
1850
- // Step 7
1851
1714
  if (!has_an_element_in_scope(parser, formatting_node->v.element.tag)) {
1852
1715
  parser_add_parse_error(parser, token);
1853
1716
  gumbo_debug("Element not in scope.\n");
1854
1717
  return false;
1855
1718
  }
1856
-
1857
- // Step 8
1858
1719
  if (formatting_node != get_current_node(parser)) {
1859
1720
  parser_add_parse_error(parser, token); // But continue onwards.
1860
1721
  }
@@ -1862,20 +1723,20 @@ static bool adoption_agency_algorithm(GumboParser* parser, GumboToken* token, Gu
1862
1723
  assert(!node_html_tag_is(formatting_node, GUMBO_TAG_HTML));
1863
1724
  assert(!node_html_tag_is(formatting_node, GUMBO_TAG_BODY));
1864
1725
 
1865
- // Step 9 & 10
1726
+ // Step 5 & 6.
1866
1727
  GumboNode* furthest_block = NULL;
1867
1728
  for (int j = formatting_node_in_open_elements;
1868
1729
  j < state->_open_elements.length; ++j) {
1869
1730
  assert(j > 0);
1870
1731
  GumboNode* current = state->_open_elements.data[j];
1871
1732
  if (is_special_node(current)) {
1872
- // Step 9.
1733
+ // Step 5.
1873
1734
  furthest_block = current;
1874
1735
  break;
1875
1736
  }
1876
1737
  }
1877
1738
  if (!furthest_block) {
1878
- // Step 10.
1739
+ // Step 6.
1879
1740
  while (get_current_node(parser) != formatting_node) {
1880
1741
  pop_current_node(parser);
1881
1742
  }
@@ -1888,35 +1749,32 @@ static bool adoption_agency_algorithm(GumboParser* parser, GumboToken* token, Gu
1888
1749
  assert(!node_html_tag_is(furthest_block, GUMBO_TAG_HTML));
1889
1750
  assert(furthest_block);
1890
1751
 
1891
- // Step 11.
1752
+ // Step 7.
1892
1753
  // Elements may be moved and reparented by this algorithm, so
1893
1754
  // common_ancestor is not necessarily the same as formatting_node->parent.
1894
1755
  GumboNode* common_ancestor =
1895
- state->_open_elements.data[gumbo_vector_index_of(
1896
- &state->_open_elements, formatting_node) - 1];
1756
+ state->_open_elements.data[gumbo_vector_index_of(
1757
+ &state->_open_elements, formatting_node) - 1];
1897
1758
  gumbo_debug("Common ancestor tag = %s, furthest block tag = %s.\n",
1898
1759
  gumbo_normalized_tagname(common_ancestor->v.element.tag),
1899
1760
  gumbo_normalized_tagname(furthest_block->v.element.tag));
1900
1761
 
1901
- // Step 12.
1762
+ // Step 8.
1902
1763
  int bookmark = gumbo_vector_index_of(
1903
- &state->_active_formatting_elements, formatting_node) + 1;
1904
- gumbo_debug("Bookmark at %d.\n", bookmark);
1905
- // Step 13.
1764
+ &state->_active_formatting_elements, formatting_node);;
1765
+ // Step 9.
1906
1766
  GumboNode* node = furthest_block;
1907
1767
  GumboNode* last_node = furthest_block;
1908
1768
  // Must be stored explicitly, in case node is removed from the stack of open
1909
1769
  // elements, to handle step 9.4.
1910
1770
  int saved_node_index = gumbo_vector_index_of(&state->_open_elements, node);
1911
1771
  assert(saved_node_index > 0);
1912
- // Step 13.1.
1913
- for (int j = 0;;) {
1914
- // Step 13.2.
1915
- ++j;
1916
- // Step 13.3.
1772
+ // Step 9.1-9.3 & 9.11.
1773
+ for (int j = 0; j < 3; ++j) {
1774
+ // Step 9.4.
1917
1775
  int node_index = gumbo_vector_index_of(&state->_open_elements, node);
1918
1776
  gumbo_debug(
1919
- "Current index: %d, last index: %d.\n", node_index, saved_node_index);
1777
+ "Current index: %d, last index: %d.\n", node_index, saved_node_index);
1920
1778
  if (node_index == -1) {
1921
1779
  node_index = saved_node_index;
1922
1780
  }
@@ -1925,78 +1783,61 @@ static bool adoption_agency_algorithm(GumboParser* parser, GumboToken* token, Gu
1925
1783
  assert(node_index < state->_open_elements.capacity);
1926
1784
  node = state->_open_elements.data[node_index];
1927
1785
  assert(node->parent);
1928
- if (node == formatting_node) {
1929
- // Step 13.4.
1930
- break;
1931
- }
1932
- int formatting_index =
1933
- gumbo_vector_index_of(&state->_active_formatting_elements, node);
1934
- if (j > 3 && formatting_index != -1) {
1935
- // Step 13.5.
1936
- gumbo_debug(
1937
- "Removing formatting element at %d.\n", formatting_index);
1938
- gumbo_vector_remove_at(
1939
- parser,
1940
- formatting_index,
1941
- &state->_active_formatting_elements);
1942
- // Removing the element shifts all indices over by one, so we may need
1943
- // to move the bookmark.
1944
- if (formatting_index < bookmark) {
1945
- --bookmark;
1946
- gumbo_debug("Moving bookmark to %d.\n", bookmark);
1947
- }
1948
- continue;
1949
- }
1950
- if (formatting_index == -1) {
1951
- // Step 13.6.
1786
+ // Step 9.5.
1787
+ if (gumbo_vector_index_of(
1788
+ &state->_active_formatting_elements, node) == -1) {
1952
1789
  gumbo_vector_remove_at(parser, node_index, &state->_open_elements);
1953
1790
  continue;
1791
+ } else if (node == formatting_node) {
1792
+ // Step 9.6.
1793
+ break;
1954
1794
  }
1955
- // Step 13.7.
1956
- // "common ancestor as the intended parent" doesn't actually mean insert
1957
- // it into the common ancestor; that happens below.
1795
+ // Step 9.7.
1796
+ int formatting_index = gumbo_vector_index_of(
1797
+ &state->_active_formatting_elements, node);
1958
1798
  node = clone_node(parser, node, GUMBO_INSERTION_ADOPTION_AGENCY_CLONED);
1959
- assert(formatting_index >= 0);
1960
1799
  state->_active_formatting_elements.data[formatting_index] = node;
1961
- assert(node_index >= 0);
1962
1800
  state->_open_elements.data[node_index] = node;
1963
- // Step 13.8.
1801
+ // Step 9.8.
1964
1802
  if (last_node == furthest_block) {
1965
1803
  bookmark = formatting_index + 1;
1966
- gumbo_debug("Bookmark moved to %d.\n", bookmark);
1967
1804
  assert(bookmark <= state->_active_formatting_elements.length);
1968
1805
  }
1969
- // Step 13.9.
1806
+ // Step 9.9.
1970
1807
  last_node->parse_flags |= GUMBO_INSERTION_ADOPTION_AGENCY_MOVED;
1971
1808
  remove_from_parent(parser, last_node);
1972
1809
  append_node(parser, node, last_node);
1973
- // Step 13.10.
1810
+ // Step 9.10.
1974
1811
  last_node = node;
1975
- } // Step 13.11.
1812
+ }
1976
1813
 
1977
- // Step 14.
1814
+ // Step 10.
1978
1815
  gumbo_debug("Removing %s node from parent ",
1979
1816
  gumbo_normalized_tagname(last_node->v.element.tag));
1980
1817
  remove_from_parent(parser, last_node);
1981
1818
  last_node->parse_flags |= GUMBO_INSERTION_ADOPTION_AGENCY_MOVED;
1982
- InsertionLocation location =
1983
- get_appropriate_insertion_location(parser, common_ancestor);
1984
- gumbo_debug("and inserting it into %s.\n",
1985
- gumbo_normalized_tagname(location.target->v.element.tag));
1986
- insert_node(parser, last_node, location);
1819
+ if (node_tag_in_set(common_ancestor, (gumbo_tagset) { TAG(TABLE), TAG(TBODY),
1820
+ TAG(TFOOT), TAG(THEAD), TAG(TR) })) {
1821
+ gumbo_debug("and foster-parenting it.\n");
1822
+ foster_parent_element(parser, last_node);
1823
+ } else {
1824
+ gumbo_debug("and inserting it into %s.\n",
1825
+ gumbo_normalized_tagname(common_ancestor->v.element.tag));
1826
+ append_node(parser, common_ancestor, last_node);
1827
+ }
1987
1828
 
1988
- // Step 15.
1829
+ // Step 11.
1989
1830
  GumboNode* new_formatting_node = clone_node(
1990
- parser, formatting_node, GUMBO_INSERTION_ADOPTION_AGENCY_CLONED);
1831
+ parser, formatting_node, GUMBO_INSERTION_ADOPTION_AGENCY_CLONED);
1991
1832
  formatting_node->parse_flags |= GUMBO_INSERTION_IMPLICIT_END_TAG;
1992
1833
 
1993
- // Step 16. Instead of appending nodes one-by-one, we swap the children
1834
+ // Step 12. Instead of appending nodes one-by-one, we swap the children
1994
1835
  // vector of furthest_block with the empty children of new_formatting_node,
1995
1836
  // reducing memory traffic and allocations. We still have to reset their
1996
1837
  // parent pointers, though.
1997
1838
  GumboVector temp = new_formatting_node->v.element.children;
1998
1839
  new_formatting_node->v.element.children =
1999
- furthest_block->v.element.children;
1840
+ furthest_block->v.element.children;
2000
1841
  furthest_block->v.element.children = temp;
2001
1842
 
2002
1843
  temp = new_formatting_node->v.element.children;
@@ -2005,39 +1846,36 @@ static bool adoption_agency_algorithm(GumboParser* parser, GumboToken* token, Gu
2005
1846
  child->parent = new_formatting_node;
2006
1847
  }
2007
1848
 
2008
- // Step 17.
1849
+ // Step 13.
2009
1850
  append_node(parser, furthest_block, new_formatting_node);
2010
1851
 
2011
- // Step 18.
1852
+ // Step 14.
2012
1853
  // If the formatting node was before the bookmark, it may shift over all
2013
1854
  // indices after it, so we need to explicitly find the index and possibly
2014
1855
  // adjust the bookmark.
2015
1856
  int formatting_node_index = gumbo_vector_index_of(
2016
- &state->_active_formatting_elements, formatting_node);
1857
+ &state->_active_formatting_elements, formatting_node);
2017
1858
  assert(formatting_node_index != -1);
2018
1859
  if (formatting_node_index < bookmark) {
2019
- gumbo_debug(
2020
- "Formatting node at %d is before bookmark at %d; decrementing.\n",
2021
- formatting_node_index, bookmark);
2022
1860
  --bookmark;
2023
1861
  }
2024
1862
  gumbo_vector_remove_at(
2025
- parser, formatting_node_index, &state->_active_formatting_elements);
1863
+ parser, formatting_node_index, &state->_active_formatting_elements);
2026
1864
  assert(bookmark >= 0);
2027
1865
  assert(bookmark <= state->_active_formatting_elements.length);
2028
1866
  gumbo_vector_insert_at(parser, new_formatting_node, bookmark,
2029
1867
  &state->_active_formatting_elements);
2030
1868
 
2031
- // Step 19.
1869
+ // Step 15.
2032
1870
  gumbo_vector_remove(
2033
- parser, formatting_node, &state->_open_elements);
1871
+ parser, formatting_node, &state->_open_elements);
2034
1872
  int insert_at = gumbo_vector_index_of(
2035
- &state->_open_elements, furthest_block) + 1;
1873
+ &state->_open_elements, furthest_block) + 1;
2036
1874
  assert(insert_at >= 0);
2037
1875
  assert(insert_at <= state->_open_elements.length);
2038
1876
  gumbo_vector_insert_at(
2039
- parser, new_formatting_node, insert_at, &state->_open_elements);
2040
- } // Step 20.
1877
+ parser, new_formatting_node, insert_at, &state->_open_elements);
1878
+ }
2041
1879
  return true;
2042
1880
  }
2043
1881
 
@@ -2216,45 +2054,29 @@ static bool handle_in_head(GumboParser* parser, GumboToken* token) {
2216
2054
  assert(node_html_tag_is(head, GUMBO_TAG_HEAD));
2217
2055
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_HEAD);
2218
2056
  return true;
2219
- } else if (tag_in(token, kEndTag, (gumbo_tagset) { TAG(BODY), TAG(HTML), TAG(BR) })) {
2220
- pop_current_node(parser);
2221
- set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_HEAD);
2222
- parser->_parser_state->_reprocess_current_token = true;
2223
- return true;
2224
- } else if (tag_is(token, kStartTag, GUMBO_TAG_TEMPLATE)) {
2225
- insert_element_from_token(parser, token);
2226
- add_formatting_element(parser, &kActiveFormattingScopeMarker);
2227
- parser->_parser_state->_frameset_ok = false;
2228
- set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TEMPLATE);
2229
- push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TEMPLATE);
2230
- return true;
2231
- } else if (tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
2232
- if (!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
2233
- parser_add_parse_error(parser, token);
2234
- ignore_token(parser);
2235
- return false;
2236
- }
2237
- generate_all_implied_end_tags_thoroughly(parser);
2238
- bool success = true;
2239
- if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_TEMPLATE)) {
2240
- parser_add_parse_error(parser, token);
2241
- success = false;
2242
- }
2243
- while (!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_TEMPLATE));
2244
- clear_active_formatting_elements(parser);
2245
- pop_template_insertion_mode(parser);
2246
- reset_insertion_mode_appropriately(parser);
2247
- return success;
2248
- } else if (tag_is(token, kStartTag, GUMBO_TAG_HEAD) || (token->type == GUMBO_TOKEN_END_TAG)) {
2057
+ } else if (tag_is(token, kStartTag, GUMBO_TAG_HEAD)) {
2058
+ parser_add_parse_error(parser, token);
2059
+ ignore_token(parser);
2060
+ return false;
2061
+ } else if (tag_is(token, kStartTag, GUMBO_TAG_HEAD) ||
2062
+ (token->type == GUMBO_TOKEN_END_TAG &&
2063
+ !tag_in(token, kEndTag, (gumbo_tagset) { TAG(BODY), TAG(HTML),
2064
+ TAG(BR) }))) {
2065
+ parser_add_parse_error(parser, token);
2066
+ return false;
2067
+ } else if (tag_is(token, kStartTag, GUMBO_TAG_UNKNOWN) && token->v.start_tag.is_self_closing) {
2249
2068
  parser_add_parse_error(parser, token);
2250
2069
  ignore_token(parser);
2251
2070
  return false;
2252
2071
  } else {
2253
- pop_current_node(parser);
2072
+ const GumboNode* node = pop_current_node(parser);
2073
+ assert(node_html_tag_is(node, GUMBO_TAG_HEAD));
2074
+ AVOID_UNUSED_VARIABLE_WARNING(node);
2254
2075
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_HEAD);
2255
2076
  parser->_parser_state->_reprocess_current_token = true;
2256
2077
  return true;
2257
2078
  }
2079
+
2258
2080
  return true;
2259
2081
  }
2260
2082
 
@@ -2320,7 +2142,7 @@ static bool handle_after_head(GumboParser* parser, GumboToken* token) {
2320
2142
  } else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(BASE), TAG(BASEFONT),
2321
2143
  TAG(BGSOUND), TAG(LINK), TAG(META),
2322
2144
  TAG(NOFRAMES), TAG(SCRIPT), TAG(STYLE),
2323
- TAG(TEMPLATE), TAG(TITLE) })) {
2145
+ TAG(TITLE) })) {
2324
2146
  parser_add_parse_error(parser, token);
2325
2147
  assert(state->_head_element != NULL);
2326
2148
  // This must be flushed before we push the head element on, as there may be
@@ -2330,8 +2152,6 @@ static bool handle_after_head(GumboParser* parser, GumboToken* token) {
2330
2152
  bool result = handle_in_head(parser, token);
2331
2153
  gumbo_vector_remove(parser, state->_head_element, &state->_open_elements);
2332
2154
  return result;
2333
- } else if (tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
2334
- return handle_in_head(parser, token);
2335
2155
  } else if (tag_is(token, kStartTag, GUMBO_TAG_HEAD) ||
2336
2156
  (token->type == GUMBO_TOKEN_END_TAG &&
2337
2157
  !tag_in(token, kEndTag, (gumbo_tagset) { TAG(BODY), TAG(HTML), TAG(BR) }))) {
@@ -2346,23 +2166,28 @@ static bool handle_after_head(GumboParser* parser, GumboToken* token) {
2346
2166
  }
2347
2167
  }
2348
2168
 
2349
- static GumboNode* destroy_node(GumboParser* parser, GumboNode* node) {
2169
+ static void destroy_node(GumboParser* parser, GumboNode* node) {
2350
2170
  switch (node->type) {
2351
2171
  case GUMBO_NODE_DOCUMENT:
2352
2172
  {
2353
2173
  GumboDocument* doc = &node->v.document;
2174
+ for (int i = 0; i < doc->children.length; ++i) {
2175
+ destroy_node(parser, doc->children.data[i]);
2176
+ }
2354
2177
  gumbo_parser_deallocate(parser, (void*) doc->children.data);
2355
2178
  gumbo_parser_deallocate(parser, (void*) doc->name);
2356
2179
  gumbo_parser_deallocate(parser, (void*) doc->public_identifier);
2357
2180
  gumbo_parser_deallocate(parser, (void*) doc->system_identifier);
2358
2181
  }
2359
2182
  break;
2360
- case GUMBO_NODE_TEMPLATE:
2361
2183
  case GUMBO_NODE_ELEMENT:
2362
2184
  for (int i = 0; i < node->v.element.attributes.length; ++i) {
2363
2185
  gumbo_destroy_attribute(parser, node->v.element.attributes.data[i]);
2364
2186
  }
2365
2187
  gumbo_parser_deallocate(parser, node->v.element.attributes.data);
2188
+ for (int i = 0; i < node->v.element.children.length; ++i) {
2189
+ destroy_node(parser, node->v.element.children.data[i]);
2190
+ }
2366
2191
  gumbo_parser_deallocate(parser, node->v.element.children.data);
2367
2192
  break;
2368
2193
  case GUMBO_NODE_TEXT:
@@ -2372,21 +2197,7 @@ static GumboNode* destroy_node(GumboParser* parser, GumboNode* node) {
2372
2197
  gumbo_parser_deallocate(parser, (void*) node->v.text.text);
2373
2198
  break;
2374
2199
  }
2375
- // Remove from the next/prev linked list.
2376
- GumboNode* prev = node->prev;
2377
- GumboNode* next = node->next;
2378
- if (prev != NULL) {
2379
- prev->next = next;
2380
- }
2381
- if (next != NULL) {
2382
- next->prev = prev;
2383
- }
2384
- if (parser->_parser_state && parser->_parser_state->_current_node == node) {
2385
- parser->_parser_state->_current_node = prev;
2386
- }
2387
-
2388
2200
  gumbo_parser_deallocate(parser, node);
2389
- return next;
2390
2201
  }
2391
2202
 
2392
2203
  // http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-inbody
@@ -2415,24 +2226,20 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2415
2226
  ignore_token(parser);
2416
2227
  return false;
2417
2228
  } else if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
2418
- parser_add_parse_error(parser, token);
2419
- if (has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
2420
- ignore_token(parser);
2421
- return false;
2422
- }
2423
2229
  assert(parser->_output->root != NULL);
2424
2230
  assert(parser->_output->root->type == GUMBO_NODE_ELEMENT);
2231
+ parser_add_parse_error(parser, token);
2425
2232
  merge_attributes(parser, token, parser->_output->root);
2426
2233
  return false;
2427
2234
  } else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(BASE), TAG(BASEFONT),
2428
2235
  TAG(BGSOUND), TAG(MENUITEM), TAG(LINK),
2429
2236
  TAG(META), TAG(NOFRAMES), TAG(SCRIPT),
2430
- TAG(STYLE), TAG(TEMPLATE), TAG(TITLE) } ) || tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
2237
+ TAG(STYLE), TAG(TITLE) } )) {
2431
2238
  return handle_in_head(parser, token);
2432
2239
  } else if (tag_is(token, kStartTag, GUMBO_TAG_BODY)) {
2433
2240
  parser_add_parse_error(parser, token);
2434
2241
  if (state->_open_elements.length < 2 ||
2435
- !node_html_tag_is(state->_open_elements.data[1], GUMBO_TAG_BODY) || has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
2242
+ !node_html_tag_is(state->_open_elements.data[1], GUMBO_TAG_BODY)) {
2436
2243
  ignore_token(parser);
2437
2244
  return false;
2438
2245
  }
@@ -2484,11 +2291,9 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2484
2291
  TAG(DT), TAG(LI), TAG(P), TAG(TBODY), TAG(TD), TAG(TFOOT), TAG(TH),
2485
2292
  TAG(THEAD), TAG(TR), TAG(BODY), TAG(HTML) } )) {
2486
2293
  parser_add_parse_error(parser, token);
2294
+ return false;
2487
2295
  }
2488
2296
  }
2489
- if (get_current_template_insertion_mode(parser) != GUMBO_INSERTION_MODE_INITIAL) {
2490
- return handle_in_template(parser, token);
2491
- }
2492
2297
  return true;
2493
2298
  } else if (tag_in(token, kEndTag, (gumbo_tagset) { TAG(BODY), TAG(HTML) })) {
2494
2299
  if (!has_an_element_in_scope(parser, GUMBO_TAG_BODY)) {
@@ -2498,11 +2303,10 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2498
2303
  }
2499
2304
  bool success = true;
2500
2305
  for (int i = 0; i < state->_open_elements.length; ++i) {
2501
- if (!node_tag_in_set(state->_open_elements.data[i], (gumbo_tagset) {
2502
- TAG(DD), TAG(DT), TAG(LI), TAG(OPTGROUP), TAG(OPTION), TAG(P),
2503
- TAG(RB), TAG(RP), TAG(RT), TAG(RTC), TAG(TBODY), TAG(TD),
2504
- TAG(TFOOT), TAG(TH), TAG(THEAD), TAG(TR),
2505
- TAG(BODY), TAG(HTML) })) {
2306
+ if (!node_tag_in_set(state->_open_elements.data[i], (gumbo_tagset) { TAG(DD),
2307
+ TAG(DT), TAG(LI), TAG(OPTGROUP), TAG(OPTION), TAG(P), TAG(RP),
2308
+ TAG(RT), TAG(TBODY), TAG(TD), TAG(TFOOT), TAG(TH), TAG(THEAD),
2309
+ TAG(TR), TAG(BODY), TAG(HTML) })) {
2506
2310
  parser_add_parse_error(parser, token);
2507
2311
  success = false;
2508
2312
  break;
@@ -2520,7 +2324,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2520
2324
  } else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(ADDRESS), TAG(ARTICLE),
2521
2325
  TAG(ASIDE), TAG(BLOCKQUOTE), TAG(CENTER), TAG(DETAILS),
2522
2326
  TAG(DIR), TAG(DIV), TAG(DL), TAG(FIELDSET), TAG(FIGCAPTION),
2523
- TAG(FIGURE), TAG(FOOTER), TAG(HEADER), TAG(HGROUP), TAG(MENU), TAG(MAIN),
2327
+ TAG(FIGURE), TAG(FOOTER), TAG(HEADER), TAG(HGROUP), TAG(MENU),
2524
2328
  TAG(NAV), TAG(OL), TAG(P), TAG(SECTION), TAG(SUMMARY), TAG(UL) })) {
2525
2329
  bool result = maybe_implicitly_close_p_tag(parser, token);
2526
2330
  insert_element_from_token(parser, token);
@@ -2543,17 +2347,15 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2543
2347
  state->_frameset_ok = false;
2544
2348
  return result;
2545
2349
  } else if (tag_is(token, kStartTag, GUMBO_TAG_FORM)) {
2546
- if (state->_form_element != NULL && !has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
2350
+ if (state->_form_element != NULL) {
2547
2351
  gumbo_debug("Ignoring nested form.\n");
2548
2352
  parser_add_parse_error(parser, token);
2549
2353
  ignore_token(parser);
2550
2354
  return false;
2551
2355
  }
2552
2356
  bool result = maybe_implicitly_close_p_tag(parser, token);
2553
- GumboNode* form_element = insert_element_from_token(parser, token);
2554
- if (!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
2555
- state->_form_element = form_element;
2556
- }
2357
+ state->_form_element =
2358
+ insert_element_from_token(parser, token);
2557
2359
  return result;
2558
2360
  } else if (tag_is(token, kStartTag, GUMBO_TAG_LI)) {
2559
2361
  maybe_implicitly_close_list_tag(parser, token, true);
@@ -2585,7 +2387,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2585
2387
  TAG(ASIDE), TAG(BLOCKQUOTE), TAG(BUTTON), TAG(CENTER), TAG(DETAILS),
2586
2388
  TAG(DIR), TAG(DIV), TAG(DL), TAG(FIELDSET), TAG(FIGCAPTION),
2587
2389
  TAG(FIGURE), TAG(FOOTER), TAG(HEADER), TAG(HGROUP), TAG(LISTING),
2588
- TAG(MAIN), TAG(MENU), TAG(NAV), TAG(OL), TAG(PRE),
2390
+ TAG(MENU), TAG(NAV), TAG(OL), TAG(PRE),
2589
2391
  TAG(SECTION), TAG(SUMMARY), TAG(UL) })) {
2590
2392
  GumboTag tag = token->v.end_tag;
2591
2393
  if (!has_an_element_in_scope(parser, tag)) {
@@ -2596,45 +2398,30 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2596
2398
  implicitly_close_tags(parser, token, GUMBO_NAMESPACE_HTML, token->v.end_tag);
2597
2399
  return true;
2598
2400
  } else if (tag_is(token, kEndTag, GUMBO_TAG_FORM)) {
2599
- if (has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
2600
- if (!has_an_element_in_scope(parser, GUMBO_TAG_FORM)) {
2601
- parser_add_parse_error(parser, token);
2602
- ignore_token(parser);
2603
- return false;
2604
- }
2605
- bool success = true;
2606
- generate_implied_end_tags(parser, GUMBO_TAG_LAST);
2607
- if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_FORM)) {
2608
- parser_add_parse_error(parser, token);
2609
- return false;
2610
- }
2611
- while(!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_FORM));
2612
- return success;
2613
- } else {
2614
- bool result = true;
2615
- const GumboNode* node = state->_form_element;
2616
- assert(!node || node->type == GUMBO_NODE_ELEMENT);
2617
- state->_form_element = NULL;
2618
- if (!node || !has_node_in_scope(parser, node)) {
2619
- gumbo_debug("Closing an unopened form.\n");
2620
- parser_add_parse_error(parser, token);
2621
- ignore_token(parser);
2622
- return false;
2623
- }
2624
- // This differs from implicitly_close_tags because we remove *only* the
2625
- // <form> element; other nodes are left in scope.
2626
- generate_implied_end_tags(parser, GUMBO_TAG_LAST);
2627
- if (get_current_node(parser) != node) {
2628
- parser_add_parse_error(parser, token);
2629
- result = false;
2630
- }
2631
-
2632
- GumboVector* open_elements = &state->_open_elements;
2633
- int index = gumbo_vector_index_of(open_elements, node);
2634
- assert(index >= 0);
2635
- gumbo_vector_remove_at(parser, index, open_elements);
2636
- return result;
2401
+ bool result = true;
2402
+ const GumboNode* node = state->_form_element;
2403
+ assert(!node || node->type == GUMBO_NODE_ELEMENT);
2404
+ state->_form_element = NULL;
2405
+ if (!node || !has_node_in_scope(parser, node)) {
2406
+ gumbo_debug("Closing an unopened form.\n");
2407
+ parser_add_parse_error(parser, token);
2408
+ ignore_token(parser);
2409
+ return false;
2637
2410
  }
2411
+ // This differs from implicitly_close_tags because we remove *only* the
2412
+ // <form> element; other nodes are left in scope.
2413
+ generate_implied_end_tags(parser, GUMBO_TAG_LAST);
2414
+ if (get_current_node(parser) != node) {
2415
+ parser_add_parse_error(parser, token);
2416
+ result = false;
2417
+ }
2418
+
2419
+ GumboVector* open_elements = &state->_open_elements;
2420
+ int index = open_elements->length - 1;
2421
+ for (; index >= 0 && open_elements->data[index] != node; --index);
2422
+ assert(index >= 0);
2423
+ gumbo_vector_remove_at(parser, index, open_elements);
2424
+ return result;
2638
2425
  } else if (tag_is(token, kEndTag, GUMBO_TAG_P)) {
2639
2426
  if (!has_an_element_in_button_scope(parser, GUMBO_TAG_P)) {
2640
2427
  parser_add_parse_error(parser, token);
@@ -2661,11 +2448,10 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2661
2448
  return false;
2662
2449
  }
2663
2450
  return implicitly_close_tags(parser, token, GUMBO_NAMESPACE_HTML, token_tag);
2664
- } else if (tag_in(token, kEndTag, (gumbo_tagset) {
2665
- TAG(H1), TAG(H2), TAG(H3), TAG(H4), TAG(H5), TAG(H6) })) {
2666
- if (!has_an_element_in_scope_with_tagname(parser, 6, (GumboTag[]) {
2667
- GUMBO_TAG_H1, GUMBO_TAG_H2, GUMBO_TAG_H3,
2668
- GUMBO_TAG_H4, GUMBO_TAG_H5, GUMBO_TAG_H6})) {
2451
+ } else if (tag_in(token, kEndTag, (gumbo_tagset) { TAG(H1), TAG(H2), TAG(H3),
2452
+ TAG(H4), TAG(H5), TAG(H6) })) {
2453
+ if (!has_an_element_in_scope_with_tagname(parser, (gumbo_tagset) { TAG(H1), TAG(H2), TAG(H3), TAG(H4),
2454
+ TAG(H5), TAG(H6) })) {
2669
2455
  // No heading open; ignore the token entirely.
2670
2456
  parser_add_parse_error(parser, token);
2671
2457
  ignore_token(parser);
@@ -2806,8 +2592,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2806
2592
  return result;
2807
2593
  } else if (tag_is(token, kStartTag, GUMBO_TAG_ISINDEX)) {
2808
2594
  parser_add_parse_error(parser, token);
2809
- if (parser->_parser_state->_form_element != NULL &&
2810
- !has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
2595
+ if (parser->_parser_state->_form_element != NULL) {
2811
2596
  ignore_token(parser);
2812
2597
  return false;
2813
2598
  }
@@ -2822,9 +2607,6 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2822
2607
 
2823
2608
  GumboNode* form = insert_element_of_tag_type(
2824
2609
  parser, GUMBO_TAG_FORM, GUMBO_INSERTION_FROM_ISINDEX);
2825
- if (!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
2826
- parser->_parser_state->_form_element = form;
2827
- }
2828
2610
  if (action_attr) {
2829
2611
  gumbo_vector_add(parser, action_attr, &form->v.element.attributes);
2830
2612
  }
@@ -2888,9 +2670,6 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2888
2670
  parser, GUMBO_TAG_HR, GUMBO_INSERTION_FROM_ISINDEX);
2889
2671
  pop_current_node(parser); // <hr>
2890
2672
  pop_current_node(parser); // <form>
2891
- if (!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
2892
- parser->_parser_state->_form_element = NULL;
2893
- }
2894
2673
  return false;
2895
2674
  } else if (tag_is(token, kStartTag, GUMBO_TAG_TEXTAREA)) {
2896
2675
  run_generic_parsing_algorithm(parser, token, GUMBO_LEX_RCDATA);
@@ -2932,17 +2711,12 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2932
2711
  reconstruct_active_formatting_elements(parser);
2933
2712
  insert_element_from_token(parser, token);
2934
2713
  return true;
2935
- } else if (tag_in(token, kStartTag, (gumbo_tagset) {
2936
- TAG(RB), TAG(RP), TAG(RT), TAG(RTC) })) {
2714
+ } else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(RP), TAG(RT) })) {
2937
2715
  bool success = true;
2938
- GumboTag exception = tag_in(token, kStartTag, (gumbo_tagset) {
2939
- TAG(RT), TAG(RP) }) ? GUMBO_TAG_RTC : GUMBO_TAG_LAST;
2940
2716
  if (has_an_element_in_scope(parser, GUMBO_TAG_RUBY)) {
2941
- generate_implied_end_tags(parser, exception);
2717
+ generate_implied_end_tags(parser, GUMBO_TAG_LAST);
2942
2718
  }
2943
- if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_RUBY) &&
2944
- !(exception == GUMBO_TAG_LAST ||
2945
- node_html_tag_is(get_current_node(parser), GUMBO_TAG_RTC))) {
2719
+ if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_RUBY)) {
2946
2720
  parser_add_parse_error(parser, token);
2947
2721
  success = false;
2948
2722
  }
@@ -3113,8 +2887,7 @@ static bool handle_in_table(GumboParser* parser, GumboToken* token) {
3113
2887
  parser_add_parse_error(parser, token);
3114
2888
  ignore_token(parser);
3115
2889
  return false;
3116
- } else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(STYLE), TAG(SCRIPT), TAG(TEMPLATE) }) ||
3117
- (tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE))) {
2890
+ } else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(STYLE), TAG(SCRIPT) })) {
3118
2891
  return handle_in_head(parser, token);
3119
2892
  } else if (tag_is(token, kStartTag, GUMBO_TAG_INPUT) &&
3120
2893
  attribute_matches(&token->v.start_tag.attributes,
@@ -3125,7 +2898,7 @@ static bool handle_in_table(GumboParser* parser, GumboToken* token) {
3125
2898
  return false;
3126
2899
  } else if (tag_is(token, kStartTag, GUMBO_TAG_FORM)) {
3127
2900
  parser_add_parse_error(parser, token);
3128
- if (state->_form_element || has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
2901
+ if (state->_form_element) {
3129
2902
  ignore_token(parser);
3130
2903
  return false;
3131
2904
  }
@@ -3133,7 +2906,11 @@ static bool handle_in_table(GumboParser* parser, GumboToken* token) {
3133
2906
  pop_current_node(parser);
3134
2907
  return false;
3135
2908
  } else if (token->type == GUMBO_TOKEN_EOF) {
3136
- return handle_in_body(parser, token);
2909
+ if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_HTML)) {
2910
+ parser_add_parse_error(parser, token);
2911
+ return false;
2912
+ }
2913
+ return true;
3137
2914
  } else {
3138
2915
  parser_add_parse_error(parser, token);
3139
2916
  state->_foster_parent_insertions = true;
@@ -3178,37 +2955,35 @@ static bool handle_in_table_text(GumboParser* parser, GumboToken* token) {
3178
2955
 
3179
2956
  // http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-incaption
3180
2957
  static bool handle_in_caption(GumboParser* parser, GumboToken* token) {
3181
- if (tag_is(token, kEndTag, GUMBO_TAG_CAPTION)) {
2958
+ if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(CAPTION), TAG(COL),
2959
+ TAG(COLGROUP), TAG(TBODY), TAG(TD),
2960
+ TAG(TFOOT), TAG(TH), TAG(THEAD), TAG(TR) }) ||
2961
+ tag_in(token, kEndTag, (gumbo_tagset) { TAG(CAPTION), TAG(TABLE) })) {
3182
2962
  if (!has_an_element_in_table_scope(parser, GUMBO_TAG_CAPTION)) {
3183
2963
  parser_add_parse_error(parser, token);
3184
2964
  ignore_token(parser);
3185
2965
  return false;
3186
- } else {
3187
- generate_implied_end_tags(parser, GUMBO_TAG_LAST);
3188
- bool result = true;
3189
- if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_CAPTION)) {
3190
- parser_add_parse_error(parser, token);
3191
- }
3192
- while (!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_CAPTION));
3193
- clear_active_formatting_elements(parser);
3194
- set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
3195
- return result;
3196
2966
  }
3197
- } else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(CAPTION), TAG(COL),
3198
- TAG(COLGROUP), TAG(TBODY), TAG(TD), TAG(TFOOT), TAG(TH), TAG(THEAD), TAG(TR) }) ||
3199
- (tag_is(token, kEndTag, GUMBO_TAG_TABLE))) {
3200
- if (!has_an_element_in_table_scope(parser, GUMBO_TAG_CAPTION)) {
2967
+ if (!tag_is(token, kEndTag, GUMBO_TAG_CAPTION)) {
3201
2968
  parser_add_parse_error(parser, token);
3202
- ignore_token(parser);
3203
- return false;
2969
+ parser->_parser_state->_reprocess_current_token = true;
2970
+ }
2971
+ generate_implied_end_tags(parser, GUMBO_TAG_LAST);
2972
+ bool result = true;
2973
+ if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_CAPTION)) {
2974
+ parser_add_parse_error(parser, token);
2975
+ while (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_CAPTION)) {
2976
+ pop_current_node(parser);
2977
+ }
2978
+ result = false;
3204
2979
  }
3205
- while (!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_CAPTION));
2980
+ pop_current_node(parser); // The <caption> itself.
3206
2981
  clear_active_formatting_elements(parser);
3207
2982
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
3208
- parser->_parser_state->_reprocess_current_token = true;
3209
- return true;
3210
- } else if (tag_in(token, kEndTag, (gumbo_tagset) { TAG(BODY), TAG(COL), TAG(COLGROUP),
3211
- TAG(HTML), TAG(TBODY), TAG(TD), TAG(TFOOT), TAG(TH), TAG(THEAD), TAG(TR) } )) {
2983
+ return result;
2984
+ } else if (tag_in(token, kEndTag, (gumbo_tagset) { TAG(BODY), TAG(COL),
2985
+ TAG(COLGROUP), TAG(HTML), TAG(TBODY), TAG(TD), TAG(TFOOT),
2986
+ TAG(TH), TAG(THEAD), TAG(TR) })) {
3212
2987
  parser_add_parse_error(parser, token);
3213
2988
  ignore_token(parser);
3214
2989
  return false;
@@ -3236,33 +3011,24 @@ static bool handle_in_column_group(GumboParser* parser, GumboToken* token) {
3236
3011
  pop_current_node(parser);
3237
3012
  acknowledge_self_closing_tag(parser);
3238
3013
  return true;
3239
- } else if (tag_is(token, kEndTag, GUMBO_TAG_COLGROUP)) {
3240
- if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_COLGROUP)) {
3241
- parser_add_parse_error(parser, token);
3242
- ignore_token(parser);
3243
- return false;
3244
- }
3245
- pop_current_node(parser);
3246
- set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
3247
- return false;
3248
3014
  } else if (tag_is(token, kEndTag, GUMBO_TAG_COL)) {
3249
3015
  parser_add_parse_error(parser, token);
3250
3016
  ignore_token(parser);
3251
3017
  return false;
3252
- } else if (tag_is(token, kStartTag, GUMBO_TAG_TEMPLATE) ||
3253
- tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
3254
- return handle_in_head(parser, token);
3255
- } else if (token->type == GUMBO_TOKEN_EOF) {
3256
- return handle_in_body(parser, token);
3018
+ } else if (token->type == GUMBO_TOKEN_EOF &&
3019
+ get_current_node(parser) == parser->_output->root) {
3020
+ return true;
3257
3021
  } else {
3258
- if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_COLGROUP)) {
3022
+ if (get_current_node(parser) == parser->_output->root) {
3259
3023
  parser_add_parse_error(parser, token);
3260
- ignore_token(parser);
3261
3024
  return false;
3262
3025
  }
3026
+ assert(node_html_tag_is(get_current_node(parser), GUMBO_TAG_COLGROUP));
3263
3027
  pop_current_node(parser);
3264
3028
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
3265
- parser->_parser_state->_reprocess_current_token = true;
3029
+ if (!tag_is(token, kEndTag, GUMBO_TAG_COLGROUP)) {
3030
+ parser->_parser_state->_reprocess_current_token = true;
3031
+ }
3266
3032
  return true;
3267
3033
  }
3268
3034
  }
@@ -3325,48 +3091,42 @@ static bool handle_in_row(GumboParser* parser, GumboToken* token) {
3325
3091
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_CELL);
3326
3092
  add_formatting_element(parser, &kActiveFormattingScopeMarker);
3327
3093
  return true;
3328
- } else if (tag_is(token, kEndTag, GUMBO_TAG_TR)) {
3329
- if (!has_an_element_in_table_scope(parser,GUMBO_TAG_TR)) {
3330
- parser_add_parse_error(parser, token);
3331
- ignore_token(parser);
3332
- return false;
3333
- } else {
3334
- clear_stack_to_table_row_context(parser);
3335
- pop_current_node(parser);
3336
- set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
3337
- return true;
3338
- }
3339
- } else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(CAPTION), TAG(COL), TAG(COLGROUP),
3340
- TAG(TBODY), TAG(TFOOT), TAG(THEAD), TAG(TR) }) || tag_is(token, kEndTag, GUMBO_TAG_TABLE)) {
3341
- if (!has_an_element_in_table_scope(parser,GUMBO_TAG_TR)) {
3094
+ } else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(CAPTION), TAG(COLGROUP),
3095
+ TAG(TBODY), TAG(TFOOT), TAG(THEAD), TAG(TR) }) ||
3096
+ tag_in(token, kEndTag, (gumbo_tagset) { TAG(TR), TAG(TABLE),
3097
+ TAG(TBODY), TAG(TFOOT), TAG(THEAD) })) {
3098
+ // This case covers 4 clauses of the spec, each of which say "Otherwise, act
3099
+ // as if an end tag with the tag name "tr" had been seen." The differences
3100
+ // are in error handling and whether the current token is reprocessed.
3101
+ GumboTag desired_tag =
3102
+ tag_in(token, kEndTag, (gumbo_tagset) { TAG(TBODY), TAG(TFOOT),
3103
+ TAG(THEAD) })
3104
+ ? token->v.end_tag : GUMBO_TAG_TR;
3105
+ if (!has_an_element_in_table_scope(parser, desired_tag)) {
3106
+ gumbo_debug("Bailing because there is no tag %s in table scope.\nOpen elements:",
3107
+ gumbo_normalized_tagname(desired_tag));
3108
+ for (int i = 0; i < parser->_parser_state->_open_elements.length; ++i) {
3109
+ const GumboNode* node = parser->_parser_state->_open_elements.data[i];
3110
+ gumbo_debug("%s\n", gumbo_normalized_tagname(node->v.element.tag));
3111
+ }
3342
3112
  parser_add_parse_error(parser, token);
3343
3113
  ignore_token(parser);
3344
3114
  return false;
3345
- } else {
3346
- clear_stack_to_table_row_context(parser);
3347
- pop_current_node(parser);
3348
- set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
3349
- parser->_parser_state->_reprocess_current_token = true;
3350
- return true;
3351
3115
  }
3352
- } else if (tag_in(token, kEndTag, (gumbo_tagset) { TAG(TBODY), TAG(TFOOT), TAG(THEAD) })) {
3353
- if (!has_an_element_in_table_scope(parser, token->v.end_tag) ||
3354
- (!has_an_element_in_table_scope(parser, GUMBO_TAG_TR))) {
3355
- parser_add_parse_error(parser, token);
3356
- ignore_token(parser);
3357
- return false;
3358
- } else {
3359
- clear_stack_to_table_row_context(parser);
3360
- pop_current_node(parser);
3361
- set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
3116
+ clear_stack_to_table_row_context(parser);
3117
+ GumboNode* last_element = pop_current_node(parser);
3118
+ assert(node_html_tag_is(last_element, GUMBO_TAG_TR));
3119
+ AVOID_UNUSED_VARIABLE_WARNING(last_element);
3120
+ set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
3121
+ if (!tag_is(token, kEndTag, GUMBO_TAG_TR)) {
3362
3122
  parser->_parser_state->_reprocess_current_token = true;
3363
- return true;
3364
3123
  }
3365
- } else if (tag_in(token, kEndTag, (gumbo_tagset) { TAG(BODY), TAG(CAPTION), TAG(COL),
3366
- TAG(COLGROUP), TAG(HTML), TAG(TD), TAG(TH) })) {
3367
- parser_add_parse_error(parser, token);
3368
- ignore_token(parser);
3369
- return false;
3124
+ return true;
3125
+ } else if (tag_in(token, kEndTag, (gumbo_tagset) { TAG(BODY), TAG(CAPTION),
3126
+ TAG(COL), TAG(COLGROUP), TAG(HTML), TAG(TD), TAG(TH) })) {
3127
+ parser_add_parse_error(parser, token);
3128
+ ignore_token(parser);
3129
+ return false;
3370
3130
  } else {
3371
3131
  return handle_in_table(parser, token);
3372
3132
  }
@@ -3378,7 +3138,6 @@ static bool handle_in_cell(GumboParser* parser, GumboToken* token) {
3378
3138
  GumboTag token_tag = token->v.end_tag;
3379
3139
  if (!has_an_element_in_table_scope(parser, token_tag)) {
3380
3140
  parser_add_parse_error(parser, token);
3381
- ignore_token(parser);
3382
3141
  return false;
3383
3142
  }
3384
3143
  return close_table_cell(parser, token, token_tag);
@@ -3494,11 +3253,14 @@ static bool handle_in_select(GumboParser* parser, GumboToken* token) {
3494
3253
  parser->_parser_state->_reprocess_current_token = true;
3495
3254
  }
3496
3255
  return false;
3497
- } else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(SCRIPT) , TAG(TEMPLATE) }) ||
3498
- tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
3256
+ } else if (tag_is(token, kStartTag, GUMBO_TAG_SCRIPT)) {
3499
3257
  return handle_in_head(parser, token);
3500
3258
  } else if (token->type == GUMBO_TOKEN_EOF) {
3501
- return handle_in_body(parser, token);
3259
+ if (get_current_node(parser) != parser->_output->root) {
3260
+ parser_add_parse_error(parser, token);
3261
+ return false;
3262
+ }
3263
+ return true;
3502
3264
  } else {
3503
3265
  parser_add_parse_error(parser, token);
3504
3266
  ignore_token(parser);
@@ -3517,16 +3279,14 @@ static bool handle_in_select_in_table(GumboParser* parser, GumboToken* token) {
3517
3279
  } else if (tag_in(token, kEndTag, (gumbo_tagset) { TAG(CAPTION), TAG(TABLE),
3518
3280
  TAG(TBODY), TAG(TFOOT), TAG(THEAD), TAG(TR), TAG(TD), TAG(TH) })) {
3519
3281
  parser_add_parse_error(parser, token);
3520
- if (!has_an_element_in_table_scope(parser, token->v.end_tag)) {
3521
- ignore_token(parser);
3522
- return false;
3523
- } else {
3282
+ if (has_an_element_in_table_scope(parser, token->v.end_tag)) {
3524
3283
  close_current_select(parser);
3525
- // close_current_select already does the reset_insertion_mode_appropriately
3526
- // reset_insertion_mode_appropriately(parser);
3284
+ reset_insertion_mode_appropriately(parser);
3527
3285
  parser->_parser_state->_reprocess_current_token = true;
3528
- return false;
3286
+ } else {
3287
+ ignore_token(parser);
3529
3288
  }
3289
+ return false;
3530
3290
  } else {
3531
3291
  return handle_in_select(parser, token);
3532
3292
  }
@@ -3534,68 +3294,8 @@ static bool handle_in_select_in_table(GumboParser* parser, GumboToken* token) {
3534
3294
 
3535
3295
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#parsing-main-intemplate
3536
3296
  static bool handle_in_template(GumboParser* parser, GumboToken* token) {
3537
- GumboParserState* state = parser->_parser_state;
3538
- if (token->type == GUMBO_TOKEN_WHITESPACE ||
3539
- token->type == GUMBO_TOKEN_CHARACTER ||
3540
- token->type == GUMBO_TOKEN_COMMENT ||
3541
- token->type == GUMBO_TOKEN_DOCTYPE) {
3542
- return handle_in_body(parser, token);
3543
- } else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(BASE), TAG(BASEFONT), TAG(BGSOUND),
3544
- TAG(LINK), TAG(META), TAG(NOFRAMES), TAG(SCRIPT), TAG(STYLE),
3545
- TAG(TEMPLATE), TAG(TITLE) }) ||
3546
- tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
3547
- return handle_in_head(parser, token);
3548
- } else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(CAPTION), TAG(COLGROUP),
3549
- TAG(TBODY), TAG(TFOOT), TAG(THEAD) })) {
3550
- pop_template_insertion_mode(parser);
3551
- push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
3552
- set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
3553
- state->_reprocess_current_token = true;
3554
- return true;
3555
- } else if (tag_is(token, kStartTag, GUMBO_TAG_COL)) {
3556
- pop_template_insertion_mode(parser);
3557
- push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_COLUMN_GROUP);
3558
- set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_COLUMN_GROUP);
3559
- state->_reprocess_current_token = true;
3560
- return true;
3561
- } else if (tag_is(token, kStartTag, GUMBO_TAG_TR)) {
3562
- pop_template_insertion_mode(parser);
3563
- push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
3564
- set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
3565
- state->_reprocess_current_token = true;
3566
- return true;
3567
- } else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(TD), TAG(TH) })) {
3568
- pop_template_insertion_mode(parser);
3569
- push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_ROW);
3570
- set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_ROW);
3571
- state->_reprocess_current_token = true;
3572
- return true;
3573
- } else if (token->type == GUMBO_TOKEN_START_TAG) {
3574
- pop_template_insertion_mode(parser);
3575
- push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_BODY);
3576
- set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_BODY);
3577
- state->_reprocess_current_token = true;
3578
- return true;
3579
- } else if (token->type == GUMBO_TOKEN_END_TAG) {
3580
- parser_add_parse_error(parser, token);
3581
- ignore_token(parser);
3582
- return false;
3583
- } else if (token->type == GUMBO_TOKEN_EOF) {
3584
- if (!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
3585
- // Stop parsing.
3586
- return true;
3587
- }
3588
- parser_add_parse_error(parser, token);
3589
- while(!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_TEMPLATE));
3590
- clear_active_formatting_elements(parser);
3591
- pop_template_insertion_mode(parser);
3592
- reset_insertion_mode_appropriately(parser);
3593
- state->_reprocess_current_token = true;
3594
- return false;
3595
- } else {
3596
- assert(0);
3597
- return false;
3598
- }
3297
+ // TODO(jdtang): Implement this.
3298
+ return true;
3599
3299
  }
3600
3300
 
3601
3301
  // http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-afterbody
@@ -3613,12 +3313,7 @@ static bool handle_after_body(GumboParser* parser, GumboToken* token) {
3613
3313
  ignore_token(parser);
3614
3314
  return false;
3615
3315
  } else if (tag_is(token, kEndTag, GUMBO_TAG_HTML)) {
3616
- /* fragment case: ignore the closing HTML token */
3617
- if (is_fragment_parser(parser)) {
3618
- parser_add_parse_error(parser, token);
3619
- ignore_token(parser);
3620
- return false;
3621
- }
3316
+ // TODO(jdtang): Handle fragment parsing algorithm case.
3622
3317
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_AFTER_BODY);
3623
3318
  GumboNode* html = parser->_parser_state->_open_elements.data[0];
3624
3319
  assert(node_html_tag_is(html, GUMBO_TAG_HTML));
@@ -3659,8 +3354,9 @@ static bool handle_in_frameset(GumboParser* parser, GumboToken* token) {
3659
3354
  return false;
3660
3355
  }
3661
3356
  pop_current_node(parser);
3662
- if (!is_fragment_parser(parser) &&
3663
- !node_html_tag_is(get_current_node(parser), GUMBO_TAG_FRAMESET)) {
3357
+ // TODO(jdtang): Add a condition to ignore this for the fragment parsing
3358
+ // algorithm.
3359
+ if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_FRAMESET)) {
3664
3360
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_FRAMESET);
3665
3361
  }
3666
3362
  return true;
@@ -3834,32 +3530,18 @@ static bool handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
3834
3530
  token_has_attribute(token, "color") ||
3835
3531
  token_has_attribute(token, "face") ||
3836
3532
  token_has_attribute(token, "size")))) {
3837
-
3838
- /* Parse error */
3839
3533
  parser_add_parse_error(parser, token);
3840
-
3841
- /*
3842
- * Fragment case: If the parser was originally created for the HTML
3843
- * fragment parsing algorithm, then act as described in the "any other
3844
- * start tag" entry below.
3845
- */
3846
- if (!is_fragment_parser(parser)) {
3847
- do {
3848
- pop_current_node(parser);
3849
- } while(!(is_mathml_integration_point(get_current_node(parser)) ||
3850
- is_html_integration_point(get_current_node(parser)) ||
3851
- get_current_node(parser)->v.element.tag_namespace ==
3852
- GUMBO_NAMESPACE_HTML));
3853
- parser->_parser_state->_reprocess_current_token = true;
3854
- return false;
3855
- }
3856
-
3857
- assert(token->type == GUMBO_TOKEN_START_TAG);
3858
- }
3859
-
3860
- if (token->type == GUMBO_TOKEN_START_TAG) {
3534
+ do {
3535
+ pop_current_node(parser);
3536
+ } while(!(is_mathml_integration_point(get_current_node(parser)) ||
3537
+ is_html_integration_point(get_current_node(parser)) ||
3538
+ get_current_node(parser)->v.element.tag_namespace ==
3539
+ GUMBO_NAMESPACE_HTML));
3540
+ parser->_parser_state->_reprocess_current_token = true;
3541
+ return false;
3542
+ } else if (token->type == GUMBO_TOKEN_START_TAG) {
3861
3543
  const GumboNamespaceEnum current_namespace =
3862
- get_adjusted_current_node(parser)->v.element.tag_namespace;
3544
+ get_current_node(parser)->v.element.tag_namespace;
3863
3545
  if (current_namespace == GUMBO_NAMESPACE_MATHML) {
3864
3546
  adjust_mathml_attributes(parser, token);
3865
3547
  }
@@ -3948,10 +3630,8 @@ static bool handle_token(GumboParser* parser, GumboToken* token) {
3948
3630
  parser->_parser_state->_closed_html_tag = true;
3949
3631
  }
3950
3632
 
3951
- const GumboNode* current_node = get_adjusted_current_node(parser);
3952
- assert(!current_node ||
3953
- current_node->type == GUMBO_NODE_ELEMENT ||
3954
- current_node->type == GUMBO_NODE_TEMPLATE);
3633
+ const GumboNode* current_node = get_current_node(parser);
3634
+ assert(!current_node || current_node->type == GUMBO_NODE_ELEMENT);
3955
3635
  if (current_node) {
3956
3636
  gumbo_debug("Current node: <%s>.\n",
3957
3637
  gumbo_normalized_tagname(current_node->v.element.tag));
@@ -3979,66 +3659,6 @@ static bool handle_token(GumboParser* parser, GumboToken* token) {
3979
3659
  }
3980
3660
  }
3981
3661
 
3982
- static void fragment_parser_init(
3983
- GumboParser *parser, GumboTag fragment_ctx,
3984
- GumboNamespaceEnum fragment_namespace) {
3985
- GumboNode *root;
3986
- assert(fragment_ctx != GUMBO_TAG_LAST);
3987
-
3988
- // 3
3989
- parser->_parser_state->_fragment_ctx = create_element(parser, fragment_ctx);
3990
- parser->_parser_state->_fragment_ctx->v.element.tag_namespace =
3991
- fragment_namespace;
3992
-
3993
- // 4
3994
- if (fragment_namespace == GUMBO_NAMESPACE_HTML) {
3995
- // Non-HTML namespaces always start in the DATA state.
3996
- switch (fragment_ctx) {
3997
- case GUMBO_TAG_TITLE:
3998
- case GUMBO_TAG_TEXTAREA:
3999
- gumbo_tokenizer_set_state(parser, GUMBO_LEX_RCDATA);
4000
- break;
4001
-
4002
- case GUMBO_TAG_STYLE:
4003
- case GUMBO_TAG_XMP:
4004
- case GUMBO_TAG_IFRAME:
4005
- case GUMBO_TAG_NOEMBED:
4006
- case GUMBO_TAG_NOFRAMES:
4007
- gumbo_tokenizer_set_state(parser, GUMBO_LEX_RAWTEXT);
4008
- break;
4009
-
4010
- case GUMBO_TAG_SCRIPT:
4011
- gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT);
4012
- break;
4013
-
4014
- case GUMBO_TAG_NOSCRIPT:
4015
- /* scripting is disabled in Gumbo, so leave the tokenizer
4016
- * in the default data state */
4017
- break;
4018
-
4019
- case GUMBO_TAG_PLAINTEXT:
4020
- gumbo_tokenizer_set_state(parser, GUMBO_LEX_PLAINTEXT);
4021
- break;
4022
-
4023
- default:
4024
- /* default data state */
4025
- break;
4026
- }
4027
- }
4028
-
4029
- // 5. 6. 7.
4030
- root = insert_element_of_tag_type(parser, GUMBO_TAG_HTML, GUMBO_INSERTION_IMPLIED);
4031
- parser->_output->root = root;
4032
-
4033
- // 8.
4034
- if (fragment_ctx == GUMBO_TAG_TEMPLATE) {
4035
- push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TEMPLATE);
4036
- }
4037
-
4038
- // 10.
4039
- reset_insertion_mode_appropriately(parser);
4040
- }
4041
-
4042
3662
  GumboOutput* gumbo_parse(const char* buffer) {
4043
3663
  return gumbo_parse_with_options(
4044
3664
  &kGumboDefaultOptions, buffer, strlen(buffer));
@@ -4046,27 +3666,11 @@ GumboOutput* gumbo_parse(const char* buffer) {
4046
3666
 
4047
3667
  GumboOutput* gumbo_parse_with_options(
4048
3668
  const GumboOptions* options, const char* buffer, size_t length) {
4049
- return gumbo_parse_fragment(
4050
- options, buffer, length, GUMBO_TAG_LAST, GUMBO_NAMESPACE_HTML);
4051
- }
4052
-
4053
- GumboOutput* gumbo_parse_fragment(
4054
- const GumboOptions* options, const char* buffer, size_t length,
4055
- const GumboTag fragment_ctx, const GumboNamespaceEnum fragment_namespace) {
4056
3669
  GumboParser parser;
4057
3670
  parser._options = options;
4058
- parser_state_init(&parser);
4059
- // Must come after parser_state_init, since creating the document node must
4060
- // reference parser_state->_current_node.
4061
3671
  output_init(&parser);
4062
- // And this must come after output_init, because initializing the tokenizer
4063
- // reads the first character and that may cause a UTF-8 decode error
4064
- // (inserting into output->errors) if that's invalid.
4065
3672
  gumbo_tokenizer_state_init(&parser, buffer, length);
4066
-
4067
- if (fragment_ctx != GUMBO_TAG_LAST) {
4068
- fragment_parser_init(&parser, fragment_ctx, fragment_namespace);
4069
- }
3673
+ parser_state_init(&parser);
4070
3674
 
4071
3675
  GumboParserState* state = parser._parser_state;
4072
3676
  gumbo_debug("Parsing %.*s.\n", length, buffer);
@@ -4154,16 +3758,20 @@ GumboOutput* gumbo_parse_fragment(
4154
3758
  return parser._output;
4155
3759
  }
4156
3760
 
3761
+ void gumbo_destroy_node(GumboOptions* options, GumboNode* node) {
3762
+ // Need a dummy GumboParser because the allocator comes along with the
3763
+ // options object.
3764
+ GumboParser parser;
3765
+ parser._options = options;
3766
+ destroy_node(&parser, node);
3767
+ }
3768
+
4157
3769
  void gumbo_destroy_output(const GumboOptions* options, GumboOutput* output) {
4158
3770
  // Need a dummy GumboParser because the allocator comes along with the
4159
3771
  // options object.
4160
3772
  GumboParser parser;
4161
- parser._parser_state = NULL;
4162
3773
  parser._options = options;
4163
- GumboNode* current = output->document;
4164
- while (current) {
4165
- current = destroy_node(&parser, current);
4166
- }
3774
+ destroy_node(&parser, output->document);
4167
3775
  for (int i = 0; i < output->errors.length; ++i) {
4168
3776
  gumbo_error_destroy(&parser, output->errors.data[i]);
4169
3777
  }