nokogumbo 1.4.1 → 1.4.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: cf20dd502d8ec6022f2c72193bb0c9a908251088
4
+ data.tar.gz: 326f85766d0e4f97683f5df026f08f4dc33806e8
5
+ SHA512:
6
+ metadata.gz: 800800652a5260bf54399e8cca1fc6e63f7ef53aea489245c5315b6e955b38aa4dfc6d7272b99898ab78150464640ac14c995aa38b9c77644dab5d73fc0e46a5
7
+ data.tar.gz: 18ba647671103cfc2853a88935fe91eb965d1e6fbe1aad981438297a5035ec222b5ae6c5ed3ef127429c8b58edd02a6a5a877ba7e7ec3390d05779f7420f1521
@@ -157,7 +157,7 @@ static xmlNodePtr walk_tree(xmlDocPtr document, GumboElement *node) {
157
157
 
158
158
  switch (child->type) {
159
159
  case GUMBO_NODE_ELEMENT:
160
- case GUMBO_NODE_TEMPLATE:
160
+ // case GUMBO_NODE_TEMPLATE: /* future */
161
161
  node = walk_tree(document, &child->v.element);
162
162
  break;
163
163
  case GUMBO_NODE_WHITESPACE:
@@ -35,11 +35,10 @@ static const size_t kMessageBufferSize = 256;
35
35
  static int print_message(GumboParser* parser, GumboStringBuffer* output,
36
36
  const char* format, ...) {
37
37
  va_list args;
38
- int remaining_capacity = output->capacity - output->length;
39
38
  va_start(args, format);
39
+ int remaining_capacity = output->capacity - output->length;
40
40
  int bytes_written = vsnprintf(output->data + output->length,
41
41
  remaining_capacity, format, args);
42
- va_end(args);
43
42
  #ifdef _MSC_VER
44
43
  if (bytes_written == -1) {
45
44
  // vsnprintf returns -1 on MSVC++ if there's not enough capacity, instead of
@@ -48,7 +47,6 @@ static int print_message(GumboParser* parser, GumboStringBuffer* output,
48
47
  // we retry (letting it fail and returning 0 if it doesn't), since there's
49
48
  // no way to smartly resize the buffer.
50
49
  gumbo_string_buffer_reserve(parser, output->capacity * 2, output);
51
- va_start(args, format);
52
50
  int result = vsnprintf(output->data + output->length,
53
51
  remaining_capacity, format, args);
54
52
  va_end(args);
@@ -57,6 +55,7 @@ static int print_message(GumboParser* parser, GumboStringBuffer* output,
57
55
  #else
58
56
  // -1 in standard C99 indicates an encoding error. Return 0 and do nothing.
59
57
  if (bytes_written == -1) {
58
+ va_end(args);
60
59
  return 0;
61
60
  }
62
61
  #endif
@@ -65,12 +64,11 @@ static int print_message(GumboParser* parser, GumboStringBuffer* output,
65
64
  gumbo_string_buffer_reserve(
66
65
  parser, output->capacity + bytes_written, output);
67
66
  remaining_capacity = output->capacity - output->length;
68
- va_start(args, format);
69
67
  bytes_written = vsnprintf(output->data + output->length,
70
68
  remaining_capacity, format, args);
71
- va_end(args);
72
69
  }
73
70
  output->length += bytes_written;
71
+ va_end(args);
74
72
  return bytes_written;
75
73
  }
76
74
 
@@ -141,7 +141,7 @@ extern const GumboVector kGumboEmptyVector;
141
141
  * Returns the first index at which an element appears in this vector (testing
142
142
  * by pointer equality), or -1 if it never does.
143
143
  */
144
- int gumbo_vector_index_of(GumboVector* vector, const void* element);
144
+ int gumbo_vector_index_of(GumboVector* vector, void* element);
145
145
 
146
146
 
147
147
  /**
@@ -157,10 +157,172 @@ int gumbo_vector_index_of(GumboVector* vector, const void* element);
157
157
  * strings.
158
158
  */
159
159
  typedef enum {
160
- // Load all the tags from an external source, generated from tag.in.
161
- # include "tag_enum.h"
162
- // Used for all tags that don't have special handling in HTML. Add new tags
163
- // to the end of tag.in so as to preserve backwards-compatibility.
160
+ // http://www.whatwg.org/specs/web-apps/current-work/multipage/semantics.html#the-root-element
161
+ GUMBO_TAG_HTML,
162
+ // http://www.whatwg.org/specs/web-apps/current-work/multipage/semantics.html#document-metadata
163
+ GUMBO_TAG_HEAD,
164
+ GUMBO_TAG_TITLE,
165
+ GUMBO_TAG_BASE,
166
+ GUMBO_TAG_LINK,
167
+ GUMBO_TAG_META,
168
+ GUMBO_TAG_STYLE,
169
+ // http://www.whatwg.org/specs/web-apps/current-work/multipage/scripting-1.html#scripting-1
170
+ GUMBO_TAG_SCRIPT,
171
+ GUMBO_TAG_NOSCRIPT,
172
+ GUMBO_TAG_TEMPLATE,
173
+ // http://www.whatwg.org/specs/web-apps/current-work/multipage/sections.html#sections
174
+ GUMBO_TAG_BODY,
175
+ GUMBO_TAG_ARTICLE,
176
+ GUMBO_TAG_SECTION,
177
+ GUMBO_TAG_NAV,
178
+ GUMBO_TAG_ASIDE,
179
+ GUMBO_TAG_H1,
180
+ GUMBO_TAG_H2,
181
+ GUMBO_TAG_H3,
182
+ GUMBO_TAG_H4,
183
+ GUMBO_TAG_H5,
184
+ GUMBO_TAG_H6,
185
+ GUMBO_TAG_HGROUP,
186
+ GUMBO_TAG_HEADER,
187
+ GUMBO_TAG_FOOTER,
188
+ GUMBO_TAG_ADDRESS,
189
+ // http://www.whatwg.org/specs/web-apps/current-work/multipage/grouping-content.html#grouping-content
190
+ GUMBO_TAG_P,
191
+ GUMBO_TAG_HR,
192
+ GUMBO_TAG_PRE,
193
+ GUMBO_TAG_BLOCKQUOTE,
194
+ GUMBO_TAG_OL,
195
+ GUMBO_TAG_UL,
196
+ GUMBO_TAG_LI,
197
+ GUMBO_TAG_DL,
198
+ GUMBO_TAG_DT,
199
+ GUMBO_TAG_DD,
200
+ GUMBO_TAG_FIGURE,
201
+ GUMBO_TAG_FIGCAPTION,
202
+ GUMBO_TAG_MAIN,
203
+ GUMBO_TAG_DIV,
204
+ // http://www.whatwg.org/specs/web-apps/current-work/multipage/text-level-semantics.html#text-level-semantics
205
+ GUMBO_TAG_A,
206
+ GUMBO_TAG_EM,
207
+ GUMBO_TAG_STRONG,
208
+ GUMBO_TAG_SMALL,
209
+ GUMBO_TAG_S,
210
+ GUMBO_TAG_CITE,
211
+ GUMBO_TAG_Q,
212
+ GUMBO_TAG_DFN,
213
+ GUMBO_TAG_ABBR,
214
+ GUMBO_TAG_DATA,
215
+ GUMBO_TAG_TIME,
216
+ GUMBO_TAG_CODE,
217
+ GUMBO_TAG_VAR,
218
+ GUMBO_TAG_SAMP,
219
+ GUMBO_TAG_KBD,
220
+ GUMBO_TAG_SUB,
221
+ GUMBO_TAG_SUP,
222
+ GUMBO_TAG_I,
223
+ GUMBO_TAG_B,
224
+ GUMBO_TAG_U,
225
+ GUMBO_TAG_MARK,
226
+ GUMBO_TAG_RUBY,
227
+ GUMBO_TAG_RT,
228
+ GUMBO_TAG_RP,
229
+ GUMBO_TAG_BDI,
230
+ GUMBO_TAG_BDO,
231
+ GUMBO_TAG_SPAN,
232
+ GUMBO_TAG_BR,
233
+ GUMBO_TAG_WBR,
234
+ // http://www.whatwg.org/specs/web-apps/current-work/multipage/edits.html#edits
235
+ GUMBO_TAG_INS,
236
+ GUMBO_TAG_DEL,
237
+ // http://www.whatwg.org/specs/web-apps/current-work/multipage/embedded-content-1.html#embedded-content-1
238
+ GUMBO_TAG_IMAGE,
239
+ GUMBO_TAG_IMG,
240
+ GUMBO_TAG_IFRAME,
241
+ GUMBO_TAG_EMBED,
242
+ GUMBO_TAG_OBJECT,
243
+ GUMBO_TAG_PARAM,
244
+ GUMBO_TAG_VIDEO,
245
+ GUMBO_TAG_AUDIO,
246
+ GUMBO_TAG_SOURCE,
247
+ GUMBO_TAG_TRACK,
248
+ GUMBO_TAG_CANVAS,
249
+ GUMBO_TAG_MAP,
250
+ GUMBO_TAG_AREA,
251
+ // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-map-element.html#mathml
252
+ GUMBO_TAG_MATH,
253
+ GUMBO_TAG_MI,
254
+ GUMBO_TAG_MO,
255
+ GUMBO_TAG_MN,
256
+ GUMBO_TAG_MS,
257
+ GUMBO_TAG_MTEXT,
258
+ GUMBO_TAG_MGLYPH,
259
+ GUMBO_TAG_MALIGNMARK,
260
+ GUMBO_TAG_ANNOTATION_XML,
261
+ // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-map-element.html#svg-0
262
+ GUMBO_TAG_SVG,
263
+ GUMBO_TAG_FOREIGNOBJECT,
264
+ GUMBO_TAG_DESC,
265
+ // SVG title tags will have GUMBO_TAG_TITLE as with HTML.
266
+ // http://www.whatwg.org/specs/web-apps/current-work/multipage/tabular-data.html#tabular-data
267
+ GUMBO_TAG_TABLE,
268
+ GUMBO_TAG_CAPTION,
269
+ GUMBO_TAG_COLGROUP,
270
+ GUMBO_TAG_COL,
271
+ GUMBO_TAG_TBODY,
272
+ GUMBO_TAG_THEAD,
273
+ GUMBO_TAG_TFOOT,
274
+ GUMBO_TAG_TR,
275
+ GUMBO_TAG_TD,
276
+ GUMBO_TAG_TH,
277
+ // http://www.whatwg.org/specs/web-apps/current-work/multipage/forms.html#forms
278
+ GUMBO_TAG_FORM,
279
+ GUMBO_TAG_FIELDSET,
280
+ GUMBO_TAG_LEGEND,
281
+ GUMBO_TAG_LABEL,
282
+ GUMBO_TAG_INPUT,
283
+ GUMBO_TAG_BUTTON,
284
+ GUMBO_TAG_SELECT,
285
+ GUMBO_TAG_DATALIST,
286
+ GUMBO_TAG_OPTGROUP,
287
+ GUMBO_TAG_OPTION,
288
+ GUMBO_TAG_TEXTAREA,
289
+ GUMBO_TAG_KEYGEN,
290
+ GUMBO_TAG_OUTPUT,
291
+ GUMBO_TAG_PROGRESS,
292
+ GUMBO_TAG_METER,
293
+ // http://www.whatwg.org/specs/web-apps/current-work/multipage/interactive-elements.html#interactive-elements
294
+ GUMBO_TAG_DETAILS,
295
+ GUMBO_TAG_SUMMARY,
296
+ GUMBO_TAG_MENU,
297
+ GUMBO_TAG_MENUITEM,
298
+ // Non-conforming elements that nonetheless appear in the HTML5 spec.
299
+ // http://www.whatwg.org/specs/web-apps/current-work/multipage/obsolete.html#non-conforming-features
300
+ GUMBO_TAG_APPLET,
301
+ GUMBO_TAG_ACRONYM,
302
+ GUMBO_TAG_BGSOUND,
303
+ GUMBO_TAG_DIR,
304
+ GUMBO_TAG_FRAME,
305
+ GUMBO_TAG_FRAMESET,
306
+ GUMBO_TAG_NOFRAMES,
307
+ GUMBO_TAG_ISINDEX,
308
+ GUMBO_TAG_LISTING,
309
+ GUMBO_TAG_XMP,
310
+ GUMBO_TAG_NEXTID,
311
+ GUMBO_TAG_NOEMBED,
312
+ GUMBO_TAG_PLAINTEXT,
313
+ GUMBO_TAG_RB,
314
+ GUMBO_TAG_STRIKE,
315
+ GUMBO_TAG_BASEFONT,
316
+ GUMBO_TAG_BIG,
317
+ GUMBO_TAG_BLINK,
318
+ GUMBO_TAG_CENTER,
319
+ GUMBO_TAG_FONT,
320
+ GUMBO_TAG_MARQUEE,
321
+ GUMBO_TAG_MULTICOL,
322
+ GUMBO_TAG_NOBR,
323
+ GUMBO_TAG_SPACER,
324
+ GUMBO_TAG_TT,
325
+ // Used for all tags that don't have special handling in HTML.
164
326
  GUMBO_TAG_UNKNOWN,
165
327
  // A marker value to indicate the end of the enum, for iterating over it.
166
328
  // Also used as the terminator for varargs functions that take tags.
@@ -202,10 +364,9 @@ const char* gumbo_normalize_svg_tagname(const GumboStringPiece* tagname);
202
364
 
203
365
  /**
204
366
  * Converts a tag name string (which may be in upper or mixed case) to a tag
205
- * enum. The `tag` version expects `tagname` to be NULL-terminated
367
+ * enum.
206
368
  */
207
369
  GumboTag gumbo_tag_enum(const char* tagname);
208
- GumboTag gumbo_tagn_enum(const char* tagname, unsigned int length);
209
370
 
210
371
  /**
211
372
  * Attribute namespaces.
@@ -300,16 +461,10 @@ typedef enum {
300
461
  GUMBO_NODE_TEXT,
301
462
  /** CDATA node. v will be a GumboText. */
302
463
  GUMBO_NODE_CDATA,
303
- /** Comment node. v will be a GumboText, excluding comment delimiters. */
464
+ /** Comment node. v. will be a GumboText, excluding comment delimiters. */
304
465
  GUMBO_NODE_COMMENT,
305
466
  /** Text node, where all contents is whitespace. v will be a GumboText. */
306
- GUMBO_NODE_WHITESPACE,
307
- /** Template node. This is separate from GUMBO_NODE_ELEMENT because many
308
- * client libraries will want to ignore the contents of template nodes, as
309
- * the spec suggests. Recursing on GUMBO_NODE_ELEMENT will do the right thing
310
- * here, while clients that want to include template contents should also
311
- * check for GUMBO_NODE_TEMPLATE. v will be a GumboElement. */
312
- GUMBO_NODE_TEMPLATE
467
+ GUMBO_NODE_WHITESPACE
313
468
  } GumboNodeType;
314
469
 
315
470
  /**
@@ -523,19 +678,6 @@ struct GumboInternalNode {
523
678
  /** Pointer back to parent node. Not owned. */
524
679
  GumboNode* parent;
525
680
 
526
- /**
527
- * Pointer to next node in document order. This is the next node by start tag
528
- * position in the document, or by position of the tag that forces the parser
529
- * to insert it for parser-inserted nodes. It's necessary to maintain API
530
- * compatibility with some other libraries, eg. BeautifulSoup. Not owned.
531
- */
532
- GumboNode* next;
533
-
534
- /**
535
- * Pointer to previous node in document order.
536
- */
537
- GumboNode* prev;
538
-
539
681
  /** The index within the parent's children vector of this node. */
540
682
  size_t index_within_parent;
541
683
 
@@ -653,14 +795,6 @@ GumboOutput* gumbo_parse(const char* buffer);
653
795
  GumboOutput* gumbo_parse_with_options(
654
796
  const GumboOptions* options, const char* buffer, size_t buffer_length);
655
797
 
656
- /**
657
- * Parse a chunk of HTML with the given fragment context. If `fragment_ctx`
658
- * is `GUMBO_TAG_LAST`, the fragment will be parsed as a full document.
659
- */
660
- GumboOutput* gumbo_parse_fragment(
661
- const GumboOptions* options, const char* buffer, size_t length,
662
- const GumboTag fragment_ctx, const GumboNamespaceEnum fragment_namespace);
663
-
664
798
  /** Release the memory used for the parse tree & parse errors. */
665
799
  void gumbo_destroy_output(
666
800
  const GumboOptions* options, GumboOutput* output);
@@ -47,15 +47,6 @@ typedef char gumbo_tagset[GUMBO_TAG_LAST];
47
47
  tagset[(int)tag] == (1 << (int)namespace))
48
48
 
49
49
 
50
-
51
- // selected forward declarations as it is getting hard to find
52
- // an appropriate order
53
- static bool node_html_tag_is(const GumboNode*, GumboTag);
54
- static GumboInsertionMode get_current_template_insertion_mode(const GumboParser*);
55
- static bool handle_in_template(GumboParser*, GumboToken*);
56
- static GumboNode* destroy_node(GumboParser*, GumboNode*);
57
-
58
-
59
50
  static void* malloc_wrapper(void* unused, size_t size) {
60
51
  return malloc(size);
61
52
  }
@@ -199,7 +190,7 @@ typedef struct _ReplacementEntry {
199
190
  { GUMBO_STRING(from), GUMBO_STRING(to) }
200
191
 
201
192
  // Static data for SVG attribute replacements.
202
- // https://html.spec.whatwg.org/multipage/syntax.html#creating-and-inserting-nodes
193
+ // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#adjust-svg-attributes
203
194
  static const ReplacementEntry kSvgAttributeReplacements[] = {
204
195
  REPLACEMENT_ENTRY("attributename", "attributeName"),
205
196
  REPLACEMENT_ENTRY("attributetype", "attributeType"),
@@ -207,12 +198,12 @@ static const ReplacementEntry kSvgAttributeReplacements[] = {
207
198
  REPLACEMENT_ENTRY("baseprofile", "baseProfile"),
208
199
  REPLACEMENT_ENTRY("calcmode", "calcMode"),
209
200
  REPLACEMENT_ENTRY("clippathunits", "clipPathUnits"),
210
- // REPLACEMENT_ENTRY("contentscripttype", "contentScriptType"),
211
- // REPLACEMENT_ENTRY("contentstyletype", "contentStyleType"),
201
+ REPLACEMENT_ENTRY("contentscripttype", "contentScriptType"),
202
+ REPLACEMENT_ENTRY("contentstyletype", "contentStyleType"),
212
203
  REPLACEMENT_ENTRY("diffuseconstant", "diffuseConstant"),
213
204
  REPLACEMENT_ENTRY("edgemode", "edgeMode"),
214
- // REPLACEMENT_ENTRY("externalresourcesrequired", "externalResourcesRequired"),
215
- // REPLACEMENT_ENTRY("filterres", "filterRes"),
205
+ REPLACEMENT_ENTRY("externalresourcesrequired", "externalResourcesRequired"),
206
+ REPLACEMENT_ENTRY("filterres", "filterRes"),
216
207
  REPLACEMENT_ENTRY("filterunits", "filterUnits"),
217
208
  REPLACEMENT_ENTRY("glyphref", "glyphRef"),
218
209
  REPLACEMENT_ENTRY("gradienttransform", "gradientTransform"),
@@ -380,9 +371,6 @@ typedef struct GumboInternalParserState {
380
371
  GumboNode* _head_element;
381
372
  GumboNode* _form_element;
382
373
 
383
- // The element used as fragment context when parsing in fragment mode
384
- GumboNode* _fragment_ctx;
385
-
386
374
  // The flag for when the spec says "Reprocess the current token in..."
387
375
  bool _reprocess_current_token;
388
376
 
@@ -411,10 +399,6 @@ typedef struct GumboInternalParserState {
411
399
  // The current token.
412
400
  GumboToken* _current_token;
413
401
 
414
- // The current (most recently inserted) node. This is used to link together
415
- // nodes in document order.
416
- GumboNode* _current_node;
417
-
418
402
  // The way that the spec is written, the </body> and </html> tags are *always*
419
403
  // implicit, because encountering one of those tokens merely switches the
420
404
  // insertion mode out of "in body". So we have individual state flags for
@@ -467,17 +451,7 @@ static void set_frameset_not_ok(GumboParser* parser) {
467
451
  }
468
452
 
469
453
  static GumboNode* create_node(GumboParser* parser, GumboNodeType type) {
470
- GumboParserState* state = parser->_parser_state;
471
454
  GumboNode* node = gumbo_parser_allocate(parser, sizeof(GumboNode));
472
-
473
- node->next = NULL;
474
- node->prev = state->_current_node;
475
- if (state->_current_node != NULL) {
476
- // May be null for the initial document node.
477
- state->_current_node->next = node;
478
- }
479
- state->_current_node = node;
480
-
481
455
  node->parent = NULL;
482
456
  node->index_within_parent = -1;
483
457
  node->type = type;
@@ -524,9 +498,7 @@ static void parser_state_init(GumboParser* parser) {
524
498
  gumbo_vector_init(parser, 5, &parser_state->_template_insertion_modes);
525
499
  parser_state->_head_element = NULL;
526
500
  parser_state->_form_element = NULL;
527
- parser_state->_fragment_ctx = NULL;
528
501
  parser_state->_current_token = NULL;
529
- parser_state->_current_node = NULL;
530
502
  parser_state->_closed_body_tag = false;
531
503
  parser_state->_closed_html_tag = false;
532
504
  parser->_parser_state = parser_state;
@@ -534,25 +506,17 @@ static void parser_state_init(GumboParser* parser) {
534
506
 
535
507
  static void parser_state_destroy(GumboParser* parser) {
536
508
  GumboParserState* state = parser->_parser_state;
537
- if (state->_fragment_ctx) {
538
- destroy_node(parser, state->_fragment_ctx);
539
- }
540
509
  gumbo_vector_destroy(parser, &state->_active_formatting_elements);
541
510
  gumbo_vector_destroy(parser, &state->_open_elements);
542
511
  gumbo_vector_destroy(parser, &state->_template_insertion_modes);
543
512
  gumbo_string_buffer_destroy(parser, &state->_text_node._buffer);
544
513
  gumbo_parser_deallocate(parser, state);
545
- parser->_parser_state = NULL;
546
514
  }
547
515
 
548
516
  static GumboNode* get_document_node(GumboParser* parser) {
549
517
  return parser->_output->document;
550
518
  }
551
519
 
552
- static bool is_fragment_parser(const GumboParser *parser) {
553
- return !!parser->_parser_state->_fragment_ctx;
554
- }
555
-
556
520
  // Returns the node at the bottom of the stack of open elements, or NULL if no
557
521
  // elements have been added yet.
558
522
  static GumboNode* get_current_node(GumboParser* parser) {
@@ -566,14 +530,6 @@ static GumboNode* get_current_node(GumboParser* parser) {
566
530
  return open_elements->data[open_elements->length - 1];
567
531
  }
568
532
 
569
- static GumboNode* get_adjusted_current_node(GumboParser* parser) {
570
- GumboParserState *state = parser->_parser_state;
571
- if (state->_open_elements.length == 1 && state->_fragment_ctx) {
572
- return state->_fragment_ctx;
573
- }
574
- return get_current_node(parser);
575
- }
576
-
577
533
  // Returns true if the given needle is in the given array of literal
578
534
  // GumboStringPieces. If exact_match is true, this requires that they match
579
535
  // exactly; otherwise, this performs a prefix match to check if any of the
@@ -594,80 +550,55 @@ static void set_insertion_mode(GumboParser* parser, GumboInsertionMode mode) {
594
550
  parser->_parser_state->_insertion_mode = mode;
595
551
  }
596
552
 
597
-
598
553
  // http://www.whatwg.org/specs/web-apps/current-work/complete/parsing.html#reset-the-insertion-mode-appropriately
599
554
  // This is a helper function that returns the appropriate insertion mode instead
600
555
  // of setting it. Returns GUMBO_INSERTION_MODE_INITIAL as a sentinel value to
601
556
  // indicate that there is no appropriate insertion mode, and the loop should
602
557
  // continue.
603
- static GumboInsertionMode get_appropriate_insertion_mode(const GumboParser* parser, int index) {
604
- const GumboVector* open_elements = &parser->_parser_state->_open_elements;
605
- const GumboNode* node = open_elements->data[index];
606
- const bool is_last = index == 0;
607
-
608
- if (is_last && is_fragment_parser(parser)) {
609
- node = parser->_parser_state->_fragment_ctx;
610
- }
558
+ static GumboInsertionMode get_appropriate_insertion_mode(
559
+ const GumboNode* node, bool is_last) {
560
+ assert(node->type == GUMBO_NODE_ELEMENT);
611
561
 
612
- assert(node->type == GUMBO_NODE_ELEMENT || node->type == GUMBO_NODE_TEMPLATE);
613
- switch (node->v.element.tag) {
614
- case GUMBO_TAG_SELECT: {
615
- if (is_last) {
616
- return GUMBO_INSERTION_MODE_IN_SELECT;
617
- }
618
- for (int i = index; i > 0; --i) {
619
- const GumboNode* ancestor = open_elements->data[i];
620
- if (node_html_tag_is(ancestor, GUMBO_TAG_TEMPLATE)) {
621
- return GUMBO_INSERTION_MODE_IN_SELECT;
622
- }
623
- if (node_html_tag_is(ancestor, GUMBO_TAG_TABLE)) {
624
- return GUMBO_INSERTION_MODE_IN_SELECT_IN_TABLE;
625
- }
626
- }
627
- return GUMBO_INSERTION_MODE_IN_SELECT;
628
- }
629
- case GUMBO_TAG_TD:
630
- case GUMBO_TAG_TH:
631
- if (!is_last) return GUMBO_INSERTION_MODE_IN_CELL;
632
- break;
633
- case GUMBO_TAG_TR:
634
- return GUMBO_INSERTION_MODE_IN_ROW;
635
- case GUMBO_TAG_TBODY:
636
- case GUMBO_TAG_THEAD:
637
- case GUMBO_TAG_TFOOT:
638
- return GUMBO_INSERTION_MODE_IN_TABLE_BODY;
639
- case GUMBO_TAG_CAPTION:
640
- return GUMBO_INSERTION_MODE_IN_CAPTION;
641
- case GUMBO_TAG_COLGROUP:
642
- return GUMBO_INSERTION_MODE_IN_COLUMN_GROUP;
643
- case GUMBO_TAG_TABLE:
644
- return GUMBO_INSERTION_MODE_IN_TABLE;
645
- case GUMBO_TAG_TEMPLATE:
646
- return get_current_template_insertion_mode(parser);
647
- case GUMBO_TAG_HEAD:
648
- if (!is_last) return GUMBO_INSERTION_MODE_IN_HEAD;
649
- break;
650
- case GUMBO_TAG_BODY:
651
- return GUMBO_INSERTION_MODE_IN_BODY;
652
- case GUMBO_TAG_FRAMESET:
653
- return GUMBO_INSERTION_MODE_IN_FRAMESET;
654
- case GUMBO_TAG_HTML:
655
- return parser->_parser_state->_head_element ?
656
- GUMBO_INSERTION_MODE_AFTER_HEAD : GUMBO_INSERTION_MODE_BEFORE_HEAD;
657
- default:
658
- break;
659
- }
660
- return is_last ?
661
- GUMBO_INSERTION_MODE_IN_BODY : GUMBO_INSERTION_MODE_INITIAL;
562
+ if (node->v.element.tag_namespace == GUMBO_NAMESPACE_HTML) {
563
+ switch (node->v.element.tag) {
564
+ case GUMBO_TAG_SELECT:
565
+ return GUMBO_INSERTION_MODE_IN_SELECT;
566
+ case GUMBO_TAG_TD:
567
+ case GUMBO_TAG_TH:
568
+ return is_last ?
569
+ GUMBO_INSERTION_MODE_IN_BODY : GUMBO_INSERTION_MODE_IN_CELL;
570
+ case GUMBO_TAG_TR:
571
+ return GUMBO_INSERTION_MODE_IN_ROW;
572
+ case GUMBO_TAG_TBODY:
573
+ case GUMBO_TAG_THEAD:
574
+ case GUMBO_TAG_TFOOT:
575
+ return GUMBO_INSERTION_MODE_IN_TABLE_BODY;
576
+ case GUMBO_TAG_CAPTION:
577
+ return GUMBO_INSERTION_MODE_IN_CAPTION;
578
+ case GUMBO_TAG_COLGROUP:
579
+ return GUMBO_INSERTION_MODE_IN_COLUMN_GROUP;
580
+ case GUMBO_TAG_TABLE:
581
+ return GUMBO_INSERTION_MODE_IN_TABLE;
582
+ case GUMBO_TAG_HEAD:
583
+ case GUMBO_TAG_BODY:
584
+ return GUMBO_INSERTION_MODE_IN_BODY;
585
+ case GUMBO_TAG_FRAMESET:
586
+ return GUMBO_INSERTION_MODE_IN_FRAMESET;
587
+ case GUMBO_TAG_HTML:
588
+ return GUMBO_INSERTION_MODE_BEFORE_HEAD;
589
+ default:
590
+ break;
591
+ }
592
+ }
593
+ return is_last ? GUMBO_INSERTION_MODE_IN_BODY : GUMBO_INSERTION_MODE_INITIAL;
662
594
  }
663
595
 
664
-
665
596
  // This performs the actual "reset the insertion mode" loop.
666
597
  static void reset_insertion_mode_appropriately(GumboParser* parser) {
667
598
  const GumboVector* open_elements = &parser->_parser_state->_open_elements;
668
599
  for (int i = open_elements->length; --i >= 0; ) {
669
600
  GumboInsertionMode mode =
670
- get_appropriate_insertion_mode(parser, i);
601
+ get_appropriate_insertion_mode(open_elements->data[i], i == 0);
671
602
  if (mode != GUMBO_INSERTION_MODE_INITIAL) {
672
603
  set_insertion_mode(parser, mode);
673
604
  return;
@@ -701,7 +632,7 @@ static GumboError* parser_add_parse_error(GumboParser* parser, const GumboToken*
701
632
  &extra_data->tag_stack);
702
633
  for (int i = 0; i < state->_open_elements.length; ++i) {
703
634
  const GumboNode* node = state->_open_elements.data[i];
704
- assert(node->type == GUMBO_NODE_ELEMENT || node->type == GUMBO_NODE_TEMPLATE);
635
+ assert(node->type == GUMBO_NODE_ELEMENT);
705
636
  gumbo_vector_add(parser, (void*) node->v.element.tag,
706
637
  &extra_data->tag_stack);
707
638
  }
@@ -738,7 +669,7 @@ static bool tag_is(const GumboToken* token, bool is_start, GumboTag tag) {
738
669
  // Like tag_in, but checks for the tag of a node, rather than a token.
739
670
  static bool node_tag_in_set(const GumboNode* node, const gumbo_tagset tags) {
740
671
  assert(node != NULL);
741
- if (node->type != GUMBO_NODE_ELEMENT && node->type != GUMBO_NODE_TEMPLATE) {
672
+ if (node->type != GUMBO_NODE_ELEMENT) {
742
673
  return false;
743
674
  }
744
675
  return TAGSET_INCLUDES(tags, node->v.element.tag_namespace, node->v.element.tag);
@@ -747,7 +678,7 @@ static bool node_tag_in_set(const GumboNode* node, const gumbo_tagset tags) {
747
678
 
748
679
  // Like node_tag_in, but for the single-tag case.
749
680
  static bool node_qualified_tag_is(const GumboNode* node, GumboNamespaceEnum ns, GumboTag tag) {
750
- return (node->type == GUMBO_NODE_ELEMENT || node->type == GUMBO_NODE_TEMPLATE) &&
681
+ return node->type == GUMBO_NODE_ELEMENT &&
751
682
  node->v.element.tag == tag &&
752
683
  node->v.element.tag_namespace == ns;
753
684
  }
@@ -758,23 +689,6 @@ static bool node_html_tag_is(const GumboNode* node, GumboTag tag)
758
689
  return node_qualified_tag_is(node, GUMBO_NAMESPACE_HTML, tag);
759
690
  }
760
691
 
761
- static void push_template_insertion_mode(GumboParser* parser, GumboInsertionMode mode) {
762
- gumbo_vector_add(parser, (void*) mode, &parser->_parser_state->_template_insertion_modes);
763
- }
764
-
765
- static void pop_template_insertion_mode(GumboParser* parser) {
766
- gumbo_vector_pop(parser, &parser->_parser_state->_template_insertion_modes);
767
- }
768
-
769
- // Returns the current template insertion mode. If the stack of template
770
- // insertion modes is empty, this returns GUMBO_INSERTION_MODE_INITIAL.
771
- static GumboInsertionMode get_current_template_insertion_mode(const GumboParser* parser) {
772
- GumboVector* template_insertion_modes = &parser->_parser_state->_template_insertion_modes;
773
- if (template_insertion_modes->length == 0) {
774
- return GUMBO_INSERTION_MODE_INITIAL;
775
- }
776
- return (GumboInsertionMode) template_insertion_modes->data[(template_insertion_modes->length - 1)];
777
- }
778
692
 
779
693
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#mathml-text-integration-point
780
694
  static bool is_mathml_integration_point(const GumboNode* node) {
@@ -792,63 +706,6 @@ static bool is_html_integration_point(const GumboNode* node) {
792
706
  "encoding", "application/xhtml+xml")));
793
707
  }
794
708
 
795
-
796
- // This represents a place to insert a node, consisting of a target parent and a
797
- // child index within that parent. If the node should be inserted at the end of
798
- // the parent's child, index will be -1.
799
- typedef struct {
800
- GumboNode* target;
801
- int index;
802
- } InsertionLocation;
803
-
804
- InsertionLocation get_appropriate_insertion_location(GumboParser* parser, GumboNode* override_target) {
805
- InsertionLocation retval = { override_target, -1 };
806
- if (retval.target == NULL) {
807
- // No override target; default to the current node, but special-case the
808
- // root node since get_current_node() assumes the stack of open elements is
809
- // non-empty.
810
- retval.target = parser->_output->root != NULL ?
811
- get_current_node(parser) : get_document_node(parser);
812
- }
813
- if (!parser->_parser_state->_foster_parent_insertions ||
814
- !node_tag_in_set(retval.target, (gumbo_tagset) { TAG(TABLE), TAG(TBODY),
815
- TAG(TFOOT), TAG(THEAD), TAG(TR) })) {
816
- return retval;
817
- }
818
-
819
- // Foster-parenting case.
820
- int last_template_index = -1;
821
- int last_table_index = -1;
822
- GumboVector* open_elements = &parser->_parser_state->_open_elements;
823
- for (int i = 0; i < open_elements->length; ++i) {
824
- if (node_html_tag_is(open_elements->data[i], GUMBO_TAG_TEMPLATE)) {
825
- last_template_index = i;
826
- }
827
- if (node_html_tag_is(open_elements->data[i], GUMBO_TAG_TABLE)) {
828
- last_table_index = i;
829
- }
830
- }
831
- if (last_template_index != -1 &&
832
- (last_table_index == -1 || last_template_index > last_table_index)) {
833
- retval.target = open_elements->data[last_template_index];
834
- return retval;
835
- }
836
- if (last_table_index == -1) {
837
- retval.target = open_elements->data[0];
838
- return retval;
839
- }
840
- GumboNode* last_table = open_elements->data[last_table_index];
841
- if (last_table->parent != NULL) {
842
- retval.target = last_table->parent;
843
- retval.index = last_table->index_within_parent;
844
- return retval;
845
- }
846
-
847
- retval.target = open_elements->data[last_table_index - 1];
848
- return retval;
849
- }
850
-
851
-
852
709
  // Appends a node to the end of its parent, setting the "parent" and
853
710
  // "index_within_parent" fields appropriately.
854
711
  static void append_node(
@@ -856,7 +713,7 @@ static void append_node(
856
713
  assert(node->parent == NULL);
857
714
  assert(node->index_within_parent == -1);
858
715
  GumboVector* children;
859
- if (parent->type == GUMBO_NODE_ELEMENT || parent->type == GUMBO_NODE_TEMPLATE) {
716
+ if (parent->type == GUMBO_NODE_ELEMENT) {
860
717
  children = &parent->v.element.children;
861
718
  } else {
862
719
  assert(parent->type == GUMBO_NODE_DOCUMENT);
@@ -868,44 +725,66 @@ static void append_node(
868
725
  assert(node->index_within_parent < children->length);
869
726
  }
870
727
 
871
- // Inserts a node at the specified InsertionLocation, updating the
728
+ // Inserts a node at the specified index within its parent, updating the
872
729
  // "parent" and "index_within_parent" fields of it and all its siblings.
873
- // If the index of the location is -1, this calls append_node.
874
730
  static void insert_node(
875
- GumboParser* parser, GumboNode* node, InsertionLocation location) {
731
+ GumboParser* parser, GumboNode* parent, int index, GumboNode* node) {
876
732
  assert(node->parent == NULL);
877
733
  assert(node->index_within_parent == -1);
878
- GumboNode* parent = location.target;
879
- int index = location.index;
880
- if (index != -1) {
881
- GumboVector* children = NULL;
882
- if (parent->type == GUMBO_NODE_ELEMENT ||
883
- parent->type == GUMBO_NODE_TEMPLATE) {
884
- children = &parent->v.element.children;
885
- } else if (parent->type == GUMBO_NODE_DOCUMENT) {
886
- children = &parent->v.document.children;
887
- assert(children->length == 0);
888
- } else {
889
- assert(0);
890
- }
734
+ assert(parent->type == GUMBO_NODE_ELEMENT);
735
+ GumboVector* children = &parent->v.element.children;
736
+ assert(index >= 0);
737
+ assert(index < children->length);
738
+ node->parent = parent;
739
+ node->index_within_parent = index;
740
+ gumbo_vector_insert_at(parser, (void*) node, index, children);
741
+ assert(node->index_within_parent < children->length);
742
+ for (int i = index + 1; i < children->length; ++i) {
743
+ GumboNode* sibling = children->data[i];
744
+ sibling->index_within_parent = i;
745
+ assert(sibling->index_within_parent < children->length);
746
+ }
747
+ }
891
748
 
892
- assert(index >= 0);
893
- assert(index < children->length);
894
- node->parent = parent;
895
- node->index_within_parent = index;
896
- gumbo_vector_insert_at(parser, (void*) node, index, children);
897
- assert(node->index_within_parent < children->length);
898
- for (int i = index + 1; i < children->length; ++i) {
899
- GumboNode* sibling = children->data[i];
900
- sibling->index_within_parent = i;
901
- assert(sibling->index_within_parent < children->length);
749
+ // http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#foster-parenting
750
+ static void foster_parent_element(GumboParser* parser, GumboNode* node) {
751
+ GumboVector* open_elements = &parser->_parser_state->_open_elements;
752
+ assert(open_elements->length > 2);
753
+
754
+ node->parse_flags |= GUMBO_INSERTION_FOSTER_PARENTED;
755
+ GumboNode* foster_parent_element = open_elements->data[0];
756
+ assert(foster_parent_element->type == GUMBO_NODE_ELEMENT);
757
+ assert(node_html_tag_is(foster_parent_element, GUMBO_TAG_HTML));
758
+ for (int i = open_elements->length; --i > 1; ) {
759
+ GumboNode* table_element = open_elements->data[i];
760
+ if (node_html_tag_is(table_element, GUMBO_TAG_TABLE)) {
761
+ foster_parent_element = table_element->parent;
762
+ if (!foster_parent_element ||
763
+ foster_parent_element->type != GUMBO_NODE_ELEMENT) {
764
+ // Table has no parent; spec says it's possible if a script manipulated
765
+ // the DOM, although I don't think we have to worry about this case.
766
+ gumbo_debug("Table has no parent.\n");
767
+ foster_parent_element = open_elements->data[i - 1];
768
+ break;
769
+ }
770
+ assert(foster_parent_element->type == GUMBO_NODE_ELEMENT);
771
+ gumbo_debug("Found enclosing table (%x) at %d; parent=%s, index=%d.\n",
772
+ table_element, i, gumbo_normalized_tagname(
773
+ foster_parent_element->v.element.tag),
774
+ table_element->index_within_parent);
775
+ assert(foster_parent_element->v.element.children.data[
776
+ table_element->index_within_parent] == table_element);
777
+ insert_node(parser, foster_parent_element,
778
+ table_element->index_within_parent, node);
779
+ return;
902
780
  }
903
- } else {
904
- append_node(parser, parent, node);
905
781
  }
782
+ if (node->type == GUMBO_NODE_ELEMENT) {
783
+ gumbo_vector_add(parser, (void*) node, open_elements);
784
+ }
785
+ append_node(parser, foster_parent_element, node);
906
786
  }
907
787
 
908
-
909
788
  static void maybe_flush_text_node_buffer(GumboParser* parser) {
910
789
  GumboParserState* state = parser->_parser_state;
911
790
  TextNodeBufferState* buffer_state = &state->_text_node;
@@ -925,20 +804,20 @@ static void maybe_flush_text_node_buffer(GumboParser* parser) {
925
804
  state->_current_token->original_text.data -
926
805
  buffer_state->_start_original_text;
927
806
  text_node_data->start_pos = buffer_state->_start_position;
928
-
929
- gumbo_debug("Flushing text node buffer of %.*s.\n",
930
- (int) buffer_state->_buffer.length, buffer_state->_buffer.data);
931
-
932
- InsertionLocation location = get_appropriate_insertion_location(parser, NULL);
933
- if (location.target->type == GUMBO_NODE_DOCUMENT) {
934
- // The DOM does not allow Document nodes to have Text children, so per the
935
- // spec, they are dropped on the floor.
936
- destroy_node(parser, text_node);
807
+ if (state->_foster_parent_insertions &&
808
+ node_tag_in_set(get_current_node(parser), (gumbo_tagset) { TAG(TABLE), TAG(TBODY), TAG(TFOOT),
809
+ TAG(THEAD), TAG(TR) })) {
810
+ foster_parent_element(parser, text_node);
937
811
  } else {
938
- insert_node(parser, text_node, location);
812
+ append_node(
813
+ parser, parser->_output->root ?
814
+ get_current_node(parser) : parser->_output->document, text_node);
939
815
  }
816
+ gumbo_debug("Flushing text node buffer of %.*s.\n",
817
+ (int) buffer_state->_buffer.length, buffer_state->_buffer.data);
940
818
 
941
- gumbo_string_buffer_clear(parser, &buffer_state->_buffer);
819
+ gumbo_string_buffer_destroy(parser, &buffer_state->_buffer);
820
+ gumbo_string_buffer_init(parser, &buffer_state->_buffer);
942
821
  buffer_state->_type = GUMBO_NODE_WHITESPACE;
943
822
  assert(buffer_state->_buffer.length == 0);
944
823
  }
@@ -965,7 +844,7 @@ static GumboNode* pop_current_node(GumboParser* parser) {
965
844
  assert(state->_open_elements.length == 0);
966
845
  return NULL;
967
846
  }
968
- assert(current_node->type == GUMBO_NODE_ELEMENT || current_node->type == GUMBO_NODE_TEMPLATE);
847
+ assert(current_node->type == GUMBO_NODE_ELEMENT);
969
848
  bool is_closed_body_or_html_tag =
970
849
  (node_html_tag_is(current_node, GUMBO_TAG_BODY) && state->_closed_body_tag) ||
971
850
  (node_html_tag_is(current_node, GUMBO_TAG_HTML) && state->_closed_html_tag);
@@ -994,14 +873,14 @@ static void append_comment_node(
994
873
 
995
874
  // http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#clear-the-stack-back-to-a-table-row-context
996
875
  static void clear_stack_to_table_row_context(GumboParser* parser) {
997
- while (!node_tag_in_set(get_current_node(parser), (gumbo_tagset) { TAG(HTML), TAG(TR), TAG(TEMPLATE)})) {
876
+ while (!node_tag_in_set(get_current_node(parser), (gumbo_tagset) { TAG(HTML), TAG(TR) })) {
998
877
  pop_current_node(parser);
999
878
  }
1000
879
  }
1001
880
 
1002
881
  // http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#clear-the-stack-back-to-a-table-context
1003
882
  static void clear_stack_to_table_context(GumboParser* parser) {
1004
- while (!node_tag_in_set(get_current_node(parser), (gumbo_tagset) { TAG(HTML), TAG(TABLE), TAG(TEMPLATE) } )) {
883
+ while (!node_tag_in_set(get_current_node(parser), (gumbo_tagset) { TAG(HTML), TAG(TABLE) } )) {
1005
884
  pop_current_node(parser);
1006
885
  }
1007
886
  }
@@ -1009,7 +888,7 @@ static void clear_stack_to_table_context(GumboParser* parser) {
1009
888
  // http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#clear-the-stack-back-to-a-table-body-context
1010
889
  void clear_stack_to_table_body_context(GumboParser* parser) {
1011
890
  while (!node_tag_in_set(get_current_node(parser), (gumbo_tagset) { TAG(HTML), TAG(TBODY),
1012
- TAG(TFOOT), TAG(THEAD), TAG(TEMPLATE) })) {
891
+ TAG(TFOOT), TAG(THEAD) })) {
1013
892
  pop_current_node(parser);
1014
893
  }
1015
894
  }
@@ -1024,8 +903,7 @@ static GumboNode* create_element(GumboParser* parser, GumboTag tag) {
1024
903
  element->tag_namespace = GUMBO_NAMESPACE_HTML;
1025
904
  element->original_tag = kGumboEmptyString;
1026
905
  element->original_end_tag = kGumboEmptyString;
1027
- element->start_pos = (parser->_parser_state->_current_token) ?
1028
- parser->_parser_state->_current_token->position : kGumboEmptySourcePosition;
906
+ element->start_pos = parser->_parser_state->_current_token->position;
1029
907
  element->end_pos = kGumboEmptySourcePosition;
1030
908
  return node;
1031
909
  }
@@ -1036,12 +914,7 @@ static GumboNode* create_element_from_token(
1036
914
  assert(token->type == GUMBO_TOKEN_START_TAG);
1037
915
  GumboTokenStartTag* start_tag = &token->v.start_tag;
1038
916
 
1039
- GumboNodeType type = (
1040
- tag_namespace == GUMBO_NAMESPACE_HTML &&
1041
- start_tag->tag == GUMBO_TAG_TEMPLATE)
1042
- ? GUMBO_NODE_TEMPLATE : GUMBO_NODE_ELEMENT;
1043
-
1044
- GumboNode* node = create_node(parser, type);
917
+ GumboNode* node = create_node(parser, GUMBO_NODE_ELEMENT);
1045
918
  GumboElement* element = &node->v.element;
1046
919
  gumbo_vector_init(parser, 1, &element->children);
1047
920
  element->attributes = start_tag->attributes;
@@ -1078,9 +951,20 @@ static void insert_element(GumboParser* parser, GumboNode* node,
1078
951
  if (!is_reconstructing_formatting_elements) {
1079
952
  maybe_flush_text_node_buffer(parser);
1080
953
  }
1081
- InsertionLocation location =
1082
- get_appropriate_insertion_location(parser, NULL);
1083
- insert_node(parser, node, location);
954
+ if (state->_foster_parent_insertions &&
955
+ node_tag_in_set(get_current_node(parser), (gumbo_tagset) { TAG(TABLE), TAG(TBODY), TAG(TFOOT),
956
+ TAG(THEAD), TAG(TR) } )) {
957
+ foster_parent_element(parser, node);
958
+ gumbo_vector_add(parser, (void*) node, &state->_open_elements);
959
+ return;
960
+ }
961
+
962
+ // This is called to insert the root HTML element, but get_current_node
963
+ // assumes the stack of open elements is non-empty, so we need special
964
+ // handling for this case.
965
+ append_node(
966
+ parser, parser->_output->root ?
967
+ get_current_node(parser) : parser->_output->document, node);
1084
968
  gumbo_vector_add(parser, (void*) node, &state->_open_elements);
1085
969
  }
1086
970
 
@@ -1253,7 +1137,7 @@ static bool is_open_element(GumboParser* parser, const GumboNode* node) {
1253
1137
  // values are fresh copies.
1254
1138
  GumboNode* clone_node(
1255
1139
  GumboParser* parser, const GumboNode* node, GumboParseFlags reason) {
1256
- assert(node->type == GUMBO_NODE_ELEMENT || node->type == GUMBO_NODE_TEMPLATE);
1140
+ assert(node->type == GUMBO_NODE_ELEMENT);
1257
1141
  GumboNode* new_node = gumbo_parser_allocate(parser, sizeof(GumboNode));
1258
1142
  *new_node = *node;
1259
1143
  new_node->parent = NULL;
@@ -1323,10 +1207,7 @@ static void reconstruct_active_formatting_elements(GumboParser* parser) {
1323
1207
  GumboNode* clone = clone_node(
1324
1208
  parser, element, GUMBO_INSERTION_RECONSTRUCTED_FORMATTING_ELEMENT);
1325
1209
  // Step 9.
1326
- InsertionLocation location = get_appropriate_insertion_location(parser, NULL);
1327
- insert_node(parser, clone, location);
1328
- gumbo_vector_add(parser, (void*) clone, &parser->_parser_state->_open_elements);
1329
-
1210
+ insert_element(parser, clone, true);
1330
1211
  // Step 10.
1331
1212
  elements->data[i] = clone;
1332
1213
  gumbo_debug("Reconstructed %s element at %d.\n",
@@ -1380,40 +1261,37 @@ static GumboQuirksModeEnum compute_quirks_mode(
1380
1261
  // names. For example, "has an element in list scope" looks for an element of
1381
1262
  // the given qualified name within the nearest enclosing <ol> or <ul>, along
1382
1263
  // with a bunch of generic element types that serve to "firewall" their content
1383
- // from the rest of the document. Note that because of the way the spec is written,
1384
- // all elements are expected to be in the HTML namespace
1385
- static bool has_an_element_in_specific_scope(GumboParser* parser,
1386
- int expected_size, const GumboTag *expected, bool negate, const gumbo_tagset tags) {
1264
+ // from the rest of the document.
1265
+ static bool has_an_element_in_specific_scope(GumboParser* parser, gumbo_tagset expected, bool negate, const gumbo_tagset tags) {
1387
1266
  GumboVector* open_elements = &parser->_parser_state->_open_elements;
1267
+ bool result = false;
1388
1268
  for (int i = open_elements->length; --i >= 0; ) {
1389
1269
  const GumboNode* node = open_elements->data[i];
1390
- if (node->type != GUMBO_NODE_ELEMENT && node->type != GUMBO_NODE_TEMPLATE)
1270
+ if (node->type != GUMBO_NODE_ELEMENT) {
1391
1271
  continue;
1392
-
1393
- GumboTag node_tag = node->v.element.tag;
1394
- GumboNamespaceEnum node_ns = node->v.element.tag_namespace;
1395
- for (int j = 0; j < expected_size; ++j) {
1396
- if (node_tag == expected[j] && node_ns == GUMBO_NAMESPACE_HTML)
1397
- return true;
1398
1272
  }
1399
-
1400
- bool found = TAGSET_INCLUDES(tags, node_ns, node_tag);
1401
- if (negate != found)
1402
- return false;
1273
+ if (TAGSET_INCLUDES(expected, node->v.element.tag_namespace, node->v.element.tag)) {
1274
+ return true;
1275
+ }
1276
+ bool found_qualname = false;
1277
+ if (TAGSET_INCLUDES(tags, node->v.element.tag_namespace, node->v.element.tag)) {
1278
+ found_qualname = true;
1279
+ }
1280
+ if (negate != found_qualname) {
1281
+ result = false;
1282
+ return result;
1283
+ }
1403
1284
  }
1404
- return false;
1405
- }
1406
-
1407
- // Checks for the presence of an open element of the specified tag type.
1408
- static bool has_open_element(GumboParser* parser, GumboTag tag) {
1409
- return has_an_element_in_specific_scope(parser, 1, &tag, false, (gumbo_tagset) { TAG(HTML) } );
1285
+ return result;
1410
1286
  }
1411
1287
 
1412
1288
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-scope
1413
1289
  static bool has_an_element_in_scope(GumboParser* parser, GumboTag tag) {
1414
- return has_an_element_in_specific_scope(parser, 1, &tag, false, (gumbo_tagset) { TAG(APPLET),
1290
+ gumbo_tagset qualset = {0};
1291
+ qualset[(int) tag] = (1 << (int) GUMBO_NAMESPACE_HTML);
1292
+ return has_an_element_in_specific_scope(parser, qualset, false, (gumbo_tagset) { TAG(APPLET),
1415
1293
  TAG(CAPTION), TAG(HTML), TAG(TABLE), TAG(TD), TAG(TH), TAG(MARQUEE),
1416
- TAG(OBJECT), TAG(TEMPLATE), TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN),
1294
+ TAG(OBJECT), TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN),
1417
1295
  TAG_MATHML(MS), TAG_MATHML(MTEXT), TAG_MATHML(ANNOTATION_XML),
1418
1296
  TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC), TAG_SVG(TITLE) });
1419
1297
  }
@@ -1431,11 +1309,11 @@ static bool has_node_in_scope(GumboParser* parser, const GumboNode* node) {
1431
1309
  if (current == node) {
1432
1310
  return true;
1433
1311
  }
1434
- if (current->type != GUMBO_NODE_ELEMENT && current->type != GUMBO_NODE_TEMPLATE) {
1312
+ if (current->type != GUMBO_NODE_ELEMENT) {
1435
1313
  continue;
1436
1314
  }
1437
1315
  if (node_tag_in_set(current, (gumbo_tagset) { TAG(APPLET), TAG(CAPTION), TAG(HTML),
1438
- TAG(TABLE), TAG(TD), TAG(TH), TAG(MARQUEE), TAG(OBJECT), TAG(TEMPLATE),
1316
+ TAG(TABLE), TAG(TD), TAG(TH), TAG(MARQUEE), TAG(OBJECT),
1439
1317
  TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN), TAG_MATHML(MS),
1440
1318
  TAG_MATHML(MTEXT), TAG_MATHML(ANNOTATION_XML), TAG_SVG(FOREIGNOBJECT),
1441
1319
  TAG_SVG(DESC), TAG_SVG(TITLE) } )) {
@@ -1448,19 +1326,21 @@ static bool has_node_in_scope(GumboParser* parser, const GumboNode* node) {
1448
1326
 
1449
1327
  // Like has_an_element_in_scope, but restricts the expected qualified name to a
1450
1328
  // range of possible qualified names instead of just a single one.
1451
- static bool has_an_element_in_scope_with_tagname(GumboParser* parser, int expected_len, const GumboTag expected[]) {
1452
- return has_an_element_in_specific_scope(parser, expected_len, expected, false, (gumbo_tagset) {
1453
- TAG(APPLET), TAG(CAPTION), TAG(HTML), TAG(TABLE), TAG(TD), TAG(TH), TAG(MARQUEE),
1454
- TAG(OBJECT), TAG(TEMPLATE), TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN),
1455
- TAG_MATHML(MS), TAG_MATHML(MTEXT), TAG_MATHML(ANNOTATION_XML),
1456
- TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC), TAG_SVG(TITLE) });
1329
+ static bool has_an_element_in_scope_with_tagname(GumboParser* parser, gumbo_tagset qualset) {
1330
+ return has_an_element_in_specific_scope(parser, qualset, false, (gumbo_tagset) { TAG(APPLET),
1331
+ TAG(CAPTION), TAG(HTML), TAG(TABLE), TAG(TD), TAG(TH), TAG(MARQUEE),
1332
+ TAG(OBJECT), TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN),
1333
+ TAG_MATHML(MS), TAG_MATHML(MTEXT), TAG_MATHML(ANNOTATION_XML),
1334
+ TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC), TAG_SVG(TITLE) });
1457
1335
  }
1458
1336
 
1459
1337
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-list-item-scope
1460
1338
  static bool has_an_element_in_list_scope(GumboParser* parser, GumboTag tag) {
1461
- return has_an_element_in_specific_scope(parser, 1, &tag, false, (gumbo_tagset) { TAG(APPLET),
1339
+ gumbo_tagset qualset = {0};
1340
+ qualset[(int)tag] = (1 << (int)(GUMBO_NAMESPACE_HTML));
1341
+ return has_an_element_in_specific_scope(parser, qualset, false, (gumbo_tagset) { TAG(APPLET),
1462
1342
  TAG(CAPTION), TAG(HTML), TAG(TABLE), TAG(TD), TAG(TH), TAG(MARQUEE),
1463
- TAG(OBJECT), TAG(TEMPLATE), TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN),
1343
+ TAG(OBJECT), TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN),
1464
1344
  TAG_MATHML(MS), TAG_MATHML(MTEXT), TAG_MATHML(ANNOTATION_XML),
1465
1345
  TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC), TAG_SVG(TITLE), TAG(OL),
1466
1346
  TAG(UL) });
@@ -1468,22 +1348,27 @@ static bool has_an_element_in_list_scope(GumboParser* parser, GumboTag tag) {
1468
1348
 
1469
1349
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-button-scope
1470
1350
  static bool has_an_element_in_button_scope(GumboParser* parser, GumboTag tag) {
1471
- return has_an_element_in_specific_scope(parser, 1, &tag, false, (gumbo_tagset) { TAG(APPLET),
1351
+ gumbo_tagset qualset = {0};
1352
+ qualset[(int) tag] = (1 << (int)(GUMBO_NAMESPACE_HTML));
1353
+ return has_an_element_in_specific_scope(parser, qualset, false, (gumbo_tagset) { TAG(APPLET),
1472
1354
  TAG(CAPTION), TAG(HTML), TAG(TABLE), TAG(TD), TAG(TH), TAG(MARQUEE),
1473
- TAG(OBJECT), TAG(TEMPLATE), TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN),
1355
+ TAG(OBJECT), TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN),
1474
1356
  TAG_MATHML(MS), TAG_MATHML(MTEXT), TAG_MATHML(ANNOTATION_XML),
1475
1357
  TAG_SVG(FOREIGNOBJECT), TAG_SVG(DESC), TAG_SVG(TITLE), TAG(BUTTON) });
1476
1358
  }
1477
1359
 
1478
1360
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-table-scope
1479
1361
  static bool has_an_element_in_table_scope(GumboParser* parser, GumboTag tag) {
1480
- return has_an_element_in_specific_scope(parser, 1, &tag, false, (gumbo_tagset) { TAG(HTML),
1481
- TAG(TABLE), TAG(TEMPLATE) });
1362
+ gumbo_tagset qualset = {0};
1363
+ qualset[(int) tag] = (1 << (int)(GUMBO_NAMESPACE_HTML));
1364
+ return has_an_element_in_specific_scope(parser, qualset, false, (gumbo_tagset) { TAG(HTML), TAG(TABLE) });
1482
1365
  }
1483
1366
 
1484
1367
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#has-an-element-in-select-scope
1485
1368
  static bool has_an_element_in_select_scope(GumboParser* parser, GumboTag tag) {
1486
- return has_an_element_in_specific_scope(parser, 1, &tag, true, (gumbo_tagset) { TAG(OPTGROUP), TAG(OPTION) });
1369
+ gumbo_tagset qualset = {0};
1370
+ qualset[(int) tag] = (1 << (int)(GUMBO_NAMESPACE_HTML));
1371
+ return has_an_element_in_specific_scope(parser, qualset, true, (gumbo_tagset) { TAG(OPTGROUP), TAG(OPTION) });
1487
1372
  }
1488
1373
 
1489
1374
  // http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#generate-implied-end-tags
@@ -1491,24 +1376,12 @@ static bool has_an_element_in_select_scope(GumboParser* parser, GumboTag tag) {
1491
1376
  // Pass GUMBO_TAG_LAST to not exclude any of them.
1492
1377
  static void generate_implied_end_tags(GumboParser* parser, GumboTag exception) {
1493
1378
  for (;
1494
- node_tag_in_set(get_current_node(parser), (gumbo_tagset) { TAG(DD),
1495
- TAG(DT), TAG(LI), TAG(OPTION), TAG(OPTGROUP), TAG(P), TAG(RP), TAG(RB),
1496
- TAG(RT), TAG(RTC) }) &&
1379
+ node_tag_in_set(get_current_node(parser), (gumbo_tagset) { TAG(DD), TAG(DT),
1380
+ TAG(LI), TAG(OPTION), TAG(OPTGROUP), TAG(P), TAG(RP), TAG(RT) }) &&
1497
1381
  !node_html_tag_is(get_current_node(parser), exception);
1498
1382
  pop_current_node(parser));
1499
1383
  }
1500
1384
 
1501
- // This is the "generate all implied end tags thoroughly" clause of the spec.
1502
- // https://html.spec.whatwg.org/multipage/syntax.html#closing-elements-that-have-implied-end-tags
1503
- static void generate_all_implied_end_tags_thoroughly(GumboParser* parser) {
1504
- for (;
1505
- node_tag_in_set(get_current_node(parser), (gumbo_tagset) { TAG(CAPTION),
1506
- TAG(COLGROUP), TAG(DD), TAG(DT), TAG(LI), TAG(OPTION), TAG(OPTGROUP),
1507
- TAG(P), TAG(RP), TAG(RT), TAG(RTC), TAG(TBODY), TAG(TD),
1508
- TAG(TFOOT), TAG(TH), TAG(HEAD), TAG(TR) });
1509
- pop_current_node(parser));
1510
- }
1511
-
1512
1385
  // This factors out the clauses relating to "act as if an end tag token with tag
1513
1386
  // name "table" had been seen. Returns true if there's a table element in table
1514
1387
  // scope which was successfully closed, false if not and the token should be
@@ -1573,7 +1446,7 @@ static void close_current_select(GumboParser* parser) {
1573
1446
  // The list of nodes in the "special" category:
1574
1447
  // http://www.whatwg.org/specs/web-apps/current-work/complete/parsing.html#special
1575
1448
  static bool is_special_node(const GumboNode* node) {
1576
- assert(node->type == GUMBO_NODE_ELEMENT || node->type == GUMBO_NODE_TEMPLATE);
1449
+ assert(node->type == GUMBO_NODE_ELEMENT);
1577
1450
  return node_tag_in_set(node, (gumbo_tagset) { TAG(ADDRESS), TAG(APPLET), TAG(AREA),
1578
1451
  TAG(ARTICLE), TAG(ASIDE), TAG(BASE), TAG(BASEFONT), TAG(BGSOUND), TAG(BLOCKQUOTE),
1579
1452
  TAG(BODY), TAG(BR), TAG(BUTTON), TAG(CAPTION), TAG(CENTER), TAG(COL),
@@ -1585,8 +1458,8 @@ static bool is_special_node(const GumboNode* node) {
1585
1458
  TAG(LISTING), TAG(MARQUEE), TAG(MENU), TAG(META), TAG(NAV), TAG(NOEMBED),
1586
1459
  TAG(NOFRAMES), TAG(NOSCRIPT), TAG(OBJECT), TAG(OL), TAG(P), TAG(PARAM),
1587
1460
  TAG(PLAINTEXT), TAG(PRE), TAG(SCRIPT), TAG(SECTION), TAG(SELECT), TAG(STYLE),
1588
- TAG(SUMMARY), TAG(TABLE), TAG(TBODY), TAG(TD), TAG(TEMPLATE), TAG(TEXTAREA),
1589
- TAG(TFOOT), TAG(TH), TAG(THEAD), TAG(TITLE), TAG(TR), TAG(UL), TAG(WBR), TAG(XMP),
1461
+ TAG(SUMMARY), TAG(TABLE), TAG(TBODY), TAG(TD), TAG(TEXTAREA), TAG(TFOOT),
1462
+ TAG(TH), TAG(THEAD), TAG(TITLE), TAG(TR), TAG(UL), TAG(WBR), TAG(XMP),
1590
1463
 
1591
1464
  TAG_MATHML(MI), TAG_MATHML(MO), TAG_MATHML(MN), TAG_MATHML(MS),
1592
1465
  TAG_MATHML(MTEXT), TAG_MATHML(ANNOTATION_XML),
@@ -1796,20 +1669,13 @@ static void remove_from_parent(GumboParser* parser, GumboNode* node) {
1796
1669
 
1797
1670
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#an-introduction-to-error-handling-and-strange-cases-in-the-parser
1798
1671
  // Also described in the "in body" handling for end formatting tags.
1799
- static bool adoption_agency_algorithm(GumboParser* parser, GumboToken* token, GumboTag subject) {
1672
+ static bool adoption_agency_algorithm(
1673
+ GumboParser* parser, GumboToken* token, GumboTag closing_tag) {
1800
1674
  GumboParserState* state = parser->_parser_state;
1801
1675
  gumbo_debug("Entering adoption agency algorithm.\n");
1802
- // Step 1.
1803
- GumboNode* current_node = get_current_node(parser);
1804
- if (current_node->v.element.tag_namespace == GUMBO_NAMESPACE_HTML &&
1805
- current_node->v.element.tag == subject &&
1806
- gumbo_vector_index_of(&state->_active_formatting_elements, current_node) == -1) {
1807
- pop_current_node(parser);
1808
- return false;
1809
- }
1810
- // Steps 2-4 & 20:
1676
+ // Steps 1-3 & 16:
1811
1677
  for (int i = 0; i < 8; ++i) {
1812
- // Step 5.
1678
+ // Step 4.
1813
1679
  GumboNode* formatting_node = NULL;
1814
1680
  int formatting_node_in_open_elements = -1;
1815
1681
  for (int j = state->_active_formatting_elements.length; --j >= 0; ) {
@@ -1819,13 +1685,13 @@ static bool adoption_agency_algorithm(GumboParser* parser, GumboToken* token, Gu
1819
1685
  // Last scope marker; abort the algorithm.
1820
1686
  return false;
1821
1687
  }
1822
- if (node_html_tag_is(current_node, subject)) {
1688
+ if (current_node->type == GUMBO_NODE_ELEMENT && current_node->v.element.tag == closing_tag) {
1823
1689
  // Found it.
1824
1690
  formatting_node = current_node;
1825
1691
  formatting_node_in_open_elements = gumbo_vector_index_of(
1826
- &state->_open_elements, formatting_node);
1692
+ &state->_open_elements, formatting_node);
1827
1693
  gumbo_debug("Formatting element of tag %s at %d.\n",
1828
- gumbo_normalized_tagname(subject),
1694
+ gumbo_normalized_tagname(closing_tag),
1829
1695
  formatting_node_in_open_elements);
1830
1696
  break;
1831
1697
  }
@@ -1838,23 +1704,18 @@ static bool adoption_agency_algorithm(GumboParser* parser, GumboToken* token, Gu
1838
1704
  return false;
1839
1705
  }
1840
1706
 
1841
- // Step 6
1842
1707
  if (formatting_node_in_open_elements == -1) {
1843
1708
  gumbo_debug("Formatting node not on stack of open elements.\n");
1844
- parser_add_parse_error(parser, token);
1845
1709
  gumbo_vector_remove(parser, formatting_node,
1846
1710
  &state->_active_formatting_elements);
1847
1711
  return false;
1848
1712
  }
1849
1713
 
1850
- // Step 7
1851
1714
  if (!has_an_element_in_scope(parser, formatting_node->v.element.tag)) {
1852
1715
  parser_add_parse_error(parser, token);
1853
1716
  gumbo_debug("Element not in scope.\n");
1854
1717
  return false;
1855
1718
  }
1856
-
1857
- // Step 8
1858
1719
  if (formatting_node != get_current_node(parser)) {
1859
1720
  parser_add_parse_error(parser, token); // But continue onwards.
1860
1721
  }
@@ -1862,20 +1723,20 @@ static bool adoption_agency_algorithm(GumboParser* parser, GumboToken* token, Gu
1862
1723
  assert(!node_html_tag_is(formatting_node, GUMBO_TAG_HTML));
1863
1724
  assert(!node_html_tag_is(formatting_node, GUMBO_TAG_BODY));
1864
1725
 
1865
- // Step 9 & 10
1726
+ // Step 5 & 6.
1866
1727
  GumboNode* furthest_block = NULL;
1867
1728
  for (int j = formatting_node_in_open_elements;
1868
1729
  j < state->_open_elements.length; ++j) {
1869
1730
  assert(j > 0);
1870
1731
  GumboNode* current = state->_open_elements.data[j];
1871
1732
  if (is_special_node(current)) {
1872
- // Step 9.
1733
+ // Step 5.
1873
1734
  furthest_block = current;
1874
1735
  break;
1875
1736
  }
1876
1737
  }
1877
1738
  if (!furthest_block) {
1878
- // Step 10.
1739
+ // Step 6.
1879
1740
  while (get_current_node(parser) != formatting_node) {
1880
1741
  pop_current_node(parser);
1881
1742
  }
@@ -1888,35 +1749,32 @@ static bool adoption_agency_algorithm(GumboParser* parser, GumboToken* token, Gu
1888
1749
  assert(!node_html_tag_is(furthest_block, GUMBO_TAG_HTML));
1889
1750
  assert(furthest_block);
1890
1751
 
1891
- // Step 11.
1752
+ // Step 7.
1892
1753
  // Elements may be moved and reparented by this algorithm, so
1893
1754
  // common_ancestor is not necessarily the same as formatting_node->parent.
1894
1755
  GumboNode* common_ancestor =
1895
- state->_open_elements.data[gumbo_vector_index_of(
1896
- &state->_open_elements, formatting_node) - 1];
1756
+ state->_open_elements.data[gumbo_vector_index_of(
1757
+ &state->_open_elements, formatting_node) - 1];
1897
1758
  gumbo_debug("Common ancestor tag = %s, furthest block tag = %s.\n",
1898
1759
  gumbo_normalized_tagname(common_ancestor->v.element.tag),
1899
1760
  gumbo_normalized_tagname(furthest_block->v.element.tag));
1900
1761
 
1901
- // Step 12.
1762
+ // Step 8.
1902
1763
  int bookmark = gumbo_vector_index_of(
1903
- &state->_active_formatting_elements, formatting_node) + 1;
1904
- gumbo_debug("Bookmark at %d.\n", bookmark);
1905
- // Step 13.
1764
+ &state->_active_formatting_elements, formatting_node);;
1765
+ // Step 9.
1906
1766
  GumboNode* node = furthest_block;
1907
1767
  GumboNode* last_node = furthest_block;
1908
1768
  // Must be stored explicitly, in case node is removed from the stack of open
1909
1769
  // elements, to handle step 9.4.
1910
1770
  int saved_node_index = gumbo_vector_index_of(&state->_open_elements, node);
1911
1771
  assert(saved_node_index > 0);
1912
- // Step 13.1.
1913
- for (int j = 0;;) {
1914
- // Step 13.2.
1915
- ++j;
1916
- // Step 13.3.
1772
+ // Step 9.1-9.3 & 9.11.
1773
+ for (int j = 0; j < 3; ++j) {
1774
+ // Step 9.4.
1917
1775
  int node_index = gumbo_vector_index_of(&state->_open_elements, node);
1918
1776
  gumbo_debug(
1919
- "Current index: %d, last index: %d.\n", node_index, saved_node_index);
1777
+ "Current index: %d, last index: %d.\n", node_index, saved_node_index);
1920
1778
  if (node_index == -1) {
1921
1779
  node_index = saved_node_index;
1922
1780
  }
@@ -1925,78 +1783,61 @@ static bool adoption_agency_algorithm(GumboParser* parser, GumboToken* token, Gu
1925
1783
  assert(node_index < state->_open_elements.capacity);
1926
1784
  node = state->_open_elements.data[node_index];
1927
1785
  assert(node->parent);
1928
- if (node == formatting_node) {
1929
- // Step 13.4.
1930
- break;
1931
- }
1932
- int formatting_index =
1933
- gumbo_vector_index_of(&state->_active_formatting_elements, node);
1934
- if (j > 3 && formatting_index != -1) {
1935
- // Step 13.5.
1936
- gumbo_debug(
1937
- "Removing formatting element at %d.\n", formatting_index);
1938
- gumbo_vector_remove_at(
1939
- parser,
1940
- formatting_index,
1941
- &state->_active_formatting_elements);
1942
- // Removing the element shifts all indices over by one, so we may need
1943
- // to move the bookmark.
1944
- if (formatting_index < bookmark) {
1945
- --bookmark;
1946
- gumbo_debug("Moving bookmark to %d.\n", bookmark);
1947
- }
1948
- continue;
1949
- }
1950
- if (formatting_index == -1) {
1951
- // Step 13.6.
1786
+ // Step 9.5.
1787
+ if (gumbo_vector_index_of(
1788
+ &state->_active_formatting_elements, node) == -1) {
1952
1789
  gumbo_vector_remove_at(parser, node_index, &state->_open_elements);
1953
1790
  continue;
1791
+ } else if (node == formatting_node) {
1792
+ // Step 9.6.
1793
+ break;
1954
1794
  }
1955
- // Step 13.7.
1956
- // "common ancestor as the intended parent" doesn't actually mean insert
1957
- // it into the common ancestor; that happens below.
1795
+ // Step 9.7.
1796
+ int formatting_index = gumbo_vector_index_of(
1797
+ &state->_active_formatting_elements, node);
1958
1798
  node = clone_node(parser, node, GUMBO_INSERTION_ADOPTION_AGENCY_CLONED);
1959
- assert(formatting_index >= 0);
1960
1799
  state->_active_formatting_elements.data[formatting_index] = node;
1961
- assert(node_index >= 0);
1962
1800
  state->_open_elements.data[node_index] = node;
1963
- // Step 13.8.
1801
+ // Step 9.8.
1964
1802
  if (last_node == furthest_block) {
1965
1803
  bookmark = formatting_index + 1;
1966
- gumbo_debug("Bookmark moved to %d.\n", bookmark);
1967
1804
  assert(bookmark <= state->_active_formatting_elements.length);
1968
1805
  }
1969
- // Step 13.9.
1806
+ // Step 9.9.
1970
1807
  last_node->parse_flags |= GUMBO_INSERTION_ADOPTION_AGENCY_MOVED;
1971
1808
  remove_from_parent(parser, last_node);
1972
1809
  append_node(parser, node, last_node);
1973
- // Step 13.10.
1810
+ // Step 9.10.
1974
1811
  last_node = node;
1975
- } // Step 13.11.
1812
+ }
1976
1813
 
1977
- // Step 14.
1814
+ // Step 10.
1978
1815
  gumbo_debug("Removing %s node from parent ",
1979
1816
  gumbo_normalized_tagname(last_node->v.element.tag));
1980
1817
  remove_from_parent(parser, last_node);
1981
1818
  last_node->parse_flags |= GUMBO_INSERTION_ADOPTION_AGENCY_MOVED;
1982
- InsertionLocation location =
1983
- get_appropriate_insertion_location(parser, common_ancestor);
1984
- gumbo_debug("and inserting it into %s.\n",
1985
- gumbo_normalized_tagname(location.target->v.element.tag));
1986
- insert_node(parser, last_node, location);
1819
+ if (node_tag_in_set(common_ancestor, (gumbo_tagset) { TAG(TABLE), TAG(TBODY),
1820
+ TAG(TFOOT), TAG(THEAD), TAG(TR) })) {
1821
+ gumbo_debug("and foster-parenting it.\n");
1822
+ foster_parent_element(parser, last_node);
1823
+ } else {
1824
+ gumbo_debug("and inserting it into %s.\n",
1825
+ gumbo_normalized_tagname(common_ancestor->v.element.tag));
1826
+ append_node(parser, common_ancestor, last_node);
1827
+ }
1987
1828
 
1988
- // Step 15.
1829
+ // Step 11.
1989
1830
  GumboNode* new_formatting_node = clone_node(
1990
- parser, formatting_node, GUMBO_INSERTION_ADOPTION_AGENCY_CLONED);
1831
+ parser, formatting_node, GUMBO_INSERTION_ADOPTION_AGENCY_CLONED);
1991
1832
  formatting_node->parse_flags |= GUMBO_INSERTION_IMPLICIT_END_TAG;
1992
1833
 
1993
- // Step 16. Instead of appending nodes one-by-one, we swap the children
1834
+ // Step 12. Instead of appending nodes one-by-one, we swap the children
1994
1835
  // vector of furthest_block with the empty children of new_formatting_node,
1995
1836
  // reducing memory traffic and allocations. We still have to reset their
1996
1837
  // parent pointers, though.
1997
1838
  GumboVector temp = new_formatting_node->v.element.children;
1998
1839
  new_formatting_node->v.element.children =
1999
- furthest_block->v.element.children;
1840
+ furthest_block->v.element.children;
2000
1841
  furthest_block->v.element.children = temp;
2001
1842
 
2002
1843
  temp = new_formatting_node->v.element.children;
@@ -2005,39 +1846,36 @@ static bool adoption_agency_algorithm(GumboParser* parser, GumboToken* token, Gu
2005
1846
  child->parent = new_formatting_node;
2006
1847
  }
2007
1848
 
2008
- // Step 17.
1849
+ // Step 13.
2009
1850
  append_node(parser, furthest_block, new_formatting_node);
2010
1851
 
2011
- // Step 18.
1852
+ // Step 14.
2012
1853
  // If the formatting node was before the bookmark, it may shift over all
2013
1854
  // indices after it, so we need to explicitly find the index and possibly
2014
1855
  // adjust the bookmark.
2015
1856
  int formatting_node_index = gumbo_vector_index_of(
2016
- &state->_active_formatting_elements, formatting_node);
1857
+ &state->_active_formatting_elements, formatting_node);
2017
1858
  assert(formatting_node_index != -1);
2018
1859
  if (formatting_node_index < bookmark) {
2019
- gumbo_debug(
2020
- "Formatting node at %d is before bookmark at %d; decrementing.\n",
2021
- formatting_node_index, bookmark);
2022
1860
  --bookmark;
2023
1861
  }
2024
1862
  gumbo_vector_remove_at(
2025
- parser, formatting_node_index, &state->_active_formatting_elements);
1863
+ parser, formatting_node_index, &state->_active_formatting_elements);
2026
1864
  assert(bookmark >= 0);
2027
1865
  assert(bookmark <= state->_active_formatting_elements.length);
2028
1866
  gumbo_vector_insert_at(parser, new_formatting_node, bookmark,
2029
1867
  &state->_active_formatting_elements);
2030
1868
 
2031
- // Step 19.
1869
+ // Step 15.
2032
1870
  gumbo_vector_remove(
2033
- parser, formatting_node, &state->_open_elements);
1871
+ parser, formatting_node, &state->_open_elements);
2034
1872
  int insert_at = gumbo_vector_index_of(
2035
- &state->_open_elements, furthest_block) + 1;
1873
+ &state->_open_elements, furthest_block) + 1;
2036
1874
  assert(insert_at >= 0);
2037
1875
  assert(insert_at <= state->_open_elements.length);
2038
1876
  gumbo_vector_insert_at(
2039
- parser, new_formatting_node, insert_at, &state->_open_elements);
2040
- } // Step 20.
1877
+ parser, new_formatting_node, insert_at, &state->_open_elements);
1878
+ }
2041
1879
  return true;
2042
1880
  }
2043
1881
 
@@ -2216,45 +2054,29 @@ static bool handle_in_head(GumboParser* parser, GumboToken* token) {
2216
2054
  assert(node_html_tag_is(head, GUMBO_TAG_HEAD));
2217
2055
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_HEAD);
2218
2056
  return true;
2219
- } else if (tag_in(token, kEndTag, (gumbo_tagset) { TAG(BODY), TAG(HTML), TAG(BR) })) {
2220
- pop_current_node(parser);
2221
- set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_HEAD);
2222
- parser->_parser_state->_reprocess_current_token = true;
2223
- return true;
2224
- } else if (tag_is(token, kStartTag, GUMBO_TAG_TEMPLATE)) {
2225
- insert_element_from_token(parser, token);
2226
- add_formatting_element(parser, &kActiveFormattingScopeMarker);
2227
- parser->_parser_state->_frameset_ok = false;
2228
- set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TEMPLATE);
2229
- push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TEMPLATE);
2230
- return true;
2231
- } else if (tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
2232
- if (!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
2233
- parser_add_parse_error(parser, token);
2234
- ignore_token(parser);
2235
- return false;
2236
- }
2237
- generate_all_implied_end_tags_thoroughly(parser);
2238
- bool success = true;
2239
- if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_TEMPLATE)) {
2240
- parser_add_parse_error(parser, token);
2241
- success = false;
2242
- }
2243
- while (!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_TEMPLATE));
2244
- clear_active_formatting_elements(parser);
2245
- pop_template_insertion_mode(parser);
2246
- reset_insertion_mode_appropriately(parser);
2247
- return success;
2248
- } else if (tag_is(token, kStartTag, GUMBO_TAG_HEAD) || (token->type == GUMBO_TOKEN_END_TAG)) {
2057
+ } else if (tag_is(token, kStartTag, GUMBO_TAG_HEAD)) {
2058
+ parser_add_parse_error(parser, token);
2059
+ ignore_token(parser);
2060
+ return false;
2061
+ } else if (tag_is(token, kStartTag, GUMBO_TAG_HEAD) ||
2062
+ (token->type == GUMBO_TOKEN_END_TAG &&
2063
+ !tag_in(token, kEndTag, (gumbo_tagset) { TAG(BODY), TAG(HTML),
2064
+ TAG(BR) }))) {
2065
+ parser_add_parse_error(parser, token);
2066
+ return false;
2067
+ } else if (tag_is(token, kStartTag, GUMBO_TAG_UNKNOWN) && token->v.start_tag.is_self_closing) {
2249
2068
  parser_add_parse_error(parser, token);
2250
2069
  ignore_token(parser);
2251
2070
  return false;
2252
2071
  } else {
2253
- pop_current_node(parser);
2072
+ const GumboNode* node = pop_current_node(parser);
2073
+ assert(node_html_tag_is(node, GUMBO_TAG_HEAD));
2074
+ AVOID_UNUSED_VARIABLE_WARNING(node);
2254
2075
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_HEAD);
2255
2076
  parser->_parser_state->_reprocess_current_token = true;
2256
2077
  return true;
2257
2078
  }
2079
+
2258
2080
  return true;
2259
2081
  }
2260
2082
 
@@ -2320,7 +2142,7 @@ static bool handle_after_head(GumboParser* parser, GumboToken* token) {
2320
2142
  } else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(BASE), TAG(BASEFONT),
2321
2143
  TAG(BGSOUND), TAG(LINK), TAG(META),
2322
2144
  TAG(NOFRAMES), TAG(SCRIPT), TAG(STYLE),
2323
- TAG(TEMPLATE), TAG(TITLE) })) {
2145
+ TAG(TITLE) })) {
2324
2146
  parser_add_parse_error(parser, token);
2325
2147
  assert(state->_head_element != NULL);
2326
2148
  // This must be flushed before we push the head element on, as there may be
@@ -2330,8 +2152,6 @@ static bool handle_after_head(GumboParser* parser, GumboToken* token) {
2330
2152
  bool result = handle_in_head(parser, token);
2331
2153
  gumbo_vector_remove(parser, state->_head_element, &state->_open_elements);
2332
2154
  return result;
2333
- } else if (tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
2334
- return handle_in_head(parser, token);
2335
2155
  } else if (tag_is(token, kStartTag, GUMBO_TAG_HEAD) ||
2336
2156
  (token->type == GUMBO_TOKEN_END_TAG &&
2337
2157
  !tag_in(token, kEndTag, (gumbo_tagset) { TAG(BODY), TAG(HTML), TAG(BR) }))) {
@@ -2346,23 +2166,28 @@ static bool handle_after_head(GumboParser* parser, GumboToken* token) {
2346
2166
  }
2347
2167
  }
2348
2168
 
2349
- static GumboNode* destroy_node(GumboParser* parser, GumboNode* node) {
2169
+ static void destroy_node(GumboParser* parser, GumboNode* node) {
2350
2170
  switch (node->type) {
2351
2171
  case GUMBO_NODE_DOCUMENT:
2352
2172
  {
2353
2173
  GumboDocument* doc = &node->v.document;
2174
+ for (int i = 0; i < doc->children.length; ++i) {
2175
+ destroy_node(parser, doc->children.data[i]);
2176
+ }
2354
2177
  gumbo_parser_deallocate(parser, (void*) doc->children.data);
2355
2178
  gumbo_parser_deallocate(parser, (void*) doc->name);
2356
2179
  gumbo_parser_deallocate(parser, (void*) doc->public_identifier);
2357
2180
  gumbo_parser_deallocate(parser, (void*) doc->system_identifier);
2358
2181
  }
2359
2182
  break;
2360
- case GUMBO_NODE_TEMPLATE:
2361
2183
  case GUMBO_NODE_ELEMENT:
2362
2184
  for (int i = 0; i < node->v.element.attributes.length; ++i) {
2363
2185
  gumbo_destroy_attribute(parser, node->v.element.attributes.data[i]);
2364
2186
  }
2365
2187
  gumbo_parser_deallocate(parser, node->v.element.attributes.data);
2188
+ for (int i = 0; i < node->v.element.children.length; ++i) {
2189
+ destroy_node(parser, node->v.element.children.data[i]);
2190
+ }
2366
2191
  gumbo_parser_deallocate(parser, node->v.element.children.data);
2367
2192
  break;
2368
2193
  case GUMBO_NODE_TEXT:
@@ -2372,21 +2197,7 @@ static GumboNode* destroy_node(GumboParser* parser, GumboNode* node) {
2372
2197
  gumbo_parser_deallocate(parser, (void*) node->v.text.text);
2373
2198
  break;
2374
2199
  }
2375
- // Remove from the next/prev linked list.
2376
- GumboNode* prev = node->prev;
2377
- GumboNode* next = node->next;
2378
- if (prev != NULL) {
2379
- prev->next = next;
2380
- }
2381
- if (next != NULL) {
2382
- next->prev = prev;
2383
- }
2384
- if (parser->_parser_state && parser->_parser_state->_current_node == node) {
2385
- parser->_parser_state->_current_node = prev;
2386
- }
2387
-
2388
2200
  gumbo_parser_deallocate(parser, node);
2389
- return next;
2390
2201
  }
2391
2202
 
2392
2203
  // http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-inbody
@@ -2415,24 +2226,20 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2415
2226
  ignore_token(parser);
2416
2227
  return false;
2417
2228
  } else if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
2418
- parser_add_parse_error(parser, token);
2419
- if (has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
2420
- ignore_token(parser);
2421
- return false;
2422
- }
2423
2229
  assert(parser->_output->root != NULL);
2424
2230
  assert(parser->_output->root->type == GUMBO_NODE_ELEMENT);
2231
+ parser_add_parse_error(parser, token);
2425
2232
  merge_attributes(parser, token, parser->_output->root);
2426
2233
  return false;
2427
2234
  } else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(BASE), TAG(BASEFONT),
2428
2235
  TAG(BGSOUND), TAG(MENUITEM), TAG(LINK),
2429
2236
  TAG(META), TAG(NOFRAMES), TAG(SCRIPT),
2430
- TAG(STYLE), TAG(TEMPLATE), TAG(TITLE) } ) || tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
2237
+ TAG(STYLE), TAG(TITLE) } )) {
2431
2238
  return handle_in_head(parser, token);
2432
2239
  } else if (tag_is(token, kStartTag, GUMBO_TAG_BODY)) {
2433
2240
  parser_add_parse_error(parser, token);
2434
2241
  if (state->_open_elements.length < 2 ||
2435
- !node_html_tag_is(state->_open_elements.data[1], GUMBO_TAG_BODY) || has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
2242
+ !node_html_tag_is(state->_open_elements.data[1], GUMBO_TAG_BODY)) {
2436
2243
  ignore_token(parser);
2437
2244
  return false;
2438
2245
  }
@@ -2484,11 +2291,9 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2484
2291
  TAG(DT), TAG(LI), TAG(P), TAG(TBODY), TAG(TD), TAG(TFOOT), TAG(TH),
2485
2292
  TAG(THEAD), TAG(TR), TAG(BODY), TAG(HTML) } )) {
2486
2293
  parser_add_parse_error(parser, token);
2294
+ return false;
2487
2295
  }
2488
2296
  }
2489
- if (get_current_template_insertion_mode(parser) != GUMBO_INSERTION_MODE_INITIAL) {
2490
- return handle_in_template(parser, token);
2491
- }
2492
2297
  return true;
2493
2298
  } else if (tag_in(token, kEndTag, (gumbo_tagset) { TAG(BODY), TAG(HTML) })) {
2494
2299
  if (!has_an_element_in_scope(parser, GUMBO_TAG_BODY)) {
@@ -2498,11 +2303,10 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2498
2303
  }
2499
2304
  bool success = true;
2500
2305
  for (int i = 0; i < state->_open_elements.length; ++i) {
2501
- if (!node_tag_in_set(state->_open_elements.data[i], (gumbo_tagset) {
2502
- TAG(DD), TAG(DT), TAG(LI), TAG(OPTGROUP), TAG(OPTION), TAG(P),
2503
- TAG(RB), TAG(RP), TAG(RT), TAG(RTC), TAG(TBODY), TAG(TD),
2504
- TAG(TFOOT), TAG(TH), TAG(THEAD), TAG(TR),
2505
- TAG(BODY), TAG(HTML) })) {
2306
+ if (!node_tag_in_set(state->_open_elements.data[i], (gumbo_tagset) { TAG(DD),
2307
+ TAG(DT), TAG(LI), TAG(OPTGROUP), TAG(OPTION), TAG(P), TAG(RP),
2308
+ TAG(RT), TAG(TBODY), TAG(TD), TAG(TFOOT), TAG(TH), TAG(THEAD),
2309
+ TAG(TR), TAG(BODY), TAG(HTML) })) {
2506
2310
  parser_add_parse_error(parser, token);
2507
2311
  success = false;
2508
2312
  break;
@@ -2520,7 +2324,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2520
2324
  } else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(ADDRESS), TAG(ARTICLE),
2521
2325
  TAG(ASIDE), TAG(BLOCKQUOTE), TAG(CENTER), TAG(DETAILS),
2522
2326
  TAG(DIR), TAG(DIV), TAG(DL), TAG(FIELDSET), TAG(FIGCAPTION),
2523
- TAG(FIGURE), TAG(FOOTER), TAG(HEADER), TAG(HGROUP), TAG(MENU), TAG(MAIN),
2327
+ TAG(FIGURE), TAG(FOOTER), TAG(HEADER), TAG(HGROUP), TAG(MENU),
2524
2328
  TAG(NAV), TAG(OL), TAG(P), TAG(SECTION), TAG(SUMMARY), TAG(UL) })) {
2525
2329
  bool result = maybe_implicitly_close_p_tag(parser, token);
2526
2330
  insert_element_from_token(parser, token);
@@ -2543,17 +2347,15 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2543
2347
  state->_frameset_ok = false;
2544
2348
  return result;
2545
2349
  } else if (tag_is(token, kStartTag, GUMBO_TAG_FORM)) {
2546
- if (state->_form_element != NULL && !has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
2350
+ if (state->_form_element != NULL) {
2547
2351
  gumbo_debug("Ignoring nested form.\n");
2548
2352
  parser_add_parse_error(parser, token);
2549
2353
  ignore_token(parser);
2550
2354
  return false;
2551
2355
  }
2552
2356
  bool result = maybe_implicitly_close_p_tag(parser, token);
2553
- GumboNode* form_element = insert_element_from_token(parser, token);
2554
- if (!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
2555
- state->_form_element = form_element;
2556
- }
2357
+ state->_form_element =
2358
+ insert_element_from_token(parser, token);
2557
2359
  return result;
2558
2360
  } else if (tag_is(token, kStartTag, GUMBO_TAG_LI)) {
2559
2361
  maybe_implicitly_close_list_tag(parser, token, true);
@@ -2585,7 +2387,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2585
2387
  TAG(ASIDE), TAG(BLOCKQUOTE), TAG(BUTTON), TAG(CENTER), TAG(DETAILS),
2586
2388
  TAG(DIR), TAG(DIV), TAG(DL), TAG(FIELDSET), TAG(FIGCAPTION),
2587
2389
  TAG(FIGURE), TAG(FOOTER), TAG(HEADER), TAG(HGROUP), TAG(LISTING),
2588
- TAG(MAIN), TAG(MENU), TAG(NAV), TAG(OL), TAG(PRE),
2390
+ TAG(MENU), TAG(NAV), TAG(OL), TAG(PRE),
2589
2391
  TAG(SECTION), TAG(SUMMARY), TAG(UL) })) {
2590
2392
  GumboTag tag = token->v.end_tag;
2591
2393
  if (!has_an_element_in_scope(parser, tag)) {
@@ -2596,45 +2398,30 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2596
2398
  implicitly_close_tags(parser, token, GUMBO_NAMESPACE_HTML, token->v.end_tag);
2597
2399
  return true;
2598
2400
  } else if (tag_is(token, kEndTag, GUMBO_TAG_FORM)) {
2599
- if (has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
2600
- if (!has_an_element_in_scope(parser, GUMBO_TAG_FORM)) {
2601
- parser_add_parse_error(parser, token);
2602
- ignore_token(parser);
2603
- return false;
2604
- }
2605
- bool success = true;
2606
- generate_implied_end_tags(parser, GUMBO_TAG_LAST);
2607
- if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_FORM)) {
2608
- parser_add_parse_error(parser, token);
2609
- return false;
2610
- }
2611
- while(!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_FORM));
2612
- return success;
2613
- } else {
2614
- bool result = true;
2615
- const GumboNode* node = state->_form_element;
2616
- assert(!node || node->type == GUMBO_NODE_ELEMENT);
2617
- state->_form_element = NULL;
2618
- if (!node || !has_node_in_scope(parser, node)) {
2619
- gumbo_debug("Closing an unopened form.\n");
2620
- parser_add_parse_error(parser, token);
2621
- ignore_token(parser);
2622
- return false;
2623
- }
2624
- // This differs from implicitly_close_tags because we remove *only* the
2625
- // <form> element; other nodes are left in scope.
2626
- generate_implied_end_tags(parser, GUMBO_TAG_LAST);
2627
- if (get_current_node(parser) != node) {
2628
- parser_add_parse_error(parser, token);
2629
- result = false;
2630
- }
2631
-
2632
- GumboVector* open_elements = &state->_open_elements;
2633
- int index = gumbo_vector_index_of(open_elements, node);
2634
- assert(index >= 0);
2635
- gumbo_vector_remove_at(parser, index, open_elements);
2636
- return result;
2401
+ bool result = true;
2402
+ const GumboNode* node = state->_form_element;
2403
+ assert(!node || node->type == GUMBO_NODE_ELEMENT);
2404
+ state->_form_element = NULL;
2405
+ if (!node || !has_node_in_scope(parser, node)) {
2406
+ gumbo_debug("Closing an unopened form.\n");
2407
+ parser_add_parse_error(parser, token);
2408
+ ignore_token(parser);
2409
+ return false;
2637
2410
  }
2411
+ // This differs from implicitly_close_tags because we remove *only* the
2412
+ // <form> element; other nodes are left in scope.
2413
+ generate_implied_end_tags(parser, GUMBO_TAG_LAST);
2414
+ if (get_current_node(parser) != node) {
2415
+ parser_add_parse_error(parser, token);
2416
+ result = false;
2417
+ }
2418
+
2419
+ GumboVector* open_elements = &state->_open_elements;
2420
+ int index = open_elements->length - 1;
2421
+ for (; index >= 0 && open_elements->data[index] != node; --index);
2422
+ assert(index >= 0);
2423
+ gumbo_vector_remove_at(parser, index, open_elements);
2424
+ return result;
2638
2425
  } else if (tag_is(token, kEndTag, GUMBO_TAG_P)) {
2639
2426
  if (!has_an_element_in_button_scope(parser, GUMBO_TAG_P)) {
2640
2427
  parser_add_parse_error(parser, token);
@@ -2661,11 +2448,10 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2661
2448
  return false;
2662
2449
  }
2663
2450
  return implicitly_close_tags(parser, token, GUMBO_NAMESPACE_HTML, token_tag);
2664
- } else if (tag_in(token, kEndTag, (gumbo_tagset) {
2665
- TAG(H1), TAG(H2), TAG(H3), TAG(H4), TAG(H5), TAG(H6) })) {
2666
- if (!has_an_element_in_scope_with_tagname(parser, 6, (GumboTag[]) {
2667
- GUMBO_TAG_H1, GUMBO_TAG_H2, GUMBO_TAG_H3,
2668
- GUMBO_TAG_H4, GUMBO_TAG_H5, GUMBO_TAG_H6})) {
2451
+ } else if (tag_in(token, kEndTag, (gumbo_tagset) { TAG(H1), TAG(H2), TAG(H3),
2452
+ TAG(H4), TAG(H5), TAG(H6) })) {
2453
+ if (!has_an_element_in_scope_with_tagname(parser, (gumbo_tagset) { TAG(H1), TAG(H2), TAG(H3), TAG(H4),
2454
+ TAG(H5), TAG(H6) })) {
2669
2455
  // No heading open; ignore the token entirely.
2670
2456
  parser_add_parse_error(parser, token);
2671
2457
  ignore_token(parser);
@@ -2806,8 +2592,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2806
2592
  return result;
2807
2593
  } else if (tag_is(token, kStartTag, GUMBO_TAG_ISINDEX)) {
2808
2594
  parser_add_parse_error(parser, token);
2809
- if (parser->_parser_state->_form_element != NULL &&
2810
- !has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
2595
+ if (parser->_parser_state->_form_element != NULL) {
2811
2596
  ignore_token(parser);
2812
2597
  return false;
2813
2598
  }
@@ -2822,9 +2607,6 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2822
2607
 
2823
2608
  GumboNode* form = insert_element_of_tag_type(
2824
2609
  parser, GUMBO_TAG_FORM, GUMBO_INSERTION_FROM_ISINDEX);
2825
- if (!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
2826
- parser->_parser_state->_form_element = form;
2827
- }
2828
2610
  if (action_attr) {
2829
2611
  gumbo_vector_add(parser, action_attr, &form->v.element.attributes);
2830
2612
  }
@@ -2888,9 +2670,6 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2888
2670
  parser, GUMBO_TAG_HR, GUMBO_INSERTION_FROM_ISINDEX);
2889
2671
  pop_current_node(parser); // <hr>
2890
2672
  pop_current_node(parser); // <form>
2891
- if (!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
2892
- parser->_parser_state->_form_element = NULL;
2893
- }
2894
2673
  return false;
2895
2674
  } else if (tag_is(token, kStartTag, GUMBO_TAG_TEXTAREA)) {
2896
2675
  run_generic_parsing_algorithm(parser, token, GUMBO_LEX_RCDATA);
@@ -2932,17 +2711,12 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2932
2711
  reconstruct_active_formatting_elements(parser);
2933
2712
  insert_element_from_token(parser, token);
2934
2713
  return true;
2935
- } else if (tag_in(token, kStartTag, (gumbo_tagset) {
2936
- TAG(RB), TAG(RP), TAG(RT), TAG(RTC) })) {
2714
+ } else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(RP), TAG(RT) })) {
2937
2715
  bool success = true;
2938
- GumboTag exception = tag_in(token, kStartTag, (gumbo_tagset) {
2939
- TAG(RT), TAG(RP) }) ? GUMBO_TAG_RTC : GUMBO_TAG_LAST;
2940
2716
  if (has_an_element_in_scope(parser, GUMBO_TAG_RUBY)) {
2941
- generate_implied_end_tags(parser, exception);
2717
+ generate_implied_end_tags(parser, GUMBO_TAG_LAST);
2942
2718
  }
2943
- if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_RUBY) &&
2944
- !(exception == GUMBO_TAG_LAST ||
2945
- node_html_tag_is(get_current_node(parser), GUMBO_TAG_RTC))) {
2719
+ if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_RUBY)) {
2946
2720
  parser_add_parse_error(parser, token);
2947
2721
  success = false;
2948
2722
  }
@@ -3113,8 +2887,7 @@ static bool handle_in_table(GumboParser* parser, GumboToken* token) {
3113
2887
  parser_add_parse_error(parser, token);
3114
2888
  ignore_token(parser);
3115
2889
  return false;
3116
- } else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(STYLE), TAG(SCRIPT), TAG(TEMPLATE) }) ||
3117
- (tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE))) {
2890
+ } else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(STYLE), TAG(SCRIPT) })) {
3118
2891
  return handle_in_head(parser, token);
3119
2892
  } else if (tag_is(token, kStartTag, GUMBO_TAG_INPUT) &&
3120
2893
  attribute_matches(&token->v.start_tag.attributes,
@@ -3125,7 +2898,7 @@ static bool handle_in_table(GumboParser* parser, GumboToken* token) {
3125
2898
  return false;
3126
2899
  } else if (tag_is(token, kStartTag, GUMBO_TAG_FORM)) {
3127
2900
  parser_add_parse_error(parser, token);
3128
- if (state->_form_element || has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
2901
+ if (state->_form_element) {
3129
2902
  ignore_token(parser);
3130
2903
  return false;
3131
2904
  }
@@ -3133,7 +2906,11 @@ static bool handle_in_table(GumboParser* parser, GumboToken* token) {
3133
2906
  pop_current_node(parser);
3134
2907
  return false;
3135
2908
  } else if (token->type == GUMBO_TOKEN_EOF) {
3136
- return handle_in_body(parser, token);
2909
+ if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_HTML)) {
2910
+ parser_add_parse_error(parser, token);
2911
+ return false;
2912
+ }
2913
+ return true;
3137
2914
  } else {
3138
2915
  parser_add_parse_error(parser, token);
3139
2916
  state->_foster_parent_insertions = true;
@@ -3178,37 +2955,35 @@ static bool handle_in_table_text(GumboParser* parser, GumboToken* token) {
3178
2955
 
3179
2956
  // http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-incaption
3180
2957
  static bool handle_in_caption(GumboParser* parser, GumboToken* token) {
3181
- if (tag_is(token, kEndTag, GUMBO_TAG_CAPTION)) {
2958
+ if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(CAPTION), TAG(COL),
2959
+ TAG(COLGROUP), TAG(TBODY), TAG(TD),
2960
+ TAG(TFOOT), TAG(TH), TAG(THEAD), TAG(TR) }) ||
2961
+ tag_in(token, kEndTag, (gumbo_tagset) { TAG(CAPTION), TAG(TABLE) })) {
3182
2962
  if (!has_an_element_in_table_scope(parser, GUMBO_TAG_CAPTION)) {
3183
2963
  parser_add_parse_error(parser, token);
3184
2964
  ignore_token(parser);
3185
2965
  return false;
3186
- } else {
3187
- generate_implied_end_tags(parser, GUMBO_TAG_LAST);
3188
- bool result = true;
3189
- if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_CAPTION)) {
3190
- parser_add_parse_error(parser, token);
3191
- }
3192
- while (!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_CAPTION));
3193
- clear_active_formatting_elements(parser);
3194
- set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
3195
- return result;
3196
2966
  }
3197
- } else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(CAPTION), TAG(COL),
3198
- TAG(COLGROUP), TAG(TBODY), TAG(TD), TAG(TFOOT), TAG(TH), TAG(THEAD), TAG(TR) }) ||
3199
- (tag_is(token, kEndTag, GUMBO_TAG_TABLE))) {
3200
- if (!has_an_element_in_table_scope(parser, GUMBO_TAG_CAPTION)) {
2967
+ if (!tag_is(token, kEndTag, GUMBO_TAG_CAPTION)) {
3201
2968
  parser_add_parse_error(parser, token);
3202
- ignore_token(parser);
3203
- return false;
2969
+ parser->_parser_state->_reprocess_current_token = true;
2970
+ }
2971
+ generate_implied_end_tags(parser, GUMBO_TAG_LAST);
2972
+ bool result = true;
2973
+ if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_CAPTION)) {
2974
+ parser_add_parse_error(parser, token);
2975
+ while (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_CAPTION)) {
2976
+ pop_current_node(parser);
2977
+ }
2978
+ result = false;
3204
2979
  }
3205
- while (!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_CAPTION));
2980
+ pop_current_node(parser); // The <caption> itself.
3206
2981
  clear_active_formatting_elements(parser);
3207
2982
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
3208
- parser->_parser_state->_reprocess_current_token = true;
3209
- return true;
3210
- } else if (tag_in(token, kEndTag, (gumbo_tagset) { TAG(BODY), TAG(COL), TAG(COLGROUP),
3211
- TAG(HTML), TAG(TBODY), TAG(TD), TAG(TFOOT), TAG(TH), TAG(THEAD), TAG(TR) } )) {
2983
+ return result;
2984
+ } else if (tag_in(token, kEndTag, (gumbo_tagset) { TAG(BODY), TAG(COL),
2985
+ TAG(COLGROUP), TAG(HTML), TAG(TBODY), TAG(TD), TAG(TFOOT),
2986
+ TAG(TH), TAG(THEAD), TAG(TR) })) {
3212
2987
  parser_add_parse_error(parser, token);
3213
2988
  ignore_token(parser);
3214
2989
  return false;
@@ -3236,33 +3011,24 @@ static bool handle_in_column_group(GumboParser* parser, GumboToken* token) {
3236
3011
  pop_current_node(parser);
3237
3012
  acknowledge_self_closing_tag(parser);
3238
3013
  return true;
3239
- } else if (tag_is(token, kEndTag, GUMBO_TAG_COLGROUP)) {
3240
- if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_COLGROUP)) {
3241
- parser_add_parse_error(parser, token);
3242
- ignore_token(parser);
3243
- return false;
3244
- }
3245
- pop_current_node(parser);
3246
- set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
3247
- return false;
3248
3014
  } else if (tag_is(token, kEndTag, GUMBO_TAG_COL)) {
3249
3015
  parser_add_parse_error(parser, token);
3250
3016
  ignore_token(parser);
3251
3017
  return false;
3252
- } else if (tag_is(token, kStartTag, GUMBO_TAG_TEMPLATE) ||
3253
- tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
3254
- return handle_in_head(parser, token);
3255
- } else if (token->type == GUMBO_TOKEN_EOF) {
3256
- return handle_in_body(parser, token);
3018
+ } else if (token->type == GUMBO_TOKEN_EOF &&
3019
+ get_current_node(parser) == parser->_output->root) {
3020
+ return true;
3257
3021
  } else {
3258
- if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_COLGROUP)) {
3022
+ if (get_current_node(parser) == parser->_output->root) {
3259
3023
  parser_add_parse_error(parser, token);
3260
- ignore_token(parser);
3261
3024
  return false;
3262
3025
  }
3026
+ assert(node_html_tag_is(get_current_node(parser), GUMBO_TAG_COLGROUP));
3263
3027
  pop_current_node(parser);
3264
3028
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
3265
- parser->_parser_state->_reprocess_current_token = true;
3029
+ if (!tag_is(token, kEndTag, GUMBO_TAG_COLGROUP)) {
3030
+ parser->_parser_state->_reprocess_current_token = true;
3031
+ }
3266
3032
  return true;
3267
3033
  }
3268
3034
  }
@@ -3325,48 +3091,42 @@ static bool handle_in_row(GumboParser* parser, GumboToken* token) {
3325
3091
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_CELL);
3326
3092
  add_formatting_element(parser, &kActiveFormattingScopeMarker);
3327
3093
  return true;
3328
- } else if (tag_is(token, kEndTag, GUMBO_TAG_TR)) {
3329
- if (!has_an_element_in_table_scope(parser,GUMBO_TAG_TR)) {
3330
- parser_add_parse_error(parser, token);
3331
- ignore_token(parser);
3332
- return false;
3333
- } else {
3334
- clear_stack_to_table_row_context(parser);
3335
- pop_current_node(parser);
3336
- set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
3337
- return true;
3338
- }
3339
- } else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(CAPTION), TAG(COL), TAG(COLGROUP),
3340
- TAG(TBODY), TAG(TFOOT), TAG(THEAD), TAG(TR) }) || tag_is(token, kEndTag, GUMBO_TAG_TABLE)) {
3341
- if (!has_an_element_in_table_scope(parser,GUMBO_TAG_TR)) {
3094
+ } else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(CAPTION), TAG(COLGROUP),
3095
+ TAG(TBODY), TAG(TFOOT), TAG(THEAD), TAG(TR) }) ||
3096
+ tag_in(token, kEndTag, (gumbo_tagset) { TAG(TR), TAG(TABLE),
3097
+ TAG(TBODY), TAG(TFOOT), TAG(THEAD) })) {
3098
+ // This case covers 4 clauses of the spec, each of which say "Otherwise, act
3099
+ // as if an end tag with the tag name "tr" had been seen." The differences
3100
+ // are in error handling and whether the current token is reprocessed.
3101
+ GumboTag desired_tag =
3102
+ tag_in(token, kEndTag, (gumbo_tagset) { TAG(TBODY), TAG(TFOOT),
3103
+ TAG(THEAD) })
3104
+ ? token->v.end_tag : GUMBO_TAG_TR;
3105
+ if (!has_an_element_in_table_scope(parser, desired_tag)) {
3106
+ gumbo_debug("Bailing because there is no tag %s in table scope.\nOpen elements:",
3107
+ gumbo_normalized_tagname(desired_tag));
3108
+ for (int i = 0; i < parser->_parser_state->_open_elements.length; ++i) {
3109
+ const GumboNode* node = parser->_parser_state->_open_elements.data[i];
3110
+ gumbo_debug("%s\n", gumbo_normalized_tagname(node->v.element.tag));
3111
+ }
3342
3112
  parser_add_parse_error(parser, token);
3343
3113
  ignore_token(parser);
3344
3114
  return false;
3345
- } else {
3346
- clear_stack_to_table_row_context(parser);
3347
- pop_current_node(parser);
3348
- set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
3349
- parser->_parser_state->_reprocess_current_token = true;
3350
- return true;
3351
3115
  }
3352
- } else if (tag_in(token, kEndTag, (gumbo_tagset) { TAG(TBODY), TAG(TFOOT), TAG(THEAD) })) {
3353
- if (!has_an_element_in_table_scope(parser, token->v.end_tag) ||
3354
- (!has_an_element_in_table_scope(parser, GUMBO_TAG_TR))) {
3355
- parser_add_parse_error(parser, token);
3356
- ignore_token(parser);
3357
- return false;
3358
- } else {
3359
- clear_stack_to_table_row_context(parser);
3360
- pop_current_node(parser);
3361
- set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
3116
+ clear_stack_to_table_row_context(parser);
3117
+ GumboNode* last_element = pop_current_node(parser);
3118
+ assert(node_html_tag_is(last_element, GUMBO_TAG_TR));
3119
+ AVOID_UNUSED_VARIABLE_WARNING(last_element);
3120
+ set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
3121
+ if (!tag_is(token, kEndTag, GUMBO_TAG_TR)) {
3362
3122
  parser->_parser_state->_reprocess_current_token = true;
3363
- return true;
3364
3123
  }
3365
- } else if (tag_in(token, kEndTag, (gumbo_tagset) { TAG(BODY), TAG(CAPTION), TAG(COL),
3366
- TAG(COLGROUP), TAG(HTML), TAG(TD), TAG(TH) })) {
3367
- parser_add_parse_error(parser, token);
3368
- ignore_token(parser);
3369
- return false;
3124
+ return true;
3125
+ } else if (tag_in(token, kEndTag, (gumbo_tagset) { TAG(BODY), TAG(CAPTION),
3126
+ TAG(COL), TAG(COLGROUP), TAG(HTML), TAG(TD), TAG(TH) })) {
3127
+ parser_add_parse_error(parser, token);
3128
+ ignore_token(parser);
3129
+ return false;
3370
3130
  } else {
3371
3131
  return handle_in_table(parser, token);
3372
3132
  }
@@ -3378,7 +3138,6 @@ static bool handle_in_cell(GumboParser* parser, GumboToken* token) {
3378
3138
  GumboTag token_tag = token->v.end_tag;
3379
3139
  if (!has_an_element_in_table_scope(parser, token_tag)) {
3380
3140
  parser_add_parse_error(parser, token);
3381
- ignore_token(parser);
3382
3141
  return false;
3383
3142
  }
3384
3143
  return close_table_cell(parser, token, token_tag);
@@ -3494,11 +3253,14 @@ static bool handle_in_select(GumboParser* parser, GumboToken* token) {
3494
3253
  parser->_parser_state->_reprocess_current_token = true;
3495
3254
  }
3496
3255
  return false;
3497
- } else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(SCRIPT) , TAG(TEMPLATE) }) ||
3498
- tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
3256
+ } else if (tag_is(token, kStartTag, GUMBO_TAG_SCRIPT)) {
3499
3257
  return handle_in_head(parser, token);
3500
3258
  } else if (token->type == GUMBO_TOKEN_EOF) {
3501
- return handle_in_body(parser, token);
3259
+ if (get_current_node(parser) != parser->_output->root) {
3260
+ parser_add_parse_error(parser, token);
3261
+ return false;
3262
+ }
3263
+ return true;
3502
3264
  } else {
3503
3265
  parser_add_parse_error(parser, token);
3504
3266
  ignore_token(parser);
@@ -3517,16 +3279,14 @@ static bool handle_in_select_in_table(GumboParser* parser, GumboToken* token) {
3517
3279
  } else if (tag_in(token, kEndTag, (gumbo_tagset) { TAG(CAPTION), TAG(TABLE),
3518
3280
  TAG(TBODY), TAG(TFOOT), TAG(THEAD), TAG(TR), TAG(TD), TAG(TH) })) {
3519
3281
  parser_add_parse_error(parser, token);
3520
- if (!has_an_element_in_table_scope(parser, token->v.end_tag)) {
3521
- ignore_token(parser);
3522
- return false;
3523
- } else {
3282
+ if (has_an_element_in_table_scope(parser, token->v.end_tag)) {
3524
3283
  close_current_select(parser);
3525
- // close_current_select already does the reset_insertion_mode_appropriately
3526
- // reset_insertion_mode_appropriately(parser);
3284
+ reset_insertion_mode_appropriately(parser);
3527
3285
  parser->_parser_state->_reprocess_current_token = true;
3528
- return false;
3286
+ } else {
3287
+ ignore_token(parser);
3529
3288
  }
3289
+ return false;
3530
3290
  } else {
3531
3291
  return handle_in_select(parser, token);
3532
3292
  }
@@ -3534,68 +3294,8 @@ static bool handle_in_select_in_table(GumboParser* parser, GumboToken* token) {
3534
3294
 
3535
3295
  // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#parsing-main-intemplate
3536
3296
  static bool handle_in_template(GumboParser* parser, GumboToken* token) {
3537
- GumboParserState* state = parser->_parser_state;
3538
- if (token->type == GUMBO_TOKEN_WHITESPACE ||
3539
- token->type == GUMBO_TOKEN_CHARACTER ||
3540
- token->type == GUMBO_TOKEN_COMMENT ||
3541
- token->type == GUMBO_TOKEN_DOCTYPE) {
3542
- return handle_in_body(parser, token);
3543
- } else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(BASE), TAG(BASEFONT), TAG(BGSOUND),
3544
- TAG(LINK), TAG(META), TAG(NOFRAMES), TAG(SCRIPT), TAG(STYLE),
3545
- TAG(TEMPLATE), TAG(TITLE) }) ||
3546
- tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
3547
- return handle_in_head(parser, token);
3548
- } else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(CAPTION), TAG(COLGROUP),
3549
- TAG(TBODY), TAG(TFOOT), TAG(THEAD) })) {
3550
- pop_template_insertion_mode(parser);
3551
- push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
3552
- set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
3553
- state->_reprocess_current_token = true;
3554
- return true;
3555
- } else if (tag_is(token, kStartTag, GUMBO_TAG_COL)) {
3556
- pop_template_insertion_mode(parser);
3557
- push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_COLUMN_GROUP);
3558
- set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_COLUMN_GROUP);
3559
- state->_reprocess_current_token = true;
3560
- return true;
3561
- } else if (tag_is(token, kStartTag, GUMBO_TAG_TR)) {
3562
- pop_template_insertion_mode(parser);
3563
- push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
3564
- set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
3565
- state->_reprocess_current_token = true;
3566
- return true;
3567
- } else if (tag_in(token, kStartTag, (gumbo_tagset) { TAG(TD), TAG(TH) })) {
3568
- pop_template_insertion_mode(parser);
3569
- push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_ROW);
3570
- set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_ROW);
3571
- state->_reprocess_current_token = true;
3572
- return true;
3573
- } else if (token->type == GUMBO_TOKEN_START_TAG) {
3574
- pop_template_insertion_mode(parser);
3575
- push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_BODY);
3576
- set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_BODY);
3577
- state->_reprocess_current_token = true;
3578
- return true;
3579
- } else if (token->type == GUMBO_TOKEN_END_TAG) {
3580
- parser_add_parse_error(parser, token);
3581
- ignore_token(parser);
3582
- return false;
3583
- } else if (token->type == GUMBO_TOKEN_EOF) {
3584
- if (!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
3585
- // Stop parsing.
3586
- return true;
3587
- }
3588
- parser_add_parse_error(parser, token);
3589
- while(!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_TEMPLATE));
3590
- clear_active_formatting_elements(parser);
3591
- pop_template_insertion_mode(parser);
3592
- reset_insertion_mode_appropriately(parser);
3593
- state->_reprocess_current_token = true;
3594
- return false;
3595
- } else {
3596
- assert(0);
3597
- return false;
3598
- }
3297
+ // TODO(jdtang): Implement this.
3298
+ return true;
3599
3299
  }
3600
3300
 
3601
3301
  // http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-afterbody
@@ -3613,12 +3313,7 @@ static bool handle_after_body(GumboParser* parser, GumboToken* token) {
3613
3313
  ignore_token(parser);
3614
3314
  return false;
3615
3315
  } else if (tag_is(token, kEndTag, GUMBO_TAG_HTML)) {
3616
- /* fragment case: ignore the closing HTML token */
3617
- if (is_fragment_parser(parser)) {
3618
- parser_add_parse_error(parser, token);
3619
- ignore_token(parser);
3620
- return false;
3621
- }
3316
+ // TODO(jdtang): Handle fragment parsing algorithm case.
3622
3317
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_AFTER_BODY);
3623
3318
  GumboNode* html = parser->_parser_state->_open_elements.data[0];
3624
3319
  assert(node_html_tag_is(html, GUMBO_TAG_HTML));
@@ -3659,8 +3354,9 @@ static bool handle_in_frameset(GumboParser* parser, GumboToken* token) {
3659
3354
  return false;
3660
3355
  }
3661
3356
  pop_current_node(parser);
3662
- if (!is_fragment_parser(parser) &&
3663
- !node_html_tag_is(get_current_node(parser), GUMBO_TAG_FRAMESET)) {
3357
+ // TODO(jdtang): Add a condition to ignore this for the fragment parsing
3358
+ // algorithm.
3359
+ if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_FRAMESET)) {
3664
3360
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_FRAMESET);
3665
3361
  }
3666
3362
  return true;
@@ -3834,32 +3530,18 @@ static bool handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
3834
3530
  token_has_attribute(token, "color") ||
3835
3531
  token_has_attribute(token, "face") ||
3836
3532
  token_has_attribute(token, "size")))) {
3837
-
3838
- /* Parse error */
3839
3533
  parser_add_parse_error(parser, token);
3840
-
3841
- /*
3842
- * Fragment case: If the parser was originally created for the HTML
3843
- * fragment parsing algorithm, then act as described in the "any other
3844
- * start tag" entry below.
3845
- */
3846
- if (!is_fragment_parser(parser)) {
3847
- do {
3848
- pop_current_node(parser);
3849
- } while(!(is_mathml_integration_point(get_current_node(parser)) ||
3850
- is_html_integration_point(get_current_node(parser)) ||
3851
- get_current_node(parser)->v.element.tag_namespace ==
3852
- GUMBO_NAMESPACE_HTML));
3853
- parser->_parser_state->_reprocess_current_token = true;
3854
- return false;
3855
- }
3856
-
3857
- assert(token->type == GUMBO_TOKEN_START_TAG);
3858
- }
3859
-
3860
- if (token->type == GUMBO_TOKEN_START_TAG) {
3534
+ do {
3535
+ pop_current_node(parser);
3536
+ } while(!(is_mathml_integration_point(get_current_node(parser)) ||
3537
+ is_html_integration_point(get_current_node(parser)) ||
3538
+ get_current_node(parser)->v.element.tag_namespace ==
3539
+ GUMBO_NAMESPACE_HTML));
3540
+ parser->_parser_state->_reprocess_current_token = true;
3541
+ return false;
3542
+ } else if (token->type == GUMBO_TOKEN_START_TAG) {
3861
3543
  const GumboNamespaceEnum current_namespace =
3862
- get_adjusted_current_node(parser)->v.element.tag_namespace;
3544
+ get_current_node(parser)->v.element.tag_namespace;
3863
3545
  if (current_namespace == GUMBO_NAMESPACE_MATHML) {
3864
3546
  adjust_mathml_attributes(parser, token);
3865
3547
  }
@@ -3948,10 +3630,8 @@ static bool handle_token(GumboParser* parser, GumboToken* token) {
3948
3630
  parser->_parser_state->_closed_html_tag = true;
3949
3631
  }
3950
3632
 
3951
- const GumboNode* current_node = get_adjusted_current_node(parser);
3952
- assert(!current_node ||
3953
- current_node->type == GUMBO_NODE_ELEMENT ||
3954
- current_node->type == GUMBO_NODE_TEMPLATE);
3633
+ const GumboNode* current_node = get_current_node(parser);
3634
+ assert(!current_node || current_node->type == GUMBO_NODE_ELEMENT);
3955
3635
  if (current_node) {
3956
3636
  gumbo_debug("Current node: <%s>.\n",
3957
3637
  gumbo_normalized_tagname(current_node->v.element.tag));
@@ -3979,66 +3659,6 @@ static bool handle_token(GumboParser* parser, GumboToken* token) {
3979
3659
  }
3980
3660
  }
3981
3661
 
3982
- static void fragment_parser_init(
3983
- GumboParser *parser, GumboTag fragment_ctx,
3984
- GumboNamespaceEnum fragment_namespace) {
3985
- GumboNode *root;
3986
- assert(fragment_ctx != GUMBO_TAG_LAST);
3987
-
3988
- // 3
3989
- parser->_parser_state->_fragment_ctx = create_element(parser, fragment_ctx);
3990
- parser->_parser_state->_fragment_ctx->v.element.tag_namespace =
3991
- fragment_namespace;
3992
-
3993
- // 4
3994
- if (fragment_namespace == GUMBO_NAMESPACE_HTML) {
3995
- // Non-HTML namespaces always start in the DATA state.
3996
- switch (fragment_ctx) {
3997
- case GUMBO_TAG_TITLE:
3998
- case GUMBO_TAG_TEXTAREA:
3999
- gumbo_tokenizer_set_state(parser, GUMBO_LEX_RCDATA);
4000
- break;
4001
-
4002
- case GUMBO_TAG_STYLE:
4003
- case GUMBO_TAG_XMP:
4004
- case GUMBO_TAG_IFRAME:
4005
- case GUMBO_TAG_NOEMBED:
4006
- case GUMBO_TAG_NOFRAMES:
4007
- gumbo_tokenizer_set_state(parser, GUMBO_LEX_RAWTEXT);
4008
- break;
4009
-
4010
- case GUMBO_TAG_SCRIPT:
4011
- gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT);
4012
- break;
4013
-
4014
- case GUMBO_TAG_NOSCRIPT:
4015
- /* scripting is disabled in Gumbo, so leave the tokenizer
4016
- * in the default data state */
4017
- break;
4018
-
4019
- case GUMBO_TAG_PLAINTEXT:
4020
- gumbo_tokenizer_set_state(parser, GUMBO_LEX_PLAINTEXT);
4021
- break;
4022
-
4023
- default:
4024
- /* default data state */
4025
- break;
4026
- }
4027
- }
4028
-
4029
- // 5. 6. 7.
4030
- root = insert_element_of_tag_type(parser, GUMBO_TAG_HTML, GUMBO_INSERTION_IMPLIED);
4031
- parser->_output->root = root;
4032
-
4033
- // 8.
4034
- if (fragment_ctx == GUMBO_TAG_TEMPLATE) {
4035
- push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TEMPLATE);
4036
- }
4037
-
4038
- // 10.
4039
- reset_insertion_mode_appropriately(parser);
4040
- }
4041
-
4042
3662
  GumboOutput* gumbo_parse(const char* buffer) {
4043
3663
  return gumbo_parse_with_options(
4044
3664
  &kGumboDefaultOptions, buffer, strlen(buffer));
@@ -4046,27 +3666,11 @@ GumboOutput* gumbo_parse(const char* buffer) {
4046
3666
 
4047
3667
  GumboOutput* gumbo_parse_with_options(
4048
3668
  const GumboOptions* options, const char* buffer, size_t length) {
4049
- return gumbo_parse_fragment(
4050
- options, buffer, length, GUMBO_TAG_LAST, GUMBO_NAMESPACE_HTML);
4051
- }
4052
-
4053
- GumboOutput* gumbo_parse_fragment(
4054
- const GumboOptions* options, const char* buffer, size_t length,
4055
- const GumboTag fragment_ctx, const GumboNamespaceEnum fragment_namespace) {
4056
3669
  GumboParser parser;
4057
3670
  parser._options = options;
4058
- parser_state_init(&parser);
4059
- // Must come after parser_state_init, since creating the document node must
4060
- // reference parser_state->_current_node.
4061
3671
  output_init(&parser);
4062
- // And this must come after output_init, because initializing the tokenizer
4063
- // reads the first character and that may cause a UTF-8 decode error
4064
- // (inserting into output->errors) if that's invalid.
4065
3672
  gumbo_tokenizer_state_init(&parser, buffer, length);
4066
-
4067
- if (fragment_ctx != GUMBO_TAG_LAST) {
4068
- fragment_parser_init(&parser, fragment_ctx, fragment_namespace);
4069
- }
3673
+ parser_state_init(&parser);
4070
3674
 
4071
3675
  GumboParserState* state = parser._parser_state;
4072
3676
  gumbo_debug("Parsing %.*s.\n", length, buffer);
@@ -4154,16 +3758,20 @@ GumboOutput* gumbo_parse_fragment(
4154
3758
  return parser._output;
4155
3759
  }
4156
3760
 
3761
+ void gumbo_destroy_node(GumboOptions* options, GumboNode* node) {
3762
+ // Need a dummy GumboParser because the allocator comes along with the
3763
+ // options object.
3764
+ GumboParser parser;
3765
+ parser._options = options;
3766
+ destroy_node(&parser, node);
3767
+ }
3768
+
4157
3769
  void gumbo_destroy_output(const GumboOptions* options, GumboOutput* output) {
4158
3770
  // Need a dummy GumboParser because the allocator comes along with the
4159
3771
  // options object.
4160
3772
  GumboParser parser;
4161
- parser._parser_state = NULL;
4162
3773
  parser._options = options;
4163
- GumboNode* current = output->document;
4164
- while (current) {
4165
- current = destroy_node(&parser, current);
4166
- }
3774
+ destroy_node(&parser, output->document);
4167
3775
  for (int i = 0; i < output->errors.length; ++i) {
4168
3776
  gumbo_error_destroy(&parser, output->errors.data[i]);
4169
3777
  }