nokogumbo 2.0.0.pre.alpha → 2.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,6 +7,7 @@
7
7
  #include "insertion_mode.h"
8
8
  #include "string_buffer.h"
9
9
  #include "token_type.h"
10
+ #include "tokenizer_states.h"
10
11
 
11
12
  #ifdef __cplusplus
12
13
  extern "C" {
@@ -15,85 +16,66 @@ extern "C" {
15
16
  struct GumboInternalParser;
16
17
 
17
18
  typedef enum {
19
+ // Defined errors.
20
+ // https://html.spec.whatwg.org/multipage/parsing.html#parse-errors
21
+ GUMBO_ERR_ABRUPT_CLOSING_OF_EMPTY_COMMENT,
22
+ GUMBO_ERR_ABRUPT_DOCTYPE_PUBLIC_IDENTIFIER,
23
+ GUMBO_ERR_ABRUPT_DOCTYPE_SYSTEM_IDENTIFIER,
24
+ GUMBO_ERR_ABSENCE_OF_DIGITS_IN_NUMERIC_CHARACTER_REFERENCE,
25
+ GUMBO_ERR_CDATA_IN_HTML_CONTENT,
26
+ GUMBO_ERR_CHARACTER_REFERENCE_OUTSIDE_UNICODE_RANGE,
27
+ GUMBO_ERR_CONTROL_CHARACTER_IN_INPUT_STREAM,
28
+ GUMBO_ERR_CONTROL_CHARACTER_REFERENCE,
29
+ GUMBO_ERR_END_TAG_WITH_ATTRIBUTES,
30
+ GUMBO_ERR_DUPLICATE_ATTRIBUTE,
31
+ GUMBO_ERR_END_TAG_WITH_TRAILING_SOLIDUS,
32
+ GUMBO_ERR_EOF_BEFORE_TAG_NAME,
33
+ GUMBO_ERR_EOF_IN_CDATA,
34
+ GUMBO_ERR_EOF_IN_COMMENT,
35
+ GUMBO_ERR_EOF_IN_DOCTYPE,
36
+ GUMBO_ERR_EOF_IN_SCRIPT_HTML_COMMENT_LIKE_TEXT,
37
+ GUMBO_ERR_EOF_IN_TAG,
38
+ GUMBO_ERR_INCORRECTLY_CLOSED_COMMENT,
39
+ GUMBO_ERR_INCORRECTLY_OPENED_COMMENT,
40
+ GUMBO_ERR_INVALID_CHARACTER_SEQUENCE_AFTER_DOCTYPE_NAME,
41
+ GUMBO_ERR_INVALID_FIRST_CHARACTER_OF_TAG_NAME,
42
+ GUMBO_ERR_MISSING_ATTRIBUTE_VALUE,
43
+ GUMBO_ERR_MISSING_DOCTYPE_NAME,
44
+ GUMBO_ERR_MISSING_DOCTYPE_PUBLIC_IDENTIFIER,
45
+ GUMBO_ERR_MISSING_DOCTYPE_SYSTEM_IDENTIFIER,
46
+ GUMBO_ERR_MISSING_END_TAG_NAME,
47
+ GUMBO_ERR_MISSING_QUOTE_BEFORE_DOCTYPE_PUBLIC_IDENTIFIER,
48
+ GUMBO_ERR_MISSING_QUOTE_BEFORE_DOCTYPE_SYSTEM_IDENTIFIER,
49
+ GUMBO_ERR_MISSING_SEMICOLON_AFTER_CHARACTER_REFERENCE,
50
+ GUMBO_ERR_MISSING_WHITESPACE_AFTER_DOCTYPE_PUBLIC_KEYWORD,
51
+ GUMBO_ERR_MISSING_WHITESPACE_AFTER_DOCTYPE_SYSTEM_KEYWORD,
52
+ GUMBO_ERR_MISSING_WHITESPACE_BEFORE_DOCTYPE_NAME,
53
+ GUMBO_ERR_MISSING_WHITESPACE_BETWEEN_ATTRIBUTES,
54
+ GUMBO_ERR_MISSING_WHITESPACE_BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS,
55
+ GUMBO_ERR_NESTED_COMMENT,
56
+ GUMBO_ERR_NONCHARACTER_CHARACTER_REFERENCE,
57
+ GUMBO_ERR_NONCHARACTER_IN_INPUT_STREAM,
58
+ GUMBO_ERR_NON_VOID_HTML_ELEMENT_START_TAG_WITH_TRAILING_SOLIDUS,
59
+ GUMBO_ERR_NULL_CHARACTER_REFERENCE,
60
+ GUMBO_ERR_SURROGATE_CHARACTER_REFERENCE,
61
+ GUMBO_ERR_SURROGATE_IN_INPUT_STREAM,
62
+ GUMBO_ERR_UNEXPECTED_CHARACTER_AFTER_DOCTYPE_SYSTEM_IDENTIFIER,
63
+ GUMBO_ERR_UNEXPECTED_CHARACTER_IN_ATTRIBUTE_NAME,
64
+ GUMBO_ERR_UNEXPECTED_CHARACTER_IN_UNQUOTED_ATTRIBUTE_VALUE,
65
+ GUMBO_ERR_UNEXPECTED_EQUALS_SIGN_BEFORE_ATTRIBUTE_NAME,
66
+ GUMBO_ERR_UNEXPECTED_NULL_CHARACTER,
67
+ GUMBO_ERR_UNEXPECTED_QUESTION_MARK_INSTEAD_OF_TAG_NAME,
68
+ GUMBO_ERR_UNEXPECTED_SOLIDUS_IN_TAG,
69
+ GUMBO_ERR_UNKNOWN_NAMED_CHARACTER_REFERENCE,
70
+
71
+ // Encoding errors.
18
72
  GUMBO_ERR_UTF8_INVALID,
19
73
  GUMBO_ERR_UTF8_TRUNCATED,
20
- GUMBO_ERR_UTF8_NULL,
21
- GUMBO_ERR_NUMERIC_CHAR_REF_NO_DIGITS,
22
- GUMBO_ERR_NUMERIC_CHAR_REF_WITHOUT_SEMICOLON,
23
- GUMBO_ERR_NUMERIC_CHAR_REF_INVALID,
24
- GUMBO_ERR_NAMED_CHAR_REF_WITHOUT_SEMICOLON,
25
- GUMBO_ERR_NAMED_CHAR_REF_INVALID,
26
- GUMBO_ERR_TAG_STARTS_WITH_QUESTION,
27
- GUMBO_ERR_TAG_EOF,
28
- GUMBO_ERR_TAG_INVALID,
29
- GUMBO_ERR_CLOSE_TAG_EMPTY,
30
- GUMBO_ERR_CLOSE_TAG_EOF,
31
- GUMBO_ERR_CLOSE_TAG_INVALID,
32
- GUMBO_ERR_SCRIPT_EOF,
33
- GUMBO_ERR_ATTR_NAME_EOF,
34
- GUMBO_ERR_ATTR_NAME_INVALID,
35
- GUMBO_ERR_ATTR_DOUBLE_QUOTE_EOF,
36
- GUMBO_ERR_ATTR_SINGLE_QUOTE_EOF,
37
- GUMBO_ERR_ATTR_UNQUOTED_EOF,
38
- GUMBO_ERR_ATTR_UNQUOTED_RIGHT_BRACKET,
39
- GUMBO_ERR_ATTR_UNQUOTED_EQUALS,
40
- GUMBO_ERR_ATTR_AFTER_EOF,
41
- GUMBO_ERR_ATTR_AFTER_INVALID,
42
- GUMBO_ERR_DUPLICATE_ATTR,
43
- GUMBO_ERR_SOLIDUS_EOF,
44
- GUMBO_ERR_SOLIDUS_INVALID,
45
- GUMBO_ERR_DASHES_OR_DOCTYPE,
46
- GUMBO_ERR_COMMENT_EOF,
47
- GUMBO_ERR_COMMENT_INVALID,
48
- GUMBO_ERR_COMMENT_BANG_AFTER_DOUBLE_DASH,
49
- GUMBO_ERR_COMMENT_DASH_AFTER_DOUBLE_DASH,
50
- GUMBO_ERR_COMMENT_SPACE_AFTER_DOUBLE_DASH,
51
- GUMBO_ERR_COMMENT_END_BANG_EOF,
52
- GUMBO_ERR_DOCTYPE_EOF,
53
- GUMBO_ERR_DOCTYPE_INVALID,
54
- GUMBO_ERR_DOCTYPE_SPACE,
55
- GUMBO_ERR_DOCTYPE_RIGHT_BRACKET,
56
- GUMBO_ERR_DOCTYPE_SPACE_OR_RIGHT_BRACKET,
57
- GUMBO_ERR_DOCTYPE_END,
74
+
75
+ // Generic parser error.
58
76
  GUMBO_ERR_PARSER,
59
- GUMBO_ERR_UNACKNOWLEDGED_SELF_CLOSING_TAG,
60
- GUMBO_ERR_SELF_CLOSING_END_TAG,
61
77
  } GumboErrorType;
62
78
 
63
- // Additional data for duplicated attributes.
64
- typedef struct GumboInternalDuplicateAttrError {
65
- // The name of the attribute. Owned by this struct.
66
- const char* name;
67
-
68
- // The (0-based) index within the attributes vector of the original
69
- // occurrence.
70
- unsigned int original_index;
71
-
72
- // The (0-based) index where the new occurrence would be.
73
- unsigned int new_index;
74
- } GumboDuplicateAttrError;
75
-
76
- // A simplified representation of the tokenizer state, designed to be more
77
- // useful to clients of this library than the internal representation. This
78
- // condenses the actual states used in the tokenizer state machine into a few
79
- // values that will be familiar to users of HTML.
80
- typedef enum {
81
- GUMBO_ERR_TOKENIZER_DATA,
82
- GUMBO_ERR_TOKENIZER_CHAR_REF,
83
- GUMBO_ERR_TOKENIZER_RCDATA,
84
- GUMBO_ERR_TOKENIZER_RAWTEXT,
85
- GUMBO_ERR_TOKENIZER_PLAINTEXT,
86
- GUMBO_ERR_TOKENIZER_SCRIPT,
87
- GUMBO_ERR_TOKENIZER_TAG,
88
- GUMBO_ERR_TOKENIZER_SELF_CLOSING_TAG,
89
- GUMBO_ERR_TOKENIZER_ATTR_NAME,
90
- GUMBO_ERR_TOKENIZER_ATTR_VALUE,
91
- GUMBO_ERR_TOKENIZER_MARKUP_DECLARATION,
92
- GUMBO_ERR_TOKENIZER_COMMENT,
93
- GUMBO_ERR_TOKENIZER_DOCTYPE,
94
- GUMBO_ERR_TOKENIZER_CDATA,
95
- } GumboTokenizerErrorState;
96
-
97
79
  // Additional data for tokenizer errors.
98
80
  // This records the current state and codepoint encountered - this is usually
99
81
  // enough to reconstruct what went wrong and provide a friendly error message.
@@ -102,7 +84,7 @@ typedef struct GumboInternalTokenizerError {
102
84
  int codepoint;
103
85
 
104
86
  // The state that the tokenizer was in at the time.
105
- GumboTokenizerErrorState state;
87
+ GumboTokenizerEnum state;
106
88
  } GumboTokenizerError;
107
89
 
108
90
  // Additional data for parse errors.
@@ -125,43 +107,25 @@ typedef struct GumboInternalParserError {
125
107
  // The overall error struct representing an error in decoding/tokenizing/parsing
126
108
  // the HTML. This contains an enumerated type flag, a source position, and then
127
109
  // a union of fields containing data specific to the error.
128
- typedef struct GumboInternalError {
110
+ struct GumboInternalError {
129
111
  // The type of error.
130
112
  GumboErrorType type;
131
113
 
132
114
  // The position within the source file where the error occurred.
133
115
  GumboSourcePosition position;
134
116
 
135
- // A pointer to the byte within the original source file text where the error
136
- // occurred (note that this is not the same as position.offset, as that gives
137
- // character-based instead of byte-based offsets).
138
- const char* original_text;
117
+ // The piece of text that caused the error.
118
+ GumboStringPiece original_text;
139
119
 
140
120
  // Type-specific error information.
141
121
  union {
142
- // The code point we encountered, for:
143
- // * GUMBO_ERR_UTF8_INVALID
144
- // * GUMBO_ERR_UTF8_TRUNCATED
145
- // * GUMBO_ERR_NUMERIC_CHAR_REF_WITHOUT_SEMICOLON
146
- // * GUMBO_ERR_NUMERIC_CHAR_REF_INVALID
147
- uint32_t codepoint;
148
-
149
122
  // Tokenizer errors.
150
123
  GumboTokenizerError tokenizer;
151
124
 
152
- // Short textual data, for:
153
- // * GUMBO_ERR_NAMED_CHAR_REF_WITHOUT_SEMICOLON
154
- // * GUMBO_ERR_NAMED_CHAR_REF_INVALID
155
- GumboStringPiece text;
156
-
157
- // Duplicate attribute data, for GUMBO_ERR_DUPLICATE_ATTR.
158
- GumboDuplicateAttrError duplicate_attr;
159
-
160
- // Parser state, for GUMBO_ERR_PARSER and
161
- // GUMBO_ERR_UNACKNOWLEDGE_SELF_CLOSING_TAG.
162
- struct GumboInternalParserError parser;
125
+ // Parser errors.
126
+ GumboParserError parser;
163
127
  } v;
164
- } GumboError;
128
+ };
165
129
 
166
130
  // Adds a new error to the parser's error list, and returns a pointer to it so
167
131
  // that clients can fill out the rest of its fields. May return NULL if we're
@@ -177,32 +141,6 @@ void gumbo_destroy_errors(struct GumboInternalParser* errors);
177
141
  // Frees the memory used for a single GumboError.
178
142
  void gumbo_error_destroy(GumboError* error);
179
143
 
180
- // Prints an error to a string. This fills an empty GumboStringBuffer with a
181
- // freshly-allocated buffer containing the error message text. The caller is
182
- // responsible for freeing the buffer.
183
- void gumbo_error_to_string (
184
- const GumboError* error,
185
- GumboStringBuffer* output
186
- );
187
-
188
- // Prints a caret diagnostic to a string. This fills an empty GumboStringBuffer
189
- // with a freshly-allocated buffer containing the error message text. The
190
- // caller is responsible for freeing the buffer.
191
- void gumbo_caret_diagnostic_to_string (
192
- const GumboError* error,
193
- const char* source_text,
194
- size_t source_length,
195
- GumboStringBuffer* output
196
- );
197
-
198
- // Like gumbo_caret_diagnostic_to_string, but prints the text to stdout instead
199
- // of writing to a string.
200
- void gumbo_print_caret_diagnostic (
201
- const GumboError* error,
202
- const char* source_text,
203
- size_t source_length
204
- );
205
-
206
144
  #ifdef __cplusplus
207
145
  }
208
146
  #endif
@@ -706,6 +706,15 @@ typedef struct GumboInternalOptions {
706
706
  */
707
707
  bool stop_on_first_error;
708
708
 
709
+ /**
710
+ * Maximum allowed number of attributes per element. If this limit is
711
+ * exceeded, the parser will return early with a partial document and
712
+ * the returned `GumboOutput` will have its `status` field set to
713
+ * `GUMBO_STATUS_TOO_MANY_ATTRIBUTES`. Set to `-1` to disable the limit.
714
+ * Default: `400`.
715
+ */
716
+ int max_attributes;
717
+
709
718
  /**
710
719
  * Maximum allowed depth for the parse tree. If this limit is exceeded,
711
720
  * the parser will return early with a partial document and the returned
@@ -796,6 +805,16 @@ typedef enum {
796
805
  */
797
806
  GUMBO_STATUS_TREE_TOO_DEEP,
798
807
 
808
+ /**
809
+ * Indicates that the maximum number of attributes per element
810
+ * (`GumboOptions::max_attributes`) was reached during parsing. The
811
+ * resulting tree will be a partial document, with no further nodes
812
+ * created after the point where the limit was reached. The partial
813
+ * document may be useful for constructing an error message but
814
+ * typically shouldn't be used for other purposes.
815
+ */
816
+ GUMBO_STATUS_TOO_MANY_ATTRIBUTES,
817
+
799
818
  // Currently unused
800
819
  GUMBO_STATUS_OUT_OF_MEMORY,
801
820
  } GumboOutputStatus;
@@ -817,13 +836,17 @@ typedef struct GumboInternalOutput {
817
836
 
818
837
  /**
819
838
  * A list of errors that occurred during the parse.
820
- * NOTE: In version 1.0 of this library, the API for errors hasn't been fully
821
- * fleshed out and may change in the future. For this reason, the GumboError
822
- * header isn't part of the public API. Contact us if you need errors
823
- * reported so we can work out something appropriate for your use-case.
824
839
  */
825
840
  GumboVector /* GumboError */ errors;
826
841
 
842
+ /**
843
+ * True if the parser encountered an error.
844
+ *
845
+ * This can be true and `errors` an empty `GumboVector` if the `max_errors`
846
+ * option was set to 0.
847
+ */
848
+ bool document_error;
849
+
827
850
  /**
828
851
  * A status code indicating whether parsing finished successfully or was
829
852
  * stopped mid-document due to exceptional circumstances.
@@ -866,6 +889,53 @@ const char* gumbo_status_to_string(GumboOutputStatus status);
866
889
  /** Release the memory used for the parse tree and parse errors. */
867
890
  void gumbo_destroy_output(GumboOutput* output);
868
891
 
892
+ /** Opaque GumboError type */
893
+ typedef struct GumboInternalError GumboError;
894
+
895
+ /**
896
+ * Returns the position of the error.
897
+ */
898
+ GumboSourcePosition gumbo_error_position(const GumboError* error);
899
+
900
+ /**
901
+ * Returns a constant string representation of the error's code. This is owned
902
+ * by the library and should not be freed by the caller.
903
+ */
904
+ const char* gumbo_error_code(const GumboError* error);
905
+
906
+ /**
907
+ * Prints an error to a string. This stores a freshly-allocated buffer
908
+ * containing the error message text in output. The caller is responsible for
909
+ * freeing the buffer. The size of the error message is returned. The error
910
+ * message itself may not be NUL-terminated and may contain NUL bytes, so the
911
+ * returned size must be used.
912
+ */
913
+ size_t gumbo_error_to_string(const GumboError* error, char **output);
914
+
915
+ /**
916
+ * Prints a caret diagnostic to a string. This stores a freshly-allocated
917
+ * buffer containing the error message text in output. The caller is responsible for
918
+ * freeing the buffer. The size of the error message is returned. The error
919
+ * message itself may not be NUL-terminated and may contain NUL bytes, so the
920
+ * returned size must be used.
921
+ */
922
+ size_t gumbo_caret_diagnostic_to_string (
923
+ const GumboError* error,
924
+ const char* source_text,
925
+ size_t source_length,
926
+ char** output
927
+ );
928
+
929
+ /**
930
+ * Like gumbo_caret_diagnostic_to_string, but prints the text to stdout
931
+ * instead of writing to a string.
932
+ */
933
+ void gumbo_print_caret_diagnostic (
934
+ const GumboError* error,
935
+ const char* source_text,
936
+ size_t source_length
937
+ );
938
+
869
939
  #ifdef __cplusplus
870
940
  }
871
941
  #endif
@@ -31,6 +31,7 @@
31
31
  #include "replacement.h"
32
32
  #include "tokenizer.h"
33
33
  #include "tokenizer_states.h"
34
+ #include "token_buffer.h"
34
35
  #include "utf8.h"
35
36
  #include "util.h"
36
37
  #include "vector.h"
@@ -42,11 +43,12 @@ typedef uint8_t TagSet[GUMBO_TAG_LAST + 1];
42
43
 
43
44
  #define GUMBO_EMPTY_SOURCE_POSITION_INIT { .line = 0, .column = 0, .offset = 0 }
44
45
  #define kGumboEmptySourcePosition (const GumboSourcePosition) \
45
- GUMBO_EMPTY_SOURCE_POSITION_INIT
46
+ GUMBO_EMPTY_SOURCE_POSITION_INIT
46
47
 
47
48
  const GumboOptions kGumboDefaultOptions = {
48
49
  .tab_stop = 8,
49
50
  .stop_on_first_error = false,
51
+ .max_attributes = 400,
50
52
  .max_tree_depth = 400,
51
53
  .max_errors = -1,
52
54
  .fragment_context = NULL,
@@ -59,25 +61,6 @@ const GumboOptions kGumboDefaultOptions = {
59
61
  #define STRING(s) {.data = s, .length = sizeof(s) - 1}
60
62
  #define TERMINATOR {.data = NULL, .length = 0}
61
63
 
62
- static const GumboStringPiece kPublicIdHtml4_0 =
63
- STRING("-//W3C//DTD HTML 4.0//EN");
64
- static const GumboStringPiece kPublicIdHtml4_01 =
65
- STRING("-//W3C//DTD HTML 4.01//EN");
66
- static const GumboStringPiece kPublicIdXhtml1_0 =
67
- STRING("-//W3C//DTD XHTML 1.0 Strict//EN");
68
- static const GumboStringPiece kPublicIdXhtml1_1 =
69
- STRING("-//W3C//DTD XHTML 1.1//EN");
70
- static const GumboStringPiece kSystemIdRecHtml4_0 =
71
- STRING("http://www.w3.org/TR/REC-html40/strict.dtd");
72
- static const GumboStringPiece kSystemIdHtml4 =
73
- STRING("http://www.w3.org/TR/html4/strict.dtd");
74
- static const GumboStringPiece kSystemIdXhtmlStrict1_1 =
75
- STRING("http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd");
76
- static const GumboStringPiece kSystemIdXhtml1_1 =
77
- STRING("http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd");
78
- static const GumboStringPiece kSystemIdLegacyCompat =
79
- STRING("about:legacy-compat");
80
-
81
64
  // The doctype arrays have an explicit terminator because we want to pass them
82
65
  // to a helper function, and passing them as a pointer discards sizeof
83
66
  // information. The SVG arrays are used only by one-off functions, and so loops
@@ -260,6 +243,9 @@ typedef struct GumboInternalParserState {
260
243
  // The accumulated text node buffer state.
261
244
  TextNodeBufferState _text_node;
262
245
 
246
+ // The accumulated character tokens in tables for error purposes.
247
+ GumboCharacterTokenBuffer _table_character_tokens;
248
+
263
249
  // The current token.
264
250
  GumboToken* _current_token;
265
251
 
@@ -351,6 +337,7 @@ static void output_init(GumboParser* parser) {
351
337
  GumboOutput* output = gumbo_alloc(sizeof(GumboOutput));
352
338
  output->root = NULL;
353
339
  output->document = new_document_node();
340
+ output->document_error = false;
354
341
  output->status = GUMBO_STATUS_OK;
355
342
  parser->_output = output;
356
343
  gumbo_init_errors(parser);
@@ -365,6 +352,7 @@ static void parser_state_init(GumboParser* parser) {
365
352
  parser_state->_foster_parent_insertions = false;
366
353
  parser_state->_text_node._type = GUMBO_NODE_WHITESPACE;
367
354
  gumbo_string_buffer_init(&parser_state->_text_node._buffer);
355
+ gumbo_character_token_buffer_init(&parser_state->_table_character_tokens);
368
356
  gumbo_vector_init(10, &parser_state->_open_elements);
369
357
  gumbo_vector_init(5, &parser_state->_active_formatting_elements);
370
358
  gumbo_vector_init(5, &parser_state->_template_insertion_modes);
@@ -463,6 +451,7 @@ static void parser_state_destroy(GumboParser* parser) {
463
451
  gumbo_vector_destroy(&state->_open_elements);
464
452
  gumbo_vector_destroy(&state->_template_insertion_modes);
465
453
  gumbo_string_buffer_destroy(&state->_text_node._buffer);
454
+ gumbo_character_token_buffer_destroy(&state->_table_character_tokens);
466
455
  gumbo_free(state);
467
456
  }
468
457
 
@@ -573,11 +562,11 @@ static bool tag_in (
573
562
  static bool tag_is(const GumboToken* token, bool is_start, GumboTag tag) {
574
563
  if (is_start && token->type == GUMBO_TOKEN_START_TAG) {
575
564
  return token->v.start_tag.tag == tag;
576
- } else if (!is_start && token->type == GUMBO_TOKEN_END_TAG) {
565
+ }
566
+ if (!is_start && token->type == GUMBO_TOKEN_END_TAG) {
577
567
  return token->v.end_tag.tag == tag;
578
- } else {
579
- return false;
580
568
  }
569
+ return false;
581
570
  }
582
571
 
583
572
  static inline bool tagset_includes (
@@ -621,6 +610,14 @@ static bool node_qualified_tagname_is (
621
610
  return !gumbo_ascii_strcasecmp(element_name, name);
622
611
  }
623
612
 
613
+ static bool node_html_tagname_is (
614
+ const GumboNode* node,
615
+ GumboTag tag,
616
+ const char *name
617
+ ) {
618
+ return node_qualified_tagname_is(node, GUMBO_NAMESPACE_HTML, tag, name);
619
+ }
620
+
624
621
  static bool node_tagname_is (
625
622
  const GumboNode* node,
626
623
  GumboTag tag,
@@ -646,7 +643,6 @@ static bool node_qualified_tag_is (
646
643
 
647
644
  // Like node_tag_in, but for the single-tag case in the HTML namespace
648
645
  static bool node_html_tag_is(const GumboNode* node, GumboTag tag) {
649
- assert(tag != GUMBO_TAG_UNKNOWN);
650
646
  return node_qualified_tag_is(node, GUMBO_NAMESPACE_HTML, tag);
651
647
  }
652
648
 
@@ -738,18 +734,18 @@ static void reset_insertion_mode_appropriately(GumboParser* parser) {
738
734
  assert(0);
739
735
  }
740
736
 
741
- static GumboError* parser_add_parse_error (
737
+ static void parser_add_parse_error (
742
738
  GumboParser* parser,
743
739
  const GumboToken* token
744
740
  ) {
745
741
  gumbo_debug("Adding parse error.\n");
746
742
  GumboError* error = gumbo_add_error(parser);
747
743
  if (!error) {
748
- return NULL;
744
+ return;
749
745
  }
750
746
  error->type = GUMBO_ERR_PARSER;
751
747
  error->position = token->position;
752
- error->original_text = token->original_text.data;
748
+ error->original_text = token->original_text;
753
749
  GumboParserError* extra_data = &error->v.parser;
754
750
  extra_data->input_type = token->type;
755
751
  extra_data->input_tag = GUMBO_TAG_UNKNOWN;
@@ -772,7 +768,6 @@ static GumboError* parser_add_parse_error (
772
768
  &extra_data->tag_stack
773
769
  );
774
770
  }
775
- return error;
776
771
  }
777
772
 
778
773
  // https://html.spec.whatwg.org/multipage/parsing.html#mathml-text-integration-point
@@ -1639,9 +1634,11 @@ static bool has_node_in_scope(const GumboParser* parser, const GumboNode* node)
1639
1634
  const GumboNodeType type = current->type;
1640
1635
  if (current == node) {
1641
1636
  return true;
1642
- } else if (type != GUMBO_NODE_ELEMENT && type != GUMBO_NODE_TEMPLATE) {
1637
+ }
1638
+ if (type != GUMBO_NODE_ELEMENT && type != GUMBO_NODE_TEMPLATE) {
1643
1639
  continue;
1644
- } else if (node_tag_in_set(current, &tags)) {
1640
+ }
1641
+ if (node_tag_in_set(current, &tags)) {
1645
1642
  return false;
1646
1643
  }
1647
1644
  }
@@ -1687,14 +1684,18 @@ static bool has_an_element_in_select_scope(const GumboParser* parser, GumboTag t
1687
1684
  // https://html.spec.whatwg.org/multipage/parsing.html#generate-implied-end-tags
1688
1685
  // "exception" is the "element to exclude from the process" listed in the spec.
1689
1686
  // Pass GUMBO_TAG_LAST to not exclude any of them.
1690
- static void generate_implied_end_tags(GumboParser* parser, GumboTag exception) {
1687
+ static void generate_implied_end_tags (
1688
+ GumboParser* parser,
1689
+ GumboTag exception,
1690
+ const char* exception_name
1691
+ ) {
1691
1692
  static const TagSet tags = {
1692
- TAG(DD), TAG(DT), TAG(LI), TAG(OPTION), TAG(OPTGROUP),
1693
- TAG(P), TAG(RP), TAG(RB), TAG(RT), TAG(RTC)
1693
+ TAG(DD), TAG(DT), TAG(LI), TAG(OPTGROUP), TAG(OPTION),
1694
+ TAG(P), TAG(RB), TAG(RP), TAG(RT), TAG(RTC)
1694
1695
  };
1695
1696
  while (
1696
1697
  node_tag_in_set(get_current_node(parser), &tags)
1697
- && !node_html_tag_is(get_current_node(parser), exception)
1698
+ && !node_html_tagname_is(get_current_node(parser), exception, exception_name)
1698
1699
  ) {
1699
1700
  pop_current_node(parser);
1700
1701
  }
@@ -1704,15 +1705,36 @@ static void generate_implied_end_tags(GumboParser* parser, GumboTag exception) {
1704
1705
  // https://html.spec.whatwg.org/multipage/parsing.html#closing-elements-that-have-implied-end-tags
1705
1706
  static void generate_all_implied_end_tags_thoroughly(GumboParser* parser) {
1706
1707
  static const TagSet tags = {
1707
- TAG(CAPTION), TAG(COLGROUP), TAG(DD), TAG(DT), TAG(LI), TAG(OPTION),
1708
- TAG(OPTGROUP), TAG(P), TAG(RP), TAG(RT), TAG(RTC), TAG(TBODY),
1709
- TAG(TD), TAG(TFOOT), TAG(TH), TAG(HEAD), TAG(TR)
1708
+ TAG(CAPTION), TAG(COLGROUP), TAG(DD), TAG(DT), TAG(LI), TAG(OPTGROUP),
1709
+ TAG(OPTION), TAG(P), TAG(RB), TAG(RP), TAG(RT), TAG(RTC), TAG(TBODY),
1710
+ TAG(TD), TAG(TFOOT), TAG(TH), TAG(THEAD), TAG(TR)
1710
1711
  };
1711
1712
  while (node_tag_in_set(get_current_node(parser), &tags)) {
1712
1713
  pop_current_node(parser);
1713
1714
  }
1714
1715
  }
1715
1716
 
1717
+ // This factors out the clauses in the "in body" insertion mode checking "if
1718
+ // there is a node in the stack of open elements that is not" one of a list of
1719
+ // elements in which case it's a parse error.
1720
+ // This is used in "an end-of-file token", "an end tag whose tag name is
1721
+ // 'body'", and "an end tag whose tag name is 'html'".
1722
+ static bool stack_contains_nonclosable_element (
1723
+ GumboParser* parser
1724
+ ) {
1725
+ static const TagSet tags = {
1726
+ TAG(DD), TAG(DT), TAG(LI), TAG(OPTGROUP), TAG(OPTION), TAG(P), TAG(RB),
1727
+ TAG(RP), TAG(RT), TAG(RTC), TAG(TBODY), TAG(TD), TAG(TFOOT), TAG(TH),
1728
+ TAG(THEAD), TAG(TR), TAG(BODY), TAG(HTML),
1729
+ };
1730
+ GumboVector* open_elements = &parser->_parser_state->_open_elements;
1731
+ for (size_t i = 0; i < open_elements->length; ++i) {
1732
+ if (!node_tag_in_set(open_elements->data[i], &tags))
1733
+ return true;
1734
+ }
1735
+ return false;
1736
+ }
1737
+
1716
1738
  // This factors out the clauses relating to "act as if an end tag token with tag
1717
1739
  // name "table" had been seen". Returns true if there's a table element in table
1718
1740
  // scope which was successfully closed, false if not and the token should be
@@ -1732,37 +1754,35 @@ static bool close_table(GumboParser* parser) {
1732
1754
 
1733
1755
  // This factors out the clauses relating to "act as if an end tag token with tag
1734
1756
  // name `cell_tag` had been seen".
1735
- static bool close_table_cell (
1757
+ static void close_table_cell (
1736
1758
  GumboParser* parser,
1737
1759
  const GumboToken* token,
1738
1760
  GumboTag cell_tag
1739
1761
  ) {
1740
- bool result = true;
1741
- generate_implied_end_tags(parser, GUMBO_TAG_LAST);
1762
+ generate_implied_end_tags(parser, GUMBO_TAG_LAST, NULL);
1742
1763
  const GumboNode* node = get_current_node(parser);
1743
- if (!node_html_tag_is(node, cell_tag)) {
1764
+ if (!node_html_tag_is(node, cell_tag))
1744
1765
  parser_add_parse_error(parser, token);
1745
- result = false;
1746
- }
1747
1766
  do {
1748
1767
  node = pop_current_node(parser);
1749
1768
  } while (!node_html_tag_is(node, cell_tag));
1750
1769
 
1751
1770
  clear_active_formatting_elements(parser);
1752
1771
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_ROW);
1753
- return result;
1754
1772
  }
1755
1773
 
1756
1774
  // https://html.spec.whatwg.org/multipage/parsing.html#close-the-cell
1757
1775
  // This holds the logic to determine whether we should close a <td> or a <th>.
1758
- static bool close_current_cell(GumboParser* parser, const GumboToken* token) {
1776
+ static void close_current_cell(GumboParser* parser, const GumboToken* token) {
1777
+ GumboTag cell_tag;
1759
1778
  if (has_an_element_in_table_scope(parser, GUMBO_TAG_TD)) {
1760
1779
  assert(!has_an_element_in_table_scope(parser, GUMBO_TAG_TH));
1761
- return close_table_cell(parser, token, GUMBO_TAG_TD);
1780
+ cell_tag = GUMBO_TAG_TD;
1762
1781
  } else {
1763
1782
  assert(has_an_element_in_table_scope(parser, GUMBO_TAG_TH));
1764
- return close_table_cell(parser, token, GUMBO_TAG_TH);
1783
+ cell_tag = GUMBO_TAG_TH;
1765
1784
  }
1785
+ close_table_cell(parser, token, cell_tag);
1766
1786
  }
1767
1787
 
1768
1788
  // This factors out the "act as if an end tag of tag name 'select' had been
@@ -1819,14 +1839,14 @@ static bool is_special_node(const GumboNode* node) {
1819
1839
  // specified qualified name. If the elements closed are in the set handled by
1820
1840
  // generate_implied_end_tags, this is normal operation. Otherwise, a parse
1821
1841
  // error is recorded. This function does not return a value.
1822
- static bool implicitly_close_tags (
1842
+ static void implicitly_close_tags (
1823
1843
  GumboParser* parser,
1824
1844
  GumboToken* token,
1825
1845
  GumboNamespaceEnum target_ns,
1826
1846
  GumboTag target
1827
1847
  ) {
1828
- bool result = true;
1829
- generate_implied_end_tags(parser, target);
1848
+ assert(target != GUMBO_TAG_UNKNOWN);
1849
+ generate_implied_end_tags(parser, target, NULL);
1830
1850
  if (!node_qualified_tag_is(get_current_node(parser), target_ns, target)) {
1831
1851
  parser_add_parse_error(parser, token);
1832
1852
  while (
@@ -1834,30 +1854,27 @@ static bool implicitly_close_tags (
1834
1854
  ) {
1835
1855
  pop_current_node(parser);
1836
1856
  }
1837
- result = false;
1838
1857
  }
1839
1858
  assert(node_qualified_tag_is(get_current_node(parser), target_ns, target));
1840
1859
  pop_current_node(parser);
1841
- return result;
1842
1860
  }
1843
1861
 
1844
1862
  // If the stack of open elements has a <p> tag in button scope, this acts as if
1845
1863
  // a </p> tag was encountered, implicitly closing tags. Does not return a
1846
1864
  // value. This is a convenience function because this particular
1847
1865
  // clause appears several times in the spec.
1848
- static bool maybe_implicitly_close_p_tag (
1866
+ static void maybe_implicitly_close_p_tag (
1849
1867
  GumboParser* parser,
1850
1868
  GumboToken* token
1851
1869
  ) {
1852
1870
  if (has_an_element_in_button_scope(parser, GUMBO_TAG_P)) {
1853
- return implicitly_close_tags (
1871
+ implicitly_close_tags (
1854
1872
  parser,
1855
1873
  token,
1856
1874
  GUMBO_NAMESPACE_HTML,
1857
1875
  GUMBO_TAG_P
1858
1876
  );
1859
1877
  }
1860
- return true;
1861
1878
  }
1862
1879
 
1863
1880
  // Convenience function to encapsulate the logic for closing <li> or <dd>/<dt>
@@ -1868,7 +1885,7 @@ static void maybe_implicitly_close_list_tag (
1868
1885
  bool is_li
1869
1886
  ) {
1870
1887
  GumboParserState* state = parser->_parser_state;
1871
- state->_frameset_ok = false;
1888
+ set_frameset_not_ok(parser);
1872
1889
  for (int i = state->_open_elements.length; --i >= 0;) {
1873
1890
  const GumboNode* node = state->_open_elements.data[i];
1874
1891
  bool is_list_tag = is_li
@@ -1884,6 +1901,7 @@ static void maybe_implicitly_close_list_tag (
1884
1901
  );
1885
1902
  return;
1886
1903
  }
1904
+
1887
1905
  if (
1888
1906
  is_special_node(node)
1889
1907
  && !node_tag_in_set(node, &(const TagSet){TAG(ADDRESS), TAG(DIV), TAG(P)})
@@ -2009,40 +2027,19 @@ static void adjust_mathml_attributes(GumboToken* token) {
2009
2027
  attr->name = gumbo_strdup("definitionURL");
2010
2028
  }
2011
2029
 
2012
- static bool doctype_matches (
2013
- const GumboTokenDocType* doctype,
2014
- const GumboStringPiece* public_id,
2015
- const GumboStringPiece* system_id,
2016
- bool allow_missing_system_id
2017
- ) {
2018
- return
2019
- !strcmp(doctype->public_identifier, public_id->data)
2020
- && (allow_missing_system_id || doctype->has_system_identifier)
2021
- && !strcmp(doctype->system_identifier, system_id->data);
2022
- }
2023
-
2024
- static bool maybe_add_doctype_error (
2030
+ static void maybe_add_doctype_error (
2025
2031
  GumboParser* parser,
2026
2032
  const GumboToken* token
2027
2033
  ) {
2028
2034
  const GumboTokenDocType* doctype = &token->v.doc_type;
2029
- bool html_doctype = !strcmp(doctype->name, "html");
2030
- if ((!html_doctype || doctype->has_public_identifier ||
2031
- (doctype->has_system_identifier &&
2032
- !strcmp(
2033
- doctype->system_identifier, kSystemIdLegacyCompat.data))) &&
2034
- !(html_doctype && (doctype_matches(doctype, &kPublicIdHtml4_0,
2035
- &kSystemIdRecHtml4_0, true) ||
2036
- doctype_matches(doctype, &kPublicIdHtml4_01,
2037
- &kSystemIdHtml4, true) ||
2038
- doctype_matches(doctype, &kPublicIdXhtml1_0,
2039
- &kSystemIdXhtmlStrict1_1, false) ||
2040
- doctype_matches(doctype, &kPublicIdXhtml1_1,
2041
- &kSystemIdXhtml1_1, false)))) {
2035
+ if (
2036
+ strcmp(doctype->name, "html")
2037
+ || doctype->has_public_identifier
2038
+ || (doctype->has_system_identifier
2039
+ && strcmp(doctype->system_identifier, "about:legacy-compat"))
2040
+ ) {
2042
2041
  parser_add_parse_error(parser, token);
2043
- return false;
2044
2042
  }
2045
- return true;
2046
2043
  }
2047
2044
 
2048
2045
  static void remove_from_parent(GumboNode* node) {
@@ -2067,39 +2064,115 @@ static void remove_from_parent(GumboNode* node) {
2067
2064
  }
2068
2065
  }
2069
2066
 
2067
+ // This is here to clean up memory when the spec says "Ignore current token."
2068
+ static void ignore_token(GumboParser* parser) {
2069
+ GumboToken* token = parser->_parser_state->_current_token;
2070
+ // Ownership of the token's internal buffers is normally transferred to the
2071
+ // element, but if no element is emitted (as happens in non-verbatim-mode
2072
+ // when a token is ignored), we need to free it here to prevent a memory
2073
+ // leak.
2074
+ gumbo_token_destroy(token);
2075
+ #ifndef NDEBUG
2076
+ if (token->type == GUMBO_TOKEN_START_TAG) {
2077
+ // Mark this sentinel so the assertion in the main loop knows it's been
2078
+ // destroyed.
2079
+ token->v.start_tag.attributes = kGumboEmptyVector;
2080
+ token->v.start_tag.name = NULL;
2081
+ }
2082
+ #endif
2083
+ }
2084
+
2085
+ // The token is usually an end tag; however, the adoption agency algorithm may
2086
+ // invoke this for an 'a' or 'nobr' start tag.
2087
+ // Returns nothing; any parse error is recorded via parser_add_parse_error().
2088
+ static void in_body_any_other_end_tag(GumboParser* parser, GumboToken* token)
2089
+ {
2090
+ GumboParserState* state = parser->_parser_state;
2091
+ GumboTag tag;
2092
+ const char* tagname;
2093
+
2094
+ if (token->type == GUMBO_TOKEN_END_TAG) {
2095
+ tag = token->v.end_tag.tag;
2096
+ tagname = token->v.end_tag.name;
2097
+ } else {
2098
+ assert(token->type == GUMBO_TOKEN_START_TAG);
2099
+ tag = token->v.start_tag.tag;
2100
+ tagname = token->v.start_tag.name;
2101
+ }
2102
+
2103
+ assert(state->_open_elements.length > 0);
2104
+ assert(node_html_tag_is(state->_open_elements.data[0], GUMBO_TAG_HTML));
2105
+ // Walk up the stack of open elements until we find one that either:
2106
+ // a) Matches the tag name we saw
2107
+ // b) Is in the "special" category.
2108
+ // If we see a), implicitly close everything up to and including it. If we
2109
+ // see b), then record a parse error, don't close anything (except the
2110
+ // implied end tags) and ignore the end tag token.
2111
+ for (int i = state->_open_elements.length; --i >= 0;) {
2112
+ const GumboNode* node = state->_open_elements.data[i];
2113
+ if (node_qualified_tagname_is(node, GUMBO_NAMESPACE_HTML, tag, tagname)) {
2114
+ generate_implied_end_tags(parser, tag, tagname);
2115
+ // <!DOCTYPE><body><sarcasm><foo></sarcasm> is an example of an error.
2116
+ // foo is the "current node" but sarcasm is node.
2117
+ // XXX: Write a test for this.
2118
+ if (node != get_current_node(parser)) {
2119
+ parser_add_parse_error(parser, token);
2120
+ }
2121
+ while (node != pop_current_node(parser))
2122
+ ; // Pop everything.
2123
+ return;
2124
+ } else if (is_special_node(node)) {
2125
+ parser_add_parse_error(parser, token);
2126
+ ignore_token(parser);
2127
+ return;
2128
+ }
2129
+ }
2130
+ // <html> is in the special category, so we should never get here.
2131
+ assert(0 && "unreachable");
2132
+ }
2133
+
2070
2134
  // https://html.spec.whatwg.org/multipage/parsing.html#an-introduction-to-error-handling-and-strange-cases-in-the-parser
2071
2135
  // Also described in the "in body" handling for end formatting tags.
2072
- static bool adoption_agency_algorithm (
2073
- GumboParser* parser,
2074
- GumboToken* token,
2075
- GumboTag subject
2076
- ) {
2136
+ // Returns nothing; any parse error is recorded via parser_add_parse_error().
2137
+ static void adoption_agency_algorithm(GumboParser* parser, GumboToken* token)
2138
+ {
2077
2139
  GumboParserState* state = parser->_parser_state;
2078
2140
  gumbo_debug("Entering adoption agency algorithm.\n");
2079
2141
  // Step 1.
2142
+ GumboTag subject;
2143
+ if (token->type == GUMBO_TOKEN_START_TAG) {
2144
+ subject = token->v.start_tag.tag;
2145
+ } else {
2146
+ assert(token->type == GUMBO_TOKEN_END_TAG);
2147
+ subject = token->v.end_tag.tag;
2148
+ }
2149
+ assert(subject != GUMBO_TAG_UNKNOWN);
2150
+
2151
+ // Step 2.
2080
2152
  GumboNode* current_node = get_current_node(parser);
2081
2153
  if (
2082
- current_node->v.element.tag_namespace == GUMBO_NAMESPACE_HTML
2083
- && current_node->v.element.tag == subject
2154
+ node_html_tag_is(current_node, subject)
2084
2155
  && -1 == gumbo_vector_index_of (
2085
2156
  &state->_active_formatting_elements,
2086
2157
  current_node
2087
2158
  )
2088
2159
  ) {
2089
2160
  pop_current_node(parser);
2090
- return false;
2161
+ return;
2091
2162
  }
2092
- // Steps 2-4 & 20:
2163
+
2164
+ // Steps 3-5 & 21:
2093
2165
  for (unsigned int i = 0; i < 8; ++i) {
2094
- // Step 5.
2166
+ // Step 6.
2095
2167
  GumboNode* formatting_node = NULL;
2096
2168
  int formatting_node_in_open_elements = -1;
2097
2169
  for (int j = state->_active_formatting_elements.length; --j >= 0;) {
2098
2170
  GumboNode* current_node = state->_active_formatting_elements.data[j];
2099
2171
  if (current_node == &kActiveFormattingScopeMarker) {
2100
2172
  gumbo_debug("Broke on scope marker; aborting.\n");
2101
- // Last scope marker; abort the algorithm.
2102
- return false;
2173
+ // Last scope marker; abort the algorithm and handle according to "any
2174
+ // other end tag" (below).
2175
+ break;
2103
2176
  }
2104
2177
  if (node_html_tag_is(current_node, subject)) {
2105
2178
  // Found it.
@@ -2121,10 +2194,11 @@ static bool adoption_agency_algorithm (
2121
2194
  // "any other end tag" clause (which may potentially add a parse error,
2122
2195
  // but not always).
2123
2196
  gumbo_debug("No active formatting elements; aborting.\n");
2124
- return false;
2197
+ in_body_any_other_end_tag(parser, token);
2198
+ return;
2125
2199
  }
2126
2200
 
2127
- // Step 6
2201
+ // Step 7
2128
2202
  if (formatting_node_in_open_elements == -1) {
2129
2203
  gumbo_debug("Formatting node not on stack of open elements.\n");
2130
2204
  parser_add_parse_error(parser, token);
@@ -2132,25 +2206,24 @@ static bool adoption_agency_algorithm (
2132
2206
  formatting_node,
2133
2207
  &state->_active_formatting_elements
2134
2208
  );
2135
- return false;
2209
+ return;
2136
2210
  }
2137
2211
 
2138
- // Step 7
2212
+ // Step 8
2139
2213
  if (!has_an_element_in_scope(parser, formatting_node->v.element.tag)) {
2140
2214
  parser_add_parse_error(parser, token);
2141
2215
  gumbo_debug("Element not in scope.\n");
2142
- return false;
2216
+ return;
2143
2217
  }
2144
2218
 
2145
- // Step 8
2146
- if (formatting_node != get_current_node(parser)) {
2219
+ // Step 9
2220
+ if (formatting_node != get_current_node(parser))
2147
2221
  parser_add_parse_error(parser, token); // But continue onwards.
2148
- }
2149
2222
  assert(formatting_node);
2150
2223
  assert(!node_html_tag_is(formatting_node, GUMBO_TAG_HTML));
2151
2224
  assert(!node_html_tag_is(formatting_node, GUMBO_TAG_BODY));
2152
2225
 
2153
- // Step 9 & 10
2226
+ // Step 10
2154
2227
  GumboNode* furthest_block = NULL;
2155
2228
  for (
2156
2229
  unsigned int j = formatting_node_in_open_elements;
@@ -2160,32 +2233,27 @@ static bool adoption_agency_algorithm (
2160
2233
  assert(j > 0);
2161
2234
  GumboNode* current = state->_open_elements.data[j];
2162
2235
  if (is_special_node(current)) {
2163
- // Step 9.
2164
2236
  furthest_block = current;
2165
2237
  break;
2166
2238
  }
2167
2239
  }
2240
+ // Step 11.
2168
2241
  if (!furthest_block) {
2169
- // Step 10.
2170
- while (get_current_node(parser) != formatting_node) {
2171
- pop_current_node(parser);
2172
- }
2173
- // And the formatting element itself.
2174
- pop_current_node(parser);
2242
+ while (pop_current_node(parser) != formatting_node)
2243
+ ;
2175
2244
  gumbo_vector_remove (
2176
2245
  formatting_node,
2177
2246
  &state->_active_formatting_elements
2178
2247
  );
2179
- return false;
2248
+ return;
2180
2249
  }
2181
2250
  assert(!node_html_tag_is(furthest_block, GUMBO_TAG_HTML));
2182
- assert(furthest_block);
2183
2251
 
2184
- // Step 11.
2252
+ // Step 12.
2185
2253
  // Elements may be moved and reparented by this algorithm, so
2186
2254
  // common_ancestor is not necessarily the same as formatting_node->parent.
2187
2255
  GumboNode* common_ancestor = state->_open_elements.data [
2188
- gumbo_vector_index_of(&state->_open_elements, formatting_node) - 1
2256
+ formatting_node_in_open_elements - 1
2189
2257
  ];
2190
2258
  gumbo_debug (
2191
2259
  "Common ancestor tag = %s, furthest block tag = %s.\n",
@@ -2193,24 +2261,24 @@ static bool adoption_agency_algorithm (
2193
2261
  gumbo_normalized_tagname(furthest_block->v.element.tag)
2194
2262
  );
2195
2263
 
2196
- // Step 12.
2264
+ // Step 13.
2197
2265
  int bookmark = 1 + gumbo_vector_index_of (
2198
2266
  &state->_active_formatting_elements,
2199
2267
  formatting_node
2200
2268
  );
2201
2269
  gumbo_debug("Bookmark at %d.\n", bookmark);
2202
- // Step 13.
2270
+ // Step 14.
2203
2271
  GumboNode* node = furthest_block;
2204
2272
  GumboNode* last_node = furthest_block;
2205
2273
  // Must be stored explicitly, in case node is removed from the stack of open
2206
- // elements, to handle step 9.4.
2274
+ // elements, to handle step 14.3.
2207
2275
  int saved_node_index = gumbo_vector_index_of(&state->_open_elements, node);
2208
2276
  assert(saved_node_index > 0);
2209
- // Step 13.1.
2277
+ // Step 14.1.
2210
2278
  for (int j = 0;;) {
2211
- // Step 13.2.
2279
+ // Step 14.2.
2212
2280
  ++j;
2213
- // Step 13.3.
2281
+ // Step 14.3.
2214
2282
  int node_index = gumbo_vector_index_of(&state->_open_elements, node);
2215
2283
  gumbo_debug (
2216
2284
  "Current index: %d, last index: %d.\n",
@@ -2225,16 +2293,16 @@ static bool adoption_agency_algorithm (
2225
2293
  assert((unsigned int) node_index < state->_open_elements.capacity);
2226
2294
  node = state->_open_elements.data[node_index];
2227
2295
  assert(node->parent);
2296
+ // Step 14.4.
2228
2297
  if (node == formatting_node) {
2229
- // Step 13.4.
2230
2298
  break;
2231
2299
  }
2232
2300
  int formatting_index = gumbo_vector_index_of (
2233
2301
  &state->_active_formatting_elements,
2234
2302
  node
2235
2303
  );
2304
+ // Step 14.5.
2236
2305
  if (j > 3 && formatting_index != -1) {
2237
- // Step 13.5.
2238
2306
  gumbo_debug("Removing formatting element at %d.\n", formatting_index);
2239
2307
  gumbo_vector_remove_at (
2240
2308
  formatting_index,
@@ -2249,11 +2317,11 @@ static bool adoption_agency_algorithm (
2249
2317
  continue;
2250
2318
  }
2251
2319
  if (formatting_index == -1) {
2252
- // Step 13.6.
2320
+ // Step 14.6.
2253
2321
  gumbo_vector_remove_at(node_index, &state->_open_elements);
2254
2322
  continue;
2255
2323
  }
2256
- // Step 13.7.
2324
+ // Step 14.7.
2257
2325
  // "common ancestor as the intended parent" doesn't actually mean insert
2258
2326
  // it into the common ancestor; that happens below.
2259
2327
  node = clone_node(node, GUMBO_INSERTION_ADOPTION_AGENCY_CLONED);
@@ -2261,21 +2329,21 @@ static bool adoption_agency_algorithm (
2261
2329
  state->_active_formatting_elements.data[formatting_index] = node;
2262
2330
  assert(node_index >= 0);
2263
2331
  state->_open_elements.data[node_index] = node;
2264
- // Step 13.8.
2332
+ // Step 14.8.
2265
2333
  if (last_node == furthest_block) {
2266
2334
  bookmark = formatting_index + 1;
2267
2335
  gumbo_debug("Bookmark moved to %d.\n", bookmark);
2268
2336
  assert((unsigned int) bookmark <= state->_active_formatting_elements.length);
2269
2337
  }
2270
- // Step 13.9.
2338
+ // Step 14.9.
2271
2339
  last_node->parse_flags |= GUMBO_INSERTION_ADOPTION_AGENCY_MOVED;
2272
2340
  remove_from_parent(last_node);
2273
2341
  append_node(node, last_node);
2274
- // Step 13.10.
2342
+ // Step 14.10.
2275
2343
  last_node = node;
2276
- } // Step 13.11.
2344
+ } // Step 14.11.
2277
2345
 
2278
- // Step 14.
2346
+ // Step 15.
2279
2347
  gumbo_debug (
2280
2348
  "Removing %s node from parent ",
2281
2349
  gumbo_normalized_tagname(last_node->v.element.tag)
@@ -2292,14 +2360,14 @@ static bool adoption_agency_algorithm (
2292
2360
  );
2293
2361
  insert_node(last_node, location);
2294
2362
 
2295
- // Step 15.
2363
+ // Step 16.
2296
2364
  GumboNode* new_formatting_node = clone_node (
2297
2365
  formatting_node,
2298
2366
  GUMBO_INSERTION_ADOPTION_AGENCY_CLONED
2299
2367
  );
2300
2368
  formatting_node->parse_flags |= GUMBO_INSERTION_IMPLICIT_END_TAG;
2301
2369
 
2302
- // Step 16. Instead of appending nodes one-by-one, we swap the children
2370
+ // Step 17. Instead of appending nodes one-by-one, we swap the children
2303
2371
  // vector of furthest_block with the empty children of new_formatting_node,
2304
2372
  // reducing memory traffic and allocations. We still have to reset their
2305
2373
  // parent pointers, though.
@@ -2313,10 +2381,10 @@ static bool adoption_agency_algorithm (
2313
2381
  child->parent = new_formatting_node;
2314
2382
  }
2315
2383
 
2316
- // Step 17.
2384
+ // Step 18.
2317
2385
  append_node(furthest_block, new_formatting_node);
2318
2386
 
2319
- // Step 18.
2387
+ // Step 19.
2320
2388
  // If the formatting node was before the bookmark, it may shift over all
2321
2389
  // indices after it, so we need to explicitly find the index and possibly
2322
2390
  // adjust the bookmark.
@@ -2344,7 +2412,7 @@ static bool adoption_agency_algorithm (
2344
2412
  &state->_active_formatting_elements
2345
2413
  );
2346
2414
 
2347
- // Step 19.
2415
+ // Step 20.
2348
2416
  gumbo_vector_remove(formatting_node, &state->_open_elements);
2349
2417
  int insert_at = 1 + gumbo_vector_index_of (
2350
2418
  &state->_open_elements,
@@ -2357,26 +2425,7 @@ static bool adoption_agency_algorithm (
2357
2425
  insert_at,
2358
2426
  &state->_open_elements
2359
2427
  );
2360
- } // Step 20.
2361
- return true;
2362
- }
2363
-
2364
- // This is here to clean up memory when the spec says "Ignore current token."
2365
- static void ignore_token(GumboParser* parser) {
2366
- GumboToken* token = parser->_parser_state->_current_token;
2367
- // Ownership of the token's internal buffers is normally transferred to the
2368
- // element, but if no element is emitted (as happens in non-verbatim-mode
2369
- // when a token is ignored), we need to free it here to prevent a memory
2370
- // leak.
2371
- gumbo_token_destroy(token);
2372
- #ifndef NDEBUG
2373
- if (token->type == GUMBO_TOKEN_START_TAG) {
2374
- // Mark this sentinel so the assertion in the main loop knows it's been
2375
- // destroyed.
2376
- token->v.start_tag.attributes = kGumboEmptyVector;
2377
- token->v.start_tag.name = NULL;
2378
- }
2379
- #endif
2428
+ } // Step 21.
2380
2429
  }
2381
2430
 
2382
2431
  // https://html.spec.whatwg.org/multipage/parsing.html#the-end
@@ -2401,125 +2450,139 @@ static void finish_parsing(GumboParser* parser) {
2401
2450
  ; // Pop them all.
2402
2451
  }
2403
2452
 
2404
- static bool handle_initial(GumboParser* parser, GumboToken* token) {
2453
+ static void handle_initial(GumboParser* parser, GumboToken* token) {
2405
2454
  GumboDocument* document = &get_document_node(parser)->v.document;
2406
2455
  if (token->type == GUMBO_TOKEN_WHITESPACE) {
2407
2456
  ignore_token(parser);
2408
- return true;
2409
- } else if (token->type == GUMBO_TOKEN_COMMENT) {
2457
+ return;
2458
+ }
2459
+ if (token->type == GUMBO_TOKEN_COMMENT) {
2410
2460
  append_comment_node(parser, get_document_node(parser), token);
2411
- return true;
2412
- } else if (token->type == GUMBO_TOKEN_DOCTYPE) {
2461
+ return;
2462
+ }
2463
+ if (token->type == GUMBO_TOKEN_DOCTYPE) {
2413
2464
  document->has_doctype = true;
2414
2465
  document->name = token->v.doc_type.name;
2415
2466
  document->public_identifier = token->v.doc_type.public_identifier;
2416
2467
  document->system_identifier = token->v.doc_type.system_identifier;
2417
2468
  document->doc_type_quirks_mode = compute_quirks_mode(&token->v.doc_type);
2418
2469
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_BEFORE_HTML);
2419
- return maybe_add_doctype_error(parser, token);
2470
+ maybe_add_doctype_error(parser, token);
2471
+ return;
2420
2472
  }
2421
2473
  parser_add_parse_error(parser, token);
2422
2474
  document->doc_type_quirks_mode = GUMBO_DOCTYPE_QUIRKS;
2423
2475
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_BEFORE_HTML);
2424
2476
  parser->_parser_state->_reprocess_current_token = true;
2425
- return true;
2426
2477
  }
2427
2478
 
2428
2479
  // https://html.spec.whatwg.org/multipage/parsing.html#the-before-html-insertion-mode
2429
- static bool handle_before_html(GumboParser* parser, GumboToken* token) {
2480
+ static void handle_before_html(GumboParser* parser, GumboToken* token) {
2430
2481
  if (token->type == GUMBO_TOKEN_DOCTYPE) {
2431
2482
  parser_add_parse_error(parser, token);
2432
2483
  ignore_token(parser);
2433
- return false;
2434
- } else if (token->type == GUMBO_TOKEN_COMMENT) {
2484
+ return;
2485
+ }
2486
+ if (token->type == GUMBO_TOKEN_COMMENT) {
2435
2487
  append_comment_node(parser, get_document_node(parser), token);
2436
- return true;
2437
- } else if (token->type == GUMBO_TOKEN_WHITESPACE) {
2488
+ return;
2489
+ }
2490
+ if (token->type == GUMBO_TOKEN_WHITESPACE) {
2438
2491
  ignore_token(parser);
2439
- return true;
2440
- } else if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
2492
+ return;
2493
+ }
2494
+ if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
2441
2495
  GumboNode* html_node = insert_element_from_token(parser, token);
2442
2496
  parser->_output->root = html_node;
2443
2497
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_BEFORE_HEAD);
2444
- return true;
2445
- } else if (
2498
+ return;
2499
+ }
2500
+ if (
2446
2501
  token->type == GUMBO_TOKEN_END_TAG
2447
2502
  && !tag_in(token, false, &(const TagSet){TAG(HEAD), TAG(BODY), TAG(HTML), TAG(BR)})
2448
2503
  ) {
2449
2504
  parser_add_parse_error(parser, token);
2450
2505
  ignore_token(parser);
2451
- return false;
2452
- } else {
2453
- GumboNode* html_node = insert_element_of_tag_type (
2454
- parser,
2455
- GUMBO_TAG_HTML,
2456
- GUMBO_INSERTION_IMPLIED
2457
- );
2458
- assert(html_node);
2459
- parser->_output->root = html_node;
2460
- set_insertion_mode(parser, GUMBO_INSERTION_MODE_BEFORE_HEAD);
2461
- parser->_parser_state->_reprocess_current_token = true;
2462
- return true;
2506
+ return;
2463
2507
  }
2508
+ GumboNode* html_node = insert_element_of_tag_type (
2509
+ parser,
2510
+ GUMBO_TAG_HTML,
2511
+ GUMBO_INSERTION_IMPLIED
2512
+ );
2513
+ assert(html_node);
2514
+ parser->_output->root = html_node;
2515
+ set_insertion_mode(parser, GUMBO_INSERTION_MODE_BEFORE_HEAD);
2516
+ parser->_parser_state->_reprocess_current_token = true;
2464
2517
  }
2465
2518
 
2519
+ // Forward declarations because of mutual dependencies.
2520
+ static void handle_token(GumboParser* parser, GumboToken* token);
2521
+ static void handle_in_body(GumboParser* parser, GumboToken* token);
2522
+ static void handle_in_template(GumboParser* parser, GumboToken* token);
2523
+
2466
2524
  // https://html.spec.whatwg.org/multipage/parsing.html#the-before-head-insertion-mode
2467
- static bool handle_before_head(GumboParser* parser, GumboToken* token) {
2468
- if (token->type == GUMBO_TOKEN_DOCTYPE) {
2469
- parser_add_parse_error(parser, token);
2525
+ static void handle_before_head(GumboParser* parser, GumboToken* token) {
2526
+ if (token->type == GUMBO_TOKEN_WHITESPACE) {
2470
2527
  ignore_token(parser);
2471
- return false;
2472
- } else if (token->type == GUMBO_TOKEN_COMMENT) {
2528
+ return;
2529
+ }
2530
+ if (token->type == GUMBO_TOKEN_COMMENT) {
2473
2531
  append_comment_node(parser, get_current_node(parser), token);
2474
- return true;
2475
- } else if (token->type == GUMBO_TOKEN_WHITESPACE) {
2532
+ return;
2533
+ }
2534
+ if (token->type == GUMBO_TOKEN_DOCTYPE) {
2535
+ parser_add_parse_error(parser, token);
2476
2536
  ignore_token(parser);
2477
- return true;
2478
- } else if (tag_is(token, kStartTag, GUMBO_TAG_HEAD)) {
2537
+ return;
2538
+ }
2539
+ if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
2540
+ handle_in_body(parser, token);
2541
+ return;
2542
+ }
2543
+ if (tag_is(token, kStartTag, GUMBO_TAG_HEAD)) {
2479
2544
  GumboNode* node = insert_element_from_token(parser, token);
2480
- set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_HEAD);
2481
2545
  parser->_parser_state->_head_element = node;
2482
- return true;
2483
- } else if (
2546
+ set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_HEAD);
2547
+ return;
2548
+ }
2549
+ if (
2484
2550
  token->type == GUMBO_TOKEN_END_TAG
2485
- && !tag_in(token, false, &(const TagSet){TAG(HEAD), TAG(BODY), TAG(HTML), TAG(BR)})
2551
+ && !tag_in(token, kEndTag, &(const TagSet){TAG(HEAD), TAG(BODY), TAG(HTML), TAG(BR)})
2486
2552
  ) {
2487
2553
  parser_add_parse_error(parser, token);
2488
2554
  ignore_token(parser);
2489
- return false;
2490
- } else {
2491
- GumboNode* node = insert_element_of_tag_type (
2492
- parser,
2493
- GUMBO_TAG_HEAD,
2494
- GUMBO_INSERTION_IMPLIED
2495
- );
2496
- set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_HEAD);
2497
- parser->_parser_state->_head_element = node;
2498
- parser->_parser_state->_reprocess_current_token = true;
2499
- return true;
2555
+ return;
2500
2556
  }
2557
+ GumboNode* node = insert_element_of_tag_type (
2558
+ parser,
2559
+ GUMBO_TAG_HEAD,
2560
+ GUMBO_INSERTION_IMPLIED
2561
+ );
2562
+ parser->_parser_state->_head_element = node;
2563
+ set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_HEAD);
2564
+ parser->_parser_state->_reprocess_current_token = true;
2501
2565
  }
2502
2566
 
2503
- // Forward declarations because of mutual dependencies.
2504
- static bool handle_token(GumboParser* parser, GumboToken* token);
2505
- static bool handle_in_body(GumboParser* parser, GumboToken* token);
2506
- static bool handle_in_template(GumboParser* parser, GumboToken* token);
2507
-
2508
2567
  // https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inhead
2509
- static bool handle_in_head(GumboParser* parser, GumboToken* token) {
2568
+ static void handle_in_head(GumboParser* parser, GumboToken* token) {
2510
2569
  if (token->type == GUMBO_TOKEN_WHITESPACE) {
2511
2570
  insert_text_token(parser, token);
2512
- return true;
2513
- } else if (token->type == GUMBO_TOKEN_DOCTYPE) {
2571
+ return;
2572
+ }
2573
+ if (token->type == GUMBO_TOKEN_COMMENT) {
2574
+ append_comment_node(parser, get_current_node(parser), token);
2575
+ return;
2576
+ }
2577
+ if (token->type == GUMBO_TOKEN_DOCTYPE) {
2514
2578
  parser_add_parse_error(parser, token);
2515
2579
  ignore_token(parser);
2516
- return false;
2517
- } else if (token->type == GUMBO_TOKEN_COMMENT) {
2518
- append_comment_node(parser, get_current_node(parser), token);
2519
- return true;
2520
- } else if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
2580
+ return;
2581
+ }
2582
+ if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
2521
2583
  return handle_in_body(parser, token);
2522
- } else if (
2584
+ }
2585
+ if (
2523
2586
  tag_in(token, kStartTag, &(const TagSet) {
2524
2587
  TAG(BASE), TAG(BASEFONT), TAG(BGSOUND), TAG(LINK)
2525
2588
  })
@@ -2527,8 +2590,9 @@ static bool handle_in_head(GumboParser* parser, GumboToken* token) {
2527
2590
  insert_element_from_token(parser, token);
2528
2591
  pop_current_node(parser);
2529
2592
  acknowledge_self_closing_tag(parser);
2530
- return true;
2531
- } else if (tag_is(token, kStartTag, GUMBO_TAG_META)) {
2593
+ return;
2594
+ }
2595
+ if (tag_is(token, kStartTag, GUMBO_TAG_META)) {
2532
2596
  insert_element_from_token(parser, token);
2533
2597
  pop_current_node(parser);
2534
2598
  acknowledge_self_closing_tag(parser);
@@ -2536,90 +2600,98 @@ static bool handle_in_head(GumboParser* parser, GumboToken* token) {
2536
2600
  // spec doesn't apply. If clients want to handle meta-tag re-encoding, they
2537
2601
  // should specifically look for that string in the document and re-encode it
2538
2602
  // before passing to Gumbo.
2539
- return true;
2540
- } else if (tag_is(token, kStartTag, GUMBO_TAG_TITLE)) {
2603
+ return;
2604
+ }
2605
+ if (tag_is(token, kStartTag, GUMBO_TAG_TITLE)) {
2541
2606
  run_generic_parsing_algorithm(parser, token, GUMBO_LEX_RCDATA);
2542
- return true;
2543
- } else if (
2607
+ return;
2608
+ }
2609
+ if (
2544
2610
  tag_in(token, kStartTag, &(const TagSet){TAG(NOFRAMES), TAG(STYLE)})
2545
2611
  ) {
2546
2612
  run_generic_parsing_algorithm(parser, token, GUMBO_LEX_RAWTEXT);
2547
- return true;
2548
- } else if (tag_is(token, kStartTag, GUMBO_TAG_NOSCRIPT)) {
2613
+ return;
2614
+ }
2615
+ if (tag_is(token, kStartTag, GUMBO_TAG_NOSCRIPT)) {
2549
2616
  insert_element_from_token(parser, token);
2550
2617
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_HEAD_NOSCRIPT);
2551
- return true;
2552
- } else if (tag_is(token, kStartTag, GUMBO_TAG_SCRIPT)) {
2553
- run_generic_parsing_algorithm(parser, token, GUMBO_LEX_SCRIPT);
2554
- return true;
2555
- } else if (tag_is(token, kEndTag, GUMBO_TAG_HEAD)) {
2618
+ return;
2619
+ }
2620
+ if (tag_is(token, kStartTag, GUMBO_TAG_SCRIPT)) {
2621
+ run_generic_parsing_algorithm(parser, token, GUMBO_LEX_SCRIPT_DATA);
2622
+ return;
2623
+ }
2624
+ if (tag_is(token, kEndTag, GUMBO_TAG_HEAD)) {
2556
2625
  GumboNode* head = pop_current_node(parser);
2557
2626
  UNUSED_IF_NDEBUG(head);
2558
2627
  assert(node_html_tag_is(head, GUMBO_TAG_HEAD));
2559
2628
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_HEAD);
2560
- return true;
2561
- } else if (
2629
+ return;
2630
+ }
2631
+ if (
2562
2632
  tag_in(token, kEndTag, &(const TagSet){TAG(BODY), TAG(HTML), TAG(BR)})
2563
2633
  ) {
2564
2634
  pop_current_node(parser);
2565
2635
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_HEAD);
2566
2636
  parser->_parser_state->_reprocess_current_token = true;
2567
- return true;
2568
- } else if (tag_is(token, kStartTag, GUMBO_TAG_TEMPLATE)) {
2637
+ return;
2638
+ }
2639
+ if (tag_is(token, kStartTag, GUMBO_TAG_TEMPLATE)) {
2569
2640
  insert_element_from_token(parser, token);
2570
2641
  add_formatting_element(parser, &kActiveFormattingScopeMarker);
2571
- parser->_parser_state->_frameset_ok = false;
2642
+ set_frameset_not_ok(parser);
2572
2643
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TEMPLATE);
2573
2644
  push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TEMPLATE);
2574
- return true;
2575
- } else if (tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
2645
+ return;
2646
+ }
2647
+ if (tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
2576
2648
  if (!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
2577
2649
  parser_add_parse_error(parser, token);
2578
2650
  ignore_token(parser);
2579
- return false;
2651
+ return;
2580
2652
  }
2581
2653
  generate_all_implied_end_tags_thoroughly(parser);
2582
- bool success = true;
2583
- if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_TEMPLATE)) {
2654
+ if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_TEMPLATE))
2584
2655
  parser_add_parse_error(parser, token);
2585
- success = false;
2586
- }
2587
2656
  while (!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_TEMPLATE))
2588
2657
  ;
2589
2658
  clear_active_formatting_elements(parser);
2590
2659
  pop_template_insertion_mode(parser);
2591
2660
  reset_insertion_mode_appropriately(parser);
2592
- return success;
2593
- } else if (
2661
+ return;
2662
+ }
2663
+ if (
2594
2664
  tag_is(token, kStartTag, GUMBO_TAG_HEAD)
2595
2665
  || (token->type == GUMBO_TOKEN_END_TAG)
2596
2666
  ) {
2597
2667
  parser_add_parse_error(parser, token);
2598
2668
  ignore_token(parser);
2599
- return false;
2600
- } else {
2601
- pop_current_node(parser);
2602
- set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_HEAD);
2603
- parser->_parser_state->_reprocess_current_token = true;
2604
- return true;
2669
+ return;
2605
2670
  }
2606
- return true;
2671
+ pop_current_node(parser);
2672
+ set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_HEAD);
2673
+ parser->_parser_state->_reprocess_current_token = true;
2674
+ return;
2607
2675
  }
2608
2676
 
2609
2677
  // https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inheadnoscript
2610
- static bool handle_in_head_noscript(GumboParser* parser, GumboToken* token) {
2678
+ static void handle_in_head_noscript(GumboParser* parser, GumboToken* token) {
2611
2679
  if (token->type == GUMBO_TOKEN_DOCTYPE) {
2612
2680
  parser_add_parse_error(parser, token);
2613
- return false;
2614
- } else if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
2615
- return handle_in_body(parser, token);
2616
- } else if (tag_is(token, kEndTag, GUMBO_TAG_NOSCRIPT)) {
2681
+ return;
2682
+ }
2683
+ if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
2684
+ handle_in_body(parser, token);
2685
+ return;
2686
+ }
2687
+ if (tag_is(token, kEndTag, GUMBO_TAG_NOSCRIPT)) {
2617
2688
  const GumboNode* node = pop_current_node(parser);
2618
2689
  assert(node_html_tag_is(node, GUMBO_TAG_NOSCRIPT));
2619
2690
  UNUSED_IF_NDEBUG(node);
2620
2691
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_HEAD);
2621
- return true;
2622
- } else if (
2692
+ return;
2693
+ }
2694
+ if (
2623
2695
  token->type == GUMBO_TOKEN_WHITESPACE
2624
2696
  || token->type == GUMBO_TOKEN_COMMENT
2625
2697
  || tag_in (token, kStartTag, &(const TagSet) {
@@ -2627,8 +2699,10 @@ static bool handle_in_head_noscript(GumboParser* parser, GumboToken* token) {
2627
2699
  TAG(META), TAG(NOFRAMES), TAG(STYLE)
2628
2700
  })
2629
2701
  ) {
2630
- return handle_in_head(parser, token);
2631
- } else if (
2702
+ handle_in_head(parser, token);
2703
+ return;
2704
+ }
2705
+ if (
2632
2706
  tag_in(token, kStartTag, &(const TagSet){TAG(HEAD), TAG(NOSCRIPT)})
2633
2707
  || (
2634
2708
  token->type == GUMBO_TOKEN_END_TAG
@@ -2637,43 +2711,48 @@ static bool handle_in_head_noscript(GumboParser* parser, GumboToken* token) {
2637
2711
  ) {
2638
2712
  parser_add_parse_error(parser, token);
2639
2713
  ignore_token(parser);
2640
- return false;
2641
- } else {
2642
- parser_add_parse_error(parser, token);
2643
- const GumboNode* node = pop_current_node(parser);
2644
- assert(node_html_tag_is(node, GUMBO_TAG_NOSCRIPT));
2645
- UNUSED_IF_NDEBUG(node);
2646
- set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_HEAD);
2647
- parser->_parser_state->_reprocess_current_token = true;
2648
- return false;
2714
+ return;
2649
2715
  }
2716
+ parser_add_parse_error(parser, token);
2717
+ const GumboNode* node = pop_current_node(parser);
2718
+ assert(node_html_tag_is(node, GUMBO_TAG_NOSCRIPT));
2719
+ UNUSED_IF_NDEBUG(node);
2720
+ set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_HEAD);
2721
+ parser->_parser_state->_reprocess_current_token = true;
2650
2722
  }
2651
2723
 
2652
2724
  // https://html.spec.whatwg.org/multipage/parsing.html#the-after-head-insertion-mode
2653
- static bool handle_after_head(GumboParser* parser, GumboToken* token) {
2725
+ static void handle_after_head(GumboParser* parser, GumboToken* token) {
2654
2726
  GumboParserState* state = parser->_parser_state;
2655
2727
  if (token->type == GUMBO_TOKEN_WHITESPACE) {
2656
2728
  insert_text_token(parser, token);
2657
- return true;
2658
- } else if (token->type == GUMBO_TOKEN_DOCTYPE) {
2729
+ return;
2730
+ }
2731
+ if (token->type == GUMBO_TOKEN_COMMENT) {
2732
+ append_comment_node(parser, get_current_node(parser), token);
2733
+ return;
2734
+ }
2735
+ if (token->type == GUMBO_TOKEN_DOCTYPE) {
2659
2736
  parser_add_parse_error(parser, token);
2660
2737
  ignore_token(parser);
2661
- return false;
2662
- } else if (token->type == GUMBO_TOKEN_COMMENT) {
2663
- append_comment_node(parser, get_current_node(parser), token);
2664
- return true;
2665
- } else if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
2666
- return handle_in_body(parser, token);
2667
- } else if (tag_is(token, kStartTag, GUMBO_TAG_BODY)) {
2738
+ return;
2739
+ }
2740
+ if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
2741
+ handle_in_body(parser, token);
2742
+ return;
2743
+ }
2744
+ if (tag_is(token, kStartTag, GUMBO_TAG_BODY)) {
2668
2745
  insert_element_from_token(parser, token);
2669
- state->_frameset_ok = false;
2746
+ set_frameset_not_ok(parser);
2670
2747
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_BODY);
2671
- return true;
2672
- } else if (tag_is(token, kStartTag, GUMBO_TAG_FRAMESET)) {
2748
+ return;
2749
+ }
2750
+ if (tag_is(token, kStartTag, GUMBO_TAG_FRAMESET)) {
2673
2751
  insert_element_from_token(parser, token);
2674
2752
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_FRAMESET);
2675
- return true;
2676
- } else if (
2753
+ return;
2754
+ }
2755
+ if (
2677
2756
  tag_in(token, kStartTag, &(const TagSet) {
2678
2757
  TAG(BASE), TAG(BASEFONT), TAG(BGSOUND), TAG(LINK), TAG(META),
2679
2758
  TAG(NOFRAMES), TAG(SCRIPT), TAG(STYLE), TAG(TEMPLATE), TAG(TITLE)
@@ -2685,12 +2764,15 @@ static bool handle_after_head(GumboParser* parser, GumboToken* token) {
2685
2764
  // pending character tokens that should be attached to the root.
2686
2765
  maybe_flush_text_node_buffer(parser);
2687
2766
  gumbo_vector_add(state->_head_element, &state->_open_elements);
2688
- bool result = handle_in_head(parser, token);
2767
+ handle_in_head(parser, token);
2689
2768
  gumbo_vector_remove(state->_head_element, &state->_open_elements);
2690
- return result;
2691
- } else if (tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
2692
- return handle_in_head(parser, token);
2693
- } else if (
2769
+ return;
2770
+ }
2771
+ if (tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)) {
2772
+ handle_in_head(parser, token);
2773
+ return;
2774
+ }
2775
+ if (
2694
2776
  tag_is(token, kStartTag, GUMBO_TAG_HEAD)
2695
2777
  || (
2696
2778
  token->type == GUMBO_TOKEN_END_TAG
@@ -2699,53 +2781,57 @@ static bool handle_after_head(GumboParser* parser, GumboToken* token) {
2699
2781
  ) {
2700
2782
  parser_add_parse_error(parser, token);
2701
2783
  ignore_token(parser);
2702
- return false;
2703
- } else {
2704
- insert_element_of_tag_type(parser, GUMBO_TAG_BODY, GUMBO_INSERTION_IMPLIED);
2705
- set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_BODY);
2706
- state->_reprocess_current_token = true;
2707
- return true;
2784
+ return;
2708
2785
  }
2786
+ insert_element_of_tag_type(parser, GUMBO_TAG_BODY, GUMBO_INSERTION_IMPLIED);
2787
+ set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_BODY);
2788
+ state->_reprocess_current_token = true;
2709
2789
  }
2710
2790
 
2711
2791
  // https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inbody
2712
- static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2792
+ static void handle_in_body(GumboParser* parser, GumboToken* token) {
2713
2793
  GumboParserState* state = parser->_parser_state;
2714
2794
  assert(state->_open_elements.length > 0);
2715
2795
  if (token->type == GUMBO_TOKEN_NULL) {
2716
2796
  parser_add_parse_error(parser, token);
2717
2797
  ignore_token(parser);
2718
- return false;
2719
- } else if (token->type == GUMBO_TOKEN_WHITESPACE) {
2798
+ return;
2799
+ }
2800
+ if (token->type == GUMBO_TOKEN_WHITESPACE) {
2720
2801
  reconstruct_active_formatting_elements(parser);
2721
2802
  insert_text_token(parser, token);
2722
- return true;
2723
- } else if (
2803
+ return;
2804
+ }
2805
+ if (
2724
2806
  token->type == GUMBO_TOKEN_CHARACTER
2725
2807
  || token->type == GUMBO_TOKEN_CDATA
2726
2808
  ) {
2727
2809
  reconstruct_active_formatting_elements(parser);
2728
2810
  insert_text_token(parser, token);
2729
2811
  set_frameset_not_ok(parser);
2730
- return true;
2731
- } else if (token->type == GUMBO_TOKEN_COMMENT) {
2812
+ return;
2813
+ }
2814
+ if (token->type == GUMBO_TOKEN_COMMENT) {
2732
2815
  append_comment_node(parser, get_current_node(parser), token);
2733
- return true;
2734
- } else if (token->type == GUMBO_TOKEN_DOCTYPE) {
2816
+ return;
2817
+ }
2818
+ if (token->type == GUMBO_TOKEN_DOCTYPE) {
2735
2819
  parser_add_parse_error(parser, token);
2736
2820
  ignore_token(parser);
2737
- return false;
2738
- } else if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
2821
+ return;
2822
+ }
2823
+ if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
2739
2824
  parser_add_parse_error(parser, token);
2740
2825
  if (has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
2741
2826
  ignore_token(parser);
2742
- return false;
2827
+ return;
2743
2828
  }
2744
2829
  assert(parser->_output->root != NULL);
2745
2830
  assert(parser->_output->root->type == GUMBO_NODE_ELEMENT);
2746
2831
  merge_attributes(token, parser->_output->root);
2747
- return false;
2748
- } else if (
2832
+ return;
2833
+ }
2834
+ if (
2749
2835
  tag_in(token, kStartTag, &(const TagSet) {
2750
2836
  TAG(BASE), TAG(BASEFONT), TAG(BGSOUND), TAG(LINK),
2751
2837
  TAG(META), TAG(NOFRAMES), TAG(SCRIPT), TAG(STYLE), TAG(TEMPLATE),
@@ -2753,8 +2839,10 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2753
2839
  })
2754
2840
  || tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)
2755
2841
  ) {
2756
- return handle_in_head(parser, token);
2757
- } else if (tag_is(token, kStartTag, GUMBO_TAG_BODY)) {
2842
+ handle_in_head(parser, token);
2843
+ return;
2844
+ }
2845
+ if (tag_is(token, kStartTag, GUMBO_TAG_BODY)) {
2758
2846
  parser_add_parse_error(parser, token);
2759
2847
  if (
2760
2848
  state->_open_elements.length < 2
@@ -2762,12 +2850,13 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2762
2850
  || has_open_element(parser, GUMBO_TAG_TEMPLATE)
2763
2851
  ) {
2764
2852
  ignore_token(parser);
2765
- return false;
2853
+ } else {
2854
+ set_frameset_not_ok(parser);
2855
+ merge_attributes(token, state->_open_elements.data[1]);
2766
2856
  }
2767
- state->_frameset_ok = false;
2768
- merge_attributes(token, state->_open_elements.data[1]);
2769
- return false;
2770
- } else if (tag_is(token, kStartTag, GUMBO_TAG_FRAMESET)) {
2857
+ return;
2858
+ }
2859
+ if (tag_is(token, kStartTag, GUMBO_TAG_FRAMESET)) {
2771
2860
  parser_add_parse_error(parser, token);
2772
2861
  if (
2773
2862
  state->_open_elements.length < 2
@@ -2775,7 +2864,7 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2775
2864
  || !state->_frameset_ok
2776
2865
  ) {
2777
2866
  ignore_token(parser);
2778
- return false;
2867
+ return;
2779
2868
  }
2780
2869
  // Save the body node for later removal.
2781
2870
  GumboNode* body_node = state->_open_elements.data[1];
@@ -2807,80 +2896,74 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2807
2896
  // Insert the <frameset>, and switch the insertion mode.
2808
2897
  insert_element_from_token(parser, token);
2809
2898
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_FRAMESET);
2810
- return true;
2811
- } else if (token->type == GUMBO_TOKEN_EOF) {
2812
- for (unsigned int i = 0; i < state->_open_elements.length; ++i) {
2813
- if (
2814
- !node_tag_in_set(state->_open_elements.data[i], &(const TagSet) {
2815
- TAG(DD), TAG(DT), TAG(LI), TAG(P), TAG(TBODY), TAG(TD), TAG(TFOOT),
2816
- TAG(TH), TAG(THEAD), TAG(TR), TAG(BODY), TAG(HTML)
2817
- })
2818
- ) {
2819
- parser_add_parse_error(parser, token);
2820
- }
2821
- }
2899
+ return;
2900
+ }
2901
+ if (token->type == GUMBO_TOKEN_EOF) {
2822
2902
  if (get_current_template_insertion_mode(parser) !=
2823
2903
  GUMBO_INSERTION_MODE_INITIAL) {
2824
- return handle_in_template(parser, token);
2904
+ handle_in_template(parser, token);
2905
+ return;
2825
2906
  }
2826
- return true;
2827
- } else if (tag_in(token, kEndTag, &(const TagSet){TAG(BODY), TAG(HTML)})) {
2907
+ if (stack_contains_nonclosable_element(parser))
2908
+ parser_add_parse_error(parser, token);
2909
+ return;
2910
+ }
2911
+ if (tag_is(token, kEndTag, GUMBO_TAG_BODY)) {
2828
2912
  if (!has_an_element_in_scope(parser, GUMBO_TAG_BODY)) {
2829
2913
  parser_add_parse_error(parser, token);
2830
2914
  ignore_token(parser);
2831
- return false;
2832
- }
2833
- bool success = true;
2834
- for (unsigned int i = 0; i < state->_open_elements.length; ++i) {
2835
- if (
2836
- !node_tag_in_set(state->_open_elements.data[i], &(const TagSet) {
2837
- TAG(DD), TAG(DT), TAG(LI), TAG(OPTGROUP), TAG(OPTION), TAG(P),
2838
- TAG(RB), TAG(RP), TAG(RT), TAG(RTC), TAG(TBODY), TAG(TD),
2839
- TAG(TFOOT), TAG(TH), TAG(THEAD), TAG(TR), TAG(BODY), TAG(HTML)
2840
- })
2841
- ) {
2842
- parser_add_parse_error(parser, token);
2843
- success = false;
2844
- break;
2845
- }
2915
+ return;
2846
2916
  }
2917
+ if (stack_contains_nonclosable_element(parser))
2918
+ parser_add_parse_error(parser, token);
2919
+ GumboNode* body = state->_open_elements.data[1];
2920
+ assert(node_html_tag_is(body, GUMBO_TAG_BODY));
2921
+ record_end_of_element(state->_current_token, &body->v.element);
2847
2922
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_BODY);
2848
- if (tag_is(token, kEndTag, GUMBO_TAG_HTML)) {
2849
- parser->_parser_state->_reprocess_current_token = true;
2850
- } else {
2851
- GumboNode* body = state->_open_elements.data[1];
2852
- assert(node_html_tag_is(body, GUMBO_TAG_BODY));
2853
- record_end_of_element(state->_current_token, &body->v.element);
2923
+ return;
2924
+ }
2925
+ if (tag_is(token, kEndTag, GUMBO_TAG_HTML)) {
2926
+ if (!has_an_element_in_scope(parser, GUMBO_TAG_BODY)) {
2927
+ parser_add_parse_error(parser, token);
2928
+ ignore_token(parser);
2929
+ return;
2854
2930
  }
2855
- return success;
2856
- } else if (
2931
+ if (stack_contains_nonclosable_element(parser))
2932
+ parser_add_parse_error(parser, token);
2933
+ set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_BODY);
2934
+ parser->_parser_state->_reprocess_current_token = true;
2935
+ return;
2936
+ }
2937
+ if (
2857
2938
  tag_in(token, kStartTag, &(const TagSet) {
2858
2939
  TAG(ADDRESS), TAG(ARTICLE), TAG(ASIDE), TAG(BLOCKQUOTE), TAG(CENTER),
2859
2940
  TAG(DETAILS), TAG(DIALOG), TAG(DIR), TAG(DIV), TAG(DL), TAG(FIELDSET),
2860
2941
  TAG(FIGCAPTION), TAG(FIGURE), TAG(FOOTER), TAG(HEADER), TAG(HGROUP),
2861
- TAG(MENU), TAG(MAIN), TAG(NAV), TAG(OL), TAG(P), TAG(SECTION),
2942
+ TAG(MAIN), TAG(MENU), TAG(NAV), TAG(OL), TAG(P), TAG(SECTION),
2862
2943
  TAG(SUMMARY), TAG(UL)
2863
2944
  })
2864
2945
  ) {
2865
- bool result = maybe_implicitly_close_p_tag(parser, token);
2946
+ maybe_implicitly_close_p_tag(parser, token);
2866
2947
  insert_element_from_token(parser, token);
2867
- return result;
2868
- } else if (tag_in(token, kStartTag, &heading_tags)) {
2869
- bool result = maybe_implicitly_close_p_tag(parser, token);
2948
+ return;
2949
+ }
2950
+ if (tag_in(token, kStartTag, &heading_tags)) {
2951
+ maybe_implicitly_close_p_tag(parser, token);
2870
2952
  if (node_tag_in_set(get_current_node(parser), &heading_tags)) {
2871
2953
  parser_add_parse_error(parser, token);
2872
2954
  pop_current_node(parser);
2873
- result = false;
2874
2955
  }
2875
2956
  insert_element_from_token(parser, token);
2876
- return result;
2877
- } else if (tag_in(token, kStartTag, &(const TagSet){TAG(PRE), TAG(LISTING)})) {
2878
- bool result = maybe_implicitly_close_p_tag(parser, token);
2957
+ return;
2958
+ }
2959
+ if (tag_in(token, kStartTag, &(const TagSet){TAG(PRE), TAG(LISTING)})) {
2960
+ maybe_implicitly_close_p_tag(parser, token);
2879
2961
  insert_element_from_token(parser, token);
2880
2962
  state->_ignore_next_linefeed = true;
2881
- state->_frameset_ok = false;
2882
- return result;
2883
- } else if (tag_is(token, kStartTag, GUMBO_TAG_FORM)) {
2963
+ set_frameset_not_ok(parser);
2964
+ return;
2965
+ }
2966
+ if (tag_is(token, kStartTag, GUMBO_TAG_FORM)) {
2884
2967
  if (
2885
2968
  state->_form_element != NULL
2886
2969
  && !has_open_element(parser, GUMBO_TAG_TEMPLATE)
@@ -2888,46 +2971,48 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2888
2971
  gumbo_debug("Ignoring nested form.\n");
2889
2972
  parser_add_parse_error(parser, token);
2890
2973
  ignore_token(parser);
2891
- return false;
2974
+ return;
2892
2975
  }
2893
- bool result = maybe_implicitly_close_p_tag(parser, token);
2976
+ maybe_implicitly_close_p_tag(parser, token);
2894
2977
  GumboNode* form_element = insert_element_from_token(parser, token);
2895
2978
  if (!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
2896
2979
  state->_form_element = form_element;
2897
2980
  }
2898
- return result;
2899
- } else if (tag_is(token, kStartTag, GUMBO_TAG_LI)) {
2981
+ return;
2982
+ }
2983
+ if (tag_is(token, kStartTag, GUMBO_TAG_LI)) {
2900
2984
  maybe_implicitly_close_list_tag(parser, token, true);
2901
- bool result = maybe_implicitly_close_p_tag(parser, token);
2985
+ maybe_implicitly_close_p_tag(parser, token);
2902
2986
  insert_element_from_token(parser, token);
2903
- return result;
2904
- } else if (tag_in(token, kStartTag, &dd_dt_tags)) {
2987
+ return;
2988
+ }
2989
+ if (tag_in(token, kStartTag, &dd_dt_tags)) {
2905
2990
  maybe_implicitly_close_list_tag(parser, token, false);
2906
- bool result = maybe_implicitly_close_p_tag(parser, token);
2991
+ maybe_implicitly_close_p_tag(parser, token);
2907
2992
  insert_element_from_token(parser, token);
2908
- return result;
2909
- } else if (tag_is(token, kStartTag, GUMBO_TAG_PLAINTEXT)) {
2910
- bool result = maybe_implicitly_close_p_tag(parser, token);
2993
+ return;
2994
+ }
2995
+ if (tag_is(token, kStartTag, GUMBO_TAG_PLAINTEXT)) {
2996
+ maybe_implicitly_close_p_tag(parser, token);
2911
2997
  insert_element_from_token(parser, token);
2912
2998
  gumbo_tokenizer_set_state(parser, GUMBO_LEX_PLAINTEXT);
2913
- return result;
2914
- } else if (tag_is(token, kStartTag, GUMBO_TAG_BUTTON)) {
2999
+ return;
3000
+ }
3001
+ if (tag_is(token, kStartTag, GUMBO_TAG_BUTTON)) {
2915
3002
  if (has_an_element_in_scope(parser, GUMBO_TAG_BUTTON)) {
2916
3003
  parser_add_parse_error(parser, token);
2917
- implicitly_close_tags (
2918
- parser,
2919
- token,
2920
- GUMBO_NAMESPACE_HTML,
2921
- GUMBO_TAG_BUTTON
2922
- );
2923
- state->_reprocess_current_token = true;
2924
- return false;
3004
+ // We don't want to use implicitly_close_tags here because it may add an
3005
+ // error and we've already added the only error the standard specifies.
3006
+ generate_implied_end_tags(parser, GUMBO_TAG_LAST, NULL);
3007
+ while (!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_BUTTON))
3008
+ ;
2925
3009
  }
2926
3010
  reconstruct_active_formatting_elements(parser);
2927
3011
  insert_element_from_token(parser, token);
2928
- state->_frameset_ok = false;
2929
- return true;
2930
- } else if (
3012
+ set_frameset_not_ok(parser);
3013
+ return;
3014
+ }
3015
+ if (
2931
3016
  tag_in(token, kEndTag, &(const TagSet) {
2932
3017
  TAG(ADDRESS), TAG(ARTICLE), TAG(ASIDE), TAG(BLOCKQUOTE), TAG(BUTTON),
2933
3018
  TAG(CENTER), TAG(DETAILS), TAG(DIALOG), TAG(DIR), TAG(DIV), TAG(DL),
@@ -2940,33 +3025,29 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2940
3025
  if (!has_an_element_in_scope(parser, tag)) {
2941
3026
  parser_add_parse_error(parser, token);
2942
3027
  ignore_token(parser);
2943
- return false;
3028
+ return;
2944
3029
  }
2945
- implicitly_close_tags (
3030
+ return implicitly_close_tags (
2946
3031
  parser,
2947
3032
  token,
2948
3033
  GUMBO_NAMESPACE_HTML,
2949
3034
  token->v.end_tag.tag
2950
3035
  );
2951
- return true;
2952
- } else if (tag_is(token, kEndTag, GUMBO_TAG_FORM)) {
3036
+ }
3037
+ if (tag_is(token, kEndTag, GUMBO_TAG_FORM)) {
2953
3038
  if (has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
2954
3039
  if (!has_an_element_in_scope(parser, GUMBO_TAG_FORM)) {
2955
3040
  parser_add_parse_error(parser, token);
2956
3041
  ignore_token(parser);
2957
- return false;
3042
+ return;
2958
3043
  }
2959
- bool success = true;
2960
- generate_implied_end_tags(parser, GUMBO_TAG_LAST);
2961
- if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_FORM)) {
3044
+ generate_implied_end_tags(parser, GUMBO_TAG_LAST, NULL);
3045
+ if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_FORM))
2962
3046
  parser_add_parse_error(parser, token);
2963
- return false;
2964
- }
2965
3047
  while (!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_FORM))
2966
3048
  ;
2967
- return success;
3049
+ return;
2968
3050
  } else {
2969
- bool result = true;
2970
3051
  GumboNode* node = state->_form_element;
2971
3052
  assert(!node || node->type == GUMBO_NODE_ELEMENT);
2972
3053
  state->_form_element = NULL;
@@ -2974,25 +3055,24 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
2974
3055
  gumbo_debug("Closing an unopened form.\n");
2975
3056
  parser_add_parse_error(parser, token);
2976
3057
  ignore_token(parser);
2977
- return false;
3058
+ return;
2978
3059
  }
2979
3060
  // This differs from implicitly_close_tags because we remove *only* the
2980
3061
  // <form> element; other nodes are left in scope.
2981
- generate_implied_end_tags(parser, GUMBO_TAG_LAST);
2982
- if (get_current_node(parser) != node) {
3062
+ generate_implied_end_tags(parser, GUMBO_TAG_LAST, NULL);
3063
+ if (get_current_node(parser) != node)
2983
3064
  parser_add_parse_error(parser, token);
2984
- result = false;
2985
- } else {
3065
+ else
2986
3066
  record_end_of_element(token, &node->v.element);
2987
- }
2988
3067
 
2989
3068
  GumboVector* open_elements = &state->_open_elements;
2990
3069
  int index = gumbo_vector_index_of(open_elements, node);
2991
3070
  assert(index >= 0);
2992
3071
  gumbo_vector_remove_at(index, open_elements);
2993
- return result;
3072
+ return;
2994
3073
  }
2995
- } else if (tag_is(token, kEndTag, GUMBO_TAG_P)) {
3074
+ }
3075
+ if (tag_is(token, kEndTag, GUMBO_TAG_P)) {
2996
3076
  if (!has_an_element_in_button_scope(parser, GUMBO_TAG_P)) {
2997
3077
  parser_add_parse_error(parser, token);
2998
3078
  // reconstruct_active_formatting_elements(parser);
@@ -3001,42 +3081,45 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
3001
3081
  GUMBO_TAG_P,
3002
3082
  GUMBO_INSERTION_CONVERTED_FROM_END_TAG
3003
3083
  );
3004
- state->_reprocess_current_token = true;
3005
- return false;
3006
3084
  }
3007
- return implicitly_close_tags (
3085
+ implicitly_close_tags (
3008
3086
  parser,
3009
3087
  token,
3010
3088
  GUMBO_NAMESPACE_HTML,
3011
3089
  GUMBO_TAG_P
3012
3090
  );
3013
- } else if (tag_is(token, kEndTag, GUMBO_TAG_LI)) {
3091
+ return;
3092
+ }
3093
+ if (tag_is(token, kEndTag, GUMBO_TAG_LI)) {
3014
3094
  if (!has_an_element_in_list_scope(parser, GUMBO_TAG_LI)) {
3015
3095
  parser_add_parse_error(parser, token);
3016
3096
  ignore_token(parser);
3017
- return false;
3097
+ return;
3018
3098
  }
3019
- return implicitly_close_tags (
3099
+ implicitly_close_tags (
3020
3100
  parser,
3021
3101
  token,
3022
3102
  GUMBO_NAMESPACE_HTML,
3023
3103
  GUMBO_TAG_LI
3024
3104
  );
3025
- } else if (tag_in(token, kEndTag, &dd_dt_tags)) {
3026
- assert(token->type == GUMBO_TOKEN_END_TAG);
3105
+ return;
3106
+ }
3107
+ if (tag_in(token, kEndTag, &dd_dt_tags)) {
3027
3108
  GumboTag token_tag = token->v.end_tag.tag;
3028
3109
  if (!has_an_element_in_scope(parser, token_tag)) {
3029
3110
  parser_add_parse_error(parser, token);
3030
3111
  ignore_token(parser);
3031
- return false;
3112
+ return;
3032
3113
  }
3033
- return implicitly_close_tags (
3114
+ implicitly_close_tags (
3034
3115
  parser,
3035
3116
  token,
3036
3117
  GUMBO_NAMESPACE_HTML,
3037
3118
  token_tag
3038
3119
  );
3039
- } else if (tag_in(token, kEndTag, &heading_tags)) {
3120
+ return;
3121
+ }
3122
+ if (tag_in(token, kEndTag, &heading_tags)) {
3040
3123
  if (
3041
3124
  !has_an_element_in_scope_with_tagname(parser, 6, (GumboTag[]) {
3042
3125
  GUMBO_TAG_H1, GUMBO_TAG_H2, GUMBO_TAG_H3, GUMBO_TAG_H4,
@@ -3046,31 +3129,29 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
3046
3129
  // No heading open; ignore the token entirely.
3047
3130
  parser_add_parse_error(parser, token);
3048
3131
  ignore_token(parser);
3049
- return false;
3050
- } else {
3051
- generate_implied_end_tags(parser, GUMBO_TAG_LAST);
3052
- const GumboNode* current_node = get_current_node(parser);
3053
- bool success = node_html_tag_is(current_node, token->v.end_tag.tag);
3054
- if (!success) {
3055
- // There're children of the heading currently open; close them below and
3056
- // record a parse error.
3057
- // TODO(jdtang): Add a way to distinguish this error case from the one
3058
- // above.
3059
- parser_add_parse_error(parser, token);
3060
- }
3061
- do {
3062
- current_node = pop_current_node(parser);
3063
- } while (!node_tag_in_set(current_node, &heading_tags));
3064
- return success;
3132
+ return;
3133
+ }
3134
+ generate_implied_end_tags(parser, GUMBO_TAG_LAST, NULL);
3135
+ const GumboNode* current_node = get_current_node(parser);
3136
+ if (!node_html_tag_is(current_node, token->v.end_tag.tag)) {
3137
+ // There're children of the heading currently open; close them below and
3138
+ // record a parse error.
3139
+ // TODO(jdtang): Add a way to distinguish this error case from the one
3140
+ // above.
3141
+ parser_add_parse_error(parser, token);
3065
3142
  }
3066
- } else if (tag_is(token, kStartTag, GUMBO_TAG_A)) {
3067
- bool success = true;
3143
+ do {
3144
+ current_node = pop_current_node(parser);
3145
+ } while (!node_tag_in_set(current_node, &heading_tags));
3146
+ return;
3147
+ }
3148
+ if (tag_is(token, kStartTag, GUMBO_TAG_A)) {
3068
3149
  int last_a;
3069
3150
  int has_matching_a = find_last_anchor_index(parser, &last_a);
3070
3151
  if (has_matching_a) {
3071
3152
  assert(has_matching_a == 1);
3072
3153
  parser_add_parse_error(parser, token);
3073
- adoption_agency_algorithm(parser, token, GUMBO_TAG_A);
3154
+ (void)adoption_agency_algorithm(parser, token);
3074
3155
  // The adoption agency algorithm usually removes all instances of <a>
3075
3156
  // from the list of active formatting elements, but in case it doesn't,
3076
3157
  // we're supposed to do this. (The conditions where it might not are
@@ -3082,12 +3163,12 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
3082
3163
  );
3083
3164
  gumbo_vector_remove(last_element, &state->_open_elements);
3084
3165
  }
3085
- success = false;
3086
3166
  }
3087
3167
  reconstruct_active_formatting_elements(parser);
3088
3168
  add_formatting_element(parser, insert_element_from_token(parser, token));
3089
- return success;
3090
- } else if (
3169
+ return;
3170
+ }
3171
+ if (
3091
3172
  tag_in(token, kStartTag, &(const TagSet) {
3092
3173
  TAG(B), TAG(BIG), TAG(CODE), TAG(EM), TAG(FONT), TAG(I), TAG(S),
3093
3174
  TAG(SMALL), TAG(STRIKE), TAG(STRONG), TAG(TT), TAG(U)
@@ -3095,48 +3176,52 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
3095
3176
  ) {
3096
3177
  reconstruct_active_formatting_elements(parser);
3097
3178
  add_formatting_element(parser, insert_element_from_token(parser, token));
3098
- return true;
3099
- } else if (tag_is(token, kStartTag, GUMBO_TAG_NOBR)) {
3100
- bool result = true;
3179
+ return;
3180
+ }
3181
+ if (tag_is(token, kStartTag, GUMBO_TAG_NOBR)) {
3101
3182
  reconstruct_active_formatting_elements(parser);
3102
3183
  if (has_an_element_in_scope(parser, GUMBO_TAG_NOBR)) {
3103
- result = false;
3104
3184
  parser_add_parse_error(parser, token);
3105
- adoption_agency_algorithm(parser, token, GUMBO_TAG_NOBR);
3185
+ adoption_agency_algorithm(parser, token);
3106
3186
  reconstruct_active_formatting_elements(parser);
3107
3187
  }
3108
3188
  insert_element_from_token(parser, token);
3109
3189
  add_formatting_element(parser, get_current_node(parser));
3110
- return result;
3111
- } else if (
3190
+ return;
3191
+ }
3192
+ if (
3112
3193
  tag_in(token, kEndTag, &(const TagSet) {
3113
3194
  TAG(A), TAG(B), TAG(BIG), TAG(CODE), TAG(EM), TAG(FONT), TAG(I),
3114
3195
  TAG(NOBR), TAG(S), TAG(SMALL), TAG(STRIKE), TAG(STRONG), TAG(TT),
3115
3196
  TAG(U)
3116
3197
  })
3117
3198
  ) {
3118
- return adoption_agency_algorithm(parser, token, token->v.end_tag.tag);
3119
- } else if (
3199
+ adoption_agency_algorithm(parser, token);
3200
+ return;
3201
+ }
3202
+ if (
3120
3203
  tag_in(token, kStartTag, &(const TagSet){TAG(APPLET), TAG(MARQUEE), TAG(OBJECT)})
3121
3204
  ) {
3122
3205
  reconstruct_active_formatting_elements(parser);
3123
3206
  insert_element_from_token(parser, token);
3124
3207
  add_formatting_element(parser, &kActiveFormattingScopeMarker);
3125
3208
  set_frameset_not_ok(parser);
3126
- return true;
3127
- } else if (
3209
+ return;
3210
+ }
3211
+ if (
3128
3212
  tag_in(token, kEndTag, &(const TagSet){TAG(APPLET), TAG(MARQUEE), TAG(OBJECT)})
3129
3213
  ) {
3130
3214
  GumboTag token_tag = token->v.end_tag.tag;
3131
- if (!has_an_element_in_table_scope(parser, token_tag)) {
3215
+ if (!has_an_element_in_scope(parser, token_tag)) {
3132
3216
  parser_add_parse_error(parser, token);
3133
3217
  ignore_token(parser);
3134
- return false;
3218
+ return;
3135
3219
  }
3136
3220
  implicitly_close_tags(parser, token, GUMBO_NAMESPACE_HTML, token_tag);
3137
3221
  clear_active_formatting_elements(parser);
3138
- return true;
3139
- } else if (tag_is(token, kStartTag, GUMBO_TAG_TABLE)) {
3222
+ return;
3223
+ }
3224
+ if (tag_is(token, kStartTag, GUMBO_TAG_TABLE)) {
3140
3225
  if (
3141
3226
  get_document_node(parser)->v.document.doc_type_quirks_mode
3142
3227
  != GUMBO_DOCTYPE_QUIRKS
@@ -3146,75 +3231,89 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
3146
3231
  insert_element_from_token(parser, token);
3147
3232
  set_frameset_not_ok(parser);
3148
3233
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
3149
- return true;
3150
- } else if (
3151
- tag_in(token, kStartTag, &(const TagSet) {
3234
+ return;
3235
+ }
3236
+ if (tag_is(token, kEndTag, GUMBO_TAG_BR)) {
3237
+ parser_add_parse_error(parser, token);
3238
+ reconstruct_active_formatting_elements(parser);
3239
+ insert_element_of_tag_type (
3240
+ parser,
3241
+ GUMBO_TAG_BR,
3242
+ GUMBO_INSERTION_CONVERTED_FROM_END_TAG
3243
+ );
3244
+ pop_current_node(parser);
3245
+ acknowledge_self_closing_tag(parser);
3246
+ set_frameset_not_ok(parser);
3247
+ return;
3248
+ }
3249
+ if (
3250
+ tag_in(token, kStartTag, &(const TagSet) {
3152
3251
  TAG(AREA), TAG(BR), TAG(EMBED), TAG(IMG), TAG(IMAGE), TAG(KEYGEN),
3153
3252
  TAG(WBR)
3154
3253
  })
3155
3254
  ) {
3156
- bool success = true;
3157
- if (tag_is(token, kStartTag, GUMBO_TAG_IMAGE)) {
3158
- success = false;
3255
+ bool is_image = tag_is(token, kStartTag, GUMBO_TAG_IMAGE);
3256
+ if (is_image) {
3159
3257
  parser_add_parse_error(parser, token);
3160
3258
  token->v.start_tag.tag = GUMBO_TAG_IMG;
3161
3259
  }
3162
3260
  reconstruct_active_formatting_elements(parser);
3163
3261
  GumboNode* node = insert_element_from_token(parser, token);
3164
- if (tag_is(token, kStartTag, GUMBO_TAG_IMAGE)) {
3165
- success = false;
3166
- parser_add_parse_error(parser, token);
3167
- node->v.element.tag = GUMBO_TAG_IMG;
3262
+ if (is_image)
3168
3263
  node->parse_flags |= GUMBO_INSERTION_FROM_IMAGE;
3169
- }
3170
3264
  pop_current_node(parser);
3171
3265
  acknowledge_self_closing_tag(parser);
3172
3266
  set_frameset_not_ok(parser);
3173
- return success;
3174
- } else if (tag_is(token, kStartTag, GUMBO_TAG_INPUT)) {
3175
- if (!attribute_matches(&token->v.start_tag.attributes, "type", "hidden")) {
3176
- // Must be before the element is inserted, as that takes ownership of the
3177
- // token's attribute vector.
3178
- set_frameset_not_ok(parser);
3179
- }
3267
+ return;
3268
+ }
3269
+ if (tag_is(token, kStartTag, GUMBO_TAG_INPUT)) {
3180
3270
  reconstruct_active_formatting_elements(parser);
3181
- insert_element_from_token(parser, token);
3271
+ GumboNode *input = insert_element_from_token(parser, token);
3182
3272
  pop_current_node(parser);
3183
3273
  acknowledge_self_closing_tag(parser);
3184
- return true;
3185
- } else if (
3274
+ if (!attribute_matches(&input->v.element.attributes, "type", "hidden"))
3275
+ set_frameset_not_ok(parser);
3276
+ return;
3277
+ }
3278
+ if (
3186
3279
  tag_in(token, kStartTag, &(const TagSet){TAG(PARAM), TAG(SOURCE), TAG(TRACK)})
3187
3280
  ) {
3188
3281
  insert_element_from_token(parser, token);
3189
3282
  pop_current_node(parser);
3190
3283
  acknowledge_self_closing_tag(parser);
3191
- return true;
3192
- } else if (tag_is(token, kStartTag, GUMBO_TAG_HR)) {
3193
- bool result = maybe_implicitly_close_p_tag(parser, token);
3284
+ return;
3285
+ }
3286
+ if (tag_is(token, kStartTag, GUMBO_TAG_HR)) {
3287
+ maybe_implicitly_close_p_tag(parser, token);
3194
3288
  insert_element_from_token(parser, token);
3195
3289
  pop_current_node(parser);
3196
3290
  acknowledge_self_closing_tag(parser);
3197
3291
  set_frameset_not_ok(parser);
3198
- return result;
3199
- } else if (tag_is(token, kStartTag, GUMBO_TAG_TEXTAREA)) {
3292
+ return;
3293
+ }
3294
+ if (tag_is(token, kStartTag, GUMBO_TAG_TEXTAREA)) {
3200
3295
  run_generic_parsing_algorithm(parser, token, GUMBO_LEX_RCDATA);
3201
3296
  parser->_parser_state->_ignore_next_linefeed = true;
3202
3297
  set_frameset_not_ok(parser);
3203
- return true;
3204
- } else if (tag_is(token, kStartTag, GUMBO_TAG_XMP)) {
3205
- bool result = maybe_implicitly_close_p_tag(parser, token);
3298
+ return;
3299
+ }
3300
+ if (tag_is(token, kStartTag, GUMBO_TAG_XMP)) {
3301
+ maybe_implicitly_close_p_tag(parser, token);
3206
3302
  reconstruct_active_formatting_elements(parser);
3207
3303
  set_frameset_not_ok(parser);
3208
3304
  run_generic_parsing_algorithm(parser, token, GUMBO_LEX_RAWTEXT);
3209
- return result;
3210
- } else if (tag_is(token, kStartTag, GUMBO_TAG_IFRAME)) {
3305
+ return;
3306
+ }
3307
+ if (tag_is(token, kStartTag, GUMBO_TAG_IFRAME)) {
3211
3308
  set_frameset_not_ok(parser);
3212
3309
  run_generic_parsing_algorithm(parser, token, GUMBO_LEX_RAWTEXT);
3213
- return true;
3214
- } else if (tag_is(token, kStartTag, GUMBO_TAG_NOEMBED)) {
3310
+ return;
3311
+ }
3312
+ if (tag_is(token, kStartTag, GUMBO_TAG_NOEMBED)) {
3215
3313
  run_generic_parsing_algorithm(parser, token, GUMBO_LEX_RAWTEXT);
3216
- return true;
3217
- } else if (tag_is(token, kStartTag, GUMBO_TAG_SELECT)) {
3314
+ return;
3315
+ }
3316
+ if (tag_is(token, kStartTag, GUMBO_TAG_SELECT)) {
3218
3317
  reconstruct_active_formatting_elements(parser);
3219
3318
  insert_element_from_token(parser, token);
3220
3319
  set_frameset_not_ok(parser);
@@ -3230,50 +3329,40 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
3230
3329
  } else {
3231
3330
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_SELECT);
3232
3331
  }
3233
- return true;
3234
- } else if (
3235
- tag_in(token, kStartTag, &(const TagSet){TAG(OPTION), TAG(OPTGROUP)})
3332
+ return;
3333
+ }
3334
+ if (
3335
+ tag_in(token, kStartTag, &(const TagSet){TAG(OPTGROUP), TAG(OPTION)})
3236
3336
  ) {
3237
3337
  if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTION)) {
3238
3338
  pop_current_node(parser);
3239
3339
  }
3240
3340
  reconstruct_active_formatting_elements(parser);
3241
3341
  insert_element_from_token(parser, token);
3242
- return true;
3243
- } else if (
3244
- tag_in(token, kStartTag, &(const TagSet){TAG(RB), TAG(RP), TAG(RT), TAG(RTC)})
3245
- ) {
3246
- bool success = true;
3247
- GumboTag exception = tag_in(token, kStartTag, &(const TagSet){TAG(RT), TAG(RP)})
3248
- ? GUMBO_TAG_RTC
3249
- : GUMBO_TAG_LAST
3250
- ;
3342
+ return;
3343
+ }
3344
+ if (tag_in(token, kStartTag, &(const TagSet){TAG(RB), TAG(RTC)})) {
3251
3345
  if (has_an_element_in_scope(parser, GUMBO_TAG_RUBY)) {
3252
- generate_implied_end_tags(parser, exception);
3346
+ generate_implied_end_tags(parser, GUMBO_TAG_LAST, NULL);
3347
+ if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_RUBY))
3348
+ parser_add_parse_error(parser, token);
3253
3349
  }
3254
- if (
3255
- !node_html_tag_is(get_current_node(parser), GUMBO_TAG_RUBY)
3256
- && !(
3257
- exception == GUMBO_TAG_LAST
3258
- || node_html_tag_is(get_current_node(parser), GUMBO_TAG_RTC)
3259
- )
3260
- ) {
3261
- parser_add_parse_error(parser, token);
3262
- success = false;
3350
+ insert_element_from_token(parser, token);
3351
+ return;
3352
+ }
3353
+ if (tag_in(token, kStartTag, &(const TagSet){TAG(RP), TAG(RT)})) {
3354
+ if (has_an_element_in_scope(parser, GUMBO_TAG_RUBY)) {
3355
+ generate_implied_end_tags(parser, GUMBO_TAG_RTC, NULL);
3356
+ GumboNode* current = get_current_node(parser);
3357
+ if (!node_html_tag_is(current, GUMBO_TAG_RUBY) &&
3358
+ !node_html_tag_is(current, GUMBO_TAG_RTC)) {
3359
+ parser_add_parse_error(parser, token);
3360
+ }
3263
3361
  }
3264
3362
  insert_element_from_token(parser, token);
3265
- return success;
3266
- } else if (tag_is(token, kEndTag, GUMBO_TAG_BR)) {
3267
- parser_add_parse_error(parser, token);
3268
- reconstruct_active_formatting_elements(parser);
3269
- insert_element_of_tag_type (
3270
- parser,
3271
- GUMBO_TAG_BR,
3272
- GUMBO_INSERTION_CONVERTED_FROM_END_TAG
3273
- );
3274
- pop_current_node(parser);
3275
- return false;
3276
- } else if (tag_is(token, kStartTag, GUMBO_TAG_MATH)) {
3363
+ return;
3364
+ }
3365
+ if (tag_is(token, kStartTag, GUMBO_TAG_MATH)) {
3277
3366
  reconstruct_active_formatting_elements(parser);
3278
3367
  adjust_mathml_attributes(token);
3279
3368
  adjust_foreign_attributes(token);
@@ -3282,8 +3371,9 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
3282
3371
  pop_current_node(parser);
3283
3372
  acknowledge_self_closing_tag(parser);
3284
3373
  }
3285
- return true;
3286
- } else if (tag_is(token, kStartTag, GUMBO_TAG_SVG)) {
3374
+ return;
3375
+ }
3376
+ if (tag_is(token, kStartTag, GUMBO_TAG_SVG)) {
3287
3377
  reconstruct_active_formatting_elements(parser);
3288
3378
  adjust_svg_attributes(token);
3289
3379
  adjust_foreign_attributes(token);
@@ -3292,8 +3382,9 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
3292
3382
  pop_current_node(parser);
3293
3383
  acknowledge_self_closing_tag(parser);
3294
3384
  }
3295
- return true;
3296
- } else if (
3385
+ return;
3386
+ }
3387
+ if (
3297
3388
  tag_in(token, kStartTag, &(const TagSet) {
3298
3389
  TAG(CAPTION), TAG(COL), TAG(COLGROUP), TAG(FRAME), TAG(HEAD),
3299
3390
  TAG(TBODY), TAG(TD), TAG(TFOOT), TAG(TH), TAG(THEAD), TAG(TR)
@@ -3301,82 +3392,50 @@ static bool handle_in_body(GumboParser* parser, GumboToken* token) {
3301
3392
  ) {
3302
3393
  parser_add_parse_error(parser, token);
3303
3394
  ignore_token(parser);
3304
- return false;
3305
- } else if (token->type == GUMBO_TOKEN_START_TAG) {
3395
+ return;
3396
+ }
3397
+ if (token->type == GUMBO_TOKEN_START_TAG) {
3306
3398
  reconstruct_active_formatting_elements(parser);
3307
3399
  insert_element_from_token(parser, token);
3308
- return true;
3309
- } else {
3310
- assert(token->type == GUMBO_TOKEN_END_TAG);
3311
- GumboTag end_tag = token->v.end_tag.tag;
3312
- const char *end_tagname = token->v.end_tag.name;
3313
- assert(state->_open_elements.length > 0);
3314
- assert(node_html_tag_is(state->_open_elements.data[0], GUMBO_TAG_HTML));
3315
- // Walk up the stack of open elements until we find one that either:
3316
- // a) Matches the tag name we saw
3317
- // b) Is in the "special" category.
3318
- // If we see a), implicitly close everything up to and including it. If we
3319
- // see b), then record a parse error, don't close anything (except the
3320
- // implied end tags) and ignore the end tag token.
3321
- for (int i = state->_open_elements.length; --i >= 0;) {
3322
- const GumboNode* node = state->_open_elements.data[i];
3323
- if (node_qualified_tagname_is(node, GUMBO_NAMESPACE_HTML, end_tag, end_tagname)) {
3324
- generate_implied_end_tags(parser, end_tag);
3325
- // TODO(jdtang): Do I need to add a parse error here? The condition in
3326
- // the spec seems like it's the inverse of the loop condition above, and
3327
- // so would never fire.
3328
- // sfc: Yes, an error is needed here.
3329
- // <!DOCTYPE><body><sarcasm><foo></sarcasm> is an example.
3330
- // foo is the "current node" but sarcasm is node.
3331
- // XXX: Write a test for this.
3332
- if (node != get_current_node(parser))
3333
- parser_add_parse_error(parser, token);
3334
- while (node != pop_current_node(parser))
3335
- ; // Pop everything.
3336
- return true;
3337
- } else if (is_special_node(node)) {
3338
- parser_add_parse_error(parser, token);
3339
- ignore_token(parser);
3340
- return false;
3341
- }
3342
- }
3343
- // <html> is in the special category, so we should never get here.
3344
- assert(0);
3345
- return false;
3400
+ return;
3346
3401
  }
3402
+ in_body_any_other_end_tag(parser, token);
3347
3403
  }
3348
3404
 
3349
3405
  // https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-incdata
3350
- static bool handle_text(GumboParser* parser, GumboToken* token) {
3406
+ static void handle_text(GumboParser* parser, GumboToken* token) {
3351
3407
  if (
3352
3408
  token->type == GUMBO_TOKEN_CHARACTER
3353
3409
  || token->type == GUMBO_TOKEN_WHITESPACE
3354
3410
  ) {
3355
3411
  insert_text_token(parser, token);
3356
- } else {
3357
- // We provide only bare-bones script handling that doesn't involve any of
3358
- // the parser-pause/already-started/script-nesting flags or re-entrant
3359
- // invocations of the tokenizer. Because the intended usage of this library
3360
- // is mostly for templating, refactoring, and static-analysis libraries, we
3361
- // provide the script body as a text-node child of the <script> element.
3362
- // This behavior doesn't support document.write of partial HTML elements,
3363
- // but should be adequate for almost all other scripting support.
3364
- if (token->type == GUMBO_TOKEN_EOF) {
3365
- parser_add_parse_error(parser, token);
3366
- parser->_parser_state->_reprocess_current_token = true;
3367
- }
3368
- pop_current_node(parser);
3369
- set_insertion_mode(parser, parser->_parser_state->_original_insertion_mode);
3412
+ return;
3370
3413
  }
3371
- return true;
3414
+ // We provide only bare-bones script handling that doesn't involve any of
3415
+ // the parser-pause/already-started/script-nesting flags or re-entrant
3416
+ // invocations of the tokenizer. Because the intended usage of this library
3417
+ // is mostly for templating, refactoring, and static-analysis libraries, we
3418
+ // provide the script body as a text-node child of the <script> element.
3419
+ // This behavior doesn't support document.write of partial HTML elements,
3420
+ // but should be adequate for almost all other scripting support.
3421
+ if (token->type == GUMBO_TOKEN_EOF) {
3422
+ parser_add_parse_error(parser, token);
3423
+ parser->_parser_state->_reprocess_current_token = true;
3424
+ }
3425
+ pop_current_node(parser);
3426
+ set_insertion_mode(parser, parser->_parser_state->_original_insertion_mode);
3372
3427
  }
3373
3428
 
3374
3429
  // https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-intable
3375
- static bool handle_in_table(GumboParser* parser, GumboToken* token) {
3430
+ static void handle_in_table(GumboParser* parser, GumboToken* token) {
3376
3431
  GumboParserState* state = parser->_parser_state;
3377
3432
  if (
3378
- token->type == GUMBO_TOKEN_CHARACTER
3379
- || token->type == GUMBO_TOKEN_WHITESPACE
3433
+ (token->type == GUMBO_TOKEN_CHARACTER
3434
+ || token->type == GUMBO_TOKEN_WHITESPACE
3435
+ || token->type == GUMBO_TOKEN_NULL)
3436
+ && node_tag_in_set(get_current_node(parser), &(const TagSet) {
3437
+ TAG(TABLE), TAG(TBODY), TAG(TFOOT), TAG(THEAD), TAG(TR)
3438
+ })
3380
3439
  ) {
3381
3440
  // The "pending table character tokens" list described in the spec is
3382
3441
  // nothing more than the TextNodeBufferState. We accumulate text tokens as
@@ -3384,71 +3443,87 @@ static bool handle_in_table(GumboParser* parser, GumboToken* token) {
3384
3443
  // we set _foster_parent_insertions if there're non-whitespace characters in
3385
3444
  // the buffer.
3386
3445
  assert(state->_text_node._buffer.length == 0);
3446
+ assert(state->_table_character_tokens.length == 0);
3387
3447
  state->_original_insertion_mode = state->_insertion_mode;
3388
3448
  state->_reprocess_current_token = true;
3389
3449
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_TEXT);
3390
- return true;
3391
- } else if (token->type == GUMBO_TOKEN_DOCTYPE) {
3450
+ return;
3451
+ }
3452
+ if (token->type == GUMBO_TOKEN_COMMENT) {
3453
+ append_comment_node(parser, get_current_node(parser), token);
3454
+ return;
3455
+ }
3456
+ if (token->type == GUMBO_TOKEN_DOCTYPE) {
3392
3457
  parser_add_parse_error(parser, token);
3393
3458
  ignore_token(parser);
3394
- return false;
3395
- } else if (token->type == GUMBO_TOKEN_COMMENT) {
3396
- append_comment_node(parser, get_current_node(parser), token);
3397
- return true;
3398
- } else if (tag_is(token, kStartTag, GUMBO_TAG_CAPTION)) {
3459
+ return;
3460
+ }
3461
+ if (tag_is(token, kStartTag, GUMBO_TAG_CAPTION)) {
3399
3462
  clear_stack_to_table_context(parser);
3400
3463
  add_formatting_element(parser, &kActiveFormattingScopeMarker);
3401
3464
  insert_element_from_token(parser, token);
3402
3465
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_CAPTION);
3403
- return true;
3404
- } else if (tag_is(token, kStartTag, GUMBO_TAG_COLGROUP)) {
3466
+ return;
3467
+ }
3468
+ if (tag_is(token, kStartTag, GUMBO_TAG_COLGROUP)) {
3405
3469
  clear_stack_to_table_context(parser);
3406
3470
  insert_element_from_token(parser, token);
3407
3471
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_COLUMN_GROUP);
3408
- return true;
3409
- } else if (tag_is(token, kStartTag, GUMBO_TAG_COL)) {
3472
+ return;
3473
+ }
3474
+ if (tag_is(token, kStartTag, GUMBO_TAG_COL)) {
3410
3475
  clear_stack_to_table_context(parser);
3411
3476
  insert_element_of_tag_type (
3412
3477
  parser,
3413
3478
  GUMBO_TAG_COLGROUP,
3414
3479
  GUMBO_INSERTION_IMPLIED
3415
3480
  );
3416
- parser->_parser_state->_reprocess_current_token = true;
3481
+ state->_reprocess_current_token = true;
3417
3482
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_COLUMN_GROUP);
3418
- return true;
3419
- } else if (
3483
+ return;
3484
+ }
3485
+ if (
3420
3486
  tag_in(token, kStartTag, &(const TagSet) {
3421
- TAG(TBODY), TAG(TFOOT), TAG(THEAD), TAG(TD), TAG(TH), TAG(TR)
3487
+ TAG(TBODY), TAG(TFOOT), TAG(THEAD)
3422
3488
  })
3423
3489
  ) {
3424
3490
  clear_stack_to_table_context(parser);
3491
+ insert_element_from_token(parser, token);
3425
3492
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
3426
- if (tag_in(token, kStartTag, &(const TagSet){TAG(TD), TAG(TH), TAG(TR)})) {
3427
- insert_element_of_tag_type (
3428
- parser,
3429
- GUMBO_TAG_TBODY,
3430
- GUMBO_INSERTION_IMPLIED
3431
- );
3432
- state->_reprocess_current_token = true;
3433
- } else {
3434
- insert_element_from_token(parser, token);
3435
- }
3436
- return true;
3437
- } else if (tag_is(token, kStartTag, GUMBO_TAG_TABLE)) {
3493
+ return;
3494
+ }
3495
+ if (
3496
+ tag_in(token, kStartTag, &(const TagSet) {
3497
+ TAG(TD), TAG(TH), TAG(TR)
3498
+ })
3499
+ ) {
3500
+ clear_stack_to_table_context(parser);
3501
+ insert_element_of_tag_type (
3502
+ parser,
3503
+ GUMBO_TAG_TBODY,
3504
+ GUMBO_INSERTION_IMPLIED
3505
+ );
3506
+ set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
3507
+ state->_reprocess_current_token = true;
3508
+ return;
3509
+ }
3510
+ if (tag_is(token, kStartTag, GUMBO_TAG_TABLE)) {
3438
3511
  parser_add_parse_error(parser, token);
3439
3512
  if (close_table(parser)) {
3440
- parser->_parser_state->_reprocess_current_token = true;
3513
+ state->_reprocess_current_token = true;
3441
3514
  } else {
3442
3515
  ignore_token(parser);
3443
3516
  }
3444
- return false;
3445
- } else if (tag_is(token, kEndTag, GUMBO_TAG_TABLE)) {
3517
+ return;
3518
+ }
3519
+ if (tag_is(token, kEndTag, GUMBO_TAG_TABLE)) {
3446
3520
  if (!close_table(parser)) {
3447
3521
  parser_add_parse_error(parser, token);
3448
- return false;
3522
+ return;
3449
3523
  }
3450
- return true;
3451
- } else if (
3524
+ return;
3525
+ }
3526
+ if (
3452
3527
  tag_in(token, kEndTag, &(const TagSet) {
3453
3528
  TAG(BODY), TAG(CAPTION), TAG(COL), TAG(COLGROUP), TAG(HTML),
3454
3529
  TAG(TBODY), TAG(TD), TAG(TFOOT), TAG(TH), TAG(THEAD), TAG(TR)
@@ -3456,102 +3531,103 @@ static bool handle_in_table(GumboParser* parser, GumboToken* token) {
3456
3531
  ) {
3457
3532
  parser_add_parse_error(parser, token);
3458
3533
  ignore_token(parser);
3459
- return false;
3460
- } else if (
3534
+ return;
3535
+ }
3536
+ if (
3461
3537
  tag_in(token, kStartTag, &(const TagSet){TAG(STYLE), TAG(SCRIPT), TAG(TEMPLATE)})
3462
3538
  || (tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE))
3463
3539
  ) {
3464
- return handle_in_head(parser, token);
3465
- } else if (
3540
+ handle_in_head(parser, token);
3541
+ return;
3542
+ }
3543
+ if (
3466
3544
  tag_is(token, kStartTag, GUMBO_TAG_INPUT)
3467
3545
  && attribute_matches(&token->v.start_tag.attributes, "type", "hidden")
3468
3546
  ) {
3469
3547
  parser_add_parse_error(parser, token);
3470
3548
  insert_element_from_token(parser, token);
3471
3549
  pop_current_node(parser);
3472
- return false;
3473
- } else if (tag_is(token, kStartTag, GUMBO_TAG_FORM)) {
3550
+ acknowledge_self_closing_tag(parser);
3551
+ return;
3552
+ }
3553
+ if (tag_is(token, kStartTag, GUMBO_TAG_FORM)) {
3474
3554
  parser_add_parse_error(parser, token);
3475
3555
  if (state->_form_element || has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
3476
3556
  ignore_token(parser);
3477
- return false;
3557
+ return;
3478
3558
  }
3479
3559
  state->_form_element = insert_element_from_token(parser, token);
3480
3560
  pop_current_node(parser);
3481
- return false;
3482
- } else if (token->type == GUMBO_TOKEN_EOF) {
3483
- return handle_in_body(parser, token);
3484
- } else {
3485
- parser_add_parse_error(parser, token);
3486
- state->_foster_parent_insertions = true;
3487
- bool result = handle_in_body(parser, token);
3488
- state->_foster_parent_insertions = false;
3489
- return result;
3561
+ return;
3562
+ }
3563
+ if (token->type == GUMBO_TOKEN_EOF) {
3564
+ handle_in_body(parser, token);
3565
+ return;
3490
3566
  }
3567
+ // foster-parenting-start-tag or foster-parenting-end-tag error
3568
+ parser_add_parse_error(parser, token);
3569
+ state->_foster_parent_insertions = true;
3570
+ handle_in_body(parser, token);
3571
+ state->_foster_parent_insertions = false;
3491
3572
  }
3492
3573
 
3493
3574
  // https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-intabletext
3494
- static bool handle_in_table_text(GumboParser* parser, GumboToken* token) {
3575
+ static void handle_in_table_text(GumboParser* parser, GumboToken* token) {
3495
3576
  if (token->type == GUMBO_TOKEN_NULL) {
3496
3577
  parser_add_parse_error(parser, token);
3497
3578
  ignore_token(parser);
3498
- return false;
3499
- } else if (
3500
- token->type == GUMBO_TOKEN_CHARACTER
3501
- || token->type == GUMBO_TOKEN_WHITESPACE
3502
- ) {
3579
+ return;
3580
+ }
3581
+ GumboParserState* state = parser->_parser_state;
3582
+ // Non-whitespace tokens will cause parse errors later.
3583
+ // It's not entirely clear from the spec how this is supposed to work.
3584
+ // https://github.com/whatwg/html/issues/4046
3585
+ if (token->type == GUMBO_TOKEN_WHITESPACE
3586
+ || token->type == GUMBO_TOKEN_CHARACTER) {
3503
3587
  insert_text_token(parser, token);
3504
- return true;
3505
- } else {
3506
- GumboParserState* state = parser->_parser_state;
3507
- GumboStringBuffer* buffer = &state->_text_node._buffer;
3508
- const char* data = buffer->data;
3509
- // Note that TextNodeBuffer may contain UTF-8 characters, but the
3510
- // presence of any one byte that is not whitespace means we flip
3511
- // the flag, so this loop is still valid.
3588
+ gumbo_character_token_buffer_append(token, &state->_table_character_tokens);
3589
+ return;
3590
+ }
3591
+
3592
+ GumboCharacterTokenBuffer* buffer = &state->_table_character_tokens;
3593
+ if (state->_text_node._type != GUMBO_NODE_WHITESPACE) {
3594
+ // Each character in buffer is an error. Unfortunately, that means we need
3595
+ // to emit a bunch of errors at the appropriate locations.
3512
3596
  for (size_t i = 0, n = buffer->length; i < n; ++i) {
3513
- switch (data[i]) {
3514
- case '\t':
3515
- case '\n':
3516
- case '\f':
3517
- case '\r':
3518
- case ' ':
3519
- continue;
3520
- default:
3521
- state->_foster_parent_insertions = true;
3522
- reconstruct_active_formatting_elements(parser);
3523
- goto loopbreak;
3524
- }
3597
+ GumboToken tok;
3598
+ gumbo_character_token_buffer_get(buffer, i, &tok);
3599
+ // foster-parenting-character error
3600
+ parser_add_parse_error(parser, &tok);
3525
3601
  }
3526
- loopbreak:
3527
- maybe_flush_text_node_buffer(parser);
3528
- state->_foster_parent_insertions = false;
3529
- state->_reprocess_current_token = true;
3530
- state->_insertion_mode = state->_original_insertion_mode;
3531
- return true;
3602
+ state->_foster_parent_insertions = true;
3603
+ set_frameset_not_ok(parser);
3604
+ reconstruct_active_formatting_elements(parser);
3532
3605
  }
3606
+ maybe_flush_text_node_buffer(parser);
3607
+ gumbo_character_token_buffer_clear(buffer);
3608
+ state->_foster_parent_insertions = false;
3609
+ state->_reprocess_current_token = true;
3610
+ state->_insertion_mode = state->_original_insertion_mode;
3533
3611
  }
3534
3612
 
3535
3613
  // https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-incaption
3536
- static bool handle_in_caption(GumboParser* parser, GumboToken* token) {
3614
+ static void handle_in_caption(GumboParser* parser, GumboToken* token) {
3537
3615
  if (tag_is(token, kEndTag, GUMBO_TAG_CAPTION)) {
3538
3616
  if (!has_an_element_in_table_scope(parser, GUMBO_TAG_CAPTION)) {
3539
3617
  parser_add_parse_error(parser, token);
3540
3618
  ignore_token(parser);
3541
- return false;
3542
- } else {
3543
- generate_implied_end_tags(parser, GUMBO_TAG_LAST);
3544
- bool result = true;
3545
- if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_CAPTION)) {
3546
- parser_add_parse_error(parser, token);
3547
- }
3548
- while (!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_CAPTION))
3549
- ;
3550
- clear_active_formatting_elements(parser);
3551
- set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
3552
- return result;
3619
+ return;
3553
3620
  }
3554
- } else if (
3621
+ generate_implied_end_tags(parser, GUMBO_TAG_LAST, NULL);
3622
+ if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_CAPTION))
3623
+ parser_add_parse_error(parser, token);
3624
+ while (!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_CAPTION))
3625
+ ;
3626
+ clear_active_formatting_elements(parser);
3627
+ set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
3628
+ return;
3629
+ }
3630
+ if (
3555
3631
  tag_in(token, kStartTag, &(const TagSet) {
3556
3632
  TAG(CAPTION), TAG(COL), TAG(COLGROUP), TAG(TBODY), TAG(TD),
3557
3633
  TAG(TFOOT), TAG(TH), TAG(THEAD), TAG(TR)
@@ -3561,15 +3637,19 @@ static bool handle_in_caption(GumboParser* parser, GumboToken* token) {
3561
3637
  if (!has_an_element_in_table_scope(parser, GUMBO_TAG_CAPTION)) {
3562
3638
  parser_add_parse_error(parser, token);
3563
3639
  ignore_token(parser);
3564
- return false;
3640
+ return;
3565
3641
  }
3642
+ generate_implied_end_tags(parser, GUMBO_TAG_LAST, NULL);
3643
+ if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_CAPTION))
3644
+ parser_add_parse_error(parser, token);
3566
3645
  while (!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_CAPTION))
3567
3646
  ;
3568
3647
  clear_active_formatting_elements(parser);
3569
3648
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
3570
3649
  parser->_parser_state->_reprocess_current_token = true;
3571
- return true;
3572
- } else if (
3650
+ return;
3651
+ }
3652
+ if (
3573
3653
  tag_in(token, kEndTag, &(const TagSet) {
3574
3654
  TAG(BODY), TAG(COL), TAG(COLGROUP), TAG(HTML), TAG(TBODY), TAG(TD),
3575
3655
  TAG(TFOOT), TAG(TH), TAG(THEAD), TAG(TR)
@@ -3577,91 +3657,102 @@ static bool handle_in_caption(GumboParser* parser, GumboToken* token) {
3577
3657
  ) {
3578
3658
  parser_add_parse_error(parser, token);
3579
3659
  ignore_token(parser);
3580
- return false;
3581
- } else {
3582
- return handle_in_body(parser, token);
3660
+ return;
3583
3661
  }
3662
+ handle_in_body(parser, token);
3584
3663
  }
3585
3664
 
3586
3665
  // https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-incolgroup
3587
- static bool handle_in_column_group(GumboParser* parser, GumboToken* token) {
3666
+ static void handle_in_column_group(GumboParser* parser, GumboToken* token) {
3588
3667
  if (token->type == GUMBO_TOKEN_WHITESPACE) {
3589
3668
  insert_text_token(parser, token);
3590
- return true;
3591
- } else if (token->type == GUMBO_TOKEN_DOCTYPE) {
3669
+ return;
3670
+ }
3671
+ if (token->type == GUMBO_TOKEN_COMMENT) {
3672
+ append_comment_node(parser, get_current_node(parser), token);
3673
+ return;
3674
+ }
3675
+ if (token->type == GUMBO_TOKEN_DOCTYPE) {
3592
3676
  parser_add_parse_error(parser, token);
3593
3677
  ignore_token(parser);
3594
- return false;
3595
- } else if (token->type == GUMBO_TOKEN_COMMENT) {
3596
- append_comment_node(parser, get_current_node(parser), token);
3597
- return true;
3598
- } else if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
3599
- return handle_in_body(parser, token);
3600
- } else if (tag_is(token, kStartTag, GUMBO_TAG_COL)) {
3678
+ return;
3679
+ }
3680
+ if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
3681
+ handle_in_body(parser, token);
3682
+ return;
3683
+ }
3684
+ if (tag_is(token, kStartTag, GUMBO_TAG_COL)) {
3601
3685
  insert_element_from_token(parser, token);
3602
3686
  pop_current_node(parser);
3603
3687
  acknowledge_self_closing_tag(parser);
3604
- return true;
3605
- } else if (tag_is(token, kEndTag, GUMBO_TAG_COLGROUP)) {
3688
+ return;
3689
+ }
3690
+ if (tag_is(token, kEndTag, GUMBO_TAG_COLGROUP)) {
3606
3691
  if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_COLGROUP)) {
3607
3692
  parser_add_parse_error(parser, token);
3608
3693
  ignore_token(parser);
3609
- return false;
3694
+ return;
3610
3695
  }
3611
3696
  pop_current_node(parser);
3612
3697
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
3613
- return false;
3614
- } else if (tag_is(token, kEndTag, GUMBO_TAG_COL)) {
3698
+ return;
3699
+ }
3700
+ if (tag_is(token, kEndTag, GUMBO_TAG_COL)) {
3615
3701
  parser_add_parse_error(parser, token);
3616
3702
  ignore_token(parser);
3617
- return false;
3618
- } else if (
3703
+ return;
3704
+ }
3705
+ if (
3619
3706
  tag_is(token, kStartTag, GUMBO_TAG_TEMPLATE)
3620
3707
  || tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)
3621
3708
  ) {
3622
- return handle_in_head(parser, token);
3623
- } else if (token->type == GUMBO_TOKEN_EOF) {
3624
- return handle_in_body(parser, token);
3625
- } else {
3626
- if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_COLGROUP)) {
3627
- parser_add_parse_error(parser, token);
3628
- ignore_token(parser);
3629
- return false;
3630
- }
3631
- pop_current_node(parser);
3632
- set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
3633
- parser->_parser_state->_reprocess_current_token = true;
3634
- return true;
3709
+ handle_in_head(parser, token);
3710
+ return;
3711
+ }
3712
+ if (token->type == GUMBO_TOKEN_EOF) {
3713
+ handle_in_body(parser, token);
3714
+ return;
3635
3715
  }
3716
+ if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_COLGROUP)) {
3717
+ parser_add_parse_error(parser, token);
3718
+ ignore_token(parser);
3719
+ return;
3720
+ }
3721
+ pop_current_node(parser);
3722
+ set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
3723
+ parser->_parser_state->_reprocess_current_token = true;
3636
3724
  }
3637
3725
 
3638
3726
  // https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-intbody
3639
- static bool handle_in_table_body(GumboParser* parser, GumboToken* token) {
3727
+ static void handle_in_table_body(GumboParser* parser, GumboToken* token) {
3640
3728
  if (tag_is(token, kStartTag, GUMBO_TAG_TR)) {
3641
3729
  clear_stack_to_table_body_context(parser);
3642
3730
  insert_element_from_token(parser, token);
3643
3731
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_ROW);
3644
- return true;
3645
- } else if (tag_in(token, kStartTag, &td_th_tags)) {
3732
+ return;
3733
+ }
3734
+ if (tag_in(token, kStartTag, &td_th_tags)) {
3646
3735
  parser_add_parse_error(parser, token);
3647
3736
  clear_stack_to_table_body_context(parser);
3648
3737
  insert_element_of_tag_type(parser, GUMBO_TAG_TR, GUMBO_INSERTION_IMPLIED);
3649
- parser->_parser_state->_reprocess_current_token = true;
3650
3738
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_ROW);
3651
- return false;
3652
- } else if (
3739
+ parser->_parser_state->_reprocess_current_token = true;
3740
+ return;
3741
+ }
3742
+ if (
3653
3743
  tag_in(token, kEndTag, &(const TagSet){TAG(TBODY), TAG(TFOOT), TAG(THEAD)})
3654
3744
  ) {
3655
3745
  if (!has_an_element_in_table_scope(parser, token->v.end_tag.tag)) {
3656
3746
  parser_add_parse_error(parser, token);
3657
3747
  ignore_token(parser);
3658
- return false;
3748
+ return;
3659
3749
  }
3660
3750
  clear_stack_to_table_body_context(parser);
3661
3751
  pop_current_node(parser);
3662
3752
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
3663
- return true;
3664
- } else if (
3753
+ return;
3754
+ }
3755
+ if (
3665
3756
  tag_in(token, kStartTag, &(const TagSet) {
3666
3757
  TAG(CAPTION), TAG(COL), TAG(COLGROUP), TAG(TBODY), TAG(TFOOT),
3667
3758
  TAG(THEAD)
@@ -3677,47 +3768,48 @@ static bool handle_in_table_body(GumboParser* parser, GumboToken* token) {
3677
3768
  ) {
3678
3769
  parser_add_parse_error(parser, token);
3679
3770
  ignore_token(parser);
3680
- return false;
3771
+ return;
3681
3772
  }
3682
3773
  clear_stack_to_table_body_context(parser);
3683
3774
  pop_current_node(parser);
3684
3775
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
3685
3776
  parser->_parser_state->_reprocess_current_token = true;
3686
- return true;
3687
- } else if (
3777
+ return;
3778
+ }
3779
+ if (
3688
3780
  tag_in(token, kEndTag, &(const TagSet) {
3689
- TAG(BODY), TAG(CAPTION), TAG(COL), TAG(TR), TAG(COLGROUP),
3690
- TAG(HTML), TAG(TD), TAG(TH)
3781
+ TAG(BODY), TAG(CAPTION), TAG(COL), TAG(COLGROUP), TAG(HTML), TAG(TD),
3782
+ TAG(TH), TAG(TR)
3691
3783
  })
3692
3784
  ) {
3693
3785
  parser_add_parse_error(parser, token);
3694
3786
  ignore_token(parser);
3695
- return false;
3696
- } else {
3697
- return handle_in_table(parser, token);
3787
+ return;
3698
3788
  }
3789
+ handle_in_table(parser, token);
3699
3790
  }
3700
3791
 
3701
3792
  // https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-intr
3702
- static bool handle_in_row(GumboParser* parser, GumboToken* token) {
3793
+ static void handle_in_row(GumboParser* parser, GumboToken* token) {
3703
3794
  if (tag_in(token, kStartTag, &td_th_tags)) {
3704
3795
  clear_stack_to_table_row_context(parser);
3705
3796
  insert_element_from_token(parser, token);
3706
3797
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_CELL);
3707
3798
  add_formatting_element(parser, &kActiveFormattingScopeMarker);
3708
- return true;
3709
- } else if (tag_is(token, kEndTag, GUMBO_TAG_TR)) {
3799
+ return;
3800
+ }
3801
+ if (tag_is(token, kEndTag, GUMBO_TAG_TR)) {
3710
3802
  if (!has_an_element_in_table_scope(parser, GUMBO_TAG_TR)) {
3711
3803
  parser_add_parse_error(parser, token);
3712
3804
  ignore_token(parser);
3713
- return false;
3714
- } else {
3715
- clear_stack_to_table_row_context(parser);
3716
- pop_current_node(parser);
3717
- set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
3718
- return true;
3805
+ return;
3719
3806
  }
3720
- } else if (
3807
+ clear_stack_to_table_row_context(parser);
3808
+ pop_current_node(parser);
3809
+ set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
3810
+ return;
3811
+ }
3812
+ if (
3721
3813
  tag_in(token, kStartTag, &(const TagSet) {
3722
3814
  TAG(CAPTION), TAG(COL), TAG(COLGROUP), TAG(TBODY), TAG(TFOOT),
3723
3815
  TAG(THEAD), TAG(TR)
@@ -3727,32 +3819,33 @@ static bool handle_in_row(GumboParser* parser, GumboToken* token) {
3727
3819
  if (!has_an_element_in_table_scope(parser, GUMBO_TAG_TR)) {
3728
3820
  parser_add_parse_error(parser, token);
3729
3821
  ignore_token(parser);
3730
- return false;
3731
- } else {
3732
- clear_stack_to_table_row_context(parser);
3733
- pop_current_node(parser);
3734
- set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
3735
- parser->_parser_state->_reprocess_current_token = true;
3736
- return true;
3822
+ return;
3737
3823
  }
3738
- } else if (
3824
+ clear_stack_to_table_row_context(parser);
3825
+ pop_current_node(parser);
3826
+ set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
3827
+ parser->_parser_state->_reprocess_current_token = true;
3828
+ return;
3829
+ }
3830
+ if (
3739
3831
  tag_in(token, kEndTag, &(const TagSet) {TAG(TBODY), TAG(TFOOT), TAG(THEAD)})
3740
3832
  ) {
3741
- if (
3742
- !has_an_element_in_table_scope(parser, token->v.end_tag.tag)
3743
- || !has_an_element_in_table_scope(parser, GUMBO_TAG_TR)
3744
- ) {
3833
+ if (!has_an_element_in_table_scope(parser, token->v.end_tag.tag)) {
3745
3834
  parser_add_parse_error(parser, token);
3746
3835
  ignore_token(parser);
3747
- return false;
3748
- } else {
3749
- clear_stack_to_table_row_context(parser);
3750
- pop_current_node(parser);
3751
- set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
3752
- parser->_parser_state->_reprocess_current_token = true;
3753
- return true;
3836
+ return;
3837
+ }
3838
+ if (!has_an_element_in_table_scope(parser, GUMBO_TAG_TR)) {
3839
+ ignore_token(parser);
3840
+ return;
3754
3841
  }
3755
- } else if (
3842
+ clear_stack_to_table_row_context(parser);
3843
+ pop_current_node(parser);
3844
+ set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
3845
+ parser->_parser_state->_reprocess_current_token = true;
3846
+ return;
3847
+ }
3848
+ if (
3756
3849
  tag_in(token, kEndTag, &(const TagSet) {
3757
3850
  TAG(BODY), TAG(CAPTION), TAG(COL), TAG(COLGROUP), TAG(HTML),
3758
3851
  TAG(TD), TAG(TH)
@@ -3760,23 +3853,24 @@ static bool handle_in_row(GumboParser* parser, GumboToken* token) {
3760
3853
  ) {
3761
3854
  parser_add_parse_error(parser, token);
3762
3855
  ignore_token(parser);
3763
- return false;
3764
- } else {
3765
- return handle_in_table(parser, token);
3856
+ return;
3766
3857
  }
3858
+ handle_in_table(parser, token);
3767
3859
  }
3768
3860
 
3769
3861
  // https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-intd
3770
- static bool handle_in_cell(GumboParser* parser, GumboToken* token) {
3862
+ static void handle_in_cell(GumboParser* parser, GumboToken* token) {
3771
3863
  if (tag_in(token, kEndTag, &td_th_tags)) {
3772
3864
  GumboTag token_tag = token->v.end_tag.tag;
3773
3865
  if (!has_an_element_in_table_scope(parser, token_tag)) {
3774
3866
  parser_add_parse_error(parser, token);
3775
3867
  ignore_token(parser);
3776
- return false;
3868
+ return;
3777
3869
  }
3778
- return close_table_cell(parser, token, token_tag);
3779
- } else if (
3870
+ close_table_cell(parser, token, token_tag);
3871
+ return;
3872
+ }
3873
+ if (
3780
3874
  tag_in(token, kStartTag, &(const TagSet) {
3781
3875
  TAG(CAPTION), TAG(COL), TAG(COLGROUP), TAG(TBODY), TAG(TD),
3782
3876
  TAG(TFOOT), TAG(TH), TAG(THEAD), TAG(TR)
@@ -3790,19 +3884,22 @@ static bool handle_in_cell(GumboParser* parser, GumboToken* token) {
3790
3884
  gumbo_debug("Bailing out because there's no <td> or <th> in scope.\n");
3791
3885
  parser_add_parse_error(parser, token);
3792
3886
  ignore_token(parser);
3793
- return false;
3887
+ return;
3794
3888
  }
3795
3889
  parser->_parser_state->_reprocess_current_token = true;
3796
- return close_current_cell(parser, token);
3797
- } else if (
3890
+ close_current_cell(parser, token);
3891
+ return;
3892
+ }
3893
+ if (
3798
3894
  tag_in(token, kEndTag, &(const TagSet) {
3799
3895
  TAG(BODY), TAG(CAPTION), TAG(COL), TAG(COLGROUP), TAG(HTML)
3800
3896
  })
3801
3897
  ) {
3802
3898
  parser_add_parse_error(parser, token);
3803
3899
  ignore_token(parser);
3804
- return false;
3805
- } else if (
3900
+ return;
3901
+ }
3902
+ if (
3806
3903
  tag_in(token, kEndTag, &(const TagSet) {
3807
3904
  TAG(TABLE), TAG(TBODY), TAG(TFOOT), TAG(THEAD), TAG(TR)
3808
3905
  })
@@ -3810,43 +3907,50 @@ static bool handle_in_cell(GumboParser* parser, GumboToken* token) {
3810
3907
  if (!has_an_element_in_table_scope(parser, token->v.end_tag.tag)) {
3811
3908
  parser_add_parse_error(parser, token);
3812
3909
  ignore_token(parser);
3813
- return false;
3910
+ return;
3814
3911
  }
3815
3912
  parser->_parser_state->_reprocess_current_token = true;
3816
- return close_current_cell(parser, token);
3817
- } else {
3818
- return handle_in_body(parser, token);
3913
+ close_current_cell(parser, token);
3914
+ return;
3819
3915
  }
3916
+ handle_in_body(parser, token);
3820
3917
  }
3821
3918
 
3822
3919
  // https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inselect
3823
- static bool handle_in_select(GumboParser* parser, GumboToken* token) {
3920
+ static void handle_in_select(GumboParser* parser, GumboToken* token) {
3824
3921
  if (token->type == GUMBO_TOKEN_NULL) {
3825
3922
  parser_add_parse_error(parser, token);
3826
3923
  ignore_token(parser);
3827
- return false;
3828
- } else if (
3924
+ return;
3925
+ }
3926
+ if (
3829
3927
  token->type == GUMBO_TOKEN_CHARACTER
3830
3928
  || token->type == GUMBO_TOKEN_WHITESPACE
3831
3929
  ) {
3832
3930
  insert_text_token(parser, token);
3833
- return true;
3834
- } else if (token->type == GUMBO_TOKEN_DOCTYPE) {
3931
+ return;
3932
+ }
3933
+ if (token->type == GUMBO_TOKEN_COMMENT) {
3934
+ append_comment_node(parser, get_current_node(parser), token);
3935
+ return;
3936
+ }
3937
+ if (token->type == GUMBO_TOKEN_DOCTYPE) {
3835
3938
  parser_add_parse_error(parser, token);
3836
3939
  ignore_token(parser);
3837
- return false;
3838
- } else if (token->type == GUMBO_TOKEN_COMMENT) {
3839
- append_comment_node(parser, get_current_node(parser), token);
3840
- return true;
3841
- } else if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
3842
- return handle_in_body(parser, token);
3843
- } else if (tag_is(token, kStartTag, GUMBO_TAG_OPTION)) {
3940
+ return;
3941
+ }
3942
+ if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
3943
+ handle_in_body(parser, token);
3944
+ return;
3945
+ }
3946
+ if (tag_is(token, kStartTag, GUMBO_TAG_OPTION)) {
3844
3947
  if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTION)) {
3845
3948
  pop_current_node(parser);
3846
3949
  }
3847
3950
  insert_element_from_token(parser, token);
3848
- return true;
3849
- } else if (tag_is(token, kStartTag, GUMBO_TAG_OPTGROUP)) {
3951
+ return;
3952
+ }
3953
+ if (tag_is(token, kStartTag, GUMBO_TAG_OPTGROUP)) {
3850
3954
  if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTION)) {
3851
3955
  pop_current_node(parser);
3852
3956
  }
@@ -3854,8 +3958,9 @@ static bool handle_in_select(GumboParser* parser, GumboToken* token) {
3854
3958
  pop_current_node(parser);
3855
3959
  }
3856
3960
  insert_element_from_token(parser, token);
3857
- return true;
3858
- } else if (tag_is(token, kEndTag, GUMBO_TAG_OPTGROUP)) {
3961
+ return;
3962
+ }
3963
+ if (tag_is(token, kEndTag, GUMBO_TAG_OPTGROUP)) {
3859
3964
  GumboVector* open_elements = &parser->_parser_state->_open_elements;
3860
3965
  if (
3861
3966
  node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTION)
@@ -3868,37 +3973,39 @@ static bool handle_in_select(GumboParser* parser, GumboToken* token) {
3868
3973
  }
3869
3974
  if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTGROUP)) {
3870
3975
  pop_current_node(parser);
3871
- return true;
3872
- } else {
3873
- parser_add_parse_error(parser, token);
3874
- ignore_token(parser);
3875
- return false;
3976
+ return;
3876
3977
  }
3877
- } else if (tag_is(token, kEndTag, GUMBO_TAG_OPTION)) {
3978
+ parser_add_parse_error(parser, token);
3979
+ ignore_token(parser);
3980
+ return;
3981
+ }
3982
+ if (tag_is(token, kEndTag, GUMBO_TAG_OPTION)) {
3878
3983
  if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_OPTION)) {
3879
3984
  pop_current_node(parser);
3880
- return true;
3881
- } else {
3882
- parser_add_parse_error(parser, token);
3883
- ignore_token(parser);
3884
- return false;
3985
+ return;
3885
3986
  }
3886
- } else if (tag_is(token, kEndTag, GUMBO_TAG_SELECT)) {
3987
+ parser_add_parse_error(parser, token);
3988
+ ignore_token(parser);
3989
+ return;
3990
+ }
3991
+ if (tag_is(token, kEndTag, GUMBO_TAG_SELECT)) {
3887
3992
  if (!has_an_element_in_select_scope(parser, GUMBO_TAG_SELECT)) {
3888
3993
  parser_add_parse_error(parser, token);
3889
3994
  ignore_token(parser);
3890
- return false;
3995
+ return;
3891
3996
  }
3892
3997
  close_current_select(parser);
3893
- return true;
3894
- } else if (tag_is(token, kStartTag, GUMBO_TAG_SELECT)) {
3998
+ return;
3999
+ }
4000
+ if (tag_is(token, kStartTag, GUMBO_TAG_SELECT)) {
3895
4001
  parser_add_parse_error(parser, token);
3896
4002
  ignore_token(parser);
3897
4003
  if (has_an_element_in_select_scope(parser, GUMBO_TAG_SELECT)) {
3898
4004
  close_current_select(parser);
3899
4005
  }
3900
- return false;
3901
- } else if (
4006
+ return;
4007
+ }
4008
+ if (
3902
4009
  tag_in(token, kStartTag, &(const TagSet) {TAG(INPUT), TAG(KEYGEN), TAG(TEXTAREA)})
3903
4010
  ) {
3904
4011
  parser_add_parse_error(parser, token);
@@ -3908,23 +4015,25 @@ static bool handle_in_select(GumboParser* parser, GumboToken* token) {
3908
4015
  close_current_select(parser);
3909
4016
  parser->_parser_state->_reprocess_current_token = true;
3910
4017
  }
3911
- return false;
3912
- } else if (
4018
+ return;
4019
+ }
4020
+ if (
3913
4021
  tag_in(token, kStartTag, &(const TagSet){TAG(SCRIPT), TAG(TEMPLATE)})
3914
4022
  || tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)
3915
4023
  ) {
3916
- return handle_in_head(parser, token);
3917
- } else if (token->type == GUMBO_TOKEN_EOF) {
3918
- return handle_in_body(parser, token);
3919
- } else {
3920
- parser_add_parse_error(parser, token);
3921
- ignore_token(parser);
3922
- return false;
4024
+ handle_in_head(parser, token);
4025
+ return;
3923
4026
  }
4027
+ if (token->type == GUMBO_TOKEN_EOF) {
4028
+ handle_in_body(parser, token);
4029
+ return;
4030
+ }
4031
+ parser_add_parse_error(parser, token);
4032
+ ignore_token(parser);
3924
4033
  }
3925
4034
 
3926
4035
  // https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inselectintable
3927
- static bool handle_in_select_in_table(GumboParser* parser, GumboToken* token) {
4036
+ static void handle_in_select_in_table(GumboParser* parser, GumboToken* token) {
3928
4037
  static const TagSet tags = {
3929
4038
  TAG(CAPTION), TAG(TABLE), TAG(TBODY), TAG(TFOOT), TAG(THEAD),
3930
4039
  TAG(TR), TAG(TD), TAG(TH)
@@ -3933,27 +4042,23 @@ static bool handle_in_select_in_table(GumboParser* parser, GumboToken* token) {
3933
4042
  parser_add_parse_error(parser, token);
3934
4043
  close_current_select(parser);
3935
4044
  parser->_parser_state->_reprocess_current_token = true;
3936
- return false;
3937
- } else if (tag_in(token, kEndTag, &tags)) {
4045
+ return;
4046
+ }
4047
+ if (tag_in(token, kEndTag, &tags)) {
3938
4048
  parser_add_parse_error(parser, token);
3939
4049
  if (!has_an_element_in_table_scope(parser, token->v.end_tag.tag)) {
3940
4050
  ignore_token(parser);
3941
- return false;
3942
- } else {
3943
- close_current_select(parser);
3944
- // close_current_select already does the
3945
- // reset_insertion_mode_appropriately
3946
- // reset_insertion_mode_appropriately(parser);
3947
- parser->_parser_state->_reprocess_current_token = true;
3948
- return false;
4051
+ return;
3949
4052
  }
3950
- } else {
3951
- return handle_in_select(parser, token);
4053
+ close_current_select(parser);
4054
+ parser->_parser_state->_reprocess_current_token = true;
4055
+ return;
3952
4056
  }
4057
+ handle_in_select(parser, token);
3953
4058
  }
3954
4059
 
3955
4060
  // https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-intemplate
3956
- static bool handle_in_template(GumboParser* parser, GumboToken* token) {
4061
+ static void handle_in_template(GumboParser* parser, GumboToken* token) {
3957
4062
  GumboParserState* state = parser->_parser_state;
3958
4063
  switch (token->type) {
3959
4064
  case GUMBO_TOKEN_WHITESPACE:
@@ -3961,7 +4066,8 @@ static bool handle_in_template(GumboParser* parser, GumboToken* token) {
3961
4066
  case GUMBO_TOKEN_COMMENT:
3962
4067
  case GUMBO_TOKEN_NULL:
3963
4068
  case GUMBO_TOKEN_DOCTYPE:
3964
- return handle_in_body(parser, token);
4069
+ handle_in_body(parser, token);
4070
+ return;
3965
4071
  default:
3966
4072
  break;
3967
4073
  }
@@ -3972,8 +4078,10 @@ static bool handle_in_template(GumboParser* parser, GumboToken* token) {
3972
4078
  })
3973
4079
  || tag_is(token, kEndTag, GUMBO_TAG_TEMPLATE)
3974
4080
  ) {
3975
- return handle_in_head(parser, token);
3976
- } else if (
4081
+ handle_in_head(parser, token);
4082
+ return;
4083
+ }
4084
+ if (
3977
4085
  tag_in(token, kStartTag, &(const TagSet) {
3978
4086
  TAG(CAPTION), TAG(COLGROUP), TAG(TBODY), TAG(TFOOT), TAG(THEAD)
3979
4087
  })
@@ -3982,39 +4090,45 @@ static bool handle_in_template(GumboParser* parser, GumboToken* token) {
3982
4090
  push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
3983
4091
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE);
3984
4092
  state->_reprocess_current_token = true;
3985
- return true;
3986
- } else if (tag_is(token, kStartTag, GUMBO_TAG_COL)) {
4093
+ return;
4094
+ }
4095
+ if (tag_is(token, kStartTag, GUMBO_TAG_COL)) {
3987
4096
  pop_template_insertion_mode(parser);
3988
4097
  push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_COLUMN_GROUP);
3989
4098
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_COLUMN_GROUP);
3990
4099
  state->_reprocess_current_token = true;
3991
- return true;
3992
- } else if (tag_is(token, kStartTag, GUMBO_TAG_TR)) {
4100
+ return;
4101
+ }
4102
+ if (tag_is(token, kStartTag, GUMBO_TAG_TR)) {
3993
4103
  pop_template_insertion_mode(parser);
3994
4104
  push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
3995
4105
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_TABLE_BODY);
3996
4106
  state->_reprocess_current_token = true;
3997
- return true;
3998
- } else if (tag_in(token, kStartTag, &td_th_tags)) {
4107
+ return;
4108
+ }
4109
+ if (tag_in(token, kStartTag, &td_th_tags)) {
3999
4110
  pop_template_insertion_mode(parser);
4000
4111
  push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_ROW);
4001
4112
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_ROW);
4002
4113
  state->_reprocess_current_token = true;
4003
- return true;
4004
- } else if (token->type == GUMBO_TOKEN_START_TAG) {
4114
+ return;
4115
+ }
4116
+ if (token->type == GUMBO_TOKEN_START_TAG) {
4005
4117
  pop_template_insertion_mode(parser);
4006
4118
  push_template_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_BODY);
4007
4119
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_BODY);
4008
4120
  state->_reprocess_current_token = true;
4009
- return true;
4010
- } else if (token->type == GUMBO_TOKEN_END_TAG) {
4121
+ return;
4122
+ }
4123
+ if (token->type == GUMBO_TOKEN_END_TAG) {
4011
4124
  parser_add_parse_error(parser, token);
4012
4125
  ignore_token(parser);
4013
- return false;
4014
- } else if (token->type == GUMBO_TOKEN_EOF) {
4126
+ return;
4127
+ }
4128
+ if (token->type == GUMBO_TOKEN_EOF) {
4015
4129
  if (!has_open_element(parser, GUMBO_TAG_TEMPLATE)) {
4016
4130
  // Stop parsing.
4017
- return true;
4131
+ return;
4018
4132
  }
4019
4133
  parser_add_parse_error(parser, token);
4020
4134
  while (!node_html_tag_is(pop_current_node(parser), GUMBO_TAG_TEMPLATE))
@@ -4023,35 +4137,41 @@ static bool handle_in_template(GumboParser* parser, GumboToken* token) {
4023
4137
  pop_template_insertion_mode(parser);
4024
4138
  reset_insertion_mode_appropriately(parser);
4025
4139
  state->_reprocess_current_token = true;
4026
- return false;
4027
- } else {
4028
- assert(0);
4029
- return false;
4140
+ return;
4030
4141
  }
4142
+ assert(0 && "unreachable");
4031
4143
  }
4032
4144
 
4033
4145
  // https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-afterbody
4034
- static bool handle_after_body(GumboParser* parser, GumboToken* token) {
4146
+ static void handle_after_body(GumboParser* parser, GumboToken* token) {
4035
4147
  if (
4036
4148
  token->type == GUMBO_TOKEN_WHITESPACE
4037
4149
  || tag_is(token, kStartTag, GUMBO_TAG_HTML)
4038
4150
  ) {
4039
- return handle_in_body(parser, token);
4040
- } else if (token->type == GUMBO_TOKEN_COMMENT) {
4151
+ handle_in_body(parser, token);
4152
+ return;
4153
+ }
4154
+ if (token->type == GUMBO_TOKEN_COMMENT) {
4041
4155
  GumboNode* html_node = parser->_output->root;
4042
4156
  assert(html_node != NULL);
4043
4157
  append_comment_node(parser, html_node, token);
4044
- return true;
4045
- } else if (token->type == GUMBO_TOKEN_DOCTYPE) {
4158
+ return;
4159
+ }
4160
+ if (token->type == GUMBO_TOKEN_DOCTYPE) {
4046
4161
  parser_add_parse_error(parser, token);
4047
4162
  ignore_token(parser);
4048
- return false;
4049
- } else if (tag_is(token, kEndTag, GUMBO_TAG_HTML)) {
4163
+ return;
4164
+ }
4165
+ if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
4166
+ handle_in_body(parser, token);
4167
+ return;
4168
+ }
4169
+ if (tag_is(token, kEndTag, GUMBO_TAG_HTML)) {
4050
4170
  /* fragment case: ignore the closing HTML token */
4051
4171
  if (is_fragment_parser(parser)) {
4052
4172
  parser_add_parse_error(parser, token);
4053
4173
  ignore_token(parser);
4054
- return false;
4174
+ return;
4055
4175
  }
4056
4176
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_AFTER_BODY);
4057
4177
  GumboNode* html = parser->_parser_state->_open_elements.data[0];
@@ -4060,39 +4180,44 @@ static bool handle_after_body(GumboParser* parser, GumboToken* token) {
4060
4180
  parser->_parser_state->_current_token,
4061
4181
  &html->v.element
4062
4182
  );
4063
- return true;
4064
- } else if (token->type == GUMBO_TOKEN_EOF) {
4065
- return true;
4066
- } else {
4067
- parser_add_parse_error(parser, token);
4068
- set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_BODY);
4069
- parser->_parser_state->_reprocess_current_token = true;
4070
- return false;
4183
+ return;
4184
+ }
4185
+ if (token->type == GUMBO_TOKEN_EOF) {
4186
+ return;
4071
4187
  }
4188
+ parser_add_parse_error(parser, token);
4189
+ set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_BODY);
4190
+ parser->_parser_state->_reprocess_current_token = true;
4072
4191
  }
4073
4192
 
4074
4193
  // https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inframeset
4075
- static bool handle_in_frameset(GumboParser* parser, GumboToken* token) {
4194
+ static void handle_in_frameset(GumboParser* parser, GumboToken* token) {
4076
4195
  if (token->type == GUMBO_TOKEN_WHITESPACE) {
4077
4196
  insert_text_token(parser, token);
4078
- return true;
4079
- } else if (token->type == GUMBO_TOKEN_COMMENT) {
4197
+ return;
4198
+ }
4199
+ if (token->type == GUMBO_TOKEN_COMMENT) {
4080
4200
  append_comment_node(parser, get_current_node(parser), token);
4081
- return true;
4082
- } else if (token->type == GUMBO_TOKEN_DOCTYPE) {
4201
+ return;
4202
+ }
4203
+ if (token->type == GUMBO_TOKEN_DOCTYPE) {
4083
4204
  parser_add_parse_error(parser, token);
4084
4205
  ignore_token(parser);
4085
- return false;
4086
- } else if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
4087
- return handle_in_body(parser, token);
4088
- } else if (tag_is(token, kStartTag, GUMBO_TAG_FRAMESET)) {
4206
+ return;
4207
+ }
4208
+ if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
4209
+ handle_in_body(parser, token);
4210
+ return;
4211
+ }
4212
+ if (tag_is(token, kStartTag, GUMBO_TAG_FRAMESET)) {
4089
4213
  insert_element_from_token(parser, token);
4090
- return true;
4091
- } else if (tag_is(token, kEndTag, GUMBO_TAG_FRAMESET)) {
4214
+ return;
4215
+ }
4216
+ if (tag_is(token, kEndTag, GUMBO_TAG_FRAMESET)) {
4092
4217
  if (node_html_tag_is(get_current_node(parser), GUMBO_TAG_HTML)) {
4093
4218
  parser_add_parse_error(parser, token);
4094
4219
  ignore_token(parser);
4095
- return false;
4220
+ return;
4096
4221
  }
4097
4222
  pop_current_node(parser);
4098
4223
  if (
@@ -4101,42 +4226,47 @@ static bool handle_in_frameset(GumboParser* parser, GumboToken* token) {
4101
4226
  ) {
4102
4227
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_FRAMESET);
4103
4228
  }
4104
- return true;
4105
- } else if (tag_is(token, kStartTag, GUMBO_TAG_FRAME)) {
4229
+ return;
4230
+ }
4231
+ if (tag_is(token, kStartTag, GUMBO_TAG_FRAME)) {
4106
4232
  insert_element_from_token(parser, token);
4107
4233
  pop_current_node(parser);
4108
4234
  acknowledge_self_closing_tag(parser);
4109
- return true;
4110
- } else if (tag_is(token, kStartTag, GUMBO_TAG_NOFRAMES)) {
4111
- return handle_in_head(parser, token);
4112
- } else if (token->type == GUMBO_TOKEN_EOF) {
4113
- if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_HTML)) {
4235
+ return;
4236
+ }
4237
+ if (tag_is(token, kStartTag, GUMBO_TAG_NOFRAMES)) {
4238
+ handle_in_head(parser, token);
4239
+ return;
4240
+ }
4241
+ if (token->type == GUMBO_TOKEN_EOF) {
4242
+ if (!node_html_tag_is(get_current_node(parser), GUMBO_TAG_HTML))
4114
4243
  parser_add_parse_error(parser, token);
4115
- return false;
4116
- }
4117
- return true;
4118
- } else {
4119
- parser_add_parse_error(parser, token);
4120
- ignore_token(parser);
4121
- return false;
4244
+ return;
4122
4245
  }
4246
+ parser_add_parse_error(parser, token);
4247
+ ignore_token(parser);
4123
4248
  }
4124
4249
 
4125
4250
  // https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-afterframeset
4126
- static bool handle_after_frameset(GumboParser* parser, GumboToken* token) {
4251
+ static void handle_after_frameset(GumboParser* parser, GumboToken* token) {
4127
4252
  if (token->type == GUMBO_TOKEN_WHITESPACE) {
4128
4253
  insert_text_token(parser, token);
4129
- return true;
4130
- } else if (token->type == GUMBO_TOKEN_COMMENT) {
4254
+ return;
4255
+ }
4256
+ if (token->type == GUMBO_TOKEN_COMMENT) {
4131
4257
  append_comment_node(parser, get_current_node(parser), token);
4132
- return true;
4133
- } else if (token->type == GUMBO_TOKEN_DOCTYPE) {
4258
+ return;
4259
+ }
4260
+ if (token->type == GUMBO_TOKEN_DOCTYPE) {
4134
4261
  parser_add_parse_error(parser, token);
4135
4262
  ignore_token(parser);
4136
- return false;
4137
- } else if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
4138
- return handle_in_body(parser, token);
4139
- } else if (tag_is(token, kEndTag, GUMBO_TAG_HTML)) {
4263
+ return;
4264
+ }
4265
+ if (tag_is(token, kStartTag, GUMBO_TAG_HTML)) {
4266
+ handle_in_body(parser, token);
4267
+ return;
4268
+ }
4269
+ if (tag_is(token, kEndTag, GUMBO_TAG_HTML)) {
4140
4270
  GumboNode* html = parser->_parser_state->_open_elements.data[0];
4141
4271
  assert(node_html_tag_is(html, GUMBO_TAG_HTML));
4142
4272
  record_end_of_element (
@@ -4144,67 +4274,71 @@ static bool handle_after_frameset(GumboParser* parser, GumboToken* token) {
4144
4274
  &html->v.element
4145
4275
  );
4146
4276
  set_insertion_mode(parser, GUMBO_INSERTION_MODE_AFTER_AFTER_FRAMESET);
4147
- return true;
4148
- } else if (tag_is(token, kStartTag, GUMBO_TAG_NOFRAMES)) {
4277
+ return;
4278
+ }
4279
+ if (tag_is(token, kStartTag, GUMBO_TAG_NOFRAMES)) {
4149
4280
  return handle_in_head(parser, token);
4150
- } else if (token->type == GUMBO_TOKEN_EOF) {
4151
- return true;
4152
- } else {
4153
- parser_add_parse_error(parser, token);
4154
- ignore_token(parser);
4155
- return false;
4156
4281
  }
4282
+ if (token->type == GUMBO_TOKEN_EOF) {
4283
+ return;
4284
+ }
4285
+ parser_add_parse_error(parser, token);
4286
+ ignore_token(parser);
4157
4287
  }
4158
4288
 
4159
4289
  // https://html.spec.whatwg.org/multipage/parsing.html#the-after-after-body-insertion-mode
4160
- static bool handle_after_after_body(GumboParser* parser, GumboToken* token) {
4290
+ static void handle_after_after_body(GumboParser* parser, GumboToken* token) {
4161
4291
  if (token->type == GUMBO_TOKEN_COMMENT) {
4162
4292
  append_comment_node(parser, get_document_node(parser), token);
4163
- return true;
4164
- } else if (
4293
+ return;
4294
+ }
4295
+ if (
4165
4296
  token->type == GUMBO_TOKEN_DOCTYPE
4166
4297
  || token->type == GUMBO_TOKEN_WHITESPACE
4167
4298
  || tag_is(token, kStartTag, GUMBO_TAG_HTML)
4168
4299
  ) {
4169
- return handle_in_body(parser, token);
4170
- } else if (token->type == GUMBO_TOKEN_EOF) {
4171
- return true;
4172
- } else {
4173
- parser_add_parse_error(parser, token);
4174
- set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_BODY);
4175
- parser->_parser_state->_reprocess_current_token = true;
4176
- return false;
4300
+ handle_in_body(parser, token);
4301
+ return;
4177
4302
  }
4303
+ if (token->type == GUMBO_TOKEN_EOF) {
4304
+ return;
4305
+ }
4306
+ parser_add_parse_error(parser, token);
4307
+ set_insertion_mode(parser, GUMBO_INSERTION_MODE_IN_BODY);
4308
+ parser->_parser_state->_reprocess_current_token = true;
4178
4309
  }
4179
4310
 
4180
4311
  // https://html.spec.whatwg.org/multipage/parsing.html#the-after-after-frameset-insertion-mode
4181
- static bool handle_after_after_frameset (
4312
+ static void handle_after_after_frameset (
4182
4313
  GumboParser* parser,
4183
4314
  GumboToken* token
4184
4315
  ) {
4185
4316
  if (token->type == GUMBO_TOKEN_COMMENT) {
4186
4317
  append_comment_node(parser, get_document_node(parser), token);
4187
- return true;
4188
- } else if (
4318
+ return;
4319
+ }
4320
+ if (
4189
4321
  token->type == GUMBO_TOKEN_DOCTYPE
4190
4322
  || token->type == GUMBO_TOKEN_WHITESPACE
4191
4323
  || tag_is(token, kStartTag, GUMBO_TAG_HTML)
4192
4324
  ) {
4193
- return handle_in_body(parser, token);
4194
- } else if (token->type == GUMBO_TOKEN_EOF) {
4195
- return true;
4196
- } else if (tag_is(token, kStartTag, GUMBO_TAG_NOFRAMES)) {
4197
- return handle_in_head(parser, token);
4198
- } else {
4199
- parser_add_parse_error(parser, token);
4200
- ignore_token(parser);
4201
- return false;
4325
+ handle_in_body(parser, token);
4326
+ return;
4202
4327
  }
4328
+ if (token->type == GUMBO_TOKEN_EOF) {
4329
+ return;
4330
+ }
4331
+ if (tag_is(token, kStartTag, GUMBO_TAG_NOFRAMES)) {
4332
+ handle_in_head(parser, token);
4333
+ return;
4334
+ }
4335
+ parser_add_parse_error(parser, token);
4336
+ ignore_token(parser);
4203
4337
  }
4204
4338
 
4205
4339
  // Function pointers for each insertion mode.
4206
4340
  // Keep in sync with insertion_mode.h.
4207
- typedef bool (*TokenHandler)(GumboParser* parser, GumboToken* token);
4341
+ typedef void (*TokenHandler)(GumboParser* parser, GumboToken* token);
4208
4342
  static const TokenHandler kTokenHandlers[] = {
4209
4343
  handle_initial,
4210
4344
  handle_before_html,
@@ -4231,36 +4365,36 @@ static const TokenHandler kTokenHandlers[] = {
4231
4365
  handle_after_after_frameset
4232
4366
  };
4233
4367
 
4234
- static bool handle_html_content(GumboParser* parser, GumboToken* token) {
4368
+ static void handle_html_content(GumboParser* parser, GumboToken* token) {
4235
4369
  const GumboInsertionMode mode = parser->_parser_state->_insertion_mode;
4236
4370
  const TokenHandler handler = kTokenHandlers[mode];
4237
- return handler(parser, token);
4371
+ handler(parser, token);
4238
4372
  }
4239
4373
 
4240
4374
  // https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inforeign
4241
- static bool handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
4375
+ static void handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
4242
4376
  gumbo_debug("Handling foreign content");
4243
4377
  switch (token->type) {
4244
4378
  case GUMBO_TOKEN_NULL:
4245
4379
  parser_add_parse_error(parser, token);
4246
4380
  token->v.character = kUtf8ReplacementChar;
4247
4381
  insert_text_token(parser, token);
4248
- return false;
4382
+ return;
4249
4383
  case GUMBO_TOKEN_WHITESPACE:
4250
4384
  insert_text_token(parser, token);
4251
- return true;
4385
+ return;
4252
4386
  case GUMBO_TOKEN_CDATA:
4253
4387
  case GUMBO_TOKEN_CHARACTER:
4254
4388
  insert_text_token(parser, token);
4255
4389
  set_frameset_not_ok(parser);
4256
- return true;
4390
+ return;
4257
4391
  case GUMBO_TOKEN_COMMENT:
4258
4392
  append_comment_node(parser, get_current_node(parser), token);
4259
- return true;
4393
+ return;
4260
4394
  case GUMBO_TOKEN_DOCTYPE:
4261
4395
  parser_add_parse_error(parser, token);
4262
4396
  ignore_token(parser);
4263
- return false;
4397
+ return;
4264
4398
  default:
4265
4399
  // Fall through to the if-statements below.
4266
4400
  break;
@@ -4304,10 +4438,9 @@ static bool handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
4304
4438
  )
4305
4439
  );
4306
4440
  parser->_parser_state->_reprocess_current_token = true;
4307
- return false;
4441
+ return;
4308
4442
  }
4309
-
4310
- assert(token->type == GUMBO_TOKEN_START_TAG);
4443
+ // This is a start tag so the next if's then branch will be taken.
4311
4444
  }
4312
4445
 
4313
4446
  if (token->type == GUMBO_TOKEN_START_TAG) {
@@ -4326,63 +4459,59 @@ static bool handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
4326
4459
  pop_current_node(parser);
4327
4460
  acknowledge_self_closing_tag(parser);
4328
4461
  }
4329
- return true;
4462
+ return;
4330
4463
  // </script> tags are handled like any other end tag, putting the script's
4331
4464
  // text into a text node child and closing the current node.
4332
- } else {
4333
- assert(token->type == GUMBO_TOKEN_END_TAG);
4334
- GumboNode* node = get_current_node(parser);
4335
- GumboTag tag = token->v.end_tag.tag;
4336
- const char* name = token->v.end_tag.name;
4337
- assert(node != NULL);
4465
+ }
4466
+ assert(token->type == GUMBO_TOKEN_END_TAG);
4467
+ GumboNode* node = get_current_node(parser);
4468
+ GumboTag tag = token->v.end_tag.tag;
4469
+ const char* name = token->v.end_tag.name;
4470
+ assert(node != NULL);
4338
4471
 
4339
- bool is_success = true;
4340
- if (!node_tagname_is(node, tag, name)) {
4341
- parser_add_parse_error(parser, token);
4342
- is_success = false;
4343
- }
4344
- int i = parser->_parser_state->_open_elements.length;
4345
- for (--i; i > 0;) {
4346
- // Here we move up the stack until we find an HTML element (in which
4347
- // case we do nothing) or we find the element that we're about to
4348
- // close (in which case we pop everything we've seen until that
4349
- // point.)
4350
- gumbo_debug("Foreign %s node at %d.\n", node->v.element.name, i);
4351
- if (node_tagname_is(node, tag, name)) {
4352
- gumbo_debug("Matches.\n");
4353
- while (node != pop_current_node(parser)) {
4354
- // Pop all the nodes below the current one. Node is guaranteed to
4355
- // be an element on the stack of open elements (set below), so
4356
- // this loop is guaranteed to terminate.
4357
- }
4358
- return is_success;
4359
- }
4360
- --i;
4361
- node = parser->_parser_state->_open_elements.data[i];
4362
- if (node->v.element.tag_namespace == GUMBO_NAMESPACE_HTML) {
4363
- // The loop continues only in foreign namespaces.
4364
- break;
4472
+ if (!node_tagname_is(node, tag, name))
4473
+ parser_add_parse_error(parser, token);
4474
+ int i = parser->_parser_state->_open_elements.length;
4475
+ for (--i; i > 0;) {
4476
+ // Here we move up the stack until we find an HTML element (in which
4477
+ // case we do nothing) or we find the element that we're about to
4478
+ // close (in which case we pop everything we've seen until that
4479
+ // point.)
4480
+ gumbo_debug("Foreign %s node at %d.\n", node->v.element.name, i);
4481
+ if (node_tagname_is(node, tag, name)) {
4482
+ gumbo_debug("Matches.\n");
4483
+ while (node != pop_current_node(parser)) {
4484
+ // Pop all the nodes below the current one. Node is guaranteed to
4485
+ // be an element on the stack of open elements (set below), so
4486
+ // this loop is guaranteed to terminate.
4365
4487
  }
4488
+ return;
4489
+ }
4490
+ --i;
4491
+ node = parser->_parser_state->_open_elements.data[i];
4492
+ if (node->v.element.tag_namespace == GUMBO_NAMESPACE_HTML) {
4493
+ // The loop continues only in foreign namespaces.
4494
+ break;
4366
4495
  }
4367
- assert(node->v.element.tag_namespace == GUMBO_NAMESPACE_HTML);
4368
- if (i == 0)
4369
- return is_success;
4370
- // We can't call handle_token directly because the current node is still in
4371
- // a foriegn namespace, so it would re-enter this and result in infinite
4372
- // recursion.
4373
- return handle_html_content(parser, token) && is_success;
4374
4496
  }
4497
+ assert(node->v.element.tag_namespace == GUMBO_NAMESPACE_HTML);
4498
+ if (i == 0)
4499
+ return;
4500
+ // We can't call handle_token directly because the current node is still in
4501
+ // a foriegn namespace, so it would re-enter this and result in infinite
4502
+ // recursion.
4503
+ handle_html_content(parser, token);
4375
4504
  }
4376
4505
 
4377
4506
  // https://html.spec.whatwg.org/multipage/parsing.html#tree-construction
4378
- static bool handle_token(GumboParser* parser, GumboToken* token) {
4507
+ static void handle_token(GumboParser* parser, GumboToken* token) {
4379
4508
  if (
4380
4509
  parser->_parser_state->_ignore_next_linefeed
4381
4510
  && token->type == GUMBO_TOKEN_WHITESPACE && token->v.character == '\n'
4382
4511
  ) {
4383
4512
  parser->_parser_state->_ignore_next_linefeed = false;
4384
4513
  ignore_token(parser);
4385
- return true;
4514
+ return;
4386
4515
  }
4387
4516
  // This needs to be reset both here and in the conditional above to catch both
4388
4517
  // the case where the next token is not whitespace (so we don't ignore
@@ -4424,9 +4553,9 @@ static bool handle_token(GumboParser* parser, GumboToken* token) {
4424
4553
  token->type == GUMBO_TOKEN_NULL ||
4425
4554
  token->type == GUMBO_TOKEN_WHITESPACE)) ||
4426
4555
  token->type == GUMBO_TOKEN_EOF) {
4427
- return handle_html_content(parser, token);
4556
+ handle_html_content(parser, token);
4428
4557
  } else {
4429
- return handle_in_foreign_content(parser, token);
4558
+ handle_in_foreign_content(parser, token);
4430
4559
  }
4431
4560
  }
4432
4561
 
@@ -4517,7 +4646,7 @@ static void fragment_parser_init (
4517
4646
  break;
4518
4647
 
4519
4648
  case GUMBO_TAG_SCRIPT:
4520
- gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT);
4649
+ gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT_DATA);
4521
4650
  break;
4522
4651
 
4523
4652
  case GUMBO_TAG_NOSCRIPT:
@@ -4554,7 +4683,7 @@ static void fragment_parser_init (
4554
4683
  // 11.
4555
4684
  if (ctx_has_form_ancestor
4556
4685
  || (ctx_tag == GUMBO_TAG_FORM
4557
- && fragment_namespace == GUMBO_NAMESPACE_HTML)) {
4686
+ && fragment_namespace == GUMBO_NAMESPACE_HTML)) {
4558
4687
  static const GumboNode form_ancestor = {
4559
4688
  .type = GUMBO_NODE_ELEMENT,
4560
4689
  .parent = NULL,
@@ -4613,19 +4742,18 @@ GumboOutput* gumbo_parse_with_options (
4613
4742
 
4614
4743
  const unsigned int max_tree_depth = options->max_tree_depth;
4615
4744
  GumboToken token;
4616
- bool has_error = false;
4617
4745
 
4618
4746
  do {
4619
4747
  if (state->_reprocess_current_token) {
4620
4748
  state->_reprocess_current_token = false;
4621
4749
  } else {
4622
- GumboNode* current_node = get_current_node(&parser);
4623
- gumbo_tokenizer_set_is_current_node_foreign (
4750
+ GumboNode* adjusted_current_node = get_adjusted_current_node(&parser);
4751
+ gumbo_tokenizer_set_is_adjusted_current_node_foreign (
4624
4752
  &parser,
4625
- current_node &&
4626
- current_node->v.element.tag_namespace != GUMBO_NAMESPACE_HTML
4753
+ adjusted_current_node &&
4754
+ adjusted_current_node->v.element.tag_namespace != GUMBO_NAMESPACE_HTML
4627
4755
  );
4628
- has_error = !gumbo_lex(&parser, &token) || has_error;
4756
+ gumbo_lex(&parser, &token);
4629
4757
  }
4630
4758
 
4631
4759
  const char* token_type = "text";
@@ -4649,17 +4777,17 @@ GumboOutput* gumbo_parse_with_options (
4649
4777
  break;
4650
4778
  }
4651
4779
  gumbo_debug (
4652
- "Handling %s token @%zu:%zu in state %u.\n",
4780
+ "Handling %s token @%lu:%lu in state %u.\n",
4653
4781
  (char*) token_type,
4654
- token.position.line,
4655
- token.position.column,
4782
+ (unsigned long)token.position.line,
4783
+ (unsigned long)token.position.column,
4656
4784
  state->_insertion_mode
4657
4785
  );
4658
4786
 
4659
4787
  state->_current_token = &token;
4660
4788
  state->_self_closing_flag_acknowledged = false;
4661
4789
 
4662
- has_error = !handle_token(&parser, &token) || has_error;
4790
+ handle_token(&parser, &token);
4663
4791
 
4664
4792
  // Check for memory leaks when ownership is transferred from start tag
4665
4793
  // tokens to nodes.
@@ -4671,19 +4799,25 @@ GumboOutput* gumbo_parse_with_options (
4671
4799
  );
4672
4800
 
4673
4801
  if (!state->_reprocess_current_token) {
4802
+ // If we're done with the token, check for unacknowledged self-closing
4803
+ // flags on start tags.
4674
4804
  if (token.type == GUMBO_TOKEN_START_TAG &&
4675
4805
  token.v.start_tag.is_self_closing &&
4676
4806
  !state->_self_closing_flag_acknowledged) {
4677
- GumboError* error = parser_add_parse_error(&parser, &token);
4678
- if (error)
4679
- error->type = GUMBO_ERR_UNACKNOWLEDGED_SELF_CLOSING_TAG;
4807
+ GumboError* error = gumbo_add_error(&parser);
4808
+ if (error) {
4809
+ // This is essentially a tokenizer error that's only caught during
4810
+ // tree construction.
4811
+ error->type = GUMBO_ERR_NON_VOID_HTML_ELEMENT_START_TAG_WITH_TRAILING_SOLIDUS;
4812
+ error->original_text = token.original_text;
4813
+ error->position = token.position;
4814
+ }
4680
4815
  }
4816
+ // Make sure we free the end tag's name since it doesn't get transferred
4817
+ // to a token.
4681
4818
  if (token.type == GUMBO_TOKEN_END_TAG &&
4682
- token.v.end_tag.is_self_closing) {
4683
- GumboError* error = parser_add_parse_error(&parser, &token);
4684
- if (error)
4685
- error->type = GUMBO_ERR_SELF_CLOSING_END_TAG;
4686
- }
4819
+ token.v.end_tag.tag == GUMBO_TAG_UNKNOWN)
4820
+ gumbo_free(token.v.end_tag.name);
4687
4821
  }
4688
4822
 
4689
4823
  if (unlikely(state->_open_elements.length > max_tree_depth)) {
@@ -4697,7 +4831,7 @@ GumboOutput* gumbo_parse_with_options (
4697
4831
 
4698
4832
  } while (
4699
4833
  (token.type != GUMBO_TOKEN_EOF || state->_reprocess_current_token)
4700
- && !(options->stop_on_first_error && has_error)
4834
+ && !(options->stop_on_first_error && parser._output->document_error)
4701
4835
  );
4702
4836
 
4703
4837
  finish_parsing(&parser);
@@ -4725,6 +4859,8 @@ const char* gumbo_status_to_string(GumboOutputStatus status) {
4725
4859
  return "OK";
4726
4860
  case GUMBO_STATUS_OUT_OF_MEMORY:
4727
4861
  return "System allocator returned NULL during parsing";
4862
+ case GUMBO_STATUS_TOO_MANY_ATTRIBUTES:
4863
+ return "Attributes per element limit exceeded";
4728
4864
  case GUMBO_STATUS_TREE_TOO_DEEP:
4729
4865
  return "Document tree depth limit exceeded";
4730
4866
  default: