RubyGems - nokogumbo - Versions diffs - 1.5.0 → 2.0.0.pre.alpha - Mend

nokogumbo 1.5.0 → 2.0.0.pre.alpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (55) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +56 -0
data/README.md +146 -22
data/ext/nokogumbo/extconf.rb +116 -0
data/ext/{nokogumboc → nokogumbo}/nokogumbo.c +174 -71
data/gumbo-parser/src/ascii.c +33 -0
data/gumbo-parser/src/ascii.h +31 -0
data/gumbo-parser/src/attribute.c +26 -28
data/gumbo-parser/src/attribute.h +3 -23
data/gumbo-parser/src/char_ref.c +135 -2351
data/gumbo-parser/src/char_ref.h +13 -29
data/gumbo-parser/src/error.c +215 -133
data/gumbo-parser/src/error.h +34 -49
data/gumbo-parser/src/foreign_attrs.c +104 -0
data/gumbo-parser/src/gumbo.h +506 -304
data/gumbo-parser/src/insertion_mode.h +4 -28
data/gumbo-parser/src/macros.h +91 -0
data/gumbo-parser/src/parser.c +1989 -1431
data/gumbo-parser/src/parser.h +6 -22
data/gumbo-parser/src/replacement.h +33 -0
data/gumbo-parser/src/string_buffer.c +43 -50
data/gumbo-parser/src/string_buffer.h +24 -40
data/gumbo-parser/src/string_piece.c +39 -39
data/gumbo-parser/src/svg_attrs.c +174 -0
data/gumbo-parser/src/svg_tags.c +137 -0
data/gumbo-parser/src/tag.c +186 -59
data/gumbo-parser/src/tag_lookup.c +382 -0
data/gumbo-parser/src/tag_lookup.h +13 -0
data/gumbo-parser/src/token_type.h +1 -25
data/gumbo-parser/src/tokenizer.c +899 -495
data/gumbo-parser/src/tokenizer.h +37 -37
data/gumbo-parser/src/tokenizer_states.h +6 -22
data/gumbo-parser/src/utf8.c +103 -86
data/gumbo-parser/src/utf8.h +37 -41
data/gumbo-parser/src/util.c +48 -38
data/gumbo-parser/src/util.h +10 -40
data/gumbo-parser/src/vector.c +45 -57
data/gumbo-parser/src/vector.h +17 -39
data/lib/nokogumbo.rb +10 -174
data/lib/nokogumbo/html5.rb +250 -0
data/lib/nokogumbo/html5/document.rb +37 -0
data/lib/nokogumbo/html5/document_fragment.rb +46 -0
data/lib/nokogumbo/version.rb +3 -0
data/lib/nokogumbo/xml/node.rb +57 -0
metadata +32 -19
data/ext/nokogumboc/extconf.rb +0 -60
data/gumbo-parser/src/char_ref.rl +0 -2554
data/gumbo-parser/src/string_piece.h +0 -38
data/gumbo-parser/src/tag.in +0 -150
data/gumbo-parser/src/tag_enum.h +0 -153
data/gumbo-parser/src/tag_gperf.h +0 -105
data/gumbo-parser/src/tag_sizes.h +0 -4
data/gumbo-parser/src/tag_strings.h +0 -153
data/gumbo-parser/visualc/include/strings.h +0 -4
data/test-nokogumbo.rb +0 -190

data/gumbo-parser/src/tag_lookup.h ADDED Viewed

@@ -0,0 +1,13 @@
+#ifndef GUMBO_TAG_LOOKUP_H_
+#define GUMBO_TAG_LOOKUP_H_
+#include "gumbo.h"
+typedef struct {
+    const char *key;
+    const GumboTag tag;
+} TagHashSlot;
+const TagHashSlot *gumbo_tag_lookup(const char *str, size_t len);
+#endif // GUMBO_TAG_LOOKUP_H_

data/gumbo-parser/src/token_type.h CHANGED Viewed

@@ -1,26 +1,6 @@
-// Copyright 2011 Google Inc. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-// Author: jdtang@google.com (Jonathan Tang)
 #ifndef GUMBO_TOKEN_TYPE_H_
 #define GUMBO_TOKEN_TYPE_H_
-#ifdef __cplusplus
-extern "C" {
-#endif
 // An enum representing the type of token.
 typedef enum {
   GUMBO_TOKEN_DOCTYPE,
@@ -34,8 +14,4 @@ typedef enum {
   GUMBO_TOKEN_EOF
 } GumboTokenType;
-#ifdef __cplusplus
-}  // extern C
-#endif
-#endif  // GUMBO_TOKEN_TYPE_H_
+#endif // GUMBO_TOKEN_TYPE_H_

data/gumbo-parser/src/tokenizer.c CHANGED Viewed

@@ -1,69 +1,68 @@
-// Copyright 2010 Google Inc. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-// Author: jdtang@google.com (Jonathan Tang)
-//
-// Coding conventions specific to this file:
-//
-// 1. Functions that fill in a token should be named emit_*, and should be
-// followed immediately by a return from the tokenizer (true if no error
-// occurred, false if an error occurred).  Sometimes the emit functions
-// themselves return a boolean so that they can be combined with the return
-// statement; in this case, they should match this convention.
-// 2. Functions that shuffle data from temporaries to final API structures
-// should be named finish_*, and be called just before the tokenizer exits the
-// state that accumulates the temporary.
-// 3. All internal data structures should be kept in an initialized state from
-// tokenizer creation onwards, ready to accept input.  When a buffer's flushed
-// and reset, it should be deallocated and immediately reinitialized.
-// 4. Make sure there are appropriate break statements following each state.
-// 5. Assertions on the state of the temporary and tag buffers are usually a
-// good idea, and should go at the entry point of each state when added.
-// 6. Statement order within states goes:
-//    1. Add parse errors, if appropriate.
-//    2. Call finish_* functions to build up tag state.
-//    2. Switch to new state.  Set _reconsume flag if appropriate.
-//    3. Perform any other temporary buffer manipulation.
-//    4. Emit tokens
-//    5. Return/break.
-// This order ensures that we can verify that every emit is followed by a
-// return, ensures that the correct state is recorded with any parse errors, and
-// prevents parse error position from being messed up by possible mark/resets in
-// temporary buffer manipulation.
-#include "tokenizer.h"
+/*
+ Copyright 2010 Google Inc.
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+    https://www.apache.org/licenses/LICENSE-2.0
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+/*
+ Coding conventions specific to this file:
+ 1. Functions that fill in a token should be named emit_*, and should be
+    followed immediately by a return from the tokenizer (true if no error
+    occurred, false if an error occurred). Sometimes the emit functions
+    themselves return a boolean so that they can be combined with the return
+    statement; in this case, they should match this convention.
+ 2. Functions that shuffle data from temporaries to final API structures
+    should be named finish_*, and be called just before the tokenizer exits the
+    state that accumulates the temporary.
+ 3. All internal data structures should be kept in an initialized state from
+    tokenizer creation onwards, ready to accept input. When a buffer's flushed
+    and reset, it should be deallocated and immediately reinitialized.
+ 4. Make sure there are appropriate break statements following each state.
+ 5. Assertions on the state of the temporary and tag buffers are usually a
+    good idea, and should go at the entry point of each state when added.
+ 6. Statement order within states goes:
+    1. Add parse errors, if appropriate.
+    2. Call finish_* functions to build up tag state.
+    3. Switch to new state. Set _reconsume flag if appropriate.
+    4. Perform any other temporary buffer manipulation.
+    5. Emit tokens.
+    6. Return/break.
+    This order ensures that we can verify that every emit is followed by
+    a return, ensures that the correct state is recorded with any parse
+    errors, and prevents parse error position from being messed up by
+    possible mark/resets in temporary buffer manipulation.
+*/
 #include <assert.h>
-#include <stdbool.h>
 #include <string.h>
+#include "tokenizer.h"
+#include "ascii.h"
 #include "attribute.h"
 #include "char_ref.h"
 #include "error.h"
 #include "gumbo.h"
 #include "parser.h"
 #include "string_buffer.h"
-#include "string_piece.h"
 #include "token_type.h"
 #include "tokenizer_states.h"
 #include "utf8.h"
 #include "util.h"
 #include "vector.h"
-// Compared against _script_data_buffer to determine if we're in double-escaped
-// script mode.
-const GumboStringPiece kScriptTag = {"script", 6};
+// Compared against _script_data_buffer to determine if we're in
+// double-escaped script mode.
+static const GumboStringPiece kScriptTag = {.data = "script", .length = 6};
 // An enum for the return value of each individual state.
 typedef enum {
@@ -86,31 +85,35 @@ typedef struct GumboInternalTagState {
   // the buffer can be re-used for building up attributes.
   GumboTag _tag;
+  // The current tag name. It's set at the same time that _tag is set if _tag
+  // is set to GUMBO_TAG_UNKNOWN.
+  char *_name;
   // The starting location of the text in the buffer.
   GumboSourcePosition _start_pos;
-  // The current list of attributes.  This is copied (and ownership of its data
-  // transferred) to the GumboStartTag token upon completion of the tag.  New
+  // The current list of attributes. This is copied (and ownership of its data
+  // transferred) to the GumboStartTag token upon completion of the tag. New
   // attributes are added as soon as their attribute name state is complete, and
   // values are filled in by operating on _attributes.data[attributes.length-1].
   GumboVector /* GumboAttribute */ _attributes;
-  // If true, the next attribute value to be finished should be dropped.  This
+  // If true, the next attribute value to be finished should be dropped. This
   // happens if a duplicate attribute name is encountered - we want to consume
   // the attribute value, but shouldn't overwrite the existing value.
   bool _drop_next_attr_value;
   // The state that caused the tokenizer to switch into a character reference in
-  // attribute value state.  This is used to set the additional allowed
-  // character, and is switched back to on completion.  Initialized as the
+  // attribute value state. This is used to set the additional allowed
+  // character, and is switched back to on completion. Initialized as the
   // tokenizer enters the character reference state.
   GumboTokenizerEnum _attr_value_state;
-  // The last start tag to have been emitted by the tokenizer.  This is
+  // The last start tag to have been emitted by the tokenizer. This is
   // necessary to check for appropriate end tags.
   GumboTag _last_start_tag;
-  // If true, then this is a start tag.  If false, it's an end tag.  This is
+  // If true, then this is a start tag. If false, it's an end tag. This is
   // necessary to generate the appropriate token type at tag-closing time.
   bool _is_start_tag;
@@ -121,43 +124,43 @@ typedef struct GumboInternalTagState {
 // This is the main tokenizer state struct, containing all state used in
 // tokenizing the input stream.
 typedef struct GumboInternalTokenizerState {
-  // The current lexer state.  Starts in GUMBO_LEX_DATA.
+  // The current lexer state. Starts in GUMBO_LEX_DATA.
   GumboTokenizerEnum _state;
   // A flag indicating whether the current input character needs to be reconsumed
   // in another state, or whether the next input character should be read for
-  // the next iteration of the state loop.  This is set when the spec reads
+  // the next iteration of the state loop. This is set when the spec reads
   // "Reconsume the current input character in..."
   bool _reconsume_current_input;
-  // A flag indicating whether the current node is a foreign element.  This is
+  // A flag indicating whether the current node is a foreign element. This is
   // set by gumbo_tokenizer_set_is_current_node_foreign and checked in the
   // markup declaration state.
   bool _is_current_node_foreign;
-  // A flag indicating whether the tokenizer is in a CDATA section.  If so, then
+  // A flag indicating whether the tokenizer is in a CDATA section. If so, then
   // text tokens emitted will be GUMBO_TOKEN_CDATA.
   bool _is_in_cdata;
   // Certain states (notably character references) may emit two character tokens
-  // at once, but the contract for lex() fills in only one token at a time.  The
+  // at once, but the contract for lex() fills in only one token at a time. The
   // extra character is buffered here, and then this is checked on entry to
-  // lex().  If a character is stored here, it's immediately emitted and control
-  // returns from the lexer.  kGumboNoChar is used to represent 'no character
+  // lex(). If a character is stored here, it's immediately emitted and control
+  // returns from the lexer. kGumboNoChar is used to represent 'no character
   // stored.'
   //
   // Note that characters emitted through this mechanism will have their source
   // position marked as the character under the mark, i.e. multiple characters
-  // may be emitted with the same position.  This is desirable for character
-  // references, but unsuitable for many other cases.  Use the _temporary_buffer
+  // may be emitted with the same position. This is desirable for character
+  // references, but unsuitable for many other cases. Use the _temporary_buffer
   // mechanism if the buffered characters must have their original positions in
   // the document.
   int _buffered_emit_char;
   // A temporary buffer to accumulate characters, as described by the "temporary
-  // buffer" phrase in the tokenizer spec.  We use this in a somewhat unorthodox
+  // buffer" phrase in the tokenizer spec. We use this in a somewhat unorthodox
   // way: we record the specific character to go into the buffer, which may
-  // sometimes be a lowercased version of the actual input character.  However,
+  // sometimes be a lowercased version of the actual input character. However,
   // we *also* use utf8iterator_mark() to record the position at tag start.
   // When we start flushing the temporary buffer, we set _temporary_buffer_emit
   // to the start of it, and then increment it for each call to the tokenizer.
@@ -167,13 +170,13 @@ typedef struct GumboInternalTokenizerState {
   GumboStringBuffer _temporary_buffer;
   // The current cursor position we're emitting from within
-  // _temporary_buffer.data.  NULL whenever we're not flushing the buffer.
+  // _temporary_buffer.data. NULL whenever we're not flushing the buffer.
   const char* _temporary_buffer_emit;
   // The temporary buffer is also used by the spec to check whether we should
   // enter the script data double escaped state, but we can't use the same
   // buffer for both because we have to flush out "<s" as emits while still
-  // maintaining the context that will eventually become "script".  This is a
+  // maintaining the context that will eventually become "script". This is a
   // separate buffer that's used in place of the temporary buffer for states
   // that may enter the script data double escape start state.
   GumboStringBuffer _script_data_buffer;
@@ -189,7 +192,7 @@ typedef struct GumboInternalTokenizerState {
   // Current tag state.
   GumboTagState _tag_state;
-  // Doctype state.  We use the temporary buffer to accumulate characters (it's
+  // Doctype state. We use the temporary buffer to accumulate characters (it's
   // not used for anything else in the doctype states), and then freshly
   // allocate the strings in the doctype token, then copy it over on emit.
   GumboTokenDocType _doc_type_state;
@@ -199,8 +202,10 @@ typedef struct GumboInternalTokenizerState {
 } GumboTokenizerState;
 // Adds an ERR_UNEXPECTED_CODE_POINT parse error to the parser's error struct.
-static void tokenizer_add_parse_error(
-    GumboParser* parser, GumboErrorType type) {
+static void tokenizer_add_parse_error (
+  GumboParser* parser,
+  GumboErrorType type
+) {
   GumboError* error = gumbo_add_error(parser);
   if (!error) {
     return;
@@ -309,14 +314,14 @@ static void tokenizer_add_parse_error(
 }
 static bool is_alpha(int c) {
-  // We don't use ISO C isupper/islower functions here because they
-  // depend upon the program's locale, while the behavior of the HTML5 spec is
-  // independent of which locale the program is run in.
-  return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
+  // We don't use the ISO C isalpha() function here because it depends
+  // on the current locale, whereas the behavior in the HTML5 spec is
+  // locale-independent.
+  return ((unsigned) c | 32) - 'a' < 26;
 }
 static int ensure_lowercase(int c) {
-  return c >= 'A' && c <= 'Z' ? c + 0x20 : c;
+  return gumbo_ascii_tolower(c);
 }
 static GumboTokenType get_char_token_type(bool is_in_cdata, int c) {
@@ -346,7 +351,7 @@ static GumboTokenType get_char_token_type(bool is_in_cdata, int c) {
 // text that will eventually be emitted, it needs to be called a couple of
 // states before the spec says "Set the temporary buffer to the empty string".
 // In general, this should be called whenever there's a transition to a
-// "less-than sign state".  The initial < and possibly / then need to be
+// "less-than sign state". The initial < and possibly / then need to be
 // appended to the temporary buffer, their presence needs to be accounted for in
 // states that compare the temporary buffer against a literal value, and
 // spec stanzas that say "emit a < and / character token along with a character
@@ -356,30 +361,40 @@ static void clear_temporary_buffer(GumboParser* parser) {
   GumboTokenizerState* tokenizer = parser->_tokenizer_state;
   assert(!tokenizer->_temporary_buffer_emit);
   utf8iterator_mark(&tokenizer->_input);
-  gumbo_string_buffer_clear(parser, &tokenizer->_temporary_buffer);
+  gumbo_string_buffer_clear(&tokenizer->_temporary_buffer);
   // The temporary buffer and script data buffer are the same object in the
   // spec, so the script data buffer should be cleared as well.
-  gumbo_string_buffer_clear(parser, &tokenizer->_script_data_buffer);
+  gumbo_string_buffer_clear(&tokenizer->_script_data_buffer);
 }
 // Appends a codepoint to the temporary buffer.
-static void append_char_to_temporary_buffer(
-    GumboParser* parser, int codepoint) {
-  gumbo_string_buffer_append_codepoint(
-      parser, codepoint, &parser->_tokenizer_state->_temporary_buffer);
+static void append_char_to_temporary_buffer (
+  GumboParser* parser,
+  int codepoint
+) {
+  gumbo_string_buffer_append_codepoint (
+   codepoint,
+   &parser->_tokenizer_state->_temporary_buffer
+  );
 }
-// Checks to see if the temporary buffer equals a certain string.
-// Make sure this remains side-effect free; it's used in assertions.
 #ifndef NDEBUG
-static bool temporary_buffer_equals(GumboParser* parser, const char* text) {
-  GumboStringBuffer* buffer = &parser->_tokenizer_state->_temporary_buffer;
-  // TODO(jdtang): See if the extra strlen is a performance problem, and replace
-  // it with an explicit sizeof(literal) if necessary.  I don't think it will
-  // be, as this is only used in a couple of rare states.
-  int text_len = strlen(text);
-  return text_len == buffer->length &&
-         memcmp(buffer->data, text, text_len) == 0;
+static bool temporary_buffer_equals__ (
+  const GumboParser* parser,
+  const char* text,
+  size_t text_len
+) {
+  const GumboStringBuffer* buf = &parser->_tokenizer_state->_temporary_buffer;
+  return
+    text_len == buf->length
+    && memcmp(buf->data, text, text_len) == 0;
+}
+#define temporary_buffer_equals(parser, text) \
+  temporary_buffer_equals__(parser, "" text, sizeof(text) - 1)
+static bool temporary_buffer_is_empty(const GumboParser* parser) {
+  return parser->_tokenizer_state->_temporary_buffer.length == 0;
 }
 #endif
@@ -387,9 +402,9 @@ static void doc_type_state_init(GumboParser* parser) {
   GumboTokenDocType* doc_type_state =
       &parser->_tokenizer_state->_doc_type_state;
   // We initialize these to NULL here so that we don't end up leaking memory if
-  // we never see a doctype token.  When we do see a doctype token, we reset
+  // we never see a doctype token. When we do see a doctype token, we reset
   // them to a freshly-allocated empty string so that we can present a uniform
-  // interface to client code and not make them check for null.  Ownership is
+  // interface to client code and not make them check for null. Ownership is
   // transferred to the doctype token when it's emitted.
   doc_type_state->name = NULL;
   doc_type_state->public_identifier = NULL;
@@ -408,7 +423,7 @@ static void reset_token_start_point(GumboTokenizerState* tokenizer) {
 }
 // Sets the tag buffer original text and start point to the current iterator
-// position.  This is necessary because attribute names & values may have
+// position. This is necessary because attribute names & values may have
 // whitespace preceding them, and so we can't assume that the actual token
 // starting point was the end of the last tag buffer usage.
 static void reset_tag_buffer_start_point(GumboParser* parser) {
@@ -423,15 +438,14 @@ static void reset_tag_buffer_start_point(GumboParser* parser) {
 // and clears the temporary buffer.
 static void finish_temporary_buffer(GumboParser* parser, const char** output) {
   GumboTokenizerState* tokenizer = parser->_tokenizer_state;
-  *output =
-      gumbo_string_buffer_to_string(parser, &tokenizer->_temporary_buffer);
+  *output = gumbo_string_buffer_to_string(&tokenizer->_temporary_buffer);
   clear_temporary_buffer(parser);
 }
 // Advances the iterator past the end of the token, and then fills in the
-// relevant position fields.  It's assumed that after every emit, the tokenizer
+// relevant position fields. It's assumed that after every emit, the tokenizer
 // will immediately return (letting the tree-construction stage read the filled
-// in Token).  Thus, it's safe to advance the input stream here, since it will
+// in Token). Thus, it's safe to advance the input stream here, since it will
 // bypass the advance at the bottom of the state machine loop.
 //
 // Since this advances the iterator and resets the current input, make sure to
@@ -450,7 +464,7 @@ static void finish_token(GumboParser* parser, GumboToken* token) {
   if (token->original_text.length > 0 &&
       token->original_text.data[token->original_text.length - 1] == '\r') {
     // The UTF8 iterator will ignore carriage returns in the input stream, which
-    // means that the next token may start one past a \r character.  The pointer
+    // means that the next token may start one past a \r character. The pointer
     // arithmetic above results in that \r being appended to the original text
     // of the preceding token, so we have to adjust its length here to chop the
     // \r off.
@@ -463,7 +477,7 @@ static void finish_token(GumboParser* parser, GumboToken* token) {
 static void finish_doctype_public_id(GumboParser* parser) {
   GumboTokenDocType* doc_type_state =
       &parser->_tokenizer_state->_doc_type_state;
-  gumbo_parser_deallocate(parser, (void*) doc_type_state->public_identifier);
+  gumbo_free((void*) doc_type_state->public_identifier);
   finish_temporary_buffer(parser, &doc_type_state->public_identifier);
   doc_type_state->has_public_identifier = true;
 }
@@ -473,7 +487,7 @@ static void finish_doctype_public_id(GumboParser* parser) {
 static void finish_doctype_system_id(GumboParser* parser) {
   GumboTokenDocType* doc_type_state =
       &parser->_tokenizer_state->_doc_type_state;
-  gumbo_parser_deallocate(parser, (void*) doc_type_state->system_identifier);
+  gumbo_free((void*) doc_type_state->system_identifier);
   finish_temporary_buffer(parser, &doc_type_state->system_identifier);
   doc_type_state->has_system_identifier = true;
 }
@@ -495,7 +509,7 @@ static StateResult emit_replacement_char(
   return RETURN_ERROR;
 }
-// Writes an EOF character token.  Always returns RETURN_SUCCESS.
+// Writes an EOF character token. Always returns RETURN_SUCCESS.
 static StateResult emit_eof(GumboParser* parser, GumboToken* output) {
   emit_char(parser, -1, output);
   return RETURN_SUCCESS;
@@ -520,7 +534,9 @@ static void emit_doctype(GumboParser* parser, GumboToken* output) {
 // Debug-only function that explicitly sets the attribute vector data to NULL so
 // it can be asserted on tag creation, verifying that there are no memory leaks.
 static void mark_tag_state_as_empty(GumboTagState* tag_state) {
+  UNUSED_IF_NDEBUG(tag_state);
 #ifndef NDEBUG
+  tag_state->_name = NULL;
   tag_state->_attributes = kGumboEmptyVector;
 #endif
 }
@@ -532,6 +548,7 @@ static StateResult emit_current_tag(GumboParser* parser, GumboToken* output) {
   if (tag_state->_is_start_tag) {
     output->type = GUMBO_TOKEN_START_TAG;
     output->v.start_tag.tag = tag_state->_tag;
+    output->v.start_tag.name = tag_state->_name;
     output->v.start_tag.attributes = tag_state->_attributes;
     output->v.start_tag.is_self_closing = tag_state->_is_self_closing;
     tag_state->_last_start_tag = tag_state->_tag;
@@ -540,23 +557,28 @@ static StateResult emit_current_tag(GumboParser* parser, GumboToken* output) {
         "Emitted start tag %s.\n", gumbo_normalized_tagname(tag_state->_tag));
   } else {
     output->type = GUMBO_TOKEN_END_TAG;
-    output->v.end_tag = tag_state->_tag;
+    output->v.end_tag.tag = tag_state->_tag;
+    output->v.end_tag.name = tag_state->_name;
+    output->v.end_tag.is_self_closing = tag_state->_is_self_closing;
     // In end tags, ownership of the attributes vector is not transferred to the
     // token, but it's still initialized as normal, so it must be manually
-    // deallocated.  There may also be attributes to destroy, in certain broken
+    // deallocated. There may also be attributes to destroy, in certain broken
     // cases like </div</th> (the "th" is an attribute there).
     for (unsigned int i = 0; i < tag_state->_attributes.length; ++i) {
-      gumbo_destroy_attribute(parser, tag_state->_attributes.data[i]);
+      gumbo_destroy_attribute(tag_state->_attributes.data[i]);
     }
-    gumbo_parser_deallocate(parser, tag_state->_attributes.data);
+    gumbo_free(tag_state->_attributes.data);
     mark_tag_state_as_empty(tag_state);
     gumbo_debug(
         "Emitted end tag %s.\n", gumbo_normalized_tagname(tag_state->_tag));
   }
-  gumbo_string_buffer_destroy(parser, &tag_state->_buffer);
+  gumbo_string_buffer_destroy(&tag_state->_buffer);
   finish_token(parser, output);
-  gumbo_debug("Original text = %.*s.\n", output->original_text.length,
-      output->original_text.data);
+  gumbo_debug (
+    "Original text = %.*s.\n",
+    (int) output->original_text.length,
+    output->original_text.data
+  );
   assert(output->original_text.length >= 2);
   assert(output->original_text.data[0] == '<');
   assert(output->original_text.data[output->original_text.length - 1] == '>');
@@ -570,26 +592,36 @@ static StateResult emit_current_tag(GumboParser* parser, GumboToken* output) {
 static void abandon_current_tag(GumboParser* parser) {
   GumboTagState* tag_state = &parser->_tokenizer_state->_tag_state;
   for (unsigned int i = 0; i < tag_state->_attributes.length; ++i) {
-    gumbo_destroy_attribute(parser, tag_state->_attributes.data[i]);
+    gumbo_destroy_attribute(tag_state->_attributes.data[i]);
   }
-  gumbo_parser_deallocate(parser, tag_state->_attributes.data);
+  gumbo_free(tag_state->_attributes.data);
   mark_tag_state_as_empty(tag_state);
-  gumbo_string_buffer_destroy(parser, &tag_state->_buffer);
+  gumbo_string_buffer_destroy(&tag_state->_buffer);
   gumbo_debug("Abandoning current tag.\n");
 }
-// Wraps the consume_char_ref function to handle its output and make the
-// appropriate TokenizerState modifications.  Returns RETURN_ERROR if a parse
+// Wraps the gumbo_consume_char_ref function to handle its output and make the
+// appropriate TokenizerState modifications. Returns RETURN_ERROR if a parse
 // error occurred, RETURN_SUCCESS otherwise.
-static StateResult emit_char_ref(GumboParser* parser,
-    int additional_allowed_char, bool is_in_attribute, GumboToken* output) {
+static StateResult emit_char_ref (
+  GumboParser* parser,
+  int additional_allowed_char,
+  bool UNUSED_ARG(is_in_attribute),
+  GumboToken* output
+) {
   GumboTokenizerState* tokenizer = parser->_tokenizer_state;
   OneOrTwoCodepoints char_ref;
-  bool status = consume_char_ref(
-      parser, &tokenizer->_input, additional_allowed_char, false, &char_ref);
+  bool status = gumbo_consume_char_ref (
+    parser,
+    &tokenizer->_input,
+    additional_allowed_char,
+    false,
+    &char_ref
+  );
   if (char_ref.first != kGumboNoChar) {
-    // consume_char_ref ends with the iterator pointing at the next character,
-    // so we need to be sure not advance it again before reading the next token.
+    // gumbo_consume_char_ref ends with the iterator pointing at the next
+    // character, so we need to be sure not to advance it again before
+    // reading the next token.
     tokenizer->_reconsume_current_input = true;
     emit_char(parser, char_ref.first, output);
     tokenizer->_buffered_emit_char = char_ref.second;
@@ -599,9 +631,9 @@ static StateResult emit_char_ref(GumboParser* parser,
   return status ? RETURN_SUCCESS : RETURN_ERROR;
 }
-// Emits a comment token.  Comments use the temporary buffer to accumulate their
+// Emits a comment token. Comments use the temporary buffer to accumulate their
 // data, and then it's copied over and released to the 'text' field of the
-// GumboToken union.  Always returns RETURN_SUCCESS.
+// GumboToken union. Always returns RETURN_SUCCESS.
 static StateResult emit_comment(GumboParser* parser, GumboToken* output) {
   output->type = GUMBO_TOKEN_COMMENT;
   finish_temporary_buffer(parser, &output->v.text);
@@ -626,11 +658,11 @@ static bool maybe_emit_from_temporary_buffer(
   }
   assert(*c == utf8iterator_current(&tokenizer->_input));
-  // emit_char also advances the input stream.  We need to do some juggling of
+  // emit_char also advances the input stream. We need to do some juggling of
   // the _reconsume_current_input flag to get the proper behavior when emitting
-  // previous tokens.  Basically, _reconsume_current_input should *never* be set
+  // previous tokens. Basically, _reconsume_current_input should *never* be set
   // when emitting anything from the temporary buffer, since those characters
-  // have already been advanced past.  However, it should be preserved so that
+  // have already been advanced past. However, it should be preserved so that
   // when the *next* character is encountered again, the tokenizer knows not to
   // advance past it.
   bool saved_reconsume_state = tokenizer->_reconsume_current_input;
@@ -644,7 +676,7 @@ static bool maybe_emit_from_temporary_buffer(
 // Sets up the tokenizer to begin flushing the temporary buffer.
 // This resets the input iterator stream to the start of the last tag, sets up
 // _temporary_buffer_emit, and then (if the temporary buffer is non-empty) emits
-// the first character in it.  It returns true if a character was emitted, false
+// the first character in it. It returns true if a character was emitted, false
 // otherwise.
 static bool emit_temporary_buffer(GumboParser* parser, GumboToken* output) {
   GumboTokenizerState* tokenizer = parser->_tokenizer_state;
@@ -654,32 +686,35 @@ static bool emit_temporary_buffer(GumboParser* parser, GumboToken* output) {
   return maybe_emit_from_temporary_buffer(parser, output);
 }
-// Appends a codepoint to the current tag buffer.  If
+// Appends a codepoint to the current tag buffer. If
 // reinitilize_position_on_first is set, this also initializes the tag buffer
 // start point; the only time you would *not* want to pass true for this
 // parameter is if you want the original_text to include character (like an
 // opening quote) that doesn't appear in the value.
-static void append_char_to_tag_buffer(
-    GumboParser* parser, int codepoint, bool reinitilize_position_on_first) {
+static void append_char_to_tag_buffer (
+  GumboParser* parser,
+  int codepoint,
+  bool reinitilize_position_on_first
+) {
   GumboStringBuffer* buffer = &parser->_tokenizer_state->_tag_state._buffer;
   if (buffer->length == 0 && reinitilize_position_on_first) {
     reset_tag_buffer_start_point(parser);
   }
-  gumbo_string_buffer_append_codepoint(parser, codepoint, buffer);
+  gumbo_string_buffer_append_codepoint(codepoint, buffer);
 }
-// (Re-)initialize the tag buffer.  This also resets the original_text pointer
+// (Re-)initialize the tag buffer. This also resets the original_text pointer
 // and _start_pos field to point to the current position.
 static void initialize_tag_buffer(GumboParser* parser) {
   GumboTokenizerState* tokenizer = parser->_tokenizer_state;
   GumboTagState* tag_state = &tokenizer->_tag_state;
-  gumbo_string_buffer_init(parser, &tag_state->_buffer);
+  gumbo_string_buffer_init(&tag_state->_buffer);
   reset_tag_buffer_start_point(parser);
 }
 // Initializes the tag_state to start a new tag, keeping track of the opening
-// positions and original text.  Takes a boolean indicating whether this is a
+// positions and original text. Takes a boolean indicating whether this is a
 // start or end tag.
 static void start_new_tag(GumboParser* parser, bool is_start_tag) {
   GumboTokenizerState* tokenizer = parser->_tokenizer_state;
@@ -690,14 +725,15 @@ static void start_new_tag(GumboParser* parser, bool is_start_tag) {
   assert(is_alpha(c));
   initialize_tag_buffer(parser);
-  gumbo_string_buffer_append_codepoint(parser, c, &tag_state->_buffer);
+  gumbo_string_buffer_append_codepoint(c, &tag_state->_buffer);
+  assert(tag_state->_name == NULL);
   assert(tag_state->_attributes.data == NULL);
   // Initial size chosen by statistical analysis of a corpus of 60k webpages.
-  // 99.5% of elements have 0 attributes, 93% of the remainder have 1.  These
+  // 99.5% of elements have 0 attributes, 93% of the remainder have 1. These
   // numbers are a bit higher for more modern websites (eg. ~45% = 0, ~40% = 1
   // for the HTML5 Spec), but still have basically 99% of nodes with <= 2 attrs.
-  gumbo_vector_init(parser, 1, &tag_state->_attributes);
+  gumbo_vector_init(1, &tag_state->_attributes);
   tag_state->_drop_next_attr_value = false;
   tag_state->_is_start_tag = is_start_tag;
   tag_state->_is_self_closing = false;
@@ -708,7 +744,7 @@ static void start_new_tag(GumboParser* parser, bool is_start_tag) {
 static void copy_over_tag_buffer(GumboParser* parser, const char** output) {
   GumboTokenizerState* tokenizer = parser->_tokenizer_state;
   GumboTagState* tag_state = &tokenizer->_tag_state;
-  *output = gumbo_string_buffer_to_string(parser, &tag_state->_buffer);
+  *output = gumbo_string_buffer_to_string(&tag_state->_buffer);
 }
 // Fills in:
@@ -717,9 +753,12 @@ static void copy_over_tag_buffer(GumboParser* parser, const char** output) {
 // * The start_pos GumboSourcePosition with the start position of the tag
 // buffer.
 // * The end_pos GumboSourcePosition with the current source position.
-static void copy_over_original_tag_text(GumboParser* parser,
-    GumboStringPiece* original_text, GumboSourcePosition* start_pos,
-    GumboSourcePosition* end_pos) {
+static void copy_over_original_tag_text (
+  GumboParser* parser,
+  GumboStringPiece* original_text,
+  GumboSourcePosition* start_pos,
+  GumboSourcePosition* end_pos
+) {
   GumboTokenizerState* tokenizer = parser->_tokenizer_state;
   GumboTagState* tag_state = &tokenizer->_tag_state;
@@ -729,7 +768,7 @@ static void copy_over_original_tag_text(GumboParser* parser,
   if (original_text->data[original_text->length - 1] == '\r') {
     // Since \r is skipped by the UTF-8 iterator, it can sometimes end up
     // appended to the end of original text even when it's really the first part
-    // of the next character.  If we detect this situation, shrink the length of
+    // of the next character. If we detect this situation, shrink the length of
     // the original text by 1 to remove the carriage return.
     --original_text->length;
   }
@@ -739,8 +778,7 @@ static void copy_over_original_tag_text(GumboParser* parser,
 // Releases and then re-initializes the tag buffer.
 static void reinitialize_tag_buffer(GumboParser* parser) {
-  gumbo_parser_deallocate(
-      parser, parser->_tokenizer_state->_tag_state._buffer.data);
+  gumbo_free(parser->_tokenizer_state->_tag_state._buffer.data);
   initialize_tag_buffer(parser);
 }
@@ -750,14 +788,24 @@ static void finish_tag_name(GumboParser* parser) {
   GumboTokenizerState* tokenizer = parser->_tokenizer_state;
   GumboTagState* tag_state = &tokenizer->_tag_state;
-  tag_state->_tag =
-      gumbo_tagn_enum(tag_state->_buffer.data, tag_state->_buffer.length);
+  const char *data = tag_state->_buffer.data;
+  size_t length = tag_state->_buffer.length;
+  tag_state->_tag = gumbo_tagn_enum(data, length);
+  if (tag_state->_tag == GUMBO_TAG_UNKNOWN) {
+    char *name = gumbo_alloc(length + 1);
+    memcpy(name, data, length);
+    name[length] = 0;
+    tag_state->_name = name;
+  }
   reinitialize_tag_buffer(parser);
 }
 // Adds an ERR_DUPLICATE_ATTR parse error to the parser's error struct.
-static void add_duplicate_attr_error(GumboParser* parser, const char* attr_name,
-    int original_index, int new_index) {
+static void add_duplicate_attr_error (
+  GumboParser* parser,
+  int original_index,
+  int new_index
+) {
   GumboError* error = gumbo_add_error(parser);
   if (!error) {
     return;
@@ -773,11 +821,11 @@ static void add_duplicate_attr_error(GumboParser* parser, const char* attr_name,
 }
 // Creates a new attribute in the current tag, copying the current tag buffer to
-// the attribute's name.  The attribute's value starts out as the empty string
+// the attribute's name. The attribute's value starts out as the empty string
 // (following the "Boolean attributes" section of the spec) and is only
-// overwritten on finish_attribute_value().  If the attribute has already been
+// overwritten on finish_attribute_value(). If the attribute has already been
 // specified, the new attribute is dropped, a parse error is added, and the
-// function returns false.  Otherwise, this returns true.
+// function returns false. Otherwise, this returns true.
 static bool finish_attribute_name(GumboParser* parser) {
   GumboTokenizerState* tokenizer = parser->_tokenizer_state;
   GumboTagState* tag_state = &tokenizer->_tag_state;
@@ -789,30 +837,43 @@ static bool finish_attribute_name(GumboParser* parser) {
   GumboVector* /* GumboAttribute* */ attributes = &tag_state->_attributes;
   for (unsigned int i = 0; i < attributes->length; ++i) {
     GumboAttribute* attr = attributes->data[i];
-    if (strlen(attr->name) == tag_state->_buffer.length &&
-        memcmp(attr->name, tag_state->_buffer.data,
-            tag_state->_buffer.length) == 0) {
+    if (
+      strlen(attr->name) == tag_state->_buffer.length
+      && 0 == memcmp (
+        attr->name,
+        tag_state->_buffer.data,
+        tag_state->_buffer.length
+      )
+    ) {
       // Identical attribute; bail.
-      add_duplicate_attr_error(parser, attr->name, i, attributes->length);
+      add_duplicate_attr_error(parser, i, attributes->length);
       tag_state->_drop_next_attr_value = true;
       return false;
     }
   }
-  GumboAttribute* attr = gumbo_parser_allocate(parser, sizeof(GumboAttribute));
+  GumboAttribute* attr = gumbo_alloc(sizeof(GumboAttribute));
   attr->attr_namespace = GUMBO_ATTR_NAMESPACE_NONE;
   copy_over_tag_buffer(parser, &attr->name);
-  copy_over_original_tag_text(
-      parser, &attr->original_name, &attr->name_start, &attr->name_end);
-  attr->value = gumbo_copy_stringz(parser, "");
-  copy_over_original_tag_text(
-      parser, &attr->original_value, &attr->name_start, &attr->name_end);
-  gumbo_vector_add(parser, attr, attributes);
+  copy_over_original_tag_text (
+    parser,
+    &attr->original_name,
+    &attr->name_start,
+    &attr->name_end
+  );
+  attr->value = gumbo_strdup("");
+  copy_over_original_tag_text (
+    parser,
+    &attr->original_value,
+    &attr->name_start,
+    &attr->name_end
+  );
+  gumbo_vector_add(attr, attributes);
   reinitialize_tag_buffer(parser);
   return true;
 }
-// Finishes an attribute value.  This sets the value of the most recently added
+// Finishes an attribute value. This sets the value of the most recently added
 // attribute to the current contents of the tag buffer.
 static void finish_attribute_value(GumboParser* parser) {
   GumboTagState* tag_state = &parser->_tokenizer_state->_tag_state;
@@ -826,7 +887,7 @@ static void finish_attribute_value(GumboParser* parser) {
   GumboAttribute* attr =
       tag_state->_attributes.data[tag_state->_attributes.length - 1];
-  gumbo_parser_deallocate(parser, (void*) attr->value);
+  gumbo_free((void*) attr->value);
   copy_over_tag_buffer(parser, &attr->value);
   copy_over_original_tag_text(
       parser, &attr->original_value, &attr->value_start, &attr->value_end);
@@ -842,24 +903,27 @@ static bool is_appropriate_end_tag(GumboParser* parser) {
                                            tag_state->_buffer.length);
 }
-void gumbo_tokenizer_state_init(
-    GumboParser* parser, const char* text, size_t text_length) {
-  GumboTokenizerState* tokenizer =
-      gumbo_parser_allocate(parser, sizeof(GumboTokenizerState));
+void gumbo_tokenizer_state_init (
+  GumboParser* parser,
+  const char* text,
+  size_t text_length
+) {
+  GumboTokenizerState* tokenizer = gumbo_alloc(sizeof(GumboTokenizerState));
   parser->_tokenizer_state = tokenizer;
   gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
   tokenizer->_reconsume_current_input = false;
   tokenizer->_is_current_node_foreign = false;
   tokenizer->_is_in_cdata = false;
   tokenizer->_tag_state._last_start_tag = GUMBO_TAG_LAST;
+  tokenizer->_tag_state._name = NULL;
   tokenizer->_buffered_emit_char = kGumboNoChar;
-  gumbo_string_buffer_init(parser, &tokenizer->_temporary_buffer);
+  gumbo_string_buffer_init(&tokenizer->_temporary_buffer);
   tokenizer->_temporary_buffer_emit = NULL;
   mark_tag_state_as_empty(&tokenizer->_tag_state);
-  gumbo_string_buffer_init(parser, &tokenizer->_script_data_buffer);
+  gumbo_string_buffer_init(&tokenizer->_script_data_buffer);
   tokenizer->_token_start = text;
   utf8iterator_init(parser, text, text_length, &tokenizer->_input);
   utf8iterator_get_position(&tokenizer->_input, &tokenizer->_token_start_pos);
@@ -871,27 +935,37 @@ void gumbo_tokenizer_state_destroy(GumboParser* parser) {
   assert(tokenizer->_doc_type_state.name == NULL);
   assert(tokenizer->_doc_type_state.public_identifier == NULL);
   assert(tokenizer->_doc_type_state.system_identifier == NULL);
-  gumbo_string_buffer_destroy(parser, &tokenizer->_temporary_buffer);
-  gumbo_string_buffer_destroy(parser, &tokenizer->_script_data_buffer);
-  gumbo_parser_deallocate(parser, tokenizer);
+  gumbo_string_buffer_destroy(&tokenizer->_temporary_buffer);
+  gumbo_string_buffer_destroy(&tokenizer->_script_data_buffer);
+  assert(tokenizer->_tag_state._name == NULL);
+  assert(tokenizer->_tag_state._attributes.data == NULL);
+  gumbo_free(tokenizer);
 }
 void gumbo_tokenizer_set_state(GumboParser* parser, GumboTokenizerEnum state) {
   parser->_tokenizer_state->_state = state;
 }
-void gumbo_tokenizer_set_is_current_node_foreign(
-    GumboParser* parser, bool is_foreign) {
+void gumbo_tokenizer_set_is_current_node_foreign (
+  GumboParser* parser,
+  bool is_foreign
+) {
   if (is_foreign != parser->_tokenizer_state->_is_current_node_foreign) {
-    gumbo_debug("Toggling is_current_node_foreign to %s.\n",
-        is_foreign ? "true" : "false");
+    gumbo_debug (
+      "Toggling is_current_node_foreign to %s.\n",
+      is_foreign ? "true" : "false"
+    );
   }
   parser->_tokenizer_state->_is_current_node_foreign = is_foreign;
 }
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#data-state
-static StateResult handle_data_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#data-state
+static StateResult handle_data_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '&':
       gumbo_tokenizer_set_state(parser, GUMBO_LEX_CHAR_REF_IN_DATA);
@@ -914,16 +988,24 @@ static StateResult handle_data_state(GumboParser* parser,
   }
 }
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#character-reference-in-data-state
-static StateResult handle_char_ref_in_data_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#character-reference-in-data-state
+static StateResult handle_char_ref_in_data_state (
+  GumboParser* parser,
+  GumboTokenizerState* UNUSED_ARG(tokenizer),
+  int UNUSED_ARG(c),
+  GumboToken* output
+) {
   gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
   return emit_char_ref(parser, ' ', false, output);
 }
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#rcdata-state
-static StateResult handle_rcdata_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#rcdata-state
+static StateResult handle_rcdata_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '&':
       gumbo_tokenizer_set_state(parser, GUMBO_LEX_CHAR_REF_IN_RCDATA);
@@ -943,16 +1025,24 @@ static StateResult handle_rcdata_state(GumboParser* parser,
   }
 }
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#character-reference-in-rcdata-state
-static StateResult handle_char_ref_in_rcdata_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#character-reference-in-rcdata-state
+static StateResult handle_char_ref_in_rcdata_state (
+  GumboParser* parser,
+  GumboTokenizerState* UNUSED_ARG(tokenizer),
+  int UNUSED_ARG(c),
+  GumboToken* output
+) {
   gumbo_tokenizer_set_state(parser, GUMBO_LEX_RCDATA);
   return emit_char_ref(parser, ' ', false, output);
 }
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#rawtext-state
-static StateResult handle_rawtext_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#rawtext-state
+static StateResult handle_rawtext_state (
+  GumboParser* parser,
+  GumboTokenizerState* UNUSED_ARG(tokenizer),
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '<':
       gumbo_tokenizer_set_state(parser, GUMBO_LEX_RAWTEXT_LT);
@@ -968,9 +1058,13 @@ static StateResult handle_rawtext_state(GumboParser* parser,
   }
 }
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-state
-static StateResult handle_script_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#script-data-state
+static StateResult handle_script_state (
+  GumboParser* parser,
+  GumboTokenizerState* UNUSED_ARG(tokenizer),
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '<':
       gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT_LT);
@@ -986,9 +1080,13 @@ static StateResult handle_script_state(GumboParser* parser,
   }
 }
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#plaintext-state
-static StateResult handle_plaintext_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#plaintext-state
+static StateResult handle_plaintext_state (
+  GumboParser* parser,
+  GumboTokenizerState* UNUSED_ARG(tokenizer),
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '\0':
       return emit_replacement_char(parser, output);
@@ -999,9 +1097,13 @@ static StateResult handle_plaintext_state(GumboParser* parser,
   }
 }
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#tag-open-state
-static StateResult handle_tag_open_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state
+static StateResult handle_tag_open_state (
+  GumboParser* parser,
+  GumboTokenizerState* UNUSED_ARG(tokenizer),
+  int c,
+  GumboToken* output
+) {
   assert(temporary_buffer_equals(parser, "<"));
   switch (c) {
     case '!':
@@ -1032,9 +1134,13 @@ static StateResult handle_tag_open_state(GumboParser* parser,
   }
 }
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#end-tag-open-state
-static StateResult handle_end_tag_open_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#end-tag-open-state
+static StateResult handle_end_tag_open_state (
+  GumboParser* parser,
+  GumboTokenizerState* UNUSED_ARG(tokenizer),
+  int c,
+  GumboToken* output
+) {
   assert(temporary_buffer_equals(parser, "</"));
   switch (c) {
     case '>':
@@ -1059,9 +1165,13 @@ static StateResult handle_end_tag_open_state(GumboParser* parser,
   }
 }
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#tag-name-state
-static StateResult handle_tag_name_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#tag-name-state
+static StateResult handle_tag_name_state (
+  GumboParser* parser,
+  GumboTokenizerState* UNUSED_ARG(tokenizer),
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '\t':
     case '\n':
@@ -1093,9 +1203,13 @@ static StateResult handle_tag_name_state(GumboParser* parser,
   }
 }
-// http://www.whatwg.org/specs/web-apps/current-work/complete.html#rcdata-less-than-sign-state
-static StateResult handle_rcdata_lt_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#rcdata-less-than-sign-state
+static StateResult handle_rcdata_lt_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int c,
+  GumboToken* output
+) {
   assert(temporary_buffer_equals(parser, "<"));
   if (c == '/') {
     gumbo_tokenizer_set_state(parser, GUMBO_LEX_RCDATA_END_TAG_OPEN);
@@ -1108,9 +1222,13 @@ static StateResult handle_rcdata_lt_state(GumboParser* parser,
   }
 }
-// http://www.whatwg.org/specs/web-apps/current-work/complete.html#rcdata-end-tag-open-state
-static StateResult handle_rcdata_end_tag_open_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#rcdata-end-tag-open-state
+static StateResult handle_rcdata_end_tag_open_state (
+  GumboParser* parser,
+  GumboTokenizerState* UNUSED_ARG(tokenizer),
+  int c,
+  GumboToken* output
+) {
   assert(temporary_buffer_equals(parser, "</"));
   if (is_alpha(c)) {
     gumbo_tokenizer_set_state(parser, GUMBO_LEX_RCDATA_END_TAG_NAME);
@@ -1124,9 +1242,14 @@ static StateResult handle_rcdata_end_tag_open_state(GumboParser* parser,
   return true;
 }
-// http://www.whatwg.org/specs/web-apps/current-work/complete.html#rcdata-end-tag-name-state
-static StateResult handle_rcdata_end_tag_name_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#rcdata-end-tag-name-state
+static StateResult handle_rcdata_end_tag_name_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int c,
+  GumboToken* output
+) {
+  UNUSED_IF_NDEBUG(tokenizer);
   assert(tokenizer->_temporary_buffer.length >= 2);
   if (is_alpha(c)) {
     append_char_to_tag_buffer(parser, ensure_lowercase(c), true);
@@ -1156,9 +1279,13 @@ static StateResult handle_rcdata_end_tag_name_state(GumboParser* parser,
   return emit_temporary_buffer(parser, output);
 }
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#rawtext-less-than-sign-state
-static StateResult handle_rawtext_lt_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#rawtext-less-than-sign-state
+static StateResult handle_rawtext_lt_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int c,
+  GumboToken* output
+) {
   assert(temporary_buffer_equals(parser, "<"));
   if (c == '/') {
     gumbo_tokenizer_set_state(parser, GUMBO_LEX_RAWTEXT_END_TAG_OPEN);
@@ -1171,9 +1298,13 @@ static StateResult handle_rawtext_lt_state(GumboParser* parser,
   }
 }
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#rawtext-end-tag-open-state
-static StateResult handle_rawtext_end_tag_open_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#rawtext-end-tag-open-state
+static StateResult handle_rawtext_end_tag_open_state (
+  GumboParser* parser,
+  GumboTokenizerState* UNUSED_ARG(tokenizer),
+  int c,
+  GumboToken* output
+) {
   assert(temporary_buffer_equals(parser, "</"));
   if (is_alpha(c)) {
     gumbo_tokenizer_set_state(parser, GUMBO_LEX_RAWTEXT_END_TAG_NAME);
@@ -1186,9 +1317,13 @@ static StateResult handle_rawtext_end_tag_open_state(GumboParser* parser,
   }
 }
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#rawtext-end-tag-name-state
-static StateResult handle_rawtext_end_tag_name_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#rawtext-end-tag-name-state
+static StateResult handle_rawtext_end_tag_name_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int c,
+  GumboToken* output
+) {
   assert(tokenizer->_temporary_buffer.length >= 2);
   gumbo_debug("Last end tag: %*s\n", (int) tokenizer->_tag_state._buffer.length,
       tokenizer->_tag_state._buffer.data);
@@ -1221,9 +1356,13 @@ static StateResult handle_rawtext_end_tag_name_state(GumboParser* parser,
   return emit_temporary_buffer(parser, output);
 }
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-less-than-sign-state
-static StateResult handle_script_lt_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#script-data-less-than-sign-state
+static StateResult handle_script_lt_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int c,
+  GumboToken* output
+) {
   assert(temporary_buffer_equals(parser, "<"));
   if (c == '/') {
     gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT_END_TAG_OPEN);
@@ -1240,9 +1379,13 @@ static StateResult handle_script_lt_state(GumboParser* parser,
   }
 }
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-end-tag-open-state
-static StateResult handle_script_end_tag_open_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#script-data-end-tag-open-state
+static StateResult handle_script_end_tag_open_state (
+  GumboParser* parser,
+  GumboTokenizerState* UNUSED_ARG(tokenizer),
+  int c,
+  GumboToken* output
+) {
   assert(temporary_buffer_equals(parser, "</"));
   if (is_alpha(c)) {
     gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT_END_TAG_NAME);
@@ -1255,9 +1398,14 @@ static StateResult handle_script_end_tag_open_state(GumboParser* parser,
   }
 }
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-end-tag-name-state
-static StateResult handle_script_end_tag_name_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#script-data-end-tag-name-state
+static StateResult handle_script_end_tag_name_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int c,
+  GumboToken* output
+) {
+  UNUSED_IF_NDEBUG(tokenizer);
   assert(tokenizer->_temporary_buffer.length >= 2);
   if (is_alpha(c)) {
     append_char_to_tag_buffer(parser, ensure_lowercase(c), true);
@@ -1287,9 +1435,13 @@ static StateResult handle_script_end_tag_name_state(GumboParser* parser,
   return emit_temporary_buffer(parser, output);
 }
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-escape-start-state
-static StateResult handle_script_escaped_start_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#script-data-escape-start-state
+static StateResult handle_script_escaped_start_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int c,
+  GumboToken* output
+) {
   if (c == '-') {
     gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT_ESCAPED_START_DASH);
     return emit_current_char(parser, output);
@@ -1300,9 +1452,13 @@ static StateResult handle_script_escaped_start_state(GumboParser* parser,
   }
 }
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-escape-start-dash-state
-static StateResult handle_script_escaped_start_dash_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#script-data-escape-start-dash-state
+static StateResult handle_script_escaped_start_dash_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int c,
+  GumboToken* output
+) {
   if (c == '-') {
     gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT_ESCAPED_DASH_DASH);
     return emit_current_char(parser, output);
@@ -1313,9 +1469,13 @@ static StateResult handle_script_escaped_start_dash_state(GumboParser* parser,
   }
 }
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-escaped-state
-static StateResult handle_script_escaped_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#script-data-escaped-state
+static StateResult handle_script_escaped_state (
+  GumboParser* parser,
+  GumboTokenizerState* UNUSED_ARG(tokenizer),
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '-':
       gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT_ESCAPED_DASH);
@@ -1335,9 +1495,13 @@ static StateResult handle_script_escaped_state(GumboParser* parser,
   }
 }
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-escaped-dash-state
-static StateResult handle_script_escaped_dash_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#script-data-escaped-dash-state
+static StateResult handle_script_escaped_dash_state (
+  GumboParser* parser,
+  GumboTokenizerState* UNUSED_ARG(tokenizer),
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '-':
       gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT_ESCAPED_DASH_DASH);
@@ -1360,9 +1524,13 @@ static StateResult handle_script_escaped_dash_state(GumboParser* parser,
   }
 }
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-escaped-dash-dash-state
-static StateResult handle_script_escaped_dash_dash_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#script-data-escaped-dash-dash-state
+static StateResult handle_script_escaped_dash_dash_state (
+  GumboParser* parser,
+  GumboTokenizerState* UNUSED_ARG(tokenizer),
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '-':
       return emit_current_char(parser, output);
@@ -1387,9 +1555,13 @@ static StateResult handle_script_escaped_dash_dash_state(GumboParser* parser,
   }
 }
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-escaped-less-than-sign-state
-static StateResult handle_script_escaped_lt_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#script-data-escaped-less-than-sign-state
+static StateResult handle_script_escaped_lt_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int c,
+  GumboToken* output
+) {
   assert(temporary_buffer_equals(parser, "<"));
   assert(!tokenizer->_script_data_buffer.length);
   if (c == '/') {
@@ -1399,8 +1571,10 @@ static StateResult handle_script_escaped_lt_state(GumboParser* parser,
   } else if (is_alpha(c)) {
     gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT_DOUBLE_ESCAPED_START);
     append_char_to_temporary_buffer(parser, c);
-    gumbo_string_buffer_append_codepoint(
-        parser, ensure_lowercase(c), &tokenizer->_script_data_buffer);
+    gumbo_string_buffer_append_codepoint (
+      ensure_lowercase(c),
+      &tokenizer->_script_data_buffer
+    );
     return emit_temporary_buffer(parser, output);
   } else {
     gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT_ESCAPED);
@@ -1408,9 +1582,13 @@ static StateResult handle_script_escaped_lt_state(GumboParser* parser,
   }
 }
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-escaped-end-tag-open-state
-static StateResult handle_script_escaped_end_tag_open_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#script-data-escaped-end-tag-open-state
+static StateResult handle_script_escaped_end_tag_open_state (
+  GumboParser* parser,
+  GumboTokenizerState* UNUSED_ARG(tokenizer),
+  int c,
+  GumboToken* output
+) {
   assert(temporary_buffer_equals(parser, "</"));
   if (is_alpha(c)) {
     gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT_ESCAPED_END_TAG_NAME);
@@ -1423,9 +1601,14 @@ static StateResult handle_script_escaped_end_tag_open_state(GumboParser* parser,
   }
 }
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-escaped-end-tag-name-state
-static StateResult handle_script_escaped_end_tag_name_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#script-data-escaped-end-tag-name-state
+static StateResult handle_script_escaped_end_tag_name_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int c,
+  GumboToken* output
+) {
+  UNUSED_IF_NDEBUG(tokenizer);
   assert(tokenizer->_temporary_buffer.length >= 2);
   if (is_alpha(c)) {
     append_char_to_tag_buffer(parser, ensure_lowercase(c), true);
@@ -1455,9 +1638,13 @@ static StateResult handle_script_escaped_end_tag_name_state(GumboParser* parser,
   return emit_temporary_buffer(parser, output);
 }
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-double-escape-start-state
-static StateResult handle_script_double_escaped_start_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#script-data-double-escape-start-state
+static StateResult handle_script_double_escaped_start_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '\t':
     case '\n':
@@ -1465,16 +1652,22 @@ static StateResult handle_script_double_escaped_start_state(GumboParser* parser,
     case ' ':
     case '/':
     case '>':
-      gumbo_tokenizer_set_state(
-          parser, gumbo_string_equals(&kScriptTag,
-                      (GumboStringPiece*) &tokenizer->_script_data_buffer)
-                      ? GUMBO_LEX_SCRIPT_DOUBLE_ESCAPED
-                      : GUMBO_LEX_SCRIPT_ESCAPED);
+      gumbo_tokenizer_set_state (
+        parser,
+        gumbo_string_equals (
+          &kScriptTag,
+          (GumboStringPiece*) &tokenizer->_script_data_buffer
+        )
+        ? GUMBO_LEX_SCRIPT_DOUBLE_ESCAPED
+        : GUMBO_LEX_SCRIPT_ESCAPED
+      );
       return emit_current_char(parser, output);
     default:
       if (is_alpha(c)) {
-        gumbo_string_buffer_append_codepoint(
-            parser, ensure_lowercase(c), &tokenizer->_script_data_buffer);
+        gumbo_string_buffer_append_codepoint (
+          ensure_lowercase(c),
+          &tokenizer->_script_data_buffer
+        );
         return emit_current_char(parser, output);
       } else {
         gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT_ESCAPED);
@@ -1484,9 +1677,13 @@ static StateResult handle_script_double_escaped_start_state(GumboParser* parser,
   }
 }
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-double-escaped-state
-static StateResult handle_script_double_escaped_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#script-data-double-escaped-state
+static StateResult handle_script_double_escaped_state (
+  GumboParser* parser,
+  GumboTokenizerState* UNUSED_ARG(tokenizer),
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '-':
       gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT_DOUBLE_ESCAPED_DASH);
@@ -1505,9 +1702,13 @@ static StateResult handle_script_double_escaped_state(GumboParser* parser,
   }
 }
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-double-escaped-dash-state
-static StateResult handle_script_double_escaped_dash_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#script-data-double-escaped-dash-state
+static StateResult handle_script_double_escaped_dash_state (
+  GumboParser* parser,
+  GumboTokenizerState* UNUSED_ARG(tokenizer),
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '-':
       gumbo_tokenizer_set_state(
@@ -1529,10 +1730,13 @@ static StateResult handle_script_double_escaped_dash_state(GumboParser* parser,
   }
 }
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-double-escaped-dash-dash-state
-static StateResult handle_script_double_escaped_dash_dash_state(
-    GumboParser* parser, GumboTokenizerState* tokenizer, int c,
-    GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#script-data-double-escaped-dash-dash-state
+static StateResult handle_script_double_escaped_dash_dash_state (
+  GumboParser* parser,
+  GumboTokenizerState* UNUSED_ARG(tokenizer),
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '-':
       return emit_current_char(parser, output);
@@ -1555,12 +1759,16 @@ static StateResult handle_script_double_escaped_dash_dash_state(
   }
 }
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-double-escaped-less-than-sign-state
-static StateResult handle_script_double_escaped_lt_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#script-data-double-escaped-less-than-sign-state
+static StateResult handle_script_double_escaped_lt_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int c,
+  GumboToken* output
+) {
   if (c == '/') {
     gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT_DOUBLE_ESCAPED_END);
-    gumbo_string_buffer_clear(parser, &tokenizer->_script_data_buffer);
+    gumbo_string_buffer_clear(&tokenizer->_script_data_buffer);
     return emit_current_char(parser, output);
   } else {
     gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT_DOUBLE_ESCAPED);
@@ -1569,9 +1777,13 @@ static StateResult handle_script_double_escaped_lt_state(GumboParser* parser,
   }
 }
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#script-data-double-escape-end-state
-static StateResult handle_script_double_escaped_end_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#script-data-double-escape-end-state
+static StateResult handle_script_double_escaped_end_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '\t':
     case '\n':
@@ -1587,8 +1799,10 @@ static StateResult handle_script_double_escaped_end_state(GumboParser* parser,
       return emit_current_char(parser, output);
     default:
       if (is_alpha(c)) {
-        gumbo_string_buffer_append_codepoint(
-            parser, ensure_lowercase(c), &tokenizer->_script_data_buffer);
+        gumbo_string_buffer_append_codepoint (
+          ensure_lowercase(c),
+          &tokenizer->_script_data_buffer
+        );
         return emit_current_char(parser, output);
       } else {
         gumbo_tokenizer_set_state(parser, GUMBO_LEX_SCRIPT_DOUBLE_ESCAPED);
@@ -1598,9 +1812,13 @@ static StateResult handle_script_double_escaped_end_state(GumboParser* parser,
   }
 }
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#before-attribute-name-state
-static StateResult handle_before_attr_name_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#before-attribute-name-state
+static StateResult handle_before_attr_name_state (
+  GumboParser* parser,
+  GumboTokenizerState* UNUSED_ARG(tokenizer),
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '\t':
     case '\n':
@@ -1636,9 +1854,13 @@ static StateResult handle_before_attr_name_state(GumboParser* parser,
   }
 }
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#attribute-name-state
-static StateResult handle_attr_name_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#attribute-name-state
+static StateResult handle_attr_name_state (
+  GumboParser* parser,
+  GumboTokenizerState* UNUSED_ARG(tokenizer),
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '\t':
     case '\n':
@@ -1679,9 +1901,13 @@ static StateResult handle_attr_name_state(GumboParser* parser,
   }
 }
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#after-attribute-name-state
-static StateResult handle_after_attr_name_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#after-attribute-name-state
+static StateResult handle_after_attr_name_state (
+  GumboParser* parser,
+  GumboTokenizerState* UNUSED_ARG(tokenizer),
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '\t':
     case '\n':
@@ -1719,9 +1945,13 @@ static StateResult handle_after_attr_name_state(GumboParser* parser,
   }
 }
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#before-attribute-value-state
-static StateResult handle_before_attr_value_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#before-attribute-value-state
+static StateResult handle_before_attr_value_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '\t':
     case '\n':
@@ -1768,9 +1998,13 @@ static StateResult handle_before_attr_value_state(GumboParser* parser,
   }
 }
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#attribute-value-double-quoted-state
-static StateResult handle_attr_value_double_quoted_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#attribute-value-double-quoted-state
+static StateResult handle_attr_value_double_quoted_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int c,
+  GumboToken* UNUSED_ARG(output)
+) {
   switch (c) {
     case '"':
       gumbo_tokenizer_set_state(parser, GUMBO_LEX_AFTER_ATTR_VALUE_QUOTED);
@@ -1796,9 +2030,13 @@ static StateResult handle_attr_value_double_quoted_state(GumboParser* parser,
   }
 }
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#attribute-value-single-quoted-state
-static StateResult handle_attr_value_single_quoted_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#attribute-value-single-quoted-state
+static StateResult handle_attr_value_single_quoted_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int c,
+  GumboToken* UNUSED_ARG(output)
+) {
   switch (c) {
     case '\'':
       gumbo_tokenizer_set_state(parser, GUMBO_LEX_AFTER_ATTR_VALUE_QUOTED);
@@ -1824,9 +2062,13 @@ static StateResult handle_attr_value_single_quoted_state(GumboParser* parser,
   }
 }
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#attribute-value-unquoted-state
-static StateResult handle_attr_value_unquoted_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#attribute-value-unquoted-state
+static StateResult handle_attr_value_unquoted_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '\t':
     case '\n':
@@ -1867,9 +2109,13 @@ static StateResult handle_attr_value_unquoted_state(GumboParser* parser,
   }
 }
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#character-reference-in-attribute-value-state
-static StateResult handle_char_ref_in_attr_value_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#character-reference-in-attribute-value-state
+static StateResult handle_char_ref_in_attr_value_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int UNUSED_ARG(c),
+  GumboToken* UNUSED_ARG(output)
+) {
   OneOrTwoCodepoints char_ref;
   int allowed_char;
   bool is_unquoted = false;
@@ -1893,9 +2139,15 @@ static StateResult handle_char_ref_in_attr_value_state(GumboParser* parser,
   // Ignore the status, since we don't have a convenient way of signalling that
   // a parser error has occurred when the error occurs in the middle of a
-  // multi-state token.  We'd need a flag inside the TokenizerState to do this,
+  // multi-state token. We'd need a flag inside the TokenizerState to do this,
   // but that's a low priority fix.
-  consume_char_ref(parser, &tokenizer->_input, allowed_char, true, &char_ref);
+  gumbo_consume_char_ref (
+    parser,
+    &tokenizer->_input,
+    allowed_char,
+    true,
+    &char_ref
+  );
   if (char_ref.first != kGumboNoChar) {
     tokenizer->_reconsume_current_input = true;
     append_char_to_tag_buffer(parser, char_ref.first, is_unquoted);
@@ -1909,9 +2161,13 @@ static StateResult handle_char_ref_in_attr_value_state(GumboParser* parser,
   return NEXT_CHAR;
 }
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#after-attribute-value-quoted-state
-static StateResult handle_after_attr_value_quoted_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#after-attribute-value-quoted-state
+static StateResult handle_after_attr_value_quoted_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int c,
+  GumboToken* output
+) {
   finish_attribute_value(parser);
   switch (c) {
     case '\t':
@@ -1940,9 +2196,13 @@ static StateResult handle_after_attr_value_quoted_state(GumboParser* parser,
   }
 }
-// http://www.whatwg.org/specs/web-apps/current-work/complete.html#self-closing-start-tag-state
-static StateResult handle_self_closing_start_tag_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#self-closing-start-tag-state
+static StateResult handle_self_closing_start_tag_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '>':
       gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
@@ -1961,11 +2221,16 @@ static StateResult handle_self_closing_start_tag_state(GumboParser* parser,
   }
 }
-// http://www.whatwg.org/specs/web-apps/current-work/complete.html#bogus-comment-state
-static StateResult handle_bogus_comment_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#bogus-comment-state
+static StateResult handle_bogus_comment_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int c,
+  GumboToken* output
+) {
   while (c != '>' && c != -1) {
     if (c == '\0') {
+      tokenizer_add_parse_error(parser, GUMBO_ERR_UTF8_NULL);
       c = 0xFFFD;
     }
     append_char_to_temporary_buffer(parser, c);
@@ -1976,29 +2241,48 @@ static StateResult handle_bogus_comment_state(GumboParser* parser,
   return emit_comment(parser, output);
 }
-// http://www.whatwg.org/specs/web-apps/current-work/complete.html#markup-declaration-open-state
-static StateResult handle_markup_declaration_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
-  if (utf8iterator_maybe_consume_match(
-          &tokenizer->_input, "--", sizeof("--") - 1, true)) {
+// https://html.spec.whatwg.org/multipage/parsing.html#markup-declaration-open-state
+static StateResult handle_markup_declaration_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int UNUSED_ARG(c),
+  GumboToken* UNUSED_ARG(output)
+) {
+  if (
+    utf8iterator_maybe_consume_match (
+      &tokenizer->_input,
+      "--",
+      sizeof("--") - 1,
+      true
+    )
+  ) {
     gumbo_tokenizer_set_state(parser, GUMBO_LEX_COMMENT_START);
     tokenizer->_reconsume_current_input = true;
-  } else if (utf8iterator_maybe_consume_match(
-                 &tokenizer->_input, "DOCTYPE", sizeof("DOCTYPE") - 1, false)) {
+  } else if (
+    utf8iterator_maybe_consume_match (
+      &tokenizer->_input,
+      "DOCTYPE",
+      sizeof("DOCTYPE") - 1,
+      false
+    )
+  ) {
     gumbo_tokenizer_set_state(parser, GUMBO_LEX_DOCTYPE);
     tokenizer->_reconsume_current_input = true;
     // If we get here, we know we'll eventually emit a doctype token, so now is
-    // the time to initialize the doctype strings.  (Not in doctype_state_init,
+    // the time to initialize the doctype strings. (Not in doctype_state_init,
     // since then they'll leak if ownership never gets transferred to the
     // doctype token.)
-    tokenizer->_doc_type_state.name = gumbo_copy_stringz(parser, "");
-    tokenizer->_doc_type_state.public_identifier =
-        gumbo_copy_stringz(parser, "");
-    tokenizer->_doc_type_state.system_identifier =
-        gumbo_copy_stringz(parser, "");
-  } else if (tokenizer->_is_current_node_foreign &&
-             utf8iterator_maybe_consume_match(
-                 &tokenizer->_input, "[CDATA[", sizeof("[CDATA[") - 1, true)) {
+    tokenizer->_doc_type_state.name = gumbo_strdup("");
+    tokenizer->_doc_type_state.public_identifier = gumbo_strdup("");
+    tokenizer->_doc_type_state.system_identifier = gumbo_strdup("");
+  } else if (
+    tokenizer->_is_current_node_foreign
+    && utf8iterator_maybe_consume_match (
+      &tokenizer->_input,
+      "[CDATA[", sizeof("[CDATA[") - 1,
+      true
+    )
+  ) {
     gumbo_tokenizer_set_state(parser, GUMBO_LEX_CDATA);
     tokenizer->_is_in_cdata = true;
     tokenizer->_reconsume_current_input = true;
@@ -2011,9 +2295,13 @@ static StateResult handle_markup_declaration_state(GumboParser* parser,
   return NEXT_CHAR;
 }
-// http://www.whatwg.org/specs/web-apps/current-work/complete.html#comment-start-state
-static StateResult handle_comment_start_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#comment-start-state
+static StateResult handle_comment_start_state (
+  GumboParser* parser,
+  GumboTokenizerState* UNUSED_ARG(tokenizer),
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '-':
       gumbo_tokenizer_set_state(parser, GUMBO_LEX_COMMENT_START_DASH);
@@ -2040,9 +2328,13 @@ static StateResult handle_comment_start_state(GumboParser* parser,
   }
 }
-// http://www.whatwg.org/specs/web-apps/current-work/complete.html#comment-start-dash-state
-static StateResult handle_comment_start_dash_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#comment-start-dash-state
+static StateResult handle_comment_start_dash_state (
+  GumboParser* parser,
+  GumboTokenizerState* UNUSED_ARG(tokenizer),
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '-':
       gumbo_tokenizer_set_state(parser, GUMBO_LEX_COMMENT_END);
@@ -2071,9 +2363,13 @@ static StateResult handle_comment_start_dash_state(GumboParser* parser,
   }
 }
-// http://www.whatwg.org/specs/web-apps/current-work/complete.html#comment-state
-static StateResult handle_comment_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#comment-state
+static StateResult handle_comment_state (
+  GumboParser* parser,
+  GumboTokenizerState* UNUSED_ARG(tokenizer),
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '-':
       gumbo_tokenizer_set_state(parser, GUMBO_LEX_COMMENT_END_DASH);
@@ -2093,9 +2389,13 @@ static StateResult handle_comment_state(GumboParser* parser,
   }
 }
-// http://www.whatwg.org/specs/web-apps/current-work/complete.html#comment-end-dash-state
-static StateResult handle_comment_end_dash_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#comment-end-dash-state
+static StateResult handle_comment_end_dash_state (
+  GumboParser* parser,
+  GumboTokenizerState* UNUSED_ARG(tokenizer),
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '-':
       gumbo_tokenizer_set_state(parser, GUMBO_LEX_COMMENT_END);
@@ -2119,9 +2419,13 @@ static StateResult handle_comment_end_dash_state(GumboParser* parser,
   }
 }
-// http://www.whatwg.org/specs/web-apps/current-work/complete.html#comment-end-state
-static StateResult handle_comment_end_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#comment-end-state
+static StateResult handle_comment_end_state (
+  GumboParser* parser,
+  GumboTokenizerState* UNUSED_ARG(tokenizer),
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '>':
       gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
@@ -2158,9 +2462,13 @@ static StateResult handle_comment_end_state(GumboParser* parser,
   }
 }
-// http://www.whatwg.org/specs/web-apps/current-work/complete.html#comment-end-bang-state
-static StateResult handle_comment_end_bang_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#comment-end-bang-state
+static StateResult handle_comment_end_bang_state (
+  GumboParser* parser,
+  GumboTokenizerState* UNUSED_ARG(tokenizer),
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '-':
       gumbo_tokenizer_set_state(parser, GUMBO_LEX_COMMENT_END_DASH);
@@ -2194,9 +2502,13 @@ static StateResult handle_comment_end_bang_state(GumboParser* parser,
   }
 }
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#doctype-state
-static StateResult handle_doctype_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#doctype-state
+static StateResult handle_doctype_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int c,
+  GumboToken* output
+) {
   assert(!tokenizer->_temporary_buffer.length);
   switch (c) {
     case '\t':
@@ -2220,9 +2532,13 @@ static StateResult handle_doctype_state(GumboParser* parser,
   }
 }
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#before-doctype-name-state
-static StateResult handle_before_doctype_name_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#before-doctype-name-state
+static StateResult handle_before_doctype_name_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '\t':
     case '\n':
@@ -2255,21 +2571,25 @@ static StateResult handle_before_doctype_name_state(GumboParser* parser,
   }
 }
-// http://www.whatwg.org/specs/web-apps/current-work/complete5/tokenization.html#doctype-name-state
-static StateResult handle_doctype_name_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#doctype-name-state
+static StateResult handle_doctype_name_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '\t':
     case '\n':
     case '\f':
     case ' ':
       gumbo_tokenizer_set_state(parser, GUMBO_LEX_AFTER_DOCTYPE_NAME);
-      gumbo_parser_deallocate(parser, (void*) tokenizer->_doc_type_state.name);
+      gumbo_free((void*) tokenizer->_doc_type_state.name);
       finish_temporary_buffer(parser, &tokenizer->_doc_type_state.name);
       return NEXT_CHAR;
     case '>':
       gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
-      gumbo_parser_deallocate(parser, (void*) tokenizer->_doc_type_state.name);
+      gumbo_free((void*) tokenizer->_doc_type_state.name);
       finish_temporary_buffer(parser, &tokenizer->_doc_type_state.name);
       emit_doctype(parser, output);
       return RETURN_SUCCESS;
@@ -2281,7 +2601,7 @@ static StateResult handle_doctype_name_state(GumboParser* parser,
       tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_EOF);
       gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
       tokenizer->_doc_type_state.force_quirks = true;
-      gumbo_parser_deallocate(parser, (void*) tokenizer->_doc_type_state.name);
+      gumbo_free((void*) tokenizer->_doc_type_state.name);
       finish_temporary_buffer(parser, &tokenizer->_doc_type_state.name);
       emit_doctype(parser, output);
       return RETURN_ERROR;
@@ -2293,9 +2613,13 @@ static StateResult handle_doctype_name_state(GumboParser* parser,
   }
 }
-// http://www.whatwg.org/specs/web-apps/current-work/complete.html#after-doctype-name-state
-static StateResult handle_after_doctype_name_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#after-doctype-name-state
+static StateResult handle_after_doctype_name_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '\t':
     case '\n':
@@ -2333,10 +2657,13 @@ static StateResult handle_after_doctype_name_state(GumboParser* parser,
   }
 }
-// http://www.whatwg.org/specs/web-apps/current-work/complete.html#after-doctype-public-keyword-state
-static StateResult handle_after_doctype_public_keyword_state(
-    GumboParser* parser, GumboTokenizerState* tokenizer, int c,
-    GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#after-doctype-public-keyword-state
+static StateResult handle_after_doctype_public_keyword_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '\t':
     case '\n':
@@ -2346,13 +2673,13 @@ static StateResult handle_after_doctype_public_keyword_state(
       return NEXT_CHAR;
     case '"':
       tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_INVALID);
-      assert(temporary_buffer_equals(parser, ""));
+      assert(temporary_buffer_is_empty(parser));
       gumbo_tokenizer_set_state(
           parser, GUMBO_LEX_DOCTYPE_PUBLIC_ID_DOUBLE_QUOTED);
       return NEXT_CHAR;
     case '\'':
       tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_INVALID);
-      assert(temporary_buffer_equals(parser, ""));
+      assert(temporary_buffer_is_empty(parser));
       gumbo_tokenizer_set_state(
           parser, GUMBO_LEX_DOCTYPE_PUBLIC_ID_SINGLE_QUOTED);
       return NEXT_CHAR;
@@ -2377,9 +2704,13 @@ static StateResult handle_after_doctype_public_keyword_state(
   }
 }
-// http://www.whatwg.org/specs/web-apps/current-work/complete.html#before-doctype-public-identifier-state
-static StateResult handle_before_doctype_public_id_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#before-doctype-public-identifier-state
+static StateResult handle_before_doctype_public_id_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '\t':
     case '\n':
@@ -2387,12 +2718,12 @@ static StateResult handle_before_doctype_public_id_state(GumboParser* parser,
     case ' ':
       return NEXT_CHAR;
     case '"':
-      assert(temporary_buffer_equals(parser, ""));
+      assert(temporary_buffer_is_empty(parser));
       gumbo_tokenizer_set_state(
           parser, GUMBO_LEX_DOCTYPE_PUBLIC_ID_DOUBLE_QUOTED);
       return NEXT_CHAR;
     case '\'':
-      assert(temporary_buffer_equals(parser, ""));
+      assert(temporary_buffer_is_empty(parser));
       gumbo_tokenizer_set_state(
           parser, GUMBO_LEX_DOCTYPE_PUBLIC_ID_SINGLE_QUOTED);
       return NEXT_CHAR;
@@ -2417,10 +2748,13 @@ static StateResult handle_before_doctype_public_id_state(GumboParser* parser,
   }
 }
-// http://www.whatwg.org/specs/web-apps/current-work/complete.html#doctype-public-identifier-(double-quoted)-state
-static StateResult handle_doctype_public_id_double_quoted_state(
-    GumboParser* parser, GumboTokenizerState* tokenizer, int c,
-    GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#doctype-public-identifier-(double-quoted)-state
+static StateResult handle_doctype_public_id_double_quoted_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '"':
       gumbo_tokenizer_set_state(parser, GUMBO_LEX_AFTER_DOCTYPE_PUBLIC_ID);
@@ -2450,10 +2784,13 @@ static StateResult handle_doctype_public_id_double_quoted_state(
   }
 }
-// http://www.whatwg.org/specs/web-apps/current-work/complete.html#doctype-public-identifier-(single-quoted)-state
-static StateResult handle_doctype_public_id_single_quoted_state(
-    GumboParser* parser, GumboTokenizerState* tokenizer, int c,
-    GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#doctype-public-identifier-(single-quoted)-state
+static StateResult handle_doctype_public_id_single_quoted_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '\'':
       gumbo_tokenizer_set_state(parser, GUMBO_LEX_AFTER_DOCTYPE_PUBLIC_ID);
@@ -2483,9 +2820,13 @@ static StateResult handle_doctype_public_id_single_quoted_state(
   }
 }
-// http://www.whatwg.org/specs/web-apps/current-work/complete.html#after-doctype-public-identifier-state
-static StateResult handle_after_doctype_public_id_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#after-doctype-public-identifier-state
+static StateResult handle_after_doctype_public_id_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '\t':
     case '\n':
@@ -2500,13 +2841,13 @@ static StateResult handle_after_doctype_public_id_state(GumboParser* parser,
       return RETURN_SUCCESS;
     case '"':
       tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_INVALID);
-      assert(temporary_buffer_equals(parser, ""));
+      assert(temporary_buffer_is_empty(parser));
       gumbo_tokenizer_set_state(
           parser, GUMBO_LEX_DOCTYPE_SYSTEM_ID_DOUBLE_QUOTED);
       return NEXT_CHAR;
     case '\'':
       tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_INVALID);
-      assert(temporary_buffer_equals(parser, ""));
+      assert(temporary_buffer_is_empty(parser));
       gumbo_tokenizer_set_state(
           parser, GUMBO_LEX_DOCTYPE_SYSTEM_ID_SINGLE_QUOTED);
       return NEXT_CHAR;
@@ -2525,10 +2866,13 @@ static StateResult handle_after_doctype_public_id_state(GumboParser* parser,
   }
 }
-// http://www.whatwg.org/specs/web-apps/current-work/complete.html#between-doctype-public-and-system-identifiers-state
-static StateResult handle_between_doctype_public_system_id_state(
-    GumboParser* parser, GumboTokenizerState* tokenizer, int c,
-    GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#between-doctype-public-and-system-identifiers-state
+static StateResult handle_between_doctype_public_system_id_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '\t':
     case '\n':
@@ -2540,12 +2884,12 @@ static StateResult handle_between_doctype_public_system_id_state(
       emit_doctype(parser, output);
       return RETURN_SUCCESS;
     case '"':
-      assert(temporary_buffer_equals(parser, ""));
+      assert(temporary_buffer_is_empty(parser));
       gumbo_tokenizer_set_state(
           parser, GUMBO_LEX_DOCTYPE_SYSTEM_ID_DOUBLE_QUOTED);
       return NEXT_CHAR;
     case '\'':
-      assert(temporary_buffer_equals(parser, ""));
+      assert(temporary_buffer_is_empty(parser));
       gumbo_tokenizer_set_state(
           parser, GUMBO_LEX_DOCTYPE_SYSTEM_ID_SINGLE_QUOTED);
       return NEXT_CHAR;
@@ -2564,10 +2908,13 @@ static StateResult handle_between_doctype_public_system_id_state(
   }
 }
-// http://www.whatwg.org/specs/web-apps/current-work/complete.html#after-doctype-system-keyword-state
-static StateResult handle_after_doctype_system_keyword_state(
-    GumboParser* parser, GumboTokenizerState* tokenizer, int c,
-    GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#after-doctype-system-keyword-state
+static StateResult handle_after_doctype_system_keyword_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '\t':
     case '\n':
@@ -2577,13 +2924,13 @@ static StateResult handle_after_doctype_system_keyword_state(
       return NEXT_CHAR;
     case '"':
       tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_INVALID);
-      assert(temporary_buffer_equals(parser, ""));
+      assert(temporary_buffer_is_empty(parser));
       gumbo_tokenizer_set_state(
           parser, GUMBO_LEX_DOCTYPE_SYSTEM_ID_DOUBLE_QUOTED);
       return NEXT_CHAR;
     case '\'':
       tokenizer_add_parse_error(parser, GUMBO_ERR_DOCTYPE_INVALID);
-      assert(temporary_buffer_equals(parser, ""));
+      assert(temporary_buffer_is_empty(parser));
       gumbo_tokenizer_set_state(
           parser, GUMBO_LEX_DOCTYPE_SYSTEM_ID_SINGLE_QUOTED);
       return NEXT_CHAR;
@@ -2607,9 +2954,13 @@ static StateResult handle_after_doctype_system_keyword_state(
   }
 }
-// http://www.whatwg.org/specs/web-apps/current-work/complete.html#before-doctype-system-identifier-state
-static StateResult handle_before_doctype_system_id_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#before-doctype-system-identifier-state
+static StateResult handle_before_doctype_system_id_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '\t':
     case '\n':
@@ -2617,12 +2968,12 @@ static StateResult handle_before_doctype_system_id_state(GumboParser* parser,
     case ' ':
       return NEXT_CHAR;
     case '"':
-      assert(temporary_buffer_equals(parser, ""));
+      assert(temporary_buffer_is_empty(parser));
       gumbo_tokenizer_set_state(
           parser, GUMBO_LEX_DOCTYPE_SYSTEM_ID_DOUBLE_QUOTED);
       return NEXT_CHAR;
     case '\'':
-      assert(temporary_buffer_equals(parser, ""));
+      assert(temporary_buffer_is_empty(parser));
       gumbo_tokenizer_set_state(
           parser, GUMBO_LEX_DOCTYPE_SYSTEM_ID_SINGLE_QUOTED);
       return NEXT_CHAR;
@@ -2646,10 +2997,13 @@ static StateResult handle_before_doctype_system_id_state(GumboParser* parser,
   }
 }
-// http://www.whatwg.org/specs/web-apps/current-work/complete.html#doctype-system-identifier-(double-quoted)-state
-static StateResult handle_doctype_system_id_double_quoted_state(
-    GumboParser* parser, GumboTokenizerState* tokenizer, int c,
-    GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#doctype-system-identifier-(double-quoted)-state
+static StateResult handle_doctype_system_id_double_quoted_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '"':
       gumbo_tokenizer_set_state(parser, GUMBO_LEX_AFTER_DOCTYPE_SYSTEM_ID);
@@ -2679,10 +3033,13 @@ static StateResult handle_doctype_system_id_double_quoted_state(
   }
 }
-// http://www.whatwg.org/specs/web-apps/current-work/complete.html#doctype-system-identifier-(single-quoted)-state
-static StateResult handle_doctype_system_id_single_quoted_state(
-    GumboParser* parser, GumboTokenizerState* tokenizer, int c,
-    GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#doctype-system-identifier-(single-quoted)-state
+static StateResult handle_doctype_system_id_single_quoted_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '\'':
       gumbo_tokenizer_set_state(parser, GUMBO_LEX_AFTER_DOCTYPE_SYSTEM_ID);
@@ -2712,9 +3069,13 @@ static StateResult handle_doctype_system_id_single_quoted_state(
   }
 }
-// http://www.whatwg.org/specs/web-apps/current-work/complete.html#after-doctype-system-identifier-state
-static StateResult handle_after_doctype_system_id_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#after-doctype-system-identifier-state
+static StateResult handle_after_doctype_system_id_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int c,
+  GumboToken* output
+) {
   switch (c) {
     case '\t':
     case '\n':
@@ -2738,9 +3099,13 @@ static StateResult handle_after_doctype_system_id_state(GumboParser* parser,
   }
 }
-// http://www.whatwg.org/specs/web-apps/current-work/complete.html#bogus-doctype-state
-static StateResult handle_bogus_doctype_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#bogus-doctype-state
+static StateResult handle_bogus_doctype_state (
+  GumboParser* parser,
+  GumboTokenizerState* UNUSED_ARG(tokenizer),
+  int c,
+  GumboToken* output
+) {
   if (c == '>' || c == -1) {
     gumbo_tokenizer_set_state(parser, GUMBO_LEX_DATA);
     emit_doctype(parser, output);
@@ -2749,9 +3114,13 @@ static StateResult handle_bogus_doctype_state(GumboParser* parser,
   return NEXT_CHAR;
 }
-// http://www.whatwg.org/specs/web-apps/current-work/complete.html#cdata-section-state
-static StateResult handle_cdata_state(GumboParser* parser,
-    GumboTokenizerState* tokenizer, int c, GumboToken* output) {
+// https://html.spec.whatwg.org/multipage/parsing.html#cdata-section-state
+static StateResult handle_cdata_state (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int c,
+  GumboToken* output
+) {
   if (c == -1 || utf8iterator_maybe_consume_match(
                      &tokenizer->_input, "]]>", sizeof("]]>") - 1, true)) {
     tokenizer->_reconsume_current_input = true;
@@ -2764,50 +3133,83 @@ static StateResult handle_cdata_state(GumboParser* parser,
   }
 }
-typedef StateResult (*GumboLexerStateFunction)(
-    GumboParser*, GumboTokenizerState*, int, GumboToken*);
-static GumboLexerStateFunction dispatch_table[] = {handle_data_state,
-    handle_char_ref_in_data_state, handle_rcdata_state,
-    handle_char_ref_in_rcdata_state, handle_rawtext_state, handle_script_state,
-    handle_plaintext_state, handle_tag_open_state, handle_end_tag_open_state,
-    handle_tag_name_state, handle_rcdata_lt_state,
-    handle_rcdata_end_tag_open_state, handle_rcdata_end_tag_name_state,
-    handle_rawtext_lt_state, handle_rawtext_end_tag_open_state,
-    handle_rawtext_end_tag_name_state, handle_script_lt_state,
-    handle_script_end_tag_open_state, handle_script_end_tag_name_state,
-    handle_script_escaped_start_state, handle_script_escaped_start_dash_state,
-    handle_script_escaped_state, handle_script_escaped_dash_state,
-    handle_script_escaped_dash_dash_state, handle_script_escaped_lt_state,
-    handle_script_escaped_end_tag_open_state,
-    handle_script_escaped_end_tag_name_state,
-    handle_script_double_escaped_start_state,
-    handle_script_double_escaped_state, handle_script_double_escaped_dash_state,
-    handle_script_double_escaped_dash_dash_state,
-    handle_script_double_escaped_lt_state,
-    handle_script_double_escaped_end_state, handle_before_attr_name_state,
-    handle_attr_name_state, handle_after_attr_name_state,
-    handle_before_attr_value_state, handle_attr_value_double_quoted_state,
-    handle_attr_value_single_quoted_state, handle_attr_value_unquoted_state,
-    handle_char_ref_in_attr_value_state, handle_after_attr_value_quoted_state,
-    handle_self_closing_start_tag_state, handle_bogus_comment_state,
-    handle_markup_declaration_state, handle_comment_start_state,
-    handle_comment_start_dash_state, handle_comment_state,
-    handle_comment_end_dash_state, handle_comment_end_state,
-    handle_comment_end_bang_state, handle_doctype_state,
-    handle_before_doctype_name_state, handle_doctype_name_state,
-    handle_after_doctype_name_state, handle_after_doctype_public_keyword_state,
-    handle_before_doctype_public_id_state,
-    handle_doctype_public_id_double_quoted_state,
-    handle_doctype_public_id_single_quoted_state,
-    handle_after_doctype_public_id_state,
-    handle_between_doctype_public_system_id_state,
-    handle_after_doctype_system_keyword_state,
-    handle_before_doctype_system_id_state,
-    handle_doctype_system_id_double_quoted_state,
-    handle_doctype_system_id_single_quoted_state,
-    handle_after_doctype_system_id_state, handle_bogus_doctype_state,
-    handle_cdata_state};
+typedef StateResult (*GumboLexerStateFunction) (
+  GumboParser* parser,
+  GumboTokenizerState* tokenizer,
+  int c,
+  GumboToken* output
+);
+static GumboLexerStateFunction dispatch_table[] = {
+  handle_data_state,
+  handle_char_ref_in_data_state,
+  handle_rcdata_state,
+  handle_char_ref_in_rcdata_state,
+  handle_rawtext_state,
+  handle_script_state,
+  handle_plaintext_state,
+  handle_tag_open_state,
+  handle_end_tag_open_state,
+  handle_tag_name_state,
+  handle_rcdata_lt_state,
+  handle_rcdata_end_tag_open_state,
+  handle_rcdata_end_tag_name_state,
+  handle_rawtext_lt_state,
+  handle_rawtext_end_tag_open_state,
+  handle_rawtext_end_tag_name_state,
+  handle_script_lt_state,
+  handle_script_end_tag_open_state,
+  handle_script_end_tag_name_state,
+  handle_script_escaped_start_state,
+  handle_script_escaped_start_dash_state,
+  handle_script_escaped_state,
+  handle_script_escaped_dash_state,
+  handle_script_escaped_dash_dash_state,
+  handle_script_escaped_lt_state,
+  handle_script_escaped_end_tag_open_state,
+  handle_script_escaped_end_tag_name_state,
+  handle_script_double_escaped_start_state,
+  handle_script_double_escaped_state,
+  handle_script_double_escaped_dash_state,
+  handle_script_double_escaped_dash_dash_state,
+  handle_script_double_escaped_lt_state,
+  handle_script_double_escaped_end_state,
+  handle_before_attr_name_state,
+  handle_attr_name_state,
+  handle_after_attr_name_state,
+  handle_before_attr_value_state,
+  handle_attr_value_double_quoted_state,
+  handle_attr_value_single_quoted_state,
+  handle_attr_value_unquoted_state,
+  handle_char_ref_in_attr_value_state,
+  handle_after_attr_value_quoted_state,
+  handle_self_closing_start_tag_state,
+  handle_bogus_comment_state,
+  handle_markup_declaration_state,
+  handle_comment_start_state,
+  handle_comment_start_dash_state,
+  handle_comment_state,
+  handle_comment_end_dash_state,
+  handle_comment_end_state,
+  handle_comment_end_bang_state,
+  handle_doctype_state,
+  handle_before_doctype_name_state,
+  handle_doctype_name_state,
+  handle_after_doctype_name_state,
+  handle_after_doctype_public_keyword_state,
+  handle_before_doctype_public_id_state,
+  handle_doctype_public_id_double_quoted_state,
+  handle_doctype_public_id_single_quoted_state,
+  handle_after_doctype_public_id_state,
+  handle_between_doctype_public_system_id_state,
+  handle_after_doctype_system_keyword_state,
+  handle_before_doctype_system_id_state,
+  handle_doctype_system_id_double_quoted_state,
+  handle_doctype_system_id_single_quoted_state,
+  handle_after_doctype_system_id_state,
+  handle_bogus_doctype_state,
+  handle_cdata_state
+};
 bool gumbo_lex(GumboParser* parser, GumboToken* output) {
   // Because of the spec requirements that...
@@ -2819,9 +3221,9 @@ bool gumbo_lex(GumboParser* parser, GumboToken* output) {
   // state.
   //
   // ...all state must be held in the GumboTokenizer struct instead of in local
-  // variables in this function.  That allows us to return from this method with
+  // variables in this function. That allows us to return from this method with
   // a token, and then immediately jump back to the same state with the same
-  // input if we need to return a different token.  The various emit_* functions
+  // input if we need to return a different token. The various emit_* functions
   // are responsible for changing state (eg. flushing the chardata buffer,
   // reading the next input character) to avoid an infinite loop.
   GumboTokenizerState* tokenizer = parser->_tokenizer_state;
@@ -2845,10 +3247,9 @@ bool gumbo_lex(GumboParser* parser, GumboToken* output) {
     assert(!tokenizer->_temporary_buffer_emit);
     assert(tokenizer->_buffered_emit_char == kGumboNoChar);
     int c = utf8iterator_current(&tokenizer->_input);
-    gumbo_debug(
-        "Lexing character '%c' (%d) in state %d.\n", c, c, tokenizer->_state);
-    StateResult result =
-        dispatch_table[tokenizer->_state](parser, tokenizer, c, output);
+    GumboTokenizerEnum state = tokenizer->_state;
+    gumbo_debug("Lexing character '%c' (%d) in state %u.\n", c, c, state);
+    StateResult result = dispatch_table[state](parser, tokenizer, c, output);
     // We need to clear reconsume_current_input before returning to prevent
     // certain infinite loop states.
     bool should_advance = !tokenizer->_reconsume_current_input;
@@ -2866,30 +3267,33 @@ bool gumbo_lex(GumboParser* parser, GumboToken* output) {
   }
 }
-void gumbo_token_destroy(GumboParser* parser, GumboToken* token) {
+void gumbo_token_destroy(GumboToken* token) {
   if (!token) return;
   switch (token->type) {
     case GUMBO_TOKEN_DOCTYPE:
-      gumbo_parser_deallocate(parser, (void*) token->v.doc_type.name);
-      gumbo_parser_deallocate(
-          parser, (void*) token->v.doc_type.public_identifier);
-      gumbo_parser_deallocate(
-          parser, (void*) token->v.doc_type.system_identifier);
+      gumbo_free((void*) token->v.doc_type.name);
+      gumbo_free((void*) token->v.doc_type.public_identifier);
+      gumbo_free((void*) token->v.doc_type.system_identifier);
       return;
     case GUMBO_TOKEN_START_TAG:
       for (unsigned int i = 0; i < token->v.start_tag.attributes.length; ++i) {
         GumboAttribute* attr = token->v.start_tag.attributes.data[i];
         if (attr) {
           // May have been nulled out if this token was merged with another.
-          gumbo_destroy_attribute(parser, attr);
+          gumbo_destroy_attribute(attr);
         }
       }
-      gumbo_parser_deallocate(
-          parser, (void*) token->v.start_tag.attributes.data);
+      gumbo_free((void*) token->v.start_tag.attributes.data);
+      if (token->v.start_tag.tag == GUMBO_TAG_UNKNOWN)
+        gumbo_free(token->v.start_tag.name);
       return;
+    case GUMBO_TOKEN_END_TAG:
+      if (token->v.end_tag.tag == GUMBO_TAG_UNKNOWN)
+        gumbo_free(token->v.end_tag.name);
+      break;
     case GUMBO_TOKEN_COMMENT:
-      gumbo_parser_deallocate(parser, (void*) token->v.text);
+      gumbo_free((void*) token->v.text);
       return;
     default:
       return;