RubyGems - prism - Versions diffs - 0.13.0 → 0.14.0 - Mend

prism 0.13.0 → 0.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (36) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +20 -1
data/README.md +4 -1
data/config.yml +10 -14
data/docs/fuzzing.md +5 -10
data/docs/prism.png +0 -0
data/docs/serialization.md +10 -0
data/ext/prism/api_node.c +35 -28
data/ext/prism/extension.c +35 -48
data/ext/prism/extension.h +1 -1
data/include/prism/ast.h +38 -36
data/include/prism/node.h +1 -0
data/include/prism/parser.h +26 -0
data/include/prism/util/pm_buffer.h +3 -0
data/include/prism/util/pm_constant_pool.h +5 -0
data/include/prism/util/pm_string.h +2 -1
data/include/prism/version.h +2 -2
data/include/prism.h +0 -1
data/lib/prism/compiler.rb +141 -141
data/lib/prism/ffi.rb +2 -2
data/lib/prism/lex_compat.rb +42 -8
data/lib/prism/node.rb +1456 -46
data/lib/prism/node_ext.rb +44 -0
data/lib/prism/parse_result.rb +32 -5
data/lib/prism/pattern.rb +1 -1
data/lib/prism/serialize.rb +16 -14
data/prism.gemspec +2 -3
data/src/diagnostic.c +1 -1
data/src/node.c +0 -14
data/src/prettyprint.c +35 -35
data/src/prism.c +1728 -811
data/src/serialize.c +45 -22
data/src/util/pm_buffer.c +9 -7
metadata +3 -4
data/include/prism/unescape.h +0 -48
data/src/unescape.c +0 -637

data/src/prism.c CHANGED Viewed

@@ -421,6 +421,63 @@ debug_lex_state_set(pm_parser_t *parser, pm_lex_state_t state, char const * call
 #define lex_state_set(parser, state) debug_lex_state_set(parser, state, __func__, __LINE__)
 #endif
+/******************************************************************************/
+/* Diagnostic-related functions                                               */
+/******************************************************************************/
+// Append an error to the list of errors on the parser.
+static inline void
+pm_parser_err(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) {
+    pm_diagnostic_list_append(&parser->error_list, start, end, diag_id);
+}
+// Append an error to the list of errors on the parser using the location of the
+// current token.
+static inline void
+pm_parser_err_current(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
+    pm_parser_err(parser, parser->current.start, parser->current.end, diag_id);
+}
+// Append an error to the list of errors on the parser using the given location.
+static inline void
+pm_parser_err_location(pm_parser_t *parser, const pm_location_t *location, pm_diagnostic_id_t diag_id) {
+    pm_parser_err(parser, location->start, location->end, diag_id);
+}
+// Append an error to the list of errors on the parser using the location of the
+// given node.
+static inline void
+pm_parser_err_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_t diag_id) {
+    pm_parser_err(parser, node->location.start, node->location.end, diag_id);
+}
+// Append an error to the list of errors on the parser using the location of the
+// previous token.
+static inline void
+pm_parser_err_previous(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
+    pm_parser_err(parser, parser->previous.start, parser->previous.end, diag_id);
+}
+// Append an error to the list of errors on the parser using the location of the
+// given token.
+static inline void
+pm_parser_err_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_id_t diag_id) {
+    pm_parser_err(parser, token->start, token->end, diag_id);
+}
+// Append a warning to the list of warnings on the parser.
+static inline void
+pm_parser_warn(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_diagnostic_id_t diag_id) {
+    pm_diagnostic_list_append(&parser->warning_list, start, end, diag_id);
+}
+// Append a warning to the list of warnings on the parser using the location of
+// the given token.
+static inline void
+pm_parser_warn_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_id_t diag_id) {
+    pm_parser_warn(parser, token->start, token->end, diag_id);
+}
 /******************************************************************************/
 /* Node-related functions                                                     */
 /******************************************************************************/
@@ -437,6 +494,22 @@ pm_parser_constant_id_owned(pm_parser_t *parser, const uint8_t *start, size_t le
     return pm_constant_pool_insert_owned(&parser->constant_pool, start, length);
 }
+// Retrieve the constant pool id for the given static literal C string.
+static inline pm_constant_id_t
+pm_parser_constant_id_static(pm_parser_t *parser, const char *start, size_t length) {
+    uint8_t *owned_copy;
+    if (length > 0) {
+        owned_copy = malloc(length);
+        memcpy(owned_copy, start, length);
+    } else {
+        owned_copy = malloc(1);
+        owned_copy[0] = '\0';
+    }
+    return pm_constant_pool_insert_owned(&parser->constant_pool, owned_copy, length);
+    // Does not work because the static literal cannot be serialized as an offset of source
+    // return pm_constant_pool_insert_shared(&parser->constant_pool, start, length);
+}
 // Retrieve the constant pool id for the given token.
 static inline pm_constant_id_t
 pm_parser_constant_id_token(pm_parser_t *parser, const pm_token_t *token) {
@@ -582,12 +655,7 @@ pm_arguments_validate_block(pm_parser_t *parser, pm_arguments_t *arguments, pm_b
     // If we didn't hit a case before this check, then at this point we need to
     // add a syntax error.
-    pm_diagnostic_list_append(
-        &parser->error_list,
-        block->base.location.start,
-        block->base.location.end,
-        PM_ERR_ARGUMENT_UNEXPECTED_BLOCK
-    );
+    pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_UNEXPECTED_BLOCK);
 }
 /******************************************************************************/
@@ -601,6 +669,7 @@ pm_scope_node_init(pm_node_t *node, pm_scope_node_t *scope) {
     scope->base.location.start = node->location.start;
     scope->base.location.end = node->location.end;
+    scope->ast_node = node;
     scope->parameters = NULL;
     scope->body = NULL;
     pm_constant_id_list_init(&scope->locals);
@@ -626,6 +695,11 @@ pm_scope_node_init(pm_node_t *node, pm_scope_node_t *scope) {
             scope->locals = cast->locals;
             break;
         }
+        case PM_FOR_NODE: {
+            pm_for_node_t *cast = (pm_for_node_t *)node;
+            scope->body = (pm_node_t *)cast->statements;
+            break;
+        }
         case PM_LAMBDA_NODE: {
             pm_lambda_node_t *cast = (pm_lambda_node_t *) node;
             if (cast->parameters) scope->parameters = cast->parameters->parameters;
@@ -679,14 +753,14 @@ parse_decimal_number(pm_parser_t *parser, const uint8_t *start, const uint8_t *e
     unsigned long value = strtoul(digits, &endptr, 10);
     if ((digits == endptr) || (*endptr != '\0') || (errno == ERANGE)) {
-        pm_diagnostic_list_append(&parser->error_list, start, end, PM_ERR_INVALID_NUMBER_DECIMAL);
+        pm_parser_err(parser, start, end, PM_ERR_INVALID_NUMBER_DECIMAL);
         value = UINT32_MAX;
     }
     free(digits);
     if (value > UINT32_MAX) {
-        pm_diagnostic_list_append(&parser->error_list, start, end, PM_ERR_INVALID_NUMBER_DECIMAL);
+        pm_parser_err(parser, start, end, PM_ERR_INVALID_NUMBER_DECIMAL);
         value = UINT32_MAX;
     }
@@ -907,7 +981,7 @@ pm_array_node_elements_append(pm_array_node_t *node, pm_node_t *element) {
     // If the element is not a static literal, then the array is not a static
     // literal. Turn that flag off.
-    if (PM_NODE_TYPE_P(element, PM_ARRAY_NODE) || PM_NODE_TYPE_P(element, PM_HASH_NODE) || (element->flags & PM_NODE_FLAG_STATIC_LITERAL) == 0) {
+    if (PM_NODE_TYPE_P(element, PM_ARRAY_NODE) || PM_NODE_TYPE_P(element, PM_HASH_NODE) || PM_NODE_TYPE_P(element, PM_RANGE_NODE) || (element->flags & PM_NODE_FLAG_STATIC_LITERAL) == 0) {
         node->base.flags &= (pm_node_flags_t) ~PM_NODE_FLAG_STATIC_LITERAL;
     }
 }
@@ -1051,8 +1125,10 @@ pm_assoc_node_create(pm_parser_t *parser, pm_node_t *key, const pm_token_t *oper
         end = key->location.end;
     }
+    // If the key and value of this assoc node are both static literals, then
+    // we can mark this node as a static literal.
     pm_node_flags_t flags = 0;
-    if (value && !PM_NODE_TYPE_P(value, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(value, PM_HASH_NODE)) {
+    if (value && !PM_NODE_TYPE_P(value, PM_ARRAY_NODE) && !PM_NODE_TYPE_P(value, PM_HASH_NODE) && !PM_NODE_TYPE_P(value, PM_RANGE_NODE)) {
         flags = key->flags & value->flags & PM_NODE_FLAG_STATIC_LITERAL;
     }
@@ -1341,7 +1417,8 @@ pm_call_node_create(pm_parser_t *parser) {
         .opening_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
         .arguments = NULL,
         .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
-        .block = NULL
+        .block = NULL,
+        .name = 0
     };
     return node;
@@ -1369,7 +1446,7 @@ pm_call_node_aref_create(pm_parser_t *parser, pm_node_t *receiver, pm_arguments_
     node->closing_loc = arguments->closing_loc;
     node->block = arguments->block;
-    pm_string_constant_init(&node->name, "[]", 2);
+    node->name = pm_parser_constant_id_static(parser, "[]", 2);
     return node;
 }
@@ -1388,7 +1465,7 @@ pm_call_node_binary_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t
     pm_arguments_node_arguments_append(arguments, argument);
     node->arguments = arguments;
-    pm_string_shared_init(&node->name, operator->start, operator->end);
+    node->name = pm_parser_constant_id_token(parser, operator);
     return node;
 }
@@ -1420,7 +1497,7 @@ pm_call_node_call_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *o
         node->base.flags |= PM_CALL_NODE_FLAGS_SAFE_NAVIGATION;
     }
-    pm_string_shared_init(&node->name, message->start, message->end);
+    node->name = pm_parser_constant_id_token(parser, message);
     return node;
 }
@@ -1447,7 +1524,7 @@ pm_call_node_fcall_create(pm_parser_t *parser, pm_token_t *message, pm_arguments
     node->closing_loc = arguments->closing_loc;
     node->block = arguments->block;
-    pm_string_shared_init(&node->name, message->start, message->end);
+    node->name = pm_parser_constant_id_token(parser, message);
     return node;
 }
@@ -1469,7 +1546,7 @@ pm_call_node_not_create(pm_parser_t *parser, pm_node_t *receiver, pm_token_t *me
     node->arguments = arguments->arguments;
     node->closing_loc = arguments->closing_loc;
-    pm_string_constant_init(&node->name, "!", 1);
+    node->name = pm_parser_constant_id_static(parser, "!", 1);
     return node;
 }
@@ -1496,7 +1573,7 @@ pm_call_node_shorthand_create(pm_parser_t *parser, pm_node_t *receiver, pm_token
         node->base.flags |= PM_CALL_NODE_FLAGS_SAFE_NAVIGATION;
     }
-    pm_string_constant_init(&node->name, "call", 4);
+    node->name = pm_parser_constant_id_static(parser, "call", 4);
     return node;
 }
@@ -1511,7 +1588,7 @@ pm_call_node_unary_create(pm_parser_t *parser, pm_token_t *operator, pm_node_t *
     node->receiver = receiver;
     node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(operator);
-    pm_string_constant_init(&node->name, name, strlen(name));
+    node->name = pm_parser_constant_id_static(parser, name, strlen(name));
     return node;
 }
@@ -1524,7 +1601,7 @@ pm_call_node_variable_call_create(pm_parser_t *parser, pm_token_t *message) {
     node->base.location = PM_LOCATION_TOKEN_VALUE(message);
     node->message_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(message);
-    pm_string_shared_init(&node->name, message->start, message->end);
+    node->name = pm_parser_constant_id_token(parser, message);
     return node;
 }
@@ -1537,17 +1614,18 @@ pm_call_node_variable_call_p(pm_call_node_t *node) {
 // Initialize the read name by reading the write name and chopping off the '='.
 static void
-pm_call_write_read_name_init(pm_string_t *read_name, pm_string_t *write_name) {
-    if (write_name->length >= 1) {
-        size_t length = write_name->length - 1;
+pm_call_write_read_name_init(pm_parser_t *parser, pm_constant_id_t *read_name, pm_constant_id_t *write_name) {
+    pm_constant_t *write_constant = pm_constant_pool_id_to_constant(&parser->constant_pool, *write_name);
+    if (write_constant->length >= 1) {
+        size_t length = write_constant->length - 1;
         void *memory = malloc(length);
-        memcpy(memory, write_name->source, length);
+        memcpy(memory, write_constant->start, length);
-        pm_string_owned_init(read_name, (uint8_t *) memory, length);
+        *read_name = pm_constant_pool_insert_owned(&parser->constant_pool, (uint8_t *) memory, length);
     } else {
         // We can get here if the message was missing because of a syntax error.
-        pm_string_constant_init(read_name, "", 0);
+        *read_name = pm_parser_constant_id_static(parser, "", 0);
     }
 }
@@ -1573,13 +1651,13 @@ pm_call_and_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const
         .opening_loc = target->opening_loc,
         .arguments = target->arguments,
         .closing_loc = target->closing_loc,
-        .read_name = PM_EMPTY_STRING,
+        .read_name = 0,
         .write_name = target->name,
         .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
         .value = value
     };
-    pm_call_write_read_name_init(&node->read_name, &node->write_name);
+    pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
     // Here we're going to free the target, since it is no longer necessary.
     // However, we don't want to call `pm_node_destroy` because we want to keep
@@ -1610,14 +1688,14 @@ pm_call_operator_write_node_create(pm_parser_t *parser, pm_call_node_t *target,
         .opening_loc = target->opening_loc,
         .arguments = target->arguments,
         .closing_loc = target->closing_loc,
-        .read_name = PM_EMPTY_STRING,
+        .read_name = 0,
         .write_name = target->name,
         .operator = pm_parser_constant_id_location(parser, operator->start, operator->end - 1),
         .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
         .value = value
     };
-    pm_call_write_read_name_init(&node->read_name, &node->write_name);
+    pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
     // Here we're going to free the target, since it is no longer necessary.
     // However, we don't want to call `pm_node_destroy` because we want to keep
@@ -1649,13 +1727,13 @@ pm_call_or_write_node_create(pm_parser_t *parser, pm_call_node_t *target, const
         .opening_loc = target->opening_loc,
         .arguments = target->arguments,
         .closing_loc = target->closing_loc,
-        .read_name = PM_EMPTY_STRING,
+        .read_name = 0,
         .write_name = target->name,
         .operator_loc = PM_LOCATION_TOKEN_VALUE(operator),
         .value = value
     };
-    pm_call_write_read_name_init(&node->read_name, &node->write_name);
+    pm_call_write_read_name_init(parser, &node->read_name, &node->write_name);
     // Here we're going to free the target, since it is no longer necessary.
     // However, we don't want to call `pm_node_destroy` because we want to keep
@@ -3372,11 +3450,20 @@ pm_local_variable_write_node_create(pm_parser_t *parser, pm_constant_id_t name,
     return node;
 }
+static inline bool
+token_is_numbered_parameter(const uint8_t *start, const uint8_t *end) {
+    return (end - start == 2) && (start[0] == '_') && (start[1] != '0') && (pm_char_is_decimal_digit(start[1]));
+}
 // Allocate and initialize a new LocalVariableTargetNode node.
 static pm_local_variable_target_node_t *
 pm_local_variable_target_node_create(pm_parser_t *parser, const pm_token_t *name) {
     pm_local_variable_target_node_t *node = PM_ALLOC_NODE(parser, pm_local_variable_target_node_t);
+    if (token_is_numbered_parameter(name->start, name->end)) {
+        pm_parser_err_token(parser, name, PM_ERR_PARAMETER_NUMBERED_RESERVED);
+    }
     *node = (pm_local_variable_target_node_t) {
         {
             .type = PM_LOCAL_VARIABLE_TARGET_NODE,
@@ -3870,10 +3957,27 @@ pm_pre_execution_node_create(pm_parser_t *parser, const pm_token_t *keyword, con
 static pm_range_node_t *
 pm_range_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *operator, pm_node_t *right) {
     pm_range_node_t *node = PM_ALLOC_NODE(parser, pm_range_node_t);
+    pm_node_flags_t flags = 0;
+    // Indicate that this node an exclusive range if the operator is `...`.
+    if (operator->type == PM_TOKEN_DOT_DOT_DOT || operator->type == PM_TOKEN_UDOT_DOT_DOT) {
+        flags |= PM_RANGE_FLAGS_EXCLUDE_END;
+    }
+    // Indicate that this node is a static literal (i.e., can be compiled with
+    // a putobject in CRuby) if the left and right are implicit nil, explicit
+    // nil, or integers.
+    if (
+        (left == NULL || PM_NODE_TYPE_P(left, PM_NIL_NODE) || PM_NODE_TYPE_P(left, PM_INTEGER_NODE)) &&
+        (right == NULL || PM_NODE_TYPE_P(right, PM_NIL_NODE) || PM_NODE_TYPE_P(right, PM_INTEGER_NODE))
+    ) {
+        flags |= PM_NODE_FLAG_STATIC_LITERAL;
+    }
     *node = (pm_range_node_t) {
         {
             .type = PM_RANGE_NODE,
+            .flags = flags,
             .location = {
                 .start = (left == NULL ? operator->start : left->location.start),
                 .end = (right == NULL ? operator->end : right->location.end)
@@ -3884,15 +3988,6 @@ pm_range_node_create(pm_parser_t *parser, pm_node_t *left, const pm_token_t *ope
         .operator_loc = PM_LOCATION_TOKEN_VALUE(operator)
     };
-    switch (operator->type) {
-        case PM_TOKEN_DOT_DOT_DOT:
-        case PM_TOKEN_UDOT_DOT_DOT:
-            node->base.flags |= PM_RANGE_FLAGS_EXCLUDE_END;
-            break;
-        default:
-            break;
-    }
     return node;
 }
@@ -3906,9 +4001,10 @@ pm_redo_node_create(pm_parser_t *parser, const pm_token_t *token) {
     return node;
 }
-// Allocate a new RegularExpressionNode node.
+// Allocate a new initialize a new RegularExpressionNode node with the given
+// unescaped string.
 static pm_regular_expression_node_t *
-pm_regular_expression_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
+pm_regular_expression_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *unescaped) {
     pm_regular_expression_node_t *node = PM_ALLOC_NODE(parser, pm_regular_expression_node_t);
     *node = (pm_regular_expression_node_t) {
@@ -3923,12 +4019,18 @@ pm_regular_expression_node_create(pm_parser_t *parser, const pm_token_t *opening
         .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
         .content_loc = PM_LOCATION_TOKEN_VALUE(content),
         .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
-        .unescaped = PM_EMPTY_STRING
+        .unescaped = *unescaped
     };
     return node;
 }
+// Allocate a new initialize a new RegularExpressionNode node.
+static inline pm_regular_expression_node_t *
+pm_regular_expression_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
+    return pm_regular_expression_node_create_unescaped(parser, opening, content, closing, &PM_EMPTY_STRING);
+}
 // Allocate a new RequiredDestructuredParameterNode node.
 static pm_required_destructured_parameter_node_t *
 pm_required_destructured_parameter_node_create(pm_parser_t *parser, const pm_token_t *opening) {
@@ -4274,9 +4376,9 @@ pm_string_concat_node_create(pm_parser_t *parser, pm_node_t *left, pm_node_t *ri
     return node;
 }
-// Allocate a new StringNode node.
-static pm_string_node_t *
-pm_string_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
+// Allocate a new StringNode node with the current string on the parser.
+static inline pm_string_node_t *
+pm_string_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *string) {
     pm_string_node_t *node = PM_ALLOC_NODE(parser, pm_string_node_t);
     pm_node_flags_t flags = 0;
@@ -4296,12 +4398,27 @@ pm_string_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_t
         .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
         .content_loc = PM_LOCATION_TOKEN_VALUE(content),
         .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
-        .unescaped = PM_EMPTY_STRING
+        .unescaped = *string
     };
     return node;
 }
+// Allocate a new StringNode node.
+static pm_string_node_t *
+pm_string_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
+    return pm_string_node_create_unescaped(parser, opening, content, closing, &PM_EMPTY_STRING);
+}
+// Allocate a new StringNode node and create it using the current string on the
+// parser.
+static pm_string_node_t *
+pm_string_node_create_current_string(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
+    pm_string_node_t *node = pm_string_node_create_unescaped(parser, opening, content, closing, &parser->current_string);
+    parser->current_string = PM_EMPTY_STRING;
+    return node;
+}
 // Allocate and initialize a new SuperNode node.
 static pm_super_node_t *
 pm_super_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_arguments_t *arguments) {
@@ -4338,9 +4455,10 @@ pm_super_node_create(pm_parser_t *parser, const pm_token_t *keyword, pm_argument
     return node;
 }
-// Allocate a new SymbolNode node.
+// Allocate and initialize a new SymbolNode node with the given unescaped
+// string.
 static pm_symbol_node_t *
-pm_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
+pm_symbol_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing, const pm_string_t *unescaped) {
     pm_symbol_node_t *node = PM_ALLOC_NODE(parser, pm_symbol_node_t);
     *node = (pm_symbol_node_t) {
@@ -4355,12 +4473,26 @@ pm_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_t
         .opening_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(opening),
         .value_loc = PM_LOCATION_TOKEN_VALUE(value),
         .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
-        .unescaped = PM_EMPTY_STRING
+        .unescaped = *unescaped
     };
     return node;
 }
+// Allocate and initialize a new SymbolNode node.
+static inline pm_symbol_node_t *
+pm_symbol_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
+    return pm_symbol_node_create_unescaped(parser, opening, value, closing, &PM_EMPTY_STRING);
+}
+// Allocate and initialize a new SymbolNode node with the current string.
+static pm_symbol_node_t *
+pm_symbol_node_create_current_string(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *value, const pm_token_t *closing) {
+    pm_symbol_node_t *node = pm_symbol_node_create_unescaped(parser, opening, value, closing, &parser->current_string);
+    parser->current_string = PM_EMPTY_STRING;
+    return node;
+}
 // Allocate and initialize a new SymbolNode node from a label.
 static pm_symbol_node_t *
 pm_symbol_node_label_create(pm_parser_t *parser, const pm_token_t *token) {
@@ -4376,8 +4508,6 @@ pm_symbol_node_label_create(pm_parser_t *parser, const pm_token_t *token) {
             assert((label.end - label.start) >= 0);
             pm_string_shared_init(&node->unescaped, label.start, label.end);
-            pm_unescape_manipulate_string(parser, &node->unescaped, PM_UNESCAPE_ALL);
             break;
         }
         case PM_TOKEN_MISSING: {
@@ -4710,9 +4840,10 @@ pm_while_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm
     return node;
 }
-// Allocate and initialize a new XStringNode node.
+// Allocate and initialize a new XStringNode node with the given unescaped
+// string.
 static pm_x_string_node_t *
-pm_xstring_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
+pm_xstring_node_create_unescaped(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, const pm_string_t *unescaped) {
     pm_x_string_node_t *node = PM_ALLOC_NODE(parser, pm_x_string_node_t);
     *node = (pm_x_string_node_t) {
@@ -4726,12 +4857,18 @@ pm_xstring_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_
         .opening_loc = PM_LOCATION_TOKEN_VALUE(opening),
         .content_loc = PM_LOCATION_TOKEN_VALUE(content),
         .closing_loc = PM_LOCATION_TOKEN_VALUE(closing),
-        .unescaped = PM_EMPTY_STRING
+        .unescaped = *unescaped
     };
     return node;
 }
+// Allocate and initialize a new XStringNode node.
+static inline pm_x_string_node_t *
+pm_xstring_node_create(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
+    return pm_xstring_node_create_unescaped(parser, opening, content, closing, &PM_EMPTY_STRING);
+}
 // Allocate a new YieldNode node.
 static pm_yield_node_t *
 pm_yield_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_location_t *lparen_loc, pm_arguments_node_t *arguments, const pm_location_t *rparen_loc) {
@@ -4765,8 +4902,6 @@ pm_yield_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_lo
     return node;
 }
-#undef PM_EMPTY_STRING
 #undef PM_ALLOC_NODE
 /******************************************************************************/
@@ -4783,7 +4918,8 @@ pm_parser_scope_push(pm_parser_t *parser, bool closed) {
         .previous = parser->current_scope,
         .closed = closed,
         .explicit_params = false,
-        .numbered_params = false
+        .numbered_params = false,
+        .transparent = false
     };
     pm_constant_id_list_init(&scope->locals);
@@ -4792,6 +4928,25 @@ pm_parser_scope_push(pm_parser_t *parser, bool closed) {
     return true;
 }
+// Allocate and initialize a new scope. Push it onto the scope stack.
+static bool
+pm_parser_scope_push_transparent(pm_parser_t *parser) {
+    pm_scope_t *scope = (pm_scope_t *) malloc(sizeof(pm_scope_t));
+    if (scope == NULL) return false;
+    *scope = (pm_scope_t) {
+        .previous = parser->current_scope,
+        .closed = false,
+        .explicit_params = false,
+        .numbered_params = false,
+        .transparent = true
+    };
+    parser->current_scope = scope;
+    return true;
+}
 // Check if the current scope has a given local variables.
 static int
 pm_parser_local_depth(pm_parser_t *parser, pm_token_t *token) {
@@ -4800,7 +4955,8 @@ pm_parser_local_depth(pm_parser_t *parser, pm_token_t *token) {
     int depth = 0;
     while (scope != NULL) {
-        if (pm_constant_id_list_includes(&scope->locals, constant_id)) return depth;
+        if (!scope->transparent &&
+                pm_constant_id_list_includes(&scope->locals, constant_id)) return depth;
         if (scope->closed) break;
         scope = scope->previous;
@@ -4813,8 +4969,12 @@ pm_parser_local_depth(pm_parser_t *parser, pm_token_t *token) {
 // Add a constant id to the local table of the current scope.
 static inline void
 pm_parser_local_add(pm_parser_t *parser, pm_constant_id_t constant_id) {
-    if (!pm_constant_id_list_includes(&parser->current_scope->locals, constant_id)) {
-        pm_constant_id_list_append(&parser->current_scope->locals, constant_id);
+    pm_scope_t *scope = parser->current_scope;
+    while (scope && scope->transparent) scope = scope->previous;
+    assert(scope != NULL);
+    if (!pm_constant_id_list_includes(&scope->locals, constant_id)) {
+        pm_constant_id_list_append(&scope->locals, constant_id);
     }
 }
@@ -4839,18 +4999,13 @@ pm_parser_local_add_owned(pm_parser_t *parser, const uint8_t *start, size_t leng
     if (constant_id != 0) pm_parser_local_add(parser, constant_id);
 }
-static inline bool
-token_is_numbered_parameter(const uint8_t *start, const uint8_t *end) {
-    return (end - start == 2) && (start[0] == '_') && (start[1] != '0') && (pm_char_is_decimal_digit(start[1]));
-}
 // Add a parameter name to the current scope and check whether the name of the
 // parameter is unique or not.
 static void
-pm_parser_parameter_name_check(pm_parser_t *parser, pm_token_t *name) {
+pm_parser_parameter_name_check(pm_parser_t *parser, const pm_token_t *name) {
     // We want to check whether the parameter name is a numbered parameter or not.
     if (token_is_numbered_parameter(name->start, name->end)) {
-        pm_diagnostic_list_append(&parser->error_list, name->start, name->end, PM_ERR_PARAMETER_NUMBERED_RESERVED);
+        pm_parser_err_token(parser, name, PM_ERR_PARAMETER_NUMBERED_RESERVED);
     }
     // We want to ignore any parameter name that starts with an underscore.
@@ -4861,7 +5016,7 @@ pm_parser_parameter_name_check(pm_parser_t *parser, pm_token_t *name) {
     pm_constant_id_t constant_id = pm_parser_constant_id_token(parser, name);
     if (pm_constant_id_list_includes(&parser->current_scope->locals, constant_id)) {
-        pm_diagnostic_list_append(&parser->error_list, name->start, name->end, PM_ERR_PARAMETER_NAME_REPEAT);
+        pm_parser_err_token(parser, name, PM_ERR_PARAMETER_NAME_REPEAT);
     }
 }
@@ -5007,17 +5162,6 @@ peek(pm_parser_t *parser) {
     return peek_at(parser, parser->current.end);
 }
-// Get the next string of length len in the source starting from parser->current.end.
-// If the string extends beyond the end of the source, return the empty string ""
-static inline const uint8_t *
-peek_string(pm_parser_t *parser, size_t len) {
-    if (parser->current.end + len <= parser->end) {
-        return parser->current.end;
-    } else {
-        return (const uint8_t *) "";
-    }
-}
 // If the character to be read matches the given value, then returns true and
 // advanced the current pointer.
 static inline bool
@@ -5069,66 +5213,17 @@ next_newline(const uint8_t *cursor, ptrdiff_t length) {
     return memchr(cursor, '\n', (size_t) length);
 }
-// Find the start of the encoding comment. This is effectively an inlined
-// version of strnstr with some modifications.
-static inline const uint8_t *
-parser_lex_encoding_comment_start(pm_parser_t *parser, const uint8_t *cursor, ptrdiff_t remaining) {
-    assert(remaining >= 0);
-    size_t length = (size_t) remaining;
-    size_t key_length = strlen("coding:");
-    if (key_length > length) return NULL;
-    const uint8_t *cursor_limit = cursor + length - key_length + 1;
-    while ((cursor = pm_memchr(cursor, 'c', (size_t) (cursor_limit - cursor), parser->encoding_changed, &parser->encoding)) != NULL) {
-        if (memcmp(cursor, "coding", key_length - 1) == 0) {
-            size_t whitespace_after_coding = pm_strspn_inline_whitespace(cursor + key_length - 1, parser->end - (cursor + key_length - 1));
-            size_t cur_pos = key_length + whitespace_after_coding;
-            if (cursor[cur_pos - 1] == ':' || cursor[cur_pos - 1] == '=') {
-                return cursor + cur_pos;
-            }
-        }
-        cursor++;
-    }
-    return NULL;
-}
 // Here we're going to check if this is a "magic" comment, and perform whatever
 // actions are necessary for it here.
 static void
-parser_lex_encoding_comment(pm_parser_t *parser) {
-    const uint8_t *start = parser->current.start + 1;
-    const uint8_t *end = parser->current.end;
-    // These are the patterns we're going to match to find the encoding comment.
-    // This is definitely not complete or even really correct.
-    const uint8_t *encoding_start = parser_lex_encoding_comment_start(parser, start, end - start);
-    // If we didn't find anything that matched our patterns, then return. Note
-    // that this does a _very_ poor job of actually finding the encoding, and
-    // there is a lot of work to do here to better reflect actual magic comment
-    // parsing from CRuby, but this at least gets us part of the way there.
-    if (encoding_start == NULL) return;
-    // Skip any non-newline whitespace after the "coding:" or "coding=".
-    encoding_start += pm_strspn_inline_whitespace(encoding_start, end - encoding_start);
-    // Now determine the end of the encoding string. This is either the end of
-    // the line, the first whitespace character, or a punctuation mark.
-    const uint8_t *encoding_end = pm_strpbrk(parser, encoding_start, (const uint8_t *) " \t\f\r\v\n;,", end - encoding_start);
-    encoding_end = encoding_end == NULL ? end : encoding_end;
-    // Finally, we can determine the width of the encoding string.
-    size_t width = (size_t) (encoding_end - encoding_start);
+parser_lex_magic_comment_encoding_value(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
+    size_t width = (size_t) (end - start);
     // First, we're going to call out to a user-defined callback if one was
     // provided. If they return an encoding struct that we can use, then we'll
     // use that here.
     if (parser->encoding_decode_callback != NULL) {
-        pm_encoding_t *encoding = parser->encoding_decode_callback(parser, encoding_start, width);
+        pm_encoding_t *encoding = parser->encoding_decode_callback(parser, start, width);
         if (encoding != NULL) {
             parser->encoding = *encoding;
@@ -5140,7 +5235,7 @@ parser_lex_encoding_comment(pm_parser_t *parser) {
     // Extensions like utf-8 can contain extra encoding details like,
     // utf-8-dos, utf-8-linux, utf-8-mac. We treat these all as utf-8 should
     // treat any encoding starting utf-8 as utf-8.
-    if ((encoding_start + 5 <= parser->end) && (pm_strncasecmp(encoding_start, (const uint8_t *) "utf-8", 5) == 0)) {
+    if ((start + 5 <= end) && (pm_strncasecmp(start, (const uint8_t *) "utf-8", 5) == 0)) {
         // We don't need to do anything here because the default encoding is
         // already UTF-8. We'll just return.
         return;
@@ -5149,7 +5244,7 @@ parser_lex_encoding_comment(pm_parser_t *parser) {
     // Next, we're going to loop through each of the encodings that we handle
     // explicitly. If we found one that we understand, we'll use that value.
 #define ENCODING(value, prebuilt) \
-    if (width == sizeof(value) - 1 && encoding_start + width <= parser->end && pm_strncasecmp(encoding_start, (const uint8_t *) value, width) == 0) { \
+    if (width == sizeof(value) - 1 && start + width <= end && pm_strncasecmp(start, (const uint8_t *) value, width) == 0) { \
         parser->encoding = prebuilt; \
         parser->encoding_changed |= true; \
         if (parser->encoding_changed_callback != NULL) parser->encoding_changed_callback(parser); \
@@ -5198,40 +5293,220 @@ parser_lex_encoding_comment(pm_parser_t *parser) {
     // didn't understand the encoding that the user was trying to use. In this
     // case we'll keep using the default encoding but add an error to the
     // parser to indicate an unsuccessful parse.
-    pm_diagnostic_list_append(&parser->error_list, encoding_start, encoding_end, PM_ERR_INVALID_ENCODING_MAGIC_COMMENT);
+    pm_parser_err(parser, start, end, PM_ERR_INVALID_ENCODING_MAGIC_COMMENT);
+}
+// Look for a specific pattern of "coding" and potentially set the encoding on
+// the parser.
+static void
+parser_lex_magic_comment_encoding(pm_parser_t *parser) {
+    const uint8_t *cursor = parser->current.start + 1;
+    const uint8_t *end = parser->current.end;
+    bool separator = false;
+    while (true) {
+        if (end - cursor <= 6) return;
+        switch (cursor[6]) {
+            case 'C': case 'c': cursor += 6; continue;
+            case 'O': case 'o': cursor += 5; continue;
+            case 'D': case 'd': cursor += 4; continue;
+            case 'I': case 'i': cursor += 3; continue;
+            case 'N': case 'n': cursor += 2; continue;
+            case 'G': case 'g': cursor += 1; continue;
+            case '=': case ':':
+                separator = true;
+                cursor += 6;
+                break;
+            default:
+                cursor += 6;
+                if (pm_char_is_whitespace(*cursor)) break;
+                continue;
+        }
+        if (pm_strncasecmp(cursor - 6, (const uint8_t *) "coding", 6) == 0) break;
+        separator = false;
+    }
+    while (true) {
+        do {
+            if (++cursor >= end) return;
+        } while (pm_char_is_whitespace(*cursor));
+        if (separator) break;
+        if (*cursor != '=' && *cursor != ':') return;
+        separator = true;
+        cursor++;
+    }
+    const uint8_t *value_start = cursor;
+    while ((*cursor == '-' || *cursor == '_' || parser->encoding.alnum_char(cursor, 1)) && ++cursor < end);
+    parser_lex_magic_comment_encoding_value(parser, value_start, cursor);
 }
 // Check if this is a magic comment that includes the frozen_string_literal
 // pragma. If it does, set that field on the parser.
 static void
-parser_lex_frozen_string_literal_comment(pm_parser_t *parser) {
-    const uint8_t *cursor = parser->current.start + 1;
+parser_lex_magic_comment_frozen_string_literal_value(pm_parser_t *parser, const uint8_t *start, const uint8_t *end) {
+    if (start + 4 <= end && pm_strncasecmp(start, (const uint8_t *) "true", 4) == 0) {
+        parser->frozen_string_literal = true;
+    }
+}
+static inline bool
+pm_char_is_magic_comment_key_delimiter(const uint8_t b) {
+    return b == '\'' || b == '"' || b == ':' || b == ';';
+}
+// Find an emacs magic comment marker (-*-) within the given bounds. If one is
+// found, it returns a pointer to the start of the marker. Otherwise it returns
+// NULL.
+static inline const uint8_t *
+parser_lex_magic_comment_emacs_marker(pm_parser_t *parser, const uint8_t *cursor, const uint8_t *end) {
+    while ((cursor + 3 <= end) && (cursor = pm_memchr(cursor, '-', (size_t) (end - cursor), parser->encoding_changed, &parser->encoding)) != NULL) {
+        if (cursor + 3 <= end && cursor[1] == '*' && cursor[2] == '-') {
+            return cursor;
+        }
+        cursor++;
+    }
+    return NULL;
+}
+// Parse the current token on the parser to see if it's a magic comment and
+// potentially perform some action based on that. A regular expression that this
+// function is effectively matching is:
+//
+//     %r"([^\\s\'\":;]+)\\s*:\\s*(\"(?:\\\\.|[^\"])*\"|[^\"\\s;]+)[\\s;]*"
+//
+// It returns true if it consumes the entire comment. Otherwise it returns
+// false.
+static inline bool
+parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
+    const uint8_t *start = parser->current.start + 1;
     const uint8_t *end = parser->current.end;
+    if (end - start <= 7) return false;
+    const uint8_t *cursor;
+    bool indicator = false;
-    size_t key_length = strlen("frozen_string_literal");
-    if (key_length > (size_t) (end - cursor)) return;
+    if ((cursor = parser_lex_magic_comment_emacs_marker(parser, start, end)) != NULL) {
+        start = cursor + 3;
-    const uint8_t *cursor_limit = cursor + (end - cursor) - key_length + 1;
+        if ((cursor = parser_lex_magic_comment_emacs_marker(parser, start, end)) != NULL) {
+            end = cursor;
+            indicator = true;
+        } else {
+            // If we have a start marker but not an end marker, then we cannot
+            // have a magic comment.
+            return false;
+        }
+    }
-    while ((cursor = pm_memchr(cursor, 'f', (size_t) (cursor_limit - cursor), parser->encoding_changed, &parser->encoding)) != NULL) {
-        if (memcmp(cursor, "frozen_string_literal", key_length) == 0) {
-            cursor += key_length;
-            cursor += pm_strspn_inline_whitespace(cursor, end - cursor);
+    cursor = start;
+    while (cursor < end) {
+        while (cursor < end && (pm_char_is_magic_comment_key_delimiter(*cursor) || pm_char_is_whitespace(*cursor))) cursor++;
-            if (*cursor == ':' || *cursor == '=') {
-                cursor++;
-                cursor += pm_strspn_inline_whitespace(cursor, end - cursor);
+        const uint8_t *key_start = cursor;
+        while (cursor < end && (!pm_char_is_magic_comment_key_delimiter(*cursor) && !pm_char_is_whitespace(*cursor))) cursor++;
-                if (cursor + 4 <= end && pm_strncasecmp(cursor, (const uint8_t *) "true", 4) == 0) {
-                    parser->frozen_string_literal = true;
-                }
+        const uint8_t *key_end = cursor;
+        while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
+        if (cursor == end) break;
-                return;
+        if (*cursor == ':') {
+            cursor++;
+        } else {
+            if (!indicator) return false;
+            continue;
+        }
+        while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
+        if (cursor == end) break;
+        const uint8_t *value_start;
+        const uint8_t *value_end;
+        if (*cursor == '"') {
+            value_start = ++cursor;
+            for (; cursor < end && *cursor != '"'; cursor++) {
+                if (*cursor == '\\' && (cursor + 1 < end)) cursor++;
             }
+            value_end = cursor;
+        } else {
+            value_start = cursor;
+            while (cursor < end && *cursor != '"' && *cursor != ';' && !pm_char_is_whitespace(*cursor)) cursor++;
+            value_end = cursor;
         }
-        cursor++;
+        if (indicator) {
+            while (cursor < end && (*cursor == ';' || pm_char_is_whitespace(*cursor))) cursor++;
+        } else {
+            while (cursor < end && pm_char_is_whitespace(*cursor)) cursor++;
+            if (cursor != end) return false;
+        }
+        // Here, we need to do some processing on the key to swap out dashes for
+        // underscores. We only need to do this if there _is_ a dash in the key.
+        pm_string_t key;
+        const size_t key_length = (size_t) (key_end - key_start);
+        const uint8_t *dash = pm_memchr(key_start, '-', (size_t) key_length, parser->encoding_changed, &parser->encoding);
+        if (dash == NULL) {
+            pm_string_shared_init(&key, key_start, key_end);
+        } else {
+            size_t width = (size_t) (key_end - key_start);
+            uint8_t *buffer = malloc(width);
+            if (buffer == NULL) break;
+            memcpy(buffer, key_start, width);
+            buffer[dash - key_start] = '_';
+            while ((dash = pm_memchr(dash + 1, '-', (size_t) (key_end - dash - 1), parser->encoding_changed, &parser->encoding)) != NULL) {
+                buffer[dash - key_start] = '_';
+            }
+            pm_string_owned_init(&key, buffer, width);
+        }
+        // Finally, we can start checking the key against the list of known
+        // magic comment keys, and potentially change state based on that.
+        const uint8_t *key_source = pm_string_source(&key);
+        // We only want to attempt to compare against encoding comments if it's
+        // the first line in the file (or the second in the case of a shebang).
+        if (parser->current.start == parser->encoding_comment_start) {
+            if (
+                (key_length == 8 && pm_strncasecmp(key_source, (const uint8_t *) "encoding", 8) == 0) ||
+                (key_length == 6 && pm_strncasecmp(key_source, (const uint8_t *) "coding", 6) == 0)
+            ) {
+                parser_lex_magic_comment_encoding_value(parser, value_start, value_end);
+            }
+        }
+        // We only want to handle frozen string literal comments if it's before
+        // any semantic tokens have been seen.
+        if (!semantic_token_seen) {
+            if (key_length == 21 && pm_strncasecmp(key_source, (const uint8_t *) "frozen_string_literal", 21) == 0) {
+                parser_lex_magic_comment_frozen_string_literal_value(parser, value_start, value_end);
+            }
+        }
+        // When we're done, we want to free the string in case we had to
+        // allocate memory for it.
+        pm_string_free(&key);
+        // Allocate a new magic comment node to append to the parser's list.
+        pm_magic_comment_t *magic_comment;
+        if ((magic_comment = (pm_magic_comment_t *) calloc(sizeof(pm_magic_comment_t), 1)) != NULL) {
+            magic_comment->key_start = key_start;
+            magic_comment->value_start = value_start;
+            magic_comment->key_length = (uint32_t) key_length;
+            magic_comment->value_length = (uint32_t) (value_end - value_start);
+            pm_list_append(&parser->magic_comment_list, (pm_list_node_t *) magic_comment);
+        }
     }
+    return true;
 }
 /******************************************************************************/
@@ -5366,7 +5641,7 @@ context_def_p(pm_parser_t *parser) {
 static void
 pm_strspn_number_validate(pm_parser_t *parser, const uint8_t *invalid) {
     if (invalid != NULL) {
-        pm_diagnostic_list_append(&parser->error_list, invalid, invalid + 1, PM_ERR_INVALID_NUMBER_UNDERSCORE);
+        pm_parser_err(parser, invalid, invalid + 1, PM_ERR_INVALID_NUMBER_UNDERSCORE);
     }
 }
@@ -5430,7 +5705,7 @@ lex_optional_float_suffix(pm_parser_t *parser) {
             parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
             type = PM_TOKEN_FLOAT;
         } else {
-            pm_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, PM_ERR_INVALID_FLOAT_EXPONENT);
+            pm_parser_err_current(parser, PM_ERR_INVALID_FLOAT_EXPONENT);
             type = PM_TOKEN_FLOAT;
         }
     }
@@ -5451,7 +5726,7 @@ lex_numeric_prefix(pm_parser_t *parser) {
                 if (pm_char_is_decimal_digit(peek(parser))) {
                     parser->current.end += pm_strspn_decimal_number_validate(parser, parser->current.end);
                 } else {
-                    pm_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, PM_ERR_INVALID_NUMBER_DECIMAL);
+                    pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_DECIMAL);
                 }
                 break;
@@ -5463,7 +5738,7 @@ lex_numeric_prefix(pm_parser_t *parser) {
                 if (pm_char_is_binary_digit(peek(parser))) {
                     parser->current.end += pm_strspn_binary_number_validate(parser, parser->current.end);
                 } else {
-                    pm_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, PM_ERR_INVALID_NUMBER_BINARY);
+                    pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_BINARY);
                 }
                 parser->integer_base = PM_INTEGER_BASE_FLAGS_BINARY;
@@ -5476,7 +5751,7 @@ lex_numeric_prefix(pm_parser_t *parser) {
                 if (pm_char_is_octal_digit(peek(parser))) {
                     parser->current.end += pm_strspn_octal_number_validate(parser, parser->current.end);
                 } else {
-                    pm_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, PM_ERR_INVALID_NUMBER_OCTAL);
+                    pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_OCTAL);
                 }
                 parser->integer_base = PM_INTEGER_BASE_FLAGS_OCTAL;
@@ -5503,7 +5778,7 @@ lex_numeric_prefix(pm_parser_t *parser) {
                 if (pm_char_is_hexadecimal_digit(peek(parser))) {
                     parser->current.end += pm_strspn_hexadecimal_number_validate(parser, parser->current.end);
                 } else {
-                    pm_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, PM_ERR_INVALID_NUMBER_HEXADECIMAL);
+                    pm_parser_err_current(parser, PM_ERR_INVALID_NUMBER_HEXADECIMAL);
                 }
                 parser->integer_base = PM_INTEGER_BASE_FLAGS_HEXADECIMAL;
@@ -5581,7 +5856,7 @@ lex_numeric(pm_parser_t *parser) {
 static pm_token_type_t
 lex_global_variable(pm_parser_t *parser) {
     if (parser->current.end >= parser->end) {
-        pm_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, PM_ERR_INVALID_VARIABLE_GLOBAL);
+        pm_parser_err_current(parser, PM_ERR_INVALID_VARIABLE_GLOBAL);
         return PM_TOKEN_GLOBAL_VARIABLE;
     }
@@ -5622,7 +5897,7 @@ lex_global_variable(pm_parser_t *parser) {
                 } while (parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0);
                 // $0 isn't allowed to be followed by anything.
-                pm_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, PM_ERR_INVALID_VARIABLE_GLOBAL);
+                pm_parser_err_current(parser, PM_ERR_INVALID_VARIABLE_GLOBAL);
             }
             return PM_TOKEN_GLOBAL_VARIABLE;
@@ -5653,7 +5928,7 @@ lex_global_variable(pm_parser_t *parser) {
             } else {
                 // If we get here, then we have a $ followed by something that isn't
                 // recognized as a global variable.
-                pm_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, PM_ERR_INVALID_VARIABLE_GLOBAL);
+                pm_parser_err_current(parser, PM_ERR_INVALID_VARIABLE_GLOBAL);
             }
             return PM_TOKEN_GLOBAL_VARIABLE;
@@ -5962,52 +6237,475 @@ lex_interpolation(pm_parser_t *parser, const uint8_t *pound) {
     }
 }
-// This function is responsible for lexing either a character literal or the ?
-// operator. The supported character literals are described below.
-//
-// \a             bell, ASCII 07h (BEL)
-// \b             backspace, ASCII 08h (BS)
-// \t             horizontal tab, ASCII 09h (TAB)
-// \n             newline (line feed), ASCII 0Ah (LF)
-// \v             vertical tab, ASCII 0Bh (VT)
-// \f             form feed, ASCII 0Ch (FF)
-// \r             carriage return, ASCII 0Dh (CR)
-// \e             escape, ASCII 1Bh (ESC)
-// \s             space, ASCII 20h (SPC)
-// \\             backslash
-// \nnn           octal bit pattern, where nnn is 1-3 octal digits ([0-7])
-// \xnn           hexadecimal bit pattern, where nn is 1-2 hexadecimal digits ([0-9a-fA-F])
-// \unnnn         Unicode character, where nnnn is exactly 4 hexadecimal digits ([0-9a-fA-F])
-// \u{nnnn ...}   Unicode character(s), where each nnnn is 1-6 hexadecimal digits ([0-9a-fA-F])
-// \cx or \C-x    control character, where x is an ASCII printable character
-// \M-x           meta character, where x is an ASCII printable character
-// \M-\C-x        meta control character, where x is an ASCII printable character
-// \M-\cx         same as above
-// \c\M-x         same as above
-// \c? or \C-?    delete, ASCII 7Fh (DEL)
-//
-static pm_token_type_t
-lex_question_mark(pm_parser_t *parser) {
-    if (lex_state_end_p(parser)) {
-        lex_state_set(parser, PM_LEX_STATE_BEG);
-        return PM_TOKEN_QUESTION_MARK;
-    }
+static const uint8_t PM_ESCAPE_FLAG_NONE = 0x0;
+static const uint8_t PM_ESCAPE_FLAG_CONTROL = 0x1;
+static const uint8_t PM_ESCAPE_FLAG_META = 0x2;
+static const uint8_t PM_ESCAPE_FLAG_SINGLE = 0x4;
+static const uint8_t PM_ESCAPE_FLAG_REGEXP = 0x8;
-    if (parser->current.end >= parser->end) {
-        pm_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, PM_ERR_INCOMPLETE_QUESTION_MARK);
-        return PM_TOKEN_CHARACTER_LITERAL;
-    }
+// This is a lookup table for whether or not an ASCII character is printable.
+static const bool ascii_printable_chars[] = {
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0
+};
-    if (pm_char_is_whitespace(*parser->current.end)) {
-        lex_state_set(parser, PM_LEX_STATE_BEG);
-        return PM_TOKEN_QUESTION_MARK;
-    }
+static inline bool
+char_is_ascii_printable(const uint8_t b) {
+    return (b < 0x80) && ascii_printable_chars[b];
+}
+// Return the value that a hexadecimal digit character represents. For example,
+// transform 'a' into 10, 'b' into 11, etc.
+static inline uint8_t
+escape_hexadecimal_digit(const uint8_t value) {
+    return (uint8_t) ((value <= '9') ? (value - '0') : (value & 0x7) + 9);
+}
+// Scan the 4 digits of a Unicode escape into the value. Returns the number of
+// digits scanned. This function assumes that the characters have already been
+// validated.
+static inline uint32_t
+escape_unicode(const uint8_t *string, size_t length) {
+    uint32_t value = 0;
+    for (size_t index = 0; index < length; index++) {
+        if (index != 0) value <<= 4;
+        value |= escape_hexadecimal_digit(string[index]);
+    }
+    return value;
+}
+// Escape a single character value based on the given flags.
+static inline uint8_t
+escape_byte(uint8_t value, const uint8_t flags) {
+    if (flags & PM_ESCAPE_FLAG_CONTROL) value &= 0x1f;
+    if (flags & PM_ESCAPE_FLAG_META) value |= 0x80;
+    return value;
+}
+// Write a unicode codepoint to the given buffer.
+static inline void
+escape_write_unicode(pm_parser_t *parser, pm_buffer_t *buffer, const uint8_t *start, const uint8_t *end, uint32_t value) {
+    if (value <= 0x7F) { // 0xxxxxxx
+        pm_buffer_append_u8(buffer, (uint8_t) value);
+    } else if (value <= 0x7FF) { // 110xxxxx 10xxxxxx
+        pm_buffer_append_u8(buffer, (uint8_t) (0xC0 | (value >> 6)));
+        pm_buffer_append_u8(buffer, (uint8_t) (0x80 | (value & 0x3F)));
+    } else if (value <= 0xFFFF) { // 1110xxxx 10xxxxxx 10xxxxxx
+        pm_buffer_append_u8(buffer, (uint8_t) (0xE0 | (value >> 12)));
+        pm_buffer_append_u8(buffer, (uint8_t) (0x80 | ((value >> 6) & 0x3F)));
+        pm_buffer_append_u8(buffer, (uint8_t) (0x80 | (value & 0x3F)));
+    } else if (value <= 0x10FFFF) { // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+        pm_buffer_append_u8(buffer, (uint8_t) (0xF0 | (value >> 18)));
+        pm_buffer_append_u8(buffer, (uint8_t) (0x80 | ((value >> 12) & 0x3F)));
+        pm_buffer_append_u8(buffer, (uint8_t) (0x80 | ((value >> 6) & 0x3F)));
+        pm_buffer_append_u8(buffer, (uint8_t) (0x80 | (value & 0x3F)));
+    } else {
+        pm_parser_err(parser, start, end, PM_ERR_ESCAPE_INVALID_UNICODE);
+        pm_buffer_append_u8(buffer, 0xEF);
+        pm_buffer_append_u8(buffer, 0xBF);
+        pm_buffer_append_u8(buffer, 0xBD);
+    }
+}
+// The regular expression engine doesn't support the same escape sequences as
+// Ruby does. So first we have to read the escape sequence, and then we have to
+// format it like the regular expression engine expects it. For example, in Ruby
+// if we have:
+//
+//     /\M-\C-?/
+//
+// then the first byte is actually 255, so we have to rewrite this as:
+//
+//     /\xFF/
+//
+// Note that in this case there is a literal \ byte in the regular expression
+// source so that the regular expression engine will perform its own unescaping.
+static inline void
+escape_write_byte(pm_buffer_t *buffer, uint8_t flags, uint8_t byte) {
+    if (flags & PM_ESCAPE_FLAG_REGEXP) {
+        pm_buffer_append_bytes(buffer, (const uint8_t *) "\\x", 2);
+        uint8_t byte1 = (uint8_t) ((byte >> 4) & 0xF);
+        uint8_t byte2 = (uint8_t) (byte & 0xF);
+        if (byte1 >= 0xA) {
+            pm_buffer_append_u8(buffer, (uint8_t) ((byte1 - 0xA) + 'A'));
+        } else {
+            pm_buffer_append_u8(buffer, (uint8_t) (byte1 + '0'));
+        }
+        if (byte2 >= 0xA) {
+            pm_buffer_append_u8(buffer, (uint8_t) (byte2 - 0xA + 'A'));
+        } else {
+            pm_buffer_append_u8(buffer, (uint8_t) (byte2 + '0'));
+        }
+    } else {
+        pm_buffer_append_u8(buffer, byte);
+    }
+}
+// Read the value of an escape into the buffer.
+static void
+escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
+    switch (peek(parser)) {
+        case '\\': {
+            parser->current.end++;
+            pm_buffer_append_u8(buffer, '\\');
+            return;
+        }
+        case '\'': {
+            parser->current.end++;
+            pm_buffer_append_u8(buffer, '\'');
+            return;
+        }
+        case 'a': {
+            parser->current.end++;
+            pm_buffer_append_u8(buffer, '\a');
+            return;
+        }
+        case 'b': {
+            parser->current.end++;
+            pm_buffer_append_u8(buffer, '\b');
+            return;
+        }
+        case 'e': {
+            parser->current.end++;
+            pm_buffer_append_u8(buffer, '\033');
+            return;
+        }
+        case 'f': {
+            parser->current.end++;
+            pm_buffer_append_u8(buffer, '\f');
+            return;
+        }
+        case 'n': {
+            parser->current.end++;
+            pm_buffer_append_u8(buffer, '\n');
+            return;
+        }
+        case 'r': {
+            parser->current.end++;
+            pm_buffer_append_u8(buffer, '\r');
+            return;
+        }
+        case 's': {
+            parser->current.end++;
+            pm_buffer_append_u8(buffer, ' ');
+            return;
+        }
+        case 't': {
+            parser->current.end++;
+            pm_buffer_append_u8(buffer, '\t');
+            return;
+        }
+        case 'v': {
+            parser->current.end++;
+            pm_buffer_append_u8(buffer, '\v');
+            return;
+        }
+        case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': {
+            uint8_t value = (uint8_t) (*parser->current.end - '0');
+            parser->current.end++;
+            if (pm_char_is_octal_digit(peek(parser))) {
+                value = ((uint8_t) (value << 3)) | ((uint8_t) (*parser->current.end - '0'));
+                parser->current.end++;
+                if (pm_char_is_octal_digit(peek(parser))) {
+                    value = ((uint8_t) (value << 3)) | ((uint8_t) (*parser->current.end - '0'));
+                    parser->current.end++;
+                }
+            }
+            pm_buffer_append_u8(buffer, value);
+            return;
+        }
+        case 'x': {
+            const uint8_t *start = parser->current.end - 1;
+            parser->current.end++;
+            uint8_t byte = peek(parser);
+            if (pm_char_is_hexadecimal_digit(byte)) {
+                uint8_t value = escape_hexadecimal_digit(byte);
+                parser->current.end++;
+                byte = peek(parser);
+                if (pm_char_is_hexadecimal_digit(byte)) {
+                    value = (uint8_t) ((value << 4) | escape_hexadecimal_digit(byte));
+                    parser->current.end++;
+                }
+                if (flags & PM_ESCAPE_FLAG_REGEXP) {
+                    pm_buffer_append_bytes(buffer, start, (size_t) (parser->current.end - start));
+                } else {
+                    pm_buffer_append_u8(buffer, value);
+                }
+            } else {
+                pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_HEXADECIMAL);
+            }
+            return;
+        }
+        case 'u': {
+            const uint8_t *start = parser->current.end - 1;
+            parser->current.end++;
+            if (
+                (parser->current.end + 4 <= parser->end) &&
+                pm_char_is_hexadecimal_digit(parser->current.end[0]) &&
+                pm_char_is_hexadecimal_digit(parser->current.end[1]) &&
+                pm_char_is_hexadecimal_digit(parser->current.end[2]) &&
+                pm_char_is_hexadecimal_digit(parser->current.end[3])
+            ) {
+                uint32_t value = escape_unicode(parser->current.end, 4);
+                if (flags & PM_ESCAPE_FLAG_REGEXP) {
+                    pm_buffer_append_bytes(buffer, start, (size_t) (parser->current.end + 4 - start));
+                } else {
+                    escape_write_unicode(parser, buffer, start, parser->current.end + 4, value);
+                }
+                parser->current.end += 4;
+            } else if (peek(parser) == '{') {
+                const uint8_t *unicode_codepoints_start = parser->current.end - 2;
+                parser->current.end++;
+                parser->current.end += pm_strspn_whitespace(parser->current.end, parser->end - parser->current.end);
+                const uint8_t *extra_codepoints_start = NULL;
+                int codepoints_count = 0;
+                while ((parser->current.end < parser->end) && (*parser->current.end != '}')) {
+                    const uint8_t *unicode_start = parser->current.end;
+                    size_t hexadecimal_length = pm_strspn_hexadecimal_digit(parser->current.end, parser->end - parser->current.end);
+                    if (hexadecimal_length > 6) {
+                        // \u{nnnn} character literal allows only 1-6 hexadecimal digits
+                        pm_parser_err(parser, unicode_start, unicode_start + hexadecimal_length, PM_ERR_ESCAPE_INVALID_UNICODE_LONG);
+                    } else if (hexadecimal_length == 0) {
+                        // there are not hexadecimal characters
+                        pm_parser_err(parser, unicode_start, unicode_start + hexadecimal_length, PM_ERR_ESCAPE_INVALID_UNICODE);
+                        return;
+                    }
+                    parser->current.end += hexadecimal_length;
+                    codepoints_count++;
+                    if (flags & PM_ESCAPE_FLAG_SINGLE && codepoints_count == 2) {
+                        extra_codepoints_start = unicode_start;
+                    }
+                    if (!(flags & PM_ESCAPE_FLAG_REGEXP)) {
+                        uint32_t value = escape_unicode(unicode_start, hexadecimal_length);
+                        escape_write_unicode(parser, buffer, unicode_start, parser->current.end, value);
+                    }
+                    parser->current.end += pm_strspn_whitespace(parser->current.end, parser->end - parser->current.end);
+                }
+                // ?\u{nnnn} character literal should contain only one codepoint and cannot be like ?\u{nnnn mmmm}
+                if (flags & PM_ESCAPE_FLAG_SINGLE && codepoints_count > 1) {
+                    pm_parser_err(parser, extra_codepoints_start, parser->current.end - 1, PM_ERR_ESCAPE_INVALID_UNICODE_LITERAL);
+                }
+                if (peek(parser) == '}') {
+                    parser->current.end++;
+                } else {
+                    pm_parser_err(parser, unicode_codepoints_start, parser->current.end, PM_ERR_ESCAPE_INVALID_UNICODE_TERM);
+                }
+                if (flags & PM_ESCAPE_FLAG_REGEXP) {
+                    pm_buffer_append_bytes(buffer, unicode_codepoints_start, (size_t) (parser->current.end - unicode_codepoints_start));
+                }
+            } else {
+                pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_UNICODE);
+            }
+            return;
+        }
+        case 'c': {
+            parser->current.end++;
+            if (parser->current.end == parser->end) {
+                pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
+                return;
+            }
+            uint8_t peeked = peek(parser);
+            switch (peeked) {
+                case '?': {
+                    parser->current.end++;
+                    escape_write_byte(buffer, flags, escape_byte(0x7f, flags));
+                    return;
+                }
+                case '\\':
+                    if (flags & PM_ESCAPE_FLAG_CONTROL) {
+                        pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
+                        return;
+                    }
+                    parser->current.end++;
+                    escape_read(parser, buffer, flags | PM_ESCAPE_FLAG_CONTROL);
+                    return;
+                default: {
+                    if (!char_is_ascii_printable(peeked)) {
+                        pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
+                        return;
+                    }
+                    parser->current.end++;
+                    escape_write_byte(buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
+                    return;
+                }
+            }
+        }
+        case 'C': {
+            parser->current.end++;
+            if (peek(parser) != '-') {
+                pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
+                return;
+            }
+            parser->current.end++;
+            if (parser->current.end == parser->end) {
+                pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
+                return;
+            }
+            uint8_t peeked = peek(parser);
+            switch (peeked) {
+                case '?': {
+                    parser->current.end++;
+                    escape_write_byte(buffer, flags, escape_byte(0x7f, flags));
+                    return;
+                }
+                case '\\':
+                    if (flags & PM_ESCAPE_FLAG_CONTROL) {
+                        pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL_REPEAT);
+                        return;
+                    }
+                    parser->current.end++;
+                    escape_read(parser, buffer, flags | PM_ESCAPE_FLAG_CONTROL);
+                    return;
+                default: {
+                    if (!char_is_ascii_printable(peeked)) {
+                        pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_CONTROL);
+                        return;
+                    }
+                    parser->current.end++;
+                    escape_write_byte(buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_CONTROL));
+                    return;
+                }
+            }
+        }
+        case 'M': {
+            parser->current.end++;
+            if (peek(parser) != '-') {
+                pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META);
+                return;
+            }
+            parser->current.end++;
+            if (parser->current.end == parser->end) {
+                pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META);
+                return;
+            }
+            uint8_t peeked = peek(parser);
+            if (peeked == '\\') {
+                if (flags & PM_ESCAPE_FLAG_META) {
+                    pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META_REPEAT);
+                    return;
+                }
+                parser->current.end++;
+                escape_read(parser, buffer, flags | PM_ESCAPE_FLAG_META);
+                return;
+            }
+            if (!char_is_ascii_printable(peeked)) {
+                pm_parser_err_current(parser, PM_ERR_ESCAPE_INVALID_META);
+                return;
+            }
+            parser->current.end++;
+            escape_write_byte(buffer, flags, escape_byte(peeked, flags | PM_ESCAPE_FLAG_META));
+            return;
+        }
+        case '\r': {
+            if (peek_offset(parser, 1) == '\n') {
+                parser->current.end += 2;
+                pm_buffer_append_u8(buffer, '\n');
+                return;
+            }
+        }
+        /* fallthrough */
+        default: {
+            if (parser->current.end < parser->end) {
+                pm_buffer_append_u8(buffer, *parser->current.end++);
+            }
+            return;
+        }
+    }
+}
+// This function is responsible for lexing either a character literal or the ?
+// operator. The supported character literals are described below.
+//
+// \a             bell, ASCII 07h (BEL)
+// \b             backspace, ASCII 08h (BS)
+// \t             horizontal tab, ASCII 09h (TAB)
+// \n             newline (line feed), ASCII 0Ah (LF)
+// \v             vertical tab, ASCII 0Bh (VT)
+// \f             form feed, ASCII 0Ch (FF)
+// \r             carriage return, ASCII 0Dh (CR)
+// \e             escape, ASCII 1Bh (ESC)
+// \s             space, ASCII 20h (SPC)
+// \\             backslash
+// \nnn           octal bit pattern, where nnn is 1-3 octal digits ([0-7])
+// \xnn           hexadecimal bit pattern, where nn is 1-2 hexadecimal digits ([0-9a-fA-F])
+// \unnnn         Unicode character, where nnnn is exactly 4 hexadecimal digits ([0-9a-fA-F])
+// \u{nnnn ...}   Unicode character(s), where each nnnn is 1-6 hexadecimal digits ([0-9a-fA-F])
+// \cx or \C-x    control character, where x is an ASCII printable character
+// \M-x           meta character, where x is an ASCII printable character
+// \M-\C-x        meta control character, where x is an ASCII printable character
+// \M-\cx         same as above
+// \c\M-x         same as above
+// \c? or \C-?    delete, ASCII 7Fh (DEL)
+//
+static pm_token_type_t
+lex_question_mark(pm_parser_t *parser) {
+    if (lex_state_end_p(parser)) {
+        lex_state_set(parser, PM_LEX_STATE_BEG);
+        return PM_TOKEN_QUESTION_MARK;
+    }
+    if (parser->current.end >= parser->end) {
+        pm_parser_err_current(parser, PM_ERR_INCOMPLETE_QUESTION_MARK);
+        pm_string_shared_init(&parser->current_string, parser->current.start + 1, parser->current.end);
+        return PM_TOKEN_CHARACTER_LITERAL;
+    }
+    if (pm_char_is_whitespace(*parser->current.end)) {
+        lex_state_set(parser, PM_LEX_STATE_BEG);
+        return PM_TOKEN_QUESTION_MARK;
+    }
     lex_state_set(parser, PM_LEX_STATE_BEG);
-    if (parser->current.start[1] == '\\') {
+    if (match(parser, '\\')) {
         lex_state_set(parser, PM_LEX_STATE_END);
-        parser->current.end += pm_unescape_calculate_difference(parser, parser->current.start + 1, PM_UNESCAPE_ALL, true);
+        pm_buffer_t buffer;
+        pm_buffer_init_capacity(&buffer, 3);
+        escape_read(parser, &buffer, PM_ESCAPE_FLAG_SINGLE);
+        pm_string_owned_init(&parser->current_string, (uint8_t *) buffer.value, buffer.length);
         return PM_TOKEN_CHARACTER_LITERAL;
     } else {
         size_t encoding_width = parser->encoding.char_width(parser->current.end, parser->end - parser->current.end);
@@ -6024,6 +6722,7 @@ lex_question_mark(pm_parser_t *parser) {
         ) {
             lex_state_set(parser, PM_LEX_STATE_END);
             parser->current.end += encoding_width;
+            pm_string_shared_init(&parser->current_string, parser->current.start + 1, parser->current.end);
             return PM_TOKEN_CHARACTER_LITERAL;
         }
     }
@@ -6045,9 +6744,9 @@ lex_at_variable(pm_parser_t *parser) {
             parser->current.end += width;
         }
     } else if (type == PM_TOKEN_CLASS_VARIABLE) {
-        pm_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, PM_ERR_INCOMPLETE_VARIABLE_CLASS);
+        pm_parser_err_current(parser, PM_ERR_INCOMPLETE_VARIABLE_CLASS);
     } else {
-        pm_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, PM_ERR_INCOMPLETE_VARIABLE_INSTANCE);
+        pm_parser_err_current(parser, PM_ERR_INCOMPLETE_VARIABLE_INSTANCE);
     }
     // If we're lexing an embedded variable, then we need to pop back into the
@@ -6070,7 +6769,7 @@ parser_lex_callback(pm_parser_t *parser) {
 // Return a new comment node of the specified type.
 static inline pm_comment_t *
 parser_comment(pm_parser_t *parser, pm_comment_type_t type) {
-    pm_comment_t *comment = (pm_comment_t *) malloc(sizeof(pm_comment_t));
+    pm_comment_t *comment = (pm_comment_t *) calloc(sizeof(pm_comment_t), 1);
     if (comment == NULL) return NULL;
     *comment = (pm_comment_t) {
@@ -6146,7 +6845,7 @@ lex_embdoc(pm_parser_t *parser) {
         parser_lex_callback(parser);
     }
-    pm_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, PM_ERR_EMBDOC_TERM);
+    pm_parser_err_current(parser, PM_ERR_EMBDOC_TERM);
     comment->end = parser->current.end;
     pm_list_append(&parser->comment_list, (pm_list_node_t *) comment);
@@ -6177,6 +6876,113 @@ parser_flush_heredoc_end(pm_parser_t *parser) {
     parser->heredoc_end = NULL;
 }
+// When we're lexing certain types (strings, symbols, lists, etc.) we have
+// string content associated with the tokens. For example:
+//
+//     "foo"
+//
+// In this case, the string content is foo. Since there is no escaping, there's
+// no need to track additional information and the token can be returned as
+// normal. However, if we have escape sequences:
+//
+//     "foo\n"
+//
+// then the bytes in the string are "f", "o", "o", "\", "n", but we want to
+// provide out consumers with the string content "f", "o", "o", "\n". In these
+// cases, when we find the first escape sequence, we initialize a pm_buffer_t
+// to keep track of the string content. Then in the parser, it will
+// automatically attach the string content to the node that it belongs to.
+typedef struct {
+    pm_buffer_t buffer;
+    const uint8_t *cursor;
+} pm_token_buffer_t;
+// Push the given byte into the token buffer.
+static inline void
+pm_token_buffer_push(pm_token_buffer_t *token_buffer, uint8_t byte) {
+    pm_buffer_append_u8(&token_buffer->buffer, byte);
+}
+// When we're about to return from lexing the current token and we know for sure
+// that we have found an escape sequence, this function is called to copy the
+// contents of the token buffer into the current string on the parser so that it
+// can be attached to the correct node.
+static inline void
+pm_token_buffer_copy(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
+    pm_string_owned_init(&parser->current_string, (uint8_t *) token_buffer->buffer.value, token_buffer->buffer.length);
+}
+// When we're about to return from lexing the current token, we need to flush
+// all of the content that we have pushed into the buffer into the current
+// string. If we haven't pushed anything into the buffer, this means that we
+// never found an escape sequence, so we can directly reference the bounds of
+// the current string. Either way, at the return of this function it is expected
+// that parser->current_string is established in such a way that it can be
+// attached to a node.
+static void
+pm_token_buffer_flush(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
+    if (token_buffer->cursor == NULL) {
+        pm_string_shared_init(&parser->current_string, parser->current.start, parser->current.end);
+    } else {
+        pm_buffer_append_bytes(&token_buffer->buffer, token_buffer->cursor, (size_t) (parser->current.end - token_buffer->cursor));
+        pm_token_buffer_copy(parser, token_buffer);
+    }
+}
+// When we've found an escape sequence, we need to copy everything up to this
+// point into the buffer because we're about to provide a string that has
+// different content than a direct slice of the source.
+//
+// It is expected that the parser's current token end will be pointing at one
+// byte past the backslash that starts the escape sequence.
+static void
+pm_token_buffer_escape(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
+    const uint8_t *start;
+    if (token_buffer->cursor == NULL) {
+        pm_buffer_init_capacity(&token_buffer->buffer, 16);
+        start = parser->current.start;
+    } else {
+        start = token_buffer->cursor;
+    }
+    const uint8_t *end = parser->current.end - 1;
+    pm_buffer_append_bytes(&token_buffer->buffer, start, (size_t) (end - start));
+}
+// Effectively the same thing as pm_strspn_inline_whitespace, but in the case of
+// a tilde heredoc expands out tab characters to the nearest tab boundaries.
+static inline size_t
+pm_heredoc_strspn_inline_whitespace(pm_parser_t *parser, const uint8_t **cursor, pm_heredoc_indent_t indent) {
+    size_t whitespace = 0;
+    switch (indent) {
+        case PM_HEREDOC_INDENT_NONE:
+            // Do nothing, we can't match a terminator with
+            // indentation and there's no need to calculate common
+            // whitespace.
+            break;
+        case PM_HEREDOC_INDENT_DASH:
+            // Skip past inline whitespace.
+            *cursor += pm_strspn_inline_whitespace(*cursor, parser->end - *cursor);
+            break;
+        case PM_HEREDOC_INDENT_TILDE:
+            // Skip past inline whitespace and calculate common
+            // whitespace.
+            while (*cursor < parser->end && pm_char_is_inline_whitespace(**cursor)) {
+                if (**cursor == '\t') {
+                    whitespace = (whitespace / PM_TAB_WHITESPACE_SIZE + 1) * PM_TAB_WHITESPACE_SIZE;
+                } else {
+                    whitespace++;
+                }
+                (*cursor)++;
+            }
+            break;
+    }
+    return whitespace;
+}
 // This is a convenience macro that will set the current token type, call the
 // lex callback, and then return from the parser_lex function.
 #define LEX(token_type) parser->current.type = token_type; parser_lex_callback(parser); return
@@ -6301,12 +7107,16 @@ parser_lex(pm_parser_t *parser) {
                     parser->current.type = PM_TOKEN_COMMENT;
                     parser_lex_callback(parser);
-                    if (parser->current.start == parser->encoding_comment_start) {
-                        parser_lex_encoding_comment(parser);
-                    }
+                    // Here, parse the comment to see if it's a magic comment
+                    // and potentially change state on the parser.
+                    if (!parser_lex_magic_comment(parser, semantic_token_seen) && (parser->current.start == parser->encoding_comment_start)) {
+                        ptrdiff_t length = parser->current.end - parser->current.start;
-                    if (!semantic_token_seen) {
-                        parser_lex_frozen_string_literal_comment(parser);
+                        // If we didn't find a magic comment within the first
+                        // pass and we're at the start of the file, then we need
+                        // to do another pass to potentially find other patterns
+                        // for encoding comments.
+                        if (length >= 10) parser_lex_magic_comment_encoding(parser);
                     }
                     lexed_comment = true;
@@ -6588,7 +7398,7 @@ parser_lex(pm_parser_t *parser) {
                     pm_token_type_t type = PM_TOKEN_STAR;
                     if (lex_state_spcarg_p(parser, space_seen)) {
-                        pm_diagnostic_list_append(&parser->warning_list, parser->current.start, parser->current.end, PM_WARN_AMBIGUOUS_PREFIX_STAR);
+                        pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_PREFIX_STAR);
                         type = PM_TOKEN_USTAR;
                     } else if (lex_state_beg_p(parser)) {
                         type = PM_TOKEN_USTAR;
@@ -6626,7 +7436,7 @@ parser_lex(pm_parser_t *parser) {
                 // = => =~ == === =begin
                 case '=':
-                    if (current_token_starts_line(parser) && memcmp(peek_string(parser, 5), "begin", 5) == 0 && pm_char_is_whitespace(peek_offset(parser, 5))) {
+                    if (current_token_starts_line(parser) && (parser->current.end + 5 <= parser->end) && memcmp(parser->current.end, "begin", 5) == 0 && pm_char_is_whitespace(peek_offset(parser, 5))) {
                         pm_token_type_t type = lex_embdoc(parser);
                         if (type == PM_TOKEN_EOF) {
@@ -6720,7 +7530,8 @@ parser_lex(pm_parser_t *parser) {
                                         .ident_length = ident_length,
                                         .next_start = parser->current.end,
                                         .quote = quote,
-                                        .indent = indent
+                                        .indent = indent,
+                                        .common_whitespace = (size_t) -1
                                     }
                                 });
@@ -6732,7 +7543,7 @@ parser_lex(pm_parser_t *parser) {
                                         // this is not a valid heredoc declaration. In this case we
                                         // will add an error, but we will still return a heredoc
                                         // start.
-                                        pm_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, PM_ERR_EMBDOC_TERM);
+                                        pm_parser_err_current(parser, PM_ERR_EMBDOC_TERM);
                                         body_start = parser->end;
                                     } else {
                                         // Otherwise, we want to indicate that the body of the
@@ -6925,12 +7736,7 @@ parser_lex(pm_parser_t *parser) {
                     bool spcarg = lex_state_spcarg_p(parser, space_seen);
                     if (spcarg) {
-                        pm_diagnostic_list_append(
-                            &parser->warning_list,
-                            parser->current.start,
-                            parser->current.end,
-                            PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS
-                        );
+                        pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_PLUS);
                     }
                     if (lex_state_beg_p(parser) || spcarg) {
@@ -6974,12 +7780,7 @@ parser_lex(pm_parser_t *parser) {
                     bool spcarg = lex_state_spcarg_p(parser, space_seen);
                     if (spcarg) {
-                        pm_diagnostic_list_append(
-                            &parser->warning_list,
-                            parser->current.start,
-                            parser->current.end,
-                            PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_MINUS
-                        );
+                        pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_FIRST_ARGUMENT_MINUS);
                     }
                     if (lex_state_beg_p(parser) || spcarg) {
@@ -7076,7 +7877,7 @@ parser_lex(pm_parser_t *parser) {
                     }
                     if (lex_state_spcarg_p(parser, space_seen)) {
-                        pm_diagnostic_list_append(&parser->warning_list, parser->current.start, parser->current.end, PM_WARN_AMBIGUOUS_SLASH);
+                        pm_parser_warn_token(parser, &parser->current, PM_WARN_AMBIGUOUS_SLASH);
                         lex_mode_push_regexp(parser, '\0', '/');
                         LEX(PM_TOKEN_REGEXP_BEGIN);
                     }
@@ -7116,7 +7917,7 @@ parser_lex(pm_parser_t *parser) {
                     // operator because we don't want to move into the string
                     // lex mode unnecessarily.
                     if ((lex_state_beg_p(parser) || lex_state_arg_p(parser)) && (parser->current.end >= parser->end)) {
-                        pm_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, PM_ERR_INVALID_PERCENT);
+                        pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
                         LEX(PM_TOKEN_PERCENT);
                     }
@@ -7149,7 +7950,7 @@ parser_lex(pm_parser_t *parser) {
                         // validate that here.
                         uint8_t delimiter = peek_offset(parser, 1);
                         if (delimiter >= 0x80 || parser->encoding.alnum_char(&delimiter, 1)) {
-                            pm_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, PM_ERR_INVALID_PERCENT);
+                            pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
                             goto lex_next_token;
                         }
@@ -7249,7 +8050,7 @@ parser_lex(pm_parser_t *parser) {
                                 // unparseable. In this case we'll just drop it from the parser
                                 // and skip past it and hope that the next token is something
                                 // that we can parse.
-                                pm_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, PM_ERR_INVALID_PERCENT);
+                                pm_parser_err_current(parser, PM_ERR_INVALID_PERCENT);
                                 goto lex_next_token;
                         }
                     }
@@ -7285,7 +8086,7 @@ parser_lex(pm_parser_t *parser) {
                         // token as we've exhausted all of the other options. We'll skip past
                         // it and return the next token.
                         if (!width) {
-                            pm_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, PM_ERR_INVALID_TOKEN);
+                            pm_parser_err_current(parser, PM_ERR_INVALID_TOKEN);
                             goto lex_next_token;
                         }
@@ -7351,7 +8152,7 @@ parser_lex(pm_parser_t *parser) {
                 }
             }
         }
-        case PM_LEX_LIST:
+        case PM_LEX_LIST: {
             if (parser->next_start != NULL) {
                 parser->current.end = parser->next_start;
                 parser->next_start = NULL;
@@ -7394,6 +8195,10 @@ parser_lex(pm_parser_t *parser) {
             const uint8_t *breakpoints = lex_mode->as.list.breakpoints;
             const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
+            // If we haven't found an escape yet, then this buffer will be
+            // unallocated since we can refer directly to the source string.
+            pm_token_buffer_t token_buffer = { 0 };
             while (breakpoint != NULL) {
                 // If we hit a null byte, skip directly past it.
                 if (*breakpoint == '\0') {
@@ -7405,16 +8210,18 @@ parser_lex(pm_parser_t *parser) {
                 // now, so we can return an element of the list.
                 if (pm_char_is_whitespace(*breakpoint)) {
                     parser->current.end = breakpoint;
+                    pm_token_buffer_flush(parser, &token_buffer);
                     LEX(PM_TOKEN_STRING_CONTENT);
                 }
-                //If we hit the terminator, we need to check which token to
+                // If we hit the terminator, we need to check which token to
                 // return.
                 if (*breakpoint == lex_mode->as.list.terminator) {
                     // If this terminator doesn't actually close the list, then
                     // we need to continue on past it.
                     if (lex_mode->as.list.nesting > 0) {
-                        breakpoint = pm_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
+                        parser->current.end = breakpoint + 1;
+                        breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
                         lex_mode->as.list.nesting--;
                         continue;
                     }
@@ -7423,6 +8230,7 @@ parser_lex(pm_parser_t *parser) {
                     // past content, then we can return a list node.
                     if (breakpoint > parser->current.start) {
                         parser->current.end = breakpoint;
+                        pm_token_buffer_flush(parser, &token_buffer);
                         LEX(PM_TOKEN_STRING_CONTENT);
                     }
@@ -7438,59 +8246,109 @@ parser_lex(pm_parser_t *parser) {
                 // literally. In this case we'll skip past the next character
                 // and find the next breakpoint.
                 if (*breakpoint == '\\') {
-                    pm_unescape_type_t unescape_type = lex_mode->as.list.interpolation ? PM_UNESCAPE_ALL : PM_UNESCAPE_MINIMAL;
-                    size_t difference = pm_unescape_calculate_difference(parser, breakpoint, unescape_type, false);
-                    if (difference == 0) {
-                        // we're at the end of the file
+                    parser->current.end = breakpoint + 1;
+                    // If we've hit the end of the file, then break out of the
+                    // loop by setting the breakpoint to NULL.
+                    if (parser->current.end == parser->end) {
                         breakpoint = NULL;
                         continue;
                     }
-                    // If the result is an escaped newline ...
-                    if (breakpoint[difference - 1] == '\n') {
-                        if (parser->heredoc_end) {
-                            // ... if we are on the same line as a heredoc, flush the heredoc and
-                            // continue parsing after heredoc_end.
-                            parser->current.end = breakpoint + difference;
-                            parser_flush_heredoc_end(parser);
-                            LEX(PM_TOKEN_STRING_CONTENT);
-                        } else {
-                            // ... else track the newline.
-                            pm_newline_list_append(&parser->newline_list, breakpoint + difference - 1);
-                        }
+                    pm_token_buffer_escape(parser, &token_buffer);
+                    uint8_t peeked = peek(parser);
+                    switch (peeked) {
+                        case ' ':
+                        case '\f':
+                        case '\t':
+                        case '\v':
+                        case '\\':
+                            pm_token_buffer_push(&token_buffer, peeked);
+                            parser->current.end++;
+                            break;
+                        case '\r':
+                            parser->current.end++;
+                            if (peek(parser) != '\n') {
+                                pm_token_buffer_push(&token_buffer, '\r');
+                                break;
+                            }
+                        /* fallthrough */
+                        case '\n':
+                            pm_token_buffer_push(&token_buffer, '\n');
+                            if (parser->heredoc_end) {
+                                // ... if we are on the same line as a heredoc,
+                                // flush the heredoc and continue parsing after
+                                // heredoc_end.
+                                parser_flush_heredoc_end(parser);
+                                pm_token_buffer_copy(parser, &token_buffer);
+                                LEX(PM_TOKEN_STRING_CONTENT);
+                            } else {
+                                // ... else track the newline.
+                                pm_newline_list_append(&parser->newline_list, parser->current.end);
+                            }
+                            parser->current.end++;
+                            break;
+                        default:
+                            if (peeked == lex_mode->as.list.incrementor || peeked == lex_mode->as.list.terminator) {
+                                pm_token_buffer_push(&token_buffer, peeked);
+                                parser->current.end++;
+                            } else if (lex_mode->as.list.interpolation) {
+                                escape_read(parser, &token_buffer.buffer, PM_ESCAPE_FLAG_NONE);
+                            } else {
+                                pm_token_buffer_push(&token_buffer, '\\');
+                                pm_token_buffer_push(&token_buffer, peeked);
+                                parser->current.end++;
+                            }
+                            break;
                     }
-                    breakpoint = pm_strpbrk(parser, breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
+                    token_buffer.cursor = parser->current.end;
+                    breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
                     continue;
                 }
                 // If we hit a #, then we will attempt to lex interpolation.
                 if (*breakpoint == '#') {
                     pm_token_type_t type = lex_interpolation(parser, breakpoint);
-                    if (type != PM_TOKEN_NOT_PROVIDED) {
-                        LEX(type);
+                    if (type == PM_TOKEN_NOT_PROVIDED) {
+                        // If we haven't returned at this point then we had something
+                        // that looked like an interpolated class or instance variable
+                        // like "#@" but wasn't actually. In this case we'll just skip
+                        // to the next breakpoint.
+                        breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
+                        continue;
                     }
-                    // If we haven't returned at this point then we had something
-                    // that looked like an interpolated class or instance variable
-                    // like "#@" but wasn't actually. In this case we'll just skip
-                    // to the next breakpoint.
-                    breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
-                    continue;
+                    if (type == PM_TOKEN_STRING_CONTENT) {
+                        pm_token_buffer_flush(parser, &token_buffer);
+                    }
+                    LEX(type);
                 }
                 // If we've hit the incrementor, then we need to skip past it
                 // and find the next breakpoint.
                 assert(*breakpoint == lex_mode->as.list.incrementor);
-                breakpoint = pm_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
+                parser->current.end = breakpoint + 1;
+                breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
                 lex_mode->as.list.nesting++;
                 continue;
             }
-            // If we were unable to find a breakpoint, then this token hits the end of
-            // the file.
-            LEX(PM_TOKEN_EOF);
+            if (parser->current.end > parser->current.start) {
+                pm_token_buffer_flush(parser, &token_buffer);
+                LEX(PM_TOKEN_STRING_CONTENT);
+            }
+            // If we were unable to find a breakpoint, then this token hits the
+            // end of the file.
+            LEX(PM_TOKEN_EOF);
+        }
         case PM_LEX_REGEXP: {
             // First, we'll set to start of this token to be the current end.
             if (parser->next_start == NULL) {
@@ -7515,11 +8373,13 @@ parser_lex(pm_parser_t *parser) {
             // characters.
             const uint8_t *breakpoints = lex_mode->as.regexp.breakpoints;
             const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
+            pm_token_buffer_t token_buffer = { 0 };
             while (breakpoint != NULL) {
                 // If we hit a null byte, skip directly past it.
                 if (*breakpoint == '\0') {
-                    breakpoint = pm_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
+                    parser->current.end = breakpoint + 1;
+                    breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
                     continue;
                 }
@@ -7540,7 +8400,8 @@ parser_lex(pm_parser_t *parser) {
                     if (lex_mode->as.regexp.terminator != '\n') {
                         // If the terminator is not a newline, then we can set
                         // the next breakpoint and continue.
-                        breakpoint = pm_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
+                        parser->current.end = breakpoint + 1;
+                        breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
                         continue;
                     }
                 }
@@ -7549,7 +8410,8 @@ parser_lex(pm_parser_t *parser) {
                 // token to return.
                 if (*breakpoint == lex_mode->as.regexp.terminator) {
                     if (lex_mode->as.regexp.nesting > 0) {
-                        breakpoint = pm_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
+                        parser->current.end = breakpoint + 1;
+                        breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
                         lex_mode->as.regexp.nesting--;
                         continue;
                     }
@@ -7559,11 +8421,12 @@ parser_lex(pm_parser_t *parser) {
                     // first.
                     if (breakpoint > parser->current.start) {
                         parser->current.end = breakpoint;
+                        pm_token_buffer_flush(parser, &token_buffer);
                         LEX(PM_TOKEN_STRING_CONTENT);
                     }
-                    // Since we've hit the terminator of the regular expression, we now
-                    // need to parse the options.
+                    // Since we've hit the terminator of the regular expression,
+                    // we now need to parse the options.
                     parser->current.end = breakpoint + 1;
                     parser->current.end += pm_strspn_regexp_option(parser->current.end, parser->end - parser->current.end);
@@ -7576,56 +8439,103 @@ parser_lex(pm_parser_t *parser) {
                 // literally. In this case we'll skip past the next character
                 // and find the next breakpoint.
                 if (*breakpoint == '\\') {
-                    size_t difference = pm_unescape_calculate_difference(parser, breakpoint, PM_UNESCAPE_ALL, false);
-                    if (difference == 0) {
-                        // we're at the end of the file
+                    parser->current.end = breakpoint + 1;
+                    // If we've hit the end of the file, then break out of the
+                    // loop by setting the breakpoint to NULL.
+                    if (parser->current.end == parser->end) {
                         breakpoint = NULL;
                         continue;
                     }
-                    // If the result is an escaped newline ...
-                    if (breakpoint[difference - 1] == '\n') {
-                        if (parser->heredoc_end) {
-                            // ... if we are on the same line as a heredoc, flush the heredoc and
-                            // continue parsing after heredoc_end.
-                            parser->current.end = breakpoint + difference;
-                            parser_flush_heredoc_end(parser);
-                            LEX(PM_TOKEN_STRING_CONTENT);
-                        } else {
-                            // ... else track the newline.
-                            pm_newline_list_append(&parser->newline_list, breakpoint + difference - 1);
-                        }
+                    pm_token_buffer_escape(parser, &token_buffer);
+                    uint8_t peeked = peek(parser);
+                    switch (peeked) {
+                        case '\r':
+                            parser->current.end++;
+                            if (peek(parser) != '\n') {
+                                pm_token_buffer_push(&token_buffer, '\\');
+                                pm_token_buffer_push(&token_buffer, '\r');
+                                break;
+                            }
+                        /* fallthrough */
+                        case '\n':
+                            if (parser->heredoc_end) {
+                                // ... if we are on the same line as a heredoc,
+                                // flush the heredoc and continue parsing after
+                                // heredoc_end.
+                                parser_flush_heredoc_end(parser);
+                                pm_token_buffer_copy(parser, &token_buffer);
+                                LEX(PM_TOKEN_STRING_CONTENT);
+                            } else {
+                                // ... else track the newline.
+                                pm_newline_list_append(&parser->newline_list, parser->current.end);
+                            }
+                            parser->current.end++;
+                            break;
+                        case 'c':
+                        case 'C':
+                        case 'M':
+                        case 'u':
+                        case 'x':
+                            escape_read(parser, &token_buffer.buffer, PM_ESCAPE_FLAG_REGEXP);
+                            break;
+                        default:
+                            if (lex_mode->as.regexp.terminator == '/' && peeked == '/') {
+                                pm_token_buffer_push(&token_buffer, peeked);
+                                parser->current.end++;
+                                break;
+                            }
+                            if (peeked < 0x80) pm_token_buffer_push(&token_buffer, '\\');
+                            pm_token_buffer_push(&token_buffer, peeked);
+                            parser->current.end++;
+                            break;
                     }
-                    breakpoint = pm_strpbrk(parser, breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
+                    token_buffer.cursor = parser->current.end;
+                    breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
                     continue;
                 }
                 // If we hit a #, then we will attempt to lex interpolation.
                 if (*breakpoint == '#') {
                     pm_token_type_t type = lex_interpolation(parser, breakpoint);
-                    if (type != PM_TOKEN_NOT_PROVIDED) {
-                        LEX(type);
+                    if (type == PM_TOKEN_NOT_PROVIDED) {
+                        // If we haven't returned at this point then we had
+                        // something that looked like an interpolated class or
+                        // instance variable like "#@" but wasn't actually. In
+                        // this case we'll just skip to the next breakpoint.
+                        breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
+                        continue;
                     }
-                    // If we haven't returned at this point then we had
-                    // something that looked like an interpolated class or
-                    // instance variable like "#@" but wasn't actually. In this
-                    // case we'll just skip to the next breakpoint.
-                    breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
-                    continue;
+                    if (type == PM_TOKEN_STRING_CONTENT) {
+                        pm_token_buffer_flush(parser, &token_buffer);
+                    }
+                    LEX(type);
                 }
                 // If we've hit the incrementor, then we need to skip past it
                 // and find the next breakpoint.
                 assert(*breakpoint == lex_mode->as.regexp.incrementor);
-                breakpoint = pm_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
+                parser->current.end = breakpoint + 1;
+                breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
                 lex_mode->as.regexp.nesting++;
                 continue;
             }
-            // At this point, the breakpoint is NULL which means we were unable to
-            // find anything before the end of the file.
+            if (parser->current.end > parser->current.start) {
+                pm_token_buffer_flush(parser, &token_buffer);
+                LEX(PM_TOKEN_STRING_CONTENT);
+            }
+            // If we were unable to find a breakpoint, then this token hits the
+            // end of the file.
             LEX(PM_TOKEN_EOF);
         }
         case PM_LEX_STRING: {
@@ -7646,30 +8556,34 @@ parser_lex(pm_parser_t *parser) {
             // These are the places where we need to split up the content of the
             // string. We'll use strpbrk to find the first of these characters.
-            const uint8_t *breakpoints = parser->lex_modes.current->as.string.breakpoints;
+            pm_lex_mode_t *lex_mode = parser->lex_modes.current;
+            const uint8_t *breakpoints = lex_mode->as.string.breakpoints;
             const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
+            // If we haven't found an escape yet, then this buffer will be
+            // unallocated since we can refer directly to the source string.
+            pm_token_buffer_t token_buffer = { 0 };
             while (breakpoint != NULL) {
                 // If we hit the incrementor, then we'll increment then nesting and
                 // continue lexing.
-                if (
-                    parser->lex_modes.current->as.string.incrementor != '\0' &&
-                    *breakpoint == parser->lex_modes.current->as.string.incrementor
-                ) {
-                    parser->lex_modes.current->as.string.nesting++;
-                    breakpoint = pm_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
+                if (lex_mode->as.string.incrementor != '\0' && *breakpoint == lex_mode->as.string.incrementor) {
+                    lex_mode->as.string.nesting++;
+                    parser->current.end = breakpoint + 1;
+                    breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
                     continue;
                 }
                 // Note that we have to check the terminator here first because we could
                 // potentially be parsing a % string that has a # character as the
                 // terminator.
-                if (*breakpoint == parser->lex_modes.current->as.string.terminator) {
+                if (*breakpoint == lex_mode->as.string.terminator) {
                     // If this terminator doesn't actually close the string, then we need
                     // to continue on past it.
-                    if (parser->lex_modes.current->as.string.nesting > 0) {
-                        breakpoint = pm_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
-                        parser->lex_modes.current->as.string.nesting--;
+                    if (lex_mode->as.string.nesting > 0) {
+                        parser->current.end = breakpoint + 1;
+                        breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
+                        lex_mode->as.string.nesting--;
                         continue;
                     }
@@ -7677,6 +8591,7 @@ parser_lex(pm_parser_t *parser) {
                     // then we need to return that content as string content first.
                     if (breakpoint > parser->current.start) {
                         parser->current.end = breakpoint;
+                        pm_token_buffer_flush(parser, &token_buffer);
                         LEX(PM_TOKEN_STRING_CONTENT);
                     }
@@ -7690,11 +8605,7 @@ parser_lex(pm_parser_t *parser) {
                         parser->current.end = breakpoint + 1;
                     }
-                    if (
-                        parser->lex_modes.current->as.string.label_allowed &&
-                        (peek(parser) == ':') &&
-                        (peek_offset(parser, 1) != ':')
-                    ) {
+                    if (lex_mode->as.string.label_allowed && (peek(parser) == ':') && (peek_offset(parser, 1) != ':')) {
                         parser->current.end++;
                         lex_state_set(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
                         lex_mode_pop(parser);
@@ -7712,11 +8623,13 @@ parser_lex(pm_parser_t *parser) {
                 if (*breakpoint == '\n') {
                     if (parser->heredoc_end == NULL) {
                         pm_newline_list_append(&parser->newline_list, breakpoint);
-                        breakpoint = pm_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
+                        parser->current.end = breakpoint + 1;
+                        breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
                         continue;
                     } else {
                         parser->current.end = breakpoint + 1;
                         parser_flush_heredoc_end(parser);
+                        pm_token_buffer_flush(parser, &token_buffer);
                         LEX(PM_TOKEN_STRING_CONTENT);
                     }
                 }
@@ -7724,58 +8637,110 @@ parser_lex(pm_parser_t *parser) {
                 switch (*breakpoint) {
                     case '\0':
                         // Skip directly past the null character.
-                        breakpoint = pm_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
+                        parser->current.end = breakpoint + 1;
+                        breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
                         break;
                     case '\\': {
-                        // If we hit escapes, then we need to treat the next token
-                        // literally. In this case we'll skip past the next character and
-                        // find the next breakpoint.
-                        pm_unescape_type_t unescape_type = parser->lex_modes.current->as.string.interpolation ? PM_UNESCAPE_ALL : PM_UNESCAPE_MINIMAL;
-                        size_t difference = pm_unescape_calculate_difference(parser, breakpoint, unescape_type, false);
-                        if (difference == 0) {
-                            // we're at the end of the file
+                        // Here we hit escapes.
+                        parser->current.end = breakpoint + 1;
+                        // If we've hit the end of the file, then break out of
+                        // the loop by setting the breakpoint to NULL.
+                        if (parser->current.end == parser->end) {
                             breakpoint = NULL;
-                            break;
+                            continue;
                         }
-                        // If the result is an escaped newline ...
-                        if (breakpoint[difference - 1] == '\n') {
-                            if (parser->heredoc_end) {
-                                // ... if we are on the same line as a heredoc, flush the heredoc and
-                                // continue parsing after heredoc_end.
-                                parser->current.end = breakpoint + difference;
-                                parser_flush_heredoc_end(parser);
-                                LEX(PM_TOKEN_STRING_CONTENT);
-                            } else {
-                                // ... else track the newline.
-                                pm_newline_list_append(&parser->newline_list, breakpoint + difference - 1);
-                            }
+                        pm_token_buffer_escape(parser, &token_buffer);
+                        uint8_t peeked = peek(parser);
+                        switch (peeked) {
+                            case '\\':
+                                pm_token_buffer_push(&token_buffer, '\\');
+                                parser->current.end++;
+                                break;
+                            case '\r':
+                                parser->current.end++;
+                                if (peek(parser) != '\n') {
+                                    if (!lex_mode->as.string.interpolation) {
+                                        pm_token_buffer_push(&token_buffer, '\\');
+                                    }
+                                    pm_token_buffer_push(&token_buffer, '\r');
+                                    break;
+                                }
+                            /* fallthrough */
+                            case '\n':
+                                if (!lex_mode->as.string.interpolation) {
+                                    pm_token_buffer_push(&token_buffer, '\\');
+                                    pm_token_buffer_push(&token_buffer, '\n');
+                                }
+                                if (parser->heredoc_end) {
+                                    // ... if we are on the same line as a heredoc,
+                                    // flush the heredoc and continue parsing after
+                                    // heredoc_end.
+                                    parser_flush_heredoc_end(parser);
+                                    pm_token_buffer_copy(parser, &token_buffer);
+                                    LEX(PM_TOKEN_STRING_CONTENT);
+                                } else {
+                                    // ... else track the newline.
+                                    pm_newline_list_append(&parser->newline_list, parser->current.end);
+                                }
+                                parser->current.end++;
+                                break;
+                            default:
+                                if (lex_mode->as.string.incrementor != '\0' && peeked == lex_mode->as.string.incrementor) {
+                                    pm_token_buffer_push(&token_buffer, peeked);
+                                    parser->current.end++;
+                                } else if (lex_mode->as.string.terminator != '\0' && peeked == lex_mode->as.string.terminator) {
+                                    pm_token_buffer_push(&token_buffer, peeked);
+                                    parser->current.end++;
+                                } else if (lex_mode->as.string.interpolation) {
+                                    escape_read(parser, &token_buffer.buffer, PM_ESCAPE_FLAG_NONE);
+                                } else {
+                                    pm_token_buffer_push(&token_buffer, '\\');
+                                    pm_token_buffer_push(&token_buffer, peeked);
+                                    parser->current.end++;
+                                }
+                                break;
                         }
-                        breakpoint = pm_strpbrk(parser, breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
+                        token_buffer.cursor = parser->current.end;
+                        breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
                         break;
                     }
                     case '#': {
                         pm_token_type_t type = lex_interpolation(parser, breakpoint);
-                        if (type != PM_TOKEN_NOT_PROVIDED) {
-                            LEX(type);
+                        if (type == PM_TOKEN_NOT_PROVIDED) {
+                            // If we haven't returned at this point then we had something that
+                            // looked like an interpolated class or instance variable like "#@"
+                            // but wasn't actually. In this case we'll just skip to the next
+                            // breakpoint.
+                            breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
+                            break;
                         }
-                        // If we haven't returned at this point then we had something that
-                        // looked like an interpolated class or instance variable like "#@"
-                        // but wasn't actually. In this case we'll just skip to the next
-                        // breakpoint.
-                        breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
-                        break;
+                        if (type == PM_TOKEN_STRING_CONTENT) {
+                            pm_token_buffer_flush(parser, &token_buffer);
+                        }
+                        LEX(type);
                     }
                     default:
                         assert(false && "unreachable");
                 }
             }
+            if (parser->current.end > parser->current.start) {
+                pm_token_buffer_flush(parser, &token_buffer);
+                LEX(PM_TOKEN_STRING_CONTENT);
+            }
             // If we've hit the end of the string, then this is an unterminated
             // string. In that case we'll return the EOF token.
-            parser->current.end = parser->end;
             LEX(PM_TOKEN_EOF);
         }
         case PM_LEX_HEREDOC: {
@@ -7797,16 +8762,15 @@ parser_lex(pm_parser_t *parser) {
             // Now let's grab the information about the identifier off of the current
             // lex mode.
-            const uint8_t *ident_start = parser->lex_modes.current->as.heredoc.ident_start;
-            size_t ident_length = parser->lex_modes.current->as.heredoc.ident_length;
+            pm_lex_mode_t *lex_mode = parser->lex_modes.current;
+            const uint8_t *ident_start = lex_mode->as.heredoc.ident_start;
+            size_t ident_length = lex_mode->as.heredoc.ident_length;
             // If we are immediately following a newline and we have hit the
             // terminator, then we need to return the ending of the heredoc.
             if (current_token_starts_line(parser)) {
                 const uint8_t *start = parser->current.start;
-                if (parser->lex_modes.current->as.heredoc.indent != PM_HEREDOC_INDENT_NONE) {
-                    start += pm_strspn_inline_whitespace(start, parser->end - start);
-                }
+                size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, lex_mode->as.heredoc.indent);
                 if ((start + ident_length <= parser->end) && (memcmp(start, ident_start, ident_length) == 0)) {
                     bool matched = true;
@@ -7824,10 +8788,10 @@ parser_lex(pm_parser_t *parser) {
                     }
                     if (matched) {
-                        if (*parser->lex_modes.current->as.heredoc.next_start == '\\') {
+                        if (*lex_mode->as.heredoc.next_start == '\\') {
                             parser->next_start = NULL;
                         } else {
-                            parser->next_start = parser->lex_modes.current->as.heredoc.next_start;
+                            parser->next_start = lex_mode->as.heredoc.next_start;
                             parser->heredoc_end = parser->current.end;
                         }
@@ -7838,61 +8802,91 @@ parser_lex(pm_parser_t *parser) {
                         LEX(PM_TOKEN_HEREDOC_END);
                     }
                 }
+                if (
+                    lex_mode->as.heredoc.indent == PM_HEREDOC_INDENT_TILDE &&
+                    (lex_mode->as.heredoc.common_whitespace > whitespace) &&
+                    peek_at(parser, start) != '\n'
+                ) {
+                    lex_mode->as.heredoc.common_whitespace = whitespace;
+                }
             }
-            // Otherwise we'll be parsing string content. These are the places where
-            // we need to split up the content of the heredoc. We'll use strpbrk to
-            // find the first of these characters.
+            // Otherwise we'll be parsing string content. These are the places
+            // where we need to split up the content of the heredoc. We'll use
+            // strpbrk to find the first of these characters.
             uint8_t breakpoints[] = "\n\\#";
-            pm_heredoc_quote_t quote = parser->lex_modes.current->as.heredoc.quote;
+            pm_heredoc_quote_t quote = lex_mode->as.heredoc.quote;
             if (quote == PM_HEREDOC_QUOTE_SINGLE) {
                 breakpoints[2] = '\0';
             }
             const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
+            pm_token_buffer_t token_buffer = { 0 };
+            bool was_escaped_newline = false;
             while (breakpoint != NULL) {
                 switch (*breakpoint) {
                     case '\0':
                         // Skip directly past the null character.
-                        breakpoint = pm_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
+                        parser->current.end = breakpoint + 1;
+                        breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
                         break;
                     case '\n': {
                         if (parser->heredoc_end != NULL && (parser->heredoc_end > breakpoint)) {
                             parser_flush_heredoc_end(parser);
                             parser->current.end = breakpoint + 1;
+                            pm_token_buffer_flush(parser, &token_buffer);
                             LEX(PM_TOKEN_STRING_CONTENT);
                         }
                         pm_newline_list_append(&parser->newline_list, breakpoint);
+                        // If we have a - or ~ heredoc, then we can match after
+                        // some leading whitespace.
                         const uint8_t *start = breakpoint + 1;
-                        if (parser->lex_modes.current->as.heredoc.indent != PM_HEREDOC_INDENT_NONE) {
-                            start += pm_strspn_inline_whitespace(start, parser->end - start);
-                        }
+                        size_t whitespace = pm_heredoc_strspn_inline_whitespace(parser, &start, lex_mode->as.heredoc.indent);
-                        // If we have hit a newline that is followed by a valid terminator,
-                        // then we need to return the content of the heredoc here as string
-                        // content. Then, the next time a token is lexed, it will match
-                        // again and return the end of the heredoc.
+                        // If we have hit a newline that is followed by a valid
+                        // terminator, then we need to return the content of the
+                        // heredoc here as string content. Then, the next time a
+                        // token is lexed, it will match again and return the
+                        // end of the heredoc.
                         if (
+                            !was_escaped_newline &&
                             (start + ident_length <= parser->end) &&
                             (memcmp(start, ident_start, ident_length) == 0)
                         ) {
-                            // Heredoc terminators must be followed by a newline, CRLF, or EOF to be valid.
+                            // Heredoc terminators must be followed by a
+                            // newline, CRLF, or EOF to be valid.
                             if (
                                 start + ident_length == parser->end ||
                                 match_eol_at(parser, start + ident_length)
                             ) {
                                 parser->current.end = breakpoint + 1;
+                                pm_token_buffer_flush(parser, &token_buffer);
+                                LEX(PM_TOKEN_STRING_CONTENT);
+                            }
+                        }
+                        if (lex_mode->as.heredoc.indent == PM_HEREDOC_INDENT_TILDE) {
+                            if ((lex_mode->as.heredoc.common_whitespace > whitespace) && peek_at(parser, start) != '\n') {
+                                lex_mode->as.heredoc.common_whitespace = whitespace;
+                            }
+                            parser->current.end = breakpoint + 1;
+                            if (!was_escaped_newline) {
+                                pm_token_buffer_flush(parser, &token_buffer);
                                 LEX(PM_TOKEN_STRING_CONTENT);
                             }
                         }
-                        // Otherwise we hit a newline and it wasn't followed by a
-                        // terminator, so we can continue parsing.
-                        breakpoint = pm_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
+                        // Otherwise we hit a newline and it wasn't followed by
+                        // a terminator, so we can continue parsing.
+                        parser->current.end = breakpoint + 1;
+                        breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
                         break;
                     }
                     case '\\': {
@@ -7902,46 +8896,98 @@ parser_lex(pm_parser_t *parser) {
                         // stop looping before the newline and not after the
                         // newline so that we can still potentially find the
                         // terminator of the heredoc.
-                        size_t eol_length = match_eol_at(parser, breakpoint + 1);
-                        if (eol_length) {
-                            breakpoint += eol_length;
-                        } else {
-                            pm_unescape_type_t unescape_type = (quote == PM_HEREDOC_QUOTE_SINGLE) ? PM_UNESCAPE_MINIMAL : PM_UNESCAPE_ALL;
-                            size_t difference = pm_unescape_calculate_difference(parser, breakpoint, unescape_type, false);
-                            if (difference == 0) {
-                                // we're at the end of the file
-                                breakpoint = NULL;
-                                break;
-                            }
+                        parser->current.end = breakpoint + 1;
+                        // If we've hit the end of the file, then break out of
+                        // the loop by setting the breakpoint to NULL.
+                        if (parser->current.end == parser->end) {
+                            breakpoint = NULL;
+                            continue;
+                        }
-                            pm_newline_list_check_append(&parser->newline_list, breakpoint + difference - 1);
+                        pm_token_buffer_escape(parser, &token_buffer);
+                        uint8_t peeked = peek(parser);
-                            breakpoint = pm_strpbrk(parser, breakpoint + difference, breakpoints, parser->end - (breakpoint + difference));
+                        if (quote == PM_HEREDOC_QUOTE_SINGLE) {
+                            switch (peeked) {
+                                case '\r':
+                                    parser->current.end++;
+                                    if (peek(parser) != '\n') {
+                                        pm_token_buffer_push(&token_buffer, '\\');
+                                        pm_token_buffer_push(&token_buffer, '\r');
+                                        break;
+                                    }
+                                /* fallthrough */
+                                case '\n':
+                                    pm_token_buffer_push(&token_buffer, '\\');
+                                    pm_token_buffer_push(&token_buffer, '\n');
+                                    token_buffer.cursor = parser->current.end + 1;
+                                    breakpoint = parser->current.end;
+                                    continue;
+                                default:
+                                    parser->current.end++;
+                                    pm_token_buffer_push(&token_buffer, '\\');
+                                    pm_token_buffer_push(&token_buffer, peeked);
+                                    break;
+                            }
+                        } else {
+                            switch (peeked) {
+                                case '\r':
+                                    parser->current.end++;
+                                    if (peek(parser) != '\n') {
+                                        pm_token_buffer_push(&token_buffer, '\r');
+                                        break;
+                                    }
+                                /* fallthrough */
+                                case '\n':
+                                    was_escaped_newline = true;
+                                    token_buffer.cursor = parser->current.end + 1;
+                                    breakpoint = parser->current.end;
+                                    continue;
+                                default:
+                                    escape_read(parser, &token_buffer.buffer, PM_ESCAPE_FLAG_NONE);
+                                    break;
+                            }
                         }
+                        token_buffer.cursor = parser->current.end;
+                        breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
                         break;
                     }
                     case '#': {
                         pm_token_type_t type = lex_interpolation(parser, breakpoint);
-                        if (type != PM_TOKEN_NOT_PROVIDED) {
-                            LEX(type);
+                        if (type == PM_TOKEN_NOT_PROVIDED) {
+                            // If we haven't returned at this point then we had
+                            // something that looked like an interpolated class
+                            // or instance variable like "#@" but wasn't
+                            // actually. In this case we'll just skip to the
+                            // next breakpoint.
+                            breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
+                            break;
                         }
-                        // If we haven't returned at this point then we had something
-                        // that looked like an interpolated class or instance variable
-                        // like "#@" but wasn't actually. In this case we'll just skip
-                        // to the next breakpoint.
-                        breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
-                        break;
+                        if (type == PM_TOKEN_STRING_CONTENT) {
+                            pm_token_buffer_flush(parser, &token_buffer);
+                        }
+                        LEX(type);
                     }
                     default:
                         assert(false && "unreachable");
                 }
+                was_escaped_newline = false;
+            }
+            if (parser->current.end > parser->current.start) {
+                parser->current.end = parser->end;
+                pm_token_buffer_flush(parser, &token_buffer);
+                LEX(PM_TOKEN_STRING_CONTENT);
             }
             // If we've hit the end of the string, then this is an unterminated
             // heredoc. In that case we'll return the EOF token.
-            parser->current.end = parser->end;
             LEX(PM_TOKEN_EOF);
         }
     }
@@ -7955,67 +9001,6 @@ parser_lex(pm_parser_t *parser) {
 /* Parse functions                                                            */
 /******************************************************************************/
-// When we are parsing certain content, we need to unescape the content to
-// provide to the consumers of the parser. The following functions accept a range
-// of characters from the source and unescapes into the provided type.
-//
-// We have functions for unescaping regular expression nodes, string nodes,
-// symbol nodes, and xstring nodes
-static pm_regular_expression_node_t *
-pm_regular_expression_node_create_and_unescape(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, pm_unescape_type_t unescape_type) {
-    pm_regular_expression_node_t *node = pm_regular_expression_node_create(parser, opening, content, closing);
-    assert((content->end - content->start) >= 0);
-    pm_string_shared_init(&node->unescaped, content->start, content->end);
-    pm_unescape_manipulate_string(parser, &node->unescaped, unescape_type);
-    return node;
-}
-static pm_symbol_node_t *
-pm_symbol_node_create_and_unescape(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, pm_unescape_type_t unescape_type) {
-    pm_symbol_node_t *node = pm_symbol_node_create(parser, opening, content, closing);
-    assert((content->end - content->start) >= 0);
-    pm_string_shared_init(&node->unescaped, content->start, content->end);
-    pm_unescape_manipulate_string(parser, &node->unescaped, unescape_type);
-    return node;
-}
-static pm_string_node_t *
-pm_char_literal_node_create_and_unescape(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, pm_unescape_type_t unescape_type) {
-    pm_string_node_t *node = pm_string_node_create(parser, opening, content, closing);
-    assert((content->end - content->start) >= 0);
-    pm_string_shared_init(&node->unescaped, content->start, content->end);
-    pm_unescape_manipulate_char_literal(parser, &node->unescaped, unescape_type);
-    return node;
-}
-static pm_string_node_t *
-pm_string_node_create_and_unescape(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing, pm_unescape_type_t unescape_type) {
-    pm_string_node_t *node = pm_string_node_create(parser, opening, content, closing);
-    assert((content->end - content->start) >= 0);
-    pm_string_shared_init(&node->unescaped, content->start, content->end);
-    pm_unescape_manipulate_string(parser, &node->unescaped, unescape_type);
-    return node;
-}
-static pm_x_string_node_t *
-pm_xstring_node_create_and_unescape(pm_parser_t *parser, const pm_token_t *opening, const pm_token_t *content, const pm_token_t *closing) {
-    pm_x_string_node_t *node = pm_xstring_node_create(parser, opening, content, closing);
-    assert((content->end - content->start) >= 0);
-    pm_string_shared_init(&node->unescaped, content->start, content->end);
-    pm_unescape_manipulate_string(parser, &node->unescaped, PM_UNESCAPE_ALL);
-    return node;
-}
 // These are the various precedence rules. Because we are using a Pratt parser,
 // they are named binding power to represent the manner in which nodes are bound
 // together in the stack.
@@ -8269,7 +9254,7 @@ expect1(pm_parser_t *parser, pm_token_type_t type, pm_diagnostic_id_t diag_id) {
     if (accept1(parser, type)) return;
     const uint8_t *location = parser->previous.end;
-    pm_diagnostic_list_append(&parser->error_list, location, location, diag_id);
+    pm_parser_err(parser, location, location, diag_id);
     parser->previous.start = location;
     parser->previous.type = PM_TOKEN_MISSING;
@@ -8282,7 +9267,7 @@ expect2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_di
     if (accept2(parser, type1, type2)) return;
     const uint8_t *location = parser->previous.end;
-    pm_diagnostic_list_append(&parser->error_list, location, location, diag_id);
+    pm_parser_err(parser, location, location, diag_id);
     parser->previous.start = location;
     parser->previous.type = PM_TOKEN_MISSING;
@@ -8294,7 +9279,7 @@ expect3(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_to
     if (accept3(parser, type1, type2, type3)) return;
     const uint8_t *location = parser->previous.end;
-    pm_diagnostic_list_append(&parser->error_list, location, location, diag_id);
+    pm_parser_err(parser, location, location, diag_id);
     parser->previous.start = location;
     parser->previous.type = PM_TOKEN_MISSING;
@@ -8389,23 +9374,23 @@ parse_starred_expression(pm_parser_t *parser, pm_binding_power_t binding_power,
 }
 // Convert the name of a method into the corresponding write method name. For
-// exmaple, foo would be turned into foo=.
+// example, foo would be turned into foo=.
 static void
-parse_write_name(pm_string_t *string) {
+parse_write_name(pm_parser_t *parser, pm_constant_id_t *name_field) {
     // The method name needs to change. If we previously had
     // foo, we now need foo=. In this case we'll allocate a new
     // owned string, copy the previous method name in, and
     // append an =.
-    size_t length = pm_string_length(string);
+    pm_constant_t *constant = pm_constant_pool_id_to_constant(&parser->constant_pool, *name_field);
+    size_t length = constant->length;
     uint8_t *name = calloc(length + 1, sizeof(uint8_t));
     if (name == NULL) return;
-    memcpy(name, pm_string_source(string), length);
+    memcpy(name, constant->start, length);
     name[length] = '=';
     // Now switch the name to the new string.
-    pm_string_free(string);
-    pm_string_owned_init(string, name, length + 1);
+    *name_field = pm_constant_pool_insert_owned(&parser->constant_pool, name, length + 1);
 }
 // Convert the given node into a valid target node.
@@ -8428,7 +9413,7 @@ parse_target(pm_parser_t *parser, pm_node_t *target) {
             return target;
         case PM_BACK_REFERENCE_READ_NODE:
         case PM_NUMBERED_REFERENCE_READ_NODE:
-            pm_diagnostic_list_append(&parser->error_list, target->location.start, target->location.end, PM_ERR_WRITE_TARGET_READONLY);
+            pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_READONLY);
             return target;
         case PM_GLOBAL_VARIABLE_READ_NODE:
             assert(sizeof(pm_global_variable_target_node_t) == sizeof(pm_global_variable_read_node_t));
@@ -8436,7 +9421,7 @@ parse_target(pm_parser_t *parser, pm_node_t *target) {
             return target;
         case PM_LOCAL_VARIABLE_READ_NODE:
             if (token_is_numbered_parameter(target->location.start, target->location.end)) {
-                pm_diagnostic_list_append(&parser->error_list, target->location.start, target->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED);
+                pm_parser_err_node(parser, target, PM_ERR_PARAMETER_NUMBERED_RESERVED);
             } else {
                 assert(sizeof(pm_local_variable_target_node_t) == sizeof(pm_local_variable_read_node_t));
                 target->type = PM_LOCAL_VARIABLE_TARGET_NODE;
@@ -8489,21 +9474,23 @@ parse_target(pm_parser_t *parser, pm_node_t *target) {
                     pm_parser_local_add_location(parser, message.start, message.end);
                     pm_node_destroy(parser, target);
+                    uint32_t depth = 0;
+                    for (pm_scope_t *scope = parser->current_scope; scope && scope->transparent; depth++, scope = scope->previous);
                     const pm_token_t name = { .type = PM_TOKEN_IDENTIFIER, .start = message.start, .end = message.end };
-                    target = (pm_node_t *) pm_local_variable_read_node_create(parser, &name, 0);
+                    target = (pm_node_t *) pm_local_variable_read_node_create(parser, &name, depth);
                     assert(sizeof(pm_local_variable_target_node_t) == sizeof(pm_local_variable_read_node_t));
                     target->type = PM_LOCAL_VARIABLE_TARGET_NODE;
                     if (token_is_numbered_parameter(message.start, message.end)) {
-                        pm_diagnostic_list_append(&parser->error_list, message.start, message.end, PM_ERR_PARAMETER_NUMBERED_RESERVED);
+                        pm_parser_err_location(parser, &message, PM_ERR_PARAMETER_NUMBERED_RESERVED);
                     }
                     return target;
                 }
                 if (*call->message_loc.start == '_' || parser->encoding.alnum_char(call->message_loc.start, call->message_loc.end - call->message_loc.start)) {
-                    parse_write_name(&call->name);
+                    parse_write_name(parser, &call->name);
                     return (pm_node_t *) call;
                 }
             }
@@ -8518,9 +9505,8 @@ parse_target(pm_parser_t *parser, pm_node_t *target) {
                 (call->message_loc.end[-1] == ']') &&
                 (call->block == NULL)
             ) {
-                // Free the previous name and replace it with "[]=".
-                pm_string_free(&call->name);
-                pm_string_constant_init(&call->name, "[]=", 3);
+                // Replace the name with "[]=".
+                call->name = pm_parser_constant_id_static(parser, "[]=", 3);
                 return target;
             }
         }
@@ -8529,7 +9515,7 @@ parse_target(pm_parser_t *parser, pm_node_t *target) {
             // In this case we have a node that we don't know how to convert
             // into a target. We need to treat it as an error. For now, we'll
             // mark it as an error and just skip right past it.
-            pm_diagnostic_list_append(&parser->error_list, target->location.start, target->location.end, PM_ERR_WRITE_TARGET_UNEXPECTED);
+            pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_UNEXPECTED);
             return target;
     }
 }
@@ -8542,7 +9528,7 @@ parse_target_validate(pm_parser_t *parser, pm_node_t *target) {
     // Ensure that we have either an = or a ) after the targets.
     if (!match3(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_KEYWORD_IN)) {
-        pm_diagnostic_list_append(&parser->error_list, result->location.start, result->location.end, PM_ERR_WRITE_TARGET_UNEXPECTED);
+        pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
     }
     return result;
@@ -8568,7 +9554,7 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
         }
         case PM_BACK_REFERENCE_READ_NODE:
         case PM_NUMBERED_REFERENCE_READ_NODE:
-            pm_diagnostic_list_append(&parser->error_list, target->location.start, target->location.end, PM_ERR_WRITE_TARGET_READONLY);
+            pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_READONLY);
             /* fallthrough */
         case PM_GLOBAL_VARIABLE_READ_NODE: {
             pm_global_variable_write_node_t *node = pm_global_variable_write_node_create(parser, target, operator, value);
@@ -8577,7 +9563,7 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
         }
         case PM_LOCAL_VARIABLE_READ_NODE: {
             if (token_is_numbered_parameter(target->location.start, target->location.end)) {
-                pm_diagnostic_list_append(&parser->error_list, target->location.start, target->location.end, PM_ERR_PARAMETER_NUMBERED_RESERVED);
+                pm_parser_err_node(parser, target, PM_ERR_PARAMETER_NUMBERED_RESERVED);
             }
             pm_local_variable_read_node_t *local_read = (pm_local_variable_read_node_t *) target;
@@ -8642,7 +9628,7 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
                     target = (pm_node_t *) pm_local_variable_write_node_create(parser, constant_id, 0, value, &message, operator);
                     if (token_is_numbered_parameter(message.start, message.end)) {
-                        pm_diagnostic_list_append(&parser->error_list, message.start, message.end, PM_ERR_PARAMETER_NUMBERED_RESERVED);
+                        pm_parser_err_location(parser, &message, PM_ERR_PARAMETER_NUMBERED_RESERVED);
                     }
                     return target;
@@ -8665,7 +9651,7 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
                     pm_arguments_node_arguments_append(arguments, value);
                     call->base.location.end = arguments->base.location.end;
-                    parse_write_name(&call->name);
+                    parse_write_name(parser, &call->name);
                     return (pm_node_t *) call;
                 }
             }
@@ -8686,9 +9672,8 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
                 pm_arguments_node_arguments_append(call->arguments, value);
                 target->location.end = value->location.end;
-                // Free the previous name and replace it with "[]=".
-                pm_string_free(&call->name);
-                pm_string_constant_init(&call->name, "[]=", 3);
+                // Replace the name with "[]=".
+                call->name = pm_parser_constant_id_static(parser, "[]=", 3);
                 return target;
             }
@@ -8704,7 +9689,7 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
             // In this case we have a node that we don't know how to convert into a
             // target. We need to treat it as an error. For now, we'll mark it as an
             // error and just skip right past it.
-            pm_diagnostic_list_append(&parser->error_list, operator->start, operator->end, PM_ERR_WRITE_TARGET_UNEXPECTED);
+            pm_parser_err_token(parser, operator, PM_ERR_WRITE_TARGET_UNEXPECTED);
             return target;
     }
 }
@@ -8730,7 +9715,7 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
             // anonymous. It can be the final target or be in the middle if
             // there haven't been any others yet.
             if (has_splat) {
-                pm_diagnostic_list_append(&parser->error_list, parser->previous.start, parser->previous.end, PM_ERR_MULTI_ASSIGN_MULTI_SPLATS);
+                pm_parser_err_previous(parser, PM_ERR_MULTI_ASSIGN_MULTI_SPLATS);
             }
             pm_token_t star_operator = parser->previous;
@@ -8770,7 +9755,7 @@ parse_targets_validate(pm_parser_t *parser, pm_node_t *first_target, pm_binding_
     // Ensure that we have either an = or a ) after the targets.
     if (!match2(parser, PM_TOKEN_EQUAL, PM_TOKEN_PARENTHESIS_RIGHT)) {
-        pm_diagnostic_list_append(&parser->error_list, result->location.start, result->location.end, PM_ERR_WRITE_TARGET_UNEXPECTED);
+        pm_parser_err_node(parser, result, PM_ERR_WRITE_TARGET_UNEXPECTED);
     }
     return result;
@@ -8863,7 +9848,7 @@ parse_assocs(pm_parser_t *parser, pm_node_t *node) {
                 if (token_begins_expression_p(parser->current.type)) {
                     value = parse_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH);
                 } else if (pm_parser_local_depth(parser, &operator) == -1) {
-                    pm_diagnostic_list_append(&parser->error_list, operator.start, operator.end, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH);
+                    pm_parser_err_token(parser, &operator, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT_HASH);
                 }
                 element = (pm_node_t *) pm_assoc_splat_node_create(parser, value, &operator);
@@ -8970,7 +9955,7 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
     while (!match1(parser, PM_TOKEN_EOF)) {
         if (parsed_block_argument) {
-            pm_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, PM_ERR_ARGUMENT_AFTER_BLOCK);
+            pm_parser_err_current(parser, PM_ERR_ARGUMENT_AFTER_BLOCK);
         }
         pm_node_t *argument = NULL;
@@ -8979,7 +9964,7 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
             case PM_TOKEN_USTAR_STAR:
             case PM_TOKEN_LABEL: {
                 if (parsed_bare_hash) {
-                    pm_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, PM_ERR_ARGUMENT_BARE_HASH);
+                    pm_parser_err_current(parser, PM_ERR_ARGUMENT_BARE_HASH);
                 }
                 pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
@@ -9001,7 +9986,7 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
                 if (token_begins_expression_p(parser->current.type)) {
                     expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_EXPECT_ARGUMENT);
                 } else if (pm_parser_local_depth(parser, &operator) == -1) {
-                    pm_diagnostic_list_append(&parser->error_list, operator.start, operator.end, PM_ERR_ARGUMENT_NO_FORWARDING_AMP);
+                    pm_parser_err_token(parser, &operator, PM_ERR_ARGUMENT_NO_FORWARDING_AMP);
                 }
                 argument = (pm_node_t *) pm_block_argument_node_create(parser, &operator, expression);
@@ -9020,7 +10005,7 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
                 if (match2(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_COMMA)) {
                     if (pm_parser_local_depth(parser, &parser->previous) == -1) {
-                        pm_diagnostic_list_append(&parser->error_list, operator.start, operator.end, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
+                        pm_parser_err_token(parser, &operator, PM_ERR_ARGUMENT_NO_FORWARDING_STAR);
                     }
                     argument = (pm_node_t *) pm_splat_node_create(parser, &operator, NULL);
@@ -9028,7 +10013,7 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
                     pm_node_t *expression = parse_expression(parser, PM_BINDING_POWER_DEFINED, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT);
                     if (parsed_bare_hash) {
-                        pm_diagnostic_list_append(&parser->error_list, operator.start, expression->location.end, PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT);
+                        pm_parser_err(parser, operator.start, expression->location.end, PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT);
                     }
                     argument = (pm_node_t *) pm_splat_node_create(parser, &operator, expression);
@@ -9049,7 +10034,7 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
                         argument = (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
                     } else {
                         if (pm_parser_local_depth(parser, &parser->previous) == -1) {
-                            pm_diagnostic_list_append(&parser->error_list, parser->previous.start, parser->previous.end, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
+                            pm_parser_err_previous(parser, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
                         }
                         argument = (pm_node_t *) pm_forwarding_arguments_node_create(parser, &parser->previous);
@@ -9066,7 +10051,7 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
                 if (pm_symbol_node_label_p(argument) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
                     if (parsed_bare_hash) {
-                        pm_diagnostic_list_append(&parser->error_list, parser->previous.start, parser->previous.end, PM_ERR_ARGUMENT_BARE_HASH);
+                        pm_parser_err_previous(parser, PM_ERR_ARGUMENT_BARE_HASH);
                     }
                     pm_token_t operator;
@@ -9145,7 +10130,7 @@ parse_required_destructured_parameter(pm_parser_t *parser) {
         if (node->parameters.size > 0 && match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
             if (parsed_splat) {
-                pm_diagnostic_list_append(&parser->error_list, parser->previous.start, parser->previous.end, PM_ERR_ARGUMENT_SPLAT_AFTER_SPLAT);
+                pm_parser_err_previous(parser, PM_ERR_ARGUMENT_SPLAT_AFTER_SPLAT);
             }
             param = (pm_node_t *) pm_splat_node_create(parser, &parser->previous, NULL);
@@ -9157,7 +10142,7 @@ parse_required_destructured_parameter(pm_parser_t *parser) {
             param = (pm_node_t *) parse_required_destructured_parameter(parser);
         } else if (accept1(parser, PM_TOKEN_USTAR)) {
             if (parsed_splat) {
-                pm_diagnostic_list_append(&parser->error_list, parser->previous.start, parser->previous.end, PM_ERR_ARGUMENT_SPLAT_AFTER_SPLAT);
+                pm_parser_err_previous(parser, PM_ERR_ARGUMENT_SPLAT_AFTER_SPLAT);
             }
             pm_token_t star = parser->previous;
@@ -9166,6 +10151,7 @@ parse_required_destructured_parameter(pm_parser_t *parser) {
             if (accept1(parser, PM_TOKEN_IDENTIFIER)) {
                 pm_token_t name = parser->previous;
                 value = (pm_node_t *) pm_required_parameter_node_create(parser, &name);
+                pm_parser_parameter_name_check(parser, &name);
                 pm_parser_local_add_token(parser, &name);
             }
@@ -9176,6 +10162,7 @@ parse_required_destructured_parameter(pm_parser_t *parser) {
             pm_token_t name = parser->previous;
             param = (pm_node_t *) pm_required_parameter_node_create(parser, &name);
+            pm_parser_parameter_name_check(parser, &name);
             pm_parser_local_add_token(parser, &name);
         }
@@ -9237,12 +10224,12 @@ update_parameter_state(pm_parser_t *parser, pm_token_t *token, pm_parameters_ord
     }
     if (token->type == PM_TOKEN_USTAR && *current == PM_PARAMETERS_ORDER_AFTER_OPTIONAL) {
-        pm_diagnostic_list_append(&parser->error_list, token->start, token->end, PM_ERR_PARAMETER_STAR);
+        pm_parser_err_token(parser, token, PM_ERR_PARAMETER_STAR);
     }
     if (*current == PM_PARAMETERS_ORDER_NOTHING_AFTER || state > *current) {
         // We know what transition we failed on, so we can provide a better error here.
-        pm_diagnostic_list_append(&parser->error_list, token->start, token->end, PM_ERR_PARAMETER_ORDER);
+        pm_parser_err_token(parser, token, PM_ERR_PARAMETER_ORDER);
     } else if (state < *current) {
         *current = state;
     }
@@ -9297,7 +10284,7 @@ parse_parameters(
                 if (params->block == NULL) {
                     pm_parameters_node_block_set(params, param);
                 } else {
-                    pm_diagnostic_list_append(&parser->error_list, param->base.location.start, param->base.location.end, PM_ERR_PARAMETER_BLOCK_MULTI);
+                    pm_parser_err_node(parser, (pm_node_t *) param, PM_ERR_PARAMETER_BLOCK_MULTI);
                     pm_parameters_node_posts_append(params, (pm_node_t *) param);
                 }
@@ -9305,7 +10292,7 @@ parse_parameters(
             }
             case PM_TOKEN_UDOT_DOT_DOT: {
                 if (!allows_forwarding_parameter) {
-                    pm_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
+                    pm_parser_err_current(parser, PM_ERR_ARGUMENT_NO_FORWARDING_ELLIPSES);
                 }
                 if (order > PM_PARAMETERS_ORDER_NOTHING_AFTER) {
                     update_parameter_state(parser, &parser->current, &order);
@@ -9318,7 +10305,7 @@ parse_parameters(
                         // forwarding parameter and move the keyword rest parameter to the posts list.
                         pm_node_t *keyword_rest = params->keyword_rest;
                         pm_parameters_node_posts_append(params, keyword_rest);
-                        pm_diagnostic_list_append(&parser->error_list, parser->previous.start, parser->previous.end, PM_ERR_PARAMETER_UNEXPECTED_FWD);
+                        pm_parser_err_previous(parser, PM_ERR_PARAMETER_UNEXPECTED_FWD);
                         params->keyword_rest = NULL;
                     }
                     pm_parameters_node_keyword_rest_set(params, (pm_node_t *)param);
@@ -9337,19 +10324,19 @@ parse_parameters(
                 parser_lex(parser);
                 switch (parser->previous.type) {
                     case PM_TOKEN_CONSTANT:
-                        pm_diagnostic_list_append(&parser->error_list, parser->previous.start, parser->previous.end, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
+                        pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_CONSTANT);
                         break;
                     case PM_TOKEN_INSTANCE_VARIABLE:
-                        pm_diagnostic_list_append(&parser->error_list, parser->previous.start, parser->previous.end, PM_ERR_ARGUMENT_FORMAL_IVAR);
+                        pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_IVAR);
                         break;
                     case PM_TOKEN_GLOBAL_VARIABLE:
-                        pm_diagnostic_list_append(&parser->error_list, parser->previous.start, parser->previous.end, PM_ERR_ARGUMENT_FORMAL_GLOBAL);
+                        pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_GLOBAL);
                         break;
                     case PM_TOKEN_CLASS_VARIABLE:
-                        pm_diagnostic_list_append(&parser->error_list, parser->previous.start, parser->previous.end, PM_ERR_ARGUMENT_FORMAL_CLASS);
+                        pm_parser_err_previous(parser, PM_ERR_ARGUMENT_FORMAL_CLASS);
                         break;
                     case PM_TOKEN_METHOD_NAME:
-                        pm_diagnostic_list_append(&parser->error_list, parser->previous.start, parser->previous.end, PM_ERR_PARAMETER_METHOD_NAME);
+                        pm_parser_err_previous(parser, PM_ERR_PARAMETER_METHOD_NAME);
                         break;
                     default: break;
                 }
@@ -9466,7 +10453,7 @@ parse_parameters(
                 if (params->rest == NULL) {
                     pm_parameters_node_rest_set(params, param);
                 } else {
-                    pm_diagnostic_list_append(&parser->error_list, param->base.location.start, param->base.location.end, PM_ERR_PARAMETER_SPLAT_MULTI);
+                    pm_parser_err_node(parser, (pm_node_t *) param, PM_ERR_PARAMETER_SPLAT_MULTI);
                     pm_parameters_node_posts_append(params, (pm_node_t *) param);
                 }
@@ -9500,7 +10487,7 @@ parse_parameters(
                 if (params->keyword_rest == NULL) {
                     pm_parameters_node_keyword_rest_set(params, param);
                 } else {
-                    pm_diagnostic_list_append(&parser->error_list, param->location.start, param->location.end, PM_ERR_PARAMETER_ASSOC_SPLAT_MULTI);
+                    pm_parser_err_node(parser, param, PM_ERR_PARAMETER_ASSOC_SPLAT_MULTI);
                     pm_parameters_node_posts_append(params, param);
                 }
@@ -9518,11 +10505,11 @@ parse_parameters(
                         if (params->rest == NULL) {
                             pm_parameters_node_rest_set(params, param);
                         } else {
-                            pm_diagnostic_list_append(&parser->error_list, param->base.location.start, param->base.location.end, PM_ERR_PARAMETER_SPLAT_MULTI);
+                            pm_parser_err_node(parser, (pm_node_t *) param, PM_ERR_PARAMETER_SPLAT_MULTI);
                             pm_parameters_node_posts_append(params, (pm_node_t *) param);
                         }
                     } else {
-                        pm_diagnostic_list_append(&parser->error_list, parser->previous.start, parser->previous.end, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
+                        pm_parser_err_previous(parser, PM_ERR_PARAMETER_WILD_LOOSE_COMMA);
                     }
                 }
@@ -9725,9 +10712,10 @@ parse_block_parameters(
     }
     pm_block_parameters_node_t *block_parameters = pm_block_parameters_node_create(parser, parameters, opening);
-    if (accept1(parser, PM_TOKEN_SEMICOLON)) {
+    if ((opening->type != PM_TOKEN_NOT_PROVIDED) && accept1(parser, PM_TOKEN_SEMICOLON)) {
         do {
             expect1(parser, PM_TOKEN_IDENTIFIER, PM_ERR_BLOCK_PARAM_LOCAL_VARIABLE);
+            pm_parser_parameter_name_check(parser, &parser->previous);
             pm_parser_local_add_token(parser, &parser->previous);
             pm_block_local_variable_node_t *local = pm_block_local_variable_node_create(parser, &parser->previous);
@@ -9850,7 +10838,7 @@ parse_arguments_list(pm_parser_t *parser, pm_arguments_t *arguments, bool accept
             if (arguments->block == NULL) {
                 arguments->block = (pm_node_t *) block;
             } else {
-                pm_diagnostic_list_append(&parser->error_list, block->base.location.start, block->base.location.end, PM_ERR_ARGUMENT_BLOCK_MULTI);
+                pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_BLOCK_MULTI);
                 if (arguments->arguments == NULL) {
                     arguments->arguments = pm_arguments_node_create(parser);
                 }
@@ -9873,7 +10861,7 @@ parse_predicate(pm_parser_t *parser, pm_binding_power_t binding_power, pm_contex
     bool predicate_closed = accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
     predicate_closed |= accept1(parser, PM_TOKEN_KEYWORD_THEN);
     if (!predicate_closed) {
-        pm_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, PM_ERR_CONDITIONAL_PREDICATE_TERM);
+        pm_parser_err_current(parser, PM_ERR_CONDITIONAL_PREDICATE_TERM);
     }
     context_pop(parser);
@@ -10057,25 +11045,12 @@ parse_string_part(pm_parser_t *parser) {
         //     "aaa #{bbb} #@ccc ddd"
         //      ^^^^      ^     ^^^^
         case PM_TOKEN_STRING_CONTENT: {
-            pm_unescape_type_t unescape_type = PM_UNESCAPE_ALL;
-            if (parser->lex_modes.current->mode == PM_LEX_HEREDOC) {
-                if (parser->lex_modes.current->as.heredoc.indent == PM_HEREDOC_INDENT_TILDE) {
-                    // If we're in a tilde heredoc, we want to unescape it later
-                    // because we don't want unescaped newlines to disappear
-                    // before we handle them in the dedent.
-                    unescape_type = PM_UNESCAPE_NONE;
-                } else if (parser->lex_modes.current->as.heredoc.quote == PM_HEREDOC_QUOTE_SINGLE) {
-                    unescape_type = PM_UNESCAPE_MINIMAL;
-                }
-            }
-            parser_lex(parser);
             pm_token_t opening = not_provided(parser);
             pm_token_t closing = not_provided(parser);
+            pm_node_t *node = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
-            return (pm_node_t *) pm_string_node_create_and_unescape(parser, &opening, &parser->previous, &closing, unescape_type);
+            parser_lex(parser);
+            return node;
         }
         // Here the lexer has returned the beginning of an embedded expression. In
         // that case we'll parse the inner statements and return that as the part.
@@ -10166,7 +11141,7 @@ parse_string_part(pm_parser_t *parser) {
         }
         default:
             parser_lex(parser);
-            pm_diagnostic_list_append(&parser->error_list, parser->previous.start, parser->previous.end, PM_ERR_CANNOT_PARSE_STRING_PART);
+            pm_parser_err_previous(parser, PM_ERR_CANNOT_PARSE_STRING_PART);
             return NULL;
     }
 }
@@ -10177,7 +11152,6 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
     if (lex_mode->mode != PM_LEX_STRING) {
         if (next_state != PM_LEX_STATE_NONE) lex_state_set(parser, next_state);
-        pm_token_t symbol;
         switch (parser->current.type) {
             case PM_TOKEN_IDENTIFIER:
@@ -10190,21 +11164,21 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
             case PM_TOKEN_BACK_REFERENCE:
             case PM_CASE_KEYWORD:
                 parser_lex(parser);
-                symbol = parser->previous;
                 break;
             case PM_CASE_OPERATOR:
                 lex_state_set(parser, next_state == PM_LEX_STATE_NONE ? PM_LEX_STATE_ENDFN : next_state);
                 parser_lex(parser);
-                symbol = parser->previous;
                 break;
             default:
                 expect2(parser, PM_TOKEN_IDENTIFIER, PM_TOKEN_METHOD_NAME, PM_ERR_SYMBOL_INVALID);
-                symbol = parser->previous;
                 break;
         }
         pm_token_t closing = not_provided(parser);
-        return (pm_node_t *) pm_symbol_node_create_and_unescape(parser, &opening, &symbol, &closing, PM_UNESCAPE_ALL);
+        pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
+        pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
+        return (pm_node_t *) symbol;
     }
     if (lex_mode->as.string.interpolation) {
@@ -10215,7 +11189,7 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
             pm_token_t content = not_provided(parser);
             pm_token_t closing = parser->previous;
-            return (pm_node_t *) pm_symbol_node_create_and_unescape(parser, &opening, &content, &closing, PM_UNESCAPE_NONE);
+            return (pm_node_t *) pm_symbol_node_create(parser, &opening, &content, &closing);
         }
         // Now we can parse the first part of the symbol.
@@ -10248,18 +11222,23 @@ parse_symbol(pm_parser_t *parser, pm_lex_mode_t *lex_mode, pm_lex_state_t next_s
     }
     pm_token_t content;
-    if (accept1(parser, PM_TOKEN_STRING_CONTENT)) {
-        content = parser->previous;
+    pm_string_t unescaped;
+    if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
+        content = parser->current;
+        unescaped = parser->current_string;
+        parser_lex(parser);
     } else {
         content = (pm_token_t) { .type = PM_TOKEN_STRING_CONTENT, .start = parser->previous.end, .end = parser->previous.end };
+        pm_string_shared_init(&unescaped, content.start, content.end);
     }
     if (next_state != PM_LEX_STATE_NONE) {
         lex_state_set(parser, next_state);
     }
-    expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
-    return (pm_node_t *) pm_symbol_node_create_and_unescape(parser, &opening, &content, &parser->previous, PM_UNESCAPE_ALL);
+    expect1(parser, PM_TOKEN_STRING_END, PM_ERR_SYMBOL_TERM_DYNAMIC);
+    return (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
 }
 // Parse an argument to undef which can either be a bare word, a
@@ -10276,8 +11255,10 @@ parse_undef_argument(pm_parser_t *parser) {
             pm_token_t opening = not_provided(parser);
             pm_token_t closing = not_provided(parser);
+            pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
-            return (pm_node_t *) pm_symbol_node_create_and_unescape(parser, &opening, &parser->previous, &closing, PM_UNESCAPE_ALL);
+            pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
+            return (pm_node_t *) symbol;
         }
         case PM_TOKEN_SYMBOL_BEGIN: {
             pm_lex_mode_t lex_mode = *parser->lex_modes.current;
@@ -10286,7 +11267,7 @@ parse_undef_argument(pm_parser_t *parser) {
             return parse_symbol(parser, &lex_mode, PM_LEX_STATE_NONE);
         }
         default:
-            pm_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, PM_ERR_UNDEF_ARGUMENT);
+            pm_parser_err_current(parser, PM_ERR_UNDEF_ARGUMENT);
             return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
     }
 }
@@ -10310,8 +11291,10 @@ parse_alias_argument(pm_parser_t *parser, bool first) {
             parser_lex(parser);
             pm_token_t opening = not_provided(parser);
             pm_token_t closing = not_provided(parser);
+            pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &parser->previous, &closing);
-            return (pm_node_t *) pm_symbol_node_create_and_unescape(parser, &opening, &parser->previous, &closing, PM_UNESCAPE_ALL);
+            pm_string_shared_init(&symbol->unescaped, parser->previous.start, parser->previous.end);
+            return (pm_node_t *) symbol;
         }
         case PM_TOKEN_SYMBOL_BEGIN: {
             pm_lex_mode_t lex_mode = *parser->lex_modes.current;
@@ -10329,7 +11312,7 @@ parse_alias_argument(pm_parser_t *parser, bool first) {
             parser_lex(parser);
             return (pm_node_t *) pm_global_variable_read_node_create(parser, &parser->previous);
         default:
-            pm_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, PM_ERR_ALIAS_ARGUMENT);
+            pm_parser_err_current(parser, PM_ERR_ALIAS_ARGUMENT);
             return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
     }
 }
@@ -10366,9 +11349,9 @@ parse_variable_call(pm_parser_t *parser) {
             // local variable read. If it's not, then we'll create a normal call
             // node but add an error.
             if (parser->current_scope->explicit_params) {
-                pm_diagnostic_list_append(&parser->error_list, parser->previous.start, parser->previous.end, PM_ERR_NUMBERED_PARAMETER_NOT_ALLOWED);
+                pm_parser_err_previous(parser, PM_ERR_NUMBERED_PARAMETER_NOT_ALLOWED);
             } else if (outer_scope_using_numbered_params_p(parser)) {
-                pm_diagnostic_list_append(&parser->error_list, parser->previous.start, parser->previous.end, PM_ERR_NUMBERED_PARAMETER_OUTER_SCOPE);
+                pm_parser_err_previous(parser, PM_ERR_NUMBERED_PARAMETER_OUTER_SCOPE);
             } else {
                 // When you use a numbered parameter, it implies the existence
                 // of all of the locals that exist before it. For example,
@@ -10421,76 +11404,8 @@ parse_method_definition_name(pm_parser_t *parser) {
     }
 }
-static int
-parse_heredoc_common_whitespace_for_single_node(pm_parser_t *parser, pm_node_t *node, int common_whitespace)
-{
-    const pm_location_t *content_loc = &((pm_string_node_t *) node)->content_loc;
-    int cur_whitespace;
-    const uint8_t *cur_char = content_loc->start;
-    while (cur_char && cur_char < content_loc->end) {
-        // Any empty newlines aren't included in the minimum whitespace
-        // calculation.
-        size_t eol_length;
-        while ((eol_length = match_eol_at(parser, cur_char))) {
-            cur_char += eol_length;
-        }
-        if (cur_char == content_loc->end) break;
-        cur_whitespace = 0;
-        while (pm_char_is_inline_whitespace(*cur_char) && cur_char < content_loc->end) {
-            if (cur_char[0] == '\t') {
-                cur_whitespace = (cur_whitespace / PM_TAB_WHITESPACE_SIZE + 1) * PM_TAB_WHITESPACE_SIZE;
-            } else {
-                cur_whitespace++;
-            }
-            cur_char++;
-        }
-        // If we hit a newline, then we have encountered a line that
-        // contains only whitespace, and it shouldn't be considered in
-        // the calculation of common leading whitespace.
-        eol_length = match_eol_at(parser, cur_char);
-        if (eol_length) {
-            cur_char += eol_length;
-            continue;
-        }
-        if (cur_whitespace < common_whitespace || common_whitespace == -1) {
-            common_whitespace = cur_whitespace;
-        }
-        cur_char = next_newline(cur_char + 1, parser->end - (cur_char + 1));
-        if (cur_char) cur_char++;
-    }
-    return common_whitespace;
-}
-// Calculate the common leading whitespace for each line in a heredoc.
-static int
-parse_heredoc_common_whitespace(pm_parser_t *parser, pm_node_list_t *nodes) {
-    int common_whitespace = -1;
-    for (size_t index = 0; index < nodes->size; index++) {
-        pm_node_t *node = nodes->nodes[index];
-        if (!PM_NODE_TYPE_P(node, PM_STRING_NODE)) continue;
-        // If the previous node wasn't a string node, we don't want to trim
-        // whitespace. This could happen after an interpolated expression or
-        // variable.
-        if (index == 0 || PM_NODE_TYPE_P(nodes->nodes[index - 1], PM_STRING_NODE)) {
-            common_whitespace = parse_heredoc_common_whitespace_for_single_node(parser, node, common_whitespace);
-        }
-    }
-    return common_whitespace;
-}
-static pm_string_t *
-parse_heredoc_dedent_single_node(pm_parser_t *parser, pm_string_t *string, bool dedent_node, int common_whitespace, pm_heredoc_quote_t quote)
-{
+static void
+parse_heredoc_dedent_string(pm_string_t *string, size_t common_whitespace) {
     // Get a reference to the string struct that is being held by the string
     // node. This is the value we're going to actually manipulate.
     pm_string_ensure_owned(string);
@@ -10499,85 +11414,37 @@ parse_heredoc_dedent_single_node(pm_parser_t *parser, pm_string_t *string, bool
     // destination to move bytes into. We'll also use it for bounds checking
     // since we don't require that these strings be null terminated.
     size_t dest_length = pm_string_length(string);
-    uint8_t *source_start = (uint8_t *) string->source;
-    const uint8_t *source_cursor = source_start;
+    const uint8_t *source_cursor = (uint8_t *) string->source;
     const uint8_t *source_end = source_cursor + dest_length;
     // We're going to move bytes backward in the string when we get leading
     // whitespace, so we'll maintain a pointer to the current position in the
     // string that we're writing to.
-    uint8_t *dest_cursor = source_start;
-    while (source_cursor < source_end) {
-        // If we need to dedent the next element within the heredoc or the next
-        // line within the string node, then we'll do it here.
-        if (dedent_node) {
-            int trimmed_whitespace = 0;
-            // While we haven't reached the amount of common whitespace that we need
-            // to trim and we haven't reached the end of the string, we'll keep
-            // trimming whitespace. Trimming in this context means skipping over
-            // these bytes such that they aren't copied into the new string.
-            while ((source_cursor < source_end) && pm_char_is_inline_whitespace(*source_cursor) && trimmed_whitespace < common_whitespace) {
-                if (*source_cursor == '\t') {
-                    trimmed_whitespace = (trimmed_whitespace / PM_TAB_WHITESPACE_SIZE + 1) * PM_TAB_WHITESPACE_SIZE;
-                    if (trimmed_whitespace > common_whitespace) break;
-                } else {
-                    trimmed_whitespace++;
-                }
-                source_cursor++;
-                dest_length--;
-            }
-        }
-        // At this point we have dedented all that we need to, so we need to find
-        // the next newline.
-        const uint8_t *breakpoint = next_newline(source_cursor, source_end - source_cursor);
-        if (breakpoint == NULL) {
-            // If there isn't another newline, then we can just move the rest of the
-            // string and break from the loop.
-            memmove(dest_cursor, source_cursor, (size_t) (source_end - source_cursor));
-            break;
+    size_t trimmed_whitespace = 0;
+    // While we haven't reached the amount of common whitespace that we need to
+    // trim and we haven't reached the end of the string, we'll keep trimming
+    // whitespace. Trimming in this context means skipping over these bytes such
+    // that they aren't copied into the new string.
+    while ((source_cursor < source_end) && pm_char_is_inline_whitespace(*source_cursor) && trimmed_whitespace < common_whitespace) {
+        if (*source_cursor == '\t') {
+            trimmed_whitespace = (trimmed_whitespace / PM_TAB_WHITESPACE_SIZE + 1) * PM_TAB_WHITESPACE_SIZE;
+            if (trimmed_whitespace > common_whitespace) break;
+        } else {
+            trimmed_whitespace++;
         }
-        // Otherwise, we need to move everything including the newline, and
-        // then set the dedent_node flag to true.
-        if (breakpoint < source_end) breakpoint++;
-        memmove(dest_cursor, source_cursor, (size_t) (breakpoint - source_cursor));
-        dest_cursor += (breakpoint - source_cursor);
-        source_cursor = breakpoint;
-        dedent_node = true;
+        source_cursor++;
+        dest_length--;
     }
-    // We only want to write this node into the list if it has any content.
+    memmove((uint8_t *) string->source, source_cursor, (size_t) (source_end - source_cursor));
     string->length = dest_length;
-    if (dest_length != 0) {
-        pm_unescape_manipulate_string(parser, string, (quote == PM_HEREDOC_QUOTE_SINGLE) ? PM_UNESCAPE_MINIMAL : PM_UNESCAPE_ALL);
-    }
-    return string;
 }
 // Take a heredoc node that is indented by a ~ and trim the leading whitespace.
 static void
-parse_heredoc_dedent(pm_parser_t *parser, pm_node_t *heredoc_node, pm_heredoc_quote_t quote)
-{
-    pm_node_list_t *nodes;
-    if (quote == PM_HEREDOC_QUOTE_BACKTICK) {
-        nodes = &((pm_interpolated_x_string_node_t *) heredoc_node)->parts;
-    } else {
-        nodes = &((pm_interpolated_string_node_t *) heredoc_node)->parts;
-    }
-    // First, calculate how much common whitespace we need to trim. If there is
-    // none or it's 0, then we can return early.
-    int common_whitespace;
-    if ((common_whitespace = parse_heredoc_common_whitespace(parser, nodes)) <= 0) return;
+parse_heredoc_dedent(pm_parser_t *parser, pm_node_list_t *nodes, size_t common_whitespace) {
     // The next node should be dedented if it's the first node in the list or if
     // if follows a string node.
     bool dedent_next = true;
@@ -10600,7 +11467,10 @@ parse_heredoc_dedent(pm_parser_t *parser, pm_node_t *heredoc_node, pm_heredoc_qu
         }
         pm_string_node_t *string_node = ((pm_string_node_t *) node);
-        parse_heredoc_dedent_single_node(parser, &string_node->unescaped, dedent_next, common_whitespace, quote);
+        if (dedent_next) {
+            parse_heredoc_dedent_string(&string_node->unescaped, common_whitespace);
+        }
         if (string_node->unescaped.length == 0) {
             pm_node_destroy(parser, node);
         } else {
@@ -10929,13 +11799,13 @@ parse_pattern_primitive(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
                     case PM_TOKEN_STRING_BEGIN:
                         key = parse_expression(parser, PM_BINDING_POWER_MAX, PM_ERR_PATTERN_HASH_KEY);
                         if (!pm_symbol_node_label_p(key)) {
-                            pm_diagnostic_list_append(&parser->error_list, key->location.start, key->location.end, PM_ERR_PATTERN_HASH_KEY_LABEL);
+                            pm_parser_err_node(parser, key, PM_ERR_PATTERN_HASH_KEY_LABEL);
                         }
                         break;
                     default:
                         parser_lex(parser);
-                        pm_diagnostic_list_append(&parser->error_list, parser->previous.start, parser->previous.end, PM_ERR_PATTERN_HASH_KEY);
+                        pm_parser_err_previous(parser, PM_ERR_PATTERN_HASH_KEY);
                         key = (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
                         break;
                 }
@@ -10970,7 +11840,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
                     return (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
                 }
                 default: {
-                    pm_diagnostic_list_append(&parser->error_list, operator.start, operator.end, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE);
+                    pm_parser_err_token(parser, &operator, PM_ERR_PATTERN_EXPRESSION_AFTER_RANGE);
                     pm_node_t *right = (pm_node_t *) pm_missing_node_create(parser, operator.start, operator.end);
                     return (pm_node_t *) pm_range_node_create(parser, NULL, &operator, right);
                 }
@@ -11058,7 +11928,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
                 default: {
                     // If we get here, then we have a pin operator followed by something
                     // not understood. We'll create a missing node and return that.
-                    pm_diagnostic_list_append(&parser->error_list, operator.start, operator.end, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN);
+                    pm_parser_err_token(parser, &operator, PM_ERR_PATTERN_EXPRESSION_AFTER_PIN);
                     pm_node_t *variable = (pm_node_t *) pm_missing_node_create(parser, operator.start, operator.end);
                     return (pm_node_t *) pm_pinned_variable_node_create(parser, &operator, variable);
                 }
@@ -11082,7 +11952,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
             return parse_pattern_constant_path(parser, node);
         }
         default:
-            pm_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, diag_id);
+            pm_parser_err_current(parser, diag_id);
             return (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
     }
 }
@@ -11126,7 +11996,7 @@ parse_pattern_primitives(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
                 break;
             }
             default: {
-                pm_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, diag_id);
+                pm_parser_err_current(parser, diag_id);
                 pm_node_t *right = (pm_node_t *) pm_missing_node_create(parser, parser->current.start, parser->current.end);
                 if (node == NULL) {
@@ -11218,7 +12088,7 @@ parse_pattern(pm_parser_t *parser, bool top_pattern, pm_diagnostic_id_t diag_id)
                 // will continue to parse the rest of the patterns, but we will indicate
                 // it as an error.
                 if (trailing_rest) {
-                    pm_diagnostic_list_append(&parser->error_list, parser->previous.start, parser->previous.end, PM_ERR_PATTERN_REST);
+                    pm_parser_err_previous(parser, PM_ERR_PATTERN_REST);
                 }
                 trailing_rest = true;
@@ -11284,6 +12154,7 @@ static inline pm_node_t *
 parse_strings(pm_parser_t *parser) {
     assert(parser->current.type == PM_TOKEN_STRING_BEGIN);
     pm_node_t *result = NULL;
+    bool state_is_arg_labeled = lex_state_p(parser, PM_LEX_STATE_ARG | PM_LEX_STATE_LABELED);
     while (match1(parser, PM_TOKEN_STRING_BEGIN)) {
         pm_node_t *node = NULL;
@@ -11301,17 +12172,30 @@ parse_strings(pm_parser_t *parser) {
             // start. In that case we'll create an empty content token and
             // return an uninterpolated string.
             pm_token_t content = parse_strings_empty_content(parser->previous.start);
-            node = (pm_node_t *) pm_string_node_create_and_unescape(parser, &opening, &content, &parser->previous, PM_UNESCAPE_NONE);
+            pm_string_node_t *string = pm_string_node_create(parser, &opening, &content, &parser->previous);
+            pm_string_shared_init(&string->unescaped, content.start, content.end);
+            node = (pm_node_t *) string;
         } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
             // If we get here, then we have an end of a label immediately
             // after a start. In that case we'll create an empty symbol
             // node.
             pm_token_t opening = not_provided(parser);
             pm_token_t content = parse_strings_empty_content(parser->previous.start);
-            node = (pm_node_t *) pm_symbol_node_create(parser, &opening, &content, &parser->previous);
+            pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &content, &parser->previous);
+            pm_string_shared_init(&symbol->unescaped, content.start, content.end);
+            node = (pm_node_t *) symbol;
         } else if (!lex_interpolation) {
             // If we don't accept interpolation then we expect the string to
             // start with a single string content node.
+            pm_string_t unescaped;
+            if (match1(parser, PM_TOKEN_EOF)) {
+                unescaped = PM_EMPTY_STRING;
+            } else {
+                unescaped = parser->current_string;
+            }
             expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_EXPECT_STRING_CONTENT);
             pm_token_t content = parser->previous;
@@ -11330,21 +12214,22 @@ parse_strings(pm_parser_t *parser) {
                 pm_node_list_t parts = PM_EMPTY_NODE_LIST;
                 pm_token_t delimiters = not_provided(parser);
-                pm_node_t *part = (pm_node_t *) pm_string_node_create_and_unescape(parser, &delimiters, &content, &delimiters, PM_UNESCAPE_MINIMAL);
+                pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &delimiters, &content, &delimiters, &unescaped);
                 pm_node_list_append(&parts, part);
-                while (accept1(parser, PM_TOKEN_STRING_CONTENT)) {
-                    part = (pm_node_t *) pm_string_node_create_and_unescape(parser, &delimiters, &parser->previous, &delimiters, PM_UNESCAPE_MINIMAL);
+                do {
+                    part = (pm_node_t *) pm_string_node_create_current_string(parser, &delimiters, &parser->current, &delimiters);
                     pm_node_list_append(&parts, part);
-                }
+                    parser_lex(parser);
+                } while (match1(parser, PM_TOKEN_STRING_CONTENT));
                 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_TERM);
                 node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
-            } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
-                node = (pm_node_t *) pm_symbol_node_create_and_unescape(parser, &opening, &content, &parser->previous, PM_UNESCAPE_ALL);
+            } else if (accept1(parser, PM_TOKEN_LABEL_END) && !state_is_arg_labeled) {
+                node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
             } else {
                 expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_TERM);
-                node = (pm_node_t *) pm_string_node_create_and_unescape(parser, &opening, &content, &parser->previous, PM_UNESCAPE_MINIMAL);
+                node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
             }
         } else if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
             // In this case we've hit string content so we know the string
@@ -11352,12 +12237,14 @@ parse_strings(pm_parser_t *parser) {
             // following token is the end (in which case we can return a
             // plain string) or if it's not then it has interpolation.
             pm_token_t content = parser->current;
+            pm_string_t unescaped = parser->current_string;
             parser_lex(parser);
-            if (accept1(parser, PM_TOKEN_STRING_END)) {
-                node = (pm_node_t *) pm_string_node_create_and_unescape(parser, &opening, &content, &parser->previous, PM_UNESCAPE_ALL);
+            if (match1(parser, PM_TOKEN_STRING_END)) {
+                node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
+                parser_lex(parser);
             } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
-                node = (pm_node_t *) pm_symbol_node_create_and_unescape(parser, &opening, &content, &parser->previous, PM_UNESCAPE_ALL);
+                node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
             } else {
                 // If we get here, then we have interpolation so we'll need
                 // to create a string or symbol node with interpolation.
@@ -11365,7 +12252,7 @@ parse_strings(pm_parser_t *parser) {
                 pm_token_t string_opening = not_provided(parser);
                 pm_token_t string_closing = not_provided(parser);
-                pm_node_t *part = (pm_node_t *) pm_string_node_create_and_unescape(parser, &string_opening, &parser->previous, &string_closing, PM_UNESCAPE_ALL);
+                pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &string_opening, &parser->previous, &string_closing, &unescaped);
                 pm_node_list_append(&parts, part);
                 while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
@@ -11374,7 +12261,7 @@ parse_strings(pm_parser_t *parser) {
                     }
                 }
-                if (accept1(parser, PM_TOKEN_LABEL_END)) {
+                if (accept1(parser, PM_TOKEN_LABEL_END) && !state_is_arg_labeled) {
                     node = (pm_node_t *) pm_interpolated_symbol_node_create(parser, &opening, &parts, &parser->previous);
                 } else {
                     expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_INTERPOLATED_TERM);
@@ -11382,11 +12269,11 @@ parse_strings(pm_parser_t *parser) {
                 }
             }
         } else {
-            // If we get here, then the first part of the string is not
-            // plain string content, in which case we need to parse the
-            // string as an interpolated string.
+            // If we get here, then the first part of the string is not plain
+            // string content, in which case we need to parse the string as an
+            // interpolated string.
             pm_node_list_t parts = PM_EMPTY_NODE_LIST;
-            pm_node_t *part = NULL;
+            pm_node_t *part;
             while (!match3(parser, PM_TOKEN_STRING_END, PM_TOKEN_LABEL_END, PM_TOKEN_EOF)) {
                 if ((part = parse_string_part(parser)) != NULL) {
@@ -11418,7 +12305,7 @@ parse_strings(pm_parser_t *parser) {
             // If it cannot be concatenated with the previous node, then we'll
             // need to add a syntax error.
             if (!PM_NODE_TYPE_P(node, PM_STRING_NODE) && !PM_NODE_TYPE_P(node, PM_INTERPOLATED_STRING_NODE)) {
-                pm_diagnostic_list_append(&parser->error_list, node->location.start, node->location.end, PM_ERR_STRING_CONCATENATION);
+                pm_parser_err_node(parser, node, PM_ERR_STRING_CONCATENATION);
             }
             // Either way we will create a concat node to hold the strings
@@ -11464,7 +12351,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
                     element = (pm_node_t *) pm_splat_node_create(parser, &operator, expression);
                 } else if (match2(parser, PM_TOKEN_LABEL, PM_TOKEN_USTAR_STAR)) {
                     if (parsed_bare_hash) {
-                        pm_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, PM_ERR_EXPRESSION_BARE_HASH);
+                        pm_parser_err_current(parser, PM_ERR_EXPRESSION_BARE_HASH);
                     }
                     pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
@@ -11480,7 +12367,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
                     if (pm_symbol_node_label_p(element) || accept1(parser, PM_TOKEN_EQUAL_GREATER)) {
                         if (parsed_bare_hash) {
-                            pm_diagnostic_list_append(&parser->error_list, parser->previous.start, parser->previous.end, PM_ERR_EXPRESSION_BARE_HASH);
+                            pm_parser_err_previous(parser, PM_ERR_EXPRESSION_BARE_HASH);
                         }
                         pm_keyword_hash_node_t *hash = pm_keyword_hash_node_create(parser);
@@ -11598,7 +12485,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
             // If we didn't find a terminator and we didn't find a right
             // parenthesis, then this is a syntax error.
             if (!terminator_found) {
-                pm_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.start, PM_ERR_EXPECT_EOL_AFTER_STATEMENT);
+                pm_parser_err(parser, parser->current.start, parser->current.start, PM_ERR_EXPECT_EOL_AFTER_STATEMENT);
             }
             // Parse each statement within the parentheses.
@@ -11627,7 +12514,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
                 } else if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
                     break;
                 } else {
-                    pm_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.start, PM_ERR_EXPECT_EOL_AFTER_STATEMENT);
+                    pm_parser_err(parser, parser->current.start, parser->current.start, PM_ERR_EXPECT_EOL_AFTER_STATEMENT);
                 }
             }
@@ -11665,7 +12552,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
             content.start = content.start + 1;
             pm_token_t closing = not_provided(parser);
-            pm_node_t *node = (pm_node_t *) pm_char_literal_node_create_and_unescape(parser, &opening, &content, &closing, PM_UNESCAPE_ALL);
+            pm_node_t *node = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &content, &closing);
             // Characters can be followed by strings in which case they are
             // automatically concatenated.
@@ -11839,9 +12726,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
         case PM_TOKEN_HEREDOC_START: {
             // Here we have found a heredoc. We'll parse it and add it to the
             // list of strings.
-            assert(parser->lex_modes.current->mode == PM_LEX_HEREDOC);
-            pm_heredoc_quote_t quote = parser->lex_modes.current->as.heredoc.quote;
-            pm_heredoc_indent_t indent = parser->lex_modes.current->as.heredoc.indent;
+            pm_lex_mode_t *lex_mode = parser->lex_modes.current;
+            assert(lex_mode->mode == PM_LEX_HEREDOC);
+            pm_heredoc_quote_t quote = lex_mode->as.heredoc.quote;
+            pm_heredoc_indent_t indent = lex_mode->as.heredoc.indent;
             parser_lex(parser);
             pm_token_t opening = parser->previous;
@@ -11857,9 +12745,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
                 pm_token_t content = parse_strings_empty_content(parser->previous.start);
                 if (quote == PM_HEREDOC_QUOTE_BACKTICK) {
-                    node = (pm_node_t *) pm_xstring_node_create_and_unescape(parser, &opening, &content, &parser->previous);
+                    node = (pm_node_t *) pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_EMPTY_STRING);
                 } else {
-                    node = (pm_node_t *) pm_string_node_create_and_unescape(parser, &opening, &content, &parser->previous, PM_UNESCAPE_NONE);
+                    node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &PM_EMPTY_STRING);
                 }
                 node->location.end = opening.end;
@@ -11884,15 +12772,14 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
                     cast->base.type = PM_X_STRING_NODE;
                 }
-                lex_state_set(parser, PM_LEX_STATE_END);
-                expect1(parser, PM_TOKEN_HEREDOC_END, PM_ERR_HEREDOC_TERM);
+                size_t common_whitespace = lex_mode->as.heredoc.common_whitespace;
+                if (indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
+                    parse_heredoc_dedent_string(&cast->unescaped, common_whitespace);
+                }
                 node = (pm_node_t *) cast;
-                if (indent == PM_HEREDOC_INDENT_TILDE) {
-                    int common_whitespace = parse_heredoc_common_whitespace_for_single_node(parser, node, -1);
-                    parse_heredoc_dedent_single_node(parser, &cast->unescaped, true, common_whitespace, quote);
-                }
+                lex_state_set(parser, PM_LEX_STATE_END);
+                expect1(parser, PM_TOKEN_HEREDOC_END, PM_ERR_HEREDOC_TERM);
             } else {
                 // If we get here, then we have multiple parts in the heredoc,
                 // so we'll need to create an interpolated string node to hold
@@ -11931,8 +12818,16 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
                 // If this is a heredoc that is indented with a ~, then we need
                 // to dedent each line by the common leading whitespace.
-                if (indent == PM_HEREDOC_INDENT_TILDE) {
-                    parse_heredoc_dedent(parser, node, quote);
+                size_t common_whitespace = lex_mode->as.heredoc.common_whitespace;
+                if (indent == PM_HEREDOC_INDENT_TILDE && (common_whitespace != (size_t) -1) && (common_whitespace != 0)) {
+                    pm_node_list_t *nodes;
+                    if (quote == PM_HEREDOC_QUOTE_BACKTICK) {
+                        nodes = &((pm_interpolated_x_string_node_t *) node)->parts;
+                    } else {
+                        nodes = &((pm_interpolated_string_node_t *) node)->parts;
+                    }
+                    parse_heredoc_dedent(parser, nodes, common_whitespace);
                 }
             }
@@ -11995,10 +12890,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
                 case PM_GLOBAL_VARIABLE_READ_NODE: {
                     if (PM_NODE_TYPE_P(old_name, PM_BACK_REFERENCE_READ_NODE) || PM_NODE_TYPE_P(old_name, PM_NUMBERED_REFERENCE_READ_NODE) || PM_NODE_TYPE_P(old_name, PM_GLOBAL_VARIABLE_READ_NODE)) {
                         if (PM_NODE_TYPE_P(old_name, PM_NUMBERED_REFERENCE_READ_NODE)) {
-                            pm_diagnostic_list_append(&parser->error_list, old_name->location.start, old_name->location.end, PM_ERR_ALIAS_ARGUMENT);
+                            pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
                         }
                     } else {
-                        pm_diagnostic_list_append(&parser->error_list, old_name->location.start, old_name->location.end, PM_ERR_ALIAS_ARGUMENT);
+                        pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
                     }
                     return (pm_node_t *) pm_alias_global_variable_node_create(parser, &keyword, new_name, old_name);
@@ -12006,7 +12901,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
                 case PM_SYMBOL_NODE:
                 case PM_INTERPOLATED_SYMBOL_NODE: {
                     if (!PM_NODE_TYPE_P(old_name, PM_SYMBOL_NODE) && !PM_NODE_TYPE_P(old_name, PM_INTERPOLATED_SYMBOL_NODE)) {
-                        pm_diagnostic_list_append(&parser->error_list, old_name->location.start, old_name->location.end, PM_ERR_ALIAS_ARGUMENT);
+                        pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
                     }
                 }
                 /* fallthrough */
@@ -12032,7 +12927,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
             }
             if (accept1(parser, PM_TOKEN_KEYWORD_END)) {
-                pm_diagnostic_list_append(&parser->error_list, case_keyword.start, case_keyword.end, PM_ERR_CASE_MISSING_CONDITIONS);
+                pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
                 return (pm_node_t *) pm_case_node_create(parser, &case_keyword, predicate, NULL, &parser->previous);
             }
@@ -12142,7 +13037,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
             // If we didn't parse any conditions (in or when) then we need to
             // indicate that we have an error.
             if (case_node->conditions.size == 0) {
-                pm_diagnostic_list_append(&parser->error_list, case_keyword.start, case_keyword.end, PM_ERR_CASE_MISSING_CONDITIONS);
+                pm_parser_err_token(parser, &case_keyword, PM_ERR_CASE_MISSING_CONDITIONS);
             }
             accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON);
@@ -12185,12 +13080,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
             pm_begin_node_end_keyword_set(begin_node, &parser->previous);
             if ((begin_node->else_clause != NULL) && (begin_node->rescue_clause == NULL)) {
-                pm_diagnostic_list_append(
-                    &parser->error_list,
-                    begin_node->else_clause->base.location.start,
-                    begin_node->else_clause->base.location.end,
-                    PM_ERR_BEGIN_LONELY_ELSE
-                );
+                pm_parser_err_node(parser, (pm_node_t *) begin_node->else_clause, PM_ERR_BEGIN_LONELY_ELSE);
             }
             return (pm_node_t *) begin_node;
@@ -12206,7 +13096,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
             expect1(parser, PM_TOKEN_BRACE_RIGHT, PM_ERR_BEGIN_UPCASE_TERM);
             pm_context_t context = parser->current_context->context;
             if ((context != PM_CONTEXT_MAIN) && (context != PM_CONTEXT_PREEXE)) {
-                pm_diagnostic_list_append(&parser->error_list, keyword.start, keyword.end, PM_ERR_BEGIN_UPCASE_TOPLEVEL);
+                pm_parser_err_token(parser, &keyword, PM_ERR_BEGIN_UPCASE_TOPLEVEL);
             }
             return (pm_node_t *) pm_pre_execution_node_create(parser, &keyword, &opening, statements, &parser->previous);
         }
@@ -12239,7 +13129,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
                         (parser->current_context->context == PM_CONTEXT_CLASS) ||
                         (parser->current_context->context == PM_CONTEXT_MODULE)
                     ) {
-                        pm_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, PM_ERR_RETURN_INVALID);
+                        pm_parser_err_current(parser, PM_ERR_RETURN_INVALID);
                     }
                     return (pm_node_t *) pm_return_node_create(parser, &keyword, arguments.arguments);
                 }
@@ -12305,7 +13195,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
             pm_node_t *constant_path = parse_expression(parser, PM_BINDING_POWER_INDEX, PM_ERR_CLASS_NAME);
             pm_token_t name = parser->previous;
             if (name.type != PM_TOKEN_CONSTANT) {
-                pm_diagnostic_list_append(&parser->error_list, name.start, name.end, PM_ERR_CLASS_NAME);
+                pm_parser_err_token(parser, &name, PM_ERR_CLASS_NAME);
             }
             pm_token_t inheritance_operator;
@@ -12346,7 +13236,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
             expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_CLASS_TERM);
             if (context_def_p(parser)) {
-                pm_diagnostic_list_append(&parser->error_list, class_keyword.start, class_keyword.end, PM_ERR_CLASS_IN_METHOD);
+                pm_parser_err_token(parser, &class_keyword, PM_ERR_CLASS_IN_METHOD);
             }
             pm_constant_id_list_t locals = parser->current_scope->locals;
@@ -12354,7 +13244,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
             pm_do_loop_stack_pop(parser);
             if (!PM_NODE_TYPE_P(constant_path, PM_CONSTANT_PATH_NODE) && !(PM_NODE_TYPE_P(constant_path, PM_CONSTANT_READ_NODE))) {
-                pm_diagnostic_list_append(&parser->error_list, constant_path->location.start, constant_path->location.end, PM_ERR_CLASS_NAME);
+                pm_parser_err_node(parser, constant_path, PM_ERR_CLASS_NAME);
             }
             return (pm_node_t *) pm_class_node_create(parser, &locals, &class_keyword, constant_path, &name, &inheritance_operator, superclass, statements, &parser->previous);
@@ -12486,7 +13376,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
             // If, after all that, we were unable to find a method name, add an
             // error to the error list.
             if (name.type == PM_TOKEN_MISSING) {
-                pm_diagnostic_list_append(&parser->error_list, parser->previous.start, parser->previous.end, PM_ERR_DEF_NAME);
+                pm_parser_err_previous(parser, PM_ERR_DEF_NAME);
             }
             pm_token_t lparen;
@@ -12538,7 +13428,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
             if (accept1(parser, PM_TOKEN_EQUAL)) {
                 if (token_is_setter_name(&name)) {
-                    pm_diagnostic_list_append(&parser->error_list, name.start, name.end, PM_ERR_DEF_ENDLESS_SETTER);
+                    pm_parser_err_token(parser, &name, PM_ERR_DEF_ENDLESS_SETTER);
                 }
                 equal = parser->previous;
@@ -12656,6 +13546,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
             parser_lex(parser);
             pm_token_t for_keyword = parser->previous;
             pm_node_t *index;
+            pm_parser_scope_push_transparent(parser);
             // First, parse out the first index expression.
             if (accept1(parser, PM_TOKEN_USTAR)) {
@@ -12670,7 +13561,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
             } else if (token_begins_expression_p(parser->current.type)) {
                 index = parse_expression(parser, PM_BINDING_POWER_INDEX, PM_ERR_EXPECT_EXPRESSION_AFTER_COMMA);
             } else {
-                pm_diagnostic_list_append(&parser->error_list, for_keyword.start, for_keyword.end, PM_ERR_FOR_INDEX);
+                pm_parser_err_token(parser, &for_keyword, PM_ERR_FOR_INDEX);
                 index = (pm_node_t *) pm_missing_node_create(parser, for_keyword.start, for_keyword.end);
             }
@@ -12681,6 +13572,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
                 index = parse_target(parser, index);
             }
+            pm_parser_scope_pop(parser);
             pm_do_loop_stack_push(parser, true);
             expect1(parser, PM_TOKEN_KEYWORD_IN, PM_ERR_FOR_IN);
@@ -12700,8 +13592,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
             pm_statements_node_t *statements = NULL;
             if (!accept1(parser, PM_TOKEN_KEYWORD_END)) {
+                pm_parser_scope_push_transparent(parser);
                 statements = parse_statements(parser, PM_CONTEXT_FOR);
                 expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_FOR_TERM);
+                pm_parser_scope_pop(parser);
             }
             return (pm_node_t *) pm_for_node_create(parser, index, collection, statements, &for_keyword, &in_keyword, &do_keyword, &parser->previous);
@@ -12797,7 +13691,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
             // syntax error. We handle that here as well.
             name = parser->previous;
             if (name.type != PM_TOKEN_CONSTANT) {
-                pm_diagnostic_list_append(&parser->error_list, name.start, name.end, PM_ERR_MODULE_NAME);
+                pm_parser_err_token(parser, &name, PM_ERR_MODULE_NAME);
             }
             pm_parser_scope_push(parser, true);
@@ -12821,7 +13715,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
             expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_MODULE_TERM);
             if (context_def_p(parser)) {
-                pm_diagnostic_list_append(&parser->error_list, module_keyword.start, module_keyword.end, PM_ERR_MODULE_IN_METHOD);
+                pm_parser_err_token(parser, &module_keyword, PM_ERR_MODULE_IN_METHOD);
             }
             return (pm_node_t *) pm_module_node_create(parser, &locals, &module_keyword, constant_path, &name, statements, &parser->previous);
@@ -12891,13 +13785,13 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
                 accept1(parser, PM_TOKEN_WORDS_SEP);
                 if (match1(parser, PM_TOKEN_STRING_END)) break;
-                expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_LOWER_ELEMENT);
-                pm_token_t opening = not_provided(parser);
-                pm_token_t closing = not_provided(parser);
+                if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
+                    pm_token_t opening = not_provided(parser);
+                    pm_token_t closing = not_provided(parser);
+                    pm_array_node_elements_append(array, (pm_node_t *) pm_symbol_node_create_current_string(parser, &opening, &parser->current, &closing));
+                }
-                pm_node_t *symbol = (pm_node_t *) pm_symbol_node_create_and_unescape(parser, &opening, &parser->previous, &closing, PM_UNESCAPE_MINIMAL);
-                pm_array_node_elements_append(array, symbol);
+                expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_I_LOWER_ELEMENT);
             }
             expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_I_LOWER_TERM);
@@ -12937,26 +13831,26 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
                             // If we hit content and the current node is NULL, then this is
                             // the first string content we've seen. In that case we're going
                             // to create a new string node and set that to the current.
+                            current = (pm_node_t *) pm_symbol_node_create_current_string(parser, &opening, &parser->current, &closing);
                             parser_lex(parser);
-                            current = (pm_node_t *) pm_symbol_node_create_and_unescape(parser, &opening, &parser->previous, &closing, PM_UNESCAPE_ALL);
                         } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_SYMBOL_NODE)) {
                             // If we hit string content and the current node is an
                             // interpolated string, then we need to append the string content
                             // to the list of child nodes.
-                            pm_node_t *part = parse_string_part(parser);
-                            pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, part);
+                            pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
+                            parser_lex(parser);
+                            pm_interpolated_symbol_node_append((pm_interpolated_symbol_node_t *) current, string);
                         } else if (PM_NODE_TYPE_P(current, PM_SYMBOL_NODE)) {
                             // If we hit string content and the current node is a string node,
                             // then we need to convert the current node into an interpolated
                             // string and add the string content to the list of child nodes.
-                            pm_token_t opening = not_provided(parser);
-                            pm_token_t closing = not_provided(parser);
-                            pm_interpolated_symbol_node_t *interpolated =
-                                pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
-                            pm_interpolated_symbol_node_append(interpolated, current);
+                            pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->previous, &closing);
+                            parser_lex(parser);
-                            pm_node_t *part = parse_string_part(parser);
-                            pm_interpolated_symbol_node_append(interpolated, part);
+                            pm_interpolated_symbol_node_t *interpolated = pm_interpolated_symbol_node_create(parser, &opening, NULL, &closing);
+                            pm_interpolated_symbol_node_append(interpolated, current);
+                            pm_interpolated_symbol_node_append(interpolated, string);
                             current = (pm_node_t *) interpolated;
                         } else {
                             assert(false && "unreachable");
@@ -13063,12 +13957,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
                 accept1(parser, PM_TOKEN_WORDS_SEP);
                 if (match1(parser, PM_TOKEN_STRING_END)) break;
-                expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_LOWER_ELEMENT);
+                if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
+                    pm_token_t opening = not_provided(parser);
+                    pm_token_t closing = not_provided(parser);
-                pm_token_t opening = not_provided(parser);
-                pm_token_t closing = not_provided(parser);
-                pm_node_t *string = (pm_node_t *) pm_string_node_create_and_unescape(parser, &opening, &parser->previous, &closing, PM_UNESCAPE_WHITESPACE);
-                pm_array_node_elements_append(array, string);
+                    pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
+                    pm_array_node_elements_append(array, string);
+                }
+                expect1(parser, PM_TOKEN_STRING_CONTENT, PM_ERR_LIST_W_LOWER_ELEMENT);
             }
             expect1(parser, PM_TOKEN_STRING_END, PM_ERR_LIST_W_LOWER_TERM);
@@ -13101,29 +13998,29 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
                         break;
                     }
                     case PM_TOKEN_STRING_CONTENT: {
+                        pm_token_t opening = not_provided(parser);
+                        pm_token_t closing = not_provided(parser);
+                        pm_node_t *string = (pm_node_t *) pm_string_node_create_current_string(parser, &opening, &parser->current, &closing);
+                        parser_lex(parser);
                         if (current == NULL) {
                             // If we hit content and the current node is NULL, then this is
                             // the first string content we've seen. In that case we're going
                             // to create a new string node and set that to the current.
-                            current = parse_string_part(parser);
+                            current = string;
                         } else if (PM_NODE_TYPE_P(current, PM_INTERPOLATED_STRING_NODE)) {
                             // If we hit string content and the current node is an
                             // interpolated string, then we need to append the string content
                             // to the list of child nodes.
-                            pm_node_t *part = parse_string_part(parser);
-                            pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, part);
+                            pm_interpolated_string_node_append((pm_interpolated_string_node_t *) current, string);
                         } else if (PM_NODE_TYPE_P(current, PM_STRING_NODE)) {
                             // If we hit string content and the current node is a string node,
                             // then we need to convert the current node into an interpolated
                             // string and add the string content to the list of child nodes.
-                            pm_token_t opening = not_provided(parser);
-                            pm_token_t closing = not_provided(parser);
-                            pm_interpolated_string_node_t *interpolated =
-                                pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
+                            pm_interpolated_string_node_t *interpolated = pm_interpolated_string_node_create(parser, &opening, NULL, &closing);
                             pm_interpolated_string_node_append(interpolated, current);
-                            pm_node_t *part = parse_string_part(parser);
-                            pm_interpolated_string_node_append(interpolated, part);
+                            pm_interpolated_string_node_append(interpolated, string);
                             current = (pm_node_t *) interpolated;
                         } else {
                             assert(false && "unreachable");
@@ -13218,7 +14115,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
                 };
                 parser_lex(parser);
-                return (pm_node_t *) pm_regular_expression_node_create_and_unescape(parser, &opening, &content, &parser->previous, PM_UNESCAPE_ALL);
+                return (pm_node_t *) pm_regular_expression_node_create(parser, &opening, &content, &parser->previous);
             }
             pm_interpolated_regular_expression_node_t *node;
@@ -13228,6 +14125,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
                 // expression at least has something in it. We'll need to check if the
                 // following token is the end (in which case we can return a plain
                 // regular expression) or if it's not then it has interpolation.
+                pm_string_t unescaped = parser->current_string;
                 pm_token_t content = parser->current;
                 parser_lex(parser);
@@ -13235,7 +14133,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
                 // without interpolation, which can be represented more succinctly and
                 // more easily compiled.
                 if (accept1(parser, PM_TOKEN_REGEXP_END)) {
-                    return (pm_node_t *) pm_regular_expression_node_create_and_unescape(parser, &opening, &content, &parser->previous, PM_UNESCAPE_ALL);
+                    return (pm_node_t *) pm_regular_expression_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
                 }
                 // If we get here, then we have interpolation so we'll need to create
@@ -13244,7 +14142,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
                 pm_token_t opening = not_provided(parser);
                 pm_token_t closing = not_provided(parser);
-                pm_node_t *part = (pm_node_t *) pm_string_node_create_and_unescape(parser, &opening, &parser->previous, &closing, PM_UNESCAPE_ALL);
+                pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &parser->previous, &closing, &unescaped);
                 pm_interpolated_regular_expression_node_append(node, part);
             } else {
                 // If the first part of the body of the regular expression is not a
@@ -13255,9 +14153,9 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
             // Now that we're here and we have interpolation, we'll parse all of the
             // parts into the list.
+            pm_node_t *part;
             while (!match2(parser, PM_TOKEN_REGEXP_END, PM_TOKEN_EOF)) {
-                pm_node_t *part = parse_string_part(parser);
-                if (part != NULL) {
+                if ((part = parse_string_part(parser)) != NULL) {
                     pm_interpolated_regular_expression_node_append(node, part);
                 }
             }
@@ -13293,35 +14191,37 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power) {
             pm_interpolated_x_string_node_t *node;
             if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
-                // In this case we've hit string content so we know the string at least
-                // has something in it. We'll need to check if the following token is
-                // the end (in which case we can return a plain string) or if it's not
-                // then it has interpolation.
+                // In this case we've hit string content so we know the string
+                // at least has something in it. We'll need to check if the
+                // following token is the end (in which case we can return a
+                // plain string) or if it's not then it has interpolation.
+                pm_string_t unescaped = parser->current_string;
                 pm_token_t content = parser->current;
                 parser_lex(parser);
                 if (accept1(parser, PM_TOKEN_STRING_END)) {
-                    return (pm_node_t *) pm_xstring_node_create_and_unescape(parser, &opening, &content, &parser->previous);
+                    return (pm_node_t *) pm_xstring_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
                 }
-                // If we get here, then we have interpolation so we'll need to create
-                // a string node with interpolation.
+                // If we get here, then we have interpolation so we'll need to
+                // create a string node with interpolation.
                 node = pm_interpolated_xstring_node_create(parser, &opening, &opening);
                 pm_token_t opening = not_provided(parser);
                 pm_token_t closing = not_provided(parser);
-                pm_node_t *part = (pm_node_t *) pm_string_node_create_and_unescape(parser, &opening, &parser->previous, &closing, PM_UNESCAPE_ALL);
+                pm_node_t *part = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &parser->previous, &closing, &unescaped);
                 pm_interpolated_xstring_node_append(node, part);
             } else {
-                // If the first part of the body of the string is not a string content,
-                // then we have interpolation and we need to create an interpolated
-                // string node.
+                // If the first part of the body of the string is not a string
+                // content, then we have interpolation and we need to create an
+                // interpolated string node.
                 node = pm_interpolated_xstring_node_create(parser, &opening, &opening);
             }
+            pm_node_t *part;
             while (!match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
-                pm_node_t *part = parse_string_part(parser);
-                if (part != NULL) {
+                if ((part = parse_string_part(parser)) != NULL) {
                     pm_interpolated_xstring_node_append(node, part);
                 }
             }
@@ -13542,7 +14442,7 @@ parse_assignment_value(pm_parser_t *parser, pm_binding_power_t previous_binding_
 static void
 parse_call_operator_write_block(pm_parser_t *parser, pm_call_node_t *call_node, const pm_token_t *operator) {
     if (call_node->block != NULL) {
-        pm_diagnostic_list_append(&parser->error_list, operator->start, operator->end, PM_ERR_OPERATOR_WRITE_BLOCK);
+        pm_parser_err_token(parser, operator, PM_ERR_OPERATOR_WRITE_BLOCK);
         pm_node_destroy(parser, (pm_node_t *) call_node->block);
         call_node->block = NULL;
     }
@@ -13590,7 +14490,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
                     // In this case we have an = sign, but we don't know what it's for. We
                     // need to treat it as an error. For now, we'll mark it as an error
                     // and just skip right past it.
-                    pm_diagnostic_list_append(&parser->error_list, token.start, token.end, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL);
+                    pm_parser_err_token(parser, &token, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL);
                     return node;
             }
         }
@@ -13598,7 +14498,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
             switch (PM_NODE_TYPE(node)) {
                 case PM_BACK_REFERENCE_READ_NODE:
                 case PM_NUMBERED_REFERENCE_READ_NODE:
-                    pm_diagnostic_list_append(&parser->error_list, node->location.start, node->location.end, PM_ERR_WRITE_TARGET_READONLY);
+                    pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_READONLY);
                 /* fallthrough */
                 case PM_GLOBAL_VARIABLE_READ_NODE: {
                     parser_lex(parser);
@@ -13661,7 +14561,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
                         pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc.start, message_loc.end);
                         if (token_is_numbered_parameter(message_loc.start, message_loc.end)) {
-                            pm_diagnostic_list_append(&parser->error_list, message_loc.start, message_loc.end, PM_ERR_PARAMETER_NUMBERED_RESERVED);
+                            pm_parser_err_location(parser, &message_loc, PM_ERR_PARAMETER_NUMBERED_RESERVED);
                         }
                         parser_lex(parser);
@@ -13683,7 +14583,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
                 }
                 case PM_MULTI_WRITE_NODE: {
                     parser_lex(parser);
-                    pm_diagnostic_list_append(&parser->error_list, token.start, token.end, PM_ERR_AMPAMPEQ_MULTI_ASSIGN);
+                    pm_parser_err_token(parser, &token, PM_ERR_AMPAMPEQ_MULTI_ASSIGN);
                     return node;
                 }
                 default:
@@ -13692,7 +14592,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
                     // In this case we have an &&= sign, but we don't know what it's for.
                     // We need to treat it as an error. For now, we'll mark it as an error
                     // and just skip right past it.
-                    pm_diagnostic_list_append(&parser->error_list, token.start, token.end, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
+                    pm_parser_err_token(parser, &token, PM_ERR_EXPECT_EXPRESSION_AFTER_AMPAMPEQ);
                     return node;
             }
         }
@@ -13700,7 +14600,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
             switch (PM_NODE_TYPE(node)) {
                 case PM_BACK_REFERENCE_READ_NODE:
                 case PM_NUMBERED_REFERENCE_READ_NODE:
-                    pm_diagnostic_list_append(&parser->error_list, node->location.start, node->location.end, PM_ERR_WRITE_TARGET_READONLY);
+                    pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_READONLY);
                 /* fallthrough */
                 case PM_GLOBAL_VARIABLE_READ_NODE: {
                     parser_lex(parser);
@@ -13763,7 +14663,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
                         pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc.start, message_loc.end);
                         if (token_is_numbered_parameter(message_loc.start, message_loc.end)) {
-                            pm_diagnostic_list_append(&parser->error_list, message_loc.start, message_loc.end, PM_ERR_PARAMETER_NUMBERED_RESERVED);
+                            pm_parser_err_location(parser, &message_loc, PM_ERR_PARAMETER_NUMBERED_RESERVED);
                         }
                         parser_lex(parser);
@@ -13785,7 +14685,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
                 }
                 case PM_MULTI_WRITE_NODE: {
                     parser_lex(parser);
-                    pm_diagnostic_list_append(&parser->error_list, token.start, token.end, PM_ERR_PIPEPIPEEQ_MULTI_ASSIGN);
+                    pm_parser_err_token(parser, &token, PM_ERR_PIPEPIPEEQ_MULTI_ASSIGN);
                     return node;
                 }
                 default:
@@ -13794,7 +14694,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
                     // In this case we have an ||= sign, but we don't know what it's for.
                     // We need to treat it as an error. For now, we'll mark it as an error
                     // and just skip right past it.
-                    pm_diagnostic_list_append(&parser->error_list, token.start, token.end, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
+                    pm_parser_err_token(parser, &token, PM_ERR_EXPECT_EXPRESSION_AFTER_PIPEPIPEEQ);
                     return node;
             }
         }
@@ -13812,7 +14712,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
             switch (PM_NODE_TYPE(node)) {
                 case PM_BACK_REFERENCE_READ_NODE:
                 case PM_NUMBERED_REFERENCE_READ_NODE:
-                    pm_diagnostic_list_append(&parser->error_list, node->location.start, node->location.end, PM_ERR_WRITE_TARGET_READONLY);
+                    pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_READONLY);
                 /* fallthrough */
                 case PM_GLOBAL_VARIABLE_READ_NODE: {
                     parser_lex(parser);
@@ -13875,7 +14775,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
                         pm_constant_id_t constant_id = pm_parser_local_add_location(parser, message_loc.start, message_loc.end);
                         if (token_is_numbered_parameter(message_loc.start, message_loc.end)) {
-                            pm_diagnostic_list_append(&parser->error_list, message_loc.start, message_loc.end, PM_ERR_PARAMETER_NUMBERED_RESERVED);
+                            pm_parser_err_location(parser, &message_loc, PM_ERR_PARAMETER_NUMBERED_RESERVED);
                         }
                         parser_lex(parser);
@@ -13897,7 +14797,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
                 }
                 case PM_MULTI_WRITE_NODE: {
                     parser_lex(parser);
-                    pm_diagnostic_list_append(&parser->error_list, token.start, token.end, PM_ERR_OPERATOR_MULTI_ASSIGN);
+                    pm_parser_err_token(parser, &token, PM_ERR_OPERATOR_MULTI_ASSIGN);
                     return node;
                 }
                 default:
@@ -13906,7 +14806,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
                     // In this case we have an operator but we don't know what it's for.
                     // We need to treat it as an error. For now, we'll mark it as an error
                     // and just skip right past it.
-                    pm_diagnostic_list_append(&parser->error_list, parser->previous.start, parser->previous.end, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
+                    pm_parser_err_previous(parser, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR);
                     return node;
             }
         }
@@ -14021,7 +14921,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
                     break;
                 }
                 default: {
-                    pm_diagnostic_list_append(&parser->error_list, parser->current.start, parser->current.end, PM_ERR_DEF_NAME);
+                    pm_parser_err_current(parser, PM_ERR_DEF_NAME);
                     message = (pm_token_t) { .type = PM_TOKEN_MISSING, .start = parser->previous.end, .end = parser->previous.end };
                 }
             }
@@ -14172,7 +15072,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
                     return (pm_node_t *) pm_call_node_shorthand_create(parser, node, &delimiter, &arguments);
                 }
                 default: {
-                    pm_diagnostic_list_append(&parser->error_list, delimiter.start, delimiter.end, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
+                    pm_parser_err_token(parser, &delimiter, PM_ERR_CONSTANT_PATH_COLON_COLON_CONSTANT);
                     pm_node_t *child = (pm_node_t *) pm_missing_node_create(parser, delimiter.start, delimiter.end);
                     return (pm_node_t *)pm_constant_path_node_create(parser, node, &delimiter, child);
                 }
@@ -14220,7 +15120,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
             if (block != NULL) {
                 if (arguments.block != NULL) {
-                    pm_diagnostic_list_append(&parser->error_list, block->base.location.start, block->base.location.end, PM_ERR_ARGUMENT_AFTER_BLOCK);
+                    pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_AFTER_BLOCK);
                     if (arguments.arguments == NULL) {
                         arguments.arguments = pm_arguments_node_create(parser);
                     }
@@ -14283,7 +15183,7 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, pm_diagn
     // parse_expression_prefix is going to be a missing node. In that case we need
     // to add the error message to the parser's error list.
     if (PM_NODE_TYPE_P(node, PM_MISSING_NODE)) {
-        pm_diagnostic_list_append(&parser->error_list, recovery.end, recovery.end, diag_id);
+        pm_parser_err(parser, recovery.end, recovery.end, diag_id);
         return node;
     }
@@ -14428,6 +15328,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const ch
         .next_start = NULL,
         .heredoc_end = NULL,
         .comment_list = PM_LIST_EMPTY,
+        .magic_comment_list = PM_LIST_EMPTY,
         .warning_list = PM_LIST_EMPTY,
         .error_list = PM_LIST_EMPTY,
         .current_scope = NULL,
@@ -14441,6 +15342,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const ch
         .constant_pool = PM_CONSTANT_POOL_EMPTY,
         .newline_list = PM_NEWLINE_LIST_EMPTY,
         .integer_base = 0,
+        .current_string = PM_EMPTY_STRING,
         .command_start = true,
         .recovering = false,
         .encoding_changed = false,
@@ -14521,6 +15423,19 @@ pm_comment_list_free(pm_list_t *list) {
     }
 }
+// Free all of the memory associated with the magic comment list.
+static inline void
+pm_magic_comment_list_free(pm_list_t *list) {
+    pm_list_node_t *node, *next;
+    for (node = list->head; node != NULL; node = next) {
+        next = node->next;
+        pm_magic_comment_t *magic_comment = (pm_magic_comment_t *) node;
+        free(magic_comment);
+    }
+}
 // Free any memory associated with the given parser.
 PRISM_EXPORTED_FUNCTION void
 pm_parser_free(pm_parser_t *parser) {
@@ -14528,6 +15443,7 @@ pm_parser_free(pm_parser_t *parser) {
     pm_diagnostic_list_free(&parser->error_list);
     pm_diagnostic_list_free(&parser->warning_list);
     pm_comment_list_free(&parser->comment_list);
+    pm_magic_comment_list_free(&parser->magic_comment_list);
     pm_constant_pool_free(&parser->constant_pool);
     pm_newline_list_free(&parser->newline_list);
@@ -14578,10 +15494,11 @@ pm_parse_serialize(const uint8_t *source, size_t size, pm_buffer_t *buffer, cons
     pm_parser_free(&parser);
 }
-#undef PM_LOCATION_NULL_VALUE
-#undef PM_LOCATION_TOKEN_VALUE
-#undef PM_LOCATION_NODE_VALUE
-#undef PM_LOCATION_NODE_BASE_VALUE
 #undef PM_CASE_KEYWORD
 #undef PM_CASE_OPERATOR
 #undef PM_CASE_WRITABLE
+#undef PM_EMPTY_STRING
+#undef PM_LOCATION_NODE_BASE_VALUE
+#undef PM_LOCATION_NODE_VALUE
+#undef PM_LOCATION_NULL_VALUE
+#undef PM_LOCATION_TOKEN_VALUE