RubyGems - prism - Versions diffs - 0.20.0 → 0.21.0 - Mend

prism 0.20.0 → 0.21.0

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the respective public registry.

Files changed (16)

checksums.yaml +4 -4
data/CHANGELOG.md +21 -2
data/docs/parser_translation.md +1 -1
data/ext/prism/extension.h +1 -1
data/include/prism/ast.h +1 -1
data/include/prism/parser.h +1 -1
data/include/prism/util/pm_constant_pool.h +11 -0
data/include/prism/version.h +2 -2
data/lib/prism/serialize.rb +1 -1
data/lib/prism/translation/parser/compiler.rb +88 -91
data/lib/prism/translation/parser.rb +19 -11
data/prism.gemspec +1 -1
data/src/encoding.c +1 -1
data/src/prism.c +238 -181
data/src/util/pm_constant_pool.c +25 -0
metadata +2 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 742f60637c4fd77f62b7713f484e87d70249e6e9dfeefc2c2ba0bfe667eed034
-  data.tar.gz: 67bd239271c1d848536389668722a0617419b19b6bb250ed39cf17b7a414da4c
+  metadata.gz: b4d054a1268bf7f8b5947f30ad244c4713c850911e79c1ba469eca0ac36bc47c
+  data.tar.gz: b77e29c93584b79759381d75cfb5ad0753fe8d5f92863cada81895bb67f17572
 SHA512:
-  metadata.gz: d0a90337f2635d35b08b0932ad6d928610406bb3f908c1b7b601f5fcb08b404604745f93bffd9a4bb84fc13cde0b6b4a71015390546a077daa4e05d7d8cf965e
-  data.tar.gz: 231693786022302c486d3c4ea2c8841636e3c94cb37f6b5e410f1ab6ac4ce7fc12e53cd4d67f43d6a38f3292867fda808e655391528619d626823a5163cbf722
+  metadata.gz: 00fa781d854c4f9b716b238c392e48f3bd946b52a5ea100c8fa98bd909bd7d2fcd116b80c7877cbfff59bb991d7c78158ded3ff4154d7d3362df3b8c00fd4d08
+  data.tar.gz: cfea37b3aa825f0bb91a0bd19dec1ec72187790aca39a2b8d560a483d83c1f4604346320d071c93cd605f39c1fd975b1b508395d9673d7bf95c16feaeeee52e6

data/CHANGELOG.md CHANGED Viewed

@@ -6,7 +6,25 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) a
 ## [Unreleased]
-## [0.20.0] - 2024-01-01
+## [0.21.0] - 2024-02-05
+### Added
+- Add the `pm_constant_pool_find` API for finding a constant.
+### Changed
+- Fixes for `Prism::Translation::Parser`.
+  - Ensure all errors flow through `parser.diagnostics.process`.
+  - Fix the find pattern node.
+  - Fix block forwarding with `NumberedParametersNode`.
+  - Ensure we can parse strings with invalid bytes for the encoding.
+  - Fix hash pairs in pattern matching.
+- Properly reject operator writes on operator calls, e.g., `a.+ -= b`.
+- Fix multi-byte escapes.
+- Handle missing body in `begin` within the receiver of a method call.
+## [0.20.0] - 2024-02-01
 ### Added
@@ -323,7 +341,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) a
 - 🎉 Initial release! 🎉
-[unreleased]: https://github.com/ruby/prism/compare/v0.20.0...HEAD
+[unreleased]: https://github.com/ruby/prism/compare/v0.21.0...HEAD
+[0.21.0]: https://github.com/ruby/prism/compare/v0.20.0...v0.21.0
 [0.20.0]: https://github.com/ruby/prism/compare/v0.19.0...v0.20.0
 [0.19.0]: https://github.com/ruby/prism/compare/v0.18.0...v0.19.0
 [0.18.0]: https://github.com/ruby/prism/compare/v0.17.1...v0.18.0

data/docs/parser_translation.md CHANGED Viewed

@@ -9,7 +9,7 @@ The `parser` gem provides multiple parsers to support different versions of the
 You can use the `prism` parser like you would any other. After requiring the parser, you should be able to call any of the regular `Parser::Base` APIs that you would normally use.
 ```ruby
-require "prism/translation/parser"
+require "prism"
 Prism::Translation::Parser.parse_file("path/to/file.rb")
 ```

data/ext/prism/extension.h CHANGED Viewed

@@ -1,7 +1,7 @@
 #ifndef PRISM_EXT_NODE_H
 #define PRISM_EXT_NODE_H
-#define EXPECTED_PRISM_VERSION "0.20.0"
+#define EXPECTED_PRISM_VERSION "0.21.0"
 #include <ruby.h>
 #include <ruby/encoding.h>

data/include/prism/ast.h CHANGED Viewed

@@ -1042,7 +1042,7 @@ static const pm_node_flags_t PM_NODE_FLAG_COMMON_MASK = (1 << (PM_NODE_FLAG_BITS
  * Cast the type to an enum to allow the compiler to provide exhaustiveness
  * checking.
  */
-#define PM_NODE_TYPE(node) ((enum pm_node_type) node->type)
+#define PM_NODE_TYPE(node) ((enum pm_node_type) (node)->type)
 /**
  * Return true if the type of the given node matches the given type.

data/include/prism/parser.h CHANGED Viewed

@@ -626,7 +626,7 @@ struct pm_parser {
      * This is the path of the file being parsed. We use the filepath when
      * constructing SourceFileNodes.
      */
-    pm_string_t filepath_string;
+    pm_string_t filepath;
     /**
      * This constant pool keeps all of the constants defined throughout the file

data/include/prism/util/pm_constant_pool.h CHANGED Viewed

@@ -154,6 +154,17 @@ bool pm_constant_pool_init(pm_constant_pool_t *pool, uint32_t capacity);
  */
 pm_constant_t * pm_constant_pool_id_to_constant(const pm_constant_pool_t *pool, pm_constant_id_t constant_id);
+/**
+ * Find a constant in a constant pool. Returns the id of the constant, or 0 if
+ * the constant is not found.
+ *
+ * @param pool The pool to find the constant in.
+ * @param start A pointer to the start of the constant.
+ * @param length The length of the constant.
+ * @return The id of the constant.
+ */
+pm_constant_id_t pm_constant_pool_find(pm_constant_pool_t *pool, const uint8_t *start, size_t length);
 /**
  * Insert a constant into a constant pool that is a slice of a source string.
  * Returns the id of the constant, or 0 if any potential calls to resize fail.

data/include/prism/version.h CHANGED Viewed

@@ -14,7 +14,7 @@
 /**
  * The minor version of the Prism library as an int.
  */
-#define PRISM_VERSION_MINOR 20
+#define PRISM_VERSION_MINOR 21
 /**
  * The patch version of the Prism library as an int.
@@ -24,6 +24,6 @@
 /**
  * The version of the Prism library as a constant string.
  */
-#define PRISM_VERSION "0.20.0"
+#define PRISM_VERSION "0.21.0"
 #endif

data/lib/prism/serialize.rb CHANGED Viewed

@@ -27,7 +27,7 @@ module Prism
     # The minor version of prism that we are expecting to find in the serialized
     # strings.
-    MINOR_VERSION = 20
+    MINOR_VERSION = 21
     # The patch version of prism that we are expecting to find in the serialized
     # strings.

data/lib/prism/translation/parser/compiler.rb CHANGED Viewed

@@ -105,14 +105,18 @@ module Prism
         # { a: 1 }
         #   ^^^^
         def visit_assoc_node(node)
-          if node.value.is_a?(ImplicitNode)
-            builder.pair_label([node.key.slice.chomp(":"), srange(node.key.location)])
-          elsif in_pattern && node.value.nil?
-            if node.key.is_a?(SymbolNode)
-              builder.match_hash_var([node.key.unescaped, srange(node.key.location)])
+          if in_pattern
+            if node.value.is_a?(ImplicitNode)
+              if node.key.is_a?(SymbolNode)
+                builder.match_hash_var([node.key.unescaped, srange(node.key.location)])
+              else
+                builder.match_hash_var_from_str(token(node.key.opening_loc), visit_all(node.key.parts), token(node.key.closing_loc))
+              end
             else
-              builder.match_hash_var_from_str(token(node.key.opening_loc), visit_all(node.key.parts), token(node.key.closing_loc))
+              builder.pair_keyword([node.key.unescaped, srange(node.key.location)], visit(node.value))
             end
+          elsif node.value.is_a?(ImplicitNode)
+            builder.pair_label([node.key.unescaped, srange(node.key.location)])
           elsif node.operator_loc
             builder.pair(visit(node.key), token(node.operator_loc), visit(node.value))
           elsif node.key.is_a?(SymbolNode) && node.key.opening_loc.nil?
@@ -241,53 +245,51 @@ module Prism
             block = nil
           end
+          if node.call_operator_loc.nil?
+            case name
+            when :!
+              return visit_block(builder.not_op(token(node.message_loc), token(node.opening_loc), visit(node.receiver), token(node.closing_loc)), block)
+            when :[]
+              return visit_block(builder.index(visit(node.receiver), token(node.opening_loc), visit_all(arguments), token(node.closing_loc)), block)
+            when :[]=
+              if node.message != "[]=" && node.arguments && block.nil? && !node.safe_navigation?
+                return visit_block(
+                  builder.assign(
+                    builder.index_asgn(
+                      visit(node.receiver),
+                      token(node.opening_loc),
+                      visit_all(node.arguments.arguments[...-1]),
+                      token(node.closing_loc),
+                    ),
+                    srange_find(node.message_loc.end_offset, node.arguments.arguments.last.location.start_offset, ["="]),
+                    visit(node.arguments.arguments.last)
+                  ),
+                  block
+                )
+              end
+            end
+          end
+          message_loc = node.message_loc
+          call_operator_loc = node.call_operator_loc
+          call_operator = [{ "." => :dot, "&." => :anddot, "::" => "::" }.fetch(call_operator_loc.slice), srange(call_operator_loc)] if call_operator_loc
           visit_block(
-            if name == :!
-              builder.not_op(
-                token(node.message_loc),
-                token(node.opening_loc),
-                visit(node.receiver),
-                token(node.closing_loc)
+            if name.end_with?("=") && !message_loc.slice.end_with?("=") && node.arguments && block.nil?
+              builder.assign(
+                builder.attr_asgn(visit(node.receiver), call_operator, token(message_loc)),
+                srange_find(message_loc.end_offset, node.arguments.location.start_offset, ["="]),
+                visit(node.arguments.arguments.last)
               )
-            elsif name == :[]
-              builder.index(
+            else
+              builder.call_method(
                 visit(node.receiver),
+                call_operator,
+                message_loc ? [node.name, srange(message_loc)] : nil,
                 token(node.opening_loc),
                 visit_all(arguments),
                 token(node.closing_loc)
               )
-            elsif name == :[]= && node.message != "[]=" && node.arguments && block.nil?
-              builder.assign(
-                builder.index_asgn(
-                  visit(node.receiver),
-                  token(node.opening_loc),
-                  visit_all(node.arguments.arguments[...-1]),
-                  token(node.closing_loc),
-                ),
-                srange_find(node.message_loc.end_offset, node.arguments.arguments.last.location.start_offset, ["="]),
-                visit(node.arguments.arguments.last)
-              )
-            else
-              message_loc = node.message_loc
-              call_operator_loc = node.call_operator_loc
-              call_operator = [{ "." => :dot, "&." => :anddot, "::" => "::" }.fetch(call_operator_loc.slice), srange(call_operator_loc)] if call_operator_loc
-              if name.end_with?("=") && !message_loc.slice.end_with?("=") && node.arguments && block.nil?
-                builder.assign(
-                  builder.attr_asgn(visit(node.receiver), call_operator, token(message_loc)),
-                  srange_find(message_loc.end_offset, node.arguments.location.start_offset, ["="]),
-                  visit(node.arguments.arguments.last)
-                )
-              else
-                builder.call_method(
-                  visit(node.receiver),
-                  call_operator,
-                  message_loc ? [node.name, srange(message_loc)] : nil,
-                  token(node.opening_loc),
-                  visit_all(arguments),
-                  token(node.closing_loc)
-                )
-              end
             end,
             block
           )
@@ -519,8 +521,6 @@ module Prism
         # def self.foo; end
         # ^^^^^^^^^^^^^^^^^
         def visit_def_node(node)
-          forwarding = find_forwarding(node.parameters)
           if node.equal_loc
             if node.receiver
               builder.def_endless_singleton(
@@ -530,7 +530,7 @@ module Prism
                 token(node.name_loc),
                 builder.args(token(node.lparen_loc), visit(node.parameters) || [], token(node.rparen_loc), false),
                 token(node.equal_loc),
-                node.body&.accept(copy_compiler(forwarding: forwarding))
+                node.body&.accept(copy_compiler(forwarding: find_forwarding(node.parameters)))
               )
             else
               builder.def_endless_method(
@@ -538,7 +538,7 @@ module Prism
                 token(node.name_loc),
                 builder.args(token(node.lparen_loc), visit(node.parameters) || [], token(node.rparen_loc), false),
                 token(node.equal_loc),
-                node.body&.accept(copy_compiler(forwarding: forwarding))
+                node.body&.accept(copy_compiler(forwarding: find_forwarding(node.parameters)))
               )
             end
           elsif node.receiver
@@ -548,7 +548,7 @@ module Prism
               token(node.operator_loc),
               token(node.name_loc),
               builder.args(token(node.lparen_loc), visit(node.parameters) || [], token(node.rparen_loc), false),
-              node.body&.accept(copy_compiler(forwarding: forwarding)),
+              node.body&.accept(copy_compiler(forwarding: find_forwarding(node.parameters))),
               token(node.end_keyword_loc)
             )
           else
@@ -556,7 +556,7 @@ module Prism
               token(node.def_keyword_loc),
               token(node.name_loc),
               builder.args(token(node.lparen_loc), visit(node.parameters) || [], token(node.rparen_loc), false),
-              node.body&.accept(copy_compiler(forwarding: forwarding)),
+              node.body&.accept(copy_compiler(forwarding: find_forwarding(node.parameters))),
               token(node.end_keyword_loc)
             )
           end
@@ -614,9 +614,7 @@ module Prism
         # foo => [*, bar, *]
         #        ^^^^^^^^^^^
         def visit_find_pattern_node(node)
-          elements = [*node.requireds]
-          elements << node.rest if !node.rest.nil? && !node.rest.is_a?(ImplicitRestNode)
-          elements.concat(node.posts)
+          elements = [node.left, *node.requireds, node.right]
           if node.constant
             builder.const_pattern(visit(node.constant), token(node.opening_loc), builder.find_pattern(nil, visit_all(elements), nil), token(node.closing_loc))
@@ -993,24 +991,24 @@ module Prism
         # -> {}
         def visit_lambda_node(node)
+          parameters = node.parameters
           builder.block(
             builder.call_lambda(token(node.operator_loc)),
             [node.opening, srange(node.opening_loc)],
-            if node.parameters
-              if node.parameters.is_a?(NumberedParametersNode)
-                visit(node.parameters)
-              else
-                builder.args(
-                  token(node.parameters.opening_loc),
-                  visit(node.parameters),
-                  token(node.parameters.closing_loc),
-                  false
-                )
-              end
-            else
+            if parameters.nil?
               builder.args(nil, [], nil, false)
+            elsif node.parameters.is_a?(NumberedParametersNode)
+              visit(node.parameters)
+            else
+              builder.args(
+                token(node.parameters.opening_loc),
+                visit(node.parameters),
+                token(node.parameters.closing_loc),
+                false
+              )
             end,
-            node.body&.accept(copy_compiler(forwarding: find_forwarding(node.parameters&.parameters))),
+            node.body&.accept(copy_compiler(forwarding: parameters.is_a?(NumberedParametersNode) ? [] : find_forwarding(parameters&.parameters))),
             [node.closing, srange(node.closing_loc)]
           )
         end
@@ -1096,7 +1094,7 @@ module Prism
         # case of a syntax error. The parser gem doesn't have such a concept, so
         # we invent our own here.
         def visit_missing_node(node)
-          raise CompilationError, "Cannot compile missing nodes"
+          ::AST::Node.new(:missing, [], location: ::Parser::Source::Map.new(srange(node.location)))
         end
         # module Foo; end
@@ -1727,29 +1725,29 @@ module Prism
         # Visit a block node on a call.
         def visit_block(call, block)
           if block
+            parameters = block.parameters
             builder.block(
               call,
               token(block.opening_loc),
-              if (parameters = block.parameters)
-                if parameters.is_a?(NumberedParametersNode)
-                  visit(parameters)
-                else
-                  builder.args(
-                    token(parameters.opening_loc),
-                    if procarg0?(parameters.parameters)
-                      parameter = parameters.parameters.requireds.first
-                      [builder.procarg0(visit(parameter))].concat(visit_all(parameters.locals))
-                    else
-                      visit(parameters)
-                    end,
-                    token(parameters.closing_loc),
-                    false
-                  )
-                end
-              else
+              if parameters.nil?
                 builder.args(nil, [], nil, false)
+              elsif parameters.is_a?(NumberedParametersNode)
+                visit(parameters)
+              else
+                builder.args(
+                  token(parameters.opening_loc),
+                  if procarg0?(parameters.parameters)
+                    parameter = parameters.parameters.requireds.first
+                    [builder.procarg0(visit(parameter))].concat(visit_all(parameters.locals))
+                  else
+                    visit(parameters)
+                  end,
+                  token(parameters.closing_loc),
+                  false
+                )
               end,
-              block.body&.accept(copy_compiler(forwarding: find_forwarding(block.parameters&.parameters))),
+              block.body&.accept(copy_compiler(forwarding: parameters.is_a?(NumberedParametersNode) ? [] : find_forwarding(parameters&.parameters))),
               token(block.closing_loc)
             )
           else
@@ -1762,9 +1760,9 @@ module Prism
           children = []
           node.parts.each do |part|
             pushing =
-              if part.is_a?(StringNode) && part.unescaped.count("\n") > 1
-                unescaped = part.unescaped.split("\n")
-                escaped = part.content.split("\n")
+              if part.is_a?(StringNode) && part.unescaped.include?("\n")
+                unescaped = part.unescaped.lines(chomp: true)
+                escaped = part.content.lines(chomp: true)
                 escaped_lengths =
                   if node.opening.end_with?("'")
@@ -1779,7 +1777,6 @@ module Prism
                 unescaped.zip(escaped_lengths).map do |unescaped_line, escaped_length|
                   end_offset = start_offset + (escaped_length || 0)
                   inner_part = builder.string_internal(["#{unescaped_line}\n", srange_offsets(start_offset, end_offset)])
                   start_offset = end_offset
                   inner_part
                 end

data/lib/prism/translation/parser.rb CHANGED Viewed

@@ -26,7 +26,7 @@ module Prism
       Racc_debug_parser = false # :nodoc:
       def version # :nodoc:
-        33
+        34
       end
       # The default encoding for Ruby files is UTF-8.
@@ -42,9 +42,10 @@ module Prism
         @source_buffer = source_buffer
         source = source_buffer.source
-        result = unwrap(Prism.parse(source, filepath: source_buffer.name))
+        offset_cache = build_offset_cache(source)
+        result = unwrap(Prism.parse(source, filepath: source_buffer.name), offset_cache)
-        build_ast(result.value, build_offset_cache(source))
+        build_ast(result.value, offset_cache)
       ensure
         @source_buffer = nil
       end
@@ -55,7 +56,7 @@ module Prism
         source = source_buffer.source
         offset_cache = build_offset_cache(source)
-        result = unwrap(Prism.parse(source, filepath: source_buffer.name))
+        result = unwrap(Prism.parse(source, filepath: source_buffer.name), offset_cache)
         [
           build_ast(result.value, offset_cache),
@@ -72,7 +73,7 @@ module Prism
         source = source_buffer.source
         offset_cache = build_offset_cache(source)
-        result = unwrap(Prism.parse_lex(source, filepath: source_buffer.name))
+        result = unwrap(Prism.parse_lex(source, filepath: source_buffer.name), offset_cache)
         program, tokens = result.value
@@ -93,16 +94,23 @@ module Prism
       private
+      # This is a hook to allow consumers to disable some errors if they don't
+      # want them to block creating the syntax tree.
+      def valid_error?(error)
+        true
+      end
       # If there was a error generated during the parse, then raise an
       # appropriate syntax error. Otherwise return the result.
-      def unwrap(result)
-        return result if result.success?
+      def unwrap(result, offset_cache)
+        result.errors.each do |error|
+          next unless valid_error?(error)
-        error = result.errors.first
-        offset_cache = build_offset_cache(source_buffer.source)
+          location = build_range(error.location, offset_cache)
+          diagnostics.process(Diagnostic.new(error.message, location))
+        end
-        diagnostic = Diagnostic.new(error.message, build_range(error.location, offset_cache))
-        raise ::Parser::SyntaxError, diagnostic
+        result
       end
       # Prism deals with offsets in bytes, while the parser gem deals with

data/prism.gemspec CHANGED Viewed

@@ -2,7 +2,7 @@
 Gem::Specification.new do |spec|
   spec.name = "prism"
-  spec.version = "0.20.0"
+  spec.version = "0.21.0"
   spec.authors = ["Shopify"]
   spec.email = ["ruby@shopify.com"]

data/src/encoding.c CHANGED Viewed

@@ -2252,7 +2252,7 @@ static const uint8_t pm_utf_8_dfa[] = {
  */
 static pm_unicode_codepoint_t
 pm_utf_8_codepoint(const uint8_t *b, ptrdiff_t n, size_t *width) {
-    assert(n >= 1);
+    assert(n >= 0);
     size_t maximum = (size_t) n;
     uint32_t codepoint;

data/src/prism.c CHANGED Viewed

@@ -870,6 +870,105 @@ pm_arguments_validate_block(pm_parser_t *parser, pm_arguments_t *arguments, pm_b
     pm_parser_err_node(parser, (pm_node_t *) block, PM_ERR_ARGUMENT_UNEXPECTED_BLOCK);
 }
+/******************************************************************************/
+/* Basic character checks                                                     */
+/******************************************************************************/
+/**
+ * This function is used extremely frequently to lex all of the identifiers in a
+ * source file, so it's important that it be as fast as possible. For this
+ * reason we have the encoding_changed boolean to check if we need to go through
+ * the function pointer or can just directly use the UTF-8 functions.
+ */
+static inline size_t
+char_is_identifier_start(const pm_parser_t *parser, const uint8_t *b) {
+    if (parser->encoding_changed) {
+        size_t width;
+        if ((width = parser->encoding->alpha_char(b, parser->end - b)) != 0) {
+            return width;
+        } else if (*b == '_') {
+            return 1;
+        } else if (*b >= 0x80) {
+            return parser->encoding->char_width(b, parser->end - b);
+        } else {
+            return 0;
+        }
+    } else if (*b < 0x80) {
+        return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT ? 1 : 0) || (*b == '_');
+    } else {
+        return pm_encoding_utf_8_char_width(b, parser->end - b);
+    }
+}
+/**
+ * Similar to char_is_identifier but this function assumes that the encoding
+ * has not been changed.
+ */
+static inline size_t
+char_is_identifier_utf8(const uint8_t *b, const uint8_t *end) {
+    if (*b < 0x80) {
+        return (*b == '_') || (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT ? 1 : 0);
+    } else {
+        return pm_encoding_utf_8_char_width(b, end - b);
+    }
+}
+/**
+ * Like the above, this function is also used extremely frequently to lex all of
+ * the identifiers in a source file once the first character has been found. So
+ * it's important that it be as fast as possible.
+ */
+static inline size_t
+char_is_identifier(pm_parser_t *parser, const uint8_t *b) {
+    if (parser->encoding_changed) {
+        size_t width;
+        if ((width = parser->encoding->alnum_char(b, parser->end - b)) != 0) {
+            return width;
+        } else if (*b == '_') {
+            return 1;
+        } else if (*b >= 0x80) {
+            return parser->encoding->char_width(b, parser->end - b);
+        } else {
+            return 0;
+        }
+    }
+    return char_is_identifier_utf8(b, parser->end);
+}
+// Here we're defining a perfect hash for the characters that are allowed in
+// global names. This is used to quickly check the next character after a $ to
+// see if it's a valid character for a global name.
+#define BIT(c, idx) (((c) / 32 - 1 == idx) ? (1U << ((c) % 32)) : 0)
+#define PUNCT(idx) ( \
+                BIT('~', idx) | BIT('*', idx) | BIT('$', idx) | BIT('?', idx) | \
+                BIT('!', idx) | BIT('@', idx) | BIT('/', idx) | BIT('\\', idx) | \
+                BIT(';', idx) | BIT(',', idx) | BIT('.', idx) | BIT('=', idx) | \
+                BIT(':', idx) | BIT('<', idx) | BIT('>', idx) | BIT('\"', idx) | \
+                BIT('&', idx) | BIT('`', idx) | BIT('\'', idx) | BIT('+', idx) | \
+                BIT('0', idx))
+const unsigned int pm_global_name_punctuation_hash[(0x7e - 0x20 + 31) / 32] = { PUNCT(0), PUNCT(1), PUNCT(2) };
+#undef BIT
+#undef PUNCT
+static inline bool
+char_is_global_name_punctuation(const uint8_t b) {
+    const unsigned int i = (const unsigned int) b;
+    if (i <= 0x20 || 0x7e < i) return false;
+    return (pm_global_name_punctuation_hash[(i - 0x20) / 32] >> (i % 32)) & 1;
+}
+static inline bool
+token_is_setter_name(pm_token_t *token) {
+    return (
+        (token->type == PM_TOKEN_IDENTIFIER) &&
+        (token->end - token->start >= 2) &&
+        (token->end[-1] == '=')
+    );
+}
 /******************************************************************************/
 /* Node flag handling functions                                               */
 /******************************************************************************/
@@ -1923,11 +2022,12 @@ pm_call_node_index_p(pm_call_node_t *node) {
  * operator assignment.
  */
 static inline bool
-pm_call_node_writable_p(pm_call_node_t *node) {
+pm_call_node_writable_p(const pm_parser_t *parser, const pm_call_node_t *node) {
     return (
         (node->message_loc.start != NULL) &&
         (node->message_loc.end[-1] != '!') &&
         (node->message_loc.end[-1] != '?') &&
+        char_is_identifier_start(parser, node->message_loc.start) &&
         (node->opening_loc.start == NULL) &&
         (node->arguments == NULL) &&
         (node->block == NULL)
@@ -2744,19 +2844,21 @@ pm_constant_write_node_create(pm_parser_t *parser, pm_constant_read_node_t *targ
  * Check if the receiver of a `def` node is allowed.
  */
 static void
-pm_check_def_receiver(pm_parser_t *parser, pm_node_t *receiver) {
-    switch (receiver->type) {
+pm_def_node_receiver_check(pm_parser_t *parser, const pm_node_t *node) {
+    switch (PM_NODE_TYPE(node)) {
         case PM_BEGIN_NODE: {
-            pm_begin_node_t *begin_node = (pm_begin_node_t *)receiver;
-            pm_check_def_receiver(parser, (pm_node_t *) begin_node->statements);
+            const pm_begin_node_t *cast = (pm_begin_node_t *) node;
+            if (cast->statements != NULL) pm_def_node_receiver_check(parser, (pm_node_t *) cast->statements);
             break;
         }
-        case PM_PARENTHESES_NODE:
-            pm_check_def_receiver(parser, ((pm_parentheses_node_t *) receiver)->body);
+        case PM_PARENTHESES_NODE: {
+            const pm_parentheses_node_t *cast = (const pm_parentheses_node_t *) node;
+            if (cast->body != NULL) pm_def_node_receiver_check(parser, cast->body);
             break;
+        }
         case PM_STATEMENTS_NODE: {
-            pm_statements_node_t *statements_node = (pm_statements_node_t *)receiver;
-            pm_check_def_receiver(parser, statements_node->body.nodes[statements_node->body.size - 1]);
+            const pm_statements_node_t *cast = (const pm_statements_node_t *) node;
+            pm_def_node_receiver_check(parser, cast->body.nodes[cast->body.size - 1]);
             break;
         }
         case PM_ARRAY_NODE:
@@ -2775,7 +2877,10 @@ pm_check_def_receiver(pm_parser_t *parser, pm_node_t *receiver) {
         case PM_STRING_NODE:
         case PM_SYMBOL_NODE:
         case PM_X_STRING_NODE:
-            pm_parser_err_node(parser, receiver, PM_ERR_SINGLETON_FOR_LITERALS);
+            pm_parser_err_node(parser, node, PM_ERR_SINGLETON_FOR_LITERALS);
+            break;
+        default:
+            break;
     }
 }
@@ -2807,7 +2912,7 @@ pm_def_node_create(
     }
     if ((receiver != NULL) && PM_NODE_TYPE_P(receiver, PM_PARENTHESES_NODE)) {
-        pm_check_def_receiver(parser, receiver);
+        pm_def_node_receiver_check(parser, receiver);
     }
     *node = (pm_def_node_t) {
@@ -5330,7 +5435,7 @@ pm_source_file_node_create(pm_parser_t *parser, const pm_token_t *file_keyword)
             .flags = PM_NODE_FLAG_STATIC_LITERAL,
             .location = PM_LOCATION_TOKEN_VALUE(file_keyword),
         },
-        .filepath = parser->filepath_string,
+        .filepath = parser->filepath
     };
     return node;
@@ -6220,6 +6325,16 @@ pm_parser_local_add_owned(pm_parser_t *parser, const uint8_t *start, size_t leng
     return constant_id;
 }
+/**
+ * Add a local variable from a constant string to the current scope.
+ */
+static pm_constant_id_t
+pm_parser_local_add_constant(pm_parser_t *parser, const char *start, size_t length) {
+    pm_constant_id_t constant_id = pm_parser_constant_id_constant(parser, start, length);
+    if (constant_id != 0) pm_parser_local_add(parser, constant_id);
+    return constant_id;
+}
 /**
  * Add a parameter name to the current scope and check whether the name of the
  * parameter is unique or not.
@@ -6259,105 +6374,6 @@ pm_parser_scope_pop(pm_parser_t *parser) {
     free(scope);
 }
-/******************************************************************************/
-/* Basic character checks                                                     */
-/******************************************************************************/
-/**
- * This function is used extremely frequently to lex all of the identifiers in a
- * source file, so it's important that it be as fast as possible. For this
- * reason we have the encoding_changed boolean to check if we need to go through
- * the function pointer or can just directly use the UTF-8 functions.
- */
-static inline size_t
-char_is_identifier_start(pm_parser_t *parser, const uint8_t *b) {
-    if (parser->encoding_changed) {
-        size_t width;
-        if ((width = parser->encoding->alpha_char(b, parser->end - b)) != 0) {
-            return width;
-        } else if (*b == '_') {
-            return 1;
-        } else if (*b >= 0x80) {
-            return parser->encoding->char_width(b, parser->end - b);
-        } else {
-            return 0;
-        }
-    } else if (*b < 0x80) {
-        return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT ? 1 : 0) || (*b == '_');
-    } else {
-        return pm_encoding_utf_8_char_width(b, parser->end - b);
-    }
-}
-/**
- * Similar to char_is_identifier but this function assumes that the encoding
- * has not been changed.
- */
-static inline size_t
-char_is_identifier_utf8(const uint8_t *b, const uint8_t *end) {
-    if (*b < 0x80) {
-        return (*b == '_') || (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHANUMERIC_BIT ? 1 : 0);
-    } else {
-        return pm_encoding_utf_8_char_width(b, end - b);
-    }
-}
-/**
- * Like the above, this function is also used extremely frequently to lex all of
- * the identifiers in a source file once the first character has been found. So
- * it's important that it be as fast as possible.
- */
-static inline size_t
-char_is_identifier(pm_parser_t *parser, const uint8_t *b) {
-    if (parser->encoding_changed) {
-        size_t width;
-        if ((width = parser->encoding->alnum_char(b, parser->end - b)) != 0) {
-            return width;
-        } else if (*b == '_') {
-            return 1;
-        } else if (*b >= 0x80) {
-            return parser->encoding->char_width(b, parser->end - b);
-        } else {
-            return 0;
-        }
-    }
-    return char_is_identifier_utf8(b, parser->end);
-}
-// Here we're defining a perfect hash for the characters that are allowed in
-// global names. This is used to quickly check the next character after a $ to
-// see if it's a valid character for a global name.
-#define BIT(c, idx) (((c) / 32 - 1 == idx) ? (1U << ((c) % 32)) : 0)
-#define PUNCT(idx) ( \
-                BIT('~', idx) | BIT('*', idx) | BIT('$', idx) | BIT('?', idx) | \
-                BIT('!', idx) | BIT('@', idx) | BIT('/', idx) | BIT('\\', idx) | \
-                BIT(';', idx) | BIT(',', idx) | BIT('.', idx) | BIT('=', idx) | \
-                BIT(':', idx) | BIT('<', idx) | BIT('>', idx) | BIT('\"', idx) | \
-                BIT('&', idx) | BIT('`', idx) | BIT('\'', idx) | BIT('+', idx) | \
-                BIT('0', idx))
-const unsigned int pm_global_name_punctuation_hash[(0x7e - 0x20 + 31) / 32] = { PUNCT(0), PUNCT(1), PUNCT(2) };
-#undef BIT
-#undef PUNCT
-static inline bool
-char_is_global_name_punctuation(const uint8_t b) {
-    const unsigned int i = (const unsigned int) b;
-    if (i <= 0x20 || 0x7e < i) return false;
-    return (pm_global_name_punctuation_hash[(i - 0x20) / 32] >> (i % 32)) & 1;
-}
-static inline bool
-token_is_setter_name(pm_token_t *token) {
-    return (
-        (token->type == PM_TOKEN_IDENTIFIER) &&
-        (token->end - token->start >= 2) &&
-        (token->end[-1] == '=')
-    );
-}
 /******************************************************************************/
 /* Stack helpers                                                              */
 /******************************************************************************/
@@ -7673,6 +7689,28 @@ escape_write_byte_encoded(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t byte
     pm_buffer_append_byte(buffer, byte);
 }
+/**
+ * Write each byte of the given escaped character into the buffer.
+ */
+static inline void
+escape_write_escape_encoded(pm_parser_t *parser, pm_buffer_t *buffer) {
+    size_t width;
+    if (parser->encoding_changed) {
+        width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
+    } else {
+        width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
+    }
+    // TODO: If the character is invalid in the given encoding, then we'll just
+    // push one byte into the buffer. This should actually be an error.
+    width = (width == 0) ? 1 : width;
+    for (size_t index = 0; index < width; index++) {
+        escape_write_byte_encoded(parser, buffer, *parser->current.end);
+        parser->current.end++;
+    }
+}
 /**
  * The regular expression engine doesn't support the same escape sequences as
  * Ruby does. So first we have to read the escape sequence, and then we have to
@@ -8011,7 +8049,7 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, uint8_t flags) {
         /* fallthrough */
         default: {
             if (parser->current.end < parser->end) {
-                escape_write_byte_encoded(parser, buffer, *parser->current.end++);
+                escape_write_escape_encoded(parser, buffer);
             }
             return;
         }
@@ -8288,10 +8326,40 @@ typedef struct {
  * Push the given byte into the token buffer.
  */
 static inline void
-pm_token_buffer_push(pm_token_buffer_t *token_buffer, uint8_t byte) {
+pm_token_buffer_push_byte(pm_token_buffer_t *token_buffer, uint8_t byte) {
     pm_buffer_append_byte(&token_buffer->buffer, byte);
 }
+/**
+ * Append the given bytes into the token buffer.
+ */
+static inline void
+pm_token_buffer_push_bytes(pm_token_buffer_t *token_buffer, const uint8_t *bytes, size_t length) {
+    pm_buffer_append_bytes(&token_buffer->buffer, bytes, length);
+}
+/**
+ * Push an escaped character into the token buffer.
+ */
+static inline void
+pm_token_buffer_push_escaped(pm_token_buffer_t *token_buffer, pm_parser_t *parser) {
+    // First, determine the width of the character to be escaped.
+    size_t width;
+    if (parser->encoding_changed) {
+        width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
+    } else {
+        width = pm_encoding_utf_8_char_width(parser->current.end, parser->end - parser->current.end);
+    }
+    // TODO: If the character is invalid in the given encoding, then we'll just
+    // push one byte into the buffer. This should actually be an error.
+    width = (width == 0 ? 1 : width);
+    // Now, push the bytes into the buffer.
+    pm_token_buffer_push_bytes(token_buffer, parser->current.end, width);
+    parser->current.end += width;
+}
 /**
  * When we're about to return from lexing the current token and we know for sure
  * that we have found an escape sequence, this function is called to copy the
@@ -9704,18 +9772,18 @@ parser_lex(pm_parser_t *parser) {
                         case '\t':
                         case '\v':
                         case '\\':
-                            pm_token_buffer_push(&token_buffer, peeked);
+                            pm_token_buffer_push_byte(&token_buffer, peeked);
                             parser->current.end++;
                             break;
                         case '\r':
                             parser->current.end++;
                             if (peek(parser) != '\n') {
-                                pm_token_buffer_push(&token_buffer, '\r');
+                                pm_token_buffer_push_byte(&token_buffer, '\r');
                                 break;
                             }
                         /* fallthrough */
                         case '\n':
-                            pm_token_buffer_push(&token_buffer, '\n');
+                            pm_token_buffer_push_byte(&token_buffer, '\n');
                             if (parser->heredoc_end) {
                                 // ... if we are on the same line as a heredoc,
@@ -9733,14 +9801,13 @@ parser_lex(pm_parser_t *parser) {
                             break;
                         default:
                             if (peeked == lex_mode->as.list.incrementor || peeked == lex_mode->as.list.terminator) {
-                                pm_token_buffer_push(&token_buffer, peeked);
+                                pm_token_buffer_push_byte(&token_buffer, peeked);
                                 parser->current.end++;
                             } else if (lex_mode->as.list.interpolation) {
                                 escape_read(parser, &token_buffer.buffer, PM_ESCAPE_FLAG_NONE);
                             } else {
-                                pm_token_buffer_push(&token_buffer, '\\');
-                                pm_token_buffer_push(&token_buffer, peeked);
-                                parser->current.end++;
+                                pm_token_buffer_push_byte(&token_buffer, '\\');
+                                pm_token_buffer_push_escaped(&token_buffer, parser);
                             }
                             break;
@@ -9898,9 +9965,9 @@ parser_lex(pm_parser_t *parser) {
                             parser->current.end++;
                             if (peek(parser) != '\n') {
                                 if (lex_mode->as.regexp.terminator != '\r') {
-                                    pm_token_buffer_push(&token_buffer, '\\');
+                                    pm_token_buffer_push_byte(&token_buffer, '\\');
                                 }
-                                pm_token_buffer_push(&token_buffer, '\r');
+                                pm_token_buffer_push_byte(&token_buffer, '\r');
                                 break;
                             }
                         /* fallthrough */
@@ -9935,20 +10002,19 @@ parser_lex(pm_parser_t *parser) {
                                     case '$': case ')': case '*': case '+':
                                     case '.': case '>': case '?': case ']':
                                     case '^': case '|': case '}':
-                                        pm_token_buffer_push(&token_buffer, '\\');
+                                        pm_token_buffer_push_byte(&token_buffer, '\\');
                                         break;
                                     default:
                                         break;
                                 }
-                                pm_token_buffer_push(&token_buffer, peeked);
+                                pm_token_buffer_push_byte(&token_buffer, peeked);
                                 parser->current.end++;
                                 break;
                             }
-                            if (peeked < 0x80) pm_token_buffer_push(&token_buffer, '\\');
-                            pm_token_buffer_push(&token_buffer, peeked);
-                            parser->current.end++;
+                            if (peeked < 0x80) pm_token_buffer_push_byte(&token_buffer, '\\');
+                            pm_token_buffer_push_escaped(&token_buffer, parser);
                             break;
                     }
@@ -10115,23 +10181,23 @@ parser_lex(pm_parser_t *parser) {
                         switch (peeked) {
                             case '\\':
-                                pm_token_buffer_push(&token_buffer, '\\');
+                                pm_token_buffer_push_byte(&token_buffer, '\\');
                                 parser->current.end++;
                                 break;
                             case '\r':
                                 parser->current.end++;
                                 if (peek(parser) != '\n') {
                                     if (!lex_mode->as.string.interpolation) {
-                                        pm_token_buffer_push(&token_buffer, '\\');
+                                        pm_token_buffer_push_byte(&token_buffer, '\\');
                                     }
-                                    pm_token_buffer_push(&token_buffer, '\r');
+                                    pm_token_buffer_push_byte(&token_buffer, '\r');
                                     break;
                                 }
                             /* fallthrough */
                             case '\n':
                                 if (!lex_mode->as.string.interpolation) {
-                                    pm_token_buffer_push(&token_buffer, '\\');
-                                    pm_token_buffer_push(&token_buffer, '\n');
+                                    pm_token_buffer_push_byte(&token_buffer, '\\');
+                                    pm_token_buffer_push_byte(&token_buffer, '\n');
                                 }
                                 if (parser->heredoc_end) {
@@ -10150,17 +10216,16 @@ parser_lex(pm_parser_t *parser) {
                                 break;
                             default:
                                 if (lex_mode->as.string.incrementor != '\0' && peeked == lex_mode->as.string.incrementor) {
-                                    pm_token_buffer_push(&token_buffer, peeked);
+                                    pm_token_buffer_push_byte(&token_buffer, peeked);
                                     parser->current.end++;
                                 } else if (lex_mode->as.string.terminator != '\0' && peeked == lex_mode->as.string.terminator) {
-                                    pm_token_buffer_push(&token_buffer, peeked);
+                                    pm_token_buffer_push_byte(&token_buffer, peeked);
                                     parser->current.end++;
                                 } else if (lex_mode->as.string.interpolation) {
                                     escape_read(parser, &token_buffer.buffer, PM_ESCAPE_FLAG_NONE);
                                 } else {
-                                    pm_token_buffer_push(&token_buffer, '\\');
-                                    pm_token_buffer_push(&token_buffer, peeked);
-                                    parser->current.end++;
+                                    pm_token_buffer_push_byte(&token_buffer, '\\');
+                                    pm_token_buffer_push_escaped(&token_buffer, parser);
                                 }
                                 break;
@@ -10417,21 +10482,20 @@ parser_lex(pm_parser_t *parser) {
                                 case '\r':
                                     parser->current.end++;
                                     if (peek(parser) != '\n') {
-                                        pm_token_buffer_push(&token_buffer, '\\');
-                                        pm_token_buffer_push(&token_buffer, '\r');
+                                        pm_token_buffer_push_byte(&token_buffer, '\\');
+                                        pm_token_buffer_push_byte(&token_buffer, '\r');
                                         break;
                                     }
                                 /* fallthrough */
                                 case '\n':
-                                    pm_token_buffer_push(&token_buffer, '\\');
-                                    pm_token_buffer_push(&token_buffer, '\n');
+                                    pm_token_buffer_push_byte(&token_buffer, '\\');
+                                    pm_token_buffer_push_byte(&token_buffer, '\n');
                                     token_buffer.cursor = parser->current.end + 1;
                                     breakpoint = parser->current.end;
                                     continue;
                                 default:
-                                    parser->current.end++;
-                                    pm_token_buffer_push(&token_buffer, '\\');
-                                    pm_token_buffer_push(&token_buffer, peeked);
+                                    pm_token_buffer_push_byte(&token_buffer, '\\');
+                                    pm_token_buffer_push_escaped(&token_buffer, parser);
                                     break;
                             }
                         } else {
@@ -10439,7 +10503,7 @@ parser_lex(pm_parser_t *parser) {
                                 case '\r':
                                     parser->current.end++;
                                     if (peek(parser) != '\n') {
-                                        pm_token_buffer_push(&token_buffer, '\r');
+                                        pm_token_buffer_push_byte(&token_buffer, '\r');
                                         break;
                                     }
                                 /* fallthrough */
@@ -10715,14 +10779,6 @@ match4(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2,
     return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4);
 }
-/**
- * Returns true if the current token is any of the five given types.
- */
-static inline bool
-match5(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5) {
-    return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5);
-}
 /**
  * Returns true if the current token is any of the six given types.
  */
@@ -11359,7 +11415,7 @@ parse_statements(pm_parser_t *parser, pm_context_t context) {
             break;
         }
-        // If we have a terminator, then we will parse all consequtive terminators
+        // If we have a terminator, then we will parse all consecutive terminators
         // and then continue parsing the statements list.
         if (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
             // If we have a terminator, then we will continue parsing the statements
@@ -13149,6 +13205,15 @@ outer_scope_using_numbered_parameters_p(pm_parser_t *parser) {
     return false;
 }
+/**
+ * These are the names of the various numbered parameters. We have them here so
+ * that when we insert them into the constant pool we can use a constant string
+ * and not have to allocate.
+ */
+static const char * const pm_numbered_parameter_names[] = {
+    "_1", "_2", "_3", "_4", "_5", "_6", "_7", "_8", "_9"
+};
 /**
  * Parse an identifier into either a local variable read. If the local variable
  * is not found, it returns NULL instead.
@@ -13171,12 +13236,10 @@ parse_variable(pm_parser_t *parser) {
             pm_parser_err_previous(parser, PM_ERR_NUMBERED_PARAMETER_OUTER_SCOPE);
         } else {
             // Indicate that this scope is using numbered params so that child
-            // scopes cannot.
-            uint8_t number = parser->previous.start[1];
-            // We subtract the value for the character '0' to get the actual
-            // integer value of the number (only _1 through _9 are valid)
-            uint8_t numbered_parameters = (uint8_t) (number - '0');
+            // scopes cannot. We subtract the value for the character '0' to get
+            // the actual integer value of the number (only _1 through _9 are
+            // valid).
+            uint8_t numbered_parameters = (uint8_t) (parser->previous.start[1] - '0');
             if (numbered_parameters > parser->current_scope->numbered_parameters) {
                 parser->current_scope->numbered_parameters = numbered_parameters;
                 pm_parser_numbered_parameters_set(parser, numbered_parameters);
@@ -13187,21 +13250,13 @@ parse_variable(pm_parser_t *parser) {
             // referencing _2 means that _1 must exist. Therefore here we
             // loop through all of the possibilities and add them into the
             // constant pool.
-            uint8_t current = '1';
-            uint8_t *value;
-            while (current < number) {
-                value = malloc(2);
-                value[0] = '_';
-                value[1] = current++;
-                pm_parser_local_add_owned(parser, value, 2);
+            for (uint8_t numbered_parameter = 1; numbered_parameter <= numbered_parameters - 1; numbered_parameter++) {
+                pm_parser_local_add_constant(parser, pm_numbered_parameter_names[numbered_parameter - 1], 2);
             }
-            // Now we can add the actual token that is being used. For
-            // this one we can add a shared version since it is directly
-            // referenced in the source.
-            pm_parser_local_add_token(parser, &parser->previous);
-            return pm_local_variable_read_node_create(parser, &parser->previous, 0);
+            // Finally we can create the local variable read node.
+            pm_constant_id_t name_id = pm_parser_local_add_constant(parser, pm_numbered_parameter_names[numbered_parameters - 1], 2);
+            return pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0);
         }
     }
@@ -14010,7 +14065,7 @@ parse_pattern(pm_parser_t *parser, bool top_pattern, pm_diagnostic_id_t diag_id)
         // Gather up all of the patterns into the list.
         while (accept1(parser, PM_TOKEN_COMMA)) {
             // Break early here in case we have a trailing comma.
-            if (match5(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON)) {
+            if (match6(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_TOKEN_EOF)) {
                 node = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
                 pm_node_list_append(&nodes, node);
                 break;
@@ -16927,7 +16982,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
                     }
                     // If this node cannot be writable, then we have an error.
-                    if (pm_call_node_writable_p(cast)) {
+                    if (pm_call_node_writable_p(parser, cast)) {
                         parse_write_name(parser, &cast->name);
                     } else {
                         pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
@@ -17038,7 +17093,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
                     }
                     // If this node cannot be writable, then we have an error.
-                    if (pm_call_node_writable_p(cast)) {
+                    if (pm_call_node_writable_p(parser, cast)) {
                         parse_write_name(parser, &cast->name);
                     } else {
                         pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
@@ -17159,7 +17214,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
                     }
                     // If this node cannot be writable, then we have an error.
-                    if (pm_call_node_writable_p(cast)) {
+                    if (pm_call_node_writable_p(parser, cast)) {
                         parse_write_name(parser, &cast->name);
                     } else {
                         pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_UNEXPECTED);
@@ -17751,7 +17806,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
         .encoding_changed_callback = NULL,
         .encoding_comment_start = source,
         .lex_callback = NULL,
-        .filepath_string = { 0 },
+        .filepath = { 0 },
         .constant_pool = { 0 },
         .newline_list = { 0 },
         .integer_base = 0,
@@ -17794,7 +17849,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
     // If options were provided to this parse, establish them here.
     if (options != NULL) {
         // filepath option
-        parser->filepath_string = options->filepath;
+        parser->filepath = options->filepath;
         // line option
         parser->start_line = options->line;
@@ -17896,7 +17951,7 @@ pm_magic_comment_list_free(pm_list_t *list) {
  */
 PRISM_EXPORTED_FUNCTION void
 pm_parser_free(pm_parser_t *parser) {
-    pm_string_free(&parser->filepath_string);
+    pm_string_free(&parser->filepath);
     pm_diagnostic_list_free(&parser->error_list);
     pm_diagnostic_list_free(&parser->warning_list);
     pm_comment_list_free(&parser->comment_list);
@@ -18060,7 +18115,9 @@ pm_parser_errors_format_sort(const pm_list_t *error_list, const pm_newline_list_
         // Now we're going to shift all of the errors after this one down one
         // index to make room for the new error.
-        memcpy(&errors[index + 1], &errors[index], sizeof(pm_error_t) * (error_list->size - index - 1));
+        if (index + 1 < error_list->size) {
+            memmove(&errors[index + 1], &errors[index], sizeof(pm_error_t) * (error_list->size - index - 1));
+        }
         // Finally, we'll insert the error into the array.
         uint32_t column_end;

data/src/util/pm_constant_pool.c CHANGED Viewed

@@ -181,6 +181,31 @@ pm_constant_pool_id_to_constant(const pm_constant_pool_t *pool, pm_constant_id_t
     return &pool->constants[constant_id - 1];
 }
+/**
+ * Find a constant in a constant pool. Returns the id of the constant, or 0 if
+ * the constant is not found.
+ */
+pm_constant_id_t
+pm_constant_pool_find(pm_constant_pool_t *pool, const uint8_t *start, size_t length) {
+    assert(is_power_of_two(pool->capacity));
+    const uint32_t mask = pool->capacity - 1;
+    uint32_t hash = pm_constant_pool_hash(start, length);
+    uint32_t index = hash & mask;
+    pm_constant_pool_bucket_t *bucket;
+    while (bucket = &pool->buckets[index], bucket->id != PM_CONSTANT_ID_UNSET) {
+        pm_constant_t *constant = &pool->constants[bucket->id - 1];
+        if ((constant->length == length) && memcmp(constant->start, start, length) == 0) {
+            return bucket->id;
+        }
+        index = (index + 1) & mask;
+    }
+    return PM_CONSTANT_ID_UNSET;
+}
 /**
  * Insert a constant into a constant pool and return its index in the pool.
  */

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: prism
 version: !ruby/object:Gem::Version
-  version: 0.20.0
+  version: 0.21.0
 platform: ruby
 authors:
 - Shopify
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2024-02-01 00:00:00.000000000 Z
+date: 2024-02-05 00:00:00.000000000 Z
 dependencies: []
 description:
 email: