RubyGems - prism - Versions diffs - 1.1.0 → 1.2.0 - Mend

prism 1.1.0 → 1.2.0

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the respective public registry.

Files changed (23)

checksums.yaml +4 -4
data/CHANGELOG.md +17 -1
data/config.yml +2 -1
data/ext/prism/extension.h +1 -1
data/include/prism/ast.h +1 -1
data/include/prism/diagnostic.h +1 -0
data/include/prism/parser.h +25 -12
data/include/prism/version.h +2 -2
data/lib/prism/node.rb +1 -1
data/lib/prism/parse_result.rb +140 -3
data/lib/prism/serialize.rb +13 -1
data/lib/prism/translation/parser.rb +3 -3
data/lib/prism/translation/ripper.rb +1 -5
data/lib/prism/translation/ruby_parser.rb +2 -2
data/prism.gemspec +1 -1
data/rbi/prism/node.rbi +5777 -1701
data/rbi/prism/parse_result.rbi +29 -0
data/rbi/prism.rbi +34 -34
data/sig/prism/node.rbs +1 -90
data/sig/prism/parse_result.rbs +20 -0
data/src/diagnostic.c +3 -1
data/src/prism.c +223 -115
metadata +2 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: ea89f88aef2ec51d2cfb5868cf873ab256393f5ba632381a3a53631c2506dbc3
-  data.tar.gz: 4da85d79e85d5cca843eb6e71cea07efc80a6e77608aa3027e1cd75c3c3b735a
+  metadata.gz: f16f842a06eec8141246c60a39f509a59817de34ab4be3a33502ada040ac1602
+  data.tar.gz: 38bd30b4ba63fe67892a0138be5b7b2c84fb6bd66e011871a07ee453be7b0aec
 SHA512:
-  metadata.gz: 1b7f92a58fa176b04aab230f49dcdc08f9b810575426402db6fc9eee0921ac5ddddf1f01f18bb9429a914955759e8d4cee5a70674e14eb0b0169018ce615780e
-  data.tar.gz: e4a5a6ba40bc7692c6c904f452dd8ff18881703c99d51969507619368748eec29936dc3f521e8967b6d728ea331f26f9f670b44daedde2106f649104aefe0c30
+  metadata.gz: 9877dc80270515e91c5357418a67721fa832c6de44d755047f576d69a3b09129d64da19e5ac767d184df74afe40982f56c88be3851ce7fc7c99c1d0bbf15ec77
+  data.tar.gz: 4578c2f1e2e934f763d6c55ae84dc076b0676ac0560c13364db1ade75425da76d64b7a4cf8fb07ea723568fbfc58c20fb8055e9dec364ac5da05765d26398d39

data/CHANGELOG.md CHANGED Viewed

@@ -6,6 +6,21 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) a
 ## [Unreleased]
+## [1.2.0] - 2024-10-10
+### Added
+- Introduce `Prism::CodeUnitsCache`.
+### Changed
+- Properly handle lexing global variables that begin with `$-`.
+- Properly reject invalid multi writes within parentheses.
+- Fix unary `*` binding power.
+- Set `contains_keywords` flag for implicit `gets` calls when `-p` is used.
+- Properly reject invalid non-associative operator patterns.
+- Do not warn about unused variables declared on negative lines.
 ## [1.1.0] - 2024-10-02
 ### Added
@@ -591,7 +606,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) a
 - 🎉 Initial release! 🎉
-[unreleased]: https://github.com/ruby/prism/compare/v1.1.0...HEAD
+[unreleased]: https://github.com/ruby/prism/compare/v1.2.0...HEAD
+[1.2.0]: https://github.com/ruby/prism/compare/v1.1.0...v1.2.0
 [1.1.0]: https://github.com/ruby/prism/compare/v1.0.0...v1.1.0
 [1.0.0]: https://github.com/ruby/prism/compare/v0.30.0...v1.0.0
 [0.30.0]: https://github.com/ruby/prism/compare/v0.29.0...v0.30.0

data/config.yml CHANGED Viewed

@@ -141,6 +141,7 @@ errors:
   - INSTANCE_VARIABLE_BARE
   - INVALID_BLOCK_EXIT
   - INVALID_CHARACTER
+  - INVALID_COMMA
   - INVALID_ENCODING_MAGIC_COMMENT
   - INVALID_ESCAPE_CHARACTER
   - INVALID_FLOAT_EXPONENT
@@ -3684,7 +3685,7 @@ nodes:
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
           end
-      `Foo, *splat, Bar` are in the `exceptions` field. `ex` is in the `exception` field.
+      `Foo, *splat, Bar` are in the `exceptions` field. `ex` is in the `reference` field.
   - name: RestParameterNode
     flags: ParameterFlags
     fields:

data/ext/prism/extension.h CHANGED Viewed

@@ -1,7 +1,7 @@
 #ifndef PRISM_EXT_NODE_H
 #define PRISM_EXT_NODE_H
-#define EXPECTED_PRISM_VERSION "1.1.0"
+#define EXPECTED_PRISM_VERSION "1.2.0"
 #include <ruby.h>
 #include <ruby/encoding.h>

data/include/prism/ast.h CHANGED Viewed

@@ -6490,7 +6490,7 @@ typedef struct pm_rescue_modifier_node {
  *     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  *     end
  *
- * `Foo, *splat, Bar` are in the `exceptions` field. `ex` is in the `exception` field.
+ * `Foo, *splat, Bar` are in the `exceptions` field. `ex` is in the `reference` field.
  *
  * Type: ::PM_RESCUE_NODE
  *

data/include/prism/diagnostic.h CHANGED Viewed

@@ -170,6 +170,7 @@ typedef enum {
     PM_ERR_INSTANCE_VARIABLE_BARE,
     PM_ERR_INVALID_BLOCK_EXIT,
     PM_ERR_INVALID_CHARACTER,
+    PM_ERR_INVALID_COMMA,
     PM_ERR_INVALID_ENCODING_MAGIC_COMMENT,
     PM_ERR_INVALID_ESCAPE_CHARACTER,
     PM_ERR_INVALID_FLOAT_EXPONENT,

data/include/prism/parser.h CHANGED Viewed

@@ -82,6 +82,23 @@ typedef enum {
     PM_HEREDOC_INDENT_TILDE,
 } pm_heredoc_indent_t;
+/**
+ * All of the information necessary to store to lexing a heredoc.
+ */
+typedef struct {
+    /** A pointer to the start of the heredoc identifier. */
+    const uint8_t *ident_start;
+    /** The length of the heredoc identifier. */
+    size_t ident_length;
+    /** The type of quote that the heredoc uses. */
+    pm_heredoc_quote_t quote;
+    /** The type of indentation that the heredoc uses. */
+    pm_heredoc_indent_t indent;
+} pm_heredoc_lex_mode_t;
 /**
  * When lexing Ruby source, the lexer has a small amount of state to tell which
  * kind of token it is currently lexing. For example, when we find the start of
@@ -210,17 +227,10 @@ typedef struct pm_lex_mode {
         } string;
         struct {
-            /** A pointer to the start of the heredoc identifier. */
-            const uint8_t *ident_start;
-            /** The length of the heredoc identifier. */
-            size_t ident_length;
-            /** The type of quote that the heredoc uses. */
-            pm_heredoc_quote_t quote;
-            /** The type of indentation that the heredoc uses. */
-            pm_heredoc_indent_t indent;
+            /**
+             * All of the data necessary to lex a heredoc.
+             */
+            pm_heredoc_lex_mode_t base;
             /**
              * This is the pointer to the character where lexing should resume
@@ -233,7 +243,7 @@ typedef struct pm_lex_mode {
              * line so that we know how much to dedent each line in the case of
              * a tilde heredoc.
              */
-            size_t common_whitespace;
+            size_t *common_whitespace;
             /** True if the previous token ended with a line continuation. */
             bool line_continuation;
@@ -382,6 +392,9 @@ typedef enum {
     /** a rescue statement within a module statement */
     PM_CONTEXT_MODULE_RESCUE,
+    /** a multiple target expression */
+    PM_CONTEXT_MULTI_TARGET,
     /** a parenthesized expression */
     PM_CONTEXT_PARENS,

data/include/prism/version.h CHANGED Viewed

@@ -14,7 +14,7 @@
 /**
  * The minor version of the Prism library as an int.
  */
-#define PRISM_VERSION_MINOR 1
+#define PRISM_VERSION_MINOR 2
 /**
  * The patch version of the Prism library as an int.
@@ -24,6 +24,6 @@
 /**
  * The version of the Prism library as a constant string.
  */
-#define PRISM_VERSION "1.1.0"
+#define PRISM_VERSION "1.2.0"
 #endif

data/lib/prism/node.rb CHANGED Viewed

@@ -14219,7 +14219,7 @@ module Prism
   #     ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
   #     end
   #
-  # `Foo, *splat, Bar` are in the `exceptions` field. `ex` is in the `exception` field.
+  # `Foo, *splat, Bar` are in the `exceptions` field. `ex` is in the `reference` field.
   class RescueNode < Node
     # Initialize a new RescueNode node.
     def initialize(source, node_id, location, flags, keyword_loc, exceptions, operator_loc, reference, statements, subsequent)

data/lib/prism/parse_result.rb CHANGED Viewed

@@ -12,6 +12,21 @@ module Prism
     def self.for(source, start_line = 1, offsets = [])
       if source.ascii_only?
         ASCIISource.new(source, start_line, offsets)
+      elsif source.encoding == Encoding::BINARY
+        source.force_encoding(Encoding::UTF_8)
+        if source.valid_encoding?
+          new(source, start_line, offsets)
+        else
+          # This is an extremely niche use case where the file is marked as
+          # binary, contains multi-byte characters, and those characters are not
+          # valid UTF-8. In this case we'll mark it as binary and fall back to
+          # treating everything as a single-byte character. This _may_ cause
+          # problems when asking for code units, but it appears to be the
+          # cleanest solution at the moment.
+          source.force_encoding(Encoding::BINARY)
+          ASCIISource.new(source, start_line, offsets)
+        end
       else
         new(source, start_line, offsets)
       end
@@ -89,8 +104,14 @@ module Prism
     # This method is tested with UTF-8, UTF-16, and UTF-32. If there is the
     # concept of code units that differs from the number of characters in other
     # encodings, it is not captured here.
+    #
+    # We purposefully replace invalid and undefined characters with replacement
+    # characters in this conversion. This happens for two reasons. First, it's
+    # possible that the given byte offset will not occur on a character
+    # boundary. Second, it's possible that the source code will contain a
+    # character that has no equivalent in the given encoding.
     def code_units_offset(byte_offset, encoding)
-      byteslice = (source.byteslice(0, byte_offset) or raise).encode(encoding)
+      byteslice = (source.byteslice(0, byte_offset) or raise).encode(encoding, invalid: :replace, undef: :replace)
       if encoding == Encoding::UTF_16LE || encoding == Encoding::UTF_16BE
         byteslice.bytesize / 2
@@ -99,6 +120,12 @@ module Prism
       end
     end
+    # Generate a cache that targets a specific encoding for calculating code
+    # unit offsets.
+    def code_units_cache(encoding)
+      CodeUnitsCache.new(source, encoding)
+    end
     # Returns the column number in code units for the given encoding for the
     # given byte offset.
     def code_units_column(byte_offset, encoding)
@@ -128,10 +155,84 @@ module Prism
     end
   end
+  # A cache that can be used to quickly compute code unit offsets from byte
+  # offsets. It purposefully provides only a single #[] method to access the
+  # cache in order to minimize surface area.
+  #
+  # Note that there are some known issues here that may or may not be addressed
+  # in the future:
+  #
+  # * The first is that there are issues when the cache computes values that are
+  #   not on character boundaries. This can result in subsequent computations
+  #   being off by one or more code units.
+  # * The second is that this cache is currently unbounded. In theory we could
+  #   introduce some kind of LRU cache to limit the number of entries, but this
+  #   has not yet been implemented.
+  #
+  class CodeUnitsCache
+    class UTF16Counter # :nodoc:
+      def initialize(source, encoding)
+        @source = source
+        @encoding = encoding
+      end
+      def count(byte_offset, byte_length)
+        @source.byteslice(byte_offset, byte_length).encode(@encoding, invalid: :replace, undef: :replace).bytesize / 2
+      end
+    end
+    class LengthCounter # :nodoc:
+      def initialize(source, encoding)
+        @source = source
+        @encoding = encoding
+      end
+      def count(byte_offset, byte_length)
+        @source.byteslice(byte_offset, byte_length).encode(@encoding, invalid: :replace, undef: :replace).length
+      end
+    end
+    private_constant :UTF16Counter, :LengthCounter
+    # Initialize a new cache with the given source and encoding.
+    def initialize(source, encoding)
+      @source = source
+      @counter =
+        if encoding == Encoding::UTF_16LE || encoding == Encoding::UTF_16BE
+          UTF16Counter.new(source, encoding)
+        else
+          LengthCounter.new(source, encoding)
+        end
+      @cache = {}
+      @offsets = []
+    end
+    # Retrieve the code units offset from the given byte offset.
+    def [](byte_offset)
+      @cache[byte_offset] ||=
+        if (index = @offsets.bsearch_index { |offset| offset > byte_offset }).nil?
+          @offsets << byte_offset
+          @counter.count(0, byte_offset)
+        elsif index == 0
+          @offsets.unshift(byte_offset)
+          @counter.count(0, byte_offset)
+        else
+          @offsets.insert(index, byte_offset)
+          offset = @offsets[index - 1]
+          @cache[offset] + @counter.count(offset, byte_offset - offset)
+        end
+    end
+  end
   # Specialized version of Prism::Source for source code that includes ASCII
   # characters only. This class is used to apply performance optimizations that
-  # cannot be applied to sources that include multibyte characters. Sources that
-  # include multibyte characters are represented by the Prism::Source class.
+  # cannot be applied to sources that include multibyte characters.
+  #
+  # In the extremely rare case that a source includes multi-byte characters but
+  # is marked as binary because of a magic encoding comment and it cannot be
+  # eagerly converted to UTF-8, this class will be used as well. This is because
+  # at that point we will treat everything as single-byte characters.
   class ASCIISource < Source
     # Return the character offset for the given byte offset.
     def character_offset(byte_offset)
@@ -153,6 +254,13 @@ module Prism
       byte_offset
     end
+    # Returns a cache that is the identity function in order to maintain the
+    # same interface. We can do this because code units are always equivalent to
+    # byte offsets for ASCII-only sources.
+    def code_units_cache(encoding)
+      ->(byte_offset) { byte_offset }
+    end
     # Specialized version of `code_units_column` that does not depend on
     # `code_units_offset`, which is a more expensive operation. This is
     # essentially the same as `Prism::Source#column`.
@@ -262,6 +370,12 @@ module Prism
       source.code_units_offset(start_offset, encoding)
     end
+    # The start offset from the start of the file in code units using the given
+    # cache to fetch or calculate the value.
+    def cached_start_code_units_offset(cache)
+      cache[start_offset]
+    end
     # The byte offset from the beginning of the source where this location ends.
     def end_offset
       start_offset + length
@@ -278,6 +392,12 @@ module Prism
       source.code_units_offset(end_offset, encoding)
     end
+    # The end offset from the start of the file in code units using the given
+    # cache to fetch or calculate the value.
+    def cached_end_code_units_offset(cache)
+      cache[end_offset]
+    end
     # The line number where this location starts.
     def start_line
       source.line(start_offset)
@@ -312,6 +432,12 @@ module Prism
       source.code_units_column(start_offset, encoding)
     end
+    # The start column in code units using the given cache to fetch or calculate
+    # the value.
+    def cached_start_code_units_column(cache)
+      cache[start_offset] - cache[source.line_start(start_offset)]
+    end
     # The column number in bytes where this location ends from the start of the
     # line.
     def end_column
@@ -330,6 +456,12 @@ module Prism
       source.code_units_column(end_offset, encoding)
     end
+    # The end column in code units using the given cache to fetch or calculate
+    # the value.
+    def cached_end_code_units_column(cache)
+      cache[end_offset] - cache[source.line_start(end_offset)]
+    end
     # Implement the hash pattern matching interface for Location.
     def deconstruct_keys(keys)
       { start_offset: start_offset, end_offset: end_offset }
@@ -579,6 +711,11 @@ module Prism
     def failure?
       !success?
     end
+    # Create a code units cache for the given encoding.
+    def code_units_cache(encoding)
+      source.code_units_cache(encoding)
+    end
   end
   # This is a result specific to the `parse` and `parse_file` methods.

data/lib/prism/serialize.rb CHANGED Viewed

@@ -18,7 +18,7 @@ module Prism
     # The minor version of prism that we are expecting to find in the serialized
     # strings.
-    MINOR_VERSION = 1
+    MINOR_VERSION = 2
     # The patch version of prism that we are expecting to find in the serialized
     # strings.
@@ -28,10 +28,21 @@ module Prism
     def self.load(input, serialized)
       input = input.dup
       source = Source.for(input)
       loader = Loader.new(source, serialized)
       result = loader.load_result
       input.force_encoding(loader.encoding)
+      # This is an extremely niche use-case where the file was marked as binary
+      # but it contained UTF-8-encoded characters. In that case we will actually
+      # put it back to UTF-8 to give the location APIs the best chance of being
+      # correct.
+      if !input.ascii_only? && input.encoding == Encoding::BINARY
+        input.force_encoding(Encoding::UTF_8)
+        input.force_encoding(Encoding::BINARY) unless input.valid_encoding?
+      end
       result
     end
@@ -267,6 +278,7 @@ module Prism
         :instance_variable_bare,
         :invalid_block_exit,
         :invalid_character,
+        :invalid_comma,
         :invalid_encoding_magic_comment,
         :invalid_escape_character,
         :invalid_float_exponent,

data/lib/prism/translation/parser.rb CHANGED Viewed

@@ -51,7 +51,7 @@ module Prism
         source = source_buffer.source
         offset_cache = build_offset_cache(source)
-        result = unwrap(Prism.parse(source, filepath: source_buffer.name, version: convert_for_prism(version), scopes: [[]], encoding: false), offset_cache)
+        result = unwrap(Prism.parse(source, filepath: source_buffer.name, version: convert_for_prism(version), partial_script: true, encoding: false), offset_cache)
         build_ast(result.value, offset_cache)
       ensure
@@ -64,7 +64,7 @@ module Prism
         source = source_buffer.source
         offset_cache = build_offset_cache(source)
-        result = unwrap(Prism.parse(source, filepath: source_buffer.name, version: convert_for_prism(version), scopes: [[]], encoding: false), offset_cache)
+        result = unwrap(Prism.parse(source, filepath: source_buffer.name, version: convert_for_prism(version), partial_script: true, encoding: false), offset_cache)
         [
           build_ast(result.value, offset_cache),
@@ -83,7 +83,7 @@ module Prism
         offset_cache = build_offset_cache(source)
         result =
           begin
-            unwrap(Prism.parse_lex(source, filepath: source_buffer.name, version: convert_for_prism(version), scopes: [[]], encoding: false), offset_cache)
+            unwrap(Prism.parse_lex(source, filepath: source_buffer.name, version: convert_for_prism(version), partial_script: true, encoding: false), offset_cache)
           rescue ::Parser::SyntaxError
             raise if !recover
           end

data/lib/prism/translation/ripper.rb CHANGED Viewed

@@ -3269,11 +3269,7 @@ module Prism
       # Lazily initialize the parse result.
       def result
-        @result ||=
-          begin
-            scopes = RUBY_VERSION >= "3.3.0" ? [] : [[]]
-            Prism.parse(source, scopes: scopes)
-          end
+        @result ||= Prism.parse(source, partial_script: true)
       end
       ##########################################################################

data/lib/prism/translation/ruby_parser.rb CHANGED Viewed

@@ -1596,13 +1596,13 @@ module Prism
       # Parse the given source and translate it into the seattlerb/ruby_parser
       # gem's Sexp format.
       def parse(source, filepath = "(string)")
-        translate(Prism.parse(source, filepath: filepath, scopes: [[]]), filepath)
+        translate(Prism.parse(source, filepath: filepath, partial_script: true), filepath)
       end
       # Parse the given file and translate it into the seattlerb/ruby_parser
       # gem's Sexp format.
       def parse_file(filepath)
-        translate(Prism.parse_file(filepath, scopes: [[]]), filepath)
+        translate(Prism.parse_file(filepath, partial_script: true), filepath)
       end
       class << self

data/prism.gemspec CHANGED Viewed

@@ -2,7 +2,7 @@
 Gem::Specification.new do |spec|
   spec.name = "prism"
-  spec.version = "1.1.0"
+  spec.version = "1.2.0"
   spec.authors = ["Shopify"]
   spec.email = ["ruby@shopify.com"]