RubyGems - prism - Versions diffs - 1.4.0 → 1.7.0 - Mend

prism 1.4.0 → 1.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (90)

checksums.yaml +4 -4
data/CHANGELOG.md +73 -1
data/Makefile +7 -5
data/README.md +3 -1
data/config.yml +294 -41
data/docs/build_system.md +2 -2
data/docs/cruby_compilation.md +1 -1
data/docs/design.md +2 -2
data/docs/parser_translation.md +8 -23
data/docs/releasing.md +6 -25
data/docs/ripper_translation.md +1 -1
data/ext/prism/api_node.c +9 -3
data/ext/prism/extconf.rb +1 -1
data/ext/prism/extension.c +24 -3
data/ext/prism/extension.h +1 -1
data/include/prism/ast.h +360 -70
data/include/prism/diagnostic.h +7 -0
data/include/prism/options.h +49 -3
data/include/prism/parser.h +3 -0
data/include/prism/regexp.h +2 -2
data/include/prism/util/pm_buffer.h +8 -0
data/include/prism/util/pm_integer.h +4 -0
data/include/prism/util/pm_list.h +6 -0
data/include/prism/util/pm_string.h +12 -2
data/include/prism/version.h +2 -2
data/include/prism.h +40 -15
data/lib/prism/compiler.rb +456 -151
data/lib/prism/desugar_compiler.rb +1 -0
data/lib/prism/dispatcher.rb +16 -0
data/lib/prism/dot_visitor.rb +10 -1
data/lib/prism/dsl.rb +5 -2
data/lib/prism/ffi.rb +28 -10
data/lib/prism/inspect_visitor.rb +4 -0
data/lib/prism/lex_compat.rb +1 -0
data/lib/prism/mutation_compiler.rb +3 -0
data/lib/prism/node.rb +559 -349
data/lib/prism/node_ext.rb +4 -1
data/lib/prism/pack.rb +2 -0
data/lib/prism/parse_result/comments.rb +1 -0
data/lib/prism/parse_result/errors.rb +1 -0
data/lib/prism/parse_result/newlines.rb +1 -0
data/lib/prism/parse_result.rb +3 -15
data/lib/prism/pattern.rb +1 -0
data/lib/prism/polyfill/scan_byte.rb +14 -0
data/lib/prism/polyfill/warn.rb +36 -0
data/lib/prism/reflection.rb +4 -1
data/lib/prism/relocation.rb +1 -0
data/lib/prism/serialize.rb +30 -22
data/lib/prism/string_query.rb +1 -0
data/lib/prism/translation/parser/builder.rb +1 -0
data/lib/prism/translation/parser/compiler.rb +63 -41
data/lib/prism/translation/parser/lexer.rb +29 -21
data/lib/prism/translation/parser.rb +25 -4
data/lib/prism/translation/parser33.rb +1 -0
data/lib/prism/translation/parser34.rb +1 -0
data/lib/prism/translation/parser35.rb +2 -6
data/lib/prism/translation/parser40.rb +13 -0
data/lib/prism/translation/parser41.rb +13 -0
data/lib/prism/translation/parser_current.rb +26 -0
data/lib/prism/translation/ripper/sexp.rb +1 -0
data/lib/prism/translation/ripper.rb +19 -3
data/lib/prism/translation/ruby_parser.rb +340 -22
data/lib/prism/translation.rb +4 -0
data/lib/prism/visitor.rb +457 -152
data/lib/prism.rb +22 -0
data/prism.gemspec +9 -1
data/rbi/prism/dsl.rbi +6 -6
data/rbi/prism/node.rbi +42 -17
data/rbi/prism/translation/parser35.rbi +0 -2
data/rbi/prism/translation/parser40.rbi +6 -0
data/rbi/prism/translation/parser41.rbi +6 -0
data/sig/prism/dispatcher.rbs +3 -0
data/sig/prism/dsl.rbs +5 -5
data/sig/prism/node.rbs +462 -38
data/sig/prism/node_ext.rbs +84 -17
data/sig/prism/parse_result/comments.rbs +38 -0
data/sig/prism/parse_result.rbs +4 -0
data/sig/prism/reflection.rbs +1 -1
data/sig/prism.rbs +4 -0
data/src/diagnostic.c +13 -1
data/src/encoding.c +172 -67
data/src/node.c +11 -0
data/src/options.c +17 -7
data/src/prettyprint.c +18 -0
data/src/prism.c +1495 -2021
data/src/serialize.c +9 -1
data/src/token_type.c +38 -36
data/src/util/pm_constant_pool.c +1 -1
data/src/util/pm_string.c +6 -8
metadata +11 -3

data/lib/prism/translation/parser/lexer.rb CHANGED Viewed

@@ -1,7 +1,9 @@
 # frozen_string_literal: true
+# :markup: markdown
 require "strscan"
 require_relative "../../polyfill/append_as_bytes"
+require_relative "../../polyfill/scan_byte"
 module Prism
   module Translation
@@ -200,8 +202,8 @@ module Prism
         # The `PARENTHESIS_LEFT` token in Prism is classified as either `tLPAREN` or `tLPAREN2` in the Parser gem.
         # The following token types are listed as those classified as `tLPAREN`.
         LPAREN_CONVERSION_TOKEN_TYPES = Set.new([
-          :kBREAK, :kCASE, :tDIVIDE, :kFOR, :kIF, :kNEXT, :kRETURN, :kUNTIL, :kWHILE, :tAMPER, :tANDOP, :tBANG, :tCOMMA, :tDOT2, :tDOT3,
-          :tEQL, :tLPAREN, :tLPAREN2, :tLPAREN_ARG, :tLSHFT, :tNL, :tOP_ASGN, :tOROP, :tPIPE, :tSEMI, :tSTRING_DBEG, :tUMINUS, :tUPLUS
+          :kBREAK, :tCARET, :kCASE, :tDIVIDE, :kFOR, :kIF, :kNEXT, :kRETURN, :kUNTIL, :kWHILE, :tAMPER, :tANDOP, :tBANG, :tCOMMA, :tDOT2, :tDOT3,
+          :tEQL, :tLPAREN, :tLPAREN2, :tLPAREN_ARG, :tLSHFT, :tNL, :tOP_ASGN, :tOROP, :tPIPE, :tSEMI, :tSTRING_DBEG, :tUMINUS, :tUPLUS, :tLCURLY
         ])
         # Types of tokens that are allowed to continue a method call with comments in-between.
@@ -275,20 +277,20 @@ module Prism
             when :tCOMMENT
               if token.type == :EMBDOC_BEGIN
-                while !((next_token = lexed[index][0]) && next_token.type == :EMBDOC_END) && (index < length - 1)
+                while !((next_token = lexed[index]&.first) && next_token.type == :EMBDOC_END) && (index < length - 1)
                   value += next_token.value
                   index += 1
                 end
                 value += next_token.value
-                location = range(token.location.start_offset, lexed[index][0].location.end_offset)
+                location = range(token.location.start_offset, next_token.location.end_offset)
                 index += 1
               else
                 is_at_eol = value.chomp!.nil?
                 location = range(token.location.start_offset, token.location.end_offset + (is_at_eol ? 0 : -1))
-                prev_token = lexed[index - 2][0] if index - 2 >= 0
-                next_token = lexed[index][0]
+                prev_token, _ = lexed[index - 2] if index - 2 >= 0
+                next_token, _ = lexed[index]
                 is_inline_comment = prev_token&.location&.start_line == token.location.start_line
                 if is_inline_comment && !is_at_eol && !COMMENT_CONTINUATION_TYPES.include?(next_token&.type)
@@ -307,7 +309,7 @@ module Prism
                 end
               end
             when :tNL
-              next_token = next_token = lexed[index][0]
+              next_token, _ = lexed[index]
               # Newlines after comments are emitted out of order.
               if next_token&.type == :COMMENT
                 comment_newline_location = location
@@ -344,8 +346,8 @@ module Prism
               location = range(token.location.start_offset, token.location.start_offset + percent_array_leading_whitespace(value))
               value = nil
             when :tSTRING_BEG
-              next_token = lexed[index][0]
-              next_next_token = lexed[index + 1][0]
+              next_token, _ = lexed[index]
+              next_next_token, _ = lexed[index + 1]
               basic_quotes = value == '"' || value == "'"
               if basic_quotes && next_token&.type == :STRING_END
@@ -413,7 +415,8 @@ module Prism
                 while token.type == :STRING_CONTENT
                   current_length += token.value.bytesize
                   # Heredoc interpolation can have multiple STRING_CONTENT nodes on the same line.
-                  is_first_token_on_line = lexed[index - 1] && token.location.start_line != lexed[index - 2][0].location&.start_line
+                  prev_token, _ = lexed[index - 2] if index - 2 >= 0
+                  is_first_token_on_line = prev_token && token.location.start_line != prev_token.location.start_line
                   # The parser gem only removes indentation when the heredoc is not nested
                   not_nested = heredoc_stack.size == 1
                   if is_percent_array
@@ -423,11 +426,16 @@ module Prism
                   end
                   current_string << unescape_string(value, quote_stack.last)
-                  if (backslash_count = token.value[/(\\{1,})\n/, 1]&.length).nil? || backslash_count.even? || !interpolation?(quote_stack.last)
+                  relevant_backslash_count = if quote_stack.last.start_with?("%W", "%I")
+                                               0 # the last backslash escapes the newline
+                                             else
+                                               token.value[/(\\{1,})\n/, 1]&.length || 0
+                                             end
+                  if relevant_backslash_count.even? || !interpolation?(quote_stack.last)
                     tokens << [:tSTRING_CONTENT, [current_string, range(start_offset, start_offset + current_length)]]
                     break
                   end
-                  token = lexed[index][0]
+                  token, _ = lexed[index]
                   index += 1
                 end
               else
@@ -482,7 +490,7 @@ module Prism
               end
               if percent_array?(quote_stack.pop)
-                prev_token = lexed[index - 2][0] if index - 2 >= 0
+                prev_token, _ = lexed[index - 2] if index - 2 >= 0
                 empty = %i[PERCENT_LOWER_I PERCENT_LOWER_W PERCENT_UPPER_I PERCENT_UPPER_W].include?(prev_token&.type)
                 ends_with_whitespace = prev_token&.type == :WORDS_SEP
                 # parser always emits a space token after content in a percent array, even if no actual whitespace is present.
@@ -491,7 +499,7 @@ module Prism
                 end
               end
             when :tSYMBEG
-              if (next_token = lexed[index][0]) && next_token.type != :STRING_CONTENT && next_token.type != :EMBEXPR_BEGIN && next_token.type != :EMBVAR && next_token.type != :STRING_END
+              if (next_token = lexed[index]&.first) && next_token.type != :STRING_CONTENT && next_token.type != :EMBEXPR_BEGIN && next_token.type != :EMBVAR && next_token.type != :STRING_END
                 next_location = token.location.join(next_token.location)
                 type = :tSYMBOL
                 value = next_token.value
@@ -506,13 +514,13 @@ module Prism
                 type = :tIDENTIFIER
               end
             when :tXSTRING_BEG
-              if (next_token = lexed[index][0]) && !%i[STRING_CONTENT STRING_END EMBEXPR_BEGIN].include?(next_token.type)
+              if (next_token = lexed[index]&.first) && !%i[STRING_CONTENT STRING_END EMBEXPR_BEGIN].include?(next_token.type)
                 # self.`()
                 type = :tBACK_REF2
               end
               quote_stack.push(value)
             when :tSYMBOLS_BEG, :tQSYMBOLS_BEG, :tWORDS_BEG, :tQWORDS_BEG
-              if (next_token = lexed[index][0]) && next_token.type == :WORDS_SEP
+              if (next_token = lexed[index]&.first) && next_token.type == :WORDS_SEP
                 index += 1
               end
@@ -588,9 +596,9 @@ module Prism
           previous_line = -1
           result = Float::MAX
-          while (lexed[next_token_index] && next_token = lexed[next_token_index][0])
+          while (next_token = lexed[next_token_index]&.first)
             next_token_index += 1
-            next_next_token = lexed[next_token_index] && lexed[next_token_index][0]
+            next_next_token, _ = lexed[next_token_index]
             first_token_on_line = next_token.location.start_column == 0
             # String content inside nested heredocs and interpolation is ignored
@@ -761,12 +769,12 @@ module Prism
           elsif (value = scanner.scan(/M-\\?(?=[[:print:]])/))
             # \M-x where x is an ASCII printable character
             escape_read(result, scanner, control, true)
-          elsif (byte = scanner.get_byte)
+          elsif (byte = scanner.scan_byte)
             # Something else after an escape.
-            if control && byte == "?"
+            if control && byte == 0x3f # ASCII '?'
               result.append_as_bytes(escape_build(0x7f, false, meta))
             else
-              result.append_as_bytes(escape_build(byte.ord, control, meta))
+              result.append_as_bytes(escape_build(byte, control, meta))
             end
           end
         end

data/lib/prism/translation/parser.rb CHANGED Viewed

@@ -1,9 +1,15 @@
 # frozen_string_literal: true
+# :markup: markdown
 begin
+  required_version = ">= 3.3.7.2"
+  gem "parser", required_version
   require "parser"
 rescue LoadError
-  warn(%q{Error: Unable to load parser. Add `gem "parser"` to your Gemfile.})
+  warn(<<~MSG)
+    Error: Unable to load parser #{required_version}. \
+    Add `gem "parser"` to your Gemfile or run `bundle update parser`.
+  MSG
   exit(1)
 end
@@ -13,6 +19,13 @@ module Prism
     # whitequark/parser gem's syntax tree. It inherits from the base parser for
     # the parser gem, and overrides the parse* methods to parse with prism and
     # then translate.
+    #
+    # Note that this version of the parser always parses using the latest
+    # version of Ruby syntax supported by Prism. If you want specific version
+    # support, use one of the version-specific subclasses, such as
+    # `Prism::Translation::Parser34`. If you want to parse using the same
+    # version of Ruby syntax as the currently running version of Ruby, use
+    # `Prism::Translation::ParserCurrent`.
     class Parser < ::Parser::Base
       Diagnostic = ::Parser::Diagnostic # :nodoc:
       private_constant :Diagnostic
@@ -59,13 +72,19 @@ module Prism
       # should be implemented as needed.
       #
       def initialize(builder = Prism::Translation::Parser::Builder.new, parser: Prism)
+        if !builder.is_a?(Prism::Translation::Parser::Builder)
+          warn(<<~MSG, uplevel: 1, category: :deprecated)
+            [deprecation]: The builder passed to `Prism::Translation::Parser.new` is not a \
+            `Prism::Translation::Parser::Builder` subclass. This will raise in the next major version.
+          MSG
+        end
         @parser = parser
         super(builder)
       end
       def version # :nodoc:
-        34
+        41
       end
       # The default encoding for Ruby files is UTF-8.
@@ -337,8 +356,10 @@ module Prism
           "3.3.1"
         when 34
           "3.4.0"
-        when 35
-          "3.5.0"
+        when 35, 40
+          "4.0.0"
+        when 41
+          "4.1.0"
         else
           "latest"
         end

data/lib/prism/translation/parser33.rb CHANGED Viewed

@@ -1,4 +1,5 @@
 # frozen_string_literal: true
+# :markup: markdown
 module Prism
   module Translation

data/lib/prism/translation/parser34.rb CHANGED Viewed

@@ -1,4 +1,5 @@
 # frozen_string_literal: true
+# :markup: markdown
 module Prism
   module Translation

data/lib/prism/translation/parser35.rb CHANGED Viewed

@@ -1,12 +1,8 @@
 # frozen_string_literal: true
+# :markup: markdown
 module Prism
   module Translation
-    # This class is the entry-point for Ruby 3.5 of `Prism::Translation::Parser`.
-    class Parser35 < Parser
-      def version # :nodoc:
-        35
-      end
-    end
+    Parser35 = Parser40 # :nodoc:
   end
 end

data/lib/prism/translation/parser40.rb ADDED Viewed

@@ -0,0 +1,13 @@
+# frozen_string_literal: true
+# :markup: markdown
+module Prism
+  module Translation
+    # This class is the entry-point for Ruby 4.0 of `Prism::Translation::Parser`.
+    class Parser40 < Parser
+      def version # :nodoc:
+        40
+      end
+    end
+  end
+end

data/lib/prism/translation/parser41.rb ADDED Viewed

@@ -0,0 +1,13 @@
+# frozen_string_literal: true
+# :markup: markdown
+module Prism
+  module Translation
+    # This class is the entry-point for Ruby 4.1 of `Prism::Translation::Parser`.
+    class Parser41 < Parser
+      def version # :nodoc:
+        41
+      end
+    end
+  end
+end

data/lib/prism/translation/parser_current.rb ADDED Viewed

@@ -0,0 +1,26 @@
+# frozen_string_literal: true
+# :markup: markdown
+# typed: ignore
+#
+module Prism
+  module Translation
+    case RUBY_VERSION
+    when /^3\.3\./
+      ParserCurrent = Parser33
+    when /^3\.4\./
+      ParserCurrent = Parser34
+    when /^3\.5\./, /^4\.0\./
+      ParserCurrent = Parser40
+    when /^4\.1\./
+      ParserCurrent = Parser41
+    else
+      # Keep this in sync with released Ruby.
+      parser = Parser34
+      major, minor, _patch = Gem::Version.new(RUBY_VERSION).segments
+      warn "warning: `Prism::Translation::Current` is loading #{parser.name}, " \
+           "but you are running #{major}.#{minor}."
+      ParserCurrent = parser
+    end
+  end
+end

data/lib/prism/translation/ripper/sexp.rb CHANGED Viewed

@@ -1,4 +1,5 @@
 # frozen_string_literal: true
+# :markup: markdown
 require_relative "../ripper"

data/lib/prism/translation/ripper.rb CHANGED Viewed

@@ -1,4 +1,5 @@
 # frozen_string_literal: true
+# :markup: markdown
 require "ripper"
@@ -70,7 +71,7 @@ module Prism
       #          [[1, 13], :on_kw,     "end", END      ]]
       #
       def self.lex(src, filename = "-", lineno = 1, raise_errors: false)
-        result = Prism.lex_compat(src, filepath: filename, line: lineno)
+        result = Prism.lex_compat(src, filepath: filename, line: lineno, version: "current")
         if result.failure? && raise_errors
           raise SyntaxError, result.errors.first.message
@@ -1615,8 +1616,23 @@ module Prism
       # defined?(a)
       # ^^^^^^^^^^^
       def visit_defined_node(node)
+        expression = visit(node.value)
+        # Very weird circumstances here where something like:
+        #
+        #     defined?
+        #     (1)
+        #
+        # gets parsed in Ruby as having only the `1` expression but in Ripper it
+        # gets parsed as having a parentheses node. In this case we need to
+        # synthesize that node to match Ripper's behavior.
+        if node.lparen_loc && node.keyword_loc.join(node.lparen_loc).slice.include?("\n")
+          bounds(node.lparen_loc.join(node.rparen_loc))
+          expression = on_paren(on_stmts_add(on_stmts_new, expression))
+        end
         bounds(node.location)
-        on_defined(visit(node.value))
+        on_defined(expression)
       end
       # if foo then bar else baz end
@@ -3279,7 +3295,7 @@ module Prism
       # Lazily initialize the parse result.
       def result
-        @result ||= Prism.parse(source, partial_script: true)
+        @result ||= Prism.parse(source, partial_script: true, version: "current")
       end
       ##########################################################################