RubyGems - parser-prism - Versions diffs - 0.1.0 - Mend

parser-prism 0.1.0

Files changed (18) hide show

checksums.yaml +7 -0
data/.github/workflows/main.yml +18 -0
data/.gitignore +3 -0
data/.rubocop.yml +4 -0
data/Gemfile +11 -0
data/Gemfile.lock +57 -0
data/LICENSE +21 -0
data/README.md +96 -0
data/Rakefile +96 -0
data/bin/bench +15 -0
data/bin/parse +23 -0
data/lib/parser/prism/compare.rb +148 -0
data/lib/parser/prism/compiler.rb +1758 -0
data/lib/parser/prism/lexer.rb +293 -0
data/lib/parser/prism/rubocop.rb +27 -0
data/lib/parser/prism.rb +128 -0
data/parser-prism.gemspec +25 -0
metadata +87 -0

data/lib/parser/prism/lexer.rb ADDED Viewed

@@ -0,0 +1,293 @@
+# frozen_string_literal: true
+module Parser
+  class Prism
+    # Accepts a list of prism tokens and converts them into the expected format
+    # for the parser gem: an array of [type, [value, range]] entries.
+    class Lexer
+      # Maps prism token types to parser gem token types. A nil value marks a
+      # prism token that must not be emitted on its own.
+      TYPES = {
+        # These tokens should never appear in the output of the lexer.
+        EOF: nil,
+        MISSING: nil,
+        NOT_PROVIDED: nil,
+        IGNORED_NEWLINE: nil,
+        EMBDOC_END: nil,
+        EMBDOC_LINE: nil,
+        __END__: nil,
+        # These tokens have more or less direct mappings.
+        AMPERSAND: :tAMPER2,
+        AMPERSAND_AMPERSAND: :tANDOP,
+        AMPERSAND_AMPERSAND_EQUAL: :tOP_ASGN,
+        AMPERSAND_DOT: :tANDDOT,
+        AMPERSAND_EQUAL: :tOP_ASGN,
+        BACK_REFERENCE: :tBACK_REF,
+        BACKTICK: :tXSTRING_BEG,
+        BANG: :tBANG,
+        BANG_EQUAL: :tNEQ,
+        BANG_TILDE: :tNMATCH,
+        BRACE_LEFT: :tLCURLY,
+        BRACE_RIGHT: :tRCURLY,
+        BRACKET_LEFT: :tLBRACK2,
+        BRACKET_LEFT_ARRAY: :tLBRACK,
+        BRACKET_LEFT_RIGHT: :tAREF,
+        BRACKET_LEFT_RIGHT_EQUAL: :tASET,
+        BRACKET_RIGHT: :tRBRACK,
+        CARET: :tCARET,
+        CARET_EQUAL: :tOP_ASGN,
+        CHARACTER_LITERAL: :tCHARACTER,
+        CLASS_VARIABLE: :tCVAR,
+        COLON: :tCOLON,
+        COLON_COLON: :tCOLON2,
+        COMMA: :tCOMMA,
+        COMMENT: :tCOMMENT,
+        CONSTANT: :tCONSTANT,
+        DOT: :tDOT,
+        DOT_DOT: :tDOT2,
+        DOT_DOT_DOT: :tDOT3,
+        EMBDOC_BEGIN: :tCOMMENT,
+        EMBEXPR_BEGIN: :tSTRING_DBEG,
+        EMBEXPR_END: :tSTRING_DEND,
+        EMBVAR: :tSTRING_DVAR,
+        EQUAL: :tEQL,
+        EQUAL_EQUAL: :tEQ,
+        EQUAL_EQUAL_EQUAL: :tEQQ,
+        EQUAL_GREATER: :tASSOC,
+        EQUAL_TILDE: :tMATCH,
+        FLOAT: :tFLOAT,
+        FLOAT_IMAGINARY: :tIMAGINARY,
+        FLOAT_RATIONAL: :tRATIONAL,
+        FLOAT_RATIONAL_IMAGINARY: :tIMAGINARY,
+        GLOBAL_VARIABLE: :tGVAR,
+        GREATER: :tGT,
+        GREATER_EQUAL: :tGEQ,
+        GREATER_GREATER: :tRSHFT,
+        GREATER_GREATER_EQUAL: :tOP_ASGN,
+        HEREDOC_START: :tSTRING_BEG,
+        HEREDOC_END: :tSTRING_END,
+        IDENTIFIER: :tIDENTIFIER,
+        INSTANCE_VARIABLE: :tIVAR,
+        INTEGER: :tINTEGER,
+        INTEGER_IMAGINARY: :tIMAGINARY,
+        INTEGER_RATIONAL: :tRATIONAL,
+        INTEGER_RATIONAL_IMAGINARY: :tIMAGINARY,
+        KEYWORD_ALIAS: :kALIAS,
+        KEYWORD_AND: :kAND,
+        KEYWORD_BEGIN: :kBEGIN,
+        KEYWORD_BEGIN_UPCASE: :klBEGIN,
+        KEYWORD_BREAK: :kBREAK,
+        KEYWORD_CASE: :kCASE,
+        KEYWORD_CLASS: :kCLASS,
+        KEYWORD_DEF: :kDEF,
+        KEYWORD_DEFINED: :kDEFINED,
+        KEYWORD_DO: :kDO,
+        KEYWORD_DO_LOOP: :kDO_COND,
+        KEYWORD_END: :kEND,
+        KEYWORD_END_UPCASE: :klEND,
+        KEYWORD_ENSURE: :kENSURE,
+        KEYWORD_ELSE: :kELSE,
+        KEYWORD_ELSIF: :kELSIF,
+        KEYWORD_FALSE: :kFALSE,
+        KEYWORD_FOR: :kFOR,
+        KEYWORD_IF: :kIF,
+        KEYWORD_IF_MODIFIER: :kIF_MOD,
+        KEYWORD_IN: :kIN,
+        KEYWORD_MODULE: :kMODULE,
+        KEYWORD_NEXT: :kNEXT,
+        KEYWORD_NIL: :kNIL,
+        KEYWORD_NOT: :kNOT,
+        KEYWORD_OR: :kOR,
+        KEYWORD_REDO: :kREDO,
+        KEYWORD_RESCUE: :kRESCUE,
+        KEYWORD_RESCUE_MODIFIER: :kRESCUE_MOD,
+        KEYWORD_RETRY: :kRETRY,
+        KEYWORD_RETURN: :kRETURN,
+        KEYWORD_SELF: :kSELF,
+        KEYWORD_SUPER: :kSUPER,
+        KEYWORD_THEN: :kTHEN,
+        KEYWORD_TRUE: :kTRUE,
+        KEYWORD_UNDEF: :kUNDEF,
+        KEYWORD_UNLESS: :kUNLESS,
+        KEYWORD_UNLESS_MODIFIER: :kUNLESS_MOD,
+        KEYWORD_UNTIL: :kUNTIL,
+        KEYWORD_UNTIL_MODIFIER: :kUNTIL_MOD,
+        KEYWORD_WHEN: :kWHEN,
+        KEYWORD_WHILE: :kWHILE,
+        KEYWORD_WHILE_MODIFIER: :kWHILE_MOD,
+        KEYWORD_YIELD: :kYIELD,
+        KEYWORD___ENCODING__: :k__ENCODING__,
+        KEYWORD___FILE__: :k__FILE__,
+        KEYWORD___LINE__: :k__LINE__,
+        LABEL: :tLABEL,
+        LABEL_END: :tLABEL_END,
+        LAMBDA_BEGIN: :tLAMBEG,
+        LESS: :tLT,
+        LESS_EQUAL: :tLEQ,
+        LESS_EQUAL_GREATER: :tCMP,
+        LESS_LESS: :tLSHFT,
+        LESS_LESS_EQUAL: :tOP_ASGN,
+        METHOD_NAME: :tFID,
+        MINUS: :tMINUS,
+        MINUS_EQUAL: :tOP_ASGN,
+        MINUS_GREATER: :tLAMBDA,
+        NEWLINE: :tNL,
+        NUMBERED_REFERENCE: :tNTH_REF,
+        PARENTHESIS_LEFT: :tLPAREN,
+        PARENTHESIS_LEFT_PARENTHESES: :tLPAREN_ARG,
+        PARENTHESIS_RIGHT: :tRPAREN,
+        PERCENT: :tPERCENT,
+        PERCENT_EQUAL: :tOP_ASGN,
+        PERCENT_LOWER_I: :tQSYMBOLS_BEG,
+        PERCENT_LOWER_W: :tQWORDS_BEG,
+        PERCENT_UPPER_I: :tSYMBOLS_BEG,
+        PERCENT_UPPER_W: :tWORDS_BEG,
+        PERCENT_LOWER_X: :tXSTRING_BEG,
+        PLUS: :tPLUS,
+        PLUS_EQUAL: :tOP_ASGN,
+        PIPE_EQUAL: :tOP_ASGN,
+        PIPE: :tPIPE,
+        PIPE_PIPE: :tOROP,
+        PIPE_PIPE_EQUAL: :tOP_ASGN,
+        QUESTION_MARK: :tEH,
+        REGEXP_BEGIN: :tREGEXP_BEG,
+        REGEXP_END: :tSTRING_END,
+        SEMICOLON: :tSEMI,
+        SLASH: :tDIVIDE,
+        SLASH_EQUAL: :tOP_ASGN,
+        STAR: :tSTAR2,
+        STAR_EQUAL: :tOP_ASGN,
+        STAR_STAR: :tPOW,
+        STAR_STAR_EQUAL: :tOP_ASGN,
+        STRING_BEGIN: :tSTRING_BEG,
+        STRING_CONTENT: :tSTRING_CONTENT,
+        STRING_END: :tSTRING_END,
+        SYMBOL_BEGIN: :tSYMBEG,
+        TILDE: :tTILDE,
+        UAMPERSAND: :tAMPER,
+        UCOLON_COLON: :tCOLON3,
+        # Beginless ranges use the tBDOT* variants, matching UDOT_DOT_DOT.
+        UDOT_DOT: :tBDOT2,
+        UDOT_DOT_DOT: :tBDOT3,
+        UMINUS: :tUMINUS,
+        UMINUS_NUM: :tUNARY_NUM,
+        UPLUS: :tUPLUS,
+        USTAR: :tSTAR,
+        # The unary double splat is tDSTAR; tPOW is the binary ** operator.
+        USTAR_STAR: :tDSTAR,
+        WORDS_SEP: :tSPACE
+      }.freeze
+      private_constant :TYPES
+      attr_reader :buffer, :lexed, :offset_cache
+      # @param buffer the source buffer that every emitted range points into
+      # @param lexed the list of prism tokens to convert
+      # @param offset_cache an object answering #[] that converts a prism byte
+      #   offset into the offset used for parser gem ranges
+      def initialize(buffer, lexed, offset_cache)
+        @buffer = buffer
+        @lexed = lexed
+        @offset_cache = offset_cache
+      end
+      # Converts the prism tokens into parser gem tokens.
+      #
+      # Some prism tokens are merged (simple strings, symbols, embedded
+      # documents), some are split (signed integers, regexp endings) and tokens
+      # whose type maps to nil in TYPES are dropped.
+      #
+      # @return [Array] entries of the form [type, [value, range]]
+      def to_a
+        tokens = []
+        index = 0
+        while index < lexed.length
+          token, = lexed[index]
+          index += 1
+          type = TYPES.fetch(token.type)
+          # A nil mapping means the token must never reach the output.
+          next if type.nil?
+          value = token.value
+          location = range(token.location.start_offset, token.location.end_offset)
+          case type
+          when :tCHARACTER
+            value.delete_prefix!("?")
+          when :tCOMMENT
+            if token.type == :EMBDOC_BEGIN
+              # Fold the following tokens, up to and including EMBDOC_END, into
+              # a single comment. Stop quietly if the token list runs out first.
+              last_token = token
+              while (next_token = lexed[index])
+                value += next_token.value
+                last_token = next_token
+                index += 1
+                break if next_token.type == :EMBDOC_END
+              end
+              location = range(token.location.start_offset, last_token.location.end_offset)
+            else
+              # Shrink the range by exactly the bytes chomp! removed, so a
+              # comment without a trailing newline keeps its last character.
+              newline_bytes = value.bytesize
+              value.chomp!
+              newline_bytes -= value.bytesize
+              location = range(token.location.start_offset, token.location.end_offset - newline_bytes)
+            end
+          when :tNL
+            value = nil
+          when :tFLOAT
+            value = Float(value)
+          when :tIMAGINARY
+            value.chomp!("i")
+            value = Complex(0, value.end_with?("r") ? Rational(value.chomp("r")) : value)
+          when :tINTEGER
+            if value.start_with?("+")
+              # Emit the sign as its own token and start the integer after it.
+              tokens << [:tUNARY_NUM, ["+", range(token.location.start_offset, token.location.start_offset + 1)]]
+              location = range(token.location.start_offset + 1, token.location.end_offset)
+            end
+            value = Integer(value)
+          when :tLABEL
+            value.chomp!(":")
+          when :tLABEL_END
+            value.chomp!(":")
+          when :tNTH_REF
+            value = Integer(value.delete_prefix("$"))
+          when :tOP_ASGN
+            value.chomp!("=")
+          when :tRATIONAL
+            value = Rational(value.chomp("r"))
+          when :tSPACE
+            value = nil
+          when :tSTRING_BEG
+            if ["\"", "'"].include?(value) && (next_token = lexed[index]) && next_token.type == :STRING_END
+              # An empty quoted string collapses into a single tSTRING token.
+              next_location = token.location.join(next_token.location)
+              type = :tSTRING
+              value = ""
+              location = range(next_location.start_offset, next_location.end_offset)
+              index += 1
+            elsif ["\"", "'"].include?(value) && (next_token = lexed[index]) && next_token.type == :STRING_CONTENT && (next_next_token = lexed[index + 1]) && next_next_token.type == :STRING_END
+              # A quoted string with one content token also becomes one tSTRING.
+              next_location = token.location.join(next_next_token.location)
+              type = :tSTRING
+              value = next_token.value
+              location = range(next_location.start_offset, next_location.end_offset)
+              index += 2
+            elsif value.start_with?("<<")
+              # Heredoc openers are reduced to "<<" plus the quote character,
+              # using a double quote when the heredoc is unquoted.
+              quote = value[2] == "-" || value[2] == "~" ? value[3] : value[2]
+              value = "<<#{quote == "'" || quote == "\"" ? quote : "\""}"
+            end
+          when :tSTRING_DVAR
+            value = nil
+          when :tSTRING_END
+            if token.type == :REGEXP_END
+              # Only the closing delimiter belongs here; the remaining characters
+              # are emitted as tREGEXP_OPT after this token is pushed.
+              value = value[0]
+              location = range(token.location.start_offset, token.location.start_offset + 1)
+            end
+          when :tSYMBEG
+            if (next_token = lexed[index]) && next_token.type != :STRING_CONTENT
+              # The symbol opener and the following token form one tSYMBOL.
+              next_location = token.location.join(next_token.location)
+              type = :tSYMBOL
+              value = next_token.value
+              location = range(next_location.start_offset, next_location.end_offset)
+              index += 1
+            end
+          when :tFID
+            # dig returns nil when no token has been emitted yet.
+            type = :tIDENTIFIER if tokens.dig(-1, 0) == :kDEF
+          end
+          tokens << [type, [value, location]]
+          if token.type == :REGEXP_END
+            tokens << [:tREGEXP_OPT, [token.value[1..], range(token.location.start_offset + 1, token.location.end_offset)]]
+          end
+        end
+        tokens
+      end
+      private
+      # Builds a range in the buffer from two prism offsets, converting each
+      # one through the offset cache.
+      def range(start_offset, end_offset)
+        Source::Range.new(buffer, offset_cache[start_offset], offset_cache[end_offset])
+      end
+    end
+  end
+end

data/lib/parser/prism/rubocop.rb ADDED Viewed

@@ -0,0 +1,27 @@
+# frozen_string_literal: true
+require "parser"
+require "rubocop"
+# Defines a sentinel target version that selects the prism-backed parser and
+# teaches RuboCop to accept it.
+module Parser
+  class Prism < Base
+    # Float sentinel compared against the ruby_version handed to parser_class.
+    # The digit groups 80, 82, 73, 83, 77 are the ASCII codes for "PRISM".
+    VERSION_3_3 = 80_82_73_83_77.33
+  end
+end
+# Prepend an override of parser_class: the sentinel version maps to
+# Parser::Prism, every other version falls through to the original method.
+RuboCop::AST::ProcessedSource.prepend(
+  Module.new do
+    def parser_class(ruby_version)
+      if ruby_version == Parser::Prism::VERSION_3_3
+        # Loaded lazily, only when the sentinel version is requested.
+        require "parser/prism"
+        Parser::Prism
+      else
+        super
+      end
+    end
+  end
+)
+# Replace KNOWN_RUBIES with a frozen copy that also lists the sentinel. The
+# constant is removed before it is reassigned.
+known_rubies = RuboCop::TargetRuby.const_get(:KNOWN_RUBIES)
+RuboCop::TargetRuby.send(:remove_const, :KNOWN_RUBIES)
+RuboCop::TargetRuby::KNOWN_RUBIES = [*known_rubies, Parser::Prism::VERSION_3_3].freeze

data/lib/parser/prism.rb ADDED Viewed

@@ -0,0 +1,128 @@
+# frozen_string_literal: true
+require "parser"
+require "prism"
+module Parser
+  # Parser gem backend that hands the source to prism and converts the results
+  # (AST, comments and tokens) into the parser gem's own formats.
+  class Prism < Base
+    Racc_debug_parser = false
+    # The version number this parser reports.
+    def version
+      33
+    end
+    # The encoding this parser reports as its default.
+    def default_encoding
+      Encoding::UTF_8
+    end
+    # Intentionally a no-op.
+    def yyerror
+    end
+    ##
+    # Parses a source buffer and returns the AST.
+    #
+    # @param [Parser::Source::Buffer] source_buffer The source buffer to parse.
+    # @return Parser::AST::Node
+    #
+    def parse(source_buffer)
+      @source_buffer = source_buffer
+      source = source_buffer.source
+      build_ast(
+        ::Prism.parse(source, filepath: source_buffer.name).value,
+        build_offset_cache(source)
+      )
+    ensure
+      # The buffer is only held for the duration of the call.
+      @source_buffer = nil
+    end
+    ##
+    # Parses a source buffer and returns the AST and the source code comments.
+    #
+    # @see #parse
+    # @see Parser::Source::Comment#associate
+    # @return [Array]
+    #
+    def parse_with_comments(source_buffer)
+      @source_buffer = source_buffer
+      source = source_buffer.source
+      offset_cache = build_offset_cache(source)
+      result = ::Prism.parse(source, filepath: source_buffer.name)
+      [
+        build_ast(result.value, offset_cache),
+        build_comments(result.comments, offset_cache)
+      ]
+    ensure
+      @source_buffer = nil
+    end
+    ##
+    # Parses a source buffer and returns the AST, the source code comments,
+    # and the tokens emitted by the lexer.
+    #
+    # @param [Parser::Source::Buffer] source_buffer
+    # @param _recover accepted but not used by this implementation
+    # @return [Array]
+    #
+    def tokenize(source_buffer, _recover = false)
+      @source_buffer = source_buffer
+      source = source_buffer.source
+      offset_cache = build_offset_cache(source)
+      result = ::Prism.parse_lex(source, filepath: source_buffer.name)
+      program, tokens = result.value
+      [
+        build_ast(program, offset_cache),
+        build_comments(result.comments, offset_cache),
+        build_tokens(tokens, offset_cache)
+      ]
+    ensure
+      @source_buffer = nil
+    end
+    # Since prism resolves num params for us, we don't need to support this kind
+    # of logic here. This only reports whether the node's first child is named
+    # like a numbered parameter (_1 through _9).
+    def try_declare_numparam(node)
+      node.children[0].match?(/\A_[1-9]\z/)
+    end
+    private
+    # Prism deals with offsets in bytes, while the parser gem deals with offsets
+    # in characters. We need to handle this conversion in order to build the
+    # parser gem AST.
+    #
+    # If the bytesize of the source is the same as the length, then we can just
+    # use the offset directly. Otherwise, we build a hash that functions as a
+    # cache for the conversion. Both return values answer #[] with an offset.
+    def build_offset_cache(source)
+      if source.bytesize == source.length
+        -> (offset) { offset }
+      else
+        Hash.new { |hash, offset| hash[offset] = source.byteslice(0, offset).length }
+      end
+    end
+    # Build the parser gem AST from the prism AST.
+    def build_ast(program, offset_cache)
+      program.accept(Compiler.new(self, offset_cache))
+    end
+    # Build the parser gem comments from the prism comments. Offsets go through
+    # the offset cache, like the AST and the tokens, so that comment ranges stay
+    # correct when the source contains multibyte characters.
+    def build_comments(comments, offset_cache)
+      comments.map do |comment|
+        location = comment.location
+        Source::Comment.new(Source::Range.new(source_buffer, offset_cache[location.start_offset], offset_cache[location.end_offset]))
+      end
+    end
+    # Build the parser gem tokens from the prism tokens. Each entry of the
+    # prism token list is reduced to its first element before conversion.
+    def build_tokens(tokens, offset_cache)
+      Lexer.new(source_buffer, tokens.map(&:first), offset_cache).to_a
+    end
+  end
+end
+require_relative "prism/compiler"
+require_relative "prism/lexer"

data/parser-prism.gemspec ADDED Viewed

@@ -0,0 +1,25 @@
+# frozen_string_literal: true
+# Gem specification for parser-prism.
+Gem::Specification.new do |spec|
+  spec.name = "parser-prism"
+  spec.version = "0.1.0"
+  spec.authors = ["Kevin Newton"]
+  spec.email = ["kddnewton@gmail.com"]
+  spec.summary = "A prism parser backend"
+  spec.homepage = "https://github.com/kddnewton/parser-prism"
+  spec.license = "MIT"
+  # Package every git-tracked file except those under test/, spec/ or
+  # features/. The listing runs from the directory containing this gemspec.
+  spec.files =
+    Dir.chdir(__dir__) do
+      `git ls-files -z`.split("\x0")
+        .reject { |f| f.match(%r{^(test|spec|features)/}) }
+    end
+  # Executables are the packaged files found under exe/.
+  spec.bindir = "exe"
+  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
+  spec.require_paths = ["lib"]
+  # Runtime dependencies, declared without version constraints.
+  spec.add_dependency "parser"
+  spec.add_dependency "prism"
+end

metadata ADDED Viewed

@@ -0,0 +1,87 @@
+--- !ruby/object:Gem::Specification
+name: parser-prism
+version: !ruby/object:Gem::Version
+  version: 0.1.0
+platform: ruby
+authors:
+- Kevin Newton
+autorequire:
+bindir: exe
+cert_chain: []
+date: 2024-01-03 00:00:00.000000000 Z
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: parser
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+- !ruby/object:Gem::Dependency
+  name: prism
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+description:
+email:
+- kddnewton@gmail.com
+executables: []
+extensions: []
+extra_rdoc_files: []
+files:
+- ".github/workflows/main.yml"
+- ".gitignore"
+- ".rubocop.yml"
+- Gemfile
+- Gemfile.lock
+- LICENSE
+- README.md
+- Rakefile
+- bin/bench
+- bin/parse
+- lib/parser/prism.rb
+- lib/parser/prism/compare.rb
+- lib/parser/prism/compiler.rb
+- lib/parser/prism/lexer.rb
+- lib/parser/prism/rubocop.rb
+- parser-prism.gemspec
+homepage: https://github.com/kddnewton/parser-prism
+licenses:
+- MIT
+metadata: {}
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: '0'
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: '0'
+requirements: []
+rubygems_version: 3.4.1
+signing_key:
+specification_version: 4
+summary: A prism parser backend
+test_files: []