syntax_suggest 2.0.3 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: de1d75012724df4b1222ba39772c0fbfb6205cfd8713dd4f19ff53e473d7e4f3
4
- data.tar.gz: 1a705bbd28e351a682343089c14a2277953d917725f970eaa7884912a093be45
3
+ metadata.gz: af5147af49a48cc1639c03e5b4f89d9bd2d841981548fef41477786fc1955fa4
4
+ data.tar.gz: 21bbc4796ad0444e965258c645637f1faf853392d27a54a4a28096db92f44257
5
5
  SHA512:
6
- metadata.gz: d883c10ef026cfe8c26c930d0857ef02f7655421cdd83f69bd971e231f674124fcca4957032f0a9311c87d2fd798ffb7b9921aee6e9859b112213cfda0c48cd5
7
- data.tar.gz: e3d0a25404aa3ea0a802400ee42b3d8337f9180420772409e920683dd631e59c8c2f98bcf85a107dcb3ba888246a3962aeb7ca7ec036b3d0b08e17496406f145
6
+ metadata.gz: a48e4e6788b73a404d67cafd519713cce379ad1e84dc270e81f7047ea523d84b75d91420f3915614a68233eb9c4c07ab237e090741d2d7ffd4326001d6e4fcd7
7
+ data.tar.gz: e29a44a49a2185bec367895efda2ee6bc978adc24930711e9ddff7a9e4881519519ed2890d4ba9828608e30bf9b8e196f4b45bac42d78f1be0c8b3f745b87f02
@@ -24,6 +24,7 @@ jobs:
24
24
  uses: ruby/actions/.github/workflows/ruby_versions.yml@master
25
25
  with:
26
26
  engine: cruby
27
+ min_version: 3.3
27
28
 
28
29
  test:
29
30
  needs: ruby-versions
@@ -33,7 +34,9 @@ jobs:
33
34
  matrix:
34
35
  ruby: ${{ fromJson(needs.ruby-versions.outputs.versions) }}
35
36
  prism_version:
36
- - 1.2.0 # Shipped with Ruby 3.4 as default parser https://www.ruby-lang.org/en/news/2024/12/25/ruby-3-4-0-released/
37
+ # See https://stdgems.org/prism for which ruby version shipped with which prism version
38
+ - 0.19.0
39
+ - 1.2.0
37
40
  - 1.8.0
38
41
  - head
39
42
  env:
@@ -52,22 +55,3 @@ jobs:
52
55
  - name: test
53
56
  run: bin/rake test
54
57
  continue-on-error: ${{ matrix.ruby == 'head' }}
55
-
56
- test-disable-prism:
57
- needs: ruby-versions
58
- runs-on: ubuntu-latest
59
- strategy:
60
- fail-fast: false
61
- matrix:
62
- ruby: ${{ fromJson(needs.ruby-versions.outputs.versions) }}
63
- steps:
64
- - name: Checkout code
65
- uses: actions/checkout@v6
66
- - name: Set up Ruby
67
- uses: ruby/setup-ruby@v1
68
- with:
69
- ruby-version: ${{ matrix.ruby }}
70
- bundler-cache: true
71
- - name: test
72
- run: SYNTAX_SUGGEST_DISABLE_PRISM=1 bin/rake test
73
- continue-on-error: ${{ matrix.ruby == 'head' }}
@@ -12,7 +12,7 @@ jobs:
12
12
 
13
13
  - name: Create GitHub App token
14
14
  id: app-token
15
- uses: actions/create-github-app-token@v2
15
+ uses: actions/create-github-app-token@v3
16
16
  with:
17
17
  app-id: 2060836
18
18
  private-key: ${{ secrets.RUBY_SYNC_DEFAULT_GEMS_PRIVATE_KEY }}
data/.standard.yml CHANGED
@@ -1 +1 @@
1
- ruby_version: 3.0.0
1
+ ruby_version: 3.3.0
data/CHANGELOG.md CHANGED
@@ -1,8 +1,14 @@
1
1
  ## HEAD (unreleased)
2
2
 
3
+ ## 3.0.0
4
+
5
+ - Changed: Minimum supported Ruby version is now 3.3. (https://github.com/ruby/syntax_suggest/pull/246)
6
+ - Changed: Use native prism to analyse. (https://github.com/ruby/syntax_suggest/pull/251)
7
+
3
8
  ## 2.0.3
4
9
 
5
10
  - Fix: Correctly identify trailing slashes when using Prism > 1.8.0. (https://github.com/ruby/syntax_suggest/pull/243)
11
+ - Fix: Correctly handle `%I` delimiters. (https://github.com/ruby/syntax_suggest/pull/249)
6
12
  - Internal: Add tests to multiple versions of prism
7
13
 
8
14
  ## 2.0.2
data/Gemfile CHANGED
@@ -11,6 +11,7 @@ gem "stackprof"
11
11
  gem "standard"
12
12
  gem "ruby-prof"
13
13
 
14
+ gem "benchmark"
14
15
  gem "benchmark-ips"
15
16
 
16
17
  case ENV["PRISM_VERSION"]&.strip&.downcase
@@ -7,25 +7,8 @@ require "stringio"
7
7
  require "pathname"
8
8
  require "timeout"
9
9
 
10
- # We need Ripper loaded for `Prism.lex_compat` even if we're using Prism
11
- # for lexing and parsing
12
- require "ripper"
13
-
14
10
  # Prism is the new parser, replacing Ripper
15
- #
16
- # We need to "dual boot" both for now because syntax_suggest
17
- # supports older rubies that do not ship with syntax suggest.
18
- #
19
- # We also need the ability to control loading of this library
20
- # so we can test that both modes work correctly in CI.
21
- if (value = ENV["SYNTAX_SUGGEST_DISABLE_PRISM"])
22
- warn "Skipping loading prism due to SYNTAX_SUGGEST_DISABLE_PRISM=#{value}"
23
- else
24
- begin
25
- require "prism"
26
- rescue LoadError
27
- end
28
- end
11
+ require "prism"
29
12
 
30
13
  module SyntaxSuggest
31
14
  # Used to indicate a default value that cannot
@@ -35,14 +18,6 @@ module SyntaxSuggest
35
18
  class Error < StandardError; end
36
19
  TIMEOUT_DEFAULT = ENV.fetch("SYNTAX_SUGGEST_TIMEOUT", 1).to_i
37
20
 
38
- # SyntaxSuggest.use_prism_parser? [Private]
39
- #
40
- # Tells us if the prism parser is available for use
41
- # or if we should fallback to `Ripper`
42
- def self.use_prism_parser?
43
- defined?(Prism)
44
- end
45
-
46
21
  # SyntaxSuggest.handle_error [Public]
47
22
  #
48
23
  # Takes a `SyntaxError` exception, uses the
@@ -152,20 +127,11 @@ module SyntaxSuggest
152
127
  # SyntaxSuggest.invalid? [Private]
153
128
  #
154
129
  # Opposite of `SyntaxSuggest.valid?`
155
- if defined?(Prism)
156
- def self.invalid?(source)
157
- source = source.join if source.is_a?(Array)
158
- source = source.to_s
159
-
160
- Prism.parse(source).failure?
161
- end
162
- else
163
- def self.invalid?(source)
164
- source = source.join if source.is_a?(Array)
165
- source = source.to_s
130
+ def self.invalid?(source)
131
+ source = source.join if source.is_a?(Array)
132
+ source = source.to_s
166
133
 
167
- Ripper.new(source).tap(&:parse).error?
168
- end
134
+ Prism.parse(source).failure?
169
135
  end
170
136
 
171
137
  # SyntaxSuggest.valid? [Private]
@@ -219,7 +185,6 @@ require_relative "explain_syntax"
219
185
  require_relative "clean_document"
220
186
 
221
187
  # Helpers
222
- require_relative "lex_all"
223
188
  require_relative "code_line"
224
189
  require_relative "code_block"
225
190
  require_relative "block_expand"
@@ -231,3 +196,5 @@ require_relative "priority_engulf_queue"
231
196
  require_relative "pathname_from_message"
232
197
  require_relative "display_invalid_blocks"
233
198
  require_relative "parse_blocks_from_indent_line"
199
+ require_relative "visitor"
200
+ require_relative "token"
@@ -66,27 +66,9 @@ module SyntaxSuggest
66
66
  #
67
67
  # All of these problems are fixed by joining the whole heredoc into a single
68
68
  # line.
69
- #
70
- # ## Comments and whitespace
71
- #
72
- # Comments can throw off the way the lexer tells us that the line
73
- # logically belongs with the next line. This is valid ruby but
74
- # results in a different lex output than before:
75
- #
76
- # 1 User.
77
- # 2 where(name: "schneems").
78
- # 3 # Comment here
79
- # 4 first
80
- #
81
- # To handle this we can replace comment lines with empty lines
82
- # and then re-lex the source. This removal and re-lexing preserves
83
- # line index and document size, but generates an easier to work with
84
- # document.
85
- #
86
69
  class CleanDocument
87
70
  def initialize(source:)
88
- lines = clean_sweep(source: source)
89
- @document = CodeLine.from_source(lines.join, lines: lines)
71
+ @document = CodeLine.from_source(source)
90
72
  end
91
73
 
92
74
  # Call all of the document "cleaners"
@@ -110,62 +92,6 @@ module SyntaxSuggest
110
92
  @document.join
111
93
  end
112
94
 
113
- # Remove comments
114
- #
115
- # replace with empty newlines
116
- #
117
- # source = <<~'EOM'
118
- # # Comment 1
119
- # puts "hello"
120
- # # Comment 2
121
- # puts "world"
122
- # EOM
123
- #
124
- # lines = CleanDocument.new(source: source).lines
125
- # expect(lines[0].to_s).to eq("\n")
126
- # expect(lines[1].to_s).to eq("puts "hello")
127
- # expect(lines[2].to_s).to eq("\n")
128
- # expect(lines[3].to_s).to eq("puts "world")
129
- #
130
- # Important: This must be done before lexing.
131
- #
132
- # After this change is made, we lex the document because
133
- # removing comments can change how the doc is parsed.
134
- #
135
- # For example:
136
- #
137
- # values = LexAll.new(source: <<~EOM))
138
- # User.
139
- # # comment
140
- # where(name: 'schneems')
141
- # EOM
142
- # expect(
143
- # values.count {|v| v.type == :on_ignored_nl}
144
- # ).to eq(1)
145
- #
146
- # After the comment is removed:
147
- #
148
- # values = LexAll.new(source: <<~EOM))
149
- # User.
150
- #
151
- # where(name: 'schneems')
152
- # EOM
153
- # expect(
154
- # values.count {|v| v.type == :on_ignored_nl}
155
- # ).to eq(2)
156
- #
157
- def clean_sweep(source:)
158
- # Match comments, but not HEREDOC strings with #{variable} interpolation
159
- # https://rubular.com/r/HPwtW9OYxKUHXQ
160
- source.lines.map do |line|
161
- if line.match?(/^\s*#([^{].*|)$/)
162
- $/
163
- else
164
- line
165
- end
166
- end
167
- end
168
-
169
95
  # Smushes all heredoc lines into one line
170
96
  #
171
97
  # source = <<~'EOM'
@@ -182,11 +108,11 @@ module SyntaxSuggest
182
108
  start_index_stack = []
183
109
  heredoc_beg_end_index = []
184
110
  lines.each do |line|
185
- line.lex.each do |lex_value|
186
- case lex_value.type
187
- when :on_heredoc_beg
111
+ line.tokens.each do |token|
112
+ case token.type
113
+ when :HEREDOC_START
188
114
  start_index_stack << line.index
189
- when :on_heredoc_end
115
+ when :HEREDOC_END
190
116
  start_index = start_index_stack.pop
191
117
  end_index = line.index
192
118
  heredoc_beg_end_index << [start_index, end_index]
@@ -212,20 +138,10 @@ module SyntaxSuggest
212
138
  # expect(lines[0].to_s).to eq(source)
213
139
  # expect(lines[1].to_s).to eq("")
214
140
  #
215
- # The one known case this doesn't handle is:
216
- #
217
- # Ripper.lex <<~EOM
218
- # a &&
219
- # b ||
220
- # c
221
- # EOM
222
- #
223
- # For some reason this introduces `on_ignore_newline` but with BEG type
224
- #
225
141
  def join_consecutive!
226
- consecutive_groups = @document.select(&:ignore_newline_not_beg?).map do |code_line|
142
+ consecutive_groups = @document.select(&:consecutive?).map do |code_line|
227
143
  take_while_including(code_line.index..) do |line|
228
- line.ignore_newline_not_beg?
144
+ line.consecutive?
229
145
  end
230
146
  end
231
147
 
@@ -273,16 +189,17 @@ module SyntaxSuggest
273
189
 
274
190
  # Join group into the first line
275
191
  @document[line.index] = CodeLine.new(
276
- lex: lines.map(&:lex).flatten,
192
+ tokens: lines.map(&:tokens).flatten,
277
193
  line: lines.join,
278
- index: line.index
194
+ index: line.index,
195
+ consecutive: false
279
196
  )
280
197
 
281
198
  # Hide the rest of the lines
282
199
  lines[1..].each do |line|
283
200
  # The above lines already have newlines in them, if add more
284
201
  # then there will be double newline, use an empty line instead
285
- @document[line.index] = CodeLine.new(line: "", index: line.index, lex: [])
202
+ @document[line.index] = CodeLine.new(line: "", index: line.index, tokens: [], consecutive: false)
286
203
  end
287
204
  end
288
205
  self
@@ -26,23 +26,57 @@ module SyntaxSuggest
26
26
 
27
27
  # Returns an array of CodeLine objects
28
28
  # from the source string
29
- def self.from_source(source, lines: nil)
30
- lines ||= source.lines
31
- lex_array_for_line = LexAll.new(source: source, source_lines: lines).each_with_object(Hash.new { |h, k| h[k] = [] }) { |lex, hash| hash[lex.line] << lex }
32
- lines.map.with_index do |line, index|
29
+ def self.from_source(source)
30
+ source = +source
31
+ parse_result = Prism.parse_lex(source)
32
+ ast, tokens = parse_result.value
33
+
34
+ clean_comments!(source, parse_result.comments)
35
+
36
+ visitor = Visitor.new
37
+ visitor.visit(ast)
38
+ tokens.sort_by! { |token, _state| token.location.start_line }
39
+
40
+ prev_token = nil
41
+ tokens.map! do |token, _state|
42
+ prev_token = Token.new(token, prev_token, visitor)
43
+ end
44
+
45
+ tokens_for_line = tokens.each_with_object(Hash.new { |h, k| h[k] = [] }) { |token, hash| hash[token.line] << token }
46
+ source.lines.map.with_index do |line, index|
33
47
  CodeLine.new(
34
48
  line: line,
35
49
  index: index,
36
- lex: lex_array_for_line[index + 1]
50
+ tokens: tokens_for_line[index + 1],
51
+ consecutive: visitor.consecutive_lines.include?(index + 1)
37
52
  )
38
53
  end
39
54
  end
40
55
 
41
- attr_reader :line, :index, :lex, :line_number, :indent
42
- def initialize(line:, index:, lex:)
43
- @lex = lex
56
+ # Remove comments that appear on their own in source. They will never be the cause
57
+ # of syntax errors and are just visual noise. Example:
58
+ #
59
+ # source = +<<~RUBY
60
+ # # Comment-only line
61
+ # foo # Inline comment
62
+ # RUBY
63
+ # CodeLine.clean_comments!(source, Prism.parse(source).comments)
64
+ # source # => "\nfoo # Inline comment\n"
65
+ def self.clean_comments!(source, comments)
66
+ # Iterate backwards since we are modifying the source in place and must preserve
67
+ # the offsets. Prism comments are sorted by their location in the source.
68
+ comments.reverse_each do |comment|
69
+ next if comment.trailing?
70
+ source.bytesplice(comment.location.start_offset, comment.location.length, "")
71
+ end
72
+ end
73
+
74
+ attr_reader :line, :index, :tokens, :line_number, :indent
75
+ def initialize(line:, index:, tokens:, consecutive:)
76
+ @tokens = tokens
44
77
  @line = line
45
78
  @index = index
79
+ @consecutive = consecutive
46
80
  @original = line
47
81
  @line_number = @index + 1
48
82
  strip_line = line.dup
@@ -151,29 +185,16 @@ module SyntaxSuggest
151
185
  index <=> other.index
152
186
  end
153
187
 
154
- # [Not stable API]
155
- #
156
- # Lines that have a `on_ignored_nl` type token and NOT
157
- # a `BEG` type seem to be a good proxy for the ability
158
- # to join multiple lines into one.
159
- #
160
- # This predicate method is used to determine when those
161
- # two criteria have been met.
162
- #
163
- # The one known case this doesn't handle is:
164
- #
165
- # Ripper.lex <<~EOM
166
- # a &&
167
- # b ||
168
- # c
169
- # EOM
170
- #
171
- # For some reason this introduces `on_ignore_newline` but with BEG type
172
- def ignore_newline_not_beg?
173
- @ignore_newline_not_beg
188
+ # Can this line be logically joined together
189
+ # with the following line? Determined by walking
190
+ # the AST
191
+ def consecutive?
192
+ @consecutive
174
193
  end
175
194
 
176
- # Determines if the given line has a trailing slash
195
+ # Determines if the given line has a trailing slash.
196
+ # Simply check if the line contains a backslash after
197
+ # the content of the last token.
177
198
  #
178
199
  # lines = CodeLine.from_source(<<~EOM)
179
200
  # it "foo" \
@@ -181,61 +202,19 @@ module SyntaxSuggest
181
202
  # expect(lines.first.trailing_slash?).to eq(true)
182
203
  #
183
204
  def trailing_slash?
184
- last = @lex.last
185
-
186
- # Older versions of prism diverged slightly from Ripper in compatibility mode
187
- case last&.type
188
- when :on_sp
189
- last.token == TRAILING_SLASH
190
- when :on_tstring_end
191
- true
192
- else
193
- false
194
- end
205
+ return unless (last = @tokens.last)
206
+ @line.byteindex(TRAILING_SLASH, last.location.end_column) != nil
195
207
  end
196
208
 
197
- # Endless method detection
198
- #
199
- # From https://github.com/ruby/irb/commit/826ae909c9c93a2ddca6f9cfcd9c94dbf53d44ab
200
- # Detecting a "oneliner" seems to need a state machine.
201
- # This can be done by looking mostly at the "state" (last value):
202
- #
203
- # ENDFN -> BEG (token = '=' ) -> END
204
- #
205
209
  private def set_kw_end
206
- oneliner_count = 0
207
- in_oneliner_def = nil
208
-
209
210
  kw_count = 0
210
211
  end_count = 0
211
212
 
212
- @ignore_newline_not_beg = false
213
- @lex.each do |lex|
214
- kw_count += 1 if lex.is_kw?
215
- end_count += 1 if lex.is_end?
216
-
217
- if lex.type == :on_ignored_nl
218
- @ignore_newline_not_beg = !lex.expr_beg?
219
- end
220
-
221
- if in_oneliner_def.nil?
222
- in_oneliner_def = :ENDFN if lex.state.allbits?(Ripper::EXPR_ENDFN)
223
- elsif lex.state.allbits?(Ripper::EXPR_ENDFN)
224
- # Continue
225
- elsif lex.state.allbits?(Ripper::EXPR_BEG)
226
- in_oneliner_def = :BODY if lex.token == "="
227
- elsif lex.state.allbits?(Ripper::EXPR_END)
228
- # We found an endless method, count it
229
- oneliner_count += 1 if in_oneliner_def == :BODY
230
-
231
- in_oneliner_def = nil
232
- else
233
- in_oneliner_def = nil
234
- end
213
+ @tokens.each do |token|
214
+ kw_count += 1 if token.is_kw?
215
+ end_count += 1 if token.is_end?
235
216
  end
236
217
 
237
- kw_count -= oneliner_count
238
-
239
218
  @is_kw = (kw_count - end_count) > 0
240
219
  @is_end = (end_count - kw_count) > 0
241
220
  end
@@ -1,96 +1,47 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- # Ruby 3.2+ has a cleaner way to hook into Ruby that doesn't use `require`
4
- if SyntaxError.method_defined?(:detailed_message)
5
- module SyntaxSuggest
6
- # SyntaxSuggest.module_for_detailed_message [Private]
7
- #
8
- # Used to monkeypatch SyntaxError via Module.prepend
9
- def self.module_for_detailed_message
10
- Module.new {
11
- def detailed_message(highlight: true, syntax_suggest: true, **kwargs)
12
- return super unless syntax_suggest
13
-
14
- require "syntax_suggest/api" unless defined?(SyntaxSuggest::DEFAULT_VALUE)
15
-
16
- message = super
17
-
18
- if path
19
- file = Pathname.new(path)
20
- io = SyntaxSuggest::MiniStringIO.new
21
-
22
- SyntaxSuggest.call(
23
- io: io,
24
- source: file.read,
25
- filename: file,
26
- terminal: highlight
27
- )
28
- annotation = io.string
29
-
30
- annotation += "\n" unless annotation.end_with?("\n")
31
-
32
- annotation + message
33
- else
34
- message
35
- end
36
- rescue => e
37
- if ENV["SYNTAX_SUGGEST_DEBUG"]
38
- $stderr.warn(e.message)
39
- $stderr.warn(e.backtrace)
40
- end
41
-
42
- # Ignore internal errors
3
+ module SyntaxSuggest
4
+ # SyntaxSuggest.module_for_detailed_message [Private]
5
+ #
6
+ # Used to monkeypatch SyntaxError via Module.prepend
7
+ def self.module_for_detailed_message
8
+ Module.new {
9
+ def detailed_message(highlight: true, syntax_suggest: true, **kwargs)
10
+ return super unless syntax_suggest
11
+
12
+ require "syntax_suggest/api" unless defined?(SyntaxSuggest::DEFAULT_VALUE)
13
+
14
+ message = super
15
+
16
+ if path
17
+ file = Pathname.new(path)
18
+ io = SyntaxSuggest::MiniStringIO.new
19
+
20
+ SyntaxSuggest.call(
21
+ io: io,
22
+ source: file.read,
23
+ filename: file,
24
+ terminal: highlight
25
+ )
26
+ annotation = io.string
27
+
28
+ annotation += "\n" unless annotation.end_with?("\n")
29
+
30
+ annotation + message
31
+ else
43
32
  message
44
33
  end
45
- }
46
- end
47
- end
48
-
49
- SyntaxError.prepend(SyntaxSuggest.module_for_detailed_message)
50
- else
51
- autoload :Pathname, "pathname"
52
-
53
- #--
54
- # Monkey patch kernel to ensure that all `require` calls call the same
55
- # method
56
- #++
57
- module Kernel
58
- # :stopdoc:
59
-
60
- module_function
61
-
62
- alias_method :syntax_suggest_original_require, :require
63
- alias_method :syntax_suggest_original_require_relative, :require_relative
64
- alias_method :syntax_suggest_original_load, :load
65
-
66
- def load(file, wrap = false)
67
- syntax_suggest_original_load(file)
68
- rescue SyntaxError => e
69
- require "syntax_suggest/api" unless defined?(SyntaxSuggest::DEFAULT_VALUE)
70
-
71
- SyntaxSuggest.handle_error(e)
72
- end
73
-
74
- def require(file)
75
- syntax_suggest_original_require(file)
76
- rescue SyntaxError => e
77
- require "syntax_suggest/api" unless defined?(SyntaxSuggest::DEFAULT_VALUE)
78
-
79
- SyntaxSuggest.handle_error(e)
80
- end
34
+ rescue => e
35
+ if ENV["SYNTAX_SUGGEST_DEBUG"]
36
+ $stderr.warn(e.message)
37
+ $stderr.warn(e.backtrace)
38
+ end
81
39
 
82
- def require_relative(file)
83
- if Pathname.new(file).absolute?
84
- syntax_suggest_original_require file
85
- else
86
- relative_from = caller_locations(1..1).first
87
- relative_from_path = relative_from.absolute_path || relative_from.path
88
- syntax_suggest_original_require File.expand_path("../#{file}", relative_from_path)
40
+ # Ignore internal errors
41
+ message
89
42
  end
90
- rescue SyntaxError => e
91
- require "syntax_suggest/api" unless defined?(SyntaxSuggest::DEFAULT_VALUE)
92
-
93
- SyntaxSuggest.handle_error(e)
94
- end
43
+ }
95
44
  end
96
45
  end
46
+
47
+ SyntaxError.prepend(SyntaxSuggest.module_for_detailed_message)
@@ -1,19 +1,11 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative "left_right_lex_count"
4
-
5
- if !SyntaxSuggest.use_prism_parser?
6
- require_relative "ripper_errors"
7
- end
3
+ require_relative "left_right_token_count"
8
4
 
9
5
  module SyntaxSuggest
10
6
  class GetParseErrors
11
7
  def self.errors(source)
12
- if SyntaxSuggest.use_prism_parser?
13
- Prism.parse(source).errors.map(&:message)
14
- else
15
- RipperErrors.new(source).call.errors
16
- end
8
+ Prism.parse(source).errors.map(&:message)
17
9
  end
18
10
  end
19
11
 
@@ -53,14 +45,14 @@ module SyntaxSuggest
53
45
 
54
46
  def initialize(code_lines:)
55
47
  @code_lines = code_lines
56
- @left_right = LeftRightLexCount.new
48
+ @left_right = LeftRightTokenCount.new
57
49
  @missing = nil
58
50
  end
59
51
 
60
52
  def call
61
53
  @code_lines.each do |line|
62
- line.lex.each do |lex|
63
- @left_right.count_lex(lex)
54
+ line.tokens.each do |token|
55
+ @left_right.count_token(token)
64
56
  end
65
57
  end
66
58
 
@@ -9,19 +9,19 @@ module SyntaxSuggest
9
9
  #
10
10
  # Example:
11
11
  #
12
- # left_right = LeftRightLexCount.new
12
+ # left_right = LeftRightTokenCount.new
13
13
  # left_right.count_kw
14
14
  # left_right.missing.first
15
15
  # # => "end"
16
16
  #
17
- # left_right = LeftRightLexCount.new
17
+ # left_right = LeftRightTokenCount.new
18
18
  # source = "{ a: b, c: d" # Note missing '}'
19
- # LexAll.new(source: source).each do |lex|
20
- # left_right.count_lex(lex)
19
+ # LexAll.new(source: source).each do |token|
20
+ # left_right.count_token(token)
21
21
  # end
22
22
  # left_right.missing.first
23
23
  # # => "}"
24
- class LeftRightLexCount
24
+ class LeftRightTokenCount
25
25
  def initialize
26
26
  @kw_count = 0
27
27
  @end_count = 0
@@ -49,52 +49,46 @@ module SyntaxSuggest
49
49
  #
50
50
  # Example:
51
51
  #
52
- # left_right = LeftRightLexCount.new
53
- # left_right.count_lex(LexValue.new(1, :on_lbrace, "{", Ripper::EXPR_BEG))
52
+ # token = CodeLine.from_source("{").first.tokens.first
53
+ # left_right = LeftRightTokenCount.new
54
+ # left_right.count_token(token)
54
55
  # left_right.count_for_char("{")
55
56
  # # => 1
56
57
  # left_right.count_for_char("}")
57
58
  # # => 0
58
- def count_lex(lex)
59
- case lex.type
60
- when :on_tstring_content
59
+ def count_token(token)
60
+ case token.type
61
+ when :STRING_CONTENT
61
62
  # ^^^
62
63
  # Means it's a string or a symbol `"{"` rather than being
63
64
  # part of a data structure (like a hash) `{ a: b }`
64
65
  # ignore it.
65
- when :on_words_beg, :on_symbos_beg, :on_qwords_beg,
66
- :on_qsymbols_beg, :on_regexp_beg, :on_tstring_beg
66
+ when :PERCENT_UPPER_W, :PERCENT_UPPER_I, :PERCENT_LOWER_W,
67
+ :PERCENT_LOWER_I, :REGEXP_BEGIN, :STRING_BEGIN
67
68
  # ^^^
68
69
  # Handle shorthand syntaxes like `%Q{ i am a string }`
69
70
  #
70
71
  # The start token will be the full thing `%Q{` but we
71
72
  # need to count it as if it's a `{`. Any token
72
73
  # can be used
73
- char = lex.token[-1]
74
+ char = token.value[-1]
74
75
  @count_for_char[char] += 1 if @count_for_char.key?(char)
75
- when :on_embexpr_beg
76
+ when :EMBEXPR_BEGIN
76
77
  # ^^^
77
78
  # Embedded string expressions like `"#{foo} <-embed"`
78
79
  # are parsed with chars:
79
80
  #
80
- # `#{` as :on_embexpr_beg
81
- # `}` as :on_embexpr_end
82
- #
83
- # We cannot ignore both :on_emb_expr_beg and :on_embexpr_end
84
- # because sometimes the lexer thinks something is an embed
85
- # string end, when it is not like `lol = }` (no clue why).
81
+ # `#{` as :EMBEXPR_BEGIN
82
+ # `}` as :EMBEXPR_END
86
83
  #
87
84
  # When we see `#{` count it as a `{` or we will
88
85
  # have a mis-match count.
89
86
  #
90
- case lex.token
91
- when "\#{"
92
- @count_for_char["{"] += 1
93
- end
87
+ @count_for_char["{"] += 1
94
88
  else
95
- @end_count += 1 if lex.is_end?
96
- @kw_count += 1 if lex.is_kw?
97
- @count_for_char[lex.token] += 1 if @count_for_char.key?(lex.token)
89
+ @end_count += 1 if token.is_end?
90
+ @kw_count += 1 if token.is_kw?
91
+ @count_for_char[token.value] += 1 if @count_for_char.key?(token.value)
98
92
  end
99
93
  end
100
94
 
@@ -0,0 +1,49 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SyntaxSuggest
4
+ # Value object for accessing lex values
5
+ #
6
+ # This lex:
7
+ #
8
+ # [IDENTIFIER(1,0)-(1,8)("describe"), 32]
9
+ #
10
+ # Would translate into:
11
+ #
12
+ # lex.location # => (1,0)-(1,8)
13
+ # lex.type # => :IDENTIFIER
14
+ # lex.value # => "describe"
15
+ class Token
16
+ attr_reader :location, :type, :value
17
+
18
+ KW_TYPES = %i[
19
+ KEYWORD_IF KEYWORD_UNLESS KEYWORD_WHILE KEYWORD_UNTIL
20
+ KEYWORD_DEF KEYWORD_CASE KEYWORD_FOR KEYWORD_BEGIN KEYWORD_CLASS KEYWORD_MODULE KEYWORD_DO KEYWORD_DO_LOOP
21
+ ].to_set.freeze
22
+ private_constant :KW_TYPES
23
+
24
+ def initialize(prism_token, previous_prism_token, visitor)
25
+ @location = prism_token.location
26
+ @type = prism_token.type
27
+ @value = prism_token.value
28
+
29
+ # Prism lexes `:module` as SYMBOL_BEGIN, KEYWORD_MODULE
30
+ # https://github.com/ruby/prism/issues/3940
31
+ symbol_content = previous_prism_token&.type == :SYMBOL_BEGIN
32
+ @is_kw = KW_TYPES.include?(@type)
33
+ @is_kw = false if symbol_content || visitor.endless_def_keyword_offsets.include?(@location.start_offset)
34
+ @is_end = @type == :KEYWORD_END
35
+ end
36
+
37
+ def line
38
+ @location.start_line
39
+ end
40
+
41
+ def is_end?
42
+ @is_end
43
+ end
44
+
45
+ def is_kw?
46
+ @is_kw
47
+ end
48
+ end
49
+ end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module SyntaxSuggest
4
- VERSION = "2.0.3"
4
+ VERSION = "3.0.0"
5
5
  end
@@ -0,0 +1,80 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SyntaxSuggest
4
+ # Walks the Prism AST to extract structural info that cannot be reliably determined from tokens
5
+ # alone.
6
+ #
7
+ # Such as the location of lines that must be logically joined so the search algorithm will
8
+ # treat them as one. Example:
9
+ #
10
+ # source = <<~RUBY
11
+ # User # 1
12
+ # .where(name: "Earlopain") # 2
13
+ # .first # 3
14
+ # RUBY
15
+ # ast, _tokens = Prism.parse_lex(source).value
16
+ # visitor = Visitor.new
17
+ # visitor.visit(ast)
18
+ # visitor.consecutive_lines # => Set[2, 1]
19
+ #
20
+ # This output means that line 1 and line 2 need to be joined with their next line.
21
+ #
22
+ # And determining the location of "endless" method definitions. For example:
23
+ #
24
+ # source = <<~RUBY
25
+ # def cube(x)
26
+ # x * x * x
27
+ # end
28
+ # def square(x) = x * x # 1
29
+ # RUBY
30
+ #
31
+ # ast, _tokens = Prism.parse_lex(source).value
32
+ # visitor = Visitor.new
33
+ # visitor.visit(ast)
34
+ # visitor.endless_def_keyword_offsets # => Set[28]
35
+ class Visitor < Prism::Visitor
36
+ attr_reader :endless_def_keyword_offsets, :consecutive_lines
37
+
38
+ def initialize
39
+ @endless_def_keyword_offsets = Set.new
40
+ @consecutive_lines = Set.new
41
+ end
42
+
43
+ # Called by Prism::Visitor for every method-call node in the AST
44
+ # (e.g. `foo.bar`, `foo.bar.baz`).
45
+ def visit_call_node(node)
46
+ receiver_loc = node.receiver&.location
47
+ call_operator_loc = node.call_operator_loc
48
+ message_loc = node.message_loc
49
+ if receiver_loc && call_operator_loc && message_loc
50
+ # dot-leading (dot on the next line)
51
+ # foo # line 1 - consecutive
52
+ # .bar # line 2
53
+ if receiver_loc.end_line != call_operator_loc.start_line && call_operator_loc.start_line == message_loc.start_line
54
+ (receiver_loc.end_line..call_operator_loc.start_line - 1).each do |line|
55
+ @consecutive_lines << line
56
+ end
57
+ end
58
+
59
+ # dot-trailing (dot on the same line as the receiver)
60
+ # foo. # line 1 - consecutive
61
+ # bar # line 2
62
+ if receiver_loc.end_line == call_operator_loc.start_line && call_operator_loc.start_line != message_loc.start_line
63
+ (call_operator_loc.start_line..message_loc.start_line - 1).each do |line|
64
+ @consecutive_lines << line
65
+ end
66
+ end
67
+ end
68
+ super
69
+ end
70
+
71
+ # Called by Prism::Visitor for every `def` node in the AST.
72
+ # Records the keyword start location for endless method definitions
73
+ # like `def foo = 123`. These are valid without a matching `end`,
74
+ # so Token must exclude them when deciding if a line is a keyword.
75
+ def visit_def_node(node)
76
+ @endless_def_keyword_offsets << node.def_keyword_loc.start_offset if node.equal_loc
77
+ super
78
+ end
79
+ end
80
+ end
@@ -16,7 +16,7 @@ Gem::Specification.new do |spec|
16
16
  spec.description = 'When you get an "unexpected end" in your syntax this gem helps you find it'
17
17
  spec.homepage = "https://github.com/ruby/syntax_suggest.git"
18
18
  spec.license = "MIT"
19
- spec.required_ruby_version = Gem::Requirement.new(">= 3.0.0")
19
+ spec.required_ruby_version = Gem::Requirement.new(">= 3.3.0")
20
20
 
21
21
  spec.metadata["homepage_uri"] = spec.homepage
22
22
  spec.metadata["source_code_uri"] = "https://github.com/ruby/syntax_suggest.git"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: syntax_suggest
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.3
4
+ version: 3.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - schneems
@@ -54,18 +54,17 @@ files:
54
54
  - lib/syntax_suggest/display_code_with_line_numbers.rb
55
55
  - lib/syntax_suggest/display_invalid_blocks.rb
56
56
  - lib/syntax_suggest/explain_syntax.rb
57
- - lib/syntax_suggest/left_right_lex_count.rb
58
- - lib/syntax_suggest/lex_all.rb
59
- - lib/syntax_suggest/lex_value.rb
57
+ - lib/syntax_suggest/left_right_token_count.rb
60
58
  - lib/syntax_suggest/mini_stringio.rb
61
59
  - lib/syntax_suggest/parse_blocks_from_indent_line.rb
62
60
  - lib/syntax_suggest/pathname_from_message.rb
63
61
  - lib/syntax_suggest/priority_engulf_queue.rb
64
62
  - lib/syntax_suggest/priority_queue.rb
65
- - lib/syntax_suggest/ripper_errors.rb
66
63
  - lib/syntax_suggest/scan_history.rb
64
+ - lib/syntax_suggest/token.rb
67
65
  - lib/syntax_suggest/unvisited_lines.rb
68
66
  - lib/syntax_suggest/version.rb
67
+ - lib/syntax_suggest/visitor.rb
69
68
  - syntax_suggest.gemspec
70
69
  homepage: https://github.com/ruby/syntax_suggest.git
71
70
  licenses:
@@ -80,7 +79,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
80
79
  requirements:
81
80
  - - ">="
82
81
  - !ruby/object:Gem::Version
83
- version: 3.0.0
82
+ version: 3.3.0
84
83
  required_rubygems_version: !ruby/object:Gem::Requirement
85
84
  requirements:
86
85
  - - ">="
@@ -1,74 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module SyntaxSuggest
4
- # Ripper.lex is not guaranteed to lex the entire source document
5
- #
6
- # This class guarantees the whole document is lex-ed by iteratively
7
- # lexing the document where ripper stopped.
8
- #
9
- # Prism likely doesn't have the same problem. Once ripper support is removed
10
- # we can likely reduce the complexity here if not remove the whole concept.
11
- #
12
- # Example usage:
13
- #
14
- # lex = LexAll.new(source: source)
15
- # lex.each do |value|
16
- # puts value.line
17
- # end
18
- class LexAll
19
- include Enumerable
20
-
21
- def initialize(source:, source_lines: nil)
22
- @lex = self.class.lex(source, 1)
23
- lineno = @lex.last[0][0] + 1
24
- source_lines ||= source.lines
25
- last_lineno = source_lines.length
26
-
27
- until lineno >= last_lineno
28
- lines = source_lines[lineno..]
29
-
30
- @lex.concat(
31
- self.class.lex(lines.join, lineno + 1)
32
- )
33
-
34
- lineno = @lex.last[0].first + 1
35
- end
36
-
37
- last_lex = nil
38
- @lex.map! { |elem|
39
- last_lex = LexValue.new(elem[0].first, elem[1], elem[2], elem[3], last_lex)
40
- }
41
- end
42
-
43
- if SyntaxSuggest.use_prism_parser?
44
- def self.lex(source, line_number)
45
- Prism.lex_compat(source, line: line_number).value.sort_by { |values| values[0] }
46
- end
47
- else
48
- def self.lex(source, line_number)
49
- Ripper::Lexer.new(source, "-", line_number).parse.sort_by(&:pos)
50
- end
51
- end
52
-
53
- def to_a
54
- @lex
55
- end
56
-
57
- def each
58
- return @lex.each unless block_given?
59
- @lex.each do |x|
60
- yield x
61
- end
62
- end
63
-
64
- def [](index)
65
- @lex[index]
66
- end
67
-
68
- def last
69
- @lex.last
70
- end
71
- end
72
- end
73
-
74
- require_relative "lex_value"
@@ -1,70 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module SyntaxSuggest
4
- # Value object for accessing lex values
5
- #
6
- # This lex:
7
- #
8
- # [1, 0], :on_ident, "describe", CMDARG
9
- #
10
- # Would translate into:
11
- #
12
- # lex.line # => 1
13
- # lex.type # => :on_indent
14
- # lex.token # => "describe"
15
- class LexValue
16
- attr_reader :line, :type, :token, :state
17
-
18
- def initialize(line, type, token, state, last_lex = nil)
19
- @line = line
20
- @type = type
21
- @token = token
22
- @state = state
23
-
24
- set_kw_end(last_lex)
25
- end
26
-
27
- private def set_kw_end(last_lex)
28
- @is_end = false
29
- @is_kw = false
30
- return if type != :on_kw
31
-
32
- return if last_lex && last_lex.fname? # https://github.com/ruby/ruby/commit/776759e300e4659bb7468e2b97c8c2d4359a2953
33
-
34
- case token
35
- when "if", "unless", "while", "until"
36
- # Only count if/unless when it's not a "trailing" if/unless
37
- # https://github.com/ruby/ruby/blob/06b44f819eb7b5ede1ff69cecb25682b56a1d60c/lib/irb/ruby-lex.rb#L374-L375
38
- @is_kw = true unless expr_label?
39
- when "def", "case", "for", "begin", "class", "module", "do"
40
- @is_kw = true
41
- when "end"
42
- @is_end = true
43
- end
44
- end
45
-
46
- def fname?
47
- state.allbits?(Ripper::EXPR_FNAME)
48
- end
49
-
50
- def ignore_newline?
51
- type == :on_ignored_nl
52
- end
53
-
54
- def is_end?
55
- @is_end
56
- end
57
-
58
- def is_kw?
59
- @is_kw
60
- end
61
-
62
- def expr_beg?
63
- state.anybits?(Ripper::EXPR_BEG)
64
- end
65
-
66
- def expr_label?
67
- state.allbits?(Ripper::EXPR_LABEL)
68
- end
69
- end
70
- end
@@ -1,39 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module SyntaxSuggest
4
- # Capture parse errors from Ripper
5
- #
6
- # Prism returns the errors with their messages, but Ripper
7
- # does not. To get them we must make a custom subclass.
8
- #
9
- # Example:
10
- #
11
- # puts RipperErrors.new(" def foo").call.errors
12
- # # => ["syntax error, unexpected end-of-input, expecting ';' or '\\n'"]
13
- class RipperErrors < Ripper
14
- attr_reader :errors
15
-
16
- # Comes from ripper, called
17
- # on every parse error, msg
18
- # is a string
19
- def on_parse_error(msg)
20
- @errors ||= []
21
- @errors << msg
22
- end
23
-
24
- alias_method :on_alias_error, :on_parse_error
25
- alias_method :on_assign_error, :on_parse_error
26
- alias_method :on_class_name_error, :on_parse_error
27
- alias_method :on_param_error, :on_parse_error
28
- alias_method :compile_error, :on_parse_error
29
-
30
- def call
31
- @run_once ||= begin
32
- @errors = []
33
- parse
34
- true
35
- end
36
- self
37
- end
38
- end
39
- end