RubyGems - syntax_suggest - Versions diffs - 0.0.1 - Mend

syntax_suggest 0.0.1

Files changed (43) hide show

checksums.yaml +7 -0
data/.circleci/config.yml +91 -0
data/.github/workflows/check_changelog.yml +20 -0
data/.gitignore +14 -0
data/.rspec +3 -0
data/.standard.yml +1 -0
data/CHANGELOG.md +158 -0
data/CODE_OF_CONDUCT.md +74 -0
data/Gemfile +14 -0
data/Gemfile.lock +67 -0
data/LICENSE.txt +21 -0
data/README.md +229 -0
data/Rakefile +8 -0
data/bin/console +14 -0
data/bin/setup +8 -0
data/dead_end.gemspec +32 -0
data/exe/syntax_suggest +7 -0
data/lib/syntax_suggest/api.rb +199 -0
data/lib/syntax_suggest/around_block_scan.rb +224 -0
data/lib/syntax_suggest/block_expand.rb +74 -0
data/lib/syntax_suggest/capture_code_context.rb +233 -0
data/lib/syntax_suggest/clean_document.rb +304 -0
data/lib/syntax_suggest/cli.rb +129 -0
data/lib/syntax_suggest/code_block.rb +100 -0
data/lib/syntax_suggest/code_frontier.rb +178 -0
data/lib/syntax_suggest/code_line.rb +239 -0
data/lib/syntax_suggest/code_search.rb +139 -0
data/lib/syntax_suggest/core_ext.rb +101 -0
data/lib/syntax_suggest/display_code_with_line_numbers.rb +70 -0
data/lib/syntax_suggest/display_invalid_blocks.rb +84 -0
data/lib/syntax_suggest/explain_syntax.rb +103 -0
data/lib/syntax_suggest/left_right_lex_count.rb +168 -0
data/lib/syntax_suggest/lex_all.rb +55 -0
data/lib/syntax_suggest/lex_value.rb +70 -0
data/lib/syntax_suggest/parse_blocks_from_indent_line.rb +60 -0
data/lib/syntax_suggest/pathname_from_message.rb +59 -0
data/lib/syntax_suggest/priority_engulf_queue.rb +63 -0
data/lib/syntax_suggest/priority_queue.rb +105 -0
data/lib/syntax_suggest/ripper_errors.rb +36 -0
data/lib/syntax_suggest/unvisited_lines.rb +36 -0
data/lib/syntax_suggest/version.rb +5 -0
data/lib/syntax_suggest.rb +3 -0
metadata +88 -0

data/lib/syntax_suggest/clean_document.rb ADDED Viewed

@@ -0,0 +1,304 @@
+# frozen_string_literal: true
+module SyntaxSuggest
+  # Parses and sanitizes source into a lexically aware document
+  #
+  # Internally the document is represented by an array with each
+  # index containing a CodeLine correlating to a line from the source code.
+  #
+  # There are three main phases in the algorithm:
+  #
+  # 1. Sanitize/format input source
+  # 2. Search for invalid blocks
+  # 3. Format invalid blocks into something meaningful
+  #
+  # This class handles the first part.
+  #
+  # The reason this class exists is to format input source
+  # for better/easier/cleaner exploration.
+  #
+  # The CodeSearch class operates at the line level so
+  # we must be careful to not introduce lines that look
+  # valid by themselves, but when removed will trigger syntax errors
+  # or strange behavior.
+  #
+  # ## Join Trailing slashes
+  #
+  # Code with a trailing slash is logically treated as a single line:
+  #
+  #     1 it "code can be split" \
+  #     2    "across multiple lines" do
+  #
+  # In this case removing line 2 would add a syntax error. We get around
+  # this by internally joining the two lines into a single "line" object
+  #
+  # ## Logically Consecutive lines
+  #
+  # Code that can be broken over multiple
+  # lines such as method calls are on different lines:
+  #
+  #     1 User.
+  #     2   where(name: "schneems").
+  #     3   first
+  #
+  # Removing line 2 can introduce a syntax error. To fix this, all lines
+  # are joined into one.
+  #
+  # ## Heredocs
+  #
+  # A heredoc is a way of defining a multi-line string. They can cause many
+  # problems. If left as a single line, Ripper would try to parse the contents
+  # as ruby code rather than as a string. Even without this problem, we still
+  # hit an issue with indentation
+  #
+  #    1 foo = <<~HEREDOC
+  #    2  "Be yourself; everyone else is already taken.""
+  #    3    ― Oscar Wilde
+  #    4      puts "I look like ruby code" # but i'm still a heredoc
+  #    5 HEREDOC
+  #
+  # If we didn't join these lines then our algorithm would think that line 4
+  # is separate from the rest, has a higher indentation, then look at it first
+  # and remove it.
+  #
+  # If the code evaluates line 5 by itself it will think line 5 is a constant,
+  # remove it, and introduce a syntax error.
+  #
+  # All of these problems are fixed by joining the whole heredoc into a single
+  # line.
+  #
+  # ## Comments and whitespace
+  #
+  # Comments can throw off the way the lexer tells us that the line
+  # logically belongs with the next line. This is valid ruby but
+  # results in a different lex output than before:
+  #
+  #     1 User.
+  #     2   where(name: "schneems").
+  #     3   # Comment here
+  #     4   first
+  #
+  # To handle this we can replace comment lines with empty lines
+  # and then re-lex the source. This removal and re-lexing preserves
+  # line index and document size, but generates an easier to work with
+  # document.
+  #
+  class CleanDocument
+    def initialize(source:)
+      lines = clean_sweep(source: source)
+      @document = CodeLine.from_source(lines.join, lines: lines)
+    end
+    # Call all of the document "cleaners"
+    # and return self
+    def call
+      join_trailing_slash!
+      join_consecutive!
+      join_heredoc!
+      self
+    end
+    # Return an array of CodeLines in the
+    # document
+    def lines
+      @document
+    end
+    # Renders the document back to a string
+    def to_s
+      @document.join
+    end
+    # Remove comments and whitespace only lines
+    #
+    # replace with empty newlines
+    #
+    #     source = <<~'EOM'
+    #       # Comment 1
+    #       puts "hello"
+    #       # Comment 2
+    #       puts "world"
+    #     EOM
+    #
+    #     lines = CleanDocument.new(source: source).lines
+    #     expect(lines[0].to_s).to eq("\n")
+    #     expect(lines[1].to_s).to eq("puts "hello")
+    #     expect(lines[2].to_s).to eq("\n")
+    #     expect(lines[3].to_s).to eq("puts "world")
+    #
+    # Important: This must be done before lexing.
+    #
+    # After this change is made, we lex the document because
+    # removing comments can change how the doc is parsed.
+    #
+    # For example:
+    #
+    #     values = LexAll.new(source: <<~EOM))
+    #       User.
+    #         # comment
+    #         where(name: 'schneems')
+    #     EOM
+    #     expect(
+    #       values.count {|v| v.type == :on_ignored_nl}
+    #     ).to eq(1)
+    #
+    # After the comment is removed:
+    #
+    #     values = LexAll.new(source: <<~EOM))
+    #       User.
+    #
+    #         where(name: 'schneems')
+    #     EOM
+    #     expect(
+    #      values.count {|v| v.type == :on_ignored_nl}
+    #    ).to eq(2)
+    #
+    def clean_sweep(source:)
+      source.lines.map do |line|
+        if line.match?(/^\s*(#[^{].*)?$/) # https://rubular.com/r/LLE10D8HKMkJvs
+          $/
+        else
+          line
+        end
+      end
+    end
+    # Smushes all heredoc lines into one line
+    #
+    #     source = <<~'EOM'
+    #       foo = <<~HEREDOC
+    #          lol
+    #          hehehe
+    #       HEREDOC
+    #     EOM
+    #
+    #     lines = CleanDocument.new(source: source).join_heredoc!.lines
+    #     expect(lines[0].to_s).to eq(source)
+    #     expect(lines[1].to_s).to eq("")
+    def join_heredoc!
+      start_index_stack = []
+      heredoc_beg_end_index = []
+      lines.each do |line|
+        line.lex.each do |lex_value|
+          case lex_value.type
+          when :on_heredoc_beg
+            start_index_stack << line.index
+          when :on_heredoc_end
+            start_index = start_index_stack.pop
+            end_index = line.index
+            heredoc_beg_end_index << [start_index, end_index]
+          end
+        end
+      end
+      heredoc_groups = heredoc_beg_end_index.map { |start_index, end_index| @document[start_index..end_index] }
+      join_groups(heredoc_groups)
+      self
+    end
+    # Smushes logically "consecutive" lines
+    #
+    #     source = <<~'EOM'
+    #       User.
+    #         where(name: 'schneems').
+    #         first
+    #     EOM
+    #
+    #     lines = CleanDocument.new(source: source).join_consecutive!.lines
+    #     expect(lines[0].to_s).to eq(source)
+    #     expect(lines[1].to_s).to eq("")
+    #
+    # The one known case this doesn't handle is:
+    #
+    #     Ripper.lex <<~EOM
+    #       a &&
+    #        b ||
+    #        c
+    #     EOM
+    #
+    # For some reason this introduces `on_ignore_newline` but with BEG type
+    #
+    def join_consecutive!
+      consecutive_groups = @document.select(&:ignore_newline_not_beg?).map do |code_line|
+        take_while_including(code_line.index..-1) do |line|
+          line.ignore_newline_not_beg?
+        end
+      end
+      join_groups(consecutive_groups)
+      self
+    end
+    # Join lines with a trailing slash
+    #
+    #     source = <<~'EOM'
+    #       it "code can be split" \
+    #          "across multiple lines" do
+    #     EOM
+    #
+    #     lines = CleanDocument.new(source: source).join_consecutive!.lines
+    #     expect(lines[0].to_s).to eq(source)
+    #     expect(lines[1].to_s).to eq("")
+    def join_trailing_slash!
+      trailing_groups = @document.select(&:trailing_slash?).map do |code_line|
+        take_while_including(code_line.index..-1) { |x| x.trailing_slash? }
+      end
+      join_groups(trailing_groups)
+      self
+    end
+    # Helper method for joining "groups" of lines
+    #
+    # Input is expected to be type Array<Array<CodeLine>>
+    #
+    # The outer array holds the various "groups" while the
+    # inner array holds code lines.
+    #
+    # All code lines are "joined" into the first line in
+    # their group.
+    #
+    # To preserve document size, empty lines are placed
+    # in the place of the lines that were "joined"
+    def join_groups(groups)
+      groups.each do |lines|
+        line = lines.first
+        # Handle the case of multiple groups in a a row
+        # if one is already replaced, move on
+        next if @document[line.index].empty?
+        # Join group into the first line
+        @document[line.index] = CodeLine.new(
+          lex: lines.map(&:lex).flatten,
+          line: lines.join,
+          index: line.index
+        )
+        # Hide the rest of the lines
+        lines[1..-1].each do |line|
+          # The above lines already have newlines in them, if add more
+          # then there will be double newline, use an empty line instead
+          @document[line.index] = CodeLine.new(line: "", index: line.index, lex: [])
+        end
+      end
+      self
+    end
+    # Helper method for grabbing elements from document
+    #
+    # Like `take_while` except when it stops
+    # iterating, it also returns the line
+    # that caused it to stop
+    #
+    # range - the slice of the document to scan (defaults to all of it)
+    #
+    # Yields each line; a falsy block result marks that line as the last
+    # one to be included.
+    #
+    # Returns an Array of lines
+    def take_while_including(range = 0..-1)
+      take_next_and_stop = false
+      @document[range].take_while do |line|
+        # A bare `next` hands nil back to `take_while`, which ends the
+        # iteration on the line after the one that failed the block
+        next if take_next_and_stop
+        take_next_and_stop = !(yield line)
+        # Always keep the current line, including the one that failed
+        true
+      end
+    end
+  end
+end

data/lib/syntax_suggest/cli.rb ADDED Viewed

@@ -0,0 +1,129 @@
+# frozen_string_literal: true
+require "pathname"
+require "optparse"
+module SyntaxSuggest
+  # All the logic of the exe/syntax_suggest CLI in one handy spot
+  #
+  #   Cli.new(argv: ["--help"]).call
+  #   Cli.new(argv: ["<path/to/file>.rb"]).call
+  #   Cli.new(argv: ["<path/to/file>.rb", "--record=tmp"]).call
+  #   Cli.new(argv: ["<path/to/file>.rb", "--terminal"]).call
+  #
+  class Cli
+    attr_accessor :options
+    # ARGV is Everything passed to the executable, does not include executable name
+    #
+    # All other intputs are dependency injection for testing
+    def initialize(argv:, exit_obj: Kernel, io: $stdout, env: ENV)
+      @options = {}
+      @parser = nil
+      options[:record_dir] = env["SYNTAX_SUGGEST_RECORD_DIR"]
+      options[:record_dir] = "tmp" if env["DEBUG"]
+      options[:terminal] = SyntaxSuggest::DEFAULT_VALUE
+      @io = io
+      @argv = argv
+      @exit_obj = exit_obj
+    end
+    def call
+      if @argv.empty?
+        # Display help if raw command
+        parser.parse! %w[--help]
+        return
+      else
+        # Mutates @argv
+        parse
+        return if options[:exit]
+      end
+      file_name = @argv.first
+      if file_name.nil?
+        @io.puts "No file given"
+        @exit_obj.exit(1)
+        return
+      end
+      file = Pathname(file_name)
+      if !file.exist?
+        @io.puts "file not found: #{file.expand_path} "
+        @exit_obj.exit(1)
+        return
+      end
+      @io.puts "Record dir: #{options[:record_dir]}" if options[:record_dir]
+      display = SyntaxSuggest.call(
+        io: @io,
+        source: file.read,
+        filename: file.expand_path,
+        terminal: options.fetch(:terminal, SyntaxSuggest::DEFAULT_VALUE),
+        record_dir: options[:record_dir]
+      )
+      if display.document_ok?
+        @exit_obj.exit(0)
+      else
+        @exit_obj.exit(1)
+      end
+    end
+    def parse
+      parser.parse!(@argv)
+      self
+    end
+    # Builds (once) and returns the OptionParser for the CLI.
+    #
+    # The parser is memoized in @parser. Its option handlers write into
+    # `options`; `--help` additionally prints the usage text to the injected
+    # io and calls the injected exit object.
+    def parser
+      @parser ||= OptionParser.new do |opts|
+        opts.banner = <<~EOM
+          Usage: syntax_suggest <file> [options]
+          Parses a ruby source file and searches for syntax error(s) such as
+          unexpected `end', expecting end-of-input.
+          Example:
+            $ syntax_suggest dog.rb
+            # ...
+              ❯ 10  defdog
+              ❯ 15  end
+          ENV options:
+            SYNTAX_SUGGEST_RECORD_DIR=<dir>
+            Records the steps used to search for a syntax error
+            to the given directory
+          Options:
+        EOM
+        opts.version = SyntaxSuggest::VERSION
+        # Print usage, flag that `call` should stop, then exit
+        opts.on("--help", "Help - displays this message") do |v|
+          @io.puts opts
+          options[:exit] = true
+          @exit_obj.exit
+        end
+        # Overrides any record dir taken from the environment
+        opts.on("--record <dir>", "Records the steps used to search for a syntax error to the given directory") do |v|
+          options[:record_dir] = v
+        end
+        opts.on("--terminal", "Enable terminal highlighting") do |v|
+          options[:terminal] = true
+        end
+        opts.on("--no-terminal", "Disable terminal highlighting") do |v|
+          options[:terminal] = false
+        end
+      end
+    end
+  end
+end

data/lib/syntax_suggest/code_block.rb ADDED Viewed

@@ -0,0 +1,100 @@
+# frozen_string_literal: true
+module SyntaxSuggest
+  # Multiple lines form a singular CodeBlock
+  #
+  # Source code is made of multiple CodeBlocks.
+  #
+  # Example:
+  #
+  #   code_block.to_s # =>
+  #     #   def foo
+  #     #     puts "foo"
+  #     #   end
+  #
+  #   code_block.valid? # => true
+  #   code_block.invalid? # => false
+  #
+  #
+  class CodeBlock
+    UNSET = Object.new.freeze
+    attr_reader :lines, :starts_at, :ends_at
+    def initialize(lines: [])
+      @lines = Array(lines)
+      @valid = UNSET
+      @deleted = false
+      @starts_at = @lines.first.number
+      @ends_at = @lines.last.number
+    end
+    def delete
+      @deleted = true
+    end
+    def deleted?
+      @deleted
+    end
+    def visible_lines
+      @lines.select(&:visible?).select(&:not_empty?)
+    end
+    def mark_invisible
+      @lines.map(&:mark_invisible)
+    end
+    def is_end?
+      to_s.strip == "end"
+    end
+    def hidden?
+      @lines.all?(&:hidden?)
+    end
+    # This is used for frontier ordering, we are searching from
+    # the largest indentation to the smallest. This allows us to
+    # populate an array with multiple code blocks then call `sort!`
+    # on it without having to specify the sorting criteria
+    def <=>(other)
+      out = current_indent <=> other.current_indent
+      return out if out != 0
+      # Stable sort
+      starts_at <=> other.starts_at
+    end
+    def current_indent
+      @current_indent ||= lines.select(&:not_empty?).map(&:indent).min || 0
+    end
+    def invalid?
+      !valid?
+    end
+    def valid?
+      if @valid == UNSET
+        # Performance optimization
+        #
+        # If all the lines were previously hidden
+        # and we expand to capture additional empty
+        # lines then the result cannot be invalid
+        #
+        # That means there's no reason to re-check all
+        # lines with ripper (which is expensive).
+        # Benchmark in commit message
+        @valid = if lines.all? { |l| l.hidden? || l.empty? }
+          true
+        else
+          SyntaxSuggest.valid?(lines.map(&:original).join)
+        end
+      else
+        @valid
+      end
+    end
+    def to_s
+      @lines.join
+    end
+  end
+end

data/lib/syntax_suggest/code_frontier.rb ADDED Viewed

@@ -0,0 +1,178 @@
+# frozen_string_literal: true
+module SyntaxSuggest
+  # The main function of the frontier is to hold the edges of our search and to
+  # evaluate when we can stop searching.
+  # There are three main phases in the algorithm:
+  #
+  # 1. Sanitize/format input source
+  # 2. Search for invalid blocks
+  # 3. Format invalid blocks into something meaningful
+  #
+  # The Code frontier is a critical part of the second step
+  #
+  # ## Knowing where we've been
+  #
+  # Once a code block is generated it is added onto the frontier. Then it will be
+  # sorted by indentation and frontier can be filtered. Large blocks that fully enclose a
+  # smaller block will cause the smaller block to be evicted.
+  #
+  #   CodeFrontier#<<(block) # Adds block to frontier
+  #   CodeFrontier#pop # Removes block from frontier
+  #
+  # ## Knowing where we can go
+  #
+  # Internally the frontier keeps track of "unvisited" lines which are exposed via `next_indent_line`.
+  # When called, this method returns a line of code with the highest indentation.
+  #
+  # The returned line of code can be used to build a CodeBlock and then that code block
+  # is added back to the frontier. Then, the lines are removed from the
+  # "unvisited" so we don't double-create the same block.
+  #
+  #   CodeFrontier#next_indent_line # Shows next line
+  #   CodeFrontier#register_indent_block(block) # Removes lines from unvisited
+  #
+  # ## Knowing when to stop
+  #
+  # The frontier knows how to check the entire document for a syntax error. When blocks
+  # are added onto the frontier, they're removed from the document. When all code containing
+  # syntax errors has been added to the frontier, the document will be parsable without a
+  # syntax error and the search can stop.
+  #
+  #   CodeFrontier#holds_all_syntax_errors? # Returns true when frontier holds all syntax errors
+  #
+  # ## Filtering false positives
+  #
+  # Once the search is completed, the frontier may have multiple blocks that do not contain
+  # the syntax error. To limit the result to the smallest subset of "invalid blocks" call:
+  #
+  #   CodeFrontier#detect_invalid_blocks
+  #
+  class CodeFrontier
+    # code_lines - the lines of the whole document
+    # unvisited  - tracker for lines not yet placed into a block; injectable,
+    #              defaults to a fresh UnvisitedLines built from code_lines
+    def initialize(code_lines:, unvisited: UnvisitedLines.new(code_lines: code_lines))
+      @code_lines = code_lines
+      @unvisited = unvisited
+      @queue = PriorityEngulfQueue.new
+      @check_next = true
+    end
+    # Number of blocks currently on the frontier
+    def count
+      @queue.length
+    end
+    # Performance optimization
+    #
+    # Parsing with ripper is expensive
+    # If we know we don't have any blocks with invalid
+    # syntax, then we know we cannot have found
+    # the incorrect syntax yet.
+    #
+    # When an invalid block is added onto the frontier
+    # check document state
+    #
+    # Reading the flag also resets it, so only the first call after an
+    # invalid block was added (or after construction) returns false
+    private def can_skip_check?
+      skip = !@check_next
+      @check_next = false
+      skip
+    end
+    # Returns true if the document is valid with all lines
+    # removed. By default it checks all blocks present in
+    # the frontier, but can be used for arbitrary arrays
+    # of codeblocks as well
+    #
+    # can_cache - when true the (expensive) check may be skipped, in which
+    #             case false is returned
+    def holds_all_syntax_errors?(block_array = @queue, can_cache: true)
+      return false if can_cache && can_skip_check?
+      SyntaxSuggest.valid_without?(
+        without_lines: block_array.to_a.flat_map(&:lines),
+        code_lines: @code_lines
+      )
+    end
+    # Returns a code block with the largest indentation possible
+    def pop
+      @queue.pop
+    end
+    # Peeks at the next unvisited line without removing it
+    def next_indent_line
+      @unvisited.peek
+    end
+    # Decides whether to expand a block already on the frontier (true)
+    # rather than move on to an unvisited line (false)
+    def expand?
+      return false if @queue.empty?
+      return true if @unvisited.empty?
+      frontier_indent = @queue.peek.current_indent
+      unvisited_indent = next_indent_line.indent
+      if ENV["SYNTAX_SUGGEST_DEBUG"]
+        puts "```"
+        puts @queue.peek.to_s
+        puts "```"
+        puts "  @frontier indent:  #{frontier_indent}"
+        puts "  @unvisited indent: #{unvisited_indent}"
+      end
+      # Expand all blocks before moving to unvisited lines
+      frontier_indent >= unvisited_indent
+    end
+    # Keeps track of what lines have been added to blocks and which are not yet
+    # visited.
+    #
+    # Returns self
+    def register_indent_block(block)
+      @unvisited.visit_block(block)
+      self
+    end
+    # When one element fully encapsulates another we remove the smaller
+    # block from the frontier. This prevents double expansions and all-around
+    # weird behavior. However this guarantee is quite expensive to maintain
+    #
+    # This method is intentionally empty here and does nothing
+    def register_engulf_block(block)
+    end
+    # Add a block to the frontier
+    #
+    # The block's lines are marked as visited so the same line is not
+    # evaluated multiple times, the block is pushed onto the queue, and an
+    # invalid block re-arms the document check.
+    #
+    # Returns self
+    def <<(block)
+      register_indent_block(block)
+      @queue.push(block)
+      @check_next = true if block.invalid?
+      self
+    end
+    # Every combination of the array's elements, from size 1 up to the
+    # full length, smallest sizes first
+    #
+    # Example:
+    #
+    #   combination([:a, :b, :c, :d])
+    #   # => [[:a], [:b], [:c], [:d], [:a, :b], [:a, :c], [:a, :d], [:b, :c], [:b, :d], [:c, :d], [:a, :b, :c], [:a, :b, :d], [:a, :c, :d], [:b, :c, :d], [:a, :b, :c, :d]]
+    def self.combination(array)
+      (1..array.length).flat_map { |size| array.combination(size).to_a }
+    end
+    # Given that we know our syntax error exists somewhere in our frontier, we want to find
+    # the smallest possible set of blocks that contain all the syntax errors
+    #
+    # Returns an Array of blocks, empty when no combination qualifies
+    def detect_invalid_blocks
+      candidates = @queue.to_a.select(&:invalid?)
+      smallest = self.class.combination(candidates).find do |blocks|
+        holds_all_syntax_errors?(blocks, can_cache: false)
+      end
+      smallest || []
+    end
+  end
+end