RubyGems - syntax_search - Versions diffs - 0.1.0 - Mend

syntax_search 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25) hide show

checksums.yaml +7 -0
data/.circleci/config.yml +41 -0
data/.gitignore +13 -0
data/.rspec +3 -0
data/.travis.yml +6 -0
data/CODE_OF_CONDUCT.md +74 -0
data/Gemfile +9 -0
data/Gemfile.lock +38 -0
data/LICENSE.txt +21 -0
data/README.md +98 -0
data/Rakefile +8 -0
data/bin/console +14 -0
data/bin/setup +8 -0
data/exe/syntax_search +73 -0
data/lib/syntax_search.rb +136 -0
data/lib/syntax_search/auto.rb +51 -0
data/lib/syntax_search/code_block.rb +219 -0
data/lib/syntax_search/code_frontier.rb +312 -0
data/lib/syntax_search/code_line.rb +87 -0
data/lib/syntax_search/code_search.rb +114 -0
data/lib/syntax_search/display_invalid_blocks.rb +99 -0
data/lib/syntax_search/fyi.rb +7 -0
data/lib/syntax_search/version.rb +5 -0
data/syntax_search.gemspec +30 -0
metadata +84 -0

data/lib/syntax_search/auto.rb ADDED

@@ -0,0 +1,51 @@
+require_relative "../syntax_search"
+# Monkey patch kernel to ensure that all `require` calls call the same
+# method
+module Kernel
+  alias_method :original_require, :require
+  alias_method :original_require_relative, :require_relative
+  alias_method :original_load, :load
+  def load(file, wrap = false)
+    original_load(file)
+  rescue SyntaxError => e
+    SyntaxErrorSearch.handle_error(e)
+  end
+  def require(file)
+    original_require(file)
+  rescue SyntaxError => e
+    SyntaxErrorSearch.handle_error(e)
+  end
+  def require_relative(file)
+    if Pathname.new(file).absolute?
+      original_require file
+    else
+      original_require File.expand_path("../#{file}", caller_locations(1, 1)[0].absolute_path)
+    end
+  rescue SyntaxError => e
+    SyntaxErrorSearch.handle_error(e)
+  end
+end
+# I honestly have no idea why this Object delegation is needed
+# I keep staring at bootsnap and it doesn't have to do this
+# is there a bug in their implementation they haven't caught or
+# am I doing something different?
+class Object
+  private
+  def load(path, wrap = false)
+    Kernel.load(path, wrap)
+  rescue SyntaxError => e
+    SyntaxErrorSearch.handle_error(e)
+  end
+  def require(path)
+    Kernel.require(path)
+  rescue SyntaxError => e
+    SyntaxErrorSearch.handle_error(e)
+  end
+end

data/lib/syntax_search/code_block.rb ADDED

@@ -0,0 +1,219 @@
+# frozen_string_literal: true
+module SyntaxErrorSearch
+  # Multiple lines form a singular CodeBlock
+  #
+  # Source code is made of multiple CodeBlocks. A code block
+  # has a reference to the source code that created itself, this allows
+  # a code block to "expand" when needed
+  #
+  # The most important ability of a CodeBlock is this ability to expand:
+  #
+  # Example:
+  #
+  #   code_block.to_s # =>
+  #     #   def foo
+  #     #     puts "foo"
+  #     #   end
+  #
+  #   code_block.expand_until_next_boundry
+  #
+  #   code_block.to_s # =>
+  #     # class Foo
+  #     #   def foo
+  #     #     puts "foo"
+  #     #   end
+  #     # end
+  #
+  class CodeBlock
+    attr_reader :lines
+    def initialize(code_lines: nil, lines: [])
+      @lines = Array(lines)
+      @code_lines = code_lines
+    end
+    def is_end?
+      to_s.strip == "end"
+    end
+    def starts_at
+      @lines.first&.line_number
+    end
+    def code_lines
+      @code_lines
+    end
+    # This is used for frontier ordering, we are searching from
+    # the largest indentation to the smallest. This allows us to
+    # populate an array with multiple code blocks then call `sort!`
+    # on it without having to specify the sorting criteria
+    def <=>(other)
+      self.current_indent <=> other.current_indent
+    end
+    # Only the lines that are not empty and visible
+    def visible_lines
+      @lines
+        .select(&:not_empty?)
+        .select(&:visible?)
+    end
+    # This method is used to expand a code block to capture it's calling context
+    def expand_until_next_boundry
+      expand_to_indent(next_indent)
+      self
+    end
+    # This method expands the given code block until it captures
+    # its nearest neighbors. This is used to expand a single line of code
+    # to its smallest likely block.
+    #
+    #   code_block.to_s # =>
+    #     #     puts "foo"
+    #   code_block.expand_until_neighbors
+    #
+    #   code_block.to_s # =>
+    #     #     puts "foo"
+    #     #     puts "bar"
+    #     #     puts "baz"
+    #
+    def expand_until_neighbors
+      expand_to_indent(current_indent)
+      expand_hidden_parner_line if self.to_s.strip == "end"
+      self
+    end
+    def expand_hidden_parner_line
+      index = @lines.first.index
+      indent = current_indent
+      partner_line  = code_lines.select {|line| line.index < index && line.indent == indent }.last
+      if partner_line&.hidden?
+        partner_line.mark_visible
+        @lines.prepend(partner_line)
+      end
+    end
+    # This method expands the existing code block up (before)
+    # and down (after). It will break on change in indentation
+    # and empty lines.
+    #
+    #   code_block.to_s # =>
+    #     #   def foo
+    #     #     puts "foo"
+    #     #   end
+    #
+    #   code_block.expand_to_indent(0)
+    #   code_block.to_s # =>
+    #     # class Foo
+    #     #   def foo
+    #     #     puts "foo"
+    #     #   end
+    #     # end
+    #
+    private def expand_to_indent(indent)
+      array = []
+      before_lines(skip_empty: false).each do |line|
+        if line.empty?
+          array.prepend(line)
+          break
+        end
+        if line.indent == indent
+          array.prepend(line)
+        else
+          break
+        end
+      end
+      array << @lines
+      after_lines(skip_empty: false).each do |line|
+        if line.empty?
+          array << line
+          break
+        end
+        if line.indent == indent
+          array << line
+        else
+          break
+        end
+      end
+      @lines = array.flatten
+    end
+    def next_indent
+      [
+        before_line&.indent || 0,
+        after_line&.indent || 0
+      ].max
+    end
+    def current_indent
+      lines.detect(&:not_empty?)&.indent || 0
+    end
+    def before_line
+      before_lines.first
+    end
+    def after_line
+      after_lines.first
+    end
+    def before_lines(skip_empty: true)
+      index = @lines.first.index
+      lines = code_lines.select {|line| line.index < index }
+      lines.select!(&:not_empty?) if skip_empty
+      lines.select!(&:visible?)
+      lines.reverse!
+      lines
+    end
+    def after_lines(skip_empty: true)
+      index = @lines.last.index
+      lines = code_lines.select {|line| line.index > index }
+      lines.select!(&:not_empty?) if skip_empty
+      lines.select!(&:visible?)
+      lines
+    end
+    # Returns a code block of the source that does not include
+    # the current lines. This is useful for checking if a source
+    # with the given lines removed parses successfully. If so
+    #
+    # Then it's proof that the current block is invalid
+    def block_without
+      @block_without ||= CodeBlock.new(
+        source: @source,
+        lines: @source.code_lines - @lines
+      )
+    end
+    def document_valid_without?
+      block_without.valid?
+    end
+    def valid_without?
+      block_without.valid?
+    end
+    def invalid?
+      !valid?
+    end
+    def valid?
+      SyntaxErrorSearch.valid?(self.to_s)
+    end
+    def to_s
+      @lines.join
+    end
+  end
+end

data/lib/syntax_search/code_frontier.rb ADDED

@@ -0,0 +1,312 @@
+# frozen_string_literal: true
+module SyntaxErrorSearch
+  # This class is responsible for generating, storing, and sorting code blocks
+  #
+  # The search algorithm for finding our syntax errors isn't in this class, but
+  # this class holds the bulk of the logic for generating, storing, detecting
+  # and filtering invalid code.
+  #
+  # This is loosely based on the idea of a "frontier" for searching for a path
+  # example: https://en.wikipedia.org/wiki/Dijkstra%27s_algorithm
+  #
+  # In this case our path is going from code with a syntax error to code without a
+  # syntax error. We're currently doing that by evaluating individual lines
+  # with respect to indentation and other whitespace (empty lines). As represented
+  # by individual "code blocks".
+  #
+  # This class does not just store the frontier that we're searching, but is responsible
+  # for generating new code blocks as well. This is not ideal, but the state of generating
+  # and evaluating paths i.e. codeblocks is very tightly coupled.
+  #
+  # ## Creation
+  #
+  # This example code is re-used in the other sections
+  #
+  # Example:
+  #
+  #   code_lines = [
+  #     CodeLine.new(line: "def cinco\n", index: 0)
+  #     CodeLine.new(line: "  def dog\n", index: 1) # Syntax error 1
+  #     CodeLine.new(line: "  def cat\n", index: 2) # Syntax error 2
+  #     CodeLine.new(line: "end\n",       index: 3)
+  #   ]
+  #
+  #   frontier = CodeFrontier.new(code_lines: code_lines)
+  #
+  #   frontier << frontier.next_block if frontier.next_block?
+  #   frontier << frontier.next_block if frontier.next_block?
+  #
+  #   frontier.holds_all_syntax_errors? # => true
+  #   block = frontier.pop
+  #   frontier.holds_all_syntax_errors? # => false
+  #   frontier << block
+  #   frontier.holds_all_syntax_errors? # => true
+  #
+  #   frontier.detect_invalid_blocks.map(&:to_s) # =>
+  #   [
+  #     "def dog\n",
+  #     "def cat\n"
+  #   ]
+  #
+  # ## Block Generation
+  #
+  # Currently code blocks are generated based off of indentation. With the idea that blocks are,
+  # well, indented. Once a code block is added to the frontier or it is expanded, or it is generated
+  # then we also need to remove those lines from our generation code so we don't generate the same block
+  # twice by accident.
+  #
+  # This block generation is currently done via the "indent_hash" internally by starting at the
+  # most deeply indented lines (the largest indentation) and working outwards.
+  #
+  # Example:
+  #
+  #   ```
+  #   def river
+  #     puts "lol" # <=== Start looking here and expand outwards
+  #   end
+  #   ```
+  #
+  # Generating new code blocks is a little verbose but looks like this:
+  #
+  #   frontier << frontier.next_block if frontier.next_block?
+  #
+  # Once a block is in the frontier, it can be popped off:
+  #
+  #   frontier.pop
+  #   # => <# CodeBlock >
+  #
+  # ## Block (frontier) storage, ordering and retrieval
+  #
+  # Once a block is generated it is stored internally in a frontier array. This is very similar to a search algorithm.
+  # The array is sorted by indentation order, so that when a block is popped off the array, the one with
+  # the largest current indentation is evaluated first.
+  #
+  # For example, if we have these two blocks in the frontier:
+  #
+  #   ```
+  #   # Block A - 0 spaces for indentation
+  #
+  #   def cinco
+  #     puts "lol"
+  #   end
+  #   ```
+  #
+  #   ```
+  #   # Block B - 2 spaces for indentation
+  #
+  #     def river
+  #       puts "hehe"
+  #     end
+  #   ```
+  #
+  # The "Block B" has more current indentation, so it would be evaluated first.
+  #
+  # ## Frontier evaluation (Find the syntax error)
+  #
+  # Another key difference between this and a normal search "frontier" is that we're not checking if
+  # an individual code block meets the goal (turning invalid code to valid code) since there can
+  # be multiple syntax errors and this will require multiple code blocks. To handle this, we're
+  # evaluating all the contents of the frontier at the same time to see if the solution exists in any
+  # of our search blocks.
+  #
+  #   # Using the previously generated frontier
+  #
+  #   frontier << CodeBlock.new(lines: code_lines[1], code_lines: code_lines)
+  #   frontier.holds_all_syntax_errors? # => false
+  #
+  #   frontier << CodeBlock.new(lines: code_lines[2], code_lines: code_lines)
+  #   frontier.holds_all_syntax_errors? # => true
+  #
+  # ## Detect invalid blocks (Filter for smallest solution)
+  #
+  # After we prove that a solution exists and we've found it to be in our frontier, we can stop searching.
+  # Once we've done this, we need to search through the existing frontier code blocks to find the minimum combination
+  # of blocks that hold the solution. This is done in: `detect_invalid_blocks`.
+  #
+  #   # Using the previously generated frontier
+  #
+  #   frontier << CodeBlock.new(lines: code_lines[0], code_lines: code_lines)
+  #   frontier << CodeBlock.new(lines: code_lines[1], code_lines: code_lines)
+  #   frontier << CodeBlock.new(lines: code_lines[2], code_lines: code_lines)
+  #   frontier << CodeBlock.new(lines: code_lines[3], code_lines: code_lines)
+  #
+  #   frontier.count # => 4
+  #   frontier.detect_invalid_blocks.length => 2
+  #   frontier.detect_invalid_blocks.map(&:to_s) # =>
+  #   [
+  #     "def dog\n",
+  #     "def cat\n"
+  #   ]
+  #
+  # Once invalid blocks are found and filtered, then they can be passed to a formatter.
+  #
+  #
+  #
+  class IndentScan
+    attr_reader :code_lines
+    def initialize(code_lines: )
+      @code_lines = code_lines
+    end
+    def neighbors_from_top(top_line)
+      code_lines
+        .select {|l| l.index >= top_line.index }
+        .select {|l| l.not_empty? }
+        .select {|l| l.visible? }
+        .take_while {|l| l.indent >= top_line.indent }
+    end
+    def each_neighbor_block(top_line)
+      neighbors = neighbors_from_top(top_line)
+      until neighbors.empty?
+        lines = [neighbors.pop]
+        while (block = CodeBlock.new(lines: lines, code_lines: code_lines)) && block.invalid? && neighbors.any?
+          lines.prepend neighbors.pop
+        end
+        yield block if block
+      end
+    end
+  end
+  class CodeFrontier
+    def initialize(code_lines: )
+      @code_lines = code_lines
+      @frontier = []
+      @indent_hash = {}
+      code_lines.each do |line|
+        next if line.empty?
+        @indent_hash[line.indent] ||= []
+        @indent_hash[line.indent] << line
+      end
+    end
+    def count
+      @frontier.count
+    end
+    # Returns true if the document is valid with all lines
+    # removed. By default it checks all blocks in present in
+    # the frontier array, but can be used for arbitrary arrays
+    # of codeblocks as well
+    def holds_all_syntax_errors?(block_array = @frontier)
+      without_lines = block_array.map do |block|
+        block.lines
+      end
+      SyntaxErrorSearch.valid_without?(
+        without_lines: without_lines,
+        code_lines: @code_lines
+      )
+    end
+    # Returns a code block with the largest indentation possible
+    def pop
+      return nil if empty?
+      return @frontier.pop
+    end
+    def next_block?
+      !@indent_hash.empty?
+    end
+    def indent_hash_indent
+      @indent_hash.keys.sort.last
+    end
+    def next_indent_line
+      indent = @indent_hash.keys.sort.last
+      @indent_hash[indent]&.first
+    end
+    def generate_blocks
+    end
+    def next_block
+      indent = @indent_hash.keys.sort.last
+      lines = @indent_hash[indent].first
+      block = CodeBlock.new(
+        lines: lines,
+        code_lines: @code_lines
+      ).expand_until_neighbors
+      register(block)
+      block
+    end
+    def expand?
+      return false if @frontier.empty?
+      return true if @indent_hash.empty?
+      @frontier.last.current_indent >= @indent_hash.keys.sort.last
+    end
+    # This method is responsible for determining if a new code
+    # block should be generated instead of evaluating an already
+    # existing block in the frontier
+    def generate_new_block?
+      return false if @indent_hash.empty?
+      return true if @frontier.empty?
+      @frontier.last.current_indent <= @indent_hash.keys.sort.last
+    end
+    def register(block)
+      block.lines.each do |line|
+        @indent_hash[line.indent]&.delete(line)
+      end
+      @indent_hash.select! {|k, v| !v.empty?}
+      self
+    end
+    # Add a block to the frontier
+    #
+    # This method ensures the frontier always remains sorted (in indentation order)
+    # and that each code block's lines are removed from the indentation hash so we
+    # don't re-evaluate the same line multiple times.
+    def <<(block)
+      register(block)
+      @frontier << block
+      @frontier.sort!
+      self
+    end
+    def any?
+      !empty?
+    end
+    def empty?
+      @frontier.empty? && @indent_hash.empty?
+    end
+    # Example:
+    #
+    #   combination([:a, :b, :c, :d])
+    #   # => [[:a], [:b], [:c], [:d], [:a, :b], [:a, :c], [:a, :d], [:b, :c], [:b, :d], [:c, :d], [:a, :b, :c], [:a, :b, :d], [:a, :c, :d], [:b, :c, :d], [:a, :b, :c, :d]]
+    def self.combination(array)
+      guesses = []
+      1.upto(array.length).each do |size|
+        guesses.concat(array.combination(size).to_a)
+      end
+      guesses
+    end
+    # Given that we know our syntax error exists somewhere in our frontier, we want to find
+    # the smallest possible set of blocks that contain all the syntax errors
+    def detect_invalid_blocks
+      self.class.combination(@frontier).detect do |block_array|
+        holds_all_syntax_errors?(block_array)
+      end || []
+    end
+  end
+end