RubyGems - syntax_search - Versions diffs - 0.1.1 → 0.2.0 - Mend

syntax_search 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25) hide show

checksums.yaml +4 -4
data/.github/workflows/check_changelog.yml +13 -0
data/CHANGELOG.md +24 -1
data/Gemfile +1 -0
data/Gemfile.lock +3 -5
data/README.md +28 -15
data/assets/syntax_search.gif +0 -0
data/exe/syntax_search +1 -0
data/lib/syntax_search.rb +23 -14
data/lib/syntax_search/around_block_scan.rb +193 -0
data/lib/syntax_search/block_expand.rb +74 -0
data/lib/syntax_search/capture_code_context.rb +62 -0
data/lib/syntax_search/code_block.rb +24 -165
data/lib/syntax_search/code_frontier.rb +40 -201
data/lib/syntax_search/code_line.rb +42 -1
data/lib/syntax_search/code_search.rb +60 -20
data/lib/syntax_search/display_code_with_line_numbers.rb +56 -0
data/lib/syntax_search/display_invalid_blocks.rb +46 -45
data/lib/syntax_search/heredoc_block_parse.rb +30 -0
data/lib/syntax_search/lex_all.rb +58 -0
data/lib/syntax_search/parse_blocks_from_indent_line.rb +56 -0
data/lib/syntax_search/version.rb +1 -1
data/lib/syntax_search/who_dis_syntax_error.rb +32 -0
data/syntax_search.gemspec +0 -2
metadata +13 -17

data/lib/syntax_search/block_expand.rb ADDED

@@ -0,0 +1,74 @@
+# frozen_string_literal: true
+module SyntaxErrorSearch
+  # This class is responsible for taking a code block that exists
+  # at a deep indentation level and then iteratively growing the block
+  # so that it captures everything within the same indentation block.
+  #
+  #   def dog
+  #     puts "bow"
+  #     puts "wow"
+  #   end
+  #
+  # block = BlockExpand.new(code_lines: code_lines)
+  #   .call(CodeBlock.new(lines: code_lines[1]))
+  #
+  # puts block.to_s
+  # # => puts "bow"
+  #      puts "wow"
+  #
+  #
+  # Once a code block has captured everything at a given indentation level
+  # then it will expand to capture surrounding indentation.
+  #
+  # block = BlockExpand.new(code_lines: code_lines)
+  #   .call(block)
+  #
+  # block.to_s
+  # # => def dog
+  #        puts "bow"
+  #        puts "wow"
+  #      end
+  #
+  class BlockExpand
+    def initialize(code_lines: )
+      @code_lines = code_lines
+    end
+    def call(block)
+      if (next_block = expand_neighbors(block, grab_empty: true))
+        return next_block
+      end
+      expand_indent(block)
+    end
+    def expand_indent(block)
+      block = AroundBlockScan.new(code_lines: @code_lines, block: block)
+        .skip(:hidden?)
+        .stop_after_kw
+        .scan_adjacent_indent
+        .code_block
+    end
+    def expand_neighbors(block, grab_empty: true)
+      scan = AroundBlockScan.new(code_lines: @code_lines, block: block)
+        .skip(:hidden?)
+        .stop_after_kw
+        .scan_neighbors
+      # Slurp up empties
+      if grab_empty
+        scan = AroundBlockScan.new(code_lines: @code_lines, block: scan.code_block)
+          .scan_while {|line| line.empty? || line.hidden? }
+      end
+      new_block = scan.code_block
+      if block.lines == new_block.lines
+        return nil
+      else
+        return new_block
+      end
+    end
+  end
+end

data/lib/syntax_search/capture_code_context.rb ADDED

@@ -0,0 +1,62 @@
+# frozen_string_literal: true
+module SyntaxErrorSearch
+  # Given one or more blocks, this class will capture surrounding
+  # code to give the user more context for the location of
+  # the problem.
+  #
+  # `call` returns an array of CodeLines to be rendered.
+  #
+  # Surrounding code is captured regardless of visible state
+  #
+  #   puts block.to_s # => "def bark"
+  #
+  #   context = CaptureCodeContext.new(
+  #     blocks: block,
+  #     code_lines: code_lines
+  #   )
+  #
+  #   puts context.call.join
+  #   # =>
+  #     class Dog
+  #       def bark
+  #     end
+  #
+  class CaptureCodeContext
+    attr_reader :code_lines
+    def initialize(blocks: , code_lines:)
+      @blocks = Array(blocks)
+      @code_lines = code_lines
+      @visible_lines = @blocks.map(&:visible_lines).flatten
+      @lines_to_output = @visible_lines.dup
+    end
+    def call
+      @blocks.each do |block|
+        around_lines = AroundBlockScan.new(code_lines: @code_lines, block: block)
+          .start_at_next_line
+          .capture_neighbor_context
+        around_lines -= block.lines
+        @lines_to_output.concat(around_lines)
+        AroundBlockScan.new(
+          block: block,
+          code_lines: @code_lines,
+        ).on_falling_indent do |line|
+          @lines_to_output << line
+        end
+      end
+      @lines_to_output.select!(&:not_empty?)
+      @lines_to_output.select!(&:not_comment?)
+      @lines_to_output.uniq!
+      @lines_to_output.sort!
+      return @lines_to_output
+    end
+  end
+end

data/lib/syntax_search/code_block.rb CHANGED

@@ -3,11 +3,7 @@
 module SyntaxErrorSearch
   # Multiple lines form a singular CodeBlock
   #
-  # Source code is made of multiple CodeBlocks. A code block
-  # has a reference to the source code that created itself, this allows
-  # a code block to "expand" when needed
-  #
-  # The most important ability of a CodeBlock is this ability to expand:
+  # Source code is made of multiple CodeBlocks.
   #
   # Example:
   #
@@ -16,33 +12,39 @@ module SyntaxErrorSearch
   #     #     puts "foo"
   #     #   end
   #
-  #   code_block.expand_until_next_boundry
+  #   code_block.valid? # => true
+  #   code_block.invalid? # => false
   #
-  #   code_block.to_s # =>
-  #     # class Foo
-  #     #   def foo
-  #     #     puts "foo"
-  #     #   end
-  #     # end
   #
   class CodeBlock
     attr_reader :lines
-    def initialize(code_lines: nil, lines: [])
+    def initialize(lines: [])
       @lines = Array(lines)
-      @code_lines = code_lines
+    end
+    def visible_lines
+      @lines.select(&:visible?).select(&:not_empty?)
+    end
+    def mark_invisible
+      @lines.map(&:mark_invisible)
     end
     def is_end?
       to_s.strip == "end"
     end
+    def hidden?
+      @lines.all?(&:hidden?)
+    end
     def starts_at
-      @lines.first&.line_number
+      @starts_at ||= @lines.first&.line_number
     end
-    def code_lines
-      @code_lines
+    def ends_at
+      @ends_at ||= @lines.last&.line_number
     end
     # This is used for frontier ordering, we are searching from
@@ -50,158 +52,15 @@ module SyntaxErrorSearch
     # populate an array with multiple code blocks then call `sort!`
     # on it without having to specify the sorting criteria
     def <=>(other)
-      self.current_indent <=> other.current_indent
-    end
-    # Only the lines that are not empty and visible
-    def visible_lines
-      @lines
-        .select(&:not_empty?)
-        .select(&:visible?)
-    end
-    # This method is used to expand a code block to capture it's calling context
-    def expand_until_next_boundry
-      expand_to_indent(next_indent)
-      self
-    end
-    # This method expands the given code block until it captures
-    # its nearest neighbors. This is used to expand a single line of code
-    # to its smallest likely block.
-    #
-    #   code_block.to_s # =>
-    #     #     puts "foo"
-    #   code_block.expand_until_neighbors
-    #
-    #   code_block.to_s # =>
-    #     #     puts "foo"
-    #     #     puts "bar"
-    #     #     puts "baz"
-    #
-    def expand_until_neighbors
-      expand_to_indent(current_indent)
-      expand_hidden_parner_line if self.to_s.strip == "end"
-      self
-    end
-    def expand_hidden_parner_line
-      index = @lines.first.index
-      indent = current_indent
-      partner_line  = code_lines.select {|line| line.index < index && line.indent == indent }.last
-      if partner_line&.hidden?
-        partner_line.mark_visible
-        @lines.prepend(partner_line)
-      end
-    end
-    # This method expands the existing code block up (before)
-    # and down (after). It will break on change in indentation
-    # and empty lines.
-    #
-    #   code_block.to_s # =>
-    #     #   def foo
-    #     #     puts "foo"
-    #     #   end
-    #
-    #   code_block.expand_to_indent(0)
-    #   code_block.to_s # =>
-    #     # class Foo
-    #     #   def foo
-    #     #     puts "foo"
-    #     #   end
-    #     # end
-    #
-    private def expand_to_indent(indent)
-      array = []
-      before_lines(skip_empty: false).each do |line|
-        if line.empty?
-          array.prepend(line)
-          break
-        end
-        if line.indent == indent
-          array.prepend(line)
-        else
-          break
-        end
-      end
-      array << @lines
-      after_lines(skip_empty: false).each do |line|
-        if line.empty?
-          array << line
-          break
-        end
-        if line.indent == indent
-          array << line
-        else
-          break
-        end
-      end
+      out = self.current_indent <=> other.current_indent
+      return out if out != 0
-      @lines = array.flatten
-    end
-    def next_indent
-      [
-        before_line&.indent || 0,
-        after_line&.indent || 0
-      ].max
+      # Stable sort
+      self.starts_at <=> other.starts_at
     end
     def current_indent
-      lines.detect(&:not_empty?)&.indent || 0
-    end
-    def before_line
-      before_lines.first
-    end
-    def after_line
-      after_lines.first
-    end
-    def before_lines(skip_empty: true)
-      index = @lines.first.index
-      lines = code_lines.select {|line| line.index < index }
-      lines.select!(&:not_empty?) if skip_empty
-      lines.select!(&:visible?)
-      lines.reverse!
-      lines
-    end
-    def after_lines(skip_empty: true)
-      index = @lines.last.index
-      lines = code_lines.select {|line| line.index > index }
-      lines.select!(&:not_empty?) if skip_empty
-      lines.select!(&:visible?)
-      lines
-    end
-    # Returns a code block of the source that does not include
-    # the current lines. This is useful for checking if a source
-    # with the given lines removed parses successfully. If so
-    #
-    # Then it's proof that the current block is invalid
-    def block_without
-      @block_without ||= CodeBlock.new(
-        source: @source,
-        lines: @source.code_lines - @lines
-      )
-    end
-    def document_valid_without?
-      block_without.valid?
-    end
-    def valid_without?
-      block_without.valid?
+      @current_indent ||= lines.select(&:not_empty?).map(&:indent).min || 0
     end
     def invalid?

data/lib/syntax_search/code_frontier.rb CHANGED

@@ -1,178 +1,43 @@
 # frozen_string_literal: true
 module SyntaxErrorSearch
-  # This class is responsible for generating, storing, and sorting code blocks
+  # The main function of the frontier is to hold the edges of our search and to
+  # evaluate when we can stop searching.
   #
-  # The search algorithm for finding our syntax errors isn't in this class, but
-  # this is class holds the bulk of the logic for generating, storing, detecting
-  # and filtering invalid code.
+  # ## Knowing where we've been
   #
-  # This is loosely based on the idea of a "frontier" for searching for a path
-  # example: https://en.wikipedia.org/wiki/Dijkstra%27s_algorithm
+  # Once a code block is generated it is added onto the frontier where it will be
+  # sorted and then the frontier can be filtered. Large blocks that totally contain a
+  # smaller block will cause the smaller block to be evicted.
   #
-  # In this case our path is going from code with a syntax error to code without a
-  # syntax error. We're currently doing that by evaluating individual lines
-  # with respect to indentation and other whitespace (empty lines). As represented
-  # by individual "code blocks".
+  #   CodeFrontier#<<
+  #   CodeFrontier#pop
   #
-  # This class does not just store the frontier that we're searching, but is responsible
-  # for generating new code blocks as well. This is not ideal, but the state of generating
-  # and evaluating paths i.e. codeblocks is very tightly coupled.
+  # ## Knowing where we can go
   #
-  # ## Creation
+  # Internally it keeps track of an "indent hash" which is exposed via `next_indent_line`.
+  # When called, this will return a line of code with the most indentation.
   #
-  # This example code is re-used in the other sections
+  # This line of code can be used to build a CodeBlock, and when that code block
+  # is added back to the frontier, the lines in the code block are removed from the
+  # indent hash so we don't double-create the same block.
   #
-  # Example:
+  #   CodeFrontier#next_indent_line
+  #   CodeFrontier#register_indent_block
   #
-  #   code_lines = [
-  #     CodeLine.new(line: "def cinco\n", index: 0)
-  #     CodeLine.new(line: "  def dog\n", index: 1) # Syntax error 1
-  #     CodeLine.new(line: "  def cat\n", index: 2) # Syntax error 2
-  #     CodeLine.new(line: "end\n",       index: 3)
-  #   ]
+  # ## Knowing when to stop
   #
-  #   frontier = CodeFrontier.new(code_lines: code_lines)
+  # The frontier holds the syntax error when removing all code blocks from the original
+  # source document allows it to be parsed as syntactically valid:
   #
-  #   frontier << frontier.next_block if frontier.next_block?
-  #   frontier << frontier.next_block if frontier.next_block?
+  #   CodeFrontier#holds_all_syntax_errors?
   #
-  #   frontier.holds_all_syntax_errors? # => true
-  #   block = frontier.pop
-  #   frontier.holds_all_syntax_errors? # => false
-  #   frontier << block
-  #   frontier.holds_all_syntax_errors? # => true
+  # ## Filtering false positives
   #
-  #   frontier.detect_invalid_blocks.map(&:to_s) # =>
-  #   [
-  #     "def dog\n",
-  #     "def cat\n"
-  #   ]
+  # Once the search is completed, the frontier will have many blocks that do not contain
+  # the syntax error. To filter to the smallest subset that does, call:
   #
-  # ## Block Generation
-  #
-  # Currently code blocks are generated based off of indentation. With the idea that blocks are,
-  # well, indented. Once a code block is added to the frontier or it is expanded, or it is generated
-  # then we also need to remove those lines from our generation code so we don't generate the same block
-  # twice by accident.
-  #
-  # This is block generation is currently done via the "indent_hash" internally by starting at the outer
-  # most indentation.
-  #
-  # Example:
-  #
-  #   ```
-  #   def river
-  #     puts "lol" # <=== Start looking here and expand outwards
-  #   end
-  #   ```
-  #
-  # Generating new code blocks is a little verbose but looks like this:
-  #
-  #   frontier << frontier.next_block if frontier.next_block?
-  #
-  # Once a block is in the frontier, it can be popped off:
-  #
-  #   frontier.pop
-  #   # => <# CodeBlock >
-  #
-  # ## Block (frontier) storage, ordering and retrieval
-  #
-  # Once a block is generated it is stored internally in a frontier array. This is very similar to a search algorithm.
-  # The array is sorted by indentation order, so that when a block is popped off the array, the one with
-  # the largest current indentation is evaluated first.
-  #
-  # For example, if we have these two blocks in the frontier:
-  #
-  #   ```
-  #   # Block A - 0 spaces for indentation
-  #
-  #   def cinco
-  #     puts "lol"
-  #   end
-  #   ```
-  #
-  #   ```
-  #   # Block B - 2 spaces for indentation
-  #
-  #     def river
-  #       puts "hehe"
-  #     end
-  #   ```
-  #
-  # The "Block B" has more current indentation, so it would be evaluated first.
-  #
-  # ## Frontier evaluation (Find the syntax error)
-  #
-  # Another key difference between this and a normal search "frontier" is that we're not checking if
-  # an individual code block meets the goal (turning invalid code to valid code) since there can
-  # be multiple syntax errors and this will require multiple code blocks. To handle this, we're
-  # evaluating all the contents of the frontier at the same time to see if the solution exists in any
-  # of our search blocks.
-  #
-  #   # Using the previously generated frontier
-  #
-  #   frontier << Block.new(lines: code_lines[1], code_lines: code_lines)
-  #   frontier.holds_all_syntax_errors? # => false
-  #
-  #   frontier << Block.new(lines: code_lines[2], code_lines: code_lines)
-  #   frontier.holds_all_syntax_errors? # => true
-  #
-  # ## Detect invalid blocks (Filter for smallest solution)
-  #
-  # After we prove that a solution exists and we've found it to be in our frontier, we can start stop searching.
-  # Once we've done this, we need to search through the existing frontier code blocks to find the minimum combination
-  # of blocks that hold the solution. This is done in: `detect_invalid_blocks`.
-  #
-  #   # Using the previously generated frontier
-  #
-  #   frontier << CodeBlock.new(lines: code_lines[0], code_lines: code_lines)
-  #   frontier << CodeBlock.new(lines: code_lines[1], code_lines: code_lines)
-  #   frontier << CodeBlock.new(lines: code_lines[2], code_lines: code_lines)
-  #   frontier << CodeBlock.new(lines: code_lines[3], code_lines: code_lines)
-  #
-  #   frontier.count # => 4
-  #   frontier.detect_invalid_blocks.length => 2
-  #   frontier.detect_invalid_blocks.map(&:to_s) # =>
-  #   [
-  #     "def dog\n",
-  #     "def cat\n"
-  #   ]
-  #
-  # Once invalid blocks are found and filtered, then they can be passed to a formatter.
-  #
-  #
-  #
-  class IndentScan
-    attr_reader :code_lines
-    def initialize(code_lines: )
-      @code_lines = code_lines
-    end
-    def neighbors_from_top(top_line)
-      code_lines
-        .select {|l| l.index >= top_line.index }
-        .select {|l| l.not_empty? }
-        .select {|l| l.visible? }
-        .take_while {|l| l.indent >= top_line.indent }
-    end
-    def each_neighbor_block(top_line)
-      neighbors = neighbors_from_top(top_line)
-      until neighbors.empty?
-        lines = [neighbors.pop]
-        while (block = CodeBlock.new(lines: lines, code_lines: code_lines)) && block.invalid? && neighbors.any?
-          lines.prepend neighbors.pop
-        end
-        yield block if block
-      end
-    end
-  end
+  #   CodeFrontier#detect_invalid_blocks
   class CodeFrontier
     def initialize(code_lines: )
       @code_lines = code_lines
@@ -207,16 +72,9 @@ module SyntaxErrorSearch
     # Returns a code block with the largest indentation possible
     def pop
-      return nil if empty?
       return @frontier.pop
     end
-    def next_block?
-      !@indent_hash.empty?
-    end
     def indent_hash_indent
       @indent_hash.keys.sort.last
     end
@@ -226,40 +84,25 @@ module SyntaxErrorSearch
       @indent_hash[indent]&.first
     end
-    def generate_blocks
-    end
-    def next_block
-      indent = @indent_hash.keys.sort.last
-      lines = @indent_hash[indent].first
-      block = CodeBlock.new(
-        lines: lines,
-        code_lines: @code_lines
-      ).expand_until_neighbors
-      register(block)
-      block
-    end
     def expand?
       return false if @frontier.empty?
       return true if @indent_hash.empty?
-      @frontier.last.current_indent >= @indent_hash.keys.sort.last
-    end
+      frontier_indent = @frontier.last.current_indent
+      hash_indent = @indent_hash.keys.sort.last
-    # This method is responsible for determining if a new code
-    # block should be generated instead of evaluating an already
-    # existing block in the frontier
-    def generate_new_block?
-      return false if @indent_hash.empty?
-      return true if @frontier.empty?
+      if ENV["DEBUG"]
+        puts "```"
+        puts @frontier.last.to_s
+        puts "```"
+        puts "  @frontier indent: #{frontier_indent}"
+        puts "  @hash indent:     #{hash_indent}"
+      end
-      @frontier.last.current_indent <= @indent_hash.keys.sort.last
+      frontier_indent >= hash_indent
     end
-    def register(block)
+    def register_indent_block(block)
       block.lines.each do |line|
         @indent_hash[line.indent]&.delete(line)
       end
@@ -273,22 +116,18 @@ module SyntaxErrorSearch
     # and that each code block's lines are removed from the indentation hash so we
     # don't re-evaluate the same line multiple times.
     def <<(block)
-      register(block)
+      register_indent_block(block)
+      # Make sure we don't double expand: if a code block fully engulfs another code block, keep the bigger one
+      @frontier.reject! {|b|
+        b.starts_at >= block.starts_at && b.ends_at <= block.ends_at
+      }
       @frontier << block
       @frontier.sort!
       self
     end
-    def any?
-      !empty?
-    end
-    def empty?
-      @frontier.empty? && @indent_hash.empty?
-    end
     # Example:
     #
     #   combination([:a, :b, :c, :d])