RubyGems - syntax_search - Versions diffs - 0.1.0 → 0.1.5 - Mend

syntax_search 0.1.0 → 0.1.5

Files changed (20) hide show

checksums.yaml +4 -4
data/.github/workflows/check_changelog.yml +13 -0
data/CHANGELOG.md +26 -0
data/Gemfile +1 -0
data/Gemfile.lock +3 -5
data/README.md +39 -20
data/assets/syntax_search.gif +0 -0
data/lib/syntax_search.rb +23 -15
data/lib/syntax_search/around_block_scan.rb +91 -0
data/lib/syntax_search/block_expand.rb +78 -0
data/lib/syntax_search/code_block.rb +16 -165
data/lib/syntax_search/code_frontier.rb +40 -201
data/lib/syntax_search/code_search.rb +45 -20
data/lib/syntax_search/display_invalid_blocks.rb +24 -13
data/lib/syntax_search/heredoc_block_parse.rb +30 -0
data/lib/syntax_search/parse_blocks_from_indent_line.rb +56 -0
data/lib/syntax_search/version.rb +1 -1
data/lib/syntax_search/who_dis_syntax_error.rb +32 -0
data/syntax_search.gemspec +0 -2
metadata +12 -18

data/lib/syntax_search/code_frontier.rb CHANGED

@@ -1,178 +1,43 @@
 # frozen_string_literal: true
 module SyntaxErrorSearch
-  # This class is responsible for generating, storing, and sorting code blocks
+  # The main function of the frontier is to hold the edges of our search and to
+  # evaluate when we can stop searching.
   #
-  # The search algorithm for finding our syntax errors isn't in this class, but
-  # this is class holds the bulk of the logic for generating, storing, detecting
-  # and filtering invalid code.
+  # ## Knowing where we've been
   #
-  # This is loosely based on the idea of a "frontier" for searching for a path
-  # example: https://en.wikipedia.org/wiki/Dijkstra%27s_algorithm
+  # Once a code block is generated it is added onto the frontier where it will be
+  # sorted and then the frontier can be filtered. Large blocks that totally contain a
+  # smaller block will cause the smaller block to be evicted.
   #
-  # In this case our path is going from code with a syntax error to code without a
-  # syntax error. We're currently doing that by evaluating individual lines
-  # with respect to indentation and other whitespace (empty lines). As represented
-  # by individual "code blocks".
+  #   CodeFrontier#<<
+  #   CodeFrontier#pop
   #
-  # This class does not just store the frontier that we're searching, but is responsible
-  # for generating new code blocks as well. This is not ideal, but the state of generating
-  # and evaluating paths i.e. codeblocks is very tightly coupled.
+  # ## Knowing where we can go
   #
-  # ## Creation
+  # Internally it keeps track of an "indent hash", which is exposed via `next_indent_line`.
+  # When called, this will return a line of code with the most indentation.
   #
-  # This example code is re-used in the other sections
+  # This line of code can be used to build a CodeBlock, and when that code block
+  # is added back to the frontier, the lines in the code block are removed from the
+  # indent hash so we don't double-create the same block.
   #
-  # Example:
+  #   CodeFrontier#next_indent_line
+  #   CodeFrontier#register_indent_block
   #
-  #   code_lines = [
-  #     CodeLine.new(line: "def cinco\n", index: 0)
-  #     CodeLine.new(line: "  def dog\n", index: 1) # Syntax error 1
-  #     CodeLine.new(line: "  def cat\n", index: 2) # Syntax error 2
-  #     CodeLine.new(line: "end\n",       index: 3)
-  #   ]
+  # ## Knowing when to stop
   #
-  #   frontier = CodeFrontier.new(code_lines: code_lines)
+  # The frontier holds the syntax error when removing all code blocks from the original
+  # source document allows it to be parsed as syntactically valid:
   #
-  #   frontier << frontier.next_block if frontier.next_block?
-  #   frontier << frontier.next_block if frontier.next_block?
+  #   CodeFrontier#holds_all_syntax_errors?
   #
-  #   frontier.holds_all_syntax_errors? # => true
-  #   block = frontier.pop
-  #   frontier.holds_all_syntax_errors? # => false
-  #   frontier << block
-  #   frontier.holds_all_syntax_errors? # => true
+  # ## Filtering false positives
   #
-  #   frontier.detect_invalid_blocks.map(&:to_s) # =>
-  #   [
-  #     "def dog\n",
-  #     "def cat\n"
-  #   ]
+  # Once the search is completed, the frontier will have many blocks that do not contain
+  # the syntax error. To filter to the smallest subset that does, call:
   #
-  # ## Block Generation
-  #
-  # Currently code blocks are generated based off of indentation. With the idea that blocks are,
-  # well, indented. Once a code block is added to the frontier or it is expanded, or it is generated
-  # then we also need to remove those lines from our generation code so we don't generate the same block
-  # twice by accident.
-  #
-  # This is block generation is currently done via the "indent_hash" internally by starting at the outer
-  # most indentation.
-  #
-  # Example:
-  #
-  #   ```
-  #   def river
-  #     puts "lol" # <=== Start looking here and expand outwards
-  #   end
-  #   ```
-  #
-  # Generating new code blocks is a little verbose but looks like this:
-  #
-  #   frontier << frontier.next_block if frontier.next_block?
-  #
-  # Once a block is in the frontier, it can be popped off:
-  #
-  #   frontier.pop
-  #   # => <# CodeBlock >
-  #
-  # ## Block (frontier) storage, ordering and retrieval
-  #
-  # Once a block is generated it is stored internally in a frontier array. This is very similar to a search algorithm.
-  # The array is sorted by indentation order, so that when a block is popped off the array, the one with
-  # the largest current indentation is evaluated first.
-  #
-  # For example, if we have these two blocks in the frontier:
-  #
-  #   ```
-  #   # Block A - 0 spaces for indentation
-  #
-  #   def cinco
-  #     puts "lol"
-  #   end
-  #   ```
-  #
-  #   ```
-  #   # Block B - 2 spaces for indentation
-  #
-  #     def river
-  #       puts "hehe"
-  #     end
-  #   ```
-  #
-  # The "Block B" has more current indentation, so it would be evaluated first.
-  #
-  # ## Frontier evaluation (Find the syntax error)
-  #
-  # Another key difference between this and a normal search "frontier" is that we're not checking if
-  # an individual code block meets the goal (turning invalid code to valid code) since there can
-  # be multiple syntax errors and this will require multiple code blocks. To handle this, we're
-  # evaluating all the contents of the frontier at the same time to see if the solution exists in any
-  # of our search blocks.
-  #
-  #   # Using the previously generated frontier
-  #
-  #   frontier << Block.new(lines: code_lines[1], code_lines: code_lines)
-  #   frontier.holds_all_syntax_errors? # => false
-  #
-  #   frontier << Block.new(lines: code_lines[2], code_lines: code_lines)
-  #   frontier.holds_all_syntax_errors? # => true
-  #
-  # ## Detect invalid blocks (Filter for smallest solution)
-  #
-  # After we prove that a solution exists and we've found it to be in our frontier, we can start stop searching.
-  # Once we've done this, we need to search through the existing frontier code blocks to find the minimum combination
-  # of blocks that hold the solution. This is done in: `detect_invalid_blocks`.
-  #
-  #   # Using the previously generated frontier
-  #
-  #   frontier << CodeBlock.new(lines: code_lines[0], code_lines: code_lines)
-  #   frontier << CodeBlock.new(lines: code_lines[1], code_lines: code_lines)
-  #   frontier << CodeBlock.new(lines: code_lines[2], code_lines: code_lines)
-  #   frontier << CodeBlock.new(lines: code_lines[3], code_lines: code_lines)
-  #
-  #   frontier.count # => 4
-  #   frontier.detect_invalid_blocks.length => 2
-  #   frontier.detect_invalid_blocks.map(&:to_s) # =>
-  #   [
-  #     "def dog\n",
-  #     "def cat\n"
-  #   ]
-  #
-  # Once invalid blocks are found and filtered, then they can be passed to a formatter.
-  #
-  #
-  #
-  class IndentScan
-    attr_reader :code_lines
-    def initialize(code_lines: )
-      @code_lines = code_lines
-    end
-    def neighbors_from_top(top_line)
-      code_lines
-        .select {|l| l.index >= top_line.index }
-        .select {|l| l.not_empty? }
-        .select {|l| l.visible? }
-        .take_while {|l| l.indent >= top_line.indent }
-    end
-    def each_neighbor_block(top_line)
-      neighbors = neighbors_from_top(top_line)
-      until neighbors.empty?
-        lines = [neighbors.pop]
-        while (block = CodeBlock.new(lines: lines, code_lines: code_lines)) && block.invalid? && neighbors.any?
-          lines.prepend neighbors.pop
-        end
-        yield block if block
-      end
-    end
-  end
+  #   CodeFrontier#detect_invalid_blocks
   class CodeFrontier
     def initialize(code_lines: )
       @code_lines = code_lines
@@ -207,16 +72,9 @@ module SyntaxErrorSearch
     # Returns a code block with the largest indentation possible
     def pop
-      return nil if empty?
       return @frontier.pop
     end
-    def next_block?
-      !@indent_hash.empty?
-    end
     def indent_hash_indent
       @indent_hash.keys.sort.last
     end
@@ -226,40 +84,25 @@ module SyntaxErrorSearch
       @indent_hash[indent]&.first
     end
-    def generate_blocks
-    end
-    def next_block
-      indent = @indent_hash.keys.sort.last
-      lines = @indent_hash[indent].first
-      block = CodeBlock.new(
-        lines: lines,
-        code_lines: @code_lines
-      ).expand_until_neighbors
-      register(block)
-      block
-    end
     def expand?
       return false if @frontier.empty?
       return true if @indent_hash.empty?
-      @frontier.last.current_indent >= @indent_hash.keys.sort.last
-    end
+      frontier_indent = @frontier.last.current_indent
+      hash_indent = @indent_hash.keys.sort.last
-    # This method is responsible for determining if a new code
-    # block should be generated instead of evaluating an already
-    # existing block in the frontier
-    def generate_new_block?
-      return false if @indent_hash.empty?
-      return true if @frontier.empty?
+      if ENV["DEBUG"]
+        puts "```"
+        puts @frontier.last.to_s
+        puts "```"
+        puts "  @frontier indent: #{frontier_indent}"
+        puts "  @hash indent:     #{hash_indent}"
+      end
-      @frontier.last.current_indent <= @indent_hash.keys.sort.last
+      frontier_indent >= hash_indent
     end
-    def register(block)
+    def register_indent_block(block)
       block.lines.each do |line|
         @indent_hash[line.indent]&.delete(line)
       end
@@ -273,22 +116,18 @@ module SyntaxErrorSearch
     # and that each code block's lines are removed from the indentation hash so we
     # don't re-evaluate the same line multiple times.
     def <<(block)
-      register(block)
+      register_indent_block(block)
+      # Make sure we don't double expand: if a code block fully engulfs another code block, keep the bigger one
+      @frontier.reject! {|b|
+        b.starts_at >= block.starts_at && b.ends_at <= block.ends_at
+      }
       @frontier << block
       @frontier.sort!
       self
     end
-    def any?
-      !empty?
-    end
-    def empty?
-      @frontier.empty? && @indent_hash.empty?
-    end
     # Example:
     #
     #   combination([:a, :b, :c, :d])

data/lib/syntax_search/code_search.rb CHANGED

@@ -3,15 +3,16 @@
 module SyntaxErrorSearch
   # Searches code for a syntax error
   #
-  # The bulk of the heavy lifting is done by the CodeFrontier
+  # The bulk of the heavy lifting is done in:
   #
-  # The flow looks like this:
+  #  - CodeFrontier (Holds information for generating blocks and determining if we can stop searching)
+  #  - ParseBlocksFromIndentLine (Creates blocks into the frontier)
+  #  - BlockExpand (Expands existing blocks to search more code)
   #
   # ## Syntax error detection
   #
   # When the frontier holds the syntax error, we can stop searching
   #
-  #
   #   search = CodeSearch.new(<<~EOM)
   #     def dog
   #       def lol
@@ -23,42 +24,51 @@ module SyntaxErrorSearch
   #   search.invalid_blocks.map(&:to_s) # =>
   #   # => ["def lol\n"]
   #
-  #
   class CodeSearch
     private; attr_reader :frontier; public
     public; attr_reader :invalid_blocks, :record_dir, :code_lines
-    def initialize(string, record_dir: ENV["SYNTAX_SEARCH_RECORD_DIR"])
+    def initialize(source, record_dir: ENV["SYNTAX_SEARCH_RECORD_DIR"])
+      @source = source
       if record_dir
         @time = Time.now.strftime('%Y-%m-%d-%H-%M-%s-%N')
         @record_dir = Pathname(record_dir).join(@time).tap {|p| p.mkpath }
         @write_count = 0
       end
-      @code_lines = string.lines.map.with_index do |line, i|
+      @code_lines = source.lines.map.with_index do |line, i|
         CodeLine.new(line: line, index: i)
       end
       @frontier = CodeFrontier.new(code_lines: @code_lines)
       @invalid_blocks = []
       @name_tick = Hash.new {|hash, k| hash[k] = 0 }
       @tick = 0
-      @scan = IndentScan.new(code_lines: @code_lines)
+      @block_expand = BlockExpand.new(code_lines: code_lines)
+      @parse_blocks_from_indent_line = ParseBlocksFromIndentLine.new(code_lines: @code_lines)
     end
+    # Used for debugging
     def record(block:, name: "record")
       return if !@record_dir
       @name_tick[name] += 1
       filename = "#{@write_count += 1}-#{name}-#{@name_tick[name]}.txt"
+      if ENV["DEBUG"]
+        puts "\n\n==== #{filename} ===="
+        puts "\n```#{block.starts_at}:#{block.ends_at}"
+        puts "#{block.to_s}"
+        puts "```"
+        puts "  block indent:     #{block.current_indent}"
+      end
       @record_dir.join(filename).open(mode: "a") do |f|
         display = DisplayInvalidBlocks.new(
           blocks: block,
-          terminal: false
+          terminal: false,
+          code_lines: @code_lines,
         )
         f.write(display.indent display.code_with_lines)
       end
     end
-    def push_if_invalid(block, name: )
-      frontier.register(block)
+    def push(block, name: )
       record(block: block, name: name)
       if block.valid?
@@ -69,33 +79,48 @@ module SyntaxErrorSearch
       end
     end
+    # Parses the most indented lines into blocks that are marked
+    # and added to the frontier
     def add_invalid_blocks
       max_indent = frontier.next_indent_line&.indent
       while (line = frontier.next_indent_line) && (line.indent == max_indent)
-        neighbors = @scan.neighbors_from_top(frontier.next_indent_line)
-        @scan.each_neighbor_block(frontier.next_indent_line) do |block|
+        @parse_blocks_from_indent_line.each_neighbor_block(frontier.next_indent_line) do |block|
           record(block: block, name: "add")
-          if block.valid?
-            block.lines.each(&:mark_invisible)
-          end
-        end
-        block = CodeBlock.new(lines: neighbors, code_lines: @code_lines)
-        push_if_invalid(block, name: "add")
+          block.mark_invisible if block.valid?
+          push(block, name: "add")
+        end
       end
     end
+    # Given an already existing block in the frontier, expand it to see
+    # if it contains our invalid syntax
     def expand_invalid_block
       block = frontier.pop
       return unless block
-      block.expand_until_next_boundry
-      push_if_invalid(block, name: "expand")
+      record(block: block, name: "pop")
+      # block = block.expand_until_next_boundry
+      block = @block_expand.call(block)
+      push(block, name: "expand")
+    end
+    def sweep_heredocs
+      HeredocBlockParse.new(
+        source: @source,
+        code_lines: @code_lines
+      ).call.each do |block|
+        push(block, name: "heredoc")
+      end
     end
+    # Main search loop
     def call
+      sweep_heredocs
       until frontier.holds_all_syntax_errors?
         @tick += 1

data/lib/syntax_search/display_invalid_blocks.rb CHANGED

@@ -5,21 +5,22 @@ module SyntaxErrorSearch
   class DisplayInvalidBlocks
     attr_reader :filename
-    def initialize(blocks:, io: $stderr, filename: nil, terminal: false)
+    def initialize(code_lines: ,blocks:, io: $stderr, filename: nil, terminal: false, invalid_type: :unmatched_end)
       @terminal = terminal
       @filename = filename
       @io = io
       @blocks = Array(blocks)
       @lines = @blocks.map(&:lines).flatten
-      @code_lines = @blocks.first&.code_lines || []
+      @code_lines = code_lines
       @digit_count = @code_lines.last&.line_number.to_s.length
       @invalid_line_hash = @lines.each_with_object({}) {|line, h| h[line] = true  }
+      @invalid_type = invalid_type
     end
     def call
-      if @blocks.any?
+      if @blocks.any? { |b| !b.hidden? }
         found_invalid_blocks
       else
         @io.puts "Syntax OK"
@@ -33,15 +34,28 @@ module SyntaxErrorSearch
     end
     private def found_invalid_blocks
-      @io.puts <<~EOM
+      case @invalid_type
+      when :missing_end
+        @io.puts <<~EOM
-        SyntaxErrorSearch: A syntax error was detected
+          SyntaxSearch: Missing `end` detected
-        This code has an unmatched `end` this is caused by either
-        missing a syntax keyword (`def`,  `do`, etc.) or inclusion
-        of an extra `end` line
+          This code has a missing `end`. Ensure that all
+          syntax keywords (`def`, `do`, etc.) have a matching `end`.
+        EOM
+      when :unmatched_end
+        @io.puts <<~EOM
+          SyntaxSearch: Unmatched `end` detected
+          This code has an unmatched `end`. Ensure that all `end` lines
+          in your code have a matching syntax keyword  (`def`,  `do`, etc.)
+          and that you don't have any extra `end` lines.
+        EOM
+      end
-      EOM
       @io.puts("file: #{filename}") if filename
       @io.puts <<~EOM
         simplified:
@@ -50,16 +64,13 @@ module SyntaxErrorSearch
       EOM
     end
-    def indent(string, with: "  ")
+    def indent(string, with: "    ")
       string.each_line.map {|l| with  + l }.join
     end
     def code_block
       string = String.new("")
-      string << "```\n"
-      # string << "#".rjust(@digit_count) + " filename: #{filename}\n\n" if filename
       string << code_with_lines
-      string << "```\n"
       string
     end