RubyGems - dead_end - Versions diffs - 1.0.0 - Mend

dead_end 1.0.0

Files changed (38) hide show

checksums.yaml +7 -0
data/.circleci/config.yml +41 -0
data/.github/workflows/check_changelog.yml +13 -0
data/.gitignore +14 -0
data/.rspec +3 -0
data/.travis.yml +6 -0
data/CHANGELOG.md +39 -0
data/CODE_OF_CONDUCT.md +74 -0
data/Gemfile +10 -0
data/Gemfile.lock +36 -0
data/LICENSE.txt +21 -0
data/README.md +122 -0
data/Rakefile +8 -0
data/assets/syntax_search.gif +0 -0
data/bin/console +14 -0
data/bin/setup +8 -0
data/dead_end.gemspec +28 -0
data/exe/dead_end +70 -0
data/lib/dead_end.rb +4 -0
data/lib/dead_end/around_block_scan.rb +193 -0
data/lib/dead_end/auto.rb +51 -0
data/lib/dead_end/block_expand.rb +74 -0
data/lib/dead_end/capture_code_context.rb +62 -0
data/lib/dead_end/code_block.rb +78 -0
data/lib/dead_end/code_frontier.rb +151 -0
data/lib/dead_end/code_line.rb +139 -0
data/lib/dead_end/code_search.rb +157 -0
data/lib/dead_end/display_code_with_line_numbers.rb +71 -0
data/lib/dead_end/display_invalid_blocks.rb +122 -0
data/lib/dead_end/fyi.rb +7 -0
data/lib/dead_end/heredoc_block_parse.rb +30 -0
data/lib/dead_end/internals.rb +156 -0
data/lib/dead_end/lex_all.rb +58 -0
data/lib/dead_end/parse_blocks_from_indent_line.rb +56 -0
data/lib/dead_end/trailing_slash_join.rb +53 -0
data/lib/dead_end/version.rb +5 -0
data/lib/dead_end/who_dis_syntax_error.rb +66 -0
metadata +83 -0

@@ -0,0 +1,78 @@
+# frozen_string_literal: true
+module DeadEnd
+  # Multiple lines form a singular CodeBlock
+  #
+  # Source code is made of multiple CodeBlocks.
+  #
+  # Example:
+  #
+  #   code_block.to_s # =>
+  #     #   def foo
+  #     #     puts "foo"
+  #     #   end
+  #
+  #   code_block.valid? # => true
+  #   code_block.in_valid? # => false
+  #
+  #
+  class CodeBlock
+    attr_reader :lines
+    def initialize(lines: [])
+      @lines = Array(lines)
+    end
+    def visible_lines
+      @lines.select(&:visible?).select(&:not_empty?)
+    end
+    def mark_invisible
+      @lines.map(&:mark_invisible)
+    end
+    def is_end?
+      to_s.strip == "end"
+    end
+    def hidden?
+      @lines.all?(&:hidden?)
+    end
+    def starts_at
+      @starts_at ||= @lines.first&.line_number
+    end
+    def ends_at
+      @ends_at ||= @lines.last&.line_number
+    end
+    # This is used for frontier ordering, we are searching from
+    # the largest indentation to the smallest. This allows us to
+    # populate an array with multiple code blocks then call `sort!`
+    # on it without having to specify the sorting criteria
+    def <=>(other)
+      out = self.current_indent <=> other.current_indent
+      return out if out != 0
+      # Stable sort
+      self.starts_at <=> other.starts_at
+    end
+    def current_indent
+      @current_indent ||= lines.select(&:not_empty?).map(&:indent).min || 0
+    end
+    def invalid?
+      !valid?
+    end
+    def valid?
+      DeadEnd.valid?(self.to_s)
+    end
+    def to_s
+      @lines.join
+    end
+  end
+end

data/lib/dead_end/code_frontier.rb ADDED

@@ -0,0 +1,151 @@
+# frozen_string_literal: true
+module DeadEnd
+  # The main function of the frontier is to hold the edges of our search and to
+  # evaluate when we can stop searching.
+  #
+  # ## Knowing where we've been
+  #
+  # Once a code block is generated it is added onto the frontier where it will be
+  # sorted and then the frontier can be filtered. Large blocks that totally contain a
+  # smaller block will cause the smaller block to be evicted.
+  #
+  #   CodeFrontier#<<
+  #   CodeFrontier#pop
+  #
+  # ## Knowing where we can go
+  #
+  # Internally it keeps track of an "indent hash" which is exposed via `next_indent_line`
+  # when called this will return a line of code with the most indentation.
+  #
+  # This line of code can be used to build a CodeBlock, and when that code block
+  # is added back to the frontier, the lines in the code block are removed from the
+  # indent hash so we don't double-create the same block.
+  #
+  #   CodeFrontier#next_indent_line
+  #   CodeFrontier#register_indent_block
+  #
+  # ## Knowing when to stop
+  #
+  # The frontier holds the syntax error when removing all code blocks from the original
+  # source document allows it to be parsed as syntactically valid:
+  #
+  #   CodeFrontier#holds_all_syntax_errors?
+  #
+  # ## Filtering false positives
+  #
+  # Once the search is completed, the frontier will have many blocks that do not contain
+  # the syntax error. To filter to the smallest subset that does call:
+  #
+  #   CodeFrontier#detect_invalid_blocks
+  class CodeFrontier
+    # code_lines - all lines of the document being searched. Every non-empty
+    #              line is bucketed by its indentation in the indent hash.
+    def initialize(code_lines: )
+      @code_lines = code_lines
+      @frontier = []
+      @indent_hash = code_lines.reject(&:empty?).group_by(&:indent)
+    end
+    # Number of blocks currently on the frontier
+    def count
+      @frontier.count
+    end
+    # Returns true if the document is valid with all lines of the given
+    # blocks removed. By default it checks all blocks present in
+    # the frontier array, but can be used for arbitrary arrays
+    # of codeblocks as well
+    def holds_all_syntax_errors?(block_array = @frontier)
+      DeadEnd.valid_without?(
+        without_lines: block_array.map(&:lines),
+        code_lines: @code_lines
+      )
+    end
+    # Removes and returns the last block of the sorted frontier, which is
+    # the one with the largest indentation (nil when the frontier is empty)
+    def pop
+      @frontier.pop
+    end
+    # Largest indentation still present in the indent hash, nil when empty
+    def indent_hash_indent
+      @indent_hash.keys.max
+    end
+    # First remaining line with the largest indentation, nil when the
+    # indent hash is empty
+    def next_indent_line
+      @indent_hash[indent_hash_indent]&.first
+    end
+    # True when the next step should expand a block already on the frontier:
+    # never when the frontier is empty, always when no unregistered lines
+    # remain, otherwise when the last frontier block is indented at least
+    # as much as the most indented remaining line.
+    def expand?
+      return false if @frontier.empty?
+      return true if @indent_hash.empty?
+      frontier_indent = @frontier.last.current_indent
+      hash_indent = indent_hash_indent
+      if ENV["DEBUG"]
+        puts "```"
+        puts @frontier.last.to_s
+        puts "```"
+        puts "  @frontier indent: #{frontier_indent}"
+        puts "  @hash indent:     #{hash_indent}"
+      end
+      frontier_indent >= hash_indent
+    end
+    # Removes the block's lines from the indent hash, dropping any
+    # indentation level left without lines. Returns self.
+    def register_indent_block(block)
+      block.lines.each do |line|
+        @indent_hash[line.indent]&.delete(line)
+      end
+      @indent_hash.delete_if { |_indent, lines| lines.empty? }
+      self
+    end
+    # Add a block to the frontier
+    #
+    # This method ensures the frontier always remains sorted (in indentation order)
+    # and that each code block's lines are removed from the indentation hash so we
+    # don't re-evaluate the same line multiple times.
+    def <<(block)
+      register_indent_block(block)
+      # Make sure we don't double expand, if a code block fully engulfs another code block, keep the bigger one
+      @frontier.reject! do |existing|
+        existing.starts_at >= block.starts_at && existing.ends_at <= block.ends_at
+      end
+      @frontier << block
+      @frontier.sort!
+      self
+    end
+    # Every non-empty combination of the array's elements, smallest first
+    #
+    # Example:
+    #
+    #   combination([:a, :b, :c, :d])
+    #   # => [[:a], [:b], [:c], [:d], [:a, :b], [:a, :c], [:a, :d], [:b, :c], [:b, :d], [:c, :d], [:a, :b, :c], [:a, :b, :d], [:a, :c, :d], [:b, :c, :d], [:a, :b, :c, :d]]
+    def self.combination(array)
+      1.upto(array.length).flat_map do |size|
+        array.combination(size).to_a
+      end
+    end
+    # Given that we know our syntax error exists somewhere in our frontier, we want to find
+    # the smallest possible set of blocks that contain all the syntax errors.
+    # Returns an empty array when no combination satisfies the check.
+    def detect_invalid_blocks
+      self.class.combination(@frontier).find do |block_array|
+        holds_all_syntax_errors?(block_array)
+      end || []
+    end
+  end
+end

data/lib/dead_end/code_line.rb ADDED

@@ -0,0 +1,139 @@
+# frozen_string_literal: true
+module DeadEnd
+  # Represents a single line of code of a given source file
+  #
+  # This object contains metadata about the line such as
+  # amount of indentation. An if it is empty or not.
+  #
+  # While a given search for syntax errors is being performed
+  # state about the search can be stored in individual lines such
+  # as :valid or :invalid.
+  #
+  # Visibility of lines can be toggled on and off.
+  #
+  # Example:
+  #
+  #   line = CodeLine.new(line: "def foo\n", index: 0)
+  #   line.line_number => 1
+  #   line.empty? # => false
+  #   line.visible? # => true
+  #   line.mark_invisible
+  #   line.visible? # => false
+  #
+  # A CodeBlock is made of multiple CodeLines
+  #
+  # Marking a line as invisible indicates that it should not be used
+  # for syntax checks. It's essentially the same as commenting it out
+  #
+  # Marking a line as invisible also lets the overall program know
+  # that it should not check that area for syntax errors.
+  class CodeLine
+    TRAILING_SLASH = ("\\" + $/).freeze
+    attr_reader :line, :index, :indent, :original_line
+    def initialize(line: , index:)
+      @original_line = line.freeze
+      @line = @original_line
+      @empty = line.strip.empty?
+      @index = index
+      @indent = SpaceCount.indent(line)
+      @status = nil # valid, invalid, unknown
+      @invalid = false
+      lex_detect!
+    end
+    private def lex_detect!
+      lex = LexAll.new(source: line)
+      kw_count = 0
+      end_count = 0
+      lex.each do |lex|
+        next unless lex.type == :on_kw
+        case lex.token
+        when 'def', 'case', 'for', 'begin', 'class', 'module', 'if', 'unless', 'while', 'until' , 'do'
+          kw_count += 1
+        when 'end'
+          end_count += 1
+        end
+      end
+      @is_kw = (kw_count - end_count) > 0
+      @is_end = (end_count - kw_count) > 0
+      @is_comment = lex.detect {|lex| lex.type != :on_sp}&.type == :on_comment
+      @is_trailing_slash = lex.last.token == TRAILING_SLASH
+    end
+    alias :original :original_line
+    def trailing_slash?
+      @is_trailing_slash
+    end
+    def <=>(b)
+      self.index <=> b.index
+    end
+    def is_comment?
+      @is_comment
+    end
+    def not_comment?
+      !is_comment?
+    end
+    def is_kw?
+      @is_kw
+    end
+    def is_end?
+      @is_end
+    end
+    def mark_invalid
+      @invalid = true
+      self
+    end
+    def marked_invalid?
+      @invalid
+    end
+    def mark_invisible
+      @line = ""
+      self
+    end
+    def mark_visible
+      @line = @original_line
+      self
+    end
+    def visible?
+      !line.empty?
+    end
+    def hidden?
+      !visible?
+    end
+    def line_number
+      index + 1
+    end
+    alias :number :line_number
+    def not_empty?
+      !empty?
+    end
+    def empty?
+      @empty
+    end
+    def to_s
+      self.line
+    end
+  end
+end

data/lib/dead_end/code_search.rb ADDED

@@ -0,0 +1,157 @@
+# frozen_string_literal: true
+module DeadEnd
+  # Searches code for a syntax error
+  #
+  # The bulk of the heavy lifting is done in:
+  #
+  #  - CodeFrontier (Holds information for generating blocks and determining if we can stop searching)
+  #  - ParseBlocksFromLine (Creates blocks into the frontier)
+  #  - BlockExpand (Expands existing blocks to search more code
+  #
+  # ## Syntax error detection
+  #
+  # When the frontier holds the syntax error, we can stop searching
+  #
+  #   search = CodeSearch.new(<<~EOM)
+  #     def dog
+  #       def lol
+  #     end
+  #   EOM
+  #
+  #   search.call
+  #
+  #   search.invalid_blocks.map(&:to_s) # =>
+  #   # => ["def lol\n"]
+  #
+  class CodeSearch
+    private; attr_reader :frontier; public
+    public; attr_reader :invalid_blocks, :record_dir, :code_lines
+    def initialize(source, record_dir: ENV["DEAD_END_RECORD_DIR"] || ENV["DEBUG"] ? "tmp" : nil)
+      @source = source
+      if record_dir
+        @time = Time.now.strftime('%Y-%m-%d-%H-%M-%s-%N')
+        @record_dir = Pathname(record_dir).join(@time).tap {|p| p.mkpath }
+        @write_count = 0
+      end
+      code_lines = source.lines.map.with_index do |line, i|
+        CodeLine.new(line: line, index: i)
+      end
+      @code_lines = TrailingSlashJoin.new(code_lines: code_lines).call
+      @frontier = CodeFrontier.new(code_lines: @code_lines)
+      @invalid_blocks = []
+      @name_tick = Hash.new {|hash, k| hash[k] = 0 }
+      @tick = 0
+      @block_expand = BlockExpand.new(code_lines: code_lines)
+      @parse_blocks_from_indent_line = ParseBlocksFromIndentLine.new(code_lines: @code_lines)
+    end
+    # Used for debugging
+    def record(block:, name: "record")
+      return if !@record_dir
+      @name_tick[name] += 1
+      filename = "#{@write_count += 1}-#{name}-#{@name_tick[name]}.txt"
+      if ENV["DEBUG"]
+        puts "\n\n==== #{filename} ===="
+        puts "\n```#{block.starts_at}:#{block.ends_at}"
+        puts "#{block.to_s}"
+        puts "```"
+        puts "  block indent:     #{block.current_indent}"
+      end
+      @record_dir.join(filename).open(mode: "a") do |f|
+        display = DisplayInvalidBlocks.new(
+          blocks: block,
+          terminal: false,
+          code_lines: @code_lines,
+        )
+        f.write(display.indent display.code_with_lines)
+      end
+    end
+    def push(block, name: )
+      record(block: block, name: name)
+      if block.valid?
+        block.lines.each(&:mark_invisible)
+        frontier << block
+      else
+        frontier << block
+      end
+    end
+    # Removes the block without putting it back in the frontier
+    def sweep(block:, name: )
+      record(block: block, name: name)
+      block.lines.each(&:mark_invisible)
+      frontier.register_indent_block(block)
+    end
+    # Parses the most indented lines into blocks that are marked
+    # and added to the frontier
+    def add_invalid_blocks
+      max_indent = frontier.next_indent_line&.indent
+      while (line = frontier.next_indent_line) && (line.indent == max_indent)
+        @parse_blocks_from_indent_line.each_neighbor_block(frontier.next_indent_line) do |block|
+          record(block: block, name: "add")
+          block.mark_invisible if block.valid?
+          push(block, name: "add")
+        end
+      end
+    end
+    # Given an already existing block in the frontier, expand it to see
+    # if it contains our invalid syntax
+    def expand_invalid_block
+      block = frontier.pop
+      return unless block
+      record(block: block, name: "pop")
+      # block = block.expand_until_next_boundry
+      block = @block_expand.call(block)
+      push(block, name: "expand")
+    end
+    def sweep_heredocs
+      HeredocBlockParse.new(
+        source: @source,
+        code_lines: @code_lines
+      ).call.each do |block|
+        push(block, name: "heredoc")
+      end
+    end
+    def sweep_comments
+      lines = @code_lines.select(&:is_comment?)
+      return if lines.empty?
+      block = CodeBlock.new(lines: lines)
+      sweep(block: block, name: "comments")
+    end
+    # Main search loop
+    def call
+      sweep_heredocs
+      sweep_comments
+      until frontier.holds_all_syntax_errors?
+        @tick += 1
+        if frontier.expand?
+          expand_invalid_block
+        else
+          add_invalid_blocks
+        end
+      end
+      @invalid_blocks.concat(frontier.detect_invalid_blocks )
+      @invalid_blocks.sort_by! {|block| block.starts_at }
+      self
+    end
+  end
+end