RubyGems - dead_end - Versions diffs - 1.2.0 → 3.0.0 - Mend

dead_end 1.2.0 → 3.0.0

Files changed (33) hide show

checksums.yaml +4 -4
data/.circleci/config.yml +9 -0
data/.github/workflows/check_changelog.yml +14 -7
data/.standard.yml +1 -1
data/CHANGELOG.md +29 -0
data/Gemfile.lock +2 -2
data/README.md +89 -21
data/exe/dead_end +3 -66
data/lib/dead_end/around_block_scan.rb +6 -9
data/lib/dead_end/auto.rb +1 -21
data/lib/dead_end/capture_code_context.rb +123 -16
data/lib/dead_end/clean_document.rb +313 -0
data/lib/dead_end/cli.rb +118 -0
data/lib/dead_end/code_block.rb +18 -2
data/lib/dead_end/code_frontier.rb +53 -16
data/lib/dead_end/code_line.rb +159 -76
data/lib/dead_end/code_search.rb +24 -37
data/lib/dead_end/display_code_with_line_numbers.rb +0 -1
data/lib/dead_end/display_invalid_blocks.rb +41 -78
data/lib/dead_end/explain_syntax.rb +103 -0
data/lib/dead_end/left_right_lex_count.rb +157 -0
data/lib/dead_end/lex_all.rb +11 -27
data/lib/dead_end/lex_value.rb +62 -0
data/lib/dead_end/parse_blocks_from_indent_line.rb +1 -1
data/lib/dead_end/ripper_errors.rb +30 -0
data/lib/dead_end/version.rb +1 -1
data/lib/dead_end.rb +145 -1
metadata +8 -7
data/lib/dead_end/fyi.rb +0 -6
data/lib/dead_end/heredoc_block_parse.rb +0 -34
data/lib/dead_end/internals.rb +0 -158
data/lib/dead_end/trailing_slash_join.rb +0 -53
data/lib/dead_end/who_dis_syntax_error.rb +0 -74

data/lib/dead_end/clean_document.rb ADDED Viewed

@@ -0,0 +1,313 @@
+# frozen_string_literal: true
+module DeadEnd
+  # Parses and sanitizes source into a lexically aware document
+  #
+  # Internally the document is represented by an array with each
+  # index containing a CodeLine correlating to a line from the source code.
+  #
+  # There are three main phases in the algorithm:
+  #
+  # 1. Sanitize/format input source
+  # 2. Search for invalid blocks
+  # 3. Format invalid blocks into something meaningful
+  #
+  # This class handles the first part.
+  #
+  # The reason this class exists is to format input source
+  # for better/easier/cleaner exploration.
+  #
+  # The CodeSearch class operates at the line level so
+  # we must be careful to not introduce lines that look
+  # valid by themselves, but when removed will trigger syntax errors
+  # or strange behavior.
+  #
+  # ## Join Trailing slashes
+  #
+  # Code with a trailing slash is logically treated as a single line:
+  #
+  #     1 it "code can be split" \
+  #     2    "across multiple lines" do
+  #
+  # In this case removing line 2 would add a syntax error. We get around
+  # this by internally joining the two lines into a single "line" object
+  #
+  # ## Logically Consecutive lines
+  #
+  # Code that can be broken over multiple
+  # lines such as method calls are on different lines:
+  #
+  #     1 User.
+  #     2   where(name: "schneems").
+  #     3   first
+  #
+  # Removing line 2 can introduce a syntax error. To fix this, all lines
+  # are joined into one.
+  #
+  # ## Heredocs
+  #
+  # A heredoc is a way of defining a multi-line string. They can cause many
+  # problems. If left as a single line, Ripper would try to parse the contents
+  # as ruby code rather than as a string. Even without this problem, we still
+  # hit an issue with indentation
+  #
+  #    1 foo = <<~HEREDOC
+  #    2  "Be yourself; everyone else is already taken."
+  #    3    ― Oscar Wilde
+  #    4      puts "I look like ruby code" # but i'm still a heredoc
+  #    5 HEREDOC
+  #
+  # If we didn't join these lines then our algorithm would think that line 4
+  # is separate from the rest, has a higher indentation, then look at it first
+  # and remove it.
+  #
+  # If the code evaluates line 5 by itself it will think line 5 is a constant,
+  # remove it, and introduce a syntax error.
+  #
+  # All of these problems are fixed by joining the whole heredoc into a single
+  # line.
+  #
+  # ## Comments and whitespace
+  #
+  # Comments can throw off the way the lexer tells us that the line
+  # logically belongs with the next line. This is valid ruby but
+  # results in a different lex output than before:
+  #
+  #     1 User.
+  #     2   where(name: "schneems").
+  #     3   # Comment here
+  #     4   first
+  #
+  # To handle this we can replace comment lines with empty lines
+  # and then re-lex the source. This removal and re-lexing preserves
+  # line index and document size, but generates an easier to work with
+  # document.
+  #
+  class CleanDocument
+    def initialize(source:)
+      @source = source
+      @document = CodeLine.from_source(@source)
+    end
+    # Call all of the document "cleaners"
+    # and return self
+    def call
+      clean_sweep
+        .join_trailing_slash!
+        .join_consecutive!
+        .join_heredoc!
+      self
+    end
+    # Return an array of CodeLines in the
+    # document
+    def lines
+      @document
+    end
+    # Renders the document back to a string
+    def to_s
+      @document.join
+    end
+    # Remove comments and whitespace only lines
+    #
+    # replace with empty newlines
+    #
+    #     source = <<~'EOM'
+    #       # Comment 1
+    #       puts "hello"
+    #       # Comment 2
+    #       puts "world"
+    #     EOM
+    #
+    #     lines = CleanDocument.new(source: source).clean_sweep.lines
+    #     expect(lines[0].to_s).to eq("\n")
+    #     expect(lines[1].to_s).to eq("puts "hello")
+    #     expect(lines[2].to_s).to eq("\n")
+    #     expect(lines[3].to_s).to eq("puts "world")
+    #
+    # WARNING:
+    # If you run this after any of the "join" commands, they
+    # will be un-joined.
+    #
+    # After this change is made, we re-lex the document because
+    # removing comments can change how the doc is parsed.
+    #
+    # For example:
+    #
+    #     values = LexAll.new(source: <<~EOM))
+    #       User.
+    #         # comment
+    #         where(name: 'schneems')
+    #     EOM
+    #     expect(values.count {|v| v.type == :on_ignored_nl}).to eq(1)
+    #
+    # After the comment is removed:
+    #
+    #     values = LexAll.new(source: <<~EOM))
+    #       User.
+    #
+    #         where(name: 'schneems')
+    #     EOM
+    #     expect(values.count {|v| v.type == :on_ignored_nl}).to eq(2)
+    #
+    def clean_sweep
+      source = @document.map do |code_line|
+        # Clean trailing whitespace on empty line
+        if code_line.line.strip.empty?
+          next CodeLine.new(line: "\n", index: code_line.index, lex: [])
+        end
+        # Remove comments
+        if code_line.lex.detect { |lex| lex.type != :on_sp }&.type == :on_comment
+          next CodeLine.new(line: "\n", index: code_line.index, lex: [])
+        end
+        code_line
+      end.join
+      @source = source
+      @document = CodeLine.from_source(source)
+      self
+    end
+    # Smushes all heredoc lines into one line
+    #
+    #     source = <<~'EOM'
+    #       foo = <<~HEREDOC
+    #          lol
+    #          hehehe
+    #       HEREDOC
+    #     EOM
+    #
+    #     lines = CleanDocument.new(source: source).join_heredoc!.lines
+    #     expect(lines[0].to_s).to eq(source)
+    #     expect(lines[1].to_s).to eq("")
+    def join_heredoc!
+      start_index_stack = []
+      heredoc_beg_end_index = []
+      lines.each do |line|
+        line.lex.each do |lex_value|
+          case lex_value.type
+          when :on_heredoc_beg
+            start_index_stack << line.index
+          when :on_heredoc_end
+            start_index = start_index_stack.pop
+            end_index = line.index
+            heredoc_beg_end_index << [start_index, end_index]
+          end
+        end
+      end
+      heredoc_groups = heredoc_beg_end_index.map { |start_index, end_index| @document[start_index..end_index] }
+      join_groups(heredoc_groups)
+      self
+    end
+    # Smushes logically "consecutive" lines
+    #
+    #     source = <<~'EOM'
+    #       User.
+    #         where(name: 'schneems').
+    #         first
+    #     EOM
+    #
+    #     lines = CleanDocument.new(source: source).join_consecutive!.lines
+    #     expect(lines[0].to_s).to eq(source)
+    #     expect(lines[1].to_s).to eq("")
+    #
+    # The one known case this doesn't handle is:
+    #
+    #     Ripper.lex <<~EOM
+    #       a &&
+    #        b ||
+    #        c
+    #     EOM
+    #
+    # For some reason this introduces `on_ignore_newline` but with BEG type
+    #
+    def join_consecutive!
+      consecutive_groups = @document.select(&:ignore_newline_not_beg?).map do |code_line|
+        take_while_including(code_line.index..-1) do |line|
+          line.ignore_newline_not_beg?
+        end
+      end
+      join_groups(consecutive_groups)
+      self
+    end
+    # Join lines with a trailing slash
+    #
+    #     source = <<~'EOM'
+    #       it "code can be split" \
+    #          "across multiple lines" do
+    #     EOM
+    #
+    #     lines = CleanDocument.new(source: source).join_consecutive!.lines
+    #     expect(lines[0].to_s).to eq(source)
+    #     expect(lines[1].to_s).to eq("")
+    def join_trailing_slash!
+      trailing_groups = @document.select(&:trailing_slash?).map do |code_line|
+        take_while_including(code_line.index..-1) { |x| x.trailing_slash? }
+      end
+      join_groups(trailing_groups)
+      self
+    end
+    # Helper method for joining "groups" of lines
+    #
+    # Input is expected to be type Array<Array<CodeLine>>
+    #
+    # The outer array holds the various "groups" while the
+    # inner array holds code lines.
+    #
+    # All code lines are "joined" into the first line in
+    # their group.
+    #
+    # To preserve document size, empty lines are placed
+    # in the place of the lines that were "joined"
+    def join_groups(groups)
+      groups.each do |lines|
+        line = lines.first
+        # Handle the case of multiple groups in a a row
+        # if one is already replaced, move on
+        next if @document[line.index].empty?
+        # Join group into the first line
+        @document[line.index] = CodeLine.new(
+          lex: lines.map(&:lex).flatten,
+          line: lines.join,
+          index: line.index
+        )
+        # Hide the rest of the lines
+        lines[1..-1].each do |line|
+          # The above lines already have newlines in them, if add more
+          # then there will be double newline, use an empty line instead
+          @document[line.index] = CodeLine.new(line: "", index: line.index, lex: [])
+        end
+      end
+      self
+    end
+    # Helper method for grabbing elements from document
+    #
+    # Like `take_while` except when it stops
+    # iterating, it also returns the line
+    # that caused it to stop
+    def take_while_including(range = 0..-1)
+      take_next_and_stop = false
+      @document[range].take_while do |line|
+        next if take_next_and_stop
+        take_next_and_stop = !(yield line)
+        true
+      end
+    end
+  end
+end

data/lib/dead_end/cli.rb ADDED Viewed

@@ -0,0 +1,118 @@
+# frozen_string_literal: true
+require "pathname"
+require "optparse"
+module DeadEnd
+  # All the logic of the exe/dead_end CLI in one handy spot
+  #
+  #   Cli.new(argv: ["--help"]).call
+  #   Cli.new(argv: ["<path/to/file>.rb"]).call
+  #   Cli.new(argv: ["<path/to/file>.rb", "--record=tmp"]).call
+  #   Cli.new(argv: ["<path/to/file>.rb", "--terminal"]).call
+  #
+  class Cli
+    attr_accessor :options, :file_name
+    # ARGV is everything passed to the executable, does not include executable name
+    #
+    # All other inputs are dependency injection for testing
+    #
+    # argv     - Array of command line arguments, mutated when options are parsed
+    # exit_obj - Object that receives `exit`, defaults to Kernel
+    # io       - IO that output is written to, defaults to $stdout
+    # env      - Hash-like object read for DEAD_END_RECORD_DIR and DEBUG
+    def initialize(argv:, exit_obj: Kernel, io: $stdout, env: ENV)
+      @options = {}
+      @parser = nil
+      options[:record_dir] = env["DEAD_END_RECORD_DIR"]
+      # DEBUG takes precedence over DEAD_END_RECORD_DIR
+      options[:record_dir] = "tmp" if env["DEBUG"]
+      options[:terminal] = DeadEnd::DEFAULT_VALUE
+      @io = io
+      @argv = argv
+      @file_name = argv[0]
+      @exit_obj = exit_obj
+    end
+    # Parses the arguments, runs DeadEnd against the given file and
+    # exits with 0 when the document is ok, 1 otherwise
+    def call
+      if file_name.nil? || file_name.empty?
+        # Display help if raw command
+        parser.parse! %w[--help]
+      else
+        parse
+      end
+      # Needed for testing since we fake exit
+      return if options[:exit]
+      file = Pathname(file_name)
+      @io.puts "Record dir: #{options[:record_dir]}" if options[:record_dir]
+      display = DeadEnd.call(
+        io: @io,
+        source: file.read,
+        filename: file.expand_path,
+        terminal: options.fetch(:terminal, DeadEnd::DEFAULT_VALUE),
+        record_dir: options[:record_dir]
+      )
+      @exit_obj.exit(display.document_ok? ? 0 : 1)
+    end
+    # Parses options out of the arguments given at initialization
+    #
+    # Returns self
+    def parse
+      parser.parse!(@argv)
+      # `parse!` strips recognized options from @argv, so the first
+      # remaining element is the <file> even when options were given
+      # before it (i.e. `dead_end --terminal dog.rb`)
+      @file_name = @argv.first unless @argv.empty?
+      self
+    end
+    # Returns the memoized OptionParser that writes into `options`
+    def parser
+      @parser ||= OptionParser.new do |opts|
+        opts.banner = <<~EOM
+          Usage: dead_end <file> [options]
+          Parses a ruby source file and searches for syntax error(s) such as
+          unexpected `end', expecting end-of-input.
+          Example:
+            $ dead_end dog.rb
+            # ...
+              ❯ 10  defdog
+              ❯ 15  end
+          ENV options:
+            DEAD_END_RECORD_DIR=<dir>
+            Records the steps used to search for a syntax error
+            to the given directory
+          Options:
+        EOM
+        opts.version = DeadEnd::VERSION
+        opts.on("--help", "Help - displays this message") do
+          @io.puts opts
+          # Recorded so `call` can bail out when exit is faked in tests
+          options[:exit] = true
+          @exit_obj.exit
+        end
+        opts.on("--record <dir>", "Records the steps used to search for a syntax error to the given directory") do |v|
+          options[:record_dir] = v
+        end
+        opts.on("--terminal", "Enable terminal highlighting") do
+          options[:terminal] = true
+        end
+        opts.on("--no-terminal", "Disable terminal highlighting") do
+          options[:terminal] = false
+        end
+      end
+    end
+  end
+end

data/lib/dead_end/code_block.rb CHANGED Viewed

@@ -70,8 +70,24 @@ module DeadEnd
     end
     def valid?
-      return @valid if @valid != UNSET
-      @valid = DeadEnd.valid?(to_s)
+      if @valid == UNSET
+        # Performance optimization
+        #
+        # If all the lines were previously hidden
+        # and we expand to capture additional empty
+        # lines then the result cannot be invalid
+        #
+        # That means there's no reason to re-check all
+        # lines with ripper (which is expensive).
+        # Benchmark in commit message
+        @valid = if lines.all? { |l| l.hidden? || l.empty? }
+          true
+        else
+          DeadEnd.valid?(lines.map(&:original).join)
+        end
+      else
+        @valid
+      end
     end
     def to_s

data/lib/dead_end/code_frontier.rb CHANGED Viewed

@@ -3,11 +3,19 @@
 module DeadEnd
   # The main function of the frontier is to hold the edges of our search and to
   # evaluate when we can stop searching.
+  # There are three main phases in the algorithm:
+  #
+  # 1. Sanitize/format input source
+  # 2. Search for invalid blocks
+  # 3. Format invalid blocks into something meaningful
+  #
+  # The Code frontier is a critical part of the second step
   #
   # ## Knowing where we've been
   #
-  # Once a code block is generated it is added onto the frontier where it will be
-  # sorted and then the frontier can be filtered. Large blocks that totally contain a
+  # Once a code block is generated it is added onto the frontier. Then it will be
+  # sorted by indentation and the frontier can be filtered. Large blocks that fully enclose a
   # smaller block will cause the smaller block to be evicted.
   #
   #   CodeFrontier#<<(block) # Adds block to frontier
@@ -15,11 +23,11 @@ module DeadEnd
   #
   # ## Knowing where we can go
   #
-  # Internally it keeps track of "unvisited" lines which is exposed via `next_indent_line`
-  # when called this will return a line of code with the most indentation.
+  # Internally the frontier keeps track of "unvisited" lines, which are exposed via `next_indent_line`.
+  # When called, this method returns a line of code with the highest indentation.
   #
-  # This line of code can be used to build a CodeBlock and then when that code block
-  # is added back to the frontier, then the lines are removed from the
+  # The returned line of code can be used to build a CodeBlock and then that code block
+  # is added back to the frontier. Then, the lines are removed from the
   # "unvisited" so we don't double-create the same block.
   #
   #   CodeFrontier#next_indent_line # Shows next line
@@ -27,34 +35,61 @@ module DeadEnd
   #
   # ## Knowing when to stop
   #
-  # The frontier holds the syntax error when removing all code blocks from the original
-  # source document allows it to be parsed as syntatically valid:
+  # The frontier knows how to check the entire document for a syntax error. When blocks
+  # are added onto the frontier, they're removed from the document. When all code containing
+  # syntax errors has been added to the frontier, the document will be parsable without a
+  # syntax error and the search can stop.
   #
-  #   CodeFrontier#holds_all_syntax_errors?
+  #   CodeFrontier#holds_all_syntax_errors? # Returns true when frontier holds all syntax errors
   #
   # ## Filtering false positives
   #
-  # Once the search is completed, the frontier will have many blocks that do not contain
-  # the syntax error. To filter to the smallest subset that does call:
+  # Once the search is completed, the frontier may have multiple blocks that do not contain
+  # the syntax error. To limit the result to the smallest subset of "invalid blocks" call:
   #
   #   CodeFrontier#detect_invalid_blocks
+  #
   class CodeFrontier
     def initialize(code_lines:)
       @code_lines = code_lines
       @frontier = []
       @unvisited_lines = @code_lines.sort_by(&:indent_index)
+      @has_run = false
+      @check_next = true
     end
     def count
       @frontier.count
     end
+    # Performance optimization
+    #
+    # Parsing with ripper is expensive
+    # If we know we don't have any blocks with invalid
+    # syntax, then we know we cannot have found
+    # the incorrect syntax yet.
+    #
+    # When an invalid block is added onto the frontier
+    # check document state
+    private def can_skip_check?
+      check_next = @check_next
+      @check_next = false
+      if check_next
+        false
+      else
+        true
+      end
+    end
     # Returns true if the document is valid with all lines
     # removed. By default it checks all blocks present in
     # the frontier array, but can be used for arbitrary arrays
     # of codeblocks as well
-    def holds_all_syntax_errors?(block_array = @frontier)
-      without_lines = block_array.map do |block|
+    def holds_all_syntax_errors?(block_array = @frontier, can_cache: true)
+      return false if can_cache && can_skip_check?
+      without_lines = block_array.flat_map do |block|
         block.lines
       end
@@ -84,8 +119,8 @@ module DeadEnd
         puts "```"
         puts @frontier.last.to_s
         puts "```"
-        puts "  @frontier indent: #{frontier_indent}"
-        puts "  @unvisited indent:     #{unvisited_indent}"
+        puts "  @frontier indent:  #{frontier_indent}"
+        puts "  @unvisited indent: #{unvisited_indent}"
       end
       # Expand all blocks before moving to unvisited lines
@@ -109,6 +144,8 @@ module DeadEnd
       @frontier.reject! { |b|
         b.starts_at >= block.starts_at && b.ends_at <= block.ends_at
       }
+      @check_next = true if block.invalid?
       @frontier << block
       @frontier.sort!
@@ -131,7 +168,7 @@ module DeadEnd
     # the smallest possible set of blocks that contain all the syntax errors
     def detect_invalid_blocks
       self.class.combination(@frontier.select(&:invalid?)).detect do |block_array|
-        holds_all_syntax_errors?(block_array)
+        holds_all_syntax_errors?(block_array, can_cache: false)
       end || []
     end
   end