RubyGems - dead_end - Versions diffs - 1.2.0 → 2.0.0 - Mend

dead_end 1.2.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +7 -0
data/Gemfile.lock +1 -1
data/lib/dead_end/around_block_scan.rb +5 -8
data/lib/dead_end/capture_code_context.rb +123 -16
data/lib/dead_end/clean_document.rb +313 -0
data/lib/dead_end/code_frontier.rb +24 -13
data/lib/dead_end/code_line.rb +159 -76
data/lib/dead_end/code_search.rb +18 -32
data/lib/dead_end/display_code_with_line_numbers.rb +0 -1
data/lib/dead_end/display_invalid_blocks.rb +4 -2
data/lib/dead_end/fyi.rb +2 -0
data/lib/dead_end/internals.rb +9 -13
data/lib/dead_end/lex_all.rb +10 -26
data/lib/dead_end/lex_value.rb +62 -0
data/lib/dead_end/parse_blocks_from_indent_line.rb +1 -1
data/lib/dead_end/version.rb +1 -1
data/lib/dead_end/who_dis_syntax_error.rb +1 -1
metadata +4 -4
data/lib/dead_end/heredoc_block_parse.rb +0 -34
data/lib/dead_end/trailing_slash_join.rb +0 -53

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: c7db29aed59a901a58a0b1ed50873b6f2c17692f7724ffad2736b99694b78ba0
-  data.tar.gz: e1cf2a11fa38af85df30d89a559b0c6e5a74beec2c0add8b6f67dd142d793de1
+  metadata.gz: 9fb84957790492d9f453b8863ea276ffc603ccb365fee3621f322e3f19e172e5
+  data.tar.gz: 798626bcc0dfa8457ef1fed8fb9ab8c9783a12ee109d94310a0cf787b2a0491a
 SHA512:
-  metadata.gz: 444cfdfd7df93038d1714729a21a3b7a20f9000e9cf0541521d51a125d00be4968e83bbc8a51bf4c4689e2b99706cf5eb0032a318647ecb3fd4452a343798e7a
-  data.tar.gz: c532b87160ae6231776b72ff68f3c8f940064051e14c27da28fa22a15c1be0060d9b2afd600bb66dd4571cd39b9372b1956782565040a5e159776f9dac9f9e62
+  metadata.gz: 44624834e772d2c0d5c0035eb373571cb379cf6417a1d3422528f312c06771ef00c45e3c1ceebb8f78fa4ebce232fd99798f957c408a8a3a78710e65f18da7ce
+  data.tar.gz: 478fce76c26ffcf975111bb51a8ea71c20aa0ecbabb606d621d715e3d7055a818e82e5b1bc11cc7143e5cd4e120da257c1b33dc30a1b07356dacb782ad7299bf

data/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,12 @@
 ## HEAD (unreleased)
+## 2.0.0
+- Support "endless" oneline method definitions for Ruby 3+ (https://github.com/zombocom/dead_end/pull/80)
+- Reduce timeout to 1 second (https://github.com/zombocom/dead_end/pull/79)
+- Logically consecutive lines (such as chained methods) are now joined (https://github.com/zombocom/dead_end/pull/78)
+- Output improvement for cases where the only line is a single `end` (https://github.com/zombocom/dead_end/pull/78)
 ## 1.2.0
 - Output improvements via less greedy unmatched kw capture https://github.com/zombocom/dead_end/pull/73

data/Gemfile.lock CHANGED Viewed

@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    dead_end (1.2.0)
+    dead_end (2.0.0)
 GEM
   remote: https://rubygems.org/

data/lib/dead_end/around_block_scan.rb CHANGED Viewed

@@ -9,10 +9,10 @@ module DeadEnd
   #
   # Example:
   #
-  #   def dog
-  #     puts "bark"
-  #     puts "bark"
-  #   end
+  #   def dog         # 1
+  #     puts "bark"   # 2
+  #     puts "bark"   # 3
+  #   end             # 4
   #
   #   scan = AroundBlockScan.new(
   #     code_lines: code_lines
@@ -22,7 +22,7 @@ module DeadEnd
   #   scan.scan_while { true }
   #
   #   puts scan.before_index # => 0
-  #   puts scan.after_index # => 3
+  #   puts scan.after_index  # => 3
   #
   # Contents can also be filtered using AroundBlockScan#skip
   #
@@ -109,8 +109,6 @@ module DeadEnd
       kw_count = 0
       end_count = 0
       after_lines.each do |line|
-        # puts "line: #{line.number} #{line.original_line}, indent: #{line.indent}, #{line.empty?} #{line.indent == @orig_indent}"
         next if line.empty?
         break if line.indent < @orig_indent
         next if line.indent != @orig_indent
@@ -124,7 +122,6 @@ module DeadEnd
         lines << line
       end
-      lines.select! { |line| !line.is_comment? }
       lines
     end

data/lib/dead_end/capture_code_context.rb CHANGED Viewed

@@ -1,13 +1,27 @@
 # frozen_string_literal: true
 module DeadEnd
-  # Given a block, this method will capture surrounding
-  # code to give the user more context for the location of
-  # the problem.
+  # Turns "invalid block(s)" into useful context
   #
-  # Return is an array of CodeLines to be rendered.
+  # There are three main phases in the algorithm:
   #
-  # Surrounding code is captured regardless of visible state
+  # 1. Sanitize/format input source
+  # 2. Search for invalid blocks
+  # 3. Format invalid blocks into something meaningful
+  #
+  # This class handles the third part.
+  #
+  # The algorithm is very good at capturing all of a syntax
+  # error in a single block in number 2, however the results
+  # can contain ambiguities. Humans are good at pattern matching
+  # and filtering and can mentally remove extraneous data, but
+  # they can't add extra data that's not present.
+  #
+  # In the case of known ambiguous cases, this class adds context
+  # back to the ambiguity so the programmer has full information.
+  #
+  # Beyond handling these ambiguities, it also captures surrounding
+  # code context information:
   #
   #   puts block.to_s # => "def bark"
   #
@@ -16,7 +30,8 @@ module DeadEnd
   #     code_lines: code_lines
   #   )
   #
-  #   puts context.call.join
+  #   lines = context.call.map(&:original)
+  #   puts lines.join
   #   # =>
   #     class Dog
   #       def bark
@@ -34,19 +49,34 @@ module DeadEnd
     def call
       @blocks.each do |block|
+        capture_first_kw_end_same_indent(block)
         capture_last_end_same_indent(block)
         capture_before_after_kws(block)
         capture_falling_indent(block)
       end
       @lines_to_output.select!(&:not_empty?)
-      @lines_to_output.select!(&:not_comment?)
       @lines_to_output.uniq!
       @lines_to_output.sort!
       @lines_to_output
     end
+    # Shows the context around code provided by "falling" indentation
+    #
+    # Converts:
+    #
+    #       it "foo" do
+    #
+    # into:
+    #
+    #   class OH
+    #     def hello
+    #       it "foo" do
+    #     end
+    #   end
+    #
+    #
     def capture_falling_indent(block)
       AroundBlockScan.new(
         block: block,
@@ -56,7 +86,36 @@ module DeadEnd
       end
     end
+    # Shows surrounding kw/end pairs
+    #
+    # The purpose of showing these extra pairs is due to cases
+    # of ambiguity when only one visible line is matched.
+    #
+    # For example:
+    #
+    #     1  class Dog
+    #     2    def bark
+    #     4    def eat
+    #     5    end
+    #     6  end
+    #
+    # In this case either line 2 could be missing an `end` or
+    # line 4 was an extra line added by mistake (it happens).
+    #
+    # When we detect the above problem it shows the issue
+    # as only being on line 2
+    #
+    #     2    def bark
+    #
+    # Showing "neighbor" keyword pairs gives extra context:
+    #
+    #     2    def bark
+    #     4    def eat
+    #     5    end
+    #
     def capture_before_after_kws(block)
+      return unless block.visible_lines.count == 1
       around_lines = AroundBlockScan.new(code_lines: @code_lines, block: block)
         .start_at_next_line
         .capture_neighbor_context
@@ -66,9 +125,10 @@ module DeadEnd
       @lines_to_output.concat(around_lines)
     end
-    # When there is an invalid with a keyword
-    # right before an end, it's unclear where
-    # the correct code should be.
+    # When there is an invalid block with a keyword
+    # missing an end right before another end,
+    # it is unclear which keyword is missing the
+    # end
     #
     # Take this example:
     #
@@ -87,20 +147,21 @@ module DeadEnd
     # line 4. Also work backwards and if there's a mis-matched keyword, show it
     # too
     def capture_last_end_same_indent(block)
-      start_index = block.visible_lines.first.index
-      lines = @code_lines[start_index..block.lines.last.index]
+      return if block.visible_lines.length != 1
+      return unless block.visible_lines.first.is_kw?
+      visible_line = block.visible_lines.first
+      lines = @code_lines[visible_line.index..block.lines.last.index]
       # Find first end with same indent
       # (this would return line 4)
       #
       #   end             # 4
-      matching_end = lines.find { |line| line.indent == block.current_indent && line.is_end? }
+      matching_end = lines.detect { |line| line.indent == block.current_indent && line.is_end? }
       return unless matching_end
       @lines_to_output << matching_end
-      lines = @code_lines[start_index..matching_end.index]
       # Work backwards from the end to
       # see if there are mis-matched
       # keyword/end pairs
@@ -113,7 +174,7 @@ module DeadEnd
       #   end             # 4
       end_count = 0
       kw_count = 0
-      kw_line = lines.reverse.detect do |line|
+      kw_line = @code_lines[visible_line.index..matching_end.index].reverse.detect do |line|
         end_count += 1 if line.is_end?
         kw_count += 1 if line.is_kw?
@@ -122,5 +183,51 @@ module DeadEnd
       return unless kw_line
       @lines_to_output << kw_line
     end
+    # The logical inverse of `capture_last_end_same_indent`
+    #
+    # When there is an invalid block with an `end`
+    # missing a keyword right after another `end`,
+    # it is unclear which end is missing the
+    # keyword.
+    #
+    # Take this example:
+    #
+    #   class Dog       # 1
+    #       puts "woof" # 2
+    #     end           # 3
+    #   end             # 4
+    #
+    # the problem line will be identified as:
+    #
+    #  ❯ end            # 4
+    #
+    # This happens because lines 1, 2, and 3 are technically valid code and are expanded
+    # first, deemed valid, and hidden. We need to un-hide the matching keyword on
+    # line 1. Also work backwards and if there's a mis-matched end, show it
+    # too
+    def capture_first_kw_end_same_indent(block)
+      return if block.visible_lines.length != 1
+      return unless block.visible_lines.first.is_end?
+      visible_line = block.visible_lines.first
+      lines = @code_lines[block.lines.first.index..visible_line.index]
+      matching_kw = lines.reverse.detect { |line| line.indent == block.current_indent && line.is_kw? }
+      return unless matching_kw
+      @lines_to_output << matching_kw
+      kw_count = 0
+      end_count = 0
+      orphan_end = @code_lines[matching_kw.index..visible_line.index].detect do |line|
+        kw_count += 1 if line.is_kw?
+        end_count += 1 if line.is_end?
+        end_count >= kw_count
+      end
+      return unless orphan_end
+      @lines_to_output << orphan_end
+    end
   end
 end

data/lib/dead_end/clean_document.rb ADDED Viewed

@@ -0,0 +1,313 @@
+# frozen_string_literal: true
+module DeadEnd
+  # Parses and sanitizes source into a lexically aware document
+  #
+  # Internally the document is represented by an array with each
+  # index containing a CodeLine correlating to a line from the source code.
+  #
+  # There are three main phases in the algorithm:
+  #
+  # 1. Sanitize/format input source
+  # 2. Search for invalid blocks
+  # 3. Format invalid blocks into something meaningful
+  #
+  # This class handles the first part.
+  #
+  # The reason this class exists is to format input source
+  # for better/easier/cleaner exploration.
+  #
+  # The CodeSearch class operates at the line level so
+  # we must be careful to not introduce lines that look
+  # valid by themselves, but when removed will trigger syntax errors
+  # or strange behavior.
+  #
+  # ## Join Trailing slashes
+  #
+  # Code with a trailing slash is logically treated as a single line:
+  #
+  #     1 it "code can be split" \
+  #     2    "across multiple lines" do
+  #
+  # In this case removing line 2 would add a syntax error. We get around
+  # this by internally joining the two lines into a single "line" object
+  #
+  # ## Logically Consecutive lines
+  #
+  # Code such as a chain of method calls can be broken over
+  # multiple lines:
+  #
+  #     1 User.
+  #     2   where(name: "schneems").
+  #     3   first
+  #
+  # Removing line 2 can introduce a syntax error. To fix this, all lines
+  # are joined into one.
+  #
+  # ## Heredocs
+  #
+  # A heredoc is a way of defining a multi-line string. They can cause many
+  # problems. If left as a single line, Ripper would try to parse the contents
+  # as ruby code rather than as a string. Even without this problem, we still
+  # hit an issue with indentation
+  #
+  #    1 foo = <<~HEREDOC
+  #    2  "Be yourself; everyone else is already taken.""
+  #    3    ― Oscar Wilde
+  #    4      puts "I look like ruby code" # but i'm still a heredoc
+  #    5 HEREDOC
+  #
+  # If we didn't join these lines then our algorithm would think that line 4
+  # is separate from the rest, has a higher indentation, then look at it first
+  # and remove it.
+  #
+  # If the code evaluates line 5 by itself it will think line 5 is a constant,
+  # remove it, and introduce a syntax error.
+  #
+  # All of these problems are fixed by joining the whole heredoc into a single
+  # line.
+  #
+  # ## Comments and whitespace
+  #
+  # Comments can throw off the way the lexer tells us that the line
+  # logically belongs with the next line. This is valid ruby but
+  # results in a different lex output than before:
+  #
+  #     1 User.
+  #     2   where(name: "schneems").
+  #     3   # Comment here
+  #     4   first
+  #
+  # To handle this we can replace comment lines with empty lines
+  # and then re-lex the source. This removal and re-lexing preserves
+  # line index and document size, but generates an easier to work with
+  # document.
+  #
+  class CleanDocument
+    def initialize(source:)
+      @source = source
+      @document = CodeLine.from_source(@source)
+    end
+    # Call all of the document "cleaners"
+    # and return self
+    def call
+      clean_sweep
+        .join_trailing_slash!
+        .join_consecutive!
+        .join_heredoc!
+      self
+    end
+    # Return an array of CodeLines in the
+    # document
+    def lines
+      @document
+    end
+    # Renders the document back to a string
+    def to_s
+      @document.join
+    end
+    # Remove comments and whitespace only lines
+    #
+    # replace with empty newlines
+    #
+    #     source = <<~'EOM'
+    #       # Comment 1
+    #       puts "hello"
+    #       # Comment 2
+    #       puts "world"
+    #     EOM
+    #
+    #     lines = CleanDocument.new(source: source).clean_sweep.lines
+    #     expect(lines[0].to_s).to eq("\n")
+    #     expect(lines[1].to_s).to eq("puts \"hello\"\n")
+    #     expect(lines[2].to_s).to eq("\n")
+    #     expect(lines[3].to_s).to eq("puts \"world\"\n")
+    #
+    # WARNING:
+    # If you run this after any of the "join" commands, they
+    # will be un-joined.
+    #
+    # After this change is made, we re-lex the document because
+    # removing comments can change how the doc is parsed.
+    #
+    # For example:
+    #
+    #     values = LexAll.new(source: <<~EOM)
+    #       User.
+    #         # comment
+    #         where(name: 'schneems')
+    #     EOM
+    #     expect(values.count {|v| v.type == :on_ignored_nl}).to eq(1)
+    #
+    # After the comment is removed:
+    #
+    #     values = LexAll.new(source: <<~EOM)
+    #       User.
+    #
+    #         where(name: 'schneems')
+    #     EOM
+    #     expect(values.count {|v| v.type == :on_ignored_nl}).to eq(2)
+    #
+    def clean_sweep
+      source = @document.map do |code_line|
+        # Clean trailing whitespace on empty line
+        if code_line.line.strip.empty?
+          next CodeLine.new(line: "\n", index: code_line.index, lex: [])
+        end
+        # Remove comments
+        if code_line.lex.detect { |lex| lex.type != :on_sp }&.type == :on_comment
+          next CodeLine.new(line: "\n", index: code_line.index, lex: [])
+        end
+        code_line
+      end.join
+      @source = source
+      @document = CodeLine.from_source(source)
+      self
+    end
+    # Smushes all heredoc lines into one line
+    #
+    #     source = <<~'EOM'
+    #       foo = <<~HEREDOC
+    #          lol
+    #          hehehe
+    #       HEREDOC
+    #     EOM
+    #
+    #     lines = CleanDocument.new(source: source).join_heredoc!.lines
+    #     expect(lines[0].to_s).to eq(source)
+    #     expect(lines[1].to_s).to eq("")
+    def join_heredoc!
+      start_index_stack = []
+      heredoc_beg_end_index = []
+      lines.each do |line|
+        line.lex.each do |lex_value|
+          case lex_value.type
+          when :on_heredoc_beg
+            start_index_stack << line.index
+          when :on_heredoc_end
+            start_index = start_index_stack.pop
+            end_index = line.index
+            heredoc_beg_end_index << [start_index, end_index]
+          end
+        end
+      end
+      heredoc_groups = heredoc_beg_end_index.map { |start_index, end_index| @document[start_index..end_index] }
+      join_groups(heredoc_groups)
+      self
+    end
+    # Smushes logically "consecutive" lines
+    #
+    #     source = <<~'EOM'
+    #       User.
+    #         where(name: 'schneems').
+    #         first
+    #     EOM
+    #
+    #     lines = CleanDocument.new(source: source).join_consecutive!.lines
+    #     expect(lines[0].to_s).to eq(source)
+    #     expect(lines[1].to_s).to eq("")
+    #
+    # The one known case this doesn't handle is:
+    #
+    #     Ripper.lex <<~EOM
+    #       a &&
+    #        b ||
+    #        c
+    #     EOM
+    #
+    # For some reason this introduces `on_ignored_nl` but with BEG type
+    #
+    def join_consecutive!
+      consecutive_groups = @document.select(&:ignore_newline_not_beg?).map do |code_line|
+        take_while_including(code_line.index..) do |line|
+          line.ignore_newline_not_beg?
+        end
+      end
+      join_groups(consecutive_groups)
+      self
+    end
+    # Join lines with a trailing slash
+    #
+    #     source = <<~'EOM'
+    #       it "code can be split" \
+    #          "across multiple lines" do
+    #     EOM
+    #
+    #     lines = CleanDocument.new(source: source).join_trailing_slash!.lines
+    #     expect(lines[0].to_s).to eq(source)
+    #     expect(lines[1].to_s).to eq("")
+    def join_trailing_slash!
+      trailing_groups = @document.select(&:trailing_slash?).map do |code_line|
+        take_while_including(code_line.index..) { |x| x.trailing_slash? }
+      end
+      join_groups(trailing_groups)
+      self
+    end
+    # Helper method for joining "groups" of lines
+    #
+    # Input is expected to be type Array<Array<CodeLine>>
+    #
+    # The outer array holds the various "groups" while the
+    # inner array holds code lines.
+    #
+    # All code lines are "joined" into the first line in
+    # their group.
+    #
+    # To preserve document size, empty lines are placed
+    # in the place of the lines that were "joined"
+    def join_groups(groups)
+      groups.each do |lines|
+        line = lines.first
+        # Handle the case of multiple groups in a row
+        # if one is already replaced, move on
+        next if @document[line.index].empty?
+        # Join group into the first line
+        @document[line.index] = CodeLine.new(
+          lex: lines.map(&:lex).flatten,
+          line: lines.join,
+          index: line.index
+        )
+        # Hide the rest of the lines
+        lines[1..].each do |line|
+          # The joined line already contains newlines; adding more would
+          # produce double newlines, so use an empty line instead
+          @document[line.index] = CodeLine.new(line: "", index: line.index, lex: [])
+        end
+      end
+      self
+    end
+    # Helper method for grabbing elements from document
+    #
+    # Like `take_while` except when it stops
+    # iterating, it also returns the line
+    # that caused it to stop
+    def take_while_including(range = 0..)
+      take_next_and_stop = false
+      @document[range].take_while do |line|
+        next if take_next_and_stop
+        take_next_and_stop = !(yield line)
+        true
+      end
+    end
+  end
+end

data/lib/dead_end/code_frontier.rb CHANGED Viewed

@@ -3,11 +3,19 @@
 module DeadEnd
   # The main function of the frontier is to hold the edges of our search and to
   # evaluate when we can stop searching.
+  # There are three main phases in the algorithm:
+  #
+  # 1. Sanitize/format input source
+  # 2. Search for invalid blocks
+  # 3. Format invalid blocks into something meaningful
+  #
+  # The Code frontier is a critical part of the second step
   #
   # ## Knowing where we've been
   #
-  # Once a code block is generated it is added onto the frontier where it will be
-  # sorted and then the frontier can be filtered. Large blocks that totally contain a
+  # Once a code block is generated it is added onto the frontier. Then it will be
+  # sorted by indentation and the frontier can be filtered. Large blocks that fully enclose a
   # smaller block will cause the smaller block to be evicted.
   #
   #   CodeFrontier#<<(block) # Adds block to frontier
@@ -15,11 +23,11 @@ module DeadEnd
   #
   # ## Knowing where we can go
   #
-  # Internally it keeps track of "unvisited" lines which is exposed via `next_indent_line`
-  # when called this will return a line of code with the most indentation.
+  # Internally the frontier keeps track of "unvisited" lines which are exposed via `next_indent_line`.
+  # When called, this method returns a line of code with the highest indentation.
   #
-  # This line of code can be used to build a CodeBlock and then when that code block
-  # is added back to the frontier, then the lines are removed from the
+  # The returned line of code can be used to build a CodeBlock and then that code block
+  # is added back to the frontier. Then, the lines are removed from the
   # "unvisited" so we don't double-create the same block.
   #
   #   CodeFrontier#next_indent_line # Shows next line
@@ -27,17 +35,20 @@ module DeadEnd
   #
   # ## Knowing when to stop
   #
-  # The frontier holds the syntax error when removing all code blocks from the original
-  # source document allows it to be parsed as syntatically valid:
+  # The frontier knows how to check the entire document for a syntax error. When blocks
+  # are added onto the frontier, they're removed from the document. When all code containing
+  # syntax errors has been added to the frontier, the document will be parsable without a
+  # syntax error and the search can stop.
   #
-  #   CodeFrontier#holds_all_syntax_errors?
+  #   CodeFrontier#holds_all_syntax_errors? # Returns true when frontier holds all syntax errors
   #
   # ## Filtering false positives
   #
-  # Once the search is completed, the frontier will have many blocks that do not contain
-  # the syntax error. To filter to the smallest subset that does call:
+  # Once the search is completed, the frontier may have multiple blocks that do not contain
+  # the syntax error. To limit the result to the smallest subset of "invalid blocks" call:
   #
   #   CodeFrontier#detect_invalid_blocks
+  #
   class CodeFrontier
     def initialize(code_lines:)
       @code_lines = code_lines
@@ -84,8 +95,8 @@ module DeadEnd
         puts "```"
         puts @frontier.last.to_s
         puts "```"
-        puts "  @frontier indent: #{frontier_indent}"
-        puts "  @unvisited indent:     #{unvisited_indent}"
+        puts "  @frontier indent:  #{frontier_indent}"
+        puts "  @unvisited indent: #{unvisited_indent}"
       end
       # Expand all blocks before moving to unvisited lines