dead_end 1.1.6 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,11 +3,19 @@
3
3
  module DeadEnd
4
4
  # The main function of the frontier is to hold the edges of our search and to
5
5
  # evaluate when we can stop searching.
6
+
7
+ # There are three main phases in the algorithm:
8
+ #
9
+ # 1. Sanitize/format input source
10
+ # 2. Search for invalid blocks
11
+ # 3. Format invalid blocks into something meaningful
12
+ #
13
+ # The Code frontier is a critical part of the second step
6
14
  #
7
15
  # ## Knowing where we've been
8
16
  #
9
- # Once a code block is generated it is added onto the frontier where it will be
10
- # sorted and then the frontier can be filtered. Large blocks that totally contain a
17
+ # Once a code block is generated it is added onto the frontier. Then it will be
18
+ # sorted by indentation and the frontier can be filtered. Large blocks that fully enclose a
11
19
  # smaller block will cause the smaller block to be evicted.
12
20
  #
13
21
  # CodeFrontier#<<(block) # Adds block to frontier
@@ -15,11 +23,11 @@ module DeadEnd
15
23
  #
16
24
  # ## Knowing where we can go
17
25
  #
18
- # Internally it keeps track of "unvisited" lines which is exposed via `next_indent_line`
19
- # when called this will return a line of code with the most indentation.
26
+ # Internally the frontier keeps track of "unvisited" lines which are exposed via `next_indent_line`.
27
+ # When called, this method returns a line of code with the highest indentation.
20
28
  #
21
- # This line of code can be used to build a CodeBlock and then when that code block
22
- # is added back to the frontier, then the lines are removed from the
29
+ # The returned line of code can be used to build a CodeBlock and then that code block
30
+ # is added back to the frontier. Then, the lines are removed from the
23
31
  # "unvisited" so we don't double-create the same block.
24
32
  #
25
33
  # CodeFrontier#next_indent_line # Shows next line
@@ -27,19 +35,22 @@ module DeadEnd
27
35
  #
28
36
  # ## Knowing when to stop
29
37
  #
30
- # The frontier holds the syntax error when removing all code blocks from the original
31
- # source document allows it to be parsed as syntatically valid:
38
+ # The frontier knows how to check the entire document for a syntax error. When blocks
39
+ # are added onto the frontier, they're removed from the document. When all code containing
40
+ # syntax errors has been added to the frontier, the document will be parsable without a
41
+ # syntax error and the search can stop.
32
42
  #
33
- # CodeFrontier#holds_all_syntax_errors?
43
+ # CodeFrontier#holds_all_syntax_errors? # Returns true when frontier holds all syntax errors
34
44
  #
35
45
  # ## Filtering false positives
36
46
  #
37
- # Once the search is completed, the frontier will have many blocks that do not contain
38
- # the syntax error. To filter to the smallest subset that does call:
47
+ # Once the search is completed, the frontier may have multiple blocks that do not contain
48
+ # the syntax error. To limit the result to the smallest subset of "invalid blocks" call:
39
49
  #
40
50
  # CodeFrontier#detect_invalid_blocks
51
+ #
41
52
  class CodeFrontier
42
- def initialize(code_lines: )
53
+ def initialize(code_lines:)
43
54
  @code_lines = code_lines
44
55
  @frontier = []
45
56
  @unvisited_lines = @code_lines.sort_by(&:indent_index)
@@ -66,7 +77,7 @@ module DeadEnd
66
77
 
67
78
  # Returns a code block with the largest indentation possible
68
79
  def pop
69
- return @frontier.pop
80
+ @frontier.pop
70
81
  end
71
82
 
72
83
  def next_indent_line
@@ -78,14 +89,14 @@ module DeadEnd
78
89
  return true if @unvisited_lines.empty?
79
90
 
80
91
  frontier_indent = @frontier.last.current_indent
81
- unvisited_indent= next_indent_line.indent
92
+ unvisited_indent = next_indent_line.indent
82
93
 
83
94
  if ENV["DEBUG"]
84
95
  puts "```"
85
96
  puts @frontier.last.to_s
86
97
  puts "```"
87
- puts " @frontier indent: #{frontier_indent}"
88
- puts " @unvisited indent: #{unvisited_indent}"
98
+ puts " @frontier indent: #{frontier_indent}"
99
+ puts " @unvisited indent: #{unvisited_indent}"
89
100
  end
90
101
 
91
102
  # Expand all blocks before moving to unvisited lines
@@ -106,7 +117,7 @@ module DeadEnd
106
117
  register_indent_block(block)
107
118
 
108
119
  # Make sure we don't double expand, if a code block fully engulfs another code block, keep the bigger one
109
- @frontier.reject! {|b|
120
+ @frontier.reject! { |b|
110
121
  b.starts_at >= block.starts_at && b.ends_at <= block.ends_at
111
122
  }
112
123
  @frontier << block
@@ -4,44 +4,47 @@ module DeadEnd
4
4
  # Represents a single line of code of a given source file
5
5
  #
6
6
  # This object contains metadata about the line such as
7
- # amount of indentation. An if it is empty or not.
7
+ # amount of indentation, if it is empty or not, and
8
+ # lexical data, such as if it has an `end` or a keyword
9
+ # in it.
8
10
  #
9
- # While a given search for syntax errors is being performed
10
- # state about the search can be stored in individual lines such
11
- # as :valid or :invalid.
12
- #
13
- # Visibility of lines can be toggled on and off.
11
+ # Visibility of lines can be toggled off. Marking a line as invisible
12
+ # indicates that it should not be used for syntax checks.
13
+ # It's functionally the same as commenting it out.
14
14
  #
15
15
  # Example:
16
16
  #
17
- # line = CodeLine.new(line: "def foo\n", index: 0)
18
- # line.line_number => 1
17
+ # line = CodeLine.from_source("def foo\n").first
18
+ # line.number => 1
19
19
  # line.empty? # => false
20
20
  # line.visible? # => true
21
21
  # line.mark_invisible
22
22
  # line.visible? # => false
23
23
  #
24
- # A CodeBlock is made of multiple CodeLines
25
- #
26
- # Marking a line as invisible indicates that it should not be used
27
- # for syntax checks. It's essentially the same as commenting it out
28
- #
29
- # Marking a line as invisible also lets the overall program know
30
- # that it should not check that area for syntax errors.
31
24
  class CodeLine
32
25
  TRAILING_SLASH = ("\\" + $/).freeze
33
26
 
34
- def self.parse(source)
27
+ # Returns an array of CodeLine objects
28
+ # from the source string
29
+ def self.from_source(source)
30
+ lex_array_for_line = LexAll.new(source: source).each_with_object(Hash.new { |h, k| h[k] = [] }) { |lex, hash| hash[lex.line] << lex }
35
31
  source.lines.map.with_index do |line, index|
36
- CodeLine.new(line: line, index: index)
32
+ CodeLine.new(
33
+ line: line,
34
+ index: index,
35
+ lex: lex_array_for_line[index + 1]
36
+ )
37
37
  end
38
38
  end
39
39
 
40
- attr_reader :line, :index, :indent, :original_line
40
+ attr_reader :line, :index, :lex, :line_number, :indent
41
+ def initialize(line:, index:, lex:)
42
+ @lex = lex
43
+ @line = line
44
+ @index = index
45
+ @original = line.freeze
46
+ @line_number = @index + 1
41
47
 
42
- def initialize(line: , index:)
43
- @original_line = line.freeze
44
- @line = @original_line
45
48
  if line.strip.empty?
46
49
  @empty = true
47
50
  @indent = 0
@@ -49,102 +52,182 @@ module DeadEnd
49
52
  @empty = false
50
53
  @indent = SpaceCount.indent(line)
51
54
  end
52
- @index = index
53
- @status = nil # valid, invalid, unknown
54
- @invalid = false
55
-
56
- lex_detect!
57
- end
58
55
 
59
- private def lex_detect!
60
- lex_array = LexAll.new(source: line)
61
56
  kw_count = 0
62
57
  end_count = 0
63
- lex_array.each_with_index do |lex, index|
64
- next unless lex.type == :on_kw
65
-
66
- case lex.token
67
- when 'if', 'unless', 'while', 'until'
68
- # Only count if/unless when it's not a "trailing" if/unless
69
- # https://github.com/ruby/ruby/blob/06b44f819eb7b5ede1ff69cecb25682b56a1d60c/lib/irb/ruby-lex.rb#L374-L375
70
- kw_count += 1 if !lex.expr_label?
71
- when 'def', 'case', 'for', 'begin', 'class', 'module', 'do'
72
- kw_count += 1
73
- when 'end'
74
- end_count += 1
75
- end
58
+ @lex.each do |lex|
59
+ kw_count += 1 if lex.is_kw?
60
+ end_count += 1 if lex.is_end?
76
61
  end
77
62
 
78
- @is_comment = lex_array.detect {|lex| lex.type != :on_sp}&.type == :on_comment
79
- return if @is_comment
63
+ kw_count -= oneliner_method_count
64
+
80
65
  @is_kw = (kw_count - end_count) > 0
81
66
  @is_end = (end_count - kw_count) > 0
82
- @is_trailing_slash = lex_array.last.token == TRAILING_SLASH
83
- end
84
-
85
- alias :original :original_line
86
-
87
- def trailing_slash?
88
- @is_trailing_slash
89
67
  end
90
68
 
69
+ # Used for stable sort via indentation level
70
+ #
71
+ # Ruby's sort is not "stable" meaning that when
72
+ # multiple elements have the same value, they are
73
+ # not guaranteed to return in the same order they
74
+ # were put in.
75
+ #
76
+ # So when multiple code lines have the same indentation
77
+ # level, they're sorted by their index value which is unique
78
+ # and consistent.
79
+ #
80
+ # This is mostly needed for consistency of the test suite
91
81
  def indent_index
92
82
  @indent_index ||= [indent, index]
93
83
  end
84
+ alias_method :number, :line_number
94
85
 
95
- def <=>(b)
96
- self.index <=> b.index
97
- end
98
-
99
- def is_comment?
100
- @is_comment
101
- end
102
-
103
- def not_comment?
104
- !is_comment?
105
- end
106
-
86
+ # Returns true if the code line is determined
87
+ # to contain a keyword that matches with an `end`
88
+ #
89
+ # For example: `def`, `do`, `begin`, `ensure`, etc.
107
90
  def is_kw?
108
91
  @is_kw
109
92
  end
110
93
 
94
+ # Returns true if the code line is determined
95
+ # to contain an `end` keyword
111
96
  def is_end?
112
97
  @is_end
113
98
  end
114
99
 
100
+ # Used to hide lines
101
+ #
102
+ # The search algorithm will group lines into blocks
103
+ # then if those blocks are determined to represent
104
+ # valid code they will be hidden
115
105
  def mark_invisible
116
106
  @line = ""
117
- self
118
- end
119
-
120
- def mark_visible
121
- @line = @original_line
122
- self
123
107
  end
124
108
 
109
+ # Returns false when the line was marked as "invisible".
110
+ # Confusingly, "empty" lines are visible...they
111
+ # just don't contain any source code other than a newline ("\n").
125
112
  def visible?
126
113
  !line.empty?
127
114
  end
128
115
 
116
+ # Opposite of `visible?` (note: different than `empty?`)
129
117
  def hidden?
130
118
  !visible?
131
119
  end
132
120
 
133
- def line_number
134
- index + 1
121
+ # An `empty?` line is one that was originally left
122
+ # empty in the source code, while a "hidden" line
123
+ # is one that we've since marked as "invisible"
124
+ def empty?
125
+ @empty
135
126
  end
136
- alias :number :line_number
137
127
 
128
+ # Opposite of `empty?` (note: different than `visible?`)
138
129
  def not_empty?
139
130
  !empty?
140
131
  end
141
132
 
142
- def empty?
143
- @empty
144
- end
145
-
133
+ # Renders the given line
134
+ #
135
+ # Also allows us to represent source code as
136
+ # an array of code lines.
137
+ #
138
+ # When we have an array of code line elements
139
+ # calling `join` on the array will call `to_s`
140
+ # on each element, which essentially converts
141
+ # it back into its original source string.
146
142
  def to_s
147
- self.line
143
+ line
144
+ end
145
+
146
+ # When the code line is marked invisible
147
+ # we retain the original value of its line
148
+ # this is useful for debugging and for
149
+ # showing extra context
150
+ #
151
+ # DisplayCodeWithLineNumbers will render
152
+ # all lines given to it, not just visible
153
+ # lines, it uses the original method to
154
+ # obtain them.
155
+ attr_reader :original
156
+
157
+ # Comparison operator, needed for equality
158
+ # and sorting
159
+ def <=>(other)
160
+ index <=> other.index
161
+ end
162
+
163
+ # [Not stable API]
164
+ #
165
+ # Lines that have a `on_ignored_nl` type token and NOT
166
+ # a `BEG` type seem to be a good proxy for the ability
167
+ # to join multiple lines into one.
168
+ #
169
+ # This predicate method is used to determine when those
170
+ # two criteria have been met.
171
+ #
172
+ # The one known case this doesn't handle is:
173
+ #
174
+ # Ripper.lex <<~EOM
175
+ # a &&
176
+ # b ||
177
+ # c
178
+ # EOM
179
+ #
180
+ # For some reason this introduces `on_ignored_nl` but with BEG type
181
+ def ignore_newline_not_beg?
182
+ lex_value = lex.detect { |l| l.type == :on_ignored_nl }
183
+ !!(lex_value && !lex_value.expr_beg?)
184
+ end
185
+
186
+ # Determines if the given line has a trailing slash
187
+ #
188
+ # lines = CodeLine.from_source(<<~EOM)
189
+ # it "foo" \
190
+ # EOM
191
+ # expect(lines.first.trailing_slash?).to eq(true)
192
+ #
193
+ def trailing_slash?
194
+ last = @lex.last
195
+ return false unless last
196
+ return false unless last.type == :on_sp
197
+
198
+ last.token == TRAILING_SLASH
199
+ end
200
+
201
+ # Endless method detection
202
+ #
203
+ # From https://github.com/ruby/irb/commit/826ae909c9c93a2ddca6f9cfcd9c94dbf53d44ab
204
+ # Detecting a "oneliner" seems to need a state machine.
205
+ # This can be done by looking mostly at the "state" (last value):
206
+ #
207
+ # ENDFN -> BEG (token = '=' ) -> END
208
+ #
209
+ private def oneliner_method_count
210
+ oneliner_count = 0
211
+ in_oneliner_def = nil
212
+
213
+ @lex.each do |lex|
214
+ if in_oneliner_def.nil?
215
+ in_oneliner_def = :ENDFN if lex.state.allbits?(Ripper::EXPR_ENDFN)
216
+ elsif lex.state.allbits?(Ripper::EXPR_ENDFN)
217
+ # Continue
218
+ elsif lex.state.allbits?(Ripper::EXPR_BEG)
219
+ in_oneliner_def = :BODY if lex.token == "="
220
+ elsif lex.state.allbits?(Ripper::EXPR_END)
221
+ # We found an endless method, count it
222
+ oneliner_count += 1 if in_oneliner_def == :BODY
223
+
224
+ in_oneliner_def = nil
225
+ else
226
+ in_oneliner_def = nil
227
+ end
228
+ end
229
+
230
+ oneliner_count
148
231
  end
149
232
  end
150
233
  end
@@ -3,11 +3,19 @@
3
3
  module DeadEnd
4
4
  # Searches code for a syntax error
5
5
  #
6
+ # There are three main phases in the algorithm:
7
+ #
8
+ # 1. Sanitize/format input source
9
+ # 2. Search for invalid blocks
10
+ # 3. Format invalid blocks into something meaningful
11
+ #
12
+ # This class handles the second part.
13
+ #
6
14
  # The bulk of the heavy lifting is done in:
7
15
  #
8
16
  # - CodeFrontier (Holds information for generating blocks and determining if we can stop searching)
9
17
  # - ParseBlocksFromLine (Creates blocks into the frontier)
10
- # - BlockExpand (Expands existing blocks to search more code
18
+ # - BlockExpand (Expands existing blocks to search more code)
11
19
  #
12
20
  # ## Syntax error detection
13
21
  #
@@ -25,65 +33,64 @@ module DeadEnd
25
33
  # # => ["def lol\n"]
26
34
  #
27
35
  class CodeSearch
28
- private; attr_reader :frontier; public
29
- public; attr_reader :invalid_blocks, :record_dir, :code_lines
36
+ private
37
+
38
+ attr_reader :frontier
39
+
40
+ public
41
+
42
+ attr_reader :invalid_blocks, :record_dir, :code_lines
30
43
 
31
44
  def initialize(source, record_dir: ENV["DEAD_END_RECORD_DIR"] || ENV["DEBUG"] ? "tmp" : nil)
32
- @source = source
33
45
  if record_dir
34
- @time = Time.now.strftime('%Y-%m-%d-%H-%M-%s-%N')
35
- @record_dir = Pathname(record_dir).join(@time).tap {|p| p.mkpath }
46
+ @time = Time.now.strftime("%Y-%m-%d-%H-%M-%s-%N")
47
+ @record_dir = Pathname(record_dir).join(@time).tap { |p| p.mkpath }
36
48
  @write_count = 0
37
49
  end
38
- code_lines = source.lines.map.with_index do |line, i|
39
- CodeLine.new(line: line, index: i)
40
- end
41
50
 
42
- @code_lines = TrailingSlashJoin.new(code_lines: code_lines).call
51
+ @tick = 0
52
+ @source = source
53
+ @name_tick = Hash.new { |hash, k| hash[k] = 0 }
54
+ @invalid_blocks = []
55
+
56
+ @code_lines = CleanDocument.new(source: source).call.lines
43
57
 
44
58
  @frontier = CodeFrontier.new(code_lines: @code_lines)
45
- @invalid_blocks = []
46
- @name_tick = Hash.new {|hash, k| hash[k] = 0 }
47
- @tick = 0
48
- @block_expand = BlockExpand.new(code_lines: code_lines)
59
+ @block_expand = BlockExpand.new(code_lines: @code_lines)
49
60
  @parse_blocks_from_indent_line = ParseBlocksFromIndentLine.new(code_lines: @code_lines)
50
61
  end
51
62
 
52
63
  # Used for debugging
53
64
  def record(block:, name: "record")
54
- return if !@record_dir
65
+ return unless @record_dir
55
66
  @name_tick[name] += 1
56
67
  filename = "#{@write_count += 1}-#{name}-#{@name_tick[name]}.txt"
57
68
  if ENV["DEBUG"]
58
69
  puts "\n\n==== #{filename} ===="
59
- puts "\n```#{block.starts_at}:#{block.ends_at}"
60
- puts "#{block.to_s}"
70
+ puts "\n```#{block.starts_at}..#{block.ends_at}"
71
+ puts block.to_s
61
72
  puts "```"
62
- puts " block indent: #{block.current_indent}"
73
+ puts " block indent: #{block.current_indent}"
63
74
  end
64
75
  @record_dir.join(filename).open(mode: "a") do |f|
65
76
  display = DisplayInvalidBlocks.new(
66
77
  blocks: block,
67
78
  terminal: false,
68
- code_lines: @code_lines,
79
+ code_lines: @code_lines
69
80
  )
70
- f.write(display.indent display.code_with_lines)
81
+ f.write(display.indent(display.code_with_lines))
71
82
  end
72
83
  end
73
84
 
74
- def push(block, name: )
85
+ def push(block, name:)
75
86
  record(block: block, name: name)
76
87
 
77
- if block.valid?
78
- block.mark_invisible
79
- frontier << block
80
- else
81
- frontier << block
82
- end
88
+ block.mark_invisible if block.valid?
89
+ frontier << block
83
90
  end
84
91
 
85
92
  # Removes the block without putting it back in the frontier
86
- def sweep(block:, name: )
93
+ def sweep(block:, name:)
87
94
  record(block: block, name: name)
88
95
 
89
96
  block.lines.each(&:mark_invisible)
@@ -119,26 +126,8 @@ module DeadEnd
119
126
  push(block, name: "expand")
120
127
  end
121
128
 
122
- def sweep_heredocs
123
- HeredocBlockParse.new(
124
- source: @source,
125
- code_lines: @code_lines
126
- ).call.each do |block|
127
- push(block, name: "heredoc")
128
- end
129
- end
130
-
131
- def sweep_comments
132
- lines = @code_lines.select(&:is_comment?)
133
- return if lines.empty?
134
- block = CodeBlock.new(lines: lines)
135
- sweep(block: block, name: "comments")
136
- end
137
-
138
129
  # Main search loop
139
130
  def call
140
- sweep_heredocs
141
- sweep_comments
142
131
  until frontier.holds_all_syntax_errors?
143
132
  @tick += 1
144
133
 
@@ -149,8 +138,8 @@ module DeadEnd
149
138
  end
150
139
  end
151
140
 
152
- @invalid_blocks.concat(frontier.detect_invalid_blocks )
153
- @invalid_blocks.sort_by! {|block| block.starts_at }
141
+ @invalid_blocks.concat(frontier.detect_invalid_blocks)
142
+ @invalid_blocks.sort_by! { |block| block.starts_at }
154
143
  self
155
144
  end
156
145
  end
@@ -7,7 +7,6 @@ module DeadEnd
7
7
  # even if it is "marked invisible" any filtering of
8
8
  # output should be done before calling this class.
9
9
  #
10
- #
11
10
  # DisplayCodeWithLineNumbers.new(
12
11
  # lines: lines,
13
12
  # highlight_lines: [lines[2], lines[3]]
@@ -23,10 +22,10 @@ module DeadEnd
23
22
  TERMINAL_HIGHLIGHT = "\e[1;3m" # Bold, italics
24
23
  TERMINAL_END = "\e[0m"
25
24
 
26
- def initialize(lines: , highlight_lines: [], terminal: false)
25
+ def initialize(lines:, highlight_lines: [], terminal: false)
27
26
  @lines = Array(lines).sort
28
27
  @terminal = terminal
29
- @highlight_line_hash = Array(highlight_lines).each_with_object({}) {|line, h| h[line] = true }
28
+ @highlight_line_hash = Array(highlight_lines).each_with_object({}) { |line, h| h[line] = true }
30
29
  @digit_count = @lines.last&.line_number.to_s.length
31
30
  end
32
31
 
@@ -48,12 +47,12 @@ module DeadEnd
48
47
  end.join
49
48
  end
50
49
 
51
- private def format(contents: , number: , highlight: false, empty:)
52
- string = String.new("")
53
- if highlight
54
- string << "❯ "
50
+ private def format(contents:, number:, empty:, highlight: false)
51
+ string = +""
52
+ string << if highlight
53
+ "❯ "
55
54
  else
56
- string << " "
55
+ " "
57
56
  end
58
57
 
59
58
  string << number.rjust(@digit_count).to_s