dead_end 1.1.7 → 3.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (42) hide show
  1. checksums.yaml +4 -4
  2. data/.circleci/config.yml +27 -1
  3. data/.github/workflows/check_changelog.yml +14 -7
  4. data/.standard.yml +1 -0
  5. data/CHANGELOG.md +60 -0
  6. data/CODE_OF_CONDUCT.md +2 -2
  7. data/Gemfile +2 -0
  8. data/Gemfile.lock +31 -2
  9. data/README.md +122 -35
  10. data/Rakefile +1 -1
  11. data/dead_end.gemspec +12 -12
  12. data/exe/dead_end +4 -67
  13. data/lib/dead_end/{internals.rb → api.rb} +90 -52
  14. data/lib/dead_end/around_block_scan.rb +16 -18
  15. data/lib/dead_end/auto.rb +3 -101
  16. data/lib/dead_end/block_expand.rb +6 -5
  17. data/lib/dead_end/capture_code_context.rb +167 -50
  18. data/lib/dead_end/clean_document.rb +304 -0
  19. data/lib/dead_end/cli.rb +129 -0
  20. data/lib/dead_end/code_block.rb +20 -4
  21. data/lib/dead_end/code_frontier.rb +74 -29
  22. data/lib/dead_end/code_line.rb +176 -87
  23. data/lib/dead_end/code_search.rb +40 -51
  24. data/lib/dead_end/core_ext.rb +35 -0
  25. data/lib/dead_end/display_code_with_line_numbers.rb +7 -8
  26. data/lib/dead_end/display_invalid_blocks.rb +42 -80
  27. data/lib/dead_end/explain_syntax.rb +103 -0
  28. data/lib/dead_end/insertion_sort.rb +46 -0
  29. data/lib/dead_end/left_right_lex_count.rb +168 -0
  30. data/lib/dead_end/lex_all.rb +25 -34
  31. data/lib/dead_end/lex_value.rb +70 -0
  32. data/lib/dead_end/parse_blocks_from_indent_line.rb +3 -4
  33. data/lib/dead_end/pathname_from_message.rb +47 -0
  34. data/lib/dead_end/ripper_errors.rb +36 -0
  35. data/lib/dead_end/version.rb +1 -1
  36. data/lib/dead_end.rb +2 -2
  37. metadata +14 -9
  38. data/.travis.yml +0 -6
  39. data/lib/dead_end/fyi.rb +0 -7
  40. data/lib/dead_end/heredoc_block_parse.rb +0 -30
  41. data/lib/dead_end/trailing_slash_join.rb +0 -53
  42. data/lib/dead_end/who_dis_syntax_error.rb +0 -69
@@ -54,11 +54,11 @@ module DeadEnd
54
54
  # populate an array with multiple code blocks then call `sort!`
55
55
  # on it without having to specify the sorting criteria
56
56
  def <=>(other)
57
- out = self.current_indent <=> other.current_indent
57
+ out = current_indent <=> other.current_indent
58
58
  return out if out != 0
59
59
 
60
60
  # Stable sort
61
- self.starts_at <=> other.starts_at
61
+ starts_at <=> other.starts_at
62
62
  end
63
63
 
64
64
  def current_indent
@@ -70,8 +70,24 @@ module DeadEnd
70
70
  end
71
71
 
72
72
  def valid?
73
- return @valid if @valid != UNSET
74
- @valid = DeadEnd.valid?(self.to_s)
73
+ if @valid == UNSET
74
+ # Performance optimization
75
+ #
76
+ # If all the lines were previously hidden
77
+ # and we expand to capture additional empty
78
+ # lines then the result cannot be invalid
79
+ #
80
+ # That means there's no reason to re-check all
81
+ # lines with ripper (which is expensive).
82
+ # Benchmark in commit message
83
+ @valid = if lines.all? { |l| l.hidden? || l.empty? }
84
+ true
85
+ else
86
+ DeadEnd.valid?(lines.map(&:original).join)
87
+ end
88
+ else
89
+ @valid
90
+ end
75
91
  end
76
92
 
77
93
  def to_s
@@ -3,11 +3,19 @@
3
3
  module DeadEnd
4
4
  # The main function of the frontier is to hold the edges of our search and to
5
5
  # evaluate when we can stop searching.
6
+
7
+ # There are three main phases in the algorithm:
8
+ #
9
+ # 1. Sanitize/format input source
10
+ # 2. Search for invalid blocks
11
+ # 3. Format invalid blocks into something meaningful
12
+ #
13
+ # The Code frontier is a critical part of the second step
6
14
  #
7
15
  # ## Knowing where we've been
8
16
  #
9
- # Once a code block is generated it is added onto the frontier where it will be
10
- # sorted and then the frontier can be filtered. Large blocks that totally contain a
17
+ # Once a code block is generated it is added onto the frontier. Then it will be
18
+ # sorted by indentation and the frontier can be filtered. Large blocks that fully enclose a
11
19
  # smaller block will cause the smaller block to be evicted.
12
20
  #
13
21
  # CodeFrontier#<<(block) # Adds block to frontier
@@ -15,11 +23,11 @@ module DeadEnd
15
23
  #
16
24
  # ## Knowing where we can go
17
25
  #
18
- # Internally it keeps track of "unvisited" lines which is exposed via `next_indent_line`
19
- # when called this will return a line of code with the most indentation.
26
+ # Internally the frontier keeps track of "unvisited" lines which are exposed via `next_indent_line`
27
+ # when called, this method returns a line of code with the highest indentation.
20
28
  #
21
- # This line of code can be used to build a CodeBlock and then when that code block
22
- # is added back to the frontier, then the lines are removed from the
29
+ # The returned line of code can be used to build a CodeBlock and then that code block
30
+ # is added back to the frontier. Then, the lines are removed from the
23
31
  # "unvisited" so we don't double-create the same block.
24
32
  #
25
33
  # CodeFrontier#next_indent_line # Shows next line
@@ -27,34 +35,63 @@ module DeadEnd
27
35
  #
28
36
  # ## Knowing when to stop
29
37
  #
30
- # The frontier holds the syntax error when removing all code blocks from the original
31
- # source document allows it to be parsed as syntatically valid:
38
+ # The frontier knows how to check the entire document for a syntax error. When blocks
39
+ # are added onto the frontier, they're removed from the document. When all code containing
40
+ # syntax errors has been added to the frontier, the document will be parsable without a
41
+ # syntax error and the search can stop.
32
42
  #
33
- # CodeFrontier#holds_all_syntax_errors?
43
+ # CodeFrontier#holds_all_syntax_errors? # Returns true when frontier holds all syntax errors
34
44
  #
35
45
  # ## Filtering false positives
36
46
  #
37
- # Once the search is completed, the frontier will have many blocks that do not contain
38
- # the syntax error. To filter to the smallest subset that does call:
47
+ # Once the search is completed, the frontier may have multiple blocks that do not contain
48
+ # the syntax error. To limit the result to the smallest subset of "invalid blocks" call:
39
49
  #
40
50
  # CodeFrontier#detect_invalid_blocks
51
+ #
41
52
  class CodeFrontier
42
- def initialize(code_lines: )
53
+ def initialize(code_lines:)
43
54
  @code_lines = code_lines
44
- @frontier = []
55
+ @frontier = InsertionSort.new
45
56
  @unvisited_lines = @code_lines.sort_by(&:indent_index)
57
+ @visited_lines = {}
58
+
59
+ @has_run = false
60
+ @check_next = true
46
61
  end
47
62
 
48
63
  def count
49
- @frontier.count
64
+ @frontier.to_a.length
65
+ end
66
+
67
+ # Performance optimization
68
+ #
69
+ # Parsing with ripper is expensive
70
+ # If we know we don't have any blocks with invalid
71
+ # syntax, then we know we cannot have found
72
+ # the incorrect syntax yet.
73
+ #
74
+ # When an invalid block is added onto the frontier
75
+ # check document state
76
+ private def can_skip_check?
77
+ check_next = @check_next
78
+ @check_next = false
79
+
80
+ if check_next
81
+ false
82
+ else
83
+ true
84
+ end
50
85
  end
51
86
 
52
87
  # Returns true if the document is valid with all lines
53
88
  # removed. By default it checks all blocks present in
54
89
  # the frontier array, but can be used for arbitrary arrays
55
90
  # of codeblocks as well
56
- def holds_all_syntax_errors?(block_array = @frontier)
57
- without_lines = block_array.map do |block|
91
+ def holds_all_syntax_errors?(block_array = @frontier, can_cache: true)
92
+ return false if can_cache && can_skip_check?
93
+
94
+ without_lines = block_array.to_a.flat_map do |block|
58
95
  block.lines
59
96
  end
60
97
 
@@ -66,7 +103,7 @@ module DeadEnd
66
103
 
67
104
  # Returns a code block with the largest indentation possible
68
105
  def pop
69
- return @frontier.pop
106
+ @frontier.to_a.pop
70
107
  end
71
108
 
72
109
  def next_indent_line
@@ -74,18 +111,18 @@ module DeadEnd
74
111
  end
75
112
 
76
113
  def expand?
77
- return false if @frontier.empty?
78
- return true if @unvisited_lines.empty?
114
+ return false if @frontier.to_a.empty?
115
+ return true if @unvisited_lines.to_a.empty?
79
116
 
80
- frontier_indent = @frontier.last.current_indent
81
- unvisited_indent= next_indent_line.indent
117
+ frontier_indent = @frontier.to_a.last.current_indent
118
+ unvisited_indent = next_indent_line.indent
82
119
 
83
120
  if ENV["DEBUG"]
84
121
  puts "```"
85
- puts @frontier.last.to_s
122
+ puts @frontier.to_a.last.to_s
86
123
  puts "```"
87
- puts " @frontier indent: #{frontier_indent}"
88
- puts " @unvisited indent: #{unvisited_indent}"
124
+ puts " @frontier indent: #{frontier_indent}"
125
+ puts " @unvisited indent: #{unvisited_indent}"
89
126
  end
90
127
 
91
128
  # Expand all blocks before moving to unvisited lines
@@ -93,7 +130,13 @@ module DeadEnd
93
130
  end
94
131
 
95
132
  def register_indent_block(block)
96
- @unvisited_lines -= block.lines
133
+ block.lines.each do |line|
134
+ next if @visited_lines[line]
135
+ @visited_lines[line] = true
136
+
137
+ index = @unvisited_lines.bsearch_index { |l| line.indent_index <=> l.indent_index }
138
+ @unvisited_lines.delete_at(index)
139
+ end
97
140
  self
98
141
  end
99
142
 
@@ -106,11 +149,13 @@ module DeadEnd
106
149
  register_indent_block(block)
107
150
 
108
151
  # Make sure we don't double expand, if a code block fully engulfs another code block, keep the bigger one
109
- @frontier.reject! {|b|
152
+ @frontier.to_a.reject! { |b|
110
153
  b.starts_at >= block.starts_at && b.ends_at <= block.ends_at
111
154
  }
155
+
156
+ @check_next = true if block.invalid?
112
157
  @frontier << block
113
- @frontier.sort!
158
+ # @frontier.sort!
114
159
 
115
160
  self
116
161
  end
@@ -130,8 +175,8 @@ module DeadEnd
130
175
  # Given that we know our syntax error exists somewhere in our frontier, we want to find
131
176
  # the smallest possible set of blocks that contain all the syntax errors
132
177
  def detect_invalid_blocks
133
- self.class.combination(@frontier.select(&:invalid?)).detect do |block_array|
134
- holds_all_syntax_errors?(block_array)
178
+ self.class.combination(@frontier.to_a.select(&:invalid?)).detect do |block_array|
179
+ holds_all_syntax_errors?(block_array, can_cache: false)
135
180
  end || []
136
181
  end
137
182
  end
@@ -4,147 +4,236 @@ module DeadEnd
4
4
  # Represents a single line of code of a given source file
5
5
  #
6
6
  # This object contains metadata about the line such as
7
- # amount of indentation. An if it is empty or not.
7
+ # amount of indentation, if it is empty or not, and
8
+ # lexical data, such as if it has an `end` or a keyword
9
+ # in it.
8
10
  #
9
- # While a given search for syntax errors is being performed
10
- # state about the search can be stored in individual lines such
11
- # as :valid or :invalid.
12
- #
13
- # Visibility of lines can be toggled on and off.
11
+ # Visibility of lines can be toggled off. Marking a line as invisible
12
+ # indicates that it should not be used for syntax checks.
13
+ # It's functionally the same as commenting it out.
14
14
  #
15
15
  # Example:
16
16
  #
17
- # line = CodeLine.new(line: "def foo\n", index: 0)
18
- # line.line_number => 1
17
+ # line = CodeLine.from_source("def foo\n").first
18
+ # line.number => 1
19
19
  # line.empty? # => false
20
20
  # line.visible? # => true
21
21
  # line.mark_invisible
22
22
  # line.visible? # => false
23
23
  #
24
- # A CodeBlock is made of multiple CodeLines
25
- #
26
- # Marking a line as invisible indicates that it should not be used
27
- # for syntax checks. It's essentially the same as commenting it out
28
- #
29
- # Marking a line as invisible also lets the overall program know
30
- # that it should not check that area for syntax errors.
31
24
  class CodeLine
32
25
  TRAILING_SLASH = ("\\" + $/).freeze
33
26
 
34
- def self.parse(source)
35
- source.lines.map.with_index do |line, index|
36
- CodeLine.new(line: line, index: index)
27
+ # Returns an array of CodeLine objects
28
+ # from the source string
29
+ def self.from_source(source, lines: nil)
30
+ lines ||= source.lines
31
+ lex_array_for_line = LexAll.new(source: source, source_lines: lines).each_with_object(Hash.new { |h, k| h[k] = [] }) { |lex, hash| hash[lex.line] << lex }
32
+ lines.map.with_index do |line, index|
33
+ CodeLine.new(
34
+ line: line,
35
+ index: index,
36
+ lex: lex_array_for_line[index + 1]
37
+ )
37
38
  end
38
39
  end
39
40
 
40
- attr_reader :line, :index, :indent, :original_line
41
+ attr_reader :line, :index, :lex, :line_number, :indent
42
+ def initialize(line:, index:, lex:)
43
+ @lex = lex
44
+ @line = line
45
+ @index = index
46
+ @original = line
47
+ @line_number = @index + 1
48
+ strip_line = line.dup
49
+ strip_line.lstrip!
41
50
 
42
- def initialize(line: , index:)
43
- @original_line = line.freeze
44
- @line = @original_line
45
- if line.strip.empty?
51
+ if strip_line.empty?
46
52
  @empty = true
47
53
  @indent = 0
48
54
  else
49
55
  @empty = false
50
- @indent = SpaceCount.indent(line)
56
+ @indent = line.length - strip_line.length
51
57
  end
52
- @index = index
53
- @status = nil # valid, invalid, unknown
54
- @invalid = false
55
-
56
- lex_detect!
57
- end
58
-
59
- private def lex_detect!
60
- lex_array = LexAll.new(source: line)
61
- kw_count = 0
62
- end_count = 0
63
- lex_array.each_with_index do |lex, index|
64
- next unless lex.type == :on_kw
65
-
66
- case lex.token
67
- when 'if', 'unless', 'while', 'until'
68
- # Only count if/unless when it's not a "trailing" if/unless
69
- # https://github.com/ruby/ruby/blob/06b44f819eb7b5ede1ff69cecb25682b56a1d60c/lib/irb/ruby-lex.rb#L374-L375
70
- kw_count += 1 if !lex.expr_label?
71
- when 'def', 'case', 'for', 'begin', 'class', 'module', 'do'
72
- kw_count += 1
73
- when 'end'
74
- end_count += 1
75
- end
76
- end
77
-
78
- @is_comment = lex_array.detect {|lex| lex.type != :on_sp}&.type == :on_comment
79
- return if @is_comment
80
- @is_kw = (kw_count - end_count) > 0
81
- @is_end = (end_count - kw_count) > 0
82
- @is_trailing_slash = lex_array.last.token == TRAILING_SLASH
83
- end
84
-
85
- alias :original :original_line
86
-
87
- def trailing_slash?
88
- @is_trailing_slash
89
- end
90
58
 
59
+ set_kw_end
60
+ end
61
+
62
+ # Used for stable sort via indentation level
63
+ #
64
+ # Ruby's sort is not "stable" meaning that when
65
+ # multiple elements have the same value, they are
66
+ # not guaranteed to return in the same order they
67
+ # were put in.
68
+ #
69
+ # So when multiple code lines have the same indentation
70
+ # level, they're sorted by their index value which is unique
71
+ # and consistent.
72
+ #
73
+ # This is mostly needed for consistency of the test suite
91
74
  def indent_index
92
75
  @indent_index ||= [indent, index]
93
76
  end
77
+ alias_method :number, :line_number
94
78
 
95
- def <=>(b)
96
- self.index <=> b.index
97
- end
98
-
99
- def is_comment?
100
- @is_comment
101
- end
102
-
103
- def not_comment?
104
- !is_comment?
105
- end
106
-
79
+ # Returns true if the code line is determined
80
+ # to contain a keyword that matches with an `end`
81
+ #
82
+ # For example: `def`, `do`, `begin`, `ensure`, etc.
107
83
  def is_kw?
108
84
  @is_kw
109
85
  end
110
86
 
87
+ # Returns true if the code line is determined
88
+ # to contain an `end` keyword
111
89
  def is_end?
112
90
  @is_end
113
91
  end
114
92
 
93
+ # Used to hide lines
94
+ #
95
+ # The search algorithm will group lines into blocks
96
+ # then if those blocks are determined to represent
97
+ # valid code they will be hidden
115
98
  def mark_invisible
116
99
  @line = ""
117
- self
118
- end
119
-
120
- def mark_visible
121
- @line = @original_line
122
- self
123
100
  end
124
101
 
102
+ # Returns false when the line was marked as "invisible"
103
+ # Confusingly, "empty" lines are visible...they
104
+ # just don't contain any source code other than a newline ("\n").
125
105
  def visible?
126
106
  !line.empty?
127
107
  end
128
108
 
109
+ # Opposite of `visible?` (note: different than `empty?`)
129
110
  def hidden?
130
111
  !visible?
131
112
  end
132
113
 
133
- def line_number
134
- index + 1
114
+ # An `empty?` line is one that was originally left
115
+ # empty in the source code, while a "hidden" line
116
+ # is one that we've since marked as "invisible"
117
+ def empty?
118
+ @empty
135
119
  end
136
- alias :number :line_number
137
120
 
121
+ # Opposite of `empty?` (note: different than `visible?`)
138
122
  def not_empty?
139
123
  !empty?
140
124
  end
141
125
 
142
- def empty?
143
- @empty
126
+ # Renders the given line
127
+ #
128
+ # Also allows us to represent source code as
129
+ # an array of code lines.
130
+ #
131
+ # When we have an array of code line elements
132
+ # calling `join` on the array will call `to_s`
133
+ # on each element, which essentially converts
134
+ # it back into its original source string.
135
+ def to_s
136
+ line
137
+ end
138
+
139
+ # When the code line is marked invisible
140
+ # we retain the original value of its line
141
+ # this is useful for debugging and for
142
+ # showing extra context
143
+ #
144
+ # DisplayCodeWithLineNumbers will render
145
+ # all lines given to it, not just visible
146
+ # lines, it uses the original method to
147
+ # obtain them.
148
+ attr_reader :original
149
+
150
+ # Comparison operator, needed for equality
151
+ # and sorting
152
+ def <=>(other)
153
+ index <=> other.index
154
+ end
155
+
156
+ # [Not stable API]
157
+ #
158
+ # Lines that have a `on_ignored_nl` type token and NOT
159
+ # a `BEG` type seem to be a good proxy for the ability
160
+ # to join multiple lines into one.
161
+ #
162
+ # This predicate method is used to determine when those
163
+ # two criteria have been met.
164
+ #
165
+ # The one known case this doesn't handle is:
166
+ #
167
+ # Ripper.lex <<~EOM
168
+ # a &&
169
+ # b ||
170
+ # c
171
+ # EOM
172
+ #
173
+ # For some reason this introduces `on_ignored_nl` but with BEG type
174
+ def ignore_newline_not_beg?
175
+ @ignore_newline_not_beg
176
+ end
177
+
178
+ # Determines if the given line has a trailing slash
179
+ #
180
+ # lines = CodeLine.from_source(<<~EOM)
181
+ # it "foo" \
182
+ # EOM
183
+ # expect(lines.first.trailing_slash?).to eq(true)
184
+ #
185
+ def trailing_slash?
186
+ last = @lex.last
187
+ return false unless last
188
+ return false unless last.type == :on_sp
189
+
190
+ last.token == TRAILING_SLASH
144
191
  end
145
192
 
146
- def to_s
147
- self.line
193
+ # Endless method detection
194
+ #
195
+ # From https://github.com/ruby/irb/commit/826ae909c9c93a2ddca6f9cfcd9c94dbf53d44ab
196
+ # Detecting a "oneliner" seems to need a state machine.
197
+ # This can be done by looking mostly at the "state" (last value):
198
+ #
199
+ # ENDFN -> BEG (token = '=' ) -> END
200
+ #
201
+ private def set_kw_end
202
+ oneliner_count = 0
203
+ in_oneliner_def = nil
204
+
205
+ kw_count = 0
206
+ end_count = 0
207
+
208
+ @ignore_newline_not_beg = false
209
+ @lex.each do |lex|
210
+ kw_count += 1 if lex.is_kw?
211
+ end_count += 1 if lex.is_end?
212
+
213
+ if lex.type == :on_ignored_nl
214
+ @ignore_newline_not_beg = !lex.expr_beg?
215
+ end
216
+
217
+ if in_oneliner_def.nil?
218
+ in_oneliner_def = :ENDFN if lex.state.allbits?(Ripper::EXPR_ENDFN)
219
+ elsif lex.state.allbits?(Ripper::EXPR_ENDFN)
220
+ # Continue
221
+ elsif lex.state.allbits?(Ripper::EXPR_BEG)
222
+ in_oneliner_def = :BODY if lex.token == "="
223
+ elsif lex.state.allbits?(Ripper::EXPR_END)
224
+ # We found an endless method, count it
225
+ oneliner_count += 1 if in_oneliner_def == :BODY
226
+
227
+ in_oneliner_def = nil
228
+ else
229
+ in_oneliner_def = nil
230
+ end
231
+ end
232
+
233
+ kw_count -= oneliner_count
234
+
235
+ @is_kw = (kw_count - end_count) > 0
236
+ @is_end = (end_count - kw_count) > 0
148
237
  end
149
238
  end
150
239
  end