dead_end 1.1.7 → 3.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. checksums.yaml +4 -4
  2. data/.circleci/config.yml +27 -1
  3. data/.github/workflows/check_changelog.yml +14 -7
  4. data/.standard.yml +1 -0
  5. data/CHANGELOG.md +60 -0
  6. data/CODE_OF_CONDUCT.md +2 -2
  7. data/Gemfile +2 -0
  8. data/Gemfile.lock +31 -2
  9. data/README.md +122 -35
  10. data/Rakefile +1 -1
  11. data/dead_end.gemspec +12 -12
  12. data/exe/dead_end +4 -67
  13. data/lib/dead_end/{internals.rb → api.rb} +90 -52
  14. data/lib/dead_end/around_block_scan.rb +16 -18
  15. data/lib/dead_end/auto.rb +3 -101
  16. data/lib/dead_end/block_expand.rb +6 -5
  17. data/lib/dead_end/capture_code_context.rb +167 -50
  18. data/lib/dead_end/clean_document.rb +304 -0
  19. data/lib/dead_end/cli.rb +129 -0
  20. data/lib/dead_end/code_block.rb +20 -4
  21. data/lib/dead_end/code_frontier.rb +74 -29
  22. data/lib/dead_end/code_line.rb +176 -87
  23. data/lib/dead_end/code_search.rb +40 -51
  24. data/lib/dead_end/core_ext.rb +35 -0
  25. data/lib/dead_end/display_code_with_line_numbers.rb +7 -8
  26. data/lib/dead_end/display_invalid_blocks.rb +42 -80
  27. data/lib/dead_end/explain_syntax.rb +103 -0
  28. data/lib/dead_end/insertion_sort.rb +46 -0
  29. data/lib/dead_end/left_right_lex_count.rb +168 -0
  30. data/lib/dead_end/lex_all.rb +25 -34
  31. data/lib/dead_end/lex_value.rb +70 -0
  32. data/lib/dead_end/parse_blocks_from_indent_line.rb +3 -4
  33. data/lib/dead_end/pathname_from_message.rb +47 -0
  34. data/lib/dead_end/ripper_errors.rb +36 -0
  35. data/lib/dead_end/version.rb +1 -1
  36. data/lib/dead_end.rb +2 -2
  37. metadata +14 -9
  38. data/.travis.yml +0 -6
  39. data/lib/dead_end/fyi.rb +0 -7
  40. data/lib/dead_end/heredoc_block_parse.rb +0 -30
  41. data/lib/dead_end/trailing_slash_join.rb +0 -53
  42. data/lib/dead_end/who_dis_syntax_error.rb +0 -69
@@ -54,11 +54,11 @@ module DeadEnd
54
54
  # populate an array with multiple code blocks then call `sort!`
55
55
  # on it without having to specify the sorting criteria
56
56
  def <=>(other)
57
- out = self.current_indent <=> other.current_indent
57
+ out = current_indent <=> other.current_indent
58
58
  return out if out != 0
59
59
 
60
60
  # Stable sort
61
- self.starts_at <=> other.starts_at
61
+ starts_at <=> other.starts_at
62
62
  end
63
63
 
64
64
  def current_indent
@@ -70,8 +70,24 @@ module DeadEnd
70
70
  end
71
71
 
72
72
  def valid?
73
- return @valid if @valid != UNSET
74
- @valid = DeadEnd.valid?(self.to_s)
73
+ if @valid == UNSET
74
+ # Performance optimization
75
+ #
76
+ # If all the lines were previously hidden
77
+ # and we expand to capture additional empty
78
+ # lines then the result cannot be invalid
79
+ #
80
+ # That means there's no reason to re-check all
81
+ # lines with ripper (which is expensive).
82
+ # Benchmark in commit message
83
+ @valid = if lines.all? { |l| l.hidden? || l.empty? }
84
+ true
85
+ else
86
+ DeadEnd.valid?(lines.map(&:original).join)
87
+ end
88
+ else
89
+ @valid
90
+ end
75
91
  end
76
92
 
77
93
  def to_s
@@ -3,11 +3,19 @@
3
3
  module DeadEnd
4
4
  # The main function of the frontier is to hold the edges of our search and to
5
5
  # evaluate when we can stop searching.
6
+
7
+ # There are three main phases in the algorithm:
8
+ #
9
+ # 1. Sanitize/format input source
10
+ # 2. Search for invalid blocks
11
+ # 3. Format invalid blocks into something meaningful
12
+ #
13
+ # The Code frontier is a critical part of the second step
6
14
  #
7
15
  # ## Knowing where we've been
8
16
  #
9
- # Once a code block is generated it is added onto the frontier where it will be
10
- # sorted and then the frontier can be filtered. Large blocks that totally contain a
17
+ # Once a code block is generated it is added onto the frontier. Then it will be
18
+ # sorted by indentation and the frontier can be filtered. Large blocks that fully enclose a
11
19
  # smaller block will cause the smaller block to be evicted.
12
20
  #
13
21
  # CodeFrontier#<<(block) # Adds block to frontier
@@ -15,11 +23,11 @@ module DeadEnd
15
23
  #
16
24
  # ## Knowing where we can go
17
25
  #
18
- # Internally it keeps track of "unvisited" lines which is exposed via `next_indent_line`
19
- # when called this will return a line of code with the most indentation.
26
+ # Internally the frontier keeps track of "unvisited" lines which are exposed via `next_indent_line`
27
+ # when called, this method returns a line of code with the highest indentation.
20
28
  #
21
- # This line of code can be used to build a CodeBlock and then when that code block
22
- # is added back to the frontier, then the lines are removed from the
29
+ # The returned line of code can be used to build a CodeBlock and then that code block
30
+ # is added back to the frontier. Then, the lines are removed from the
23
31
  # "unvisited" so we don't double-create the same block.
24
32
  #
25
33
  # CodeFrontier#next_indent_line # Shows next line
@@ -27,34 +35,63 @@ module DeadEnd
27
35
  #
28
36
  # ## Knowing when to stop
29
37
  #
30
- # The frontier holds the syntax error when removing all code blocks from the original
31
- # source document allows it to be parsed as syntatically valid:
38
+ # The frontier knows how to check the entire document for a syntax error. When blocks
39
+ # are added onto the frontier, they're removed from the document. When all code containing
40
+ # syntax errors has been added to the frontier, the document will be parsable without a
41
+ # syntax error and the search can stop.
32
42
  #
33
- # CodeFrontier#holds_all_syntax_errors?
43
+ # CodeFrontier#holds_all_syntax_errors? # Returns true when frontier holds all syntax errors
34
44
  #
35
45
  # ## Filtering false positives
36
46
  #
37
- # Once the search is completed, the frontier will have many blocks that do not contain
38
- # the syntax error. To filter to the smallest subset that does call:
47
+ # Once the search is completed, the frontier may have multiple blocks that do not contain
48
+ # the syntax error. To limit the result to the smallest subset of "invalid blocks" call:
39
49
  #
40
50
  # CodeFrontier#detect_invalid_blocks
51
+ #
41
52
  class CodeFrontier
42
- def initialize(code_lines: )
53
+ def initialize(code_lines:)
43
54
  @code_lines = code_lines
44
- @frontier = []
55
+ @frontier = InsertionSort.new
45
56
  @unvisited_lines = @code_lines.sort_by(&:indent_index)
57
+ @visited_lines = {}
58
+
59
+ @has_run = false
60
+ @check_next = true
46
61
  end
47
62
 
48
63
  def count
49
- @frontier.count
64
+ @frontier.to_a.length
65
+ end
66
+
67
+ # Performance optimization
68
+ #
69
+ # Parsing with ripper is expensive
70
+ # If we know we don't have any blocks with invalid
71
+ # syntax, then we know we cannot have found
72
+ # the incorrect syntax yet.
73
+ #
74
+ # When an invalid block is added onto the frontier
75
+ # check document state
76
+ private def can_skip_check?
77
+ check_next = @check_next
78
+ @check_next = false
79
+
80
+ if check_next
81
+ false
82
+ else
83
+ true
84
+ end
50
85
  end
51
86
 
52
87
  # Returns true if the document is valid with all lines
53
88
  # removed. By default it checks all blocks present in
54
89
  # the frontier array, but can be used for arbitrary arrays
55
90
  # of codeblocks as well
56
- def holds_all_syntax_errors?(block_array = @frontier)
57
- without_lines = block_array.map do |block|
91
+ def holds_all_syntax_errors?(block_array = @frontier, can_cache: true)
92
+ return false if can_cache && can_skip_check?
93
+
94
+ without_lines = block_array.to_a.flat_map do |block|
58
95
  block.lines
59
96
  end
60
97
 
@@ -66,7 +103,7 @@ module DeadEnd
66
103
 
67
104
  # Returns a code block with the largest indentation possible
68
105
  def pop
69
- return @frontier.pop
106
+ @frontier.to_a.pop
70
107
  end
71
108
 
72
109
  def next_indent_line
@@ -74,18 +111,18 @@ module DeadEnd
74
111
  end
75
112
 
76
113
  def expand?
77
- return false if @frontier.empty?
78
- return true if @unvisited_lines.empty?
114
+ return false if @frontier.to_a.empty?
115
+ return true if @unvisited_lines.to_a.empty?
79
116
 
80
- frontier_indent = @frontier.last.current_indent
81
- unvisited_indent= next_indent_line.indent
117
+ frontier_indent = @frontier.to_a.last.current_indent
118
+ unvisited_indent = next_indent_line.indent
82
119
 
83
120
  if ENV["DEBUG"]
84
121
  puts "```"
85
- puts @frontier.last.to_s
122
+ puts @frontier.to_a.last.to_s
86
123
  puts "```"
87
- puts " @frontier indent: #{frontier_indent}"
88
- puts " @unvisited indent: #{unvisited_indent}"
124
+ puts " @frontier indent: #{frontier_indent}"
125
+ puts " @unvisited indent: #{unvisited_indent}"
89
126
  end
90
127
 
91
128
  # Expand all blocks before moving to unvisited lines
@@ -93,7 +130,13 @@ module DeadEnd
93
130
  end
94
131
 
95
132
  def register_indent_block(block)
96
- @unvisited_lines -= block.lines
133
+ block.lines.each do |line|
134
+ next if @visited_lines[line]
135
+ @visited_lines[line] = true
136
+
137
+ index = @unvisited_lines.bsearch_index { |l| line.indent_index <=> l.indent_index }
138
+ @unvisited_lines.delete_at(index)
139
+ end
97
140
  self
98
141
  end
99
142
 
@@ -106,11 +149,13 @@ module DeadEnd
106
149
  register_indent_block(block)
107
150
 
108
151
  # Make sure we don't double expand, if a code block fully engulfs another code block, keep the bigger one
109
- @frontier.reject! {|b|
152
+ @frontier.to_a.reject! { |b|
110
153
  b.starts_at >= block.starts_at && b.ends_at <= block.ends_at
111
154
  }
155
+
156
+ @check_next = true if block.invalid?
112
157
  @frontier << block
113
- @frontier.sort!
158
+ # @frontier.sort!
114
159
 
115
160
  self
116
161
  end
@@ -130,8 +175,8 @@ module DeadEnd
130
175
  # Given that we know our syntax error exists somewhere in our frontier, we want to find
131
176
  # the smallest possible set of blocks that contain all the syntax errors
132
177
  def detect_invalid_blocks
133
- self.class.combination(@frontier.select(&:invalid?)).detect do |block_array|
134
- holds_all_syntax_errors?(block_array)
178
+ self.class.combination(@frontier.to_a.select(&:invalid?)).detect do |block_array|
179
+ holds_all_syntax_errors?(block_array, can_cache: false)
135
180
  end || []
136
181
  end
137
182
  end
@@ -4,147 +4,236 @@ module DeadEnd
4
4
  # Represents a single line of code of a given source file
5
5
  #
6
6
  # This object contains metadata about the line such as
7
- # amount of indentation. An if it is empty or not.
7
+ # amount of indentation, if it is empty or not, and
8
+ # lexical data, such as if it has an `end` or a keyword
9
+ # in it.
8
10
  #
9
- # While a given search for syntax errors is being performed
10
- # state about the search can be stored in individual lines such
11
- # as :valid or :invalid.
12
- #
13
- # Visibility of lines can be toggled on and off.
11
+ # Visibility of lines can be toggled off. Marking a line as invisible
12
+ # indicates that it should not be used for syntax checks.
13
+ # It's functionally the same as commenting it out.
14
14
  #
15
15
  # Example:
16
16
  #
17
- # line = CodeLine.new(line: "def foo\n", index: 0)
18
- # line.line_number => 1
17
+ # line = CodeLine.from_source("def foo\n").first
18
+ # line.number => 1
19
19
  # line.empty? # => false
20
20
  # line.visible? # => true
21
21
  # line.mark_invisible
22
22
  # line.visible? # => false
23
23
  #
24
- # A CodeBlock is made of multiple CodeLines
25
- #
26
- # Marking a line as invisible indicates that it should not be used
27
- # for syntax checks. It's essentially the same as commenting it out
28
- #
29
- # Marking a line as invisible also lets the overall program know
30
- # that it should not check that area for syntax errors.
31
24
  class CodeLine
32
25
  TRAILING_SLASH = ("\\" + $/).freeze
33
26
 
34
- def self.parse(source)
35
- source.lines.map.with_index do |line, index|
36
- CodeLine.new(line: line, index: index)
27
+ # Returns an array of CodeLine objects
28
+ # from the source string
29
+ def self.from_source(source, lines: nil)
30
+ lines ||= source.lines
31
+ lex_array_for_line = LexAll.new(source: source, source_lines: lines).each_with_object(Hash.new { |h, k| h[k] = [] }) { |lex, hash| hash[lex.line] << lex }
32
+ lines.map.with_index do |line, index|
33
+ CodeLine.new(
34
+ line: line,
35
+ index: index,
36
+ lex: lex_array_for_line[index + 1]
37
+ )
37
38
  end
38
39
  end
39
40
 
40
- attr_reader :line, :index, :indent, :original_line
41
+ attr_reader :line, :index, :lex, :line_number, :indent
42
+ def initialize(line:, index:, lex:)
43
+ @lex = lex
44
+ @line = line
45
+ @index = index
46
+ @original = line
47
+ @line_number = @index + 1
48
+ strip_line = line.dup
49
+ strip_line.lstrip!
41
50
 
42
- def initialize(line: , index:)
43
- @original_line = line.freeze
44
- @line = @original_line
45
- if line.strip.empty?
51
+ if strip_line.empty?
46
52
  @empty = true
47
53
  @indent = 0
48
54
  else
49
55
  @empty = false
50
- @indent = SpaceCount.indent(line)
56
+ @indent = line.length - strip_line.length
51
57
  end
52
- @index = index
53
- @status = nil # valid, invalid, unknown
54
- @invalid = false
55
-
56
- lex_detect!
57
- end
58
-
59
- private def lex_detect!
60
- lex_array = LexAll.new(source: line)
61
- kw_count = 0
62
- end_count = 0
63
- lex_array.each_with_index do |lex, index|
64
- next unless lex.type == :on_kw
65
-
66
- case lex.token
67
- when 'if', 'unless', 'while', 'until'
68
- # Only count if/unless when it's not a "trailing" if/unless
69
- # https://github.com/ruby/ruby/blob/06b44f819eb7b5ede1ff69cecb25682b56a1d60c/lib/irb/ruby-lex.rb#L374-L375
70
- kw_count += 1 if !lex.expr_label?
71
- when 'def', 'case', 'for', 'begin', 'class', 'module', 'do'
72
- kw_count += 1
73
- when 'end'
74
- end_count += 1
75
- end
76
- end
77
-
78
- @is_comment = lex_array.detect {|lex| lex.type != :on_sp}&.type == :on_comment
79
- return if @is_comment
80
- @is_kw = (kw_count - end_count) > 0
81
- @is_end = (end_count - kw_count) > 0
82
- @is_trailing_slash = lex_array.last.token == TRAILING_SLASH
83
- end
84
-
85
- alias :original :original_line
86
-
87
- def trailing_slash?
88
- @is_trailing_slash
89
- end
90
58
 
59
+ set_kw_end
60
+ end
61
+
62
+ # Used for stable sort via indentation level
63
+ #
64
+ # Ruby's sort is not "stable" meaning that when
65
+ # multiple elements have the same value, they are
66
+ # not guaranteed to return in the same order they
67
+ # were put in.
68
+ #
69
+ # So when multiple code lines have the same indentation
70
+ # level, they're sorted by their index value which is unique
71
+ # and consistent.
72
+ #
73
+ # This is mostly needed for consistency of the test suite
91
74
  def indent_index
92
75
  @indent_index ||= [indent, index]
93
76
  end
77
+ alias_method :number, :line_number
94
78
 
95
- def <=>(b)
96
- self.index <=> b.index
97
- end
98
-
99
- def is_comment?
100
- @is_comment
101
- end
102
-
103
- def not_comment?
104
- !is_comment?
105
- end
106
-
79
+ # Returns true if the code line is determined
80
+ # to contain a keyword that matches with an `end`
81
+ #
82
+ # For example: `def`, `do`, `begin`, `ensure`, etc.
107
83
  def is_kw?
108
84
  @is_kw
109
85
  end
110
86
 
87
+ # Returns true if the code line is determined
88
+ # to contain an `end` keyword
111
89
  def is_end?
112
90
  @is_end
113
91
  end
114
92
 
93
+ # Used to hide lines
94
+ #
95
+ # The search algorithm will group lines into blocks
96
+ # then if those blocks are determined to represent
97
+ # valid code they will be hidden
115
98
  def mark_invisible
116
99
  @line = ""
117
- self
118
- end
119
-
120
- def mark_visible
121
- @line = @original_line
122
- self
123
100
  end
124
101
 
102
+ # Returns false when the line was marked as "invisible"
103
+ # Confusingly, "empty" lines are visible...they
104
+ # just don't contain any source code other than a newline ("\n").
125
105
  def visible?
126
106
  !line.empty?
127
107
  end
128
108
 
109
+ # Opposite of `visible?` (note: different than `empty?`)
129
110
  def hidden?
130
111
  !visible?
131
112
  end
132
113
 
133
- def line_number
134
- index + 1
114
+ # An `empty?` line is one that was originally left
115
+ # empty in the source code, while a "hidden" line
116
+ # is one that we've since marked as "invisible"
117
+ def empty?
118
+ @empty
135
119
  end
136
- alias :number :line_number
137
120
 
121
+ # Opposite of `empty?` (note: different than `visible?`)
138
122
  def not_empty?
139
123
  !empty?
140
124
  end
141
125
 
142
- def empty?
143
- @empty
126
+ # Renders the given line
127
+ #
128
+ # Also allows us to represent source code as
129
+ # an array of code lines.
130
+ #
131
+ # When we have an array of code line elements
132
+ # calling `join` on the array will call `to_s`
133
+ # on each element, which essentially converts
134
+ # it back into its original source string.
135
+ def to_s
136
+ line
137
+ end
138
+
139
+ # When the code line is marked invisible
140
+ # we retain the original value of its line
141
+ # this is useful for debugging and for
142
+ # showing extra context
143
+ #
144
+ # DisplayCodeWithLineNumbers will render
145
+ # all lines given to it, not just visible
146
+ # lines, it uses the original method to
147
+ # obtain them.
148
+ attr_reader :original
149
+
150
+ # Comparison operator, needed for equality
151
+ # and sorting
152
+ def <=>(other)
153
+ index <=> other.index
154
+ end
155
+
156
+ # [Not stable API]
157
+ #
158
+ # Lines that have a `on_ignored_nl` type token and NOT
159
+ # a `BEG` type seem to be a good proxy for the ability
160
+ # to join multiple lines into one.
161
+ #
162
+ # This predicate method is used to determine when those
163
+ # two criteria have been met.
164
+ #
165
+ # The one known case this doesn't handle is:
166
+ #
167
+ # Ripper.lex <<~EOM
168
+ # a &&
169
+ # b ||
170
+ # c
171
+ # EOM
172
+ #
173
+ # For some reason this introduces `on_ignored_nl` but with BEG type
174
+ def ignore_newline_not_beg?
175
+ @ignore_newline_not_beg
176
+ end
177
+
178
+ # Determines if the given line has a trailing slash
179
+ #
180
+ # lines = CodeLine.from_source(<<~EOM)
181
+ # it "foo" \
182
+ # EOM
183
+ # expect(lines.first.trailing_slash?).to eq(true)
184
+ #
185
+ def trailing_slash?
186
+ last = @lex.last
187
+ return false unless last
188
+ return false unless last.type == :on_sp
189
+
190
+ last.token == TRAILING_SLASH
144
191
  end
145
192
 
146
- def to_s
147
- self.line
193
+ # Endless method detection
194
+ #
195
+ # From https://github.com/ruby/irb/commit/826ae909c9c93a2ddca6f9cfcd9c94dbf53d44ab
196
+ # Detecting a "oneliner" seems to need a state machine.
197
+ # This can be done by looking mostly at the "state" (last value):
198
+ #
199
+ # ENDFN -> BEG (token = '=' ) -> END
200
+ #
201
+ private def set_kw_end
202
+ oneliner_count = 0
203
+ in_oneliner_def = nil
204
+
205
+ kw_count = 0
206
+ end_count = 0
207
+
208
+ @ignore_newline_not_beg = false
209
+ @lex.each do |lex|
210
+ kw_count += 1 if lex.is_kw?
211
+ end_count += 1 if lex.is_end?
212
+
213
+ if lex.type == :on_ignored_nl
214
+ @ignore_newline_not_beg = !lex.expr_beg?
215
+ end
216
+
217
+ if in_oneliner_def.nil?
218
+ in_oneliner_def = :ENDFN if lex.state.allbits?(Ripper::EXPR_ENDFN)
219
+ elsif lex.state.allbits?(Ripper::EXPR_ENDFN)
220
+ # Continue
221
+ elsif lex.state.allbits?(Ripper::EXPR_BEG)
222
+ in_oneliner_def = :BODY if lex.token == "="
223
+ elsif lex.state.allbits?(Ripper::EXPR_END)
224
+ # We found an endless method, count it
225
+ oneliner_count += 1 if in_oneliner_def == :BODY
226
+
227
+ in_oneliner_def = nil
228
+ else
229
+ in_oneliner_def = nil
230
+ end
231
+ end
232
+
233
+ kw_count -= oneliner_count
234
+
235
+ @is_kw = (kw_count - end_count) > 0
236
+ @is_end = (end_count - kw_count) > 0
148
237
  end
149
238
  end
150
239
  end