dead_end 1.1.5 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,14 +1,27 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module DeadEnd
4
-
5
- # Given a block, this method will capture surrounding
6
- # code to give the user more context for the location of
7
- # the problem.
4
+ # Turns an "invalid block(s)" into useful context
5
+ #
6
+ # There are three main phases in the algorithm:
7
+ #
8
+ # 1. Sanitize/format input source
9
+ # 2. Search for invalid blocks
10
+ # 3. Format invalid blocks into something meaningful
11
+ #
12
+ # This class handles the third part.
8
13
  #
9
- # Return is an array of CodeLines to be rendered.
14
+ # The algorithm is very good at capturing all of a syntax
15
+ # error in a single block in number 2, however the results
16
+ # can contain ambiguities. Humans are good at pattern matching
17
+ # and filtering and can mentally remove extraneous data, but
18
+ # they can't add extra data that's not present.
10
19
  #
11
- # Surrounding code is captured regardless of visible state
20
+ # In the case of known ambiguous cases, this class adds context
21
+ # back to the ambiguity so the programmer has full information.
22
+ #
23
+ # Beyond handling these ambiguities, it also captures surrounding
24
+ # code context information:
12
25
  #
13
26
  # puts block.to_s # => "def bark"
14
27
  #
@@ -17,7 +30,8 @@ module DeadEnd
17
30
  # code_lines: code_lines
18
31
  # )
19
32
  #
20
- # puts context.call.join
33
+ # lines = context.call.map(&:original)
34
+ # puts lines.join
21
35
  # # =>
22
36
  # class Dog
23
37
  # def bark
@@ -26,7 +40,7 @@ module DeadEnd
26
40
  class CaptureCodeContext
27
41
  attr_reader :code_lines
28
42
 
29
- def initialize(blocks: , code_lines:)
43
+ def initialize(blocks:, code_lines:)
30
44
  @blocks = Array(blocks)
31
45
  @code_lines = code_lines
32
46
  @visible_lines = @blocks.map(&:visible_lines).flatten
@@ -35,29 +49,73 @@ module DeadEnd
35
49
 
36
50
  def call
37
51
  @blocks.each do |block|
52
+ capture_first_kw_end_same_indent(block)
38
53
  capture_last_end_same_indent(block)
39
54
  capture_before_after_kws(block)
40
55
  capture_falling_indent(block)
41
56
  end
42
57
 
43
58
  @lines_to_output.select!(&:not_empty?)
44
- @lines_to_output.select!(&:not_comment?)
45
59
  @lines_to_output.uniq!
46
60
  @lines_to_output.sort!
47
61
 
48
- return @lines_to_output
62
+ @lines_to_output
49
63
  end
50
64
 
65
+ # Shows the context around code provided by "falling" indentation
66
+ #
67
+ # Converts:
68
+ #
69
+ # it "foo" do
70
+ #
71
+ # into:
72
+ #
73
+ # class OH
74
+ # def hello
75
+ # it "foo" do
76
+ # end
77
+ # end
78
+ #
79
+ #
51
80
  def capture_falling_indent(block)
52
81
  AroundBlockScan.new(
53
82
  block: block,
54
- code_lines: @code_lines,
83
+ code_lines: @code_lines
55
84
  ).on_falling_indent do |line|
56
85
  @lines_to_output << line
57
86
  end
58
87
  end
59
88
 
89
+ # Shows surrounding kw/end pairs
90
+ #
91
+ # The purpose of showing these extra pairs is due to cases
92
+ # of ambiguity when only one visible line is matched.
93
+ #
94
+ # For example:
95
+ #
96
+ # 1 class Dog
97
+ # 2 def bark
98
+ # 4 def eat
99
+ # 5 end
100
+ # 6 end
101
+ #
102
+ # In this case either line 2 could be missing an `end` or
103
+ # line 4 was an extra line added by mistake (it happens).
104
+ #
105
+ # When we detect the above problem it shows the issue
106
+ # as only being on line 2
107
+ #
108
+ # 2 def bark
109
+ #
110
+ # Showing "neighbor" keyword pairs gives extra context:
111
+ #
112
+ # 2 def bark
113
+ # 4 def eat
114
+ # 5 end
115
+ #
60
116
  def capture_before_after_kws(block)
117
+ return unless block.visible_lines.count == 1
118
+
61
119
  around_lines = AroundBlockScan.new(code_lines: @code_lines, block: block)
62
120
  .start_at_next_line
63
121
  .capture_neighbor_context
@@ -67,50 +125,109 @@ module DeadEnd
67
125
  @lines_to_output.concat(around_lines)
68
126
  end
69
127
 
70
- # Problems heredocs are back in play
128
+ # When there is an invalid block with a keyword
129
+ # missing an end right before another end,
130
+ # it is unclear which keyword is missing the
131
+ # end
132
+ #
133
+ # Take this example:
134
+ #
135
+ # class Dog # 1
136
+ # def bark # 2
137
+ # puts "woof" # 3
138
+ # end # 4
139
+ #
140
+ # However due to https://github.com/zombocom/dead_end/issues/32
141
+ # the problem line will be identified as:
142
+ #
143
+ # ❯ class Dog # 1
144
+ #
145
+ # Because lines 2, 3, and 4 are technically valid code and are expanded
146
+ # first, deemed valid, and hidden. We need to un-hide the matching end
147
+ # line 4. Also work backwards and if there's a mis-matched keyword, show it
148
+ # too
71
149
  def capture_last_end_same_indent(block)
72
- start_index = block.visible_lines.first.index
73
- lines = @code_lines[start_index..block.lines.last.index]
74
- kw_end_lines = lines.select {|line| line.indent == block.current_indent && (line.is_end? || line.is_kw?) }
150
+ return if block.visible_lines.length != 1
151
+ return unless block.visible_lines.first.is_kw?
152
+
153
+ visible_line = block.visible_lines.first
154
+ lines = @code_lines[visible_line.index..block.lines.last.index]
75
155
 
156
+ # Find first end with same indent
157
+ # (this would return line 4)
158
+ #
159
+ # end # 4
160
+ matching_end = lines.detect { |line| line.indent == block.current_indent && line.is_end? }
161
+ return unless matching_end
162
+
163
+ @lines_to_output << matching_end
76
164
 
77
- # TODO handle case of heredocs showing up here
165
+ # Work backwards from the end to
166
+ # see if there are mis-matched
167
+ # keyword/end pairs
78
168
  #
79
- # Due to https://github.com/zombocom/dead_end/issues/32
80
- # There's a special case where a keyword right before the last
81
- # end of a valid block accidentally ends up identifying that the problem
82
- # was with the block instead of before it. To handle that
83
- # special case, we can re-parse back through the internals of blocks
84
- # and if they have mis-matched keywords and ends show the last one
85
- end_lines = kw_end_lines.select(&:is_end?)
86
- end_lines.each_with_index do |end_line, i|
87
- start_index = i.zero? ? 0 : end_lines[i-1].index
88
- end_index = end_line.index - 1
89
- lines = @code_lines[start_index..end_index]
90
-
91
- stop_next = false
92
- kw_count = 0
93
- end_count = 0
94
- lines = lines.reverse.take_while do |line|
95
- next false if stop_next
96
-
97
- end_count += 1 if line.is_end?
98
- kw_count += 1 if line.is_kw?
99
-
100
- stop_next = true if !kw_count.zero? && kw_count >= end_count
101
- true
102
- end.reverse
103
-
104
- next unless kw_count > end_count
105
-
106
- lines = lines.select {|line| line.is_kw? || line.is_end? }
107
-
108
- next if lines.empty?
109
-
110
- @lines_to_output << end_line
111
- @lines_to_output << lines.first
112
- @lines_to_output << lines.last
169
+ # Return the first mis-matched keyword
170
+ # this would find line 2
171
+ #
172
+ # def bark # 2
173
+ # puts "woof" # 3
174
+ # end # 4
175
+ end_count = 0
176
+ kw_count = 0
177
+ kw_line = @code_lines[visible_line.index..matching_end.index].reverse.detect do |line|
178
+ end_count += 1 if line.is_end?
179
+ kw_count += 1 if line.is_kw?
180
+
181
+ !kw_count.zero? && kw_count >= end_count
113
182
  end
183
+ return unless kw_line
184
+ @lines_to_output << kw_line
185
+ end
186
+
187
+ # The logical inverse of `capture_last_end_same_indent`
188
+ #
189
+ # When there is an invalid block with an `end`
190
+ # missing a keyword right after another `end`,
191
+ # it is unclear which end is missing the
192
+ # keyword.
193
+ #
194
+ # Take this example:
195
+ #
196
+ # class Dog # 1
197
+ # puts "woof" # 2
198
+ # end # 3
199
+ # end # 4
200
+ #
201
+ # the problem line will be identified as:
202
+ #
203
+ # ❯ end # 4
204
+ #
205
+ # This happens because lines 1, 2, and 3 are technically valid code and are expanded
206
+ # first, deemed valid, and hidden. We need to un-hide the matching keyword on
207
+ # line 1. Also work backwards and if there's a mis-matched end, show it
208
+ # too
209
+ def capture_first_kw_end_same_indent(block)
210
+ return if block.visible_lines.length != 1
211
+ return unless block.visible_lines.first.is_end?
212
+
213
+ visible_line = block.visible_lines.first
214
+ lines = @code_lines[block.lines.first.index..visible_line.index]
215
+ matching_kw = lines.reverse.detect { |line| line.indent == block.current_indent && line.is_kw? }
216
+ return unless matching_kw
217
+
218
+ @lines_to_output << matching_kw
219
+
220
+ kw_count = 0
221
+ end_count = 0
222
+ orphan_end = @code_lines[matching_kw.index..visible_line.index].detect do |line|
223
+ kw_count += 1 if line.is_kw?
224
+ end_count += 1 if line.is_end?
225
+
226
+ end_count >= kw_count
227
+ end
228
+
229
+ return unless orphan_end
230
+ @lines_to_output << orphan_end
114
231
  end
115
232
  end
116
233
  end
@@ -0,0 +1,313 @@
1
+ # frozen_string_literal: true
2
+
3
+ module DeadEnd
4
+ # Parses and sanitizes source into a lexically aware document
5
+ #
6
+ # Internally the document is represented by an array with each
7
+ # index containing a CodeLine correlating to a line from the source code.
8
+ #
9
+ # There are three main phases in the algorithm:
10
+ #
11
+ # 1. Sanitize/format input source
12
+ # 2. Search for invalid blocks
13
+ # 3. Format invalid blocks into something meaningful
14
+ #
15
+ # This class handles the first part.
16
+ #
17
+ # The reason this class exists is to format input source
18
+ # for better/easier/cleaner exploration.
19
+ #
20
+ # The CodeSearch class operates at the line level so
21
+ # we must be careful to not introduce lines that look
22
+ # valid by themselves, but when removed will trigger syntax errors
23
+ # or strange behavior.
24
+ #
25
+ # ## Join Trailing slashes
26
+ #
27
+ # Code with a trailing slash is logically treated as a single line:
28
+ #
29
+ # 1 it "code can be split" \
30
+ # 2 "across multiple lines" do
31
+ #
32
+ # In this case removing line 2 would add a syntax error. We get around
33
+ # this by internally joining the two lines into a single "line" object
34
+ #
35
+ # ## Logically Consecutive lines
36
+ #
37
+ # Code that can be broken over multiple
38
+ # lines, such as chained method calls on different lines:
39
+ #
40
+ # 1 User.
41
+ # 2 where(name: "schneems").
42
+ # 3 first
43
+ #
44
+ # Removing line 2 can introduce a syntax error. To fix this, all lines
45
+ # are joined into one.
46
+ #
47
+ # ## Heredocs
48
+ #
49
+ # A heredoc is a way of defining a multi-line string. They can cause many
50
+ # problems. If left as a single line, Ripper would try to parse the contents
51
+ # as ruby code rather than as a string. Even without this problem, we still
52
+ # hit an issue with indentation
53
+ #
54
+ # 1 foo = <<~HEREDOC
55
+ # 2 "Be yourself; everyone else is already taken.""
56
+ # 3 ― Oscar Wilde
57
+ # 4 puts "I look like ruby code" # but i'm still a heredoc
58
+ # 5 HEREDOC
59
+ #
60
+ # If we didn't join these lines then our algorithm would think that line 4
61
+ # is separate from the rest, has a higher indentation, then look at it first
62
+ # and remove it.
63
+ #
64
+ # If the code evaluates line 5 by itself it will think line 5 is a constant,
65
+ # remove it, and introduce a syntax error.
66
+ #
67
+ # All of these problems are fixed by joining the whole heredoc into a single
68
+ # line.
69
+ #
70
+ # ## Comments and whitespace
71
+ #
72
+ # Comments can throw off the way the lexer tells us that the line
73
+ # logically belongs with the next line. This is valid ruby but
74
+ # results in a different lex output than before:
75
+ #
76
+ # 1 User.
77
+ # 2 where(name: "schneems").
78
+ # 3 # Comment here
79
+ # 4 first
80
+ #
81
+ # To handle this we can replace comment lines with empty lines
82
+ # and then re-lex the source. This removal and re-lexing preserves
83
+ # line index and document size, but generates an easier to work with
84
+ # document.
85
+ #
86
+ class CleanDocument
87
+ def initialize(source:)
88
+ @source = source
89
+ @document = CodeLine.from_source(@source)
90
+ end
91
+
92
+ # Call all of the document "cleaners"
93
+ # and return self
94
+ def call
95
+ clean_sweep
96
+ .join_trailing_slash!
97
+ .join_consecutive!
98
+ .join_heredoc!
99
+
100
+ self
101
+ end
102
+
103
+ # Return an array of CodeLines in the
104
+ # document
105
+ def lines
106
+ @document
107
+ end
108
+
109
+ # Renders the document back to a string
110
+ def to_s
111
+ @document.join
112
+ end
113
+
114
+ # Remove comments and whitespace only lines
115
+ #
116
+ # replace with empty newlines
117
+ #
118
+ # source = <<~'EOM'
119
+ # # Comment 1
120
+ # puts "hello"
121
+ # # Comment 2
122
+ # puts "world"
123
+ # EOM
124
+ #
125
+ # lines = CleanDocument.new(source: source).clean_sweep.lines
126
+ # expect(lines[0].to_s).to eq("\n")
127
+ # expect(lines[1].to_s).to eq("puts "hello")
128
+ # expect(lines[2].to_s).to eq("\n")
129
+ # expect(lines[3].to_s).to eq("puts "world")
130
+ #
131
+ # WARNING:
132
+ # If you run this after any of the "join" commands, they
133
+ # will be un-joined.
134
+ #
135
+ # After this change is made, we re-lex the document because
136
+ # removing comments can change how the doc is parsed.
137
+ #
138
+ # For example:
139
+ #
140
+ # values = LexAll.new(source: <<~EOM))
141
+ # User.
142
+ # # comment
143
+ # where(name: 'schneems')
144
+ # EOM
145
+ # expect(values.count {|v| v.type == :on_ignored_nl}).to eq(1)
146
+ #
147
+ # After the comment is removed:
148
+ #
149
+ # values = LexAll.new(source: <<~EOM))
150
+ # User.
151
+ #
152
+ # where(name: 'schneems')
153
+ # EOM
154
+ # expect(values.count {|v| v.type == :on_ignored_nl}).to eq(2)
155
+ #
156
+ def clean_sweep
157
+ source = @document.map do |code_line|
158
+ # Clean trailing whitespace on empty line
159
+ if code_line.line.strip.empty?
160
+ next CodeLine.new(line: "\n", index: code_line.index, lex: [])
161
+ end
162
+
163
+ # Remove comments
164
+ if code_line.lex.detect { |lex| lex.type != :on_sp }&.type == :on_comment
165
+ next CodeLine.new(line: "\n", index: code_line.index, lex: [])
166
+ end
167
+
168
+ code_line
169
+ end.join
170
+
171
+ @source = source
172
+ @document = CodeLine.from_source(source)
173
+ self
174
+ end
175
+
176
+ # Smushes all heredoc lines into one line
177
+ #
178
+ # source = <<~'EOM'
179
+ # foo = <<~HEREDOC
180
+ # lol
181
+ # hehehe
182
+ # HEREDOC
183
+ # EOM
184
+ #
185
+ # lines = CleanDocument.new(source: source).join_heredoc!.lines
186
+ # expect(lines[0].to_s).to eq(source)
187
+ # expect(lines[1].to_s).to eq("")
188
+ def join_heredoc!
189
+ start_index_stack = []
190
+ heredoc_beg_end_index = []
191
+ lines.each do |line|
192
+ line.lex.each do |lex_value|
193
+ case lex_value.type
194
+ when :on_heredoc_beg
195
+ start_index_stack << line.index
196
+ when :on_heredoc_end
197
+ start_index = start_index_stack.pop
198
+ end_index = line.index
199
+ heredoc_beg_end_index << [start_index, end_index]
200
+ end
201
+ end
202
+ end
203
+
204
+ heredoc_groups = heredoc_beg_end_index.map { |start_index, end_index| @document[start_index..end_index] }
205
+
206
+ join_groups(heredoc_groups)
207
+ self
208
+ end
209
+
210
+ # Smushes logically "consecutive" lines
211
+ #
212
+ # source = <<~'EOM'
213
+ # User.
214
+ # where(name: 'schneems').
215
+ # first
216
+ # EOM
217
+ #
218
+ # lines = CleanDocument.new(source: source).join_consecutive!.lines
219
+ # expect(lines[0].to_s).to eq(source)
220
+ # expect(lines[1].to_s).to eq("")
221
+ #
222
+ # The one known case this doesn't handle is:
223
+ #
224
+ # Ripper.lex <<~EOM
225
+ # a &&
226
+ # b ||
227
+ # c
228
+ # EOM
229
+ #
230
+ # For some reason this introduces `on_ignore_newline` but with BEG type
231
+ #
232
+ def join_consecutive!
233
+ consecutive_groups = @document.select(&:ignore_newline_not_beg?).map do |code_line|
234
+ take_while_including(code_line.index..) do |line|
235
+ line.ignore_newline_not_beg?
236
+ end
237
+ end
238
+
239
+ join_groups(consecutive_groups)
240
+ self
241
+ end
242
+
243
+ # Join lines with a trailing slash
244
+ #
245
+ # source = <<~'EOM'
246
+ # it "code can be split" \
247
+ # "across multiple lines" do
248
+ # EOM
249
+ #
250
+ # lines = CleanDocument.new(source: source).join_trailing_slash!.lines
251
+ # expect(lines[0].to_s).to eq(source)
252
+ # expect(lines[1].to_s).to eq("")
253
+ def join_trailing_slash!
254
+ trailing_groups = @document.select(&:trailing_slash?).map do |code_line|
255
+ take_while_including(code_line.index..) { |x| x.trailing_slash? }
256
+ end
257
+ join_groups(trailing_groups)
258
+ self
259
+ end
260
+
261
+ # Helper method for joining "groups" of lines
262
+ #
263
+ # Input is expected to be type Array<Array<CodeLine>>
264
+ #
265
+ # The outer array holds the various "groups" while the
266
+ # inner array holds code lines.
267
+ #
268
+ # All code lines are "joined" into the first line in
269
+ # their group.
270
+ #
271
+ # To preserve document size, empty lines are placed
272
+ # in the place of the lines that were "joined"
273
+ def join_groups(groups)
274
+ groups.each do |lines|
275
+ line = lines.first
276
+
277
+ # Handle the case of multiple groups in a row
278
+ # if one is already replaced, move on
279
+ next if @document[line.index].empty?
280
+
281
+ # Join group into the first line
282
+ @document[line.index] = CodeLine.new(
283
+ lex: lines.map(&:lex).flatten,
284
+ line: lines.join,
285
+ index: line.index
286
+ )
287
+
288
+ # Hide the rest of the lines
289
+ lines[1..].each do |line|
290
+ # The above lines already have newlines in them, if we add more
291
+ # then there will be double newline, use an empty line instead
292
+ @document[line.index] = CodeLine.new(line: "", index: line.index, lex: [])
293
+ end
294
+ end
295
+ self
296
+ end
297
+
298
+ # Helper method for grabbing elements from document
299
+ #
300
+ # Like `take_while` except when it stops
301
+ # iterating, it also returns the line
302
+ # that caused it to stop
303
+ def take_while_including(range = 0..)
304
+ take_next_and_stop = false
305
+ @document[range].take_while do |line|
306
+ next if take_next_and_stop
307
+
308
+ take_next_and_stop = !(yield line)
309
+ true
310
+ end
311
+ end
312
+ end
313
+ end
@@ -54,11 +54,11 @@ module DeadEnd
54
54
  # populate an array with multiple code blocks then call `sort!`
55
55
  # on it without having to specify the sorting criteria
56
56
  def <=>(other)
57
- out = self.current_indent <=> other.current_indent
57
+ out = current_indent <=> other.current_indent
58
58
  return out if out != 0
59
59
 
60
60
  # Stable sort
61
- self.starts_at <=> other.starts_at
61
+ starts_at <=> other.starts_at
62
62
  end
63
63
 
64
64
  def current_indent
@@ -71,7 +71,7 @@ module DeadEnd
71
71
 
72
72
  def valid?
73
73
  return @valid if @valid != UNSET
74
- @valid = DeadEnd.valid?(self.to_s)
74
+ @valid = DeadEnd.valid?(to_s)
75
75
  end
76
76
 
77
77
  def to_s