dead_end 1.1.6 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,14 +1,27 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module DeadEnd
4
-
5
- # Given a block, this method will capture surrounding
6
- # code to give the user more context for the location of
7
- # the problem.
4
+ # Turns "invalid block(s)" into useful context
5
+ #
6
+ # There are three main phases in the algorithm:
7
+ #
8
+ # 1. Sanitize/format input source
9
+ # 2. Search for invalid blocks
10
+ # 3. Format invalid blocks into something meaningful
11
+ #
12
+ # This class handles the third part.
8
13
  #
9
- # Return is an array of CodeLines to be rendered.
14
+ # The algorithm is very good at capturing all of a syntax
15
+ # error in a single block in number 2, however the results
16
+ # can contain ambiguities. Humans are good at pattern matching
17
+ # and filtering and can mentally remove extraneous data, but
18
+ # they can't add extra data that's not present.
10
19
  #
11
- # Surrounding code is captured regardless of visible state
20
+ # In the case of known ambiguous cases, this class adds context
21
+ # back to the ambiguity so the programmer has full information.
22
+ #
23
+ # Beyond handling these ambiguities, it also captures surrounding
24
+ # code context information:
12
25
  #
13
26
  # puts block.to_s # => "def bark"
14
27
  #
@@ -17,7 +30,8 @@ module DeadEnd
17
30
  # code_lines: code_lines
18
31
  # )
19
32
  #
20
- # puts context.call.join
33
+ # lines = context.call.map(&:original)
34
+ # puts lines.join
21
35
  # # =>
22
36
  # class Dog
23
37
  # def bark
@@ -26,7 +40,7 @@ module DeadEnd
26
40
  class CaptureCodeContext
27
41
  attr_reader :code_lines
28
42
 
29
- def initialize(blocks: , code_lines:)
43
+ def initialize(blocks:, code_lines:)
30
44
  @blocks = Array(blocks)
31
45
  @code_lines = code_lines
32
46
  @visible_lines = @blocks.map(&:visible_lines).flatten
@@ -35,29 +49,73 @@ module DeadEnd
35
49
 
36
50
  def call
37
51
  @blocks.each do |block|
52
+ capture_first_kw_end_same_indent(block)
38
53
  capture_last_end_same_indent(block)
39
54
  capture_before_after_kws(block)
40
55
  capture_falling_indent(block)
41
56
  end
42
57
 
43
58
  @lines_to_output.select!(&:not_empty?)
44
- @lines_to_output.select!(&:not_comment?)
45
59
  @lines_to_output.uniq!
46
60
  @lines_to_output.sort!
47
61
 
48
- return @lines_to_output
62
+ @lines_to_output
49
63
  end
50
64
 
65
+ # Shows the context around code provided by "falling" indentation
66
+ #
67
+ # Converts:
68
+ #
69
+ # it "foo" do
70
+ #
71
+ # into:
72
+ #
73
+ # class OH
74
+ # def hello
75
+ # it "foo" do
76
+ # end
77
+ # end
78
+ #
79
+ #
51
80
  def capture_falling_indent(block)
52
81
  AroundBlockScan.new(
53
82
  block: block,
54
- code_lines: @code_lines,
83
+ code_lines: @code_lines
55
84
  ).on_falling_indent do |line|
56
85
  @lines_to_output << line
57
86
  end
58
87
  end
59
88
 
89
+ # Shows surrounding kw/end pairs
90
+ #
91
+ # The purpose of showing these extra pairs is due to cases
92
+ # of ambiguity when only one visible line is matched.
93
+ #
94
+ # For example:
95
+ #
96
+ # 1 class Dog
97
+ # 2 def bark
98
+ # 4 def eat
99
+ # 5 end
100
+ # 6 end
101
+ #
102
+ # In this case either line 2 could be missing an `end` or
103
+ # line 4 was an extra line added by mistake (it happens).
104
+ #
105
+ # When we detect the above problem it shows the issue
106
+ # as only being on line 2
107
+ #
108
+ # 2 def bark
109
+ #
110
+ # Showing "neighbor" keyword pairs gives extra context:
111
+ #
112
+ # 2 def bark
113
+ # 4 def eat
114
+ # 5 end
115
+ #
60
116
  def capture_before_after_kws(block)
117
+ return unless block.visible_lines.count == 1
118
+
61
119
  around_lines = AroundBlockScan.new(code_lines: @code_lines, block: block)
62
120
  .start_at_next_line
63
121
  .capture_neighbor_context
@@ -67,50 +125,109 @@ module DeadEnd
67
125
  @lines_to_output.concat(around_lines)
68
126
  end
69
127
 
70
- # Problems heredocs are back in play
128
+ # When there is an invalid block with a keyword
129
+ # missing an end right before another end,
130
+ # it is unclear which keyword is missing the
131
+ # end
132
+ #
133
+ # Take this example:
134
+ #
135
+ # class Dog # 1
136
+ # def bark # 2
137
+ # puts "woof" # 3
138
+ # end # 4
139
+ #
140
+ # However due to https://github.com/zombocom/dead_end/issues/32
141
+ # the problem line will be identified as:
142
+ #
143
+ # ❯ class Dog # 1
144
+ #
145
+ # Because lines 2, 3, and 4 are technically valid code and are expanded
146
+ # first, deemed valid, and hidden. We need to un-hide the matching end
147
+ # line 4. Also work backwards and if there's a mis-matched keyword, show it
148
+ # too
71
149
  def capture_last_end_same_indent(block)
72
- start_index = block.visible_lines.first.index
73
- lines = @code_lines[start_index..block.lines.last.index]
74
- kw_end_lines = lines.select {|line| line.indent == block.current_indent && (line.is_end? || line.is_kw?) }
150
+ return if block.visible_lines.length != 1
151
+ return unless block.visible_lines.first.is_kw?
152
+
153
+ visible_line = block.visible_lines.first
154
+ lines = @code_lines[visible_line.index..block.lines.last.index]
75
155
 
156
+ # Find first end with same indent
157
+ # (this would return line 4)
158
+ #
159
+ # end # 4
160
+ matching_end = lines.detect { |line| line.indent == block.current_indent && line.is_end? }
161
+ return unless matching_end
162
+
163
+ @lines_to_output << matching_end
76
164
 
77
- # TODO handle case of heredocs showing up here
165
+ # Work backwards from the end to
166
+ # see if there are mis-matched
167
+ # keyword/end pairs
78
168
  #
79
- # Due to https://github.com/zombocom/dead_end/issues/32
80
- # There's a special case where a keyword right before the last
81
- # end of a valid block accidentally ends up identifying that the problem
82
- # was with the block instead of before it. To handle that
83
- # special case, we can re-parse back through the internals of blocks
84
- # and if they have mis-matched keywords and ends show the last one
85
- end_lines = kw_end_lines.select(&:is_end?)
86
- end_lines.each_with_index do |end_line, i|
87
- start_index = i.zero? ? 0 : end_lines[i-1].index
88
- end_index = end_line.index - 1
89
- lines = @code_lines[start_index..end_index]
90
-
91
- stop_next = false
92
- kw_count = 0
93
- end_count = 0
94
- lines = lines.reverse.take_while do |line|
95
- next false if stop_next
96
-
97
- end_count += 1 if line.is_end?
98
- kw_count += 1 if line.is_kw?
99
-
100
- stop_next = true if !kw_count.zero? && kw_count >= end_count
101
- true
102
- end.reverse
103
-
104
- next unless kw_count > end_count
105
-
106
- lines = lines.select {|line| line.is_kw? || line.is_end? }
107
-
108
- next if lines.empty?
109
-
110
- @lines_to_output << end_line
111
- @lines_to_output << lines.first
112
- @lines_to_output << lines.last
169
+ # Return the first mis-matched keyword
170
+ # this would find line 2
171
+ #
172
+ # def bark # 2
173
+ # puts "woof" # 3
174
+ # end # 4
175
+ end_count = 0
176
+ kw_count = 0
177
+ kw_line = @code_lines[visible_line.index..matching_end.index].reverse.detect do |line|
178
+ end_count += 1 if line.is_end?
179
+ kw_count += 1 if line.is_kw?
180
+
181
+ !kw_count.zero? && kw_count >= end_count
113
182
  end
183
+ return unless kw_line
184
+ @lines_to_output << kw_line
185
+ end
186
+
187
+ # The logical inverse of `capture_last_end_same_indent`
188
+ #
189
+ # When there is an invalid block with an `end`
190
+ # missing a keyword right after another `end`,
191
+ # it is unclear which end is missing the
192
+ # keyword.
193
+ #
194
+ # Take this example:
195
+ #
196
+ # class Dog # 1
197
+ # puts "woof" # 2
198
+ # end # 3
199
+ # end # 4
200
+ #
201
+ # the problem line will be identified as:
202
+ #
203
+ # ❯ end # 4
204
+ #
205
+ # This happens because lines 1, 2, and 3 are technically valid code and are expanded
206
+ # first, deemed valid, and hidden. We need to un-hide the matching keyword on
207
+ # line 1. Also work backwards and if there's a mis-matched end, show it
208
+ # too
209
+ def capture_first_kw_end_same_indent(block)
210
+ return if block.visible_lines.length != 1
211
+ return unless block.visible_lines.first.is_end?
212
+
213
+ visible_line = block.visible_lines.first
214
+ lines = @code_lines[block.lines.first.index..visible_line.index]
215
+ matching_kw = lines.reverse.detect { |line| line.indent == block.current_indent && line.is_kw? }
216
+ return unless matching_kw
217
+
218
+ @lines_to_output << matching_kw
219
+
220
+ kw_count = 0
221
+ end_count = 0
222
+ orphan_end = @code_lines[matching_kw.index..visible_line.index].detect do |line|
223
+ kw_count += 1 if line.is_kw?
224
+ end_count += 1 if line.is_end?
225
+
226
+ end_count >= kw_count
227
+ end
228
+
229
+ return unless orphan_end
230
+ @lines_to_output << orphan_end
114
231
  end
115
232
  end
116
233
  end
@@ -0,0 +1,313 @@
1
+ # frozen_string_literal: true
2
+
3
+ module DeadEnd
4
+ # Parses and sanitizes source into a lexically aware document
5
+ #
6
+ # Internally the document is represented by an array with each
7
+ # index containing a CodeLine correlating to a line from the source code.
8
+ #
9
+ # There are three main phases in the algorithm:
10
+ #
11
+ # 1. Sanitize/format input source
12
+ # 2. Search for invalid blocks
13
+ # 3. Format invalid blocks into something meaningful
14
+ #
15
+ # This class handles the first part.
16
+ #
17
+ # The reason this class exists is to format input source
18
+ # for better/easier/cleaner exploration.
19
+ #
20
+ # The CodeSearch class operates at the line level so
21
+ # we must be careful to not introduce lines that look
22
+ # valid by themselves, but when removed will trigger syntax errors
23
+ # or strange behavior.
24
+ #
25
+ # ## Join Trailing slashes
26
+ #
27
+ # Code with a trailing slash is logically treated as a single line:
28
+ #
29
+ # 1 it "code can be split" \
30
+ # 2 "across multiple lines" do
31
+ #
32
+ # In this case removing line 2 would add a syntax error. We get around
33
+ # this by internally joining the two lines into a single "line" object
34
+ #
35
+ # ## Logically Consecutive lines
36
+ #
37
+ # Code that can be broken over multiple lines,
38
+ # such as method calls chained across different lines:
39
+ #
40
+ # 1 User.
41
+ # 2 where(name: "schneems").
42
+ # 3 first
43
+ #
44
+ # Removing line 2 can introduce a syntax error. To fix this, all lines
45
+ # are joined into one.
46
+ #
47
+ # ## Heredocs
48
+ #
49
+ # A heredoc is a way of defining a multi-line string. They can cause many
50
+ # problems. If left as a single line, Ripper would try to parse the contents
51
+ # as ruby code rather than as a string. Even without this problem, we still
52
+ # hit an issue with indentation
53
+ #
54
+ # 1 foo = <<~HEREDOC
55
+ # 2 "Be yourself; everyone else is already taken."
56
+ # 3 ― Oscar Wilde
57
+ # 4 puts "I look like ruby code" # but i'm still a heredoc
58
+ # 5 HEREDOC
59
+ #
60
+ # If we didn't join these lines then our algorithm would think that line 4
61
+ # is separate from the rest, has a higher indentation, then look at it first
62
+ # and remove it.
63
+ #
64
+ # If the code evaluates line 5 by itself it will think line 5 is a constant,
65
+ # remove it, and introduce a syntax error.
66
+ #
67
+ # All of these problems are fixed by joining the whole heredoc into a single
68
+ # line.
69
+ #
70
+ # ## Comments and whitespace
71
+ #
72
+ # Comments can throw off the way the lexer tells us that the line
73
+ # logically belongs with the next line. This is valid ruby but
74
+ # results in a different lex output than before:
75
+ #
76
+ # 1 User.
77
+ # 2 where(name: "schneems").
78
+ # 3 # Comment here
79
+ # 4 first
80
+ #
81
+ # To handle this we can replace comment lines with empty lines
82
+ # and then re-lex the source. This removal and re-lexing preserves
83
+ # line index and document size, but generates an easier to work with
84
+ # document.
85
+ #
86
+ class CleanDocument
87
+ def initialize(source:)
88
+ @source = source
89
+ @document = CodeLine.from_source(@source)
90
+ end
91
+
92
+ # Call all of the document "cleaners"
93
+ # and return self
94
+ def call
95
+ clean_sweep
96
+ .join_trailing_slash!
97
+ .join_consecutive!
98
+ .join_heredoc!
99
+
100
+ self
101
+ end
102
+
103
+ # Return an array of CodeLines in the
104
+ # document
105
+ def lines
106
+ @document
107
+ end
108
+
109
+ # Renders the document back to a string
110
+ def to_s
111
+ @document.join
112
+ end
113
+
114
+ # Remove comments and whitespace only lines
115
+ #
116
+ # replace with empty newlines
117
+ #
118
+ # source = <<~'EOM'
119
+ # # Comment 1
120
+ # puts "hello"
121
+ # # Comment 2
122
+ # puts "world"
123
+ # EOM
124
+ #
125
+ # lines = CleanDocument.new(source: source).clean_sweep.lines
126
+ # expect(lines[0].to_s).to eq("\n")
127
+ # expect(lines[1].to_s).to eq("puts \"hello\"\n")
128
+ # expect(lines[2].to_s).to eq("\n")
129
+ # expect(lines[3].to_s).to eq("puts \"world\"\n")
130
+ #
131
+ # WARNING:
132
+ # If you run this after any of the "join" commands, they
133
+ # will be un-joined.
134
+ #
135
+ # After this change is made, we re-lex the document because
136
+ # removing comments can change how the doc is parsed.
137
+ #
138
+ # For example:
139
+ #
140
+ # values = LexAll.new(source: <<~EOM)
141
+ # User.
142
+ # # comment
143
+ # where(name: 'schneems')
144
+ # EOM
145
+ # expect(values.count {|v| v.type == :on_ignored_nl}).to eq(1)
146
+ #
147
+ # After the comment is removed:
148
+ #
149
+ # values = LexAll.new(source: <<~EOM)
150
+ # User.
151
+ #
152
+ # where(name: 'schneems')
153
+ # EOM
154
+ # expect(values.count {|v| v.type == :on_ignored_nl}).to eq(2)
155
+ #
156
+ def clean_sweep
157
+ source = @document.map do |code_line|
158
+ # Clean trailing whitespace on empty line
159
+ if code_line.line.strip.empty?
160
+ next CodeLine.new(line: "\n", index: code_line.index, lex: [])
161
+ end
162
+
163
+ # Remove comments
164
+ if code_line.lex.detect { |lex| lex.type != :on_sp }&.type == :on_comment
165
+ next CodeLine.new(line: "\n", index: code_line.index, lex: [])
166
+ end
167
+
168
+ code_line
169
+ end.join
170
+
171
+ @source = source
172
+ @document = CodeLine.from_source(source)
173
+ self
174
+ end
175
+
176
+ # Smushes all heredoc lines into one line
177
+ #
178
+ # source = <<~'EOM'
179
+ # foo = <<~HEREDOC
180
+ # lol
181
+ # hehehe
182
+ # HEREDOC
183
+ # EOM
184
+ #
185
+ # lines = CleanDocument.new(source: source).join_heredoc!.lines
186
+ # expect(lines[0].to_s).to eq(source)
187
+ # expect(lines[1].to_s).to eq("")
188
+ def join_heredoc!
189
+ start_index_stack = []
190
+ heredoc_beg_end_index = []
191
+ lines.each do |line|
192
+ line.lex.each do |lex_value|
193
+ case lex_value.type
194
+ when :on_heredoc_beg
195
+ start_index_stack << line.index
196
+ when :on_heredoc_end
197
+ start_index = start_index_stack.pop
198
+ end_index = line.index
199
+ heredoc_beg_end_index << [start_index, end_index]
200
+ end
201
+ end
202
+ end
203
+
204
+ heredoc_groups = heredoc_beg_end_index.map { |start_index, end_index| @document[start_index..end_index] }
205
+
206
+ join_groups(heredoc_groups)
207
+ self
208
+ end
209
+
210
+ # Smushes logically "consecutive" lines
211
+ #
212
+ # source = <<~'EOM'
213
+ # User.
214
+ # where(name: 'schneems').
215
+ # first
216
+ # EOM
217
+ #
218
+ # lines = CleanDocument.new(source: source).join_consecutive!.lines
219
+ # expect(lines[0].to_s).to eq(source)
220
+ # expect(lines[1].to_s).to eq("")
221
+ #
222
+ # The one known case this doesn't handle is:
223
+ #
224
+ # Ripper.lex <<~EOM
225
+ # a &&
226
+ # b ||
227
+ # c
228
+ # EOM
229
+ #
230
+ # For some reason this introduces `on_ignored_nl` but with BEG type
231
+ #
232
+ def join_consecutive!
233
+ consecutive_groups = @document.select(&:ignore_newline_not_beg?).map do |code_line|
234
+ take_while_including(code_line.index..-1) do |line|
235
+ line.ignore_newline_not_beg?
236
+ end
237
+ end
238
+
239
+ join_groups(consecutive_groups)
240
+ self
241
+ end
242
+
243
+ # Join lines with a trailing slash
244
+ #
245
+ # source = <<~'EOM'
246
+ # it "code can be split" \
247
+ # "across multiple lines" do
248
+ # EOM
249
+ #
250
+ # lines = CleanDocument.new(source: source).join_trailing_slash!.lines
251
+ # expect(lines[0].to_s).to eq(source)
252
+ # expect(lines[1].to_s).to eq("")
253
+ def join_trailing_slash!
254
+ trailing_groups = @document.select(&:trailing_slash?).map do |code_line|
255
+ take_while_including(code_line.index..-1) { |x| x.trailing_slash? }
256
+ end
257
+ join_groups(trailing_groups)
258
+ self
259
+ end
260
+
261
+ # Helper method for joining "groups" of lines
262
+ #
263
+ # Input is expected to be type Array<Array<CodeLine>>
264
+ #
265
+ # The outer array holds the various "groups" while the
266
+ # inner array holds code lines.
267
+ #
268
+ # All code lines are "joined" into the first line in
269
+ # their group.
270
+ #
271
+ # To preserve document size, empty lines are placed
272
+ # in the place of the lines that were "joined"
273
+ def join_groups(groups)
274
+ groups.each do |lines|
275
+ line = lines.first
276
+
277
+ # Handle the case of multiple groups in a row
278
+ # if one is already replaced, move on
279
+ next if @document[line.index].empty?
280
+
281
+ # Join group into the first line
282
+ @document[line.index] = CodeLine.new(
283
+ lex: lines.map(&:lex).flatten,
284
+ line: lines.join,
285
+ index: line.index
286
+ )
287
+
288
+ # Hide the rest of the lines
289
+ lines[1..-1].each do |line|
290
+ # The above lines already have newlines in them; if we add more
291
+ # there will be a double newline, so use an empty line instead
292
+ @document[line.index] = CodeLine.new(line: "", index: line.index, lex: [])
293
+ end
294
+ end
295
+ self
296
+ end
297
+
298
+ # Helper method for grabbing elements from document
299
+ #
300
+ # Like `take_while` except when it stops
301
+ # iterating, it also returns the line
302
+ # that caused it to stop
303
+ def take_while_including(range = 0..-1)
304
+ take_next_and_stop = false
305
+ @document[range].take_while do |line|
306
+ next if take_next_and_stop
307
+
308
+ take_next_and_stop = !(yield line)
309
+ true
310
+ end
311
+ end
312
+ end
313
+ end
@@ -54,11 +54,11 @@ module DeadEnd
54
54
  # populate an array with multiple code blocks then call `sort!`
55
55
  # on it without having to specify the sorting criteria
56
56
  def <=>(other)
57
- out = self.current_indent <=> other.current_indent
57
+ out = current_indent <=> other.current_indent
58
58
  return out if out != 0
59
59
 
60
60
  # Stable sort
61
- self.starts_at <=> other.starts_at
61
+ starts_at <=> other.starts_at
62
62
  end
63
63
 
64
64
  def current_indent
@@ -71,7 +71,7 @@ module DeadEnd
71
71
 
72
72
  def valid?
73
73
  return @valid if @valid != UNSET
74
- @valid = DeadEnd.valid?(self.to_s)
74
+ @valid = DeadEnd.valid?(to_s)
75
75
  end
76
76
 
77
77
  def to_s