dead_end 1.2.0 → 3.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,313 @@
1
+ # frozen_string_literal: true
2
+
3
+ module DeadEnd
4
+ # Parses and sanitizes source into a lexically aware document
5
+ #
6
+ # Internally the document is represented by an array with each
7
+ # index containing a CodeLine correlating to a line from the source code.
8
+ #
9
+ # There are three main phases in the algorithm:
10
+ #
11
+ # 1. Sanitize/format input source
12
+ # 2. Search for invalid blocks
13
+ # 3. Format invalid blocks into something meaningful
14
+ #
15
+ # This class handles the first part.
16
+ #
17
+ # The reason this class exists is to format input source
18
+ # for better/easier/cleaner exploration.
19
+ #
20
+ # The CodeSearch class operates at the line level so
21
+ # we must be careful to not introduce lines that look
22
+ # valid by themselves, but when removed will trigger syntax errors
23
+ # or strange behavior.
24
+ #
25
+ # ## Join Trailing slashes
26
+ #
27
+ # Code with a trailing slash is logically treated as a single line:
28
+ #
29
+ # 1 it "code can be split" \
30
+ # 2 "across multiple lines" do
31
+ #
32
+ # In this case removing line 2 would add a syntax error. We get around
33
+ # this by internally joining the two lines into a single "line" object
34
+ #
35
+ # ## Logically Consecutive lines
36
+ #
37
+ # Code that can be broken over multiple
38
+ # lines such as method calls are on different lines:
39
+ #
40
+ # 1 User.
41
+ # 2 where(name: "schneems").
42
+ # 3 first
43
+ #
44
+ # Removing line 2 can introduce a syntax error. To fix this, all lines
45
+ # are joined into one.
46
+ #
47
+ # ## Heredocs
48
+ #
49
+ # A heredoc is a way of defining a multi-line string. They can cause many
50
+ # problems. If left as a single line, Ripper would try to parse the contents
51
+ # as ruby code rather than as a string. Even without this problem, we still
52
+ # hit an issue with indentation
53
+ #
54
+ # 1 foo = <<~HEREDOC
55
+ # 2 "Be yourself; everyone else is already taken.""
56
+ # 3 ― Oscar Wilde
57
+ # 4 puts "I look like ruby code" # but i'm still a heredoc
58
+ # 5 HEREDOC
59
+ #
60
+ # If we didn't join these lines then our algorithm would think that line 4
61
+ # is separate from the rest, has a higher indentation, then look at it first
62
+ # and remove it.
63
+ #
64
+ # If the code evaluates line 5 by itself it will think line 5 is a constant,
65
+ # remove it, and introduce a syntax error.
66
+ #
67
+ # All of these problems are fixed by joining the whole heredoc into a single
68
+ # line.
69
+ #
70
+ # ## Comments and whitespace
71
+ #
72
+ # Comments can throw off the way the lexer tells us that the line
73
+ # logically belongs with the next line. This is valid ruby but
74
+ # results in a different lex output than before:
75
+ #
76
+ # 1 User.
77
+ # 2 where(name: "schneems").
78
+ # 3 # Comment here
79
+ # 4 first
80
+ #
81
+ # To handle this we can replace comment lines with empty lines
82
+ # and then re-lex the source. This removal and re-lexing preserves
83
+ # line index and document size, but generates an easier to work with
84
+ # document.
85
+ #
86
+ class CleanDocument
87
# Builds a document from raw source text.
#
# source - the source text. It is kept in @source and passed to
#          CodeLine.from_source; that call's result becomes @document.
def initialize(source:)
  @source = source
  @document = CodeLine.from_source(source)
end
91
+
92
+ # Call all of the document "cleaners"
93
+ # and return self
94
# Runs every cleaning pass and returns self.
#
# clean_sweep goes first because it rebuilds @document from a string;
# running it after a join pass would discard the joined lines. Each pass
# returns self, so calling them one after another is equivalent to
# chaining them.
def call
  clean_sweep
  join_trailing_slash!
  join_consecutive!
  join_heredoc!

  self
end
102
+
103
+ # Return an array of CodeLines in the
104
+ # document
105
# Returns the object stored in @document (not a copy), so callers see
# any later in-place replacement made by the join passes.
def lines
  @document
end
108
+
109
+ # Renders the document back to a string
110
# Renders the document as one string by joining every entry of
# @document with no separator.
def to_s
  @document.join
end
113
+
114
+ # Remove comments and whitespace only lines
115
+ #
116
+ # replace with empty newlines
117
+ #
118
+ # source = <<~'EOM'
119
+ # # Comment 1
120
+ # puts "hello"
121
+ # # Comment 2
122
+ # puts "world"
123
+ # EOM
124
+ #
125
+ # lines = CleanDocument.new(source: source).clean_sweep.lines
126
+ # expect(lines[0].to_s).to eq("\n")
127
+ # expect(lines[1].to_s).to eq("puts \"hello\"\n")
128
+ # expect(lines[2].to_s).to eq("\n")
129
+ # expect(lines[3].to_s).to eq("puts \"world\"\n")
130
+ #
131
+ # WARNING:
132
+ # If you run this after any of the "join" commands, they
133
+ # will be un-joined.
134
+ #
135
+ # After this change is made, we re-lex the document because
136
+ # removing comments can change how the doc is parsed.
137
+ #
138
+ # For example:
139
+ #
140
+ # values = LexAll.new(source: <<~EOM)
141
+ # User.
142
+ # # comment
143
+ # where(name: 'schneems')
144
+ # EOM
145
+ # expect(values.count {|v| v.type == :on_ignored_nl}).to eq(1)
146
+ #
147
+ # After the comment is removed:
148
+ #
149
+ # values = LexAll.new(source: <<~EOM)
150
+ # User.
151
+ #
152
+ # where(name: 'schneems')
153
+ # EOM
154
+ # expect(values.count {|v| v.type == :on_ignored_nl}).to eq(2)
155
+ #
156
# Blanks out whitespace-only lines and comment-only lines, then rebuilds
# the document from the resulting text.
#
# A line is replaced by a bare "\n" CodeLine (same index, empty lex) when
# either:
#   * its text is empty after `strip`, or
#   * its first lex value whose type is not :on_sp has type :on_comment.
#
# The cleaned lines are joined into a string that replaces @source, and
# @document is regenerated with CodeLine.from_source.
#
# Returns self.
def clean_sweep
  cleaned = @document.map do |code_line|
    whitespace_only = code_line.line.strip.empty?
    comment_only = !whitespace_only &&
      code_line.lex.find { |lex| lex.type != :on_sp }&.type == :on_comment

    if whitespace_only || comment_only
      CodeLine.new(line: "\n", index: code_line.index, lex: [])
    else
      code_line
    end
  end

  @source = cleaned.join
  @document = CodeLine.from_source(@source)
  self
end
175
+
176
+ # Smushes all heredoc lines into one line
177
+ #
178
+ # source = <<~'EOM'
179
+ # foo = <<~HEREDOC
180
+ # lol
181
+ # hehehe
182
+ # HEREDOC
183
+ # EOM
184
+ #
185
+ # lines = CleanDocument.new(source: source).join_heredoc!.lines
186
+ # expect(lines[0].to_s).to eq(source)
187
+ # expect(lines[1].to_s).to eq("")
188
# Joins every heredoc (opening line through closing line) into one line.
#
# Walks the lex values of each line in order. The index of a line holding
# an :on_heredoc_beg is pushed on a stack; an :on_heredoc_end pops the most
# recent start and records a [start_index, end_index] pair. Each pair is
# turned into the slice @document[start_index..end_index] and handed to
# join_groups.
#
# Returns self.
def join_heredoc!
  open_heredoc_starts = []
  heredoc_spans = []

  lines.each do |line|
    line.lex.each do |lex_value|
      case lex_value.type
      when :on_heredoc_beg
        open_heredoc_starts << line.index
      when :on_heredoc_end
        heredoc_spans << [open_heredoc_starts.pop, line.index]
      end
    end
  end

  join_groups(heredoc_spans.map { |start_index, end_index| @document[start_index..end_index] })
  self
end
209
+
210
+ # Smushes logically "consecutive" lines
211
+ #
212
+ # source = <<~'EOM'
213
+ # User.
214
+ # where(name: 'schneems').
215
+ # first
216
+ # EOM
217
+ #
218
+ # lines = CleanDocument.new(source: source).join_consecutive!.lines
219
+ # expect(lines[0].to_s).to eq(source)
220
+ # expect(lines[1].to_s).to eq("")
221
+ #
222
+ # The one known case this doesn't handle is:
223
+ #
224
+ # Ripper.lex <<~EOM
225
+ # a &&
226
+ # b ||
227
+ # c
228
+ # EOM
229
+ #
230
+ # For some reason this introduces `on_ignore_newline` but with BEG type
231
+ #
232
# Joins runs of logically consecutive lines into their first line.
#
# For every line answering true to ignore_newline_not_beg?, a group is
# collected starting at that line's index: all following lines for which
# the predicate still holds, plus the first line for which it does not
# (see take_while_including). The groups are passed to join_groups.
#
# Returns self.
def join_consecutive!
  group_starts = @document.select(&:ignore_newline_not_beg?)

  consecutive_groups = group_starts.map do |code_line|
    take_while_including(code_line.index..-1, &:ignore_newline_not_beg?)
  end

  join_groups(consecutive_groups)
  self
end
242
+
243
+ # Join lines with a trailing slash
244
+ #
245
+ # source = <<~'EOM'
246
+ # it "code can be split" \
247
+ # "across multiple lines" do
248
+ # EOM
249
+ #
250
+ # lines = CleanDocument.new(source: source).join_trailing_slash!.lines
251
+ # expect(lines[0].to_s).to eq(source)
252
+ # expect(lines[1].to_s).to eq("")
253
# Joins each line that reports trailing_slash? with the lines that follow.
#
# Starting from every line for which trailing_slash? is true, a group is
# collected that runs through consecutive trailing-slash lines and also
# includes the first line without one (see take_while_including). The
# groups are passed to join_groups.
#
# Returns self.
def join_trailing_slash!
  group_starts = @document.select(&:trailing_slash?)

  trailing_groups = group_starts.map do |code_line|
    take_while_including(code_line.index..-1, &:trailing_slash?)
  end

  join_groups(trailing_groups)
  self
end
260
+
261
+ # Helper method for joining "groups" of lines
262
+ #
263
+ # Input is expected to be type Array<Array<CodeLine>>
264
+ #
265
+ # The outer array holds the various "groups" while the
266
+ # inner array holds code lines.
267
+ #
268
+ # All code lines are "joined" into the first line in
269
+ # their group.
270
+ #
271
+ # To preserve document size, empty lines are placed
272
+ # in the place of the lines that were "joined"
273
# Collapses each group of code lines into the group's first line.
#
# groups - Array<Array<CodeLine>>; each inner array is one group.
#
# For every group the first line's slot in @document is replaced by a new
# CodeLine whose text is the whole group joined and whose lex is the
# concatenation of every member's lex. Every other member's slot is
# replaced with an empty-string CodeLine so the document keeps its size
# and indexes.
#
# Returns self.
def join_groups(groups)
  groups.each do |group|
    head, *rest = group

    # Handle the case of multiple groups in a row:
    # if the first line was already replaced by an earlier group, move on
    next if @document[head.index].empty?

    # Join the group into its first line
    @document[head.index] = CodeLine.new(
      lex: group.flat_map(&:lex),
      line: group.join,
      index: head.index
    )

    # Hide the rest of the lines. The joined text already carries their
    # newlines, so an empty string (not "\n") avoids doubling them.
    rest.each do |hidden|
      @document[hidden.index] = CodeLine.new(line: "", index: hidden.index, lex: [])
    end
  end

  self
end
297
+
298
+ # Helper method for grabbing elements from document
299
+ #
300
+ # Like `take_while` except when it stops
301
+ # iterating, it also returns the line
302
+ # that caused it to stop
303
# Like `take_while`, but the element that made the block return a falsy
# value is included in the result.
#
# range - which slice of @document to scan (defaults to all of it).
#
# Yields each element of the slice in order until the block returns a
# falsy value. Returns the elements yielded so far, including that last
# one; returns the whole slice if the block never returns a falsy value.
def take_while_including(range = 0..-1)
  candidates = @document[range]
  stop_at = candidates.index { |line| !(yield line) }

  stop_at ? candidates[0..stop_at] : candidates
end
312
+ end
313
+ end
@@ -0,0 +1,118 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "pathname"
4
+ require "optparse"
5
+
6
+ module DeadEnd
7
+ # All the logic of the exe/dead_end CLI in one handy spot
8
+ #
9
+ # Cli.new(argv: ["--help"]).call
10
+ # Cli.new(argv: ["<path/to/file>.rb"]).call
11
+ # Cli.new(argv: ["<path/to/file>.rb", "--record=tmp"]).call
12
+ # Cli.new(argv: ["<path/to/file>.rb", "--terminal"]).call
13
+ #
14
+ class Cli
15
+ attr_accessor :options, :file_name
16
+
17
+ # ARGV is Everything passed to the executable, does not include executable name
18
+ #
19
+ # All other intputs are dependency injection for testing
20
# Captures the CLI inputs and seeds the default options.
#
# argv     - arguments passed to the executable; argv[0] is taken as the
#            file name.
# exit_obj - object that receives `exit` (Kernel by default).
# io       - output stream ($stdout by default).
# env      - environment lookup (ENV by default).
#
# options[:record_dir] is "tmp" when env["DEBUG"] is set, otherwise the
# value of env["DEAD_END_RECORD_DIR"] (which may be nil).
# options[:terminal] starts as DeadEnd::DEFAULT_VALUE.
def initialize(argv:, exit_obj: Kernel, io: $stdout, env: ENV)
  @options = {
    record_dir: env["DEBUG"] ? "tmp" : env["DEAD_END_RECORD_DIR"],
    terminal: DeadEnd::DEFAULT_VALUE
  }
  @parser = nil

  @io = io
  @argv = argv
  @file_name = argv[0]
  @exit_obj = exit_obj
end
32
+
33
# Runs the CLI.
#
# With no file name the parser is fed `--help`; otherwise @argv is parsed.
# If the help handler set options[:exit] the method returns early (the
# exit object may be a fake in tests, so the process can still be alive).
# Otherwise the file is read, handed to DeadEnd.call, and the exit object
# is asked to exit with 0 when display.document_ok? is true, else 1.
def call
  if file_name.nil? || file_name.empty?
    # Display help if raw command
    parser.parse!(%w[--help])
  else
    parse
  end

  # Needed for testing since we fake exit
  return if options[:exit]

  file = Pathname(file_name)
  record_dir = options[:record_dir]

  @io.puts "Record dir: #{record_dir}" if record_dir

  display = DeadEnd.call(
    io: @io,
    source: file.read,
    filename: file.expand_path,
    terminal: options.fetch(:terminal, DeadEnd::DEFAULT_VALUE),
    record_dir: record_dir
  )

  @exit_obj.exit(display.document_ok? ? 0 : 1)
end
62
+
63
# Parses @argv with the option parser. `parse!` is the destructive
# OptionParser variant, so recognised options are removed from @argv.
#
# Returns self.
def parse
  parser.parse!(@argv)

  self
end
68
+
69
# Returns the memoized OptionParser for the CLI.
#
# Handlers:
#   --help          prints the parser to @io, sets options[:exit] and calls
#                   @exit_obj.exit
#   --record <dir>  stores the directory in options[:record_dir]
#   --terminal      sets options[:terminal] to true
#   --no-terminal   sets options[:terminal] to false
#
# NOTE(review): the banner text below is reproduced from a rendering that
# collapsed whitespace, so any relative indentation (and repeated spaces)
# inside the original heredoc could not be recovered — confirm against
# upstream before relying on the exact banner layout.
def parser
  @parser ||= OptionParser.new do |opts|
    opts.banner = <<~EOM
      Usage: dead_end <file> [options]

      Parses a ruby source file and searches for syntax error(s) such as
      unexpected `end', expecting end-of-input.

      Example:

      $ dead_end dog.rb

      # ...

      ❯ 10 defdog
      ❯ 15 end

      ENV options:

      DEAD_END_RECORD_DIR=<dir>

      Records the steps used to search for a syntax error
      to the given directory

      Options:
    EOM

    opts.version = DeadEnd::VERSION

    opts.on("--help", "Help - displays this message") do
      @io.puts opts
      options[:exit] = true
      @exit_obj.exit
    end

    opts.on("--record <dir>", "Records the steps used to search for a syntax error to the given directory") do |dir|
      options[:record_dir] = dir
    end

    opts.on("--terminal", "Enable terminal highlighting") do
      options[:terminal] = true
    end

    opts.on("--no-terminal", "Disable terminal highlighting") do
      options[:terminal] = false
    end
  end
end
117
+ end
118
+ end
@@ -70,8 +70,24 @@ module DeadEnd
70
70
  end
71
71
 
72
72
  def valid?
73
- return @valid if @valid != UNSET
74
- @valid = DeadEnd.valid?(to_s)
73
+ if @valid == UNSET
74
+ # Performance optimization
75
+ #
76
+ # If all the lines were previously hidden
77
+ # and we expand to capture additional empty
78
+ # lines then the result cannot be invalid
79
+ #
80
+ # That means there's no reason to re-check all
81
+ # lines with ripper (which is expensive).
82
+ # Benchmark in commit message
83
+ @valid = if lines.all? { |l| l.hidden? || l.empty? }
84
+ true
85
+ else
86
+ DeadEnd.valid?(lines.map(&:original).join)
87
+ end
88
+ else
89
+ @valid
90
+ end
75
91
  end
76
92
 
77
93
  def to_s
@@ -3,11 +3,19 @@
3
3
  module DeadEnd
4
4
  # The main function of the frontier is to hold the edges of our search and to
5
5
  # evaluate when we can stop searching.
6
+
7
+ # There are three main phases in the algorithm:
8
+ #
9
+ # 1. Sanitize/format input source
10
+ # 2. Search for invalid blocks
11
+ # 3. Format invalid blocks into something meaningful
12
+ #
13
+ # The Code frontier is a critical part of the second step
6
14
  #
7
15
  # ## Knowing where we've been
8
16
  #
9
- # Once a code block is generated it is added onto the frontier where it will be
10
- # sorted and then the frontier can be filtered. Large blocks that totally contain a
17
+ # Once a code block is generated it is added onto the frontier. Then it will be
18
+ # sorted by indentation and frontier can be filtered. Large blocks that fully enclose a
11
19
  # smaller block will cause the smaller block to be evicted.
12
20
  #
13
21
  # CodeFrontier#<<(block) # Adds block to frontier
@@ -15,11 +23,11 @@ module DeadEnd
15
23
  #
16
24
  # ## Knowing where we can go
17
25
  #
18
- # Internally it keeps track of "unvisited" lines which is exposed via `next_indent_line`
19
- # when called this will return a line of code with the most indentation.
26
+ # Internally the frontier keeps track of "unvisited" lines which are exposed via `next_indent_line`
27
+ # when called, this method returns a line of code with the highest indentation.
20
28
  #
21
- # This line of code can be used to build a CodeBlock and then when that code block
22
- # is added back to the frontier, then the lines are removed from the
29
+ # The returned line of code can be used to build a CodeBlock and then that code block
30
+ # is added back to the frontier. Then, the lines are removed from the
23
31
  # "unvisited" so we don't double-create the same block.
24
32
  #
25
33
  # CodeFrontier#next_indent_line # Shows next line
@@ -27,34 +35,61 @@ module DeadEnd
27
35
  #
28
36
  # ## Knowing when to stop
29
37
  #
30
- # The frontier holds the syntax error when removing all code blocks from the original
31
- # source document allows it to be parsed as syntatically valid:
38
+ # The frontier knows how to check the entire document for a syntax error. When blocks
39
+ # are added onto the frontier, they're removed from the document. When all code containing
40
+ # syntax errors has been added to the frontier, the document will be parsable without a
41
+ # syntax error and the search can stop.
32
42
  #
33
- # CodeFrontier#holds_all_syntax_errors?
43
+ # CodeFrontier#holds_all_syntax_errors? # Returns true when frontier holds all syntax errors
34
44
  #
35
45
  # ## Filtering false positives
36
46
  #
37
- # Once the search is completed, the frontier will have many blocks that do not contain
38
- # the syntax error. To filter to the smallest subset that does call:
47
+ # Once the search is completed, the frontier may have multiple blocks that do not contain
48
+ # the syntax error. To limit the result to the smallest subset of "invalid blocks" call:
39
49
  #
40
50
  # CodeFrontier#detect_invalid_blocks
51
+ #
41
52
  class CodeFrontier
42
53
# Sets up an empty frontier over the given code lines.
#
# code_lines - the document's lines; each must respond to `indent_index`,
#              which is used as the sort key for @unvisited_lines.
#
# @check_next starts true so the first call to can_skip_check? reports
# that the check may not be skipped.
# NOTE(review): @has_run is assigned here but not read anywhere in the
# visible portion of this class — confirm it is still needed.
def initialize(code_lines:)
  @code_lines = code_lines
  @frontier = []
  @unvisited_lines = @code_lines.sort_by(&:indent_index)
  @has_run = false
  @check_next = true
end
47
60
 
48
61
# Number of blocks currently held in @frontier.
def count
  @frontier.size
end
51
64
 
65
+ # Performance optimization
66
+ #
67
+ # Parsing with ripper is expensive
68
+ # If we know we don't have any blocks with invalid
69
+ # syntax, then we know we cannot have found
70
+ # the incorrect syntax yet.
71
+ #
72
+ # When an invalid block is added onto the frontier
73
+ # check document state
74
# Reports whether the expensive document check can be skipped, and
# consumes the pending-check flag.
#
# Returns false (do not skip) exactly when @check_next was truthy at the
# time of the call, true otherwise. @check_next is always reset to false,
# so a second call in a row returns true until something sets the flag
# again.
private def can_skip_check?
  check_requested = @check_next
  @check_next = false

  !check_requested
end
84
+
52
85
  # Returns true if the document is valid with all lines
53
86
  # removed. By default it checks all blocks in present in
54
87
  # the frontier array, but can be used for arbitrary arrays
55
88
  # of codeblocks as well
56
- def holds_all_syntax_errors?(block_array = @frontier)
57
- without_lines = block_array.map do |block|
89
+ def holds_all_syntax_errors?(block_array = @frontier, can_cache: true)
90
+ return false if can_cache && can_skip_check?
91
+
92
+ without_lines = block_array.flat_map do |block|
58
93
  block.lines
59
94
  end
60
95
 
@@ -84,8 +119,8 @@ module DeadEnd
84
119
  puts "```"
85
120
  puts @frontier.last.to_s
86
121
  puts "```"
87
- puts " @frontier indent: #{frontier_indent}"
88
- puts " @unvisited indent: #{unvisited_indent}"
122
+ puts " @frontier indent: #{frontier_indent}"
123
+ puts " @unvisited indent: #{unvisited_indent}"
89
124
  end
90
125
 
91
126
  # Expand all blocks before moving to unvisited lines
@@ -109,6 +144,8 @@ module DeadEnd
109
144
  @frontier.reject! { |b|
110
145
  b.starts_at >= block.starts_at && b.ends_at <= block.ends_at
111
146
  }
147
+
148
+ @check_next = true if block.invalid?
112
149
  @frontier << block
113
150
  @frontier.sort!
114
151
 
@@ -131,7 +168,7 @@ module DeadEnd
131
168
  # the smallest possible set of blocks that contain all the syntax errors
132
169
  def detect_invalid_blocks
133
170
  self.class.combination(@frontier.select(&:invalid?)).detect do |block_array|
134
- holds_all_syntax_errors?(block_array)
171
+ holds_all_syntax_errors?(block_array, can_cache: false)
135
172
  end || []
136
173
  end
137
174
  end