dead_end 1.2.0 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,313 @@
1
+ # frozen_string_literal: true
2
+
3
+ module DeadEnd
4
+ # Parses and sanitizes source into a lexically aware document
5
+ #
6
+ # Internally the document is represented by an array with each
7
+ # index containing a CodeLine correlating to a line from the source code.
8
+ #
9
+ # There are three main phases in the algorithm:
10
+ #
11
+ # 1. Sanitize/format input source
12
+ # 2. Search for invalid blocks
13
+ # 3. Format invalid blocks into something meaningful
14
+ #
15
+ # This class handles the first part.
16
+ #
17
+ # The reason this class exists is to format input source
18
+ # for better/easier/cleaner exploration.
19
+ #
20
+ # The CodeSearch class operates at the line level so
21
+ # we must be careful to not introduce lines that look
22
+ # valid by themselves, but when removed will trigger syntax errors
23
+ # or strange behavior.
24
+ #
25
+ # ## Join Trailing slashes
26
+ #
27
+ # Code with a trailing slash is logically treated as a single line:
28
+ #
29
+ # 1 it "code can be split" \
30
+ # 2 "across multiple lines" do
31
+ #
32
+ # In this case removing line 2 would add a syntax error. We get around
33
+ # this by internally joining the two lines into a single "line" object
34
+ #
35
+ # ## Logically Consecutive lines
36
+ #
37
+ # Code that can be broken over multiple
38
+ # lines such as method calls are on different lines:
39
+ #
40
+ # 1 User.
41
+ # 2 where(name: "schneems").
42
+ # 3 first
43
+ #
44
+ # Removing line 2 can introduce a syntax error. To fix this, all lines
45
+ # are joined into one.
46
+ #
47
+ # ## Heredocs
48
+ #
49
+ # A heredoc is a way of defining a multi-line string. They can cause many
50
+ # problems. If left as a single line, Ripper would try to parse the contents
51
+ # as ruby code rather than as a string. Even without this problem, we still
52
+ # hit an issue with indentation
53
+ #
54
+ # 1 foo = <<~HEREDOC
55
+ # 2 "Be yourself; everyone else is already taken.""
56
+ # 3 ― Oscar Wilde
57
+ # 4 puts "I look like ruby code" # but i'm still a heredoc
58
+ # 5 HEREDOC
59
+ #
60
+ # If we didn't join these lines then our algorithm would think that line 4
61
+ # is separate from the rest, has a higher indentation, then look at it first
62
+ # and remove it.
63
+ #
64
+ # If the code evaluates line 5 by itself it will think line 5 is a constant,
65
+ # remove it, and introduce a syntax error.
66
+ #
67
+ # All of these problems are fixed by joining the whole heredoc into a single
68
+ # line.
69
+ #
70
+ # ## Comments and whitespace
71
+ #
72
+ # Comments can throw off the way the lexer tells us that the line
73
+ # logically belongs with the next line. This is valid ruby but
74
+ # results in a different lex output than before:
75
+ #
76
+ # 1 User.
77
+ # 2 where(name: "schneems").
78
+ # 3 # Comment here
79
+ # 4 first
80
+ #
81
+ # To handle this we can replace comment lines with empty lines
82
+ # and then re-lex the source. This removal and re-lexing preserves
83
+ # line index and document size, but generates an easier to work with
84
+ # document.
85
+ #
86
+ class CleanDocument
87
def initialize(source:)
  # Keep the raw text next to its line-by-line form; the cleaners in
  # this class read and rewrite @document.
  @source = source
  @document = CodeLine.from_source(source)
end
91
+
92
+ # Call all of the document "cleaners"
93
+ # and return self
94
def call
  # Run every cleaner in order. clean_sweep goes first because it
  # rebuilds the document from source, which would undo any joins.
  # `tap` hands back the receiver once the chain has finished.
  tap { clean_sweep.join_trailing_slash!.join_consecutive!.join_heredoc! }
end
102
+
103
+ # Return an array of CodeLines in the
104
+ # document
105
def lines
  # The live array is handed out, not a copy
  @document
end
108
+
109
+ # Renders the document back to a string
110
def to_s
  # Each element is stringified and concatenated with no extra separator
  @document.join
end
113
+
114
+ # Remove comments and whitespace only lines
115
+ #
116
+ # replace with empty newlines
117
+ #
118
+ # source = <<~'EOM'
119
+ # # Comment 1
120
+ # puts "hello"
121
+ # # Comment 2
122
+ # puts "world"
123
+ # EOM
124
+ #
125
+ # lines = CleanDocument.new(source: source).clean_sweep.lines
126
+ # expect(lines[0].to_s).to eq("\n")
127
+ # expect(lines[1].to_s).to eq("puts \"hello\"\n")
128
+ # expect(lines[2].to_s).to eq("\n")
129
+ # expect(lines[3].to_s).to eq("puts \"world\"\n")
130
+ #
131
+ # WARNING:
132
+ # If you run this after any of the "join" commands, they
133
+ # will be un-joined.
134
+ #
135
+ # After this change is made, we re-lex the document because
136
+ # removing comments can change how the doc is parsed.
137
+ #
138
+ # For example:
139
+ #
140
+ # values = LexAll.new(source: <<~EOM))
141
+ # User.
142
+ # # comment
143
+ # where(name: 'schneems')
144
+ # EOM
145
+ # expect(values.count {|v| v.type == :on_ignored_nl}).to eq(1)
146
+ #
147
+ # After the comment is removed:
148
+ #
149
+ # values = LexAll.new(source: <<~EOM))
150
+ # User.
151
+ #
152
+ # where(name: 'schneems')
153
+ # EOM
154
+ # expect(values.count {|v| v.type == :on_ignored_nl}).to eq(2)
155
+ #
156
def clean_sweep
  scrubbed = @document.map do |code_line|
    # A line is blank when nothing is left after stripping whitespace
    blank = code_line.line.strip.empty?

    # A line is comment-only when its first non-space token is a comment
    comment_only = !blank && code_line.lex.find { |token| token.type != :on_sp }&.type == :on_comment

    if blank || comment_only
      # Swap in a bare newline so the line keeps its slot in the document
      CodeLine.new(line: "\n", index: code_line.index, lex: [])
    else
      code_line
    end
  end

  # Rebuild the document from the scrubbed text so it is lexed again
  @source = scrubbed.join
  @document = CodeLine.from_source(@source)
  self
end
175
+
176
+ # Smushes all heredoc lines into one line
177
+ #
178
+ # source = <<~'EOM'
179
+ # foo = <<~HEREDOC
180
+ # lol
181
+ # hehehe
182
+ # HEREDOC
183
+ # EOM
184
+ #
185
+ # lines = CleanDocument.new(source: source).join_heredoc!.lines
186
+ # expect(lines[0].to_s).to eq(source)
187
+ # expect(lines[1].to_s).to eq("")
188
def join_heredoc!
  open_heredocs = [] # indexes of lines whose heredoc has not been closed yet
  spans = []         # [first_index, last_index] pairs, one per closed heredoc

  lines.each do |code_line|
    code_line.lex.each do |token|
      type = token.type
      if type == :on_heredoc_beg
        open_heredocs.push(code_line.index)
      elsif type == :on_heredoc_end
        # Pair this terminator with the most recently opened heredoc
        spans.push([open_heredocs.pop, code_line.index])
      end
    end
  end

  groups = spans.map { |first_index, last_index| @document[first_index..last_index] }
  join_groups(groups)
  self
end
209
+
210
+ # Smushes logically "consecutive" lines
211
+ #
212
+ # source = <<~'EOM'
213
+ # User.
214
+ # where(name: 'schneems').
215
+ # first
216
+ # EOM
217
+ #
218
+ # lines = CleanDocument.new(source: source).join_consecutive!.lines
219
+ # expect(lines[0].to_s).to eq(source)
220
+ # expect(lines[1].to_s).to eq("")
221
+ #
222
+ # The one known case this doesn't handle is:
223
+ #
224
+ # Ripper.lex <<~EOM
225
+ # a &&
226
+ # b ||
227
+ # c
228
+ # EOM
229
+ #
230
+ # For some reason this introduces `on_ignored_nl` but with BEG type
231
+ #
232
def join_consecutive!
  # Every line that satisfies the predicate starts a group. The group
  # runs through the following lines that also satisfy it, plus the
  # first line that does not.
  starters = @document.select(&:ignore_newline_not_beg?)
  groups = starters.map do |starter|
    take_while_including(starter.index..-1, &:ignore_newline_not_beg?)
  end

  join_groups(groups)
  self
end
242
+
243
+ # Join lines with a trailing slash
244
+ #
245
+ # source = <<~'EOM'
246
+ # it "code can be split" \
247
+ # "across multiple lines" do
248
+ # EOM
249
+ #
250
+ # lines = CleanDocument.new(source: source).join_trailing_slash!.lines
251
+ # expect(lines[0].to_s).to eq(source)
252
+ # expect(lines[1].to_s).to eq("")
253
def join_trailing_slash!
  # Start a group at every line with a trailing slash and extend it
  # through the first following line that has none.
  starters = @document.select(&:trailing_slash?)
  groups = starters.map do |starter|
    take_while_including(starter.index..-1, &:trailing_slash?)
  end

  join_groups(groups)
  self
end
260
+
261
+ # Helper method for joining "groups" of lines
262
+ #
263
+ # Input is expected to be type Array<Array<CodeLine>>
264
+ #
265
+ # The outer array holds the various "groups" while the
266
+ # inner array holds code lines.
267
+ #
268
+ # All code lines are "joined" into the first line in
269
+ # their group.
270
+ #
271
+ # To preserve document size, empty lines are placed
272
+ # in the place of the lines that were "joined"
273
def join_groups(groups)
  groups.each do |group|
    head = group.first

    # Groups can overlap when several start in a row. A head that is
    # already blanked out was absorbed by an earlier group.
    next if @document[head.index].empty?

    # Collapse the whole group into one CodeLine stored in the head's slot
    merged_lex = group.map(&:lex).flatten
    merged_text = group.join
    @document[head.index] = CodeLine.new(lex: merged_lex, line: merged_text, index: head.index)

    # The merged line already carries every newline, so the absorbed
    # slots get an empty string rather than "\n". That keeps the
    # document size without doubling newlines.
    group.drop(1).each do |absorbed|
      @document[absorbed.index] = CodeLine.new(line: "", index: absorbed.index, lex: [])
    end
  end
  self
end
297
+
298
+ # Helper method for grabbing elements from document
299
+ #
300
+ # Like `take_while` except when it stops
301
+ # iterating, it also returns the line
302
+ # that caused it to stop
303
def take_while_including(range = 0..-1)
  taken = []
  @document[range].each do |candidate|
    # Record the element first so the one that fails the block is kept too
    taken << candidate
    break unless yield(candidate)
  end
  taken
end
312
+ end
313
+ end
@@ -0,0 +1,118 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "pathname"
4
+ require "optparse"
5
+
6
+ module DeadEnd
7
+ # All the logic of the exe/dead_end CLI in one handy spot
8
+ #
9
+ # Cli.new(argv: ["--help"]).call
10
+ # Cli.new(argv: ["<path/to/file>.rb"]).call
11
+ # Cli.new(argv: ["<path/to/file>.rb", "--record=tmp"]).call
12
+ # Cli.new(argv: ["<path/to/file>.rb", "--terminal"]).call
13
+ #
14
+ class Cli
15
+ attr_accessor :options, :file_name
16
+
17
+ # ARGV is Everything passed to the executable, does not include executable name
18
+ #
19
+ # All other inputs are dependency injection for testing
20
def initialize(argv:, exit_obj: Kernel, io: $stdout, env: ENV)
  # The record directory comes from the environment; DEBUG forces "tmp"
  record_dir = env["DEAD_END_RECORD_DIR"]
  record_dir = "tmp" if env["DEBUG"]

  @options = {record_dir: record_dir, terminal: DeadEnd::DEFAULT_VALUE}
  @parser = nil

  # Injected collaborators
  @io = io
  @exit_obj = exit_obj

  # The first argument is taken as the file to inspect
  @argv = argv
  @file_name = argv[0]
end
32
+
33
def call
  # With no file argument, behave as if --help was passed
  no_file_given = file_name.nil? || file_name.empty?
  no_file_given ? parser.parse!(%w[--help]) : parse

  # The --help handler sets this flag. It matters in tests, where the
  # injected exit object does not end the process.
  return if options[:exit]

  path = Pathname(file_name)
  record_dir = options[:record_dir]
  @io.puts "Record dir: #{record_dir}" if record_dir

  display = DeadEnd.call(
    io: @io,
    source: path.read,
    filename: path.expand_path,
    terminal: options.fetch(:terminal, DeadEnd::DEFAULT_VALUE),
    record_dir: options[:record_dir]
  )

  # Exit 0 when the document is ok, 1 otherwise
  @exit_obj.exit(display.document_ok? ? 0 : 1)
end
62
+
63
def parse
  # parse! works on @argv in place; hand back the Cli for chaining
  tap { parser.parse!(@argv) }
end
68
+
69
+ def parser # Returns the OptionParser for this CLI, building it on first use
70
+ @parser ||= OptionParser.new do |opts| # memoized in @parser so later calls reuse the same parser
71
+ opts.banner = <<~EOM
72
+ Usage: dead_end <file> [options]
73
+
74
+ Parses a ruby source file and searches for syntax error(s) such as
75
+ unexpected `end', expecting end-of-input.
76
+
77
+ Example:
78
+
79
+ $ dead_end dog.rb
80
+
81
+ # ...
82
+
83
+ ❯ 10 defdog
84
+ ❯ 15 end
85
+
86
+ ENV options:
87
+
88
+ DEAD_END_RECORD_DIR=<dir>
89
+
90
+ Records the steps used to search for a syntax error
91
+ to the given directory
92
+
93
+ Options:
94
+ EOM
95
+
96
+ opts.version = DeadEnd::VERSION
97
+
98
+ opts.on("--help", "Help - displays this message") do |v|
99
+ @io.puts opts # prints the parser (banner plus option summary) to the injected io
100
+ options[:exit] = true # lets #call return early when the injected exit object does not end the process
101
+ @exit_obj.exit # exit object injected through initialize (Kernel by default)
102
+ end
103
+
104
+ opts.on("--record <dir>", "Records the steps used to search for a syntax error to the given directory") do |v|
105
+ options[:record_dir] = v # replaces the record dir seeded from the environment in initialize
106
+ end
107
+
108
+ opts.on("--terminal", "Enable terminal highlighting") do |v|
109
+ options[:terminal] = true # replaces the DeadEnd::DEFAULT_VALUE set in initialize
110
+ end
111
+
112
+ opts.on("--no-terminal", "Disable terminal highlighting") do |v|
113
+ options[:terminal] = false # replaces the DeadEnd::DEFAULT_VALUE set in initialize
114
+ end
115
+ end
116
+ end
117
+ end
118
+ end
@@ -70,8 +70,24 @@ module DeadEnd
70
70
  end
71
71
 
72
72
  def valid?
73
- return @valid if @valid != UNSET
74
- @valid = DeadEnd.valid?(to_s)
73
+ if @valid == UNSET
74
+ # Performance optimization
75
+ #
76
+ # If all the lines were previously hidden
77
+ # and we expand to capture additional empty
78
+ # lines then the result cannot be invalid
79
+ #
80
+ # That means there's no reason to re-check all
81
+ # lines with ripper (which is expensive).
82
+ # Benchmark in commit message
83
+ @valid = if lines.all? { |l| l.hidden? || l.empty? }
84
+ true
85
+ else
86
+ DeadEnd.valid?(lines.map(&:original).join)
87
+ end
88
+ else
89
+ @valid
90
+ end
75
91
  end
76
92
 
77
93
  def to_s
@@ -3,11 +3,19 @@
3
3
  module DeadEnd
4
4
  # The main function of the frontier is to hold the edges of our search and to
5
5
  # evaluate when we can stop searching.
6
+
7
+ # There are three main phases in the algorithm:
8
+ #
9
+ # 1. Sanitize/format input source
10
+ # 2. Search for invalid blocks
11
+ # 3. Format invalid blocks into something meaningful
12
+ #
13
+ # The Code frontier is a critical part of the second step
6
14
  #
7
15
  # ## Knowing where we've been
8
16
  #
9
- # Once a code block is generated it is added onto the frontier where it will be
10
- # sorted and then the frontier can be filtered. Large blocks that totally contain a
17
+ # Once a code block is generated it is added onto the frontier. Then it will be
18
+ # sorted by indentation and frontier can be filtered. Large blocks that fully enclose a
11
19
  # smaller block will cause the smaller block to be evicted.
12
20
  #
13
21
  # CodeFrontier#<<(block) # Adds block to frontier
@@ -15,11 +23,11 @@ module DeadEnd
15
23
  #
16
24
  # ## Knowing where we can go
17
25
  #
18
- # Internally it keeps track of "unvisited" lines which is exposed via `next_indent_line`
19
- # when called this will return a line of code with the most indentation.
26
+ # Internally the frontier keeps track of "unvisited" lines which are exposed via `next_indent_line`
27
+ # which, when called, returns a line of code with the highest indentation.
20
28
  #
21
- # This line of code can be used to build a CodeBlock and then when that code block
22
- # is added back to the frontier, then the lines are removed from the
29
+ # The returned line of code can be used to build a CodeBlock and then that code block
30
+ # is added back to the frontier. Then, the lines are removed from the
23
31
  # "unvisited" so we don't double-create the same block.
24
32
  #
25
33
  # CodeFrontier#next_indent_line # Shows next line
@@ -27,34 +35,61 @@ module DeadEnd
27
35
  #
28
36
  # ## Knowing when to stop
29
37
  #
30
- # The frontier holds the syntax error when removing all code blocks from the original
31
- # source document allows it to be parsed as syntatically valid:
38
+ # The frontier knows how to check the entire document for a syntax error. When blocks
39
+ # are added onto the frontier, they're removed from the document. When all code containing
40
+ # syntax errors has been added to the frontier, the document will be parsable without a
41
+ # syntax error and the search can stop.
32
42
  #
33
- # CodeFrontier#holds_all_syntax_errors?
43
+ # CodeFrontier#holds_all_syntax_errors? # Returns true when frontier holds all syntax errors
34
44
  #
35
45
  # ## Filtering false positives
36
46
  #
37
- # Once the search is completed, the frontier will have many blocks that do not contain
38
- # the syntax error. To filter to the smallest subset that does call:
47
+ # Once the search is completed, the frontier may have multiple blocks that do not contain
48
+ # the syntax error. To limit the result to the smallest subset of "invalid blocks" call:
39
49
  #
40
50
  # CodeFrontier#detect_invalid_blocks
51
+ #
41
52
  class CodeFrontier
42
53
  def initialize(code_lines:)
43
54
  @code_lines = code_lines
44
55
  @frontier = []
45
56
  @unvisited_lines = @code_lines.sort_by(&:indent_index)
57
+ @has_run = false
58
+ @check_next = true
46
59
  end
47
60
 
48
61
  def count
49
62
  @frontier.count
50
63
  end
51
64
 
65
+ # Performance optimization
66
+ #
67
+ # Parsing with ripper is expensive
68
+ # If we know we don't have any blocks with invalid
69
+ # syntax, then we know we cannot have found
70
+ # the incorrect syntax yet.
71
+ #
72
+ # When an invalid block is added onto the frontier
73
+ # check document state
74
private def can_skip_check?
  # Read the flag and clear it, so a set flag forces exactly one check
  # until something sets it again.
  skip = !@check_next
  @check_next = false
  skip
end
84
+
52
85
  # Returns true if the document is valid with all lines
53
86
  # removed. By default it checks all blocks present in
54
87
  # the frontier array, but can be used for arbitrary arrays
55
88
  # of codeblocks as well
56
- def holds_all_syntax_errors?(block_array = @frontier)
57
- without_lines = block_array.map do |block|
89
+ def holds_all_syntax_errors?(block_array = @frontier, can_cache: true)
90
+ return false if can_cache && can_skip_check?
91
+
92
+ without_lines = block_array.flat_map do |block|
58
93
  block.lines
59
94
  end
60
95
 
@@ -84,8 +119,8 @@ module DeadEnd
84
119
  puts "```"
85
120
  puts @frontier.last.to_s
86
121
  puts "```"
87
- puts " @frontier indent: #{frontier_indent}"
88
- puts " @unvisited indent: #{unvisited_indent}"
122
+ puts " @frontier indent: #{frontier_indent}"
123
+ puts " @unvisited indent: #{unvisited_indent}"
89
124
  end
90
125
 
91
126
  # Expand all blocks before moving to unvisited lines
@@ -109,6 +144,8 @@ module DeadEnd
109
144
  @frontier.reject! { |b|
110
145
  b.starts_at >= block.starts_at && b.ends_at <= block.ends_at
111
146
  }
147
+
148
+ @check_next = true if block.invalid?
112
149
  @frontier << block
113
150
  @frontier.sort!
114
151
 
@@ -131,7 +168,7 @@ module DeadEnd
131
168
  # the smallest possible set of blocks that contain all the syntax errors
132
169
  def detect_invalid_blocks
133
170
  self.class.combination(@frontier.select(&:invalid?)).detect do |block_array|
134
- holds_all_syntax_errors?(block_array)
171
+ holds_all_syntax_errors?(block_array, can_cache: false)
135
172
  end || []
136
173
  end
137
174
  end