syntax_search 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,51 @@
1
+ require_relative "../syntax_search"
2
+
3
# Monkey patch Kernel so that `load`, `require` and `require_relative` all go
# through wrappers that rescue SyntaxError and hand it to
# SyntaxErrorSearch.handle_error.
module Kernel
  # `require_relative` below relies on Pathname to recognise absolute paths.
  require "pathname"

  alias_method :original_require, :require
  alias_method :original_require_relative, :require_relative
  alias_method :original_load, :load

  # Loads `file` through the original `load`, forwarding `wrap` unchanged.
  #
  # Returns the original method's result, or the result of
  # SyntaxErrorSearch.handle_error when a SyntaxError is raised.
  def load(file, wrap = false)
    original_load(file, wrap)
  rescue SyntaxError => e
    SyntaxErrorSearch.handle_error(e)
  end

  # Requires `file` through the original `require`.
  #
  # Returns the original method's result, or the result of
  # SyntaxErrorSearch.handle_error when a SyntaxError is raised.
  def require(file)
    original_require(file)
  rescue SyntaxError => e
    SyntaxErrorSearch.handle_error(e)
  end

  # Resolves `file` against the directory of the calling file and requires
  # the resulting path through the original `require`. Absolute paths are
  # required as given.
  #
  # Raises LoadError when the caller has no file on disk to resolve a
  # relative path against (its location reports no absolute path).
  def require_relative(file)
    if Pathname.new(file).absolute?
      original_require file
    else
      # Must be read directly in this method body: frame 1 is our caller.
      base = caller_locations(1, 1)[0].absolute_path
      raise LoadError, "cannot infer basepath" unless base

      original_require File.expand_path("../#{file}", base)
    end
  rescue SyntaxError => e
    SyntaxErrorSearch.handle_error(e)
  end
end
32
+
33
# Private Object-level `load` / `require` that delegate to the Kernel module
# functions and pass any SyntaxError to SyntaxErrorSearch.handle_error.
#
# NOTE: it is not understood why this delegation is required. Bootsnap
# appears to patch Kernel without it, so either bootsnap has an uncaught bug
# or this library is doing something different.
class Object
  private def load(path, wrap = false)
    begin
      Kernel.load(path, wrap)
    rescue SyntaxError => error
      SyntaxErrorSearch.handle_error(error)
    end
  end

  private def require(path)
    begin
      Kernel.require(path)
    rescue SyntaxError => error
      SyntaxErrorSearch.handle_error(error)
    end
  end
end
51
+
@@ -0,0 +1,219 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SyntaxErrorSearch
4
# Multiple lines form a singular CodeBlock
#
# Source code is made of multiple CodeBlocks. A code block keeps a reference
# to all the code lines of the document it came from (`code_lines`), which
# allows it to "expand" and capture surrounding lines when needed.
#
# The most important ability of a CodeBlock is this ability to expand:
#
# Example:
#
#   code_block.to_s # =>
#   #   def foo
#   #     puts "foo"
#   #   end
#
#   code_block.expand_until_next_boundry
#
#   code_block.to_s # =>
#   # class Foo
#   #   def foo
#   #     puts "foo"
#   #   end
#   # end
#
class CodeBlock
  attr_reader :lines, :code_lines

  # code_lines - every line of the document this block belongs to
  # lines      - the line (or array of lines) this block starts out holding
  def initialize(code_lines: nil, lines: [])
    @lines = Array(lines)
    @code_lines = code_lines
  end

  # True when the block's text, ignoring surrounding whitespace, is exactly "end"
  def is_end?
    to_s.strip == "end"
  end

  # Line number of the first line in the block, nil when the block is empty
  def starts_at
    @lines.first&.line_number
  end

  # This is used for frontier ordering, we are searching from
  # the largest indentation to the smallest. This allows us to
  # populate an array with multiple code blocks then call `sort!`
  # on it without having to specify the sorting criteria
  def <=>(other)
    current_indent <=> other.current_indent
  end

  # Only the lines that are not empty and visible
  def visible_lines
    @lines.select { |line| line.not_empty? && line.visible? }
  end

  # This method is used to expand a code block to capture its calling context.
  # Returns self.
  def expand_until_next_boundry
    expand_to_indent(next_indent)
    self
  end

  # This method expands the given code block until it captures
  # its nearest neighbors. This is used to expand a single line of code
  # to its smallest likely block.
  #
  #   code_block.to_s # =>
  #   #   puts "foo"
  #   code_block.expand_until_neighbors
  #
  #   code_block.to_s # =>
  #   #   puts "foo"
  #   #   puts "bar"
  #   #   puts "baz"
  #
  # Returns self.
  def expand_until_neighbors
    expand_to_indent(current_indent)

    expand_hidden_parner_line if is_end?
    self
  end

  # Looks for the closest line above the block that has the same indentation
  # as the block. When that line is hidden it is marked visible and added to
  # the front of the block. Does nothing for an empty block.
  def expand_hidden_parner_line
    first_line = @lines.first
    return unless first_line

    index = first_line.index
    indent = current_indent
    partner_line = code_lines.reverse_each.find do |line|
      line.index < index && line.indent == indent
    end
    return unless partner_line&.hidden?

    partner_line.mark_visible
    @block_without = nil # the block changed, so the cached complement is stale
    @lines.prepend(partner_line)
  end

  # This method expands the existing code block up (before)
  # and down (after). It will break on change in indentation
  # and empty lines (the first empty line met is still captured).
  #
  #   code_block.to_s # =>
  #   #   def foo
  #   #     puts "foo"
  #   #   end
  #
  #   code_block.expand_to_indent(0)
  #   code_block.to_s # =>
  #   # class Foo
  #   #   def foo
  #   #     puts "foo"
  #   #   end
  #   # end
  #
  private def expand_to_indent(indent)
    # before_lines is ordered nearest-first, so flip the capture back into
    # document order before gluing it in front of the block.
    above = capture_until_boundary(before_lines(skip_empty: false), indent).reverse
    below = capture_until_boundary(after_lines(skip_empty: false), indent)

    @block_without = nil # the block changed, so the cached complement is stale
    @lines = above + @lines + below
  end

  # The larger of the indentation of the nearest non-empty visible line above
  # and below the block; a missing neighbor counts as 0.
  def next_indent
    [
      before_line&.indent || 0,
      after_line&.indent || 0
    ].max
  end

  # Indentation of the first non-empty line in the block, 0 when there is none
  def current_indent
    @lines.find(&:not_empty?)&.indent || 0
  end

  # Nearest non-empty visible line above the block, or nil
  def before_line
    before_lines.first
  end

  # Nearest non-empty visible line below the block, or nil
  def after_line
    after_lines.first
  end

  # Visible lines above the block, nearest first. Empty lines are dropped
  # unless `skip_empty: false`. Returns [] for an empty block.
  def before_lines(skip_empty: true)
    first_line = @lines.first
    return [] unless first_line

    index = first_line.index
    code_lines
      .select { |line| line.index < index && eligible_neighbor?(line, skip_empty) }
      .reverse
  end

  # Visible lines below the block, nearest first. Empty lines are dropped
  # unless `skip_empty: false`. Returns [] for an empty block.
  def after_lines(skip_empty: true)
    last_line = @lines.last
    return [] unless last_line

    index = last_line.index
    code_lines.select { |line| line.index > index && eligible_neighbor?(line, skip_empty) }
  end

  # Returns a code block of the source that does not include
  # the current lines. This is useful for checking if a source
  # with the given lines removed parses successfully. If so
  # then it's proof that the current block is invalid.
  #
  # The result is cached until the block is expanded again.
  def block_without
    @block_without ||= CodeBlock.new(
      code_lines: @code_lines,
      lines: @code_lines - @lines
    )
  end

  # Same check as `valid_without?`
  def document_valid_without?
    block_without.valid?
  end

  # True when the document is valid once this block's lines are removed
  def valid_without?
    block_without.valid?
  end

  def invalid?
    !valid?
  end

  # Delegates to SyntaxErrorSearch.valid? with the block's text
  def valid?
    SyntaxErrorSearch.valid?(to_s)
  end

  # All lines of the block joined into a single string
  def to_s
    @lines.join
  end

  # Collects lines from `candidates` (ordered nearest-first) for as long as
  # they sit at `indent`. The first empty line is captured and ends the scan;
  # a line at any other indentation ends the scan without being captured.
  private def capture_until_boundary(candidates, indent)
    captured = []
    candidates.each do |line|
      break unless line.empty? || line.indent == indent

      captured << line
      break if line.empty?
    end
    captured
  end

  # A neighbor must be visible, and non-empty when `skip_empty` is set
  private def eligible_neighbor?(line, skip_empty)
    (!skip_empty || line.not_empty?) && line.visible?
  end
end
219
+ end
@@ -0,0 +1,312 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SyntaxErrorSearch
4
+ # This class is responsible for generating, storing, and sorting code blocks
5
+ #
6
+ # The search algorithm for finding our syntax errors isn't in this class, but
7
+   # this class holds the bulk of the logic for generating, storing, detecting
8
+ # and filtering invalid code.
9
+ #
10
+ # This is loosely based on the idea of a "frontier" for searching for a path
11
+ # example: https://en.wikipedia.org/wiki/Dijkstra%27s_algorithm
12
+ #
13
+ # In this case our path is going from code with a syntax error to code without a
14
+ # syntax error. We're currently doing that by evaluating individual lines
15
+ # with respect to indentation and other whitespace (empty lines). As represented
16
+ # by individual "code blocks".
17
+ #
18
+ # This class does not just store the frontier that we're searching, but is responsible
19
+ # for generating new code blocks as well. This is not ideal, but the state of generating
20
+ # and evaluating paths i.e. codeblocks is very tightly coupled.
21
+ #
22
+ # ## Creation
23
+ #
24
+ # This example code is re-used in the other sections
25
+ #
26
+ # Example:
27
+ #
28
+ # code_lines = [
29
+   #     CodeLine.new(line: "def cinco\n", index: 0),
29
+   #     CodeLine.new(line: "  def dog\n", index: 1), # Syntax error 1
30
+   #     CodeLine.new(line: "  def cat\n", index: 2), # Syntax error 2
32
+ # CodeLine.new(line: "end\n", index: 3)
33
+ # ]
34
+ #
35
+ # frontier = CodeFrontier.new(code_lines: code_lines)
36
+ #
37
+ # frontier << frontier.next_block if frontier.next_block?
38
+ # frontier << frontier.next_block if frontier.next_block?
39
+ #
40
+ # frontier.holds_all_syntax_errors? # => true
41
+ # block = frontier.pop
42
+ # frontier.holds_all_syntax_errors? # => false
43
+ # frontier << block
44
+ # frontier.holds_all_syntax_errors? # => true
45
+ #
46
+ # frontier.detect_invalid_blocks.map(&:to_s) # =>
47
+ # [
48
+ # "def dog\n",
49
+ # "def cat\n"
50
+ # ]
51
+ #
52
+ # ## Block Generation
53
+ #
54
+ # Currently code blocks are generated based off of indentation. With the idea that blocks are,
55
+ # well, indented. Once a code block is added to the frontier or it is expanded, or it is generated
56
+ # then we also need to remove those lines from our generation code so we don't generate the same block
57
+ # twice by accident.
58
+ #
59
+   # This block generation is currently done via the "indent_hash" internally by starting at the outer
60
+ # most indentation.
61
+ #
62
+ # Example:
63
+ #
64
+ # ```
65
+ # def river
66
+ # puts "lol" # <=== Start looking here and expand outwards
67
+ # end
68
+ # ```
69
+ #
70
+ # Generating new code blocks is a little verbose but looks like this:
71
+ #
72
+ # frontier << frontier.next_block if frontier.next_block?
73
+ #
74
+ # Once a block is in the frontier, it can be popped off:
75
+ #
76
+ # frontier.pop
77
+ # # => <# CodeBlock >
78
+ #
79
+ # ## Block (frontier) storage, ordering and retrieval
80
+ #
81
+ # Once a block is generated it is stored internally in a frontier array. This is very similar to a search algorithm.
82
+ # The array is sorted by indentation order, so that when a block is popped off the array, the one with
83
+ # the largest current indentation is evaluated first.
84
+ #
85
+ # For example, if we have these two blocks in the frontier:
86
+ #
87
+ # ```
88
+ # # Block A - 0 spaces for indentation
89
+ #
90
+ # def cinco
91
+ # puts "lol"
92
+ # end
93
+ # ```
94
+ #
95
+ # ```
96
+ # # Block B - 2 spaces for indentation
97
+ #
98
+ # def river
99
+ # puts "hehe"
100
+ # end
101
+ # ```
102
+ #
103
+ # The "Block B" has more current indentation, so it would be evaluated first.
104
+ #
105
+ # ## Frontier evaluation (Find the syntax error)
106
+ #
107
+ # Another key difference between this and a normal search "frontier" is that we're not checking if
108
+ # an individual code block meets the goal (turning invalid code to valid code) since there can
109
+ # be multiple syntax errors and this will require multiple code blocks. To handle this, we're
110
+ # evaluating all the contents of the frontier at the same time to see if the solution exists in any
111
+ # of our search blocks.
112
+ #
113
+ # # Using the previously generated frontier
114
+ #
115
+   #   frontier << CodeBlock.new(lines: code_lines[1], code_lines: code_lines)
116
+ # frontier.holds_all_syntax_errors? # => false
117
+ #
118
+   #   frontier << CodeBlock.new(lines: code_lines[2], code_lines: code_lines)
119
+ # frontier.holds_all_syntax_errors? # => true
120
+ #
121
+ # ## Detect invalid blocks (Filter for smallest solution)
122
+ #
123
+   # After we prove that a solution exists and we've found it to be in our frontier, we can stop searching.
124
+ # Once we've done this, we need to search through the existing frontier code blocks to find the minimum combination
125
+ # of blocks that hold the solution. This is done in: `detect_invalid_blocks`.
126
+ #
127
+ # # Using the previously generated frontier
128
+ #
129
+ # frontier << CodeBlock.new(lines: code_lines[0], code_lines: code_lines)
130
+ # frontier << CodeBlock.new(lines: code_lines[1], code_lines: code_lines)
131
+ # frontier << CodeBlock.new(lines: code_lines[2], code_lines: code_lines)
132
+ # frontier << CodeBlock.new(lines: code_lines[3], code_lines: code_lines)
133
+ #
134
+ # frontier.count # => 4
135
+   #   frontier.detect_invalid_blocks.length # => 2
136
+ # frontier.detect_invalid_blocks.map(&:to_s) # =>
137
+ # [
138
+ # "def dog\n",
139
+ # "def cat\n"
140
+ # ]
141
+ #
142
+ # Once invalid blocks are found and filtered, then they can be passed to a formatter.
143
+ #
144
+ #
145
+ #
146
+
147
# Walks the lines that follow a given line and groups them into CodeBlocks.
class IndentScan
  attr_reader :code_lines

  # code_lines - every line of the document being scanned
  def initialize(code_lines: )
    @code_lines = code_lines
  end

  # Returns the non-empty, visible lines starting at `top_line` (inclusive),
  # stopping before the first such line indented less than `top_line`.
  def neighbors_from_top(top_line)
    code_lines
      .select { |line| line.index >= top_line.index && line.not_empty? && line.visible? }
      .take_while { |line| line.indent >= top_line.indent }
  end

  # Yields CodeBlocks built from the neighbors of `top_line`, working from the
  # bottom neighbor upwards. Each block starts as a single line and keeps
  # absorbing the line above it while the block is invalid and lines remain.
  #
  # Returns an Enumerator when no block is given.
  def each_neighbor_block(top_line)
    return enum_for(:each_neighbor_block, top_line) unless block_given?

    neighbors = neighbors_from_top(top_line)

    until neighbors.empty?
      lines = [neighbors.pop]
      block = CodeBlock.new(lines: lines, code_lines: code_lines)

      while block.invalid? && neighbors.any?
        lines.prepend(neighbors.pop)
        # Rebuild so validity is always judged on the current set of lines
        block = CodeBlock.new(lines: lines, code_lines: code_lines)
      end

      yield block
    end
  end
end
175
+
176
# Generates, stores and sorts the code blocks being searched.
#
# Internally it keeps two pieces of state:
#
# - `@frontier`: blocks that were added with `<<`, kept sorted by indentation
# - `@indent_hash`: non-empty lines grouped by indentation that do not yet
#   belong to any registered block
class CodeFrontier
  # code_lines - every line of the document being searched
  def initialize(code_lines: )
    @code_lines = code_lines
    @frontier = []
    @indent_hash = code_lines.reject(&:empty?).group_by(&:indent)
  end

  # Number of blocks currently in the frontier
  def count
    @frontier.count
  end

  # Returns true if the document is valid once the lines of all the given
  # blocks are removed from it. By default it checks the blocks present in
  # the frontier, but it can be used for arbitrary arrays of code blocks
  # as well.
  def holds_all_syntax_errors?(block_array = @frontier)
    without_lines = block_array.map(&:lines)

    SyntaxErrorSearch.valid_without?(
      without_lines: without_lines,
      code_lines: @code_lines
    )
  end

  # Removes and returns the last block of the sorted frontier, i.e. one with
  # the largest indentation. Returns nil when the frontier holds no blocks.
  def pop
    @frontier.pop
  end

  # True while there are lines left that no block has claimed yet
  def next_block?
    !@indent_hash.empty?
  end

  # Largest indentation among the unclaimed lines, nil when none are left
  def indent_hash_indent
    @indent_hash.keys.max
  end

  # First unclaimed line at the largest indentation, nil when none are left
  def next_indent_line
    @indent_hash[indent_hash_indent]&.first
  end

  # Intentionally empty placeholder
  def generate_blocks
  end

  # Builds a block from the first unclaimed line at the largest indentation,
  # expands it to its neighbors and registers its lines as claimed.
  #
  # Returns the new block, or nil when there are no unclaimed lines left.
  def next_block
    start_line = next_indent_line
    return nil unless start_line

    block = CodeBlock.new(
      lines: start_line,
      code_lines: @code_lines
    ).expand_until_neighbors

    register(block)
    block
  end

  # True when the last frontier block is indented at least as much as every
  # unclaimed line (or when no unclaimed lines remain). False when the
  # frontier holds no blocks.
  def expand?
    return false if @frontier.empty?
    return true if @indent_hash.empty?

    @frontier.last.current_indent >= indent_hash_indent
  end

  # This method is responsible for determining if a new code
  # block should be generated instead of evaluating an already
  # existing block in the frontier
  def generate_new_block?
    return false if @indent_hash.empty?
    return true if @frontier.empty?

    @frontier.last.current_indent <= indent_hash_indent
  end

  # Marks every line of `block` as claimed by removing it from the
  # indentation hash, then drops indentation levels left without lines.
  # Returns self.
  def register(block)
    block.lines.each do |line|
      @indent_hash[line.indent]&.delete(line)
    end
    @indent_hash.reject! { |_indent, lines| lines.empty? }
    self
  end

  # Add a block to the frontier
  #
  # This method ensures the frontier always remains sorted (in indentation order)
  # and that each code block's lines are removed from the indentation hash so we
  # don't re-evaluate the same line multiple times.
  def <<(block)
    register(block)

    @frontier << block
    @frontier.sort!

    self
  end

  def any?
    !empty?
  end

  # True when there are neither frontier blocks nor unclaimed lines left
  def empty?
    @frontier.empty? && @indent_hash.empty?
  end

  # Returns every non-empty combination of `array`, smallest first.
  #
  # Example:
  #
  #   combination([:a, :b, :c, :d])
  #   # => [[:a], [:b], [:c], [:d], [:a, :b], [:a, :c], [:a, :d], [:b, :c], [:b, :d], [:c, :d], [:a, :b, :c], [:a, :b, :d], [:a, :c, :d], [:b, :c, :d], [:a, :b, :c, :d]]
  def self.combination(array)
    (1..array.length).flat_map { |size| array.combination(size).to_a }
  end

  # Given that we know our syntax error exists somewhere in our frontier, we want to find
  # the smallest possible set of blocks that contain all the syntax errors.
  #
  # Candidates are tried in the same order `combination` lists them, but one
  # at a time, so the search stops at the first match without building every
  # combination up front. Returns [] when no combination matches.
  def detect_invalid_blocks
    (1..@frontier.length).each do |size|
      @frontier.combination(size) do |block_array|
        return block_array if holds_all_syntax_errors?(block_array)
      end
    end

    []
  end
end
312
+ end