syntax_search 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,51 @@
1
+ require_relative "../syntax_search"
2
+
3
+ # Monkey patch kernel to ensure that all `require` calls call the same
4
+ # method
5
# `require_relative` below relies on Pathname; require it explicitly so this
# file does not depend on another library having loaded it first.
require "pathname"

# Wraps Kernel's `load`, `require` and `require_relative` so that a
# SyntaxError raised while loading a file is passed to
# `SyntaxErrorSearch.handle_error` instead of propagating to the caller.
#
# The original implementations stay reachable through the `original_*`
# aliases. When a SyntaxError is rescued, each wrapper returns whatever
# `SyntaxErrorSearch.handle_error` returns.
module Kernel
  alias_method :original_require, :require
  alias_method :original_require_relative, :require_relative
  alias_method :original_load, :load

  # Loads `file`, forwarding `wrap` unchanged to the original `load` so the
  # caller's request to evaluate the file under an anonymous module is honored.
  def load(file, wrap = false)
    original_load(file, wrap)
  rescue SyntaxError => e
    SyntaxErrorSearch.handle_error(e)
  end

  # Requires `file` through the original `require`.
  def require(file)
    original_require(file)
  rescue SyntaxError => e
    SyntaxErrorSearch.handle_error(e)
  end

  # Resolves `file` against the directory of the calling file and requires
  # the resulting absolute path. Absolute paths are required as-is.
  #
  # Raises LoadError when the caller has no file on disk (its
  # `absolute_path` is nil), because there is no directory to resolve against.
  def require_relative(file)
    if Pathname.new(file).absolute?
      original_require file
    else
      caller_path = caller_locations(1, 1)[0].absolute_path
      raise LoadError, "cannot infer basepath" unless caller_path

      # "../<file>" steps from the caller's file up to its directory.
      original_require File.expand_path("../#{file}", caller_path)
    end
  rescue SyntaxError => e
    SyntaxErrorSearch.handle_error(e)
  end
end
32
+
33
+ # I honestly have no idea why this Object delegation is needed
34
+ # I keep staring at bootsnap and it doesn't have to do this
35
+ # is there a bug in their implementation they haven't caught or
36
+ # am I doing something different?
37
class Object
  private

  # Private `load` defined directly on Object. It delegates with `super`, so
  # the next `load` in the ancestor chain (normally Kernel#load, including any
  # override installed on it) does the work. A SyntaxError raised on the way
  # is handed to `SyntaxErrorSearch.handle_error`, whose result is returned.
  def load(path, wrap = false)
    super
  rescue SyntaxError => e
    SyntaxErrorSearch.handle_error(e)
  end

  # Private `require` defined directly on Object. Uses `super` rather than
  # calling `Kernel.require`, because the module-level `Kernel.require`
  # skips overrides of the Kernel#require instance method (for example the
  # one RubyGems installs).
  def require(path)
    super
  rescue SyntaxError => e
    SyntaxErrorSearch.handle_error(e)
  end
end
51
+
@@ -0,0 +1,219 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SyntaxErrorSearch
4
+ # Multiple lines form a singular CodeBlock
5
+ #
6
+ # Source code is made of multiple CodeBlocks. A code block
7
+ # has a reference to the source code that created itself, this allows
8
+ # a code block to "expand" when needed
9
+ #
10
+ # The most important ability of a CodeBlock is this ability to expand:
11
+ #
12
+ # Example:
13
+ #
14
+ # code_block.to_s # =>
15
+ # # def foo
16
+ # # puts "foo"
17
+ # # end
18
+ #
19
+ # code_block.expand_until_next_boundry
20
+ #
21
+ # code_block.to_s # =>
22
+ # # class Foo
23
+ # # def foo
24
+ # # puts "foo"
25
+ # # end
26
+ # # end
27
+ #
28
class CodeBlock
  # lines      - the line objects currently held by this block
  # code_lines - every line object of the document the block was cut from
  attr_reader :lines, :code_lines

  # code_lines - all line objects of the source document (needed to expand)
  # lines      - a single line object or an array of them forming the block
  def initialize(code_lines: nil, lines: [])
    @lines = Array(lines)
    @code_lines = code_lines
  end

  # True when the block's text, ignoring surrounding whitespace, is exactly "end"
  def is_end?
    to_s.strip == "end"
  end

  # Line number of the first line in the block, or nil for an empty block
  def starts_at
    @lines.first&.line_number
  end

  # This is used for frontier ordering, we are searching from
  # the largest indentation to the smallest. This allows us to
  # populate an array with multiple code blocks then call `sort!`
  # on it without having to specify the sorting criteria
  def <=>(other)
    current_indent <=> other.current_indent
  end

  # Only the lines that are not empty and visible
  def visible_lines
    @lines.select { |line| line.not_empty? && line.visible? }
  end

  # This method is used to expand a code block to capture its calling context.
  # Returns self so calls can be chained.
  def expand_until_next_boundry
    expand_to_indent(next_indent)
    self
  end

  # This method expands the given code block until it captures
  # its nearest neighbors. This is used to expand a single line of code
  # to its smallest likely block.
  #
  #   code_block.to_s # =>
  #   #   puts "foo"
  #   code_block.expand_until_neighbors
  #
  #   code_block.to_s # =>
  #   #   puts "foo"
  #   #   puts "bar"
  #   #   puts "baz"
  #
  # Returns self so calls can be chained.
  def expand_until_neighbors
    expand_to_indent(current_indent)

    # A block that is only an "end" may have a hidden opening line above it
    expand_hidden_parner_line if is_end?
    self
  end

  # Finds the closest preceding line with the same indentation as this block.
  # If that line is hidden it is marked visible and prepended to the block.
  def expand_hidden_parner_line
    index = @lines.first.index
    indent = current_indent
    partner_line = code_lines.reverse_each.find { |line| line.index < index && line.indent == indent }

    if partner_line&.hidden?
      partner_line.mark_visible
      @lines.prepend(partner_line)
    end
  end

  # This method expands the existing code block up (before)
  # and down (after). It will break on change in indentation
  # and empty lines.
  #
  #   code_block.to_s # =>
  #   #   def foo
  #   #     puts "foo"
  #   #   end
  #
  #   code_block.expand_to_indent(0)
  #   code_block.to_s # =>
  #   #   class Foo
  #   #     def foo
  #   #       puts "foo"
  #   #     end
  #   #   end
  #
  private def expand_to_indent(indent)
    # Both directions are computed from the block as it is now, before
    # @lines is reassigned. `above` is gathered nearest-first, so reverse it
    # to restore document order.
    above = collect_until_boundary(before_lines(skip_empty: false), indent).reverse
    below = collect_until_boundary(after_lines(skip_empty: false), indent)

    @lines = above + @lines + below
  end

  # Takes lines from `candidates` in order while they have the given indent.
  # An empty line is included in the result but ends the walk; the first
  # non-empty line with a different indent ends the walk without being included.
  private def collect_until_boundary(candidates, indent)
    collected = []
    candidates.each do |line|
      break unless line.empty? || line.indent == indent

      collected << line
      break if line.empty?
    end
    collected
  end

  # The larger indent of the nearest visible, non-empty lines directly
  # before and after the block (a missing neighbor counts as 0)
  def next_indent
    [
      before_line&.indent || 0,
      after_line&.indent || 0
    ].max
  end

  # Indent of the first non-empty line in the block, or 0 if there is none
  def current_indent
    lines.detect(&:not_empty?)&.indent || 0
  end

  # Nearest visible, non-empty line above the block (nil if none)
  def before_line
    before_lines.first
  end

  # Nearest visible, non-empty line below the block (nil if none)
  def after_line
    after_lines.first
  end

  # Visible document lines located before the block's first line, ordered
  # nearest-first. Empty lines are dropped unless skip_empty is false.
  def before_lines(skip_empty: true)
    index = @lines.first.index
    found = code_lines.select { |line| line.index < index }
    found.select!(&:not_empty?) if skip_empty
    found.select!(&:visible?)
    found.reverse!

    found
  end

  # Visible document lines located after the block's last line, in document
  # order. Empty lines are dropped unless skip_empty is false.
  def after_lines(skip_empty: true)
    index = @lines.last.index
    found = code_lines.select { |line| line.index > index }
    found.select!(&:not_empty?) if skip_empty
    found.select!(&:visible?)
    found
  end

  # Returns a code block of the source that does not include
  # the current lines. This is useful for checking if a source
  # with the given lines removed parses successfully. If so
  #
  # Then it's proof that the current block is invalid
  #
  # The result is memoized, so it reflects the block's lines at the time of
  # the first call.
  def block_without
    @block_without ||= CodeBlock.new(
      code_lines: @code_lines,
      lines: @code_lines - @lines
    )
  end

  # Same check as `valid_without?`
  def document_valid_without?
    valid_without?
  end

  # True when the document with this block's lines removed is valid
  def valid_without?
    block_without.valid?
  end

  def invalid?
    !valid?
  end

  # True when `SyntaxErrorSearch.valid?` accepts the block's source text
  def valid?
    SyntaxErrorSearch.valid?(to_s)
  end

  # Source text of the block: its lines joined together
  def to_s
    @lines.join
  end
end
219
+ end
@@ -0,0 +1,312 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SyntaxErrorSearch
4
+ # This class is responsible for generating, storing, and sorting code blocks
5
+ #
6
+ # The search algorithm for finding our syntax errors isn't in this class, but
7
+ # this class holds the bulk of the logic for generating, storing, detecting
8
+ # and filtering invalid code.
9
+ #
10
+ # This is loosely based on the idea of a "frontier" for searching for a path
11
+ # example: https://en.wikipedia.org/wiki/Dijkstra%27s_algorithm
12
+ #
13
+ # In this case our path is going from code with a syntax error to code without a
14
+ # syntax error. We're currently doing that by evaluating individual lines
15
+ # with respect to indentation and other whitespace (empty lines). As represented
16
+ # by individual "code blocks".
17
+ #
18
+ # This class does not just store the frontier that we're searching, but is responsible
19
+ # for generating new code blocks as well. This is not ideal, but the state of generating
20
+ # and evaluating paths i.e. codeblocks is very tightly coupled.
21
+ #
22
+ # ## Creation
23
+ #
24
+ # This example code is re-used in the other sections
25
+ #
26
+ # Example:
27
+ #
28
+ # code_lines = [
29
+ # CodeLine.new(line: "def cinco\n", index: 0)
30
+ # CodeLine.new(line: " def dog\n", index: 1) # Syntax error 1
31
+ # CodeLine.new(line: " def cat\n", index: 2) # Syntax error 2
32
+ # CodeLine.new(line: "end\n", index: 3)
33
+ # ]
34
+ #
35
+ # frontier = CodeFrontier.new(code_lines: code_lines)
36
+ #
37
+ # frontier << frontier.next_block if frontier.next_block?
38
+ # frontier << frontier.next_block if frontier.next_block?
39
+ #
40
+ # frontier.holds_all_syntax_errors? # => true
41
+ # block = frontier.pop
42
+ # frontier.holds_all_syntax_errors? # => false
43
+ # frontier << block
44
+ # frontier.holds_all_syntax_errors? # => true
45
+ #
46
+ # frontier.detect_invalid_blocks.map(&:to_s) # =>
47
+ # [
48
+ # "def dog\n",
49
+ # "def cat\n"
50
+ # ]
51
+ #
52
+ # ## Block Generation
53
+ #
54
+ # Currently code blocks are generated based off of indentation. With the idea that blocks are,
55
+ # well, indented. Once a code block is added to the frontier or it is expanded, or it is generated
56
+ # then we also need to remove those lines from our generation code so we don't generate the same block
57
+ # twice by accident.
58
+ #
59
+ # This block generation is currently done via the "indent_hash" internally by starting at the outer
60
+ # most indentation.
61
+ #
62
+ # Example:
63
+ #
64
+ # ```
65
+ # def river
66
+ # puts "lol" # <=== Start looking here and expand outwards
67
+ # end
68
+ # ```
69
+ #
70
+ # Generating new code blocks is a little verbose but looks like this:
71
+ #
72
+ # frontier << frontier.next_block if frontier.next_block?
73
+ #
74
+ # Once a block is in the frontier, it can be popped off:
75
+ #
76
+ # frontier.pop
77
+ # # => <# CodeBlock >
78
+ #
79
+ # ## Block (frontier) storage, ordering and retrieval
80
+ #
81
+ # Once a block is generated it is stored internally in a frontier array. This is very similar to a search algorithm.
82
+ # The array is sorted by indentation order, so that when a block is popped off the array, the one with
83
+ # the largest current indentation is evaluated first.
84
+ #
85
+ # For example, if we have these two blocks in the frontier:
86
+ #
87
+ # ```
88
+ # # Block A - 0 spaces for indentation
89
+ #
90
+ # def cinco
91
+ # puts "lol"
92
+ # end
93
+ # ```
94
+ #
95
+ # ```
96
+ # # Block B - 2 spaces for indentation
97
+ #
98
+ # def river
99
+ # puts "hehe"
100
+ # end
101
+ # ```
102
+ #
103
+ # The "Block B" has more current indentation, so it would be evaluated first.
104
+ #
105
+ # ## Frontier evaluation (Find the syntax error)
106
+ #
107
+ # Another key difference between this and a normal search "frontier" is that we're not checking if
108
+ # an individual code block meets the goal (turning invalid code to valid code) since there can
109
+ # be multiple syntax errors and this will require multiple code blocks. To handle this, we're
110
+ # evaluating all the contents of the frontier at the same time to see if the solution exists in any
111
+ # of our search blocks.
112
+ #
113
+ # # Using the previously generated frontier
114
+ #
115
+ # frontier << Block.new(lines: code_lines[1], code_lines: code_lines)
116
+ # frontier.holds_all_syntax_errors? # => false
117
+ #
118
+ # frontier << Block.new(lines: code_lines[2], code_lines: code_lines)
119
+ # frontier.holds_all_syntax_errors? # => true
120
+ #
121
+ # ## Detect invalid blocks (Filter for smallest solution)
122
+ #
123
+ # After we prove that a solution exists and we've found it to be in our frontier, we can stop searching.
124
+ # Once we've done this, we need to search through the existing frontier code blocks to find the minimum combination
125
+ # of blocks that hold the solution. This is done in: `detect_invalid_blocks`.
126
+ #
127
+ # # Using the previously generated frontier
128
+ #
129
+ # frontier << CodeBlock.new(lines: code_lines[0], code_lines: code_lines)
130
+ # frontier << CodeBlock.new(lines: code_lines[1], code_lines: code_lines)
131
+ # frontier << CodeBlock.new(lines: code_lines[2], code_lines: code_lines)
132
+ # frontier << CodeBlock.new(lines: code_lines[3], code_lines: code_lines)
133
+ #
134
+ # frontier.count # => 4
135
+ # frontier.detect_invalid_blocks.length => 2
136
+ # frontier.detect_invalid_blocks.map(&:to_s) # =>
137
+ # [
138
+ # "def dog\n",
139
+ # "def cat\n"
140
+ # ]
141
+ #
142
+ # Once invalid blocks are found and filtered, then they can be passed to a formatter.
143
+ #
144
+ #
145
+ #
146
+
147
# Walks a run of neighboring lines and groups them into code blocks.
class IndentScan
  attr_reader :code_lines

  # code_lines - all line objects of the document being scanned
  def initialize(code_lines: )
    @code_lines = code_lines
  end

  # Returns the non-empty, visible lines starting at `top_line` (inclusive)
  # and continuing while each line is indented at least as far as `top_line`.
  def neighbors_from_top(top_line)
    code_lines
      .select { |line| line.index >= top_line.index && line.not_empty? && line.visible? }
      .take_while { |line| line.indent >= top_line.indent }
  end

  # Yields code blocks built from the neighbors of `top_line`, working from
  # the bottom neighbor upwards. Each block starts as one line and takes in
  # the line above it for as long as the block is invalid and lines remain.
  #
  # Without a block this returns an Enumerator over the same code blocks.
  def each_neighbor_block(top_line)
    return enum_for(:each_neighbor_block, top_line) unless block_given?

    neighbors = neighbors_from_top(top_line)

    until neighbors.empty?
      lines = [neighbors.pop]
      block = CodeBlock.new(lines: lines, code_lines: code_lines)

      # Validity is checked on a freshly built block after every added line
      while block.invalid? && neighbors.any?
        lines.prepend neighbors.pop
        block = CodeBlock.new(lines: lines, code_lines: code_lines)
      end

      yield block
    end
  end
end
175
+
176
# Generates, stores and sorts code blocks while searching for syntax errors.
class CodeFrontier
  # code_lines - all line objects of the document. Non-empty lines are
  #              grouped by indentation so new blocks can be generated from
  #              the deepest indentation first.
  def initialize(code_lines: )
    @code_lines = code_lines
    @frontier = []
    @indent_hash = {}
    code_lines.each do |line|
      next if line.empty?

      (@indent_hash[line.indent] ||= []) << line
    end
  end

  # Number of blocks currently stored in the frontier
  def count
    @frontier.count
  end

  # Returns true if the document is valid with all lines
  # removed. By default it checks all blocks present in
  # the frontier array, but can be used for arbitrary arrays
  # of codeblocks as well
  def holds_all_syntax_errors?(block_array = @frontier)
    SyntaxErrorSearch.valid_without?(
      without_lines: block_array.map(&:lines),
      code_lines: @code_lines
    )
  end

  # Returns a code block with the largest indentation possible,
  # or nil when the frontier holds no blocks
  def pop
    @frontier.pop
  end

  # True while there are lines that have not been turned into a block yet
  def next_block?
    !@indent_hash.empty?
  end

  # Largest indentation that still has unregistered lines (nil if none)
  def indent_hash_indent
    @indent_hash.keys.max
  end

  # First unregistered line at the largest remaining indentation (nil if none)
  def next_indent_line
    @indent_hash[indent_hash_indent]&.first
  end

  # Intentionally empty in this version; kept so existing callers still work.
  def generate_blocks
  end

  # Builds a block from the next unregistered line, expands it to its
  # neighbors and removes its lines from the indentation hash.
  #
  # Returns the new block, or nil when there are no unregistered lines left.
  # The block is NOT added to the frontier; use `<<` for that.
  def next_block
    line = next_indent_line
    return nil unless line

    block = CodeBlock.new(
      lines: line,
      code_lines: @code_lines
    ).expand_until_neighbors

    register(block)
    block
  end

  # True when the most indented frontier block is at least as indented as
  # the deepest unregistered line, or when no unregistered lines remain.
  # Always false for a frontier without blocks.
  def expand?
    return false if @frontier.empty?
    return true if @indent_hash.empty?

    @frontier.last.current_indent >= indent_hash_indent
  end

  # This method is responsible for determining if a new code
  # block should be generated instead of evaluating an already
  # existing block in the frontier
  def generate_new_block?
    return false if @indent_hash.empty?
    return true if @frontier.empty?

    @frontier.last.current_indent <= indent_hash_indent
  end

  # Removes every line of `block` from the indentation hash so the same line
  # is not used to generate another block. Indentation levels left without
  # lines are dropped. Returns self.
  def register(block)
    block.lines.each do |line|
      @indent_hash[line.indent]&.delete(line)
    end
    @indent_hash.reject! { |_indent, lines| lines.empty? }
    self
  end

  # Add a block to the frontier
  #
  # This method ensures the frontier always remains sorted (in indentation order)
  # and that each code block's lines are removed from the indentation hash so we
  # don't re-evaluate the same line multiple times.
  def <<(block)
    register(block)

    @frontier << block
    @frontier.sort!

    self
  end

  def any?
    !empty?
  end

  # True when there are neither stored blocks nor unregistered lines
  def empty?
    @frontier.empty? && @indent_hash.empty?
  end

  # Example:
  #
  #   combination([:a, :b, :c, :d])
  #   # => [[:a], [:b], [:c], [:d], [:a, :b], [:a, :c], [:a, :d], [:b, :c], [:b, :d], [:c, :d], [:a, :b, :c], [:a, :b, :d], [:a, :c, :d], [:b, :c, :d], [:a, :b, :c, :d]]
  def self.combination(array)
    1.upto(array.length).flat_map do |size|
      array.combination(size).to_a
    end
  end

  # Given that we know our syntax error exists somewhere in our frontier, we want to find
  # the smallest possible set of blocks that contain all the syntax errors
  #
  # Candidates are tried smallest first, in the same order `combination`
  # lists them, but they are produced one at a time so the search stops at
  # the first match instead of building every subset up front.
  # Returns an empty array when no combination holds all the errors.
  def detect_invalid_blocks
    1.upto(@frontier.length) do |size|
      @frontier.combination(size) do |block_array|
        return block_array if holds_all_syntax_errors?(block_array)
      end
    end

    []
  end
end
312
+ end