syntax_search 0.1.2 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,219 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module SyntaxErrorSearch
4
- # Multiple lines form a singular CodeBlock
5
- #
6
- # Source code is made of multiple CodeBlocks. A code block
7
- # has a reference to the source code that created it; this allows
8
- # a code block to "expand" when needed
9
- #
10
- # The most important ability of a CodeBlock is this ability to expand:
11
- #
12
- # Example:
13
- #
14
- # code_block.to_s # =>
15
- # # def foo
16
- # # puts "foo"
17
- # # end
18
- #
19
- # code_block.expand_until_next_boundry
20
- #
21
- # code_block.to_s # =>
22
- # # class Foo
23
- # # def foo
24
- # # puts "foo"
25
- # # end
26
- # # end
27
- #
28
- class CodeBlock
29
- attr_reader :lines
30
-
31
- def initialize(code_lines: nil, lines: [])
32
- @lines = Array(lines)
33
- @code_lines = code_lines
34
- end
35
-
36
- def is_end?
37
- to_s.strip == "end"
38
- end
39
-
40
- def starts_at
41
- @lines.first&.line_number
42
- end
43
-
44
- def code_lines
45
- @code_lines
46
- end
47
-
48
- # This is used for frontier ordering, we are searching from
49
- # the largest indentation to the smallest. This allows us to
50
- # populate an array with multiple code blocks then call `sort!`
51
- # on it without having to specify the sorting criteria
52
- def <=>(other)
53
- self.current_indent <=> other.current_indent
54
- end
55
-
56
- # Only the lines that are not empty and visible
57
- def visible_lines
58
- @lines
59
- .select(&:not_empty?)
60
- .select(&:visible?)
61
- end
62
-
63
- # This method is used to expand a code block to capture its calling context
64
- def expand_until_next_boundry
65
- expand_to_indent(next_indent)
66
- self
67
- end
68
-
69
- # This method expands the given code block until it captures
70
- # its nearest neighbors. This is used to expand a single line of code
71
- # to its smallest likely block.
72
- #
73
- # code_block.to_s # =>
74
- # # puts "foo"
75
- # code_block.expand_until_neighbors
76
- #
77
- # code_block.to_s # =>
78
- # # puts "foo"
79
- # # puts "bar"
80
- # # puts "baz"
81
- #
82
- def expand_until_neighbors
83
- expand_to_indent(current_indent)
84
-
85
- expand_hidden_parner_line if self.to_s.strip == "end"
86
- self
87
- end
88
-
89
- def expand_hidden_parner_line
90
- index = @lines.first.index
91
- indent = current_indent
92
- partner_line = code_lines.select {|line| line.index < index && line.indent == indent }.last
93
-
94
- if partner_line&.hidden?
95
- partner_line.mark_visible
96
- @lines.prepend(partner_line)
97
- end
98
- end
99
-
100
- # This method expands the existing code block up (before)
101
- # and down (after). It will break on change in indentation
102
- # and empty lines.
103
- #
104
- # code_block.to_s # =>
105
- # # def foo
106
- # # puts "foo"
107
- # # end
108
- #
109
- # code_block.expand_to_indent(0)
110
- # code_block.to_s # =>
111
- # # class Foo
112
- # # def foo
113
- # # puts "foo"
114
- # # end
115
- # # end
116
- #
117
- private def expand_to_indent(indent)
118
- array = []
119
- before_lines(skip_empty: false).each do |line|
120
- if line.empty?
121
- array.prepend(line)
122
- break
123
- end
124
-
125
- if line.indent == indent
126
- array.prepend(line)
127
- else
128
- break
129
- end
130
- end
131
-
132
- array << @lines
133
-
134
- after_lines(skip_empty: false).each do |line|
135
- if line.empty?
136
- array << line
137
- break
138
- end
139
-
140
- if line.indent == indent
141
- array << line
142
- else
143
- break
144
- end
145
- end
146
-
147
- @lines = array.flatten
148
- end
149
-
150
- def next_indent
151
- [
152
- before_line&.indent || 0,
153
- after_line&.indent || 0
154
- ].max
155
- end
156
-
157
- def current_indent
158
- lines.detect(&:not_empty?)&.indent || 0
159
- end
160
-
161
- def before_line
162
- before_lines.first
163
- end
164
-
165
- def after_line
166
- after_lines.first
167
- end
168
-
169
- def before_lines(skip_empty: true)
170
- index = @lines.first.index
171
- lines = code_lines.select {|line| line.index < index }
172
- lines.select!(&:not_empty?) if skip_empty
173
- lines.select!(&:visible?)
174
- lines.reverse!
175
-
176
- lines
177
- end
178
-
179
- def after_lines(skip_empty: true)
180
- index = @lines.last.index
181
- lines = code_lines.select {|line| line.index > index }
182
- lines.select!(&:not_empty?) if skip_empty
183
- lines.select!(&:visible?)
184
- lines
185
- end
186
-
187
- # Returns a code block of the source that does not include
188
- # the current lines. This is useful for checking if a source
189
- # with the given lines removed parses successfully. If so,
190
- #
191
- # then it's proof that the current block is invalid
192
- def block_without
193
- @block_without ||= CodeBlock.new(
194
- source: @source,
195
- lines: @source.code_lines - @lines
196
- )
197
- end
198
-
199
- def document_valid_without?
200
- block_without.valid?
201
- end
202
-
203
- def valid_without?
204
- block_without.valid?
205
- end
206
-
207
- def invalid?
208
- !valid?
209
- end
210
-
211
- def valid?
212
- SyntaxErrorSearch.valid?(self.to_s)
213
- end
214
-
215
- def to_s
216
- @lines.join
217
- end
218
- end
219
- end
@@ -1,312 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module SyntaxErrorSearch
4
- # This class is responsible for generating, storing, and sorting code blocks
5
- #
6
- # The search algorithm for finding our syntax errors isn't in this class, but
7
- # this class holds the bulk of the logic for generating, storing, detecting
8
- # and filtering invalid code.
9
- #
10
- # This is loosely based on the idea of a "frontier" for searching for a path
11
- # example: https://en.wikipedia.org/wiki/Dijkstra%27s_algorithm
12
- #
13
- # In this case our path is going from code with a syntax error to code without a
14
- # syntax error. We're currently doing that by evaluating individual lines
15
- # with respect to indentation and other whitespace (empty lines). As represented
16
- # by individual "code blocks".
17
- #
18
- # This class does not just store the frontier that we're searching, but is responsible
19
- # for generating new code blocks as well. This is not ideal, but the state of generating
20
- # and evaluating paths i.e. codeblocks is very tightly coupled.
21
- #
22
- # ## Creation
23
- #
24
- # This example code is re-used in the other sections
25
- #
26
- # Example:
27
- #
28
- # code_lines = [
29
- # CodeLine.new(line: "def cinco\n", index: 0),
30
- # CodeLine.new(line: " def dog\n", index: 1), # Syntax error 1
31
- # CodeLine.new(line: " def cat\n", index: 2), # Syntax error 2
32
- # CodeLine.new(line: "end\n", index: 3)
33
- # ]
34
- #
35
- # frontier = CodeFrontier.new(code_lines: code_lines)
36
- #
37
- # frontier << frontier.next_block if frontier.next_block?
38
- # frontier << frontier.next_block if frontier.next_block?
39
- #
40
- # frontier.holds_all_syntax_errors? # => true
41
- # block = frontier.pop
42
- # frontier.holds_all_syntax_errors? # => false
43
- # frontier << block
44
- # frontier.holds_all_syntax_errors? # => true
45
- #
46
- # frontier.detect_invalid_blocks.map(&:to_s) # =>
47
- # [
48
- # "def dog\n",
49
- # "def cat\n"
50
- # ]
51
- #
52
- # ## Block Generation
53
- #
54
- # Currently code blocks are generated based off of indentation. With the idea that blocks are,
55
- # well, indented. Once a code block is added to the frontier or it is expanded, or it is generated
56
- # then we also need to remove those lines from our generation code so we don't generate the same block
57
- # twice by accident.
58
- #
59
- # This block generation is currently done via the "indent_hash" internally by starting at the
60
- # largest (most deeply nested) indentation.
61
- #
62
- # Example:
63
- #
64
- # ```
65
- # def river
66
- # puts "lol" # <=== Start looking here and expand outwards
67
- # end
68
- # ```
69
- #
70
- # Generating new code blocks is a little verbose but looks like this:
71
- #
72
- # frontier << frontier.next_block if frontier.next_block?
73
- #
74
- # Once a block is in the frontier, it can be popped off:
75
- #
76
- # frontier.pop
77
- # # => <# CodeBlock >
78
- #
79
- # ## Block (frontier) storage, ordering and retrieval
80
- #
81
- # Once a block is generated it is stored internally in a frontier array. This is very similar to a search algorithm.
82
- # The array is sorted by indentation order, so that when a block is popped off the array, the one with
83
- # the largest current indentation is evaluated first.
84
- #
85
- # For example, if we have these two blocks in the frontier:
86
- #
87
- # ```
88
- # # Block A - 0 spaces for indentation
89
- #
90
- # def cinco
91
- # puts "lol"
92
- # end
93
- # ```
94
- #
95
- # ```
96
- # # Block B - 2 spaces for indentation
97
- #
98
- # def river
99
- # puts "hehe"
100
- # end
101
- # ```
102
- #
103
- # The "Block B" has more current indentation, so it would be evaluated first.
104
- #
105
- # ## Frontier evaluation (Find the syntax error)
106
- #
107
- # Another key difference between this and a normal search "frontier" is that we're not checking if
108
- # an individual code block meets the goal (turning invalid code to valid code) since there can
109
- # be multiple syntax errors and this will require multiple code blocks. To handle this, we're
110
- # evaluating all the contents of the frontier at the same time to see if the solution exists in any
111
- # of our search blocks.
112
- #
113
- # # Using the previously generated frontier
114
- #
115
- # frontier << CodeBlock.new(lines: code_lines[1], code_lines: code_lines)
116
- # frontier.holds_all_syntax_errors? # => false
117
- #
118
- # frontier << CodeBlock.new(lines: code_lines[2], code_lines: code_lines)
119
- # frontier.holds_all_syntax_errors? # => true
120
- #
121
- # ## Detect invalid blocks (Filter for smallest solution)
122
- #
123
- # After we prove that a solution exists and we've found it to be in our frontier, we can stop searching.
124
- # Once we've done this, we need to search through the existing frontier code blocks to find the minimum combination
125
- # of blocks that hold the solution. This is done in: `detect_invalid_blocks`.
126
- #
127
- # # Using the previously generated frontier
128
- #
129
- # frontier << CodeBlock.new(lines: code_lines[0], code_lines: code_lines)
130
- # frontier << CodeBlock.new(lines: code_lines[1], code_lines: code_lines)
131
- # frontier << CodeBlock.new(lines: code_lines[2], code_lines: code_lines)
132
- # frontier << CodeBlock.new(lines: code_lines[3], code_lines: code_lines)
133
- #
134
- # frontier.count # => 4
135
- # frontier.detect_invalid_blocks.length # => 2
136
- # frontier.detect_invalid_blocks.map(&:to_s) # =>
137
- # [
138
- # "def dog\n",
139
- # "def cat\n"
140
- # ]
141
- #
142
- # Once invalid blocks are found and filtered, then they can be passed to a formatter.
143
- #
144
- #
145
- #
146
-
147
- class IndentScan
148
- attr_reader :code_lines
149
-
150
- def initialize(code_lines: )
151
- @code_lines = code_lines
152
- end
153
-
154
- def neighbors_from_top(top_line)
155
- code_lines
156
- .select {|l| l.index >= top_line.index }
157
- .select {|l| l.not_empty? }
158
- .select {|l| l.visible? }
159
- .take_while {|l| l.indent >= top_line.indent }
160
- end
161
-
162
- def each_neighbor_block(top_line)
163
- neighbors = neighbors_from_top(top_line)
164
-
165
- until neighbors.empty?
166
- lines = [neighbors.pop]
167
- while (block = CodeBlock.new(lines: lines, code_lines: code_lines)) && block.invalid? && neighbors.any?
168
- lines.prepend neighbors.pop
169
- end
170
-
171
- yield block if block
172
- end
173
- end
174
- end
175
-
176
- class CodeFrontier
177
- def initialize(code_lines: )
178
- @code_lines = code_lines
179
- @frontier = []
180
- @indent_hash = {}
181
- code_lines.each do |line|
182
- next if line.empty?
183
-
184
- @indent_hash[line.indent] ||= []
185
- @indent_hash[line.indent] << line
186
- end
187
- end
188
-
189
- def count
190
- @frontier.count
191
- end
192
-
193
- # Returns true if the document is valid with the lines of all given blocks
194
- # removed. By default it checks all blocks present in
195
- # the frontier array, but can be used for arbitrary arrays
196
- # of codeblocks as well
197
- def holds_all_syntax_errors?(block_array = @frontier)
198
- without_lines = block_array.map do |block|
199
- block.lines
200
- end
201
-
202
- SyntaxErrorSearch.valid_without?(
203
- without_lines: without_lines,
204
- code_lines: @code_lines
205
- )
206
- end
207
-
208
- # Returns a code block with the largest indentation possible
209
- def pop
210
- return nil if empty?
211
-
212
- return @frontier.pop
213
- end
214
-
215
- def next_block?
216
- !@indent_hash.empty?
217
- end
218
-
219
-
220
- def indent_hash_indent
221
- @indent_hash.keys.sort.last
222
- end
223
-
224
- def next_indent_line
225
- indent = @indent_hash.keys.sort.last
226
- @indent_hash[indent]&.first
227
- end
228
-
229
- def generate_blocks
230
- end
231
-
232
- def next_block
233
- indent = @indent_hash.keys.sort.last
234
- lines = @indent_hash[indent].first
235
-
236
- block = CodeBlock.new(
237
- lines: lines,
238
- code_lines: @code_lines
239
- ).expand_until_neighbors
240
-
241
- register(block)
242
- block
243
- end
244
-
245
- def expand?
246
- return false if @frontier.empty?
247
- return true if @indent_hash.empty?
248
-
249
- @frontier.last.current_indent >= @indent_hash.keys.sort.last
250
- end
251
-
252
- # This method is responsible for determining if a new code
253
- # block should be generated instead of evaluating an already
254
- # existing block in the frontier
255
- def generate_new_block?
256
- return false if @indent_hash.empty?
257
- return true if @frontier.empty?
258
-
259
- @frontier.last.current_indent <= @indent_hash.keys.sort.last
260
- end
261
-
262
- def register(block)
263
- block.lines.each do |line|
264
- @indent_hash[line.indent]&.delete(line)
265
- end
266
- @indent_hash.select! {|k, v| !v.empty?}
267
- self
268
- end
269
-
270
- # Add a block to the frontier
271
- #
272
- # This method ensures the frontier always remains sorted (in indentation order)
273
- # and that each code block's lines are removed from the indentation hash so we
274
- # don't re-evaluate the same line multiple times.
275
- def <<(block)
276
- register(block)
277
-
278
- @frontier << block
279
- @frontier.sort!
280
-
281
- self
282
- end
283
-
284
- def any?
285
- !empty?
286
- end
287
-
288
- def empty?
289
- @frontier.empty? && @indent_hash.empty?
290
- end
291
-
292
- # Example:
293
- #
294
- # combination([:a, :b, :c, :d])
295
- # # => [[:a], [:b], [:c], [:d], [:a, :b], [:a, :c], [:a, :d], [:b, :c], [:b, :d], [:c, :d], [:a, :b, :c], [:a, :b, :d], [:a, :c, :d], [:b, :c, :d], [:a, :b, :c, :d]]
296
- def self.combination(array)
297
- guesses = []
298
- 1.upto(array.length).each do |size|
299
- guesses.concat(array.combination(size).to_a)
300
- end
301
- guesses
302
- end
303
-
304
- # Given that we know our syntax error exists somewhere in our frontier, we want to find
305
- # the smallest possible set of blocks that contain all the syntax errors
306
- def detect_invalid_blocks
307
- self.class.combination(@frontier).detect do |block_array|
308
- holds_all_syntax_errors?(block_array)
309
- end || []
310
- end
311
- end
312
- end