syntax_search 0.1.2 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/CHANGELOG.md +2 -11
- data/Gemfile +1 -0
- data/Gemfile.lock +5 -5
- data/README.md +28 -15
- data/assets/syntax_search.gif +0 -0
- data/exe/syntax_search +5 -70
- data/lib/syntax_search/auto.rb +5 -49
- data/syntax_search.gemspec +2 -4
- metadata +6 -13
- data/lib/syntax_search.rb +0 -156
- data/lib/syntax_search/code_block.rb +0 -219
- data/lib/syntax_search/code_frontier.rb +0 -312
- data/lib/syntax_search/code_line.rb +0 -87
- data/lib/syntax_search/code_search.rb +0 -114
- data/lib/syntax_search/display_invalid_blocks.rb +0 -110
- data/lib/syntax_search/fyi.rb +0 -7
- data/lib/syntax_search/version.rb +0 -5
@@ -1,219 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module SyntaxErrorSearch
|
4
|
-
# Multiple lines form a singular CodeBlock
|
5
|
-
#
|
6
|
-
# Source code is made of multiple CodeBlocks. A code block
|
7
|
-
# has a reference to the source code that created itself, this allows
|
8
|
-
# a code block to "expand" when needed
|
9
|
-
#
|
10
|
-
# The most important ability of a CodeBlock is this ability to expand:
|
11
|
-
#
|
12
|
-
# Example:
|
13
|
-
#
|
14
|
-
# code_block.to_s # =>
|
15
|
-
# # def foo
|
16
|
-
# # puts "foo"
|
17
|
-
# # end
|
18
|
-
#
|
19
|
-
# code_block.expand_until_next_boundry
|
20
|
-
#
|
21
|
-
# code_block.to_s # =>
|
22
|
-
# # class Foo
|
23
|
-
# # def foo
|
24
|
-
# # puts "foo"
|
25
|
-
# # end
|
26
|
-
# # end
|
27
|
-
#
|
28
|
-
# A group of source lines that can grow ("expand") to take in neighbouring
# lines from the document it was cut from.
#
# `lines` holds the line objects currently in the block; `code_lines` holds
# every line object of the document and is what the expansion methods search.
# Line objects must respond to `index`, `indent`, `empty?`, `not_empty?`,
# `visible?`, `hidden?`, `mark_visible`, `line_number` and `to_s`.
class CodeBlock
  attr_reader :lines, :code_lines

  # code_lines - all line objects of the document (defaults to nil)
  # lines      - a single line or an array of lines that form this block
  def initialize(code_lines: nil, lines: [])
    @lines = Array(lines)
    @code_lines = code_lines
  end

  # True when the block's text, ignoring surrounding whitespace, is exactly "end".
  def is_end?
    to_s.strip == "end"
  end

  # The `line_number` of the first line, or nil when the block has no lines.
  def starts_at
    @lines.first&.line_number
  end

  # Orders blocks by their current indentation so that an array of blocks
  # can be sorted with a bare `sort!`.
  #
  # Note: Comparable is not mixed in; doing so would redefine `==` in terms
  # of indentation.
  def <=>(other)
    current_indent <=> other.current_indent
  end

  # The lines of this block that are both non-empty and visible.
  def visible_lines
    @lines.select { |line| line.not_empty? && line.visible? }
  end

  # Expands the block to the larger of the indentation levels found on the
  # nearest non-empty visible line before and after it. Returns self.
  def expand_until_next_boundry
    expand_to_indent(next_indent)
    self
  end

  # Expands the block over adjacent lines that share its current
  # indentation. Returns self.
  #
  #   code_block.to_s # =>
  #     #   puts "foo"
  #   code_block.expand_until_neighbors
  #
  #   code_block.to_s # =>
  #     #   puts "foo"
  #     #   puts "bar"
  #     #   puts "baz"
  #
  # When the result is a lone "end", a hidden line at the same indentation
  # above it is pulled in as well (see expand_hidden_parner_line).
  def expand_until_neighbors
    expand_to_indent(current_indent)
    expand_hidden_parner_line if is_end?
    self
  end

  # Finds the closest preceding line in the document with the block's
  # current indentation. If that line is hidden it is marked visible and
  # prepended to the block. Returns the updated lines array in that case,
  # nil otherwise.
  def expand_hidden_parner_line
    return if @lines.empty?

    first_index = @lines.first.index
    indent = current_indent
    partner_line = code_lines.to_a.reverse_each.find do |line|
      line.index < first_index && line.indent == indent
    end
    return unless partner_line&.hidden?

    partner_line.mark_visible
    @lines.prepend(partner_line)
  end

  # The larger indentation of the nearest non-empty visible line before and
  # after the block; a missing neighbour counts as 0.
  def next_indent
    [before_line&.indent || 0, after_line&.indent || 0].max
  end

  # Indentation of the first non-empty line in the block, or 0 if there is none.
  def current_indent
    lines.find(&:not_empty?)&.indent || 0
  end

  # Nearest non-empty visible line above the block, or nil.
  def before_line
    before_lines.first
  end

  # Nearest non-empty visible line below the block, or nil.
  def after_line
    after_lines.first
  end

  # Visible document lines above the block, nearest first. Empty lines are
  # dropped unless `skip_empty: false`. Returns [] for a block with no lines.
  def before_lines(skip_empty: true)
    return [] if @lines.empty?

    first_index = @lines.first.index
    surrounding_lines(skip_empty) { |line| line.index < first_index }.reverse
  end

  # Visible document lines below the block, nearest first. Empty lines are
  # dropped unless `skip_empty: false`. Returns [] for a block with no lines.
  def after_lines(skip_empty: true)
    return [] if @lines.empty?

    last_index = @lines.last.index
    surrounding_lines(skip_empty) { |line| line.index > last_index }
  end

  # Returns a new CodeBlock holding every document line that is NOT part of
  # this block. If that remainder parses, the syntax error must live in this
  # block.
  #
  # The result is rebuilt on every call because expanding the block changes
  # which lines are left over.
  def block_without
    CodeBlock.new(
      code_lines: @code_lines,
      lines: @code_lines.to_a - @lines
    )
  end

  # Same check as valid_without?.
  def document_valid_without?
    valid_without?
  end

  # True when the document with this block's lines removed is valid.
  def valid_without?
    block_without.valid?
  end

  def invalid?
    !valid?
  end

  # Delegates to SyntaxErrorSearch.valid? with the block's text.
  def valid?
    SyntaxErrorSearch.valid?(to_s)
  end

  # The block's lines joined into one string.
  def to_s
    @lines.join
  end

  private

  # Expands the block up (before) and down (after) over visible lines whose
  # indentation equals `indent`. In each direction the walk stops at the
  # first line with a different indentation, or right after taking in the
  # first empty line.
  #
  #   code_block.to_s # =>
  #     #   def foo
  #     #     puts "foo"
  #     #   end
  #
  #   code_block.expand_to_indent(0)
  #   code_block.to_s # =>
  #     # class Foo
  #     #   def foo
  #     #     puts "foo"
  #     #   end
  #     # end
  def expand_to_indent(indent)
    above = take_matching(before_lines(skip_empty: false), indent).reverse
    below = take_matching(after_lines(skip_empty: false), indent)

    @lines = above + @lines + below
  end

  # Walks `candidates` (nearest first) collecting lines until the indentation
  # changes; an empty line is collected and then ends the walk.
  def take_matching(candidates, indent)
    taken = []
    candidates.each do |line|
      break unless line.empty? || line.indent == indent

      taken << line
      break if line.empty?
    end
    taken
  end

  # Document lines accepted by the given block that are visible and, when
  # `skip_empty` is true, non-empty. Kept in document order.
  def surrounding_lines(skip_empty)
    code_lines.to_a.select do |line|
      yield(line) && (!skip_empty || line.not_empty?) && line.visible?
    end
  end
end
|
219
|
-
end
|
@@ -1,312 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module SyntaxErrorSearch
|
4
|
-
# This class is responsible for generating, storing, and sorting code blocks
|
5
|
-
#
|
6
|
-
# The search algorithm for finding our syntax errors isn't in this class, but
|
7
|
-
# this class holds the bulk of the logic for generating, storing, detecting
|
8
|
-
# and filtering invalid code.
|
9
|
-
#
|
10
|
-
# This is loosely based on the idea of a "frontier" for searching for a path
|
11
|
-
# example: https://en.wikipedia.org/wiki/Dijkstra%27s_algorithm
|
12
|
-
#
|
13
|
-
# In this case our path is going from code with a syntax error to code without a
|
14
|
-
# syntax error. We're currently doing that by evaluating individual lines
|
15
|
-
# with respect to indentation and other whitespace (empty lines). As represented
|
16
|
-
# by individual "code blocks".
|
17
|
-
#
|
18
|
-
# This class does not just store the frontier that we're searching, but is responsible
|
19
|
-
# for generating new code blocks as well. This is not ideal, but the state of generating
|
20
|
-
# and evaluating paths i.e. codeblocks is very tightly coupled.
|
21
|
-
#
|
22
|
-
# ## Creation
|
23
|
-
#
|
24
|
-
# This example code is re-used in the other sections
|
25
|
-
#
|
26
|
-
# Example:
|
27
|
-
#
|
28
|
-
# code_lines = [
|
29
|
-
# CodeLine.new(line: "def cinco\n", index: 0),
|
30
|
-
# CodeLine.new(line: " def dog\n", index: 1), # Syntax error 1
|
31
|
-
# CodeLine.new(line: " def cat\n", index: 2), # Syntax error 2
|
32
|
-
# CodeLine.new(line: "end\n", index: 3)
|
33
|
-
# ]
|
34
|
-
#
|
35
|
-
# frontier = CodeFrontier.new(code_lines: code_lines)
|
36
|
-
#
|
37
|
-
# frontier << frontier.next_block if frontier.next_block?
|
38
|
-
# frontier << frontier.next_block if frontier.next_block?
|
39
|
-
#
|
40
|
-
# frontier.holds_all_syntax_errors? # => true
|
41
|
-
# block = frontier.pop
|
42
|
-
# frontier.holds_all_syntax_errors? # => false
|
43
|
-
# frontier << block
|
44
|
-
# frontier.holds_all_syntax_errors? # => true
|
45
|
-
#
|
46
|
-
# frontier.detect_invalid_blocks.map(&:to_s) # =>
|
47
|
-
# [
|
48
|
-
# "def dog\n",
|
49
|
-
# "def cat\n"
|
50
|
-
# ]
|
51
|
-
#
|
52
|
-
# ## Block Generation
|
53
|
-
#
|
54
|
-
# Currently code blocks are generated based off of indentation. With the idea that blocks are,
|
55
|
-
# well, indented. Once a code block is added to the frontier or it is expanded, or it is generated
|
56
|
-
# then we also need to remove those lines from our generation code so we don't generate the same block
|
57
|
-
# twice by accident.
|
58
|
-
#
|
59
|
-
# This block generation is currently done via the "indent_hash" internally by starting at the outer
|
60
|
-
# most indentation.
|
61
|
-
#
|
62
|
-
# Example:
|
63
|
-
#
|
64
|
-
# ```
|
65
|
-
# def river
|
66
|
-
# puts "lol" # <=== Start looking here and expand outwards
|
67
|
-
# end
|
68
|
-
# ```
|
69
|
-
#
|
70
|
-
# Generating new code blocks is a little verbose but looks like this:
|
71
|
-
#
|
72
|
-
# frontier << frontier.next_block if frontier.next_block?
|
73
|
-
#
|
74
|
-
# Once a block is in the frontier, it can be popped off:
|
75
|
-
#
|
76
|
-
# frontier.pop
|
77
|
-
# # => <# CodeBlock >
|
78
|
-
#
|
79
|
-
# ## Block (frontier) storage, ordering and retrieval
|
80
|
-
#
|
81
|
-
# Once a block is generated it is stored internally in a frontier array. This is very similar to a search algorithm.
|
82
|
-
# The array is sorted by indentation order, so that when a block is popped off the array, the one with
|
83
|
-
# the largest current indentation is evaluated first.
|
84
|
-
#
|
85
|
-
# For example, if we have these two blocks in the frontier:
|
86
|
-
#
|
87
|
-
# ```
|
88
|
-
# # Block A - 0 spaces for indentation
|
89
|
-
#
|
90
|
-
# def cinco
|
91
|
-
# puts "lol"
|
92
|
-
# end
|
93
|
-
# ```
|
94
|
-
#
|
95
|
-
# ```
|
96
|
-
# # Block B - 2 spaces for indentation
|
97
|
-
#
|
98
|
-
# def river
|
99
|
-
# puts "hehe"
|
100
|
-
# end
|
101
|
-
# ```
|
102
|
-
#
|
103
|
-
# The "Block B" has more current indentation, so it would be evaluated first.
|
104
|
-
#
|
105
|
-
# ## Frontier evaluation (Find the syntax error)
|
106
|
-
#
|
107
|
-
# Another key difference between this and a normal search "frontier" is that we're not checking if
|
108
|
-
# an individual code block meets the goal (turning invalid code to valid code) since there can
|
109
|
-
# be multiple syntax errors and this will require multiple code blocks. To handle this, we're
|
110
|
-
# evaluating all the contents of the frontier at the same time to see if the solution exists in any
|
111
|
-
# of our search blocks.
|
112
|
-
#
|
113
|
-
# # Using the previously generated frontier
|
114
|
-
#
|
115
|
-
# frontier << CodeBlock.new(lines: code_lines[1], code_lines: code_lines)
|
116
|
-
# frontier.holds_all_syntax_errors? # => false
|
117
|
-
#
|
118
|
-
# frontier << CodeBlock.new(lines: code_lines[2], code_lines: code_lines)
|
119
|
-
# frontier.holds_all_syntax_errors? # => true
|
120
|
-
#
|
121
|
-
# ## Detect invalid blocks (Filter for smallest solution)
|
122
|
-
#
|
123
|
-
# After we prove that a solution exists and we've found it to be in our frontier, we can stop searching.
|
124
|
-
# Once we've done this, we need to search through the existing frontier code blocks to find the minimum combination
|
125
|
-
# of blocks that hold the solution. This is done in: `detect_invalid_blocks`.
|
126
|
-
#
|
127
|
-
# # Using the previously generated frontier
|
128
|
-
#
|
129
|
-
# frontier << CodeBlock.new(lines: code_lines[0], code_lines: code_lines)
|
130
|
-
# frontier << CodeBlock.new(lines: code_lines[1], code_lines: code_lines)
|
131
|
-
# frontier << CodeBlock.new(lines: code_lines[2], code_lines: code_lines)
|
132
|
-
# frontier << CodeBlock.new(lines: code_lines[3], code_lines: code_lines)
|
133
|
-
#
|
134
|
-
# frontier.count # => 4
|
135
|
-
# frontier.detect_invalid_blocks.length # => 2
|
136
|
-
# frontier.detect_invalid_blocks.map(&:to_s) # =>
|
137
|
-
# [
|
138
|
-
# "def dog\n",
|
139
|
-
# "def cat\n"
|
140
|
-
# ]
|
141
|
-
#
|
142
|
-
# Once invalid blocks are found and filtered, then they can be passed to a formatter.
|
143
|
-
#
|
144
|
-
#
|
145
|
-
#
|
146
|
-
|
147
|
-
# Walks a run of document lines that sit at or below a given line's
# indentation and groups them, bottom-up, into CodeBlocks.
#
# `code_lines` holds the line objects of the whole document; they must
# respond to `index`, `indent`, `not_empty?` and `visible?`.
class IndentScan
  attr_reader :code_lines

  def initialize(code_lines:)
    @code_lines = code_lines
  end

  # Returns the non-empty, visible lines from `top_line` downwards (by
  # `index`), stopping before the first line indented less than `top_line`.
  def neighbors_from_top(top_line)
    code_lines
      .select { |line| line.index >= top_line.index && line.not_empty? && line.visible? }
      .take_while { |line| line.indent >= top_line.indent }
  end

  # Yields CodeBlocks built from the neighbors of `top_line`, working from
  # the last neighbor upwards. Each block starts as a single line and keeps
  # absorbing the line above it while the block is invalid and lines remain,
  # so the final block yielded may still be invalid.
  #
  # Returns an Enumerator when called without a block.
  def each_neighbor_block(top_line)
    return enum_for(:each_neighbor_block, top_line) unless block_given?

    neighbors = neighbors_from_top(top_line)

    until neighbors.empty?
      lines = [neighbors.pop]
      block = CodeBlock.new(lines: lines, code_lines: code_lines)

      while block.invalid? && !neighbors.empty?
        lines.prepend(neighbors.pop)
        block = CodeBlock.new(lines: lines, code_lines: code_lines)
      end

      yield block
    end
  end
end
|
175
|
-
|
176
|
-
# Generates, stores and orders CodeBlocks while searching a document for
# syntax errors.
#
# Internally it keeps:
#   @frontier    - blocks added so far, kept sorted by `<=>` (indentation)
#   @indent_hash - indentation => non-empty lines not yet claimed by a block
class CodeFrontier
  # code_lines - all line objects of the document; each must respond to
  #              `empty?` and `indent`.
  def initialize(code_lines:)
    @code_lines = code_lines
    @frontier = []
    @indent_hash = code_lines.reject(&:empty?).group_by(&:indent)
  end

  # Number of blocks currently in the frontier.
  def count
    @frontier.count
  end

  # Passes the lines of every given block as `without_lines` to
  # SyntaxErrorSearch.valid_without? and returns its answer. By default it
  # checks all blocks in the frontier, but any array of blocks can be given.
  def holds_all_syntax_errors?(block_array = @frontier)
    SyntaxErrorSearch.valid_without?(
      without_lines: block_array.map(&:lines),
      code_lines: @code_lines
    )
  end

  # Removes and returns the last block of the sorted frontier (the one with
  # the largest indentation), or nil when the frontier holds no blocks.
  def pop
    @frontier.pop
  end

  # True while there are lines that no block has claimed yet.
  def next_block?
    !@indent_hash.empty?
  end

  # Largest indentation that still has unclaimed lines, or nil if none.
  def indent_hash_indent
    @indent_hash.keys.max
  end

  # First unclaimed line at the largest remaining indentation, or nil.
  def next_indent_line
    @indent_hash[indent_hash_indent]&.first
  end

  # Intentionally empty; kept so existing callers do not break.
  def generate_blocks
  end

  # Builds a block from the next unclaimed line at the largest indentation,
  # expands it over its neighbors, removes its lines from the unclaimed set
  # and returns it. The block is NOT added to the frontier; use `<<`.
  #
  # Returns nil when there are no unclaimed lines left.
  def next_block
    line = next_indent_line
    return nil unless line

    block = CodeBlock.new(
      lines: line,
      code_lines: @code_lines
    ).expand_until_neighbors

    register(block)
    block
  end

  # True when the most indented frontier block is at least as indented as
  # every unclaimed line (or no unclaimed lines remain). False when the
  # frontier holds no blocks.
  def expand?
    return false if @frontier.empty?
    return true if @indent_hash.empty?

    @frontier.last.current_indent >= indent_hash_indent
  end

  # Decides whether a new block should be generated instead of evaluating a
  # block already in the frontier: true when unclaimed lines exist and the
  # frontier is empty or its most indented block is no deeper than them.
  def generate_new_block?
    return false if @indent_hash.empty?
    return true if @frontier.empty?

    @frontier.last.current_indent <= indent_hash_indent
  end

  # Removes the block's lines from the unclaimed set so the same line is
  # not turned into a block twice. Returns self.
  def register(block)
    block.lines.each do |line|
      @indent_hash[line.indent]&.delete(line)
    end
    # Drop indentation levels that have no unclaimed lines left.
    @indent_hash.delete_if { |_indent, lines| lines.empty? }
    self
  end

  # Adds a block to the frontier, registering its lines and re-sorting so
  # the frontier stays in indentation order. Returns self.
  def <<(block)
    register(block)

    @frontier << block
    @frontier.sort!

    self
  end

  def any?
    !empty?
  end

  # True only when the frontier holds no blocks AND no unclaimed lines remain.
  def empty?
    @frontier.empty? && @indent_hash.empty?
  end

  # Every non-empty combination of `array`, smallest combinations first.
  #
  #   combination([:a, :b, :c])
  #   # => [[:a], [:b], [:c], [:a, :b], [:a, :c], [:b, :c], [:a, :b, :c]]
  def self.combination(array)
    (1..array.length).flat_map { |size| array.combination(size).to_a }
  end

  # Returns the first (and therefore smallest) combination of frontier
  # blocks for which holds_all_syntax_errors? is true, or [] if none is.
  #
  # Combinations are enumerated lazily, in the same order as
  # `self.combination`, so the search stops at the first hit without
  # building all 2^n - 1 candidates up front.
  def detect_invalid_blocks
    1.upto(@frontier.length) do |size|
      @frontier.combination(size) do |block_array|
        return block_array if holds_all_syntax_errors?(block_array)
      end
    end

    []
  end
end
|
312
|
-
end
|