syntax_search 0.1.0 → 0.1.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/check_changelog.yml +13 -0
- data/CHANGELOG.md +26 -0
- data/Gemfile +1 -0
- data/Gemfile.lock +3 -5
- data/README.md +39 -20
- data/assets/syntax_search.gif +0 -0
- data/lib/syntax_search.rb +23 -15
- data/lib/syntax_search/around_block_scan.rb +91 -0
- data/lib/syntax_search/block_expand.rb +78 -0
- data/lib/syntax_search/code_block.rb +16 -165
- data/lib/syntax_search/code_frontier.rb +40 -201
- data/lib/syntax_search/code_search.rb +45 -20
- data/lib/syntax_search/display_invalid_blocks.rb +24 -13
- data/lib/syntax_search/heredoc_block_parse.rb +30 -0
- data/lib/syntax_search/parse_blocks_from_indent_line.rb +56 -0
- data/lib/syntax_search/version.rb +1 -1
- data/lib/syntax_search/who_dis_syntax_error.rb +32 -0
- data/syntax_search.gemspec +0 -2
- metadata +12 -18
@@ -1,178 +1,43 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module SyntaxErrorSearch
|
4
|
-
#
|
4
|
+
# The main function of the frontier is to hold the edges of our search and to
|
5
|
+
# evaluate when we can stop searching.
|
5
6
|
#
|
6
|
-
#
|
7
|
-
# this is class holds the bulk of the logic for generating, storing, detecting
|
8
|
-
# and filtering invalid code.
|
7
|
+
# ## Knowing where we've been
|
9
8
|
#
|
10
|
-
#
|
11
|
-
#
|
9
|
+
# Once a code block is generated it is added onto the frontier where it will be
|
10
|
+
# sorted and then the frontier can be filtered. Large blocks that totally contain a
|
11
|
+
# smaller block will cause the smaller block to be evicted.
|
12
12
|
#
|
13
|
-
#
|
14
|
-
#
|
15
|
-
# with respect to indentation and other whitespace (empty lines). As represented
|
16
|
-
# by individual "code blocks".
|
13
|
+
# CodeFrontier#<<
|
14
|
+
# CodeFrontier#pop
|
17
15
|
#
|
18
|
-
#
|
19
|
-
# for generating new code blocks as well. This is not ideal, but the state of generating
|
20
|
-
# and evaluating paths i.e. codeblocks is very tightly coupled.
|
16
|
+
# ## Knowing where we can go
|
21
17
|
#
|
22
|
-
#
|
18
|
+
# Internally it keeps track of an "indent hash" which is exposed via `next_indent_line`.
|
19
|
+
# When called, this will return a line of code with the most indentation.
|
23
20
|
#
|
24
|
-
# This
|
21
|
+
# This line of code can be used to build a CodeBlock, and when that code block
|
22
|
+
# is added back to the frontier, then the lines in the code block are removed from the
|
23
|
+
# indent hash so we don't double-create the same block.
|
25
24
|
#
|
26
|
-
#
|
25
|
+
# CodeFrontier#next_indent_line
|
26
|
+
# CodeFrontier#register_indent_block
|
27
27
|
#
|
28
|
-
#
|
29
|
-
# CodeLine.new(line: "def cinco\n", index: 0)
|
30
|
-
# CodeLine.new(line: " def dog\n", index: 1) # Syntax error 1
|
31
|
-
# CodeLine.new(line: " def cat\n", index: 2) # Syntax error 2
|
32
|
-
# CodeLine.new(line: "end\n", index: 3)
|
33
|
-
# ]
|
28
|
+
# ## Knowing when to stop
|
34
29
|
#
|
35
|
-
#
|
30
|
+
# The frontier holds the syntax error when removing all code blocks from the original
|
31
|
+
# source document allows it to be parsed as syntactically valid:
|
36
32
|
#
|
37
|
-
#
|
38
|
-
# frontier << frontier.next_block if frontier.next_block?
|
33
|
+
# CodeFrontier#holds_all_syntax_errors?
|
39
34
|
#
|
40
|
-
#
|
41
|
-
# block = frontier.pop
|
42
|
-
# frontier.holds_all_syntax_errors? # => false
|
43
|
-
# frontier << block
|
44
|
-
# frontier.holds_all_syntax_errors? # => true
|
35
|
+
# ## Filtering false positives
|
45
36
|
#
|
46
|
-
#
|
47
|
-
#
|
48
|
-
# "def dog\n",
|
49
|
-
# "def cat\n"
|
50
|
-
# ]
|
37
|
+
# Once the search is completed, the frontier will have many blocks that do not contain
|
38
|
+
# the syntax error. To filter to the smallest subset that does, call:
|
51
39
|
#
|
52
|
-
#
|
53
|
-
#
|
54
|
-
# Currently code blocks are generated based off of indentation. With the idea that blocks are,
|
55
|
-
# well, indented. Once a code block is added to the frontier or it is expanded, or it is generated
|
56
|
-
# then we also need to remove those lines from our generation code so we don't generate the same block
|
57
|
-
# twice by accident.
|
58
|
-
#
|
59
|
-
# This is block generation is currently done via the "indent_hash" internally by starting at the outer
|
60
|
-
# most indentation.
|
61
|
-
#
|
62
|
-
# Example:
|
63
|
-
#
|
64
|
-
# ```
|
65
|
-
# def river
|
66
|
-
# puts "lol" # <=== Start looking here and expand outwards
|
67
|
-
# end
|
68
|
-
# ```
|
69
|
-
#
|
70
|
-
# Generating new code blocks is a little verbose but looks like this:
|
71
|
-
#
|
72
|
-
# frontier << frontier.next_block if frontier.next_block?
|
73
|
-
#
|
74
|
-
# Once a block is in the frontier, it can be popped off:
|
75
|
-
#
|
76
|
-
# frontier.pop
|
77
|
-
# # => <# CodeBlock >
|
78
|
-
#
|
79
|
-
# ## Block (frontier) storage, ordering and retrieval
|
80
|
-
#
|
81
|
-
# Once a block is generated it is stored internally in a frontier array. This is very similar to a search algorithm.
|
82
|
-
# The array is sorted by indentation order, so that when a block is popped off the array, the one with
|
83
|
-
# the largest current indentation is evaluated first.
|
84
|
-
#
|
85
|
-
# For example, if we have these two blocks in the frontier:
|
86
|
-
#
|
87
|
-
# ```
|
88
|
-
# # Block A - 0 spaces for indentation
|
89
|
-
#
|
90
|
-
# def cinco
|
91
|
-
# puts "lol"
|
92
|
-
# end
|
93
|
-
# ```
|
94
|
-
#
|
95
|
-
# ```
|
96
|
-
# # Block B - 2 spaces for indentation
|
97
|
-
#
|
98
|
-
# def river
|
99
|
-
# puts "hehe"
|
100
|
-
# end
|
101
|
-
# ```
|
102
|
-
#
|
103
|
-
# The "Block B" has more current indentation, so it would be evaluated first.
|
104
|
-
#
|
105
|
-
# ## Frontier evaluation (Find the syntax error)
|
106
|
-
#
|
107
|
-
# Another key difference between this and a normal search "frontier" is that we're not checking if
|
108
|
-
# an individual code block meets the goal (turning invalid code to valid code) since there can
|
109
|
-
# be multiple syntax errors and this will require multiple code blocks. To handle this, we're
|
110
|
-
# evaluating all the contents of the frontier at the same time to see if the solution exists in any
|
111
|
-
# of our search blocks.
|
112
|
-
#
|
113
|
-
# # Using the previously generated frontier
|
114
|
-
#
|
115
|
-
# frontier << Block.new(lines: code_lines[1], code_lines: code_lines)
|
116
|
-
# frontier.holds_all_syntax_errors? # => false
|
117
|
-
#
|
118
|
-
# frontier << Block.new(lines: code_lines[2], code_lines: code_lines)
|
119
|
-
# frontier.holds_all_syntax_errors? # => true
|
120
|
-
#
|
121
|
-
# ## Detect invalid blocks (Filter for smallest solution)
|
122
|
-
#
|
123
|
-
# After we prove that a solution exists and we've found it to be in our frontier, we can start stop searching.
|
124
|
-
# Once we've done this, we need to search through the existing frontier code blocks to find the minimum combination
|
125
|
-
# of blocks that hold the solution. This is done in: `detect_invalid_blocks`.
|
126
|
-
#
|
127
|
-
# # Using the previously generated frontier
|
128
|
-
#
|
129
|
-
# frontier << CodeBlock.new(lines: code_lines[0], code_lines: code_lines)
|
130
|
-
# frontier << CodeBlock.new(lines: code_lines[1], code_lines: code_lines)
|
131
|
-
# frontier << CodeBlock.new(lines: code_lines[2], code_lines: code_lines)
|
132
|
-
# frontier << CodeBlock.new(lines: code_lines[3], code_lines: code_lines)
|
133
|
-
#
|
134
|
-
# frontier.count # => 4
|
135
|
-
# frontier.detect_invalid_blocks.length => 2
|
136
|
-
# frontier.detect_invalid_blocks.map(&:to_s) # =>
|
137
|
-
# [
|
138
|
-
# "def dog\n",
|
139
|
-
# "def cat\n"
|
140
|
-
# ]
|
141
|
-
#
|
142
|
-
# Once invalid blocks are found and filtered, then they can be passed to a formatter.
|
143
|
-
#
|
144
|
-
#
|
145
|
-
#
|
146
|
-
|
147
|
-
class IndentScan
|
148
|
-
attr_reader :code_lines
|
149
|
-
|
150
|
-
def initialize(code_lines: )
|
151
|
-
@code_lines = code_lines
|
152
|
-
end
|
153
|
-
|
154
|
-
def neighbors_from_top(top_line)
|
155
|
-
code_lines
|
156
|
-
.select {|l| l.index >= top_line.index }
|
157
|
-
.select {|l| l.not_empty? }
|
158
|
-
.select {|l| l.visible? }
|
159
|
-
.take_while {|l| l.indent >= top_line.indent }
|
160
|
-
end
|
161
|
-
|
162
|
-
def each_neighbor_block(top_line)
|
163
|
-
neighbors = neighbors_from_top(top_line)
|
164
|
-
|
165
|
-
until neighbors.empty?
|
166
|
-
lines = [neighbors.pop]
|
167
|
-
while (block = CodeBlock.new(lines: lines, code_lines: code_lines)) && block.invalid? && neighbors.any?
|
168
|
-
lines.prepend neighbors.pop
|
169
|
-
end
|
170
|
-
|
171
|
-
yield block if block
|
172
|
-
end
|
173
|
-
end
|
174
|
-
end
|
175
|
-
|
40
|
+
# CodeFrontier#detect_invalid_blocks
|
176
41
|
class CodeFrontier
|
177
42
|
def initialize(code_lines: )
|
178
43
|
@code_lines = code_lines
|
@@ -207,16 +72,9 @@ module SyntaxErrorSearch
|
|
207
72
|
|
208
73
|
# Returns a code block with the largest indentation possible
|
209
74
|
def pop
|
210
|
-
return nil if empty?
|
211
|
-
|
212
75
|
return @frontier.pop
|
213
76
|
end
|
214
77
|
|
215
|
-
def next_block?
|
216
|
-
!@indent_hash.empty?
|
217
|
-
end
|
218
|
-
|
219
|
-
|
220
78
|
def indent_hash_indent
|
221
79
|
@indent_hash.keys.sort.last
|
222
80
|
end
|
@@ -226,40 +84,25 @@ module SyntaxErrorSearch
|
|
226
84
|
@indent_hash[indent]&.first
|
227
85
|
end
|
228
86
|
|
229
|
-
def generate_blocks
|
230
|
-
end
|
231
|
-
|
232
|
-
def next_block
|
233
|
-
indent = @indent_hash.keys.sort.last
|
234
|
-
lines = @indent_hash[indent].first
|
235
|
-
|
236
|
-
block = CodeBlock.new(
|
237
|
-
lines: lines,
|
238
|
-
code_lines: @code_lines
|
239
|
-
).expand_until_neighbors
|
240
|
-
|
241
|
-
register(block)
|
242
|
-
block
|
243
|
-
end
|
244
|
-
|
245
87
|
def expand?
|
246
88
|
return false if @frontier.empty?
|
247
89
|
return true if @indent_hash.empty?
|
248
90
|
|
249
|
-
@frontier.last.current_indent
|
250
|
-
|
91
|
+
frontier_indent = @frontier.last.current_indent
|
92
|
+
hash_indent = @indent_hash.keys.sort.last
|
251
93
|
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
94
|
+
if ENV["DEBUG"]
|
95
|
+
puts "```"
|
96
|
+
puts @frontier.last.to_s
|
97
|
+
puts "```"
|
98
|
+
puts " @frontier indent: #{frontier_indent}"
|
99
|
+
puts " @hash indent: #{hash_indent}"
|
100
|
+
end
|
258
101
|
|
259
|
-
|
102
|
+
frontier_indent >= hash_indent
|
260
103
|
end
|
261
104
|
|
262
|
-
def
|
105
|
+
def register_indent_block(block)
|
263
106
|
block.lines.each do |line|
|
264
107
|
@indent_hash[line.indent]&.delete(line)
|
265
108
|
end
|
@@ -273,22 +116,18 @@ module SyntaxErrorSearch
|
|
273
116
|
# and that each code block's lines are removed from the indentation hash so we
|
274
117
|
# don't re-evaluate the same line multiple times.
|
275
118
|
def <<(block)
|
276
|
-
|
119
|
+
register_indent_block(block)
|
277
120
|
|
121
|
+
# Make sure we don't double expand: if a code block fully engulfs another code block, keep the bigger one
|
122
|
+
@frontier.reject! {|b|
|
123
|
+
b.starts_at >= block.starts_at && b.ends_at <= block.ends_at
|
124
|
+
}
|
278
125
|
@frontier << block
|
279
126
|
@frontier.sort!
|
280
127
|
|
281
128
|
self
|
282
129
|
end
|
283
130
|
|
284
|
-
def any?
|
285
|
-
!empty?
|
286
|
-
end
|
287
|
-
|
288
|
-
def empty?
|
289
|
-
@frontier.empty? && @indent_hash.empty?
|
290
|
-
end
|
291
|
-
|
292
131
|
# Example:
|
293
132
|
#
|
294
133
|
# combination([:a, :b, :c, :d])
|
@@ -3,15 +3,16 @@
|
|
3
3
|
module SyntaxErrorSearch
|
4
4
|
# Searches code for a syntax error
|
5
5
|
#
|
6
|
-
# The bulk of the heavy lifting is done
|
6
|
+
# The bulk of the heavy lifting is done in:
|
7
7
|
#
|
8
|
-
#
|
8
|
+
# - CodeFrontier (Holds information for generating blocks and determining if we can stop searching)
|
9
|
+
# - ParseBlocksFromIndentLine (Creates blocks and adds them to the frontier)
|
10
|
+
# - BlockExpand (Expands existing blocks to search more code)
|
9
11
|
#
|
10
12
|
# ## Syntax error detection
|
11
13
|
#
|
12
14
|
# When the frontier holds the syntax error, we can stop searching
|
13
15
|
#
|
14
|
-
#
|
15
16
|
# search = CodeSearch.new(<<~EOM)
|
16
17
|
# def dog
|
17
18
|
# def lol
|
@@ -23,42 +24,51 @@ module SyntaxErrorSearch
|
|
23
24
|
# search.invalid_blocks.map(&:to_s) # =>
|
24
25
|
# # => ["def lol\n"]
|
25
26
|
#
|
26
|
-
#
|
27
27
|
class CodeSearch
|
28
28
|
private; attr_reader :frontier; public
|
29
29
|
public; attr_reader :invalid_blocks, :record_dir, :code_lines
|
30
30
|
|
31
|
-
def initialize(
|
31
|
+
def initialize(source, record_dir: ENV["SYNTAX_SEARCH_RECORD_DIR"])
|
32
|
+
@source = source
|
32
33
|
if record_dir
|
33
34
|
@time = Time.now.strftime('%Y-%m-%d-%H-%M-%s-%N')
|
34
35
|
@record_dir = Pathname(record_dir).join(@time).tap {|p| p.mkpath }
|
35
36
|
@write_count = 0
|
36
37
|
end
|
37
|
-
@code_lines =
|
38
|
+
@code_lines = source.lines.map.with_index do |line, i|
|
38
39
|
CodeLine.new(line: line, index: i)
|
39
40
|
end
|
40
41
|
@frontier = CodeFrontier.new(code_lines: @code_lines)
|
41
42
|
@invalid_blocks = []
|
42
43
|
@name_tick = Hash.new {|hash, k| hash[k] = 0 }
|
43
44
|
@tick = 0
|
44
|
-
@
|
45
|
+
@block_expand = BlockExpand.new(code_lines: code_lines)
|
46
|
+
@parse_blocks_from_indent_line = ParseBlocksFromIndentLine.new(code_lines: @code_lines)
|
45
47
|
end
|
46
48
|
|
49
|
+
# Used for debugging
|
47
50
|
def record(block:, name: "record")
|
48
51
|
return if !@record_dir
|
49
52
|
@name_tick[name] += 1
|
50
53
|
filename = "#{@write_count += 1}-#{name}-#{@name_tick[name]}.txt"
|
54
|
+
if ENV["DEBUG"]
|
55
|
+
puts "\n\n==== #{filename} ===="
|
56
|
+
puts "\n```#{block.starts_at}:#{block.ends_at}"
|
57
|
+
puts "#{block.to_s}"
|
58
|
+
puts "```"
|
59
|
+
puts " block indent: #{block.current_indent}"
|
60
|
+
end
|
51
61
|
@record_dir.join(filename).open(mode: "a") do |f|
|
52
62
|
display = DisplayInvalidBlocks.new(
|
53
63
|
blocks: block,
|
54
|
-
terminal: false
|
64
|
+
terminal: false,
|
65
|
+
code_lines: @code_lines,
|
55
66
|
)
|
56
67
|
f.write(display.indent display.code_with_lines)
|
57
68
|
end
|
58
69
|
end
|
59
70
|
|
60
|
-
def
|
61
|
-
frontier.register(block)
|
71
|
+
def push(block, name: )
|
62
72
|
record(block: block, name: name)
|
63
73
|
|
64
74
|
if block.valid?
|
@@ -69,33 +79,48 @@ module SyntaxErrorSearch
|
|
69
79
|
end
|
70
80
|
end
|
71
81
|
|
82
|
+
# Parses the most indented lines into blocks that are marked
|
83
|
+
# and added to the frontier
|
72
84
|
def add_invalid_blocks
|
73
85
|
max_indent = frontier.next_indent_line&.indent
|
74
86
|
|
75
87
|
while (line = frontier.next_indent_line) && (line.indent == max_indent)
|
76
|
-
neighbors = @scan.neighbors_from_top(frontier.next_indent_line)
|
77
88
|
|
78
|
-
@
|
89
|
+
@parse_blocks_from_indent_line.each_neighbor_block(frontier.next_indent_line) do |block|
|
79
90
|
record(block: block, name: "add")
|
80
|
-
if block.valid?
|
81
|
-
block.lines.each(&:mark_invisible)
|
82
|
-
end
|
83
|
-
end
|
84
91
|
|
85
|
-
|
86
|
-
|
92
|
+
block.mark_invisible if block.valid?
|
93
|
+
push(block, name: "add")
|
94
|
+
end
|
87
95
|
end
|
88
96
|
end
|
89
97
|
|
98
|
+
# Given an already existing block in the frontier, expand it to see
|
99
|
+
# if it contains our invalid syntax
|
90
100
|
def expand_invalid_block
|
91
101
|
block = frontier.pop
|
92
102
|
return unless block
|
93
103
|
|
94
|
-
block
|
95
|
-
|
104
|
+
record(block: block, name: "pop")
|
105
|
+
|
106
|
+
# block = block.expand_until_next_boundry
|
107
|
+
block = @block_expand.call(block)
|
108
|
+
push(block, name: "expand")
|
109
|
+
end
|
110
|
+
|
111
|
+
|
112
|
+
def sweep_heredocs
|
113
|
+
HeredocBlockParse.new(
|
114
|
+
source: @source,
|
115
|
+
code_lines: @code_lines
|
116
|
+
).call.each do |block|
|
117
|
+
push(block, name: "heredoc")
|
118
|
+
end
|
96
119
|
end
|
97
120
|
|
121
|
+
# Main search loop
|
98
122
|
def call
|
123
|
+
sweep_heredocs
|
99
124
|
until frontier.holds_all_syntax_errors?
|
100
125
|
@tick += 1
|
101
126
|
|
@@ -5,21 +5,22 @@ module SyntaxErrorSearch
|
|
5
5
|
class DisplayInvalidBlocks
|
6
6
|
attr_reader :filename
|
7
7
|
|
8
|
-
def initialize(blocks:, io: $stderr, filename: nil, terminal: false)
|
8
|
+
def initialize(code_lines: ,blocks:, io: $stderr, filename: nil, terminal: false, invalid_type: :unmatched_end)
|
9
9
|
@terminal = terminal
|
10
10
|
@filename = filename
|
11
11
|
@io = io
|
12
12
|
|
13
13
|
@blocks = Array(blocks)
|
14
14
|
@lines = @blocks.map(&:lines).flatten
|
15
|
-
@code_lines =
|
15
|
+
@code_lines = code_lines
|
16
16
|
@digit_count = @code_lines.last&.line_number.to_s.length
|
17
17
|
|
18
18
|
@invalid_line_hash = @lines.each_with_object({}) {|line, h| h[line] = true }
|
19
|
+
@invalid_type = invalid_type
|
19
20
|
end
|
20
21
|
|
21
22
|
def call
|
22
|
-
if @blocks.any?
|
23
|
+
if @blocks.any? { |b| !b.hidden? }
|
23
24
|
found_invalid_blocks
|
24
25
|
else
|
25
26
|
@io.puts "Syntax OK"
|
@@ -33,15 +34,28 @@ module SyntaxErrorSearch
|
|
33
34
|
end
|
34
35
|
|
35
36
|
private def found_invalid_blocks
|
36
|
-
@
|
37
|
+
case @invalid_type
|
38
|
+
when :missing_end
|
39
|
+
@io.puts <<~EOM
|
37
40
|
|
38
|
-
|
41
|
+
SyntaxSearch: Missing `end` detected
|
39
42
|
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
+
This code has a missing `end`. Ensure that all
|
44
|
+
syntax keywords (`def`, `do`, etc.) have a matching `end`.
|
45
|
+
|
46
|
+
EOM
|
47
|
+
when :unmatched_end
|
48
|
+
@io.puts <<~EOM
|
49
|
+
|
50
|
+
SyntaxSearch: Unmatched `end` detected
|
51
|
+
|
52
|
+
This code has an unmatched `end`. Ensure that all `end` lines
|
53
|
+
in your code have a matching syntax keyword (`def`, `do`, etc.)
|
54
|
+
and that you don't have any extra `end` lines.
|
55
|
+
|
56
|
+
EOM
|
57
|
+
end
|
43
58
|
|
44
|
-
EOM
|
45
59
|
@io.puts("file: #{filename}") if filename
|
46
60
|
@io.puts <<~EOM
|
47
61
|
simplified:
|
@@ -50,16 +64,13 @@ module SyntaxErrorSearch
|
|
50
64
|
EOM
|
51
65
|
end
|
52
66
|
|
53
|
-
def indent(string, with: "
|
67
|
+
def indent(string, with: " ")
|
54
68
|
string.each_line.map {|l| with + l }.join
|
55
69
|
end
|
56
70
|
|
57
71
|
def code_block
|
58
72
|
string = String.new("")
|
59
|
-
string << "```\n"
|
60
|
-
# string << "#".rjust(@digit_count) + " filename: #{filename}\n\n" if filename
|
61
73
|
string << code_with_lines
|
62
|
-
string << "```\n"
|
63
74
|
string
|
64
75
|
end
|
65
76
|
|