syntax_search 0.1.0 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/check_changelog.yml +13 -0
- data/CHANGELOG.md +26 -0
- data/Gemfile +1 -0
- data/Gemfile.lock +3 -5
- data/README.md +39 -20
- data/assets/syntax_search.gif +0 -0
- data/lib/syntax_search.rb +23 -15
- data/lib/syntax_search/around_block_scan.rb +91 -0
- data/lib/syntax_search/block_expand.rb +78 -0
- data/lib/syntax_search/code_block.rb +16 -165
- data/lib/syntax_search/code_frontier.rb +40 -201
- data/lib/syntax_search/code_search.rb +45 -20
- data/lib/syntax_search/display_invalid_blocks.rb +24 -13
- data/lib/syntax_search/heredoc_block_parse.rb +30 -0
- data/lib/syntax_search/parse_blocks_from_indent_line.rb +56 -0
- data/lib/syntax_search/version.rb +1 -1
- data/lib/syntax_search/who_dis_syntax_error.rb +32 -0
- data/syntax_search.gemspec +0 -2
- metadata +12 -18
@@ -1,178 +1,43 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module SyntaxErrorSearch
|
4
|
-
#
|
4
|
+
# The main function of the frontier is to hold the edges of our search and to
|
5
|
+
# evaluate when we can stop searching.
|
5
6
|
#
|
6
|
-
#
|
7
|
-
# this is class holds the bulk of the logic for generating, storing, detecting
|
8
|
-
# and filtering invalid code.
|
7
|
+
# ## Knowing where we've been
|
9
8
|
#
|
10
|
-
#
|
11
|
-
#
|
9
|
+
# Once a code block is generated it is added onto the frontier where it will be
|
10
|
+
# sorted and then the frontier can be filtered. Large blocks that totally contain a
|
11
|
+
# smaller block will cause the smaller block to be evicted.
|
12
12
|
#
|
13
|
-
#
|
14
|
-
#
|
15
|
-
# with respect to indentation and other whitespace (empty lines). As represented
|
16
|
-
# by individual "code blocks".
|
13
|
+
# CodeFrontier#<<
|
14
|
+
# CodeFrontier#pop
|
17
15
|
#
|
18
|
-
#
|
19
|
-
# for generating new code blocks as well. This is not ideal, but the state of generating
|
20
|
-
# and evaluating paths i.e. codeblocks is very tightly coupled.
|
16
|
+
# ## Knowing where we can go
|
21
17
|
#
|
22
|
-
#
|
18
|
+
# Internally it keeps track of an "indent hash" which is exposed via `next_indent_line`
|
19
|
+
# which, when called, returns a line of code with the most indentation.
|
23
20
|
#
|
24
|
-
# This
|
21
|
+
# This line of code can be used to build a CodeBlock, and when that code block
|
22
|
+
# is added back to the frontier, then the lines in the code block are removed from the
|
23
|
+
# indent hash so we don't double-create the same block.
|
25
24
|
#
|
26
|
-
#
|
25
|
+
# CodeFrontier#next_indent_line
|
26
|
+
# CodeFrontier#register_indent_block
|
27
27
|
#
|
28
|
-
#
|
29
|
-
# CodeLine.new(line: "def cinco\n", index: 0)
|
30
|
-
# CodeLine.new(line: " def dog\n", index: 1) # Syntax error 1
|
31
|
-
# CodeLine.new(line: " def cat\n", index: 2) # Syntax error 2
|
32
|
-
# CodeLine.new(line: "end\n", index: 3)
|
33
|
-
# ]
|
28
|
+
# ## Knowing when to stop
|
34
29
|
#
|
35
|
-
#
|
30
|
+
# The frontier holds the syntax error when removing all code blocks from the original
|
31
|
+
# source document allows it to be parsed as syntactically valid:
|
36
32
|
#
|
37
|
-
#
|
38
|
-
# frontier << frontier.next_block if frontier.next_block?
|
33
|
+
# CodeFrontier#holds_all_syntax_errors?
|
39
34
|
#
|
40
|
-
#
|
41
|
-
# block = frontier.pop
|
42
|
-
# frontier.holds_all_syntax_errors? # => false
|
43
|
-
# frontier << block
|
44
|
-
# frontier.holds_all_syntax_errors? # => true
|
35
|
+
# ## Filtering false positives
|
45
36
|
#
|
46
|
-
#
|
47
|
-
#
|
48
|
-
# "def dog\n",
|
49
|
-
# "def cat\n"
|
50
|
-
# ]
|
37
|
+
# Once the search is completed, the frontier will have many blocks that do not contain
|
38
|
+
# the syntax error. To filter to the smallest subset that does, call:
|
51
39
|
#
|
52
|
-
#
|
53
|
-
#
|
54
|
-
# Currently code blocks are generated based off of indentation. With the idea that blocks are,
|
55
|
-
# well, indented. Once a code block is added to the frontier or it is expanded, or it is generated
|
56
|
-
# then we also need to remove those lines from our generation code so we don't generate the same block
|
57
|
-
# twice by accident.
|
58
|
-
#
|
59
|
-
# This is block generation is currently done via the "indent_hash" internally by starting at the outer
|
60
|
-
# most indentation.
|
61
|
-
#
|
62
|
-
# Example:
|
63
|
-
#
|
64
|
-
# ```
|
65
|
-
# def river
|
66
|
-
# puts "lol" # <=== Start looking here and expand outwards
|
67
|
-
# end
|
68
|
-
# ```
|
69
|
-
#
|
70
|
-
# Generating new code blocks is a little verbose but looks like this:
|
71
|
-
#
|
72
|
-
# frontier << frontier.next_block if frontier.next_block?
|
73
|
-
#
|
74
|
-
# Once a block is in the frontier, it can be popped off:
|
75
|
-
#
|
76
|
-
# frontier.pop
|
77
|
-
# # => <# CodeBlock >
|
78
|
-
#
|
79
|
-
# ## Block (frontier) storage, ordering and retrieval
|
80
|
-
#
|
81
|
-
# Once a block is generated it is stored internally in a frontier array. This is very similar to a search algorithm.
|
82
|
-
# The array is sorted by indentation order, so that when a block is popped off the array, the one with
|
83
|
-
# the largest current indentation is evaluated first.
|
84
|
-
#
|
85
|
-
# For example, if we have these two blocks in the frontier:
|
86
|
-
#
|
87
|
-
# ```
|
88
|
-
# # Block A - 0 spaces for indentation
|
89
|
-
#
|
90
|
-
# def cinco
|
91
|
-
# puts "lol"
|
92
|
-
# end
|
93
|
-
# ```
|
94
|
-
#
|
95
|
-
# ```
|
96
|
-
# # Block B - 2 spaces for indentation
|
97
|
-
#
|
98
|
-
# def river
|
99
|
-
# puts "hehe"
|
100
|
-
# end
|
101
|
-
# ```
|
102
|
-
#
|
103
|
-
# The "Block B" has more current indentation, so it would be evaluated first.
|
104
|
-
#
|
105
|
-
# ## Frontier evaluation (Find the syntax error)
|
106
|
-
#
|
107
|
-
# Another key difference between this and a normal search "frontier" is that we're not checking if
|
108
|
-
# an individual code block meets the goal (turning invalid code to valid code) since there can
|
109
|
-
# be multiple syntax errors and this will require multiple code blocks. To handle this, we're
|
110
|
-
# evaluating all the contents of the frontier at the same time to see if the solution exists in any
|
111
|
-
# of our search blocks.
|
112
|
-
#
|
113
|
-
# # Using the previously generated frontier
|
114
|
-
#
|
115
|
-
# frontier << Block.new(lines: code_lines[1], code_lines: code_lines)
|
116
|
-
# frontier.holds_all_syntax_errors? # => false
|
117
|
-
#
|
118
|
-
# frontier << Block.new(lines: code_lines[2], code_lines: code_lines)
|
119
|
-
# frontier.holds_all_syntax_errors? # => true
|
120
|
-
#
|
121
|
-
# ## Detect invalid blocks (Filter for smallest solution)
|
122
|
-
#
|
123
|
-
# After we prove that a solution exists and we've found it to be in our frontier, we can start stop searching.
|
124
|
-
# Once we've done this, we need to search through the existing frontier code blocks to find the minimum combination
|
125
|
-
# of blocks that hold the solution. This is done in: `detect_invalid_blocks`.
|
126
|
-
#
|
127
|
-
# # Using the previously generated frontier
|
128
|
-
#
|
129
|
-
# frontier << CodeBlock.new(lines: code_lines[0], code_lines: code_lines)
|
130
|
-
# frontier << CodeBlock.new(lines: code_lines[1], code_lines: code_lines)
|
131
|
-
# frontier << CodeBlock.new(lines: code_lines[2], code_lines: code_lines)
|
132
|
-
# frontier << CodeBlock.new(lines: code_lines[3], code_lines: code_lines)
|
133
|
-
#
|
134
|
-
# frontier.count # => 4
|
135
|
-
# frontier.detect_invalid_blocks.length => 2
|
136
|
-
# frontier.detect_invalid_blocks.map(&:to_s) # =>
|
137
|
-
# [
|
138
|
-
# "def dog\n",
|
139
|
-
# "def cat\n"
|
140
|
-
# ]
|
141
|
-
#
|
142
|
-
# Once invalid blocks are found and filtered, then they can be passed to a formatter.
|
143
|
-
#
|
144
|
-
#
|
145
|
-
#
|
146
|
-
|
147
|
-
class IndentScan
|
148
|
-
attr_reader :code_lines
|
149
|
-
|
150
|
-
def initialize(code_lines: )
|
151
|
-
@code_lines = code_lines
|
152
|
-
end
|
153
|
-
|
154
|
-
def neighbors_from_top(top_line)
|
155
|
-
code_lines
|
156
|
-
.select {|l| l.index >= top_line.index }
|
157
|
-
.select {|l| l.not_empty? }
|
158
|
-
.select {|l| l.visible? }
|
159
|
-
.take_while {|l| l.indent >= top_line.indent }
|
160
|
-
end
|
161
|
-
|
162
|
-
def each_neighbor_block(top_line)
|
163
|
-
neighbors = neighbors_from_top(top_line)
|
164
|
-
|
165
|
-
until neighbors.empty?
|
166
|
-
lines = [neighbors.pop]
|
167
|
-
while (block = CodeBlock.new(lines: lines, code_lines: code_lines)) && block.invalid? && neighbors.any?
|
168
|
-
lines.prepend neighbors.pop
|
169
|
-
end
|
170
|
-
|
171
|
-
yield block if block
|
172
|
-
end
|
173
|
-
end
|
174
|
-
end
|
175
|
-
|
40
|
+
# CodeFrontier#detect_invalid_blocks
|
176
41
|
class CodeFrontier
|
177
42
|
def initialize(code_lines: )
|
178
43
|
@code_lines = code_lines
|
@@ -207,16 +72,9 @@ module SyntaxErrorSearch
|
|
207
72
|
|
208
73
|
# Returns a code block with the largest indentation possible
|
209
74
|
def pop
|
210
|
-
return nil if empty?
|
211
|
-
|
212
75
|
return @frontier.pop
|
213
76
|
end
|
214
77
|
|
215
|
-
def next_block?
|
216
|
-
!@indent_hash.empty?
|
217
|
-
end
|
218
|
-
|
219
|
-
|
220
78
|
def indent_hash_indent
|
221
79
|
@indent_hash.keys.sort.last
|
222
80
|
end
|
@@ -226,40 +84,25 @@ module SyntaxErrorSearch
|
|
226
84
|
@indent_hash[indent]&.first
|
227
85
|
end
|
228
86
|
|
229
|
-
def generate_blocks
|
230
|
-
end
|
231
|
-
|
232
|
-
def next_block
|
233
|
-
indent = @indent_hash.keys.sort.last
|
234
|
-
lines = @indent_hash[indent].first
|
235
|
-
|
236
|
-
block = CodeBlock.new(
|
237
|
-
lines: lines,
|
238
|
-
code_lines: @code_lines
|
239
|
-
).expand_until_neighbors
|
240
|
-
|
241
|
-
register(block)
|
242
|
-
block
|
243
|
-
end
|
244
|
-
|
245
87
|
def expand?
|
246
88
|
return false if @frontier.empty?
|
247
89
|
return true if @indent_hash.empty?
|
248
90
|
|
249
|
-
@frontier.last.current_indent
|
250
|
-
|
91
|
+
frontier_indent = @frontier.last.current_indent
|
92
|
+
hash_indent = @indent_hash.keys.sort.last
|
251
93
|
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
94
|
+
if ENV["DEBUG"]
|
95
|
+
puts "```"
|
96
|
+
puts @frontier.last.to_s
|
97
|
+
puts "```"
|
98
|
+
puts " @frontier indent: #{frontier_indent}"
|
99
|
+
puts " @hash indent: #{hash_indent}"
|
100
|
+
end
|
258
101
|
|
259
|
-
|
102
|
+
frontier_indent >= hash_indent
|
260
103
|
end
|
261
104
|
|
262
|
-
def
|
105
|
+
def register_indent_block(block)
|
263
106
|
block.lines.each do |line|
|
264
107
|
@indent_hash[line.indent]&.delete(line)
|
265
108
|
end
|
@@ -273,22 +116,18 @@ module SyntaxErrorSearch
|
|
273
116
|
# and that each code block's lines are removed from the indentation hash so we
|
274
117
|
# don't re-evaluate the same line multiple times.
|
275
118
|
def <<(block)
|
276
|
-
|
119
|
+
register_indent_block(block)
|
277
120
|
|
121
|
+
# Make sure we don't double expand, if a code block fully engulfs another code block, keep the bigger one
|
122
|
+
@frontier.reject! {|b|
|
123
|
+
b.starts_at >= block.starts_at && b.ends_at <= block.ends_at
|
124
|
+
}
|
278
125
|
@frontier << block
|
279
126
|
@frontier.sort!
|
280
127
|
|
281
128
|
self
|
282
129
|
end
|
283
130
|
|
284
|
-
def any?
|
285
|
-
!empty?
|
286
|
-
end
|
287
|
-
|
288
|
-
def empty?
|
289
|
-
@frontier.empty? && @indent_hash.empty?
|
290
|
-
end
|
291
|
-
|
292
131
|
# Example:
|
293
132
|
#
|
294
133
|
# combination([:a, :b, :c, :d])
|
@@ -3,15 +3,16 @@
|
|
3
3
|
module SyntaxErrorSearch
|
4
4
|
# Searches code for a syntax error
|
5
5
|
#
|
6
|
-
# The bulk of the heavy lifting is done
|
6
|
+
# The bulk of the heavy lifting is done in:
|
7
7
|
#
|
8
|
-
#
|
8
|
+
# - CodeFrontier (Holds information for generating blocks and determining if we can stop searching)
|
9
|
+
# - ParseBlocksFromIndentLine (Creates blocks into the frontier)
|
10
|
+
# - BlockExpand (Expands existing blocks to search more code)
|
9
11
|
#
|
10
12
|
# ## Syntax error detection
|
11
13
|
#
|
12
14
|
# When the frontier holds the syntax error, we can stop searching
|
13
15
|
#
|
14
|
-
#
|
15
16
|
# search = CodeSearch.new(<<~EOM)
|
16
17
|
# def dog
|
17
18
|
# def lol
|
@@ -23,42 +24,51 @@ module SyntaxErrorSearch
|
|
23
24
|
# search.invalid_blocks.map(&:to_s) # =>
|
24
25
|
# # => ["def lol\n"]
|
25
26
|
#
|
26
|
-
#
|
27
27
|
class CodeSearch
|
28
28
|
private; attr_reader :frontier; public
|
29
29
|
public; attr_reader :invalid_blocks, :record_dir, :code_lines
|
30
30
|
|
31
|
-
def initialize(
|
31
|
+
def initialize(source, record_dir: ENV["SYNTAX_SEARCH_RECORD_DIR"])
|
32
|
+
@source = source
|
32
33
|
if record_dir
|
33
34
|
@time = Time.now.strftime('%Y-%m-%d-%H-%M-%s-%N')
|
34
35
|
@record_dir = Pathname(record_dir).join(@time).tap {|p| p.mkpath }
|
35
36
|
@write_count = 0
|
36
37
|
end
|
37
|
-
@code_lines =
|
38
|
+
@code_lines = source.lines.map.with_index do |line, i|
|
38
39
|
CodeLine.new(line: line, index: i)
|
39
40
|
end
|
40
41
|
@frontier = CodeFrontier.new(code_lines: @code_lines)
|
41
42
|
@invalid_blocks = []
|
42
43
|
@name_tick = Hash.new {|hash, k| hash[k] = 0 }
|
43
44
|
@tick = 0
|
44
|
-
@
|
45
|
+
@block_expand = BlockExpand.new(code_lines: code_lines)
|
46
|
+
@parse_blocks_from_indent_line = ParseBlocksFromIndentLine.new(code_lines: @code_lines)
|
45
47
|
end
|
46
48
|
|
49
|
+
# Used for debugging
|
47
50
|
def record(block:, name: "record")
|
48
51
|
return if !@record_dir
|
49
52
|
@name_tick[name] += 1
|
50
53
|
filename = "#{@write_count += 1}-#{name}-#{@name_tick[name]}.txt"
|
54
|
+
if ENV["DEBUG"]
|
55
|
+
puts "\n\n==== #{filename} ===="
|
56
|
+
puts "\n```#{block.starts_at}:#{block.ends_at}"
|
57
|
+
puts "#{block.to_s}"
|
58
|
+
puts "```"
|
59
|
+
puts " block indent: #{block.current_indent}"
|
60
|
+
end
|
51
61
|
@record_dir.join(filename).open(mode: "a") do |f|
|
52
62
|
display = DisplayInvalidBlocks.new(
|
53
63
|
blocks: block,
|
54
|
-
terminal: false
|
64
|
+
terminal: false,
|
65
|
+
code_lines: @code_lines,
|
55
66
|
)
|
56
67
|
f.write(display.indent display.code_with_lines)
|
57
68
|
end
|
58
69
|
end
|
59
70
|
|
60
|
-
def
|
61
|
-
frontier.register(block)
|
71
|
+
def push(block, name: )
|
62
72
|
record(block: block, name: name)
|
63
73
|
|
64
74
|
if block.valid?
|
@@ -69,33 +79,48 @@ module SyntaxErrorSearch
|
|
69
79
|
end
|
70
80
|
end
|
71
81
|
|
82
|
+
# Parses the most indented lines into blocks that are marked
|
83
|
+
# and added to the frontier
|
72
84
|
def add_invalid_blocks
|
73
85
|
max_indent = frontier.next_indent_line&.indent
|
74
86
|
|
75
87
|
while (line = frontier.next_indent_line) && (line.indent == max_indent)
|
76
|
-
neighbors = @scan.neighbors_from_top(frontier.next_indent_line)
|
77
88
|
|
78
|
-
@
|
89
|
+
@parse_blocks_from_indent_line.each_neighbor_block(frontier.next_indent_line) do |block|
|
79
90
|
record(block: block, name: "add")
|
80
|
-
if block.valid?
|
81
|
-
block.lines.each(&:mark_invisible)
|
82
|
-
end
|
83
|
-
end
|
84
91
|
|
85
|
-
|
86
|
-
|
92
|
+
block.mark_invisible if block.valid?
|
93
|
+
push(block, name: "add")
|
94
|
+
end
|
87
95
|
end
|
88
96
|
end
|
89
97
|
|
98
|
+
# Given an already existing block in the frontier, expand it to see
|
99
|
+
# if it contains our invalid syntax
|
90
100
|
def expand_invalid_block
|
91
101
|
block = frontier.pop
|
92
102
|
return unless block
|
93
103
|
|
94
|
-
block
|
95
|
-
|
104
|
+
record(block: block, name: "pop")
|
105
|
+
|
106
|
+
# block = block.expand_until_next_boundry
|
107
|
+
block = @block_expand.call(block)
|
108
|
+
push(block, name: "expand")
|
109
|
+
end
|
110
|
+
|
111
|
+
|
112
|
+
def sweep_heredocs
|
113
|
+
HeredocBlockParse.new(
|
114
|
+
source: @source,
|
115
|
+
code_lines: @code_lines
|
116
|
+
).call.each do |block|
|
117
|
+
push(block, name: "heredoc")
|
118
|
+
end
|
96
119
|
end
|
97
120
|
|
121
|
+
# Main search loop
|
98
122
|
def call
|
123
|
+
sweep_heredocs
|
99
124
|
until frontier.holds_all_syntax_errors?
|
100
125
|
@tick += 1
|
101
126
|
|
@@ -5,21 +5,22 @@ module SyntaxErrorSearch
|
|
5
5
|
class DisplayInvalidBlocks
|
6
6
|
attr_reader :filename
|
7
7
|
|
8
|
-
def initialize(blocks:, io: $stderr, filename: nil, terminal: false)
|
8
|
+
def initialize(code_lines: ,blocks:, io: $stderr, filename: nil, terminal: false, invalid_type: :unmatched_end)
|
9
9
|
@terminal = terminal
|
10
10
|
@filename = filename
|
11
11
|
@io = io
|
12
12
|
|
13
13
|
@blocks = Array(blocks)
|
14
14
|
@lines = @blocks.map(&:lines).flatten
|
15
|
-
@code_lines =
|
15
|
+
@code_lines = code_lines
|
16
16
|
@digit_count = @code_lines.last&.line_number.to_s.length
|
17
17
|
|
18
18
|
@invalid_line_hash = @lines.each_with_object({}) {|line, h| h[line] = true }
|
19
|
+
@invalid_type = invalid_type
|
19
20
|
end
|
20
21
|
|
21
22
|
def call
|
22
|
-
if @blocks.any?
|
23
|
+
if @blocks.any? { |b| !b.hidden? }
|
23
24
|
found_invalid_blocks
|
24
25
|
else
|
25
26
|
@io.puts "Syntax OK"
|
@@ -33,15 +34,28 @@ module SyntaxErrorSearch
|
|
33
34
|
end
|
34
35
|
|
35
36
|
private def found_invalid_blocks
|
36
|
-
@
|
37
|
+
case @invalid_type
|
38
|
+
when :missing_end
|
39
|
+
@io.puts <<~EOM
|
37
40
|
|
38
|
-
|
41
|
+
SyntaxSearch: Missing `end` detected
|
39
42
|
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
+
This code has a missing `end`. Ensure that all
|
44
|
+
syntax keywords (`def`, `do`, etc.) have a matching `end`.
|
45
|
+
|
46
|
+
EOM
|
47
|
+
when :unmatched_end
|
48
|
+
@io.puts <<~EOM
|
49
|
+
|
50
|
+
SyntaxSearch: Unmatched `end` detected
|
51
|
+
|
52
|
+
This code has an unmatched `end`. Ensure that all `end` lines
|
53
|
+
in your code have a matching syntax keyword (`def`, `do`, etc.)
|
54
|
+
and that you don't have any extra `end` lines.
|
55
|
+
|
56
|
+
EOM
|
57
|
+
end
|
43
58
|
|
44
|
-
EOM
|
45
59
|
@io.puts("file: #{filename}") if filename
|
46
60
|
@io.puts <<~EOM
|
47
61
|
simplified:
|
@@ -50,16 +64,13 @@ module SyntaxErrorSearch
|
|
50
64
|
EOM
|
51
65
|
end
|
52
66
|
|
53
|
-
def indent(string, with: "
|
67
|
+
def indent(string, with: " ")
|
54
68
|
string.each_line.map {|l| with + l }.join
|
55
69
|
end
|
56
70
|
|
57
71
|
def code_block
|
58
72
|
string = String.new("")
|
59
|
-
string << "```\n"
|
60
|
-
# string << "#".rjust(@digit_count) + " filename: #{filename}\n\n" if filename
|
61
73
|
string << code_with_lines
|
62
|
-
string << "```\n"
|
63
74
|
string
|
64
75
|
end
|
65
76
|
|