syntax_suggest 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.circleci/config.yml +91 -0
- data/.github/workflows/check_changelog.yml +20 -0
- data/.gitignore +14 -0
- data/.rspec +3 -0
- data/.standard.yml +1 -0
- data/CHANGELOG.md +158 -0
- data/CODE_OF_CONDUCT.md +74 -0
- data/Gemfile +14 -0
- data/Gemfile.lock +67 -0
- data/LICENSE.txt +21 -0
- data/README.md +229 -0
- data/Rakefile +8 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/dead_end.gemspec +32 -0
- data/exe/syntax_suggest +7 -0
- data/lib/syntax_suggest/api.rb +199 -0
- data/lib/syntax_suggest/around_block_scan.rb +224 -0
- data/lib/syntax_suggest/block_expand.rb +74 -0
- data/lib/syntax_suggest/capture_code_context.rb +233 -0
- data/lib/syntax_suggest/clean_document.rb +304 -0
- data/lib/syntax_suggest/cli.rb +129 -0
- data/lib/syntax_suggest/code_block.rb +100 -0
- data/lib/syntax_suggest/code_frontier.rb +178 -0
- data/lib/syntax_suggest/code_line.rb +239 -0
- data/lib/syntax_suggest/code_search.rb +139 -0
- data/lib/syntax_suggest/core_ext.rb +101 -0
- data/lib/syntax_suggest/display_code_with_line_numbers.rb +70 -0
- data/lib/syntax_suggest/display_invalid_blocks.rb +84 -0
- data/lib/syntax_suggest/explain_syntax.rb +103 -0
- data/lib/syntax_suggest/left_right_lex_count.rb +168 -0
- data/lib/syntax_suggest/lex_all.rb +55 -0
- data/lib/syntax_suggest/lex_value.rb +70 -0
- data/lib/syntax_suggest/parse_blocks_from_indent_line.rb +60 -0
- data/lib/syntax_suggest/pathname_from_message.rb +59 -0
- data/lib/syntax_suggest/priority_engulf_queue.rb +63 -0
- data/lib/syntax_suggest/priority_queue.rb +105 -0
- data/lib/syntax_suggest/ripper_errors.rb +36 -0
- data/lib/syntax_suggest/unvisited_lines.rb +36 -0
- data/lib/syntax_suggest/version.rb +5 -0
- data/lib/syntax_suggest.rb +3 -0
- metadata +88 -0
@@ -0,0 +1,304 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module SyntaxSuggest
|
4
|
+
# Parses and sanitizes source into a lexically aware document
|
5
|
+
#
|
6
|
+
# Internally the document is represented by an array with each
|
7
|
+
# index containing a CodeLine correlating to a line from the source code.
|
8
|
+
#
|
9
|
+
# There are three main phases in the algorithm:
|
10
|
+
#
|
11
|
+
# 1. Sanitize/format input source
|
12
|
+
# 2. Search for invalid blocks
|
13
|
+
# 3. Format invalid blocks into something meaningful
|
14
|
+
#
|
15
|
+
# This class handles the first part.
|
16
|
+
#
|
17
|
+
# The reason this class exists is to format input source
|
18
|
+
# for better/easier/cleaner exploration.
|
19
|
+
#
|
20
|
+
# The CodeSearch class operates at the line level so
|
21
|
+
# we must be careful to not introduce lines that look
|
22
|
+
# valid by themselves, but when removed will trigger syntax errors
|
23
|
+
# or strange behavior.
|
24
|
+
#
|
25
|
+
# ## Join Trailing slashes
|
26
|
+
#
|
27
|
+
# Code with a trailing slash is logically treated as a single line:
|
28
|
+
#
|
29
|
+
# 1 it "code can be split" \
|
30
|
+
# 2 "across multiple lines" do
|
31
|
+
#
|
32
|
+
# In this case removing line 2 would add a syntax error. We get around
|
33
|
+
# this by internally joining the two lines into a single "line" object
|
34
|
+
#
|
35
|
+
# ## Logically Consecutive lines
|
36
|
+
#
|
37
|
+
# Code that can be broken over multiple
|
38
|
+
# lines such as method calls are on different lines:
|
39
|
+
#
|
40
|
+
# 1 User.
|
41
|
+
# 2 where(name: "schneems").
|
42
|
+
# 3 first
|
43
|
+
#
|
44
|
+
# Removing line 2 can introduce a syntax error. To fix this, all lines
|
45
|
+
# are joined into one.
|
46
|
+
#
|
47
|
+
# ## Heredocs
|
48
|
+
#
|
49
|
+
# A heredoc is a way of defining a multi-line string. They can cause many
|
50
|
+
# problems. If left as a single line, Ripper would try to parse the contents
|
51
|
+
# as ruby code rather than as a string. Even without this problem, we still
|
52
|
+
# hit an issue with indentation
|
53
|
+
#
|
54
|
+
# 1 foo = <<~HEREDOC
|
55
|
+
# 2 "Be yourself; everyone else is already taken.""
|
56
|
+
# 3 ― Oscar Wilde
|
57
|
+
# 4 puts "I look like ruby code" # but i'm still a heredoc
|
58
|
+
# 5 HEREDOC
|
59
|
+
#
|
60
|
+
# If we didn't join these lines then our algorithm would think that line 4
|
61
|
+
# is separate from the rest, has a higher indentation, then look at it first
|
62
|
+
# and remove it.
|
63
|
+
#
|
64
|
+
# If the code evaluates line 5 by itself it will think line 5 is a constant,
|
65
|
+
# remove it, and introduce a syntax error.
|
66
|
+
#
|
67
|
+
# All of these problems are fixed by joining the whole heredoc into a single
|
68
|
+
# line.
|
69
|
+
#
|
70
|
+
# ## Comments and whitespace
|
71
|
+
#
|
72
|
+
# Comments can throw off the way the lexer tells us that the line
|
73
|
+
# logically belongs with the next line. This is valid ruby but
|
74
|
+
# results in a different lex output than before:
|
75
|
+
#
|
76
|
+
# 1 User.
|
77
|
+
# 2 where(name: "schneems").
|
78
|
+
# 3 # Comment here
|
79
|
+
# 4 first
|
80
|
+
#
|
81
|
+
# To handle this we can replace comment lines with empty lines
|
82
|
+
# and then re-lex the source. This removal and re-lexing preserves
|
83
|
+
# line index and document size, but generates an easier to work with
|
84
|
+
# document.
|
85
|
+
#
|
86
|
+
class CleanDocument
  # Builds the document from raw source text.
  #
  # The source is first run through `clean_sweep`; the swept lines are
  # then handed to `CodeLine.from_source` to build the internal array.
  def initialize(source:)
    lines = clean_sweep(source: source)
    @document = CodeLine.from_source(lines.join, lines: lines)
  end

  # Call all of the document "cleaners"
  # and return self
  def call
    join_trailing_slash!
    join_consecutive!
    join_heredoc!

    self
  end

  # Return an array of CodeLines in the
  # document
  def lines
    @document
  end

  # Renders the document back to a string
  def to_s
    @document.join
  end

  # Remove comments and whitespace only lines
  #
  # replace with empty newlines
  #
  #   source = <<~'EOM'
  #     # Comment 1
  #     puts "hello"
  #     # Comment 2
  #     puts "world"
  #   EOM
  #
  #   lines = CleanDocument.new(source: source).lines
  #   expect(lines[0].to_s).to eq("\n")
  #   expect(lines[1].to_s).to eq("puts \"hello\"\n")
  #   expect(lines[2].to_s).to eq("\n")
  #   expect(lines[3].to_s).to eq("puts \"world\"\n")
  #
  # Important: This must be done before lexing.
  #
  # After this change is made, we lex the document because
  # removing comments can change how the doc is parsed.
  #
  # For example:
  #
  #   values = LexAll.new(source: <<~EOM)
  #     User.
  #       # comment
  #       where(name: 'schneems')
  #   EOM
  #   expect(
  #     values.count {|v| v.type == :on_ignored_nl}
  #   ).to eq(1)
  #
  # After the comment is removed:
  #
  #   values = LexAll.new(source: <<~EOM)
  #     User.
  #
  #       where(name: 'schneems')
  #   EOM
  #   expect(
  #     values.count {|v| v.type == :on_ignored_nl}
  #   ).to eq(2)
  #
  # Returns an Array of Strings, one entry per input line.
  def clean_sweep(source:)
    source.lines.map do |line|
      # Matches blank lines and comment-only lines. A `#` directly followed
      # by `{` is not matched, so lines starting with interpolation are kept.
      # The text after `#` is optional so that a bare `#` on a final line
      # without a trailing newline is treated as a comment too.
      # Adapted from https://rubular.com/r/LLE10D8HKMkJvs
      if line.match?(/^\s*(#([^{].*)?)?$/)
        $/
      else
        line
      end
    end
  end

  # Smushes all heredoc lines into one line
  #
  #   source = <<~'EOM'
  #     foo = <<~HEREDOC
  #       lol
  #       hehehe
  #     HEREDOC
  #   EOM
  #
  #   lines = CleanDocument.new(source: source).join_heredoc!.lines
  #   expect(lines[0].to_s).to eq(source)
  #   expect(lines[1].to_s).to eq("")
  def join_heredoc!
    start_index_stack = []
    heredoc_beg_end_index = []

    lines.each do |line|
      line.lex.each do |lex_value|
        case lex_value.type
        when :on_heredoc_beg
          start_index_stack << line.index
        when :on_heredoc_end
          # Pair this terminator with the most recently opened heredoc
          start_index = start_index_stack.pop
          end_index = line.index
          heredoc_beg_end_index << [start_index, end_index]
        end
      end
    end

    heredoc_groups = heredoc_beg_end_index.map do |start_index, end_index|
      @document[start_index..end_index]
    end

    join_groups(heredoc_groups)
    self
  end

  # Smushes logically "consecutive" lines
  #
  #   source = <<~'EOM'
  #     User.
  #       where(name: 'schneems').
  #       first
  #   EOM
  #
  #   lines = CleanDocument.new(source: source).join_consecutive!.lines
  #   expect(lines[0].to_s).to eq(source)
  #   expect(lines[1].to_s).to eq("")
  #
  # The one known case this doesn't handle is:
  #
  #   Ripper.lex <<~EOM
  #     a &&
  #      b ||
  #      c
  #   EOM
  #
  # For some reason this introduces `on_ignore_newline` but with BEG type
  def join_consecutive!
    consecutive_groups = @document.select(&:ignore_newline_not_beg?).map do |code_line|
      take_while_including(code_line.index..-1) do |line|
        line.ignore_newline_not_beg?
      end
    end

    join_groups(consecutive_groups)
    self
  end

  # Join lines with a trailing slash
  #
  #   source = <<~'EOM'
  #     it "code can be split" \
  #        "across multiple lines" do
  #   EOM
  #
  #   lines = CleanDocument.new(source: source).join_trailing_slash!.lines
  #   expect(lines[0].to_s).to eq(source)
  #   expect(lines[1].to_s).to eq("")
  def join_trailing_slash!
    trailing_groups = @document.select(&:trailing_slash?).map do |code_line|
      take_while_including(code_line.index..-1) { |x| x.trailing_slash? }
    end

    join_groups(trailing_groups)
    self
  end

  # Helper method for joining "groups" of lines
  #
  # Input is expected to be type Array<Array<CodeLine>>
  #
  # The outer array holds the various "groups" while the
  # inner array holds code lines.
  #
  # All code lines are "joined" into the first line in
  # their group.
  #
  # To preserve document size, empty lines are placed
  # in the place of the lines that were "joined"
  #
  # Returns self.
  def join_groups(groups)
    groups.each do |group|
      first_line = group.first

      # Handle the case of multiple groups in a row
      # if one is already replaced, move on
      next if @document[first_line.index].empty?

      # Join group into the first line
      @document[first_line.index] = CodeLine.new(
        lex: group.flat_map(&:lex),
        line: group.join,
        index: first_line.index
      )

      # Hide the rest of the lines
      group.drop(1).each do |joined_line|
        # The above lines already have newlines in them, if add more
        # then there will be double newline, use an empty line instead
        @document[joined_line.index] = CodeLine.new(line: "", index: joined_line.index, lex: [])
      end
    end

    self
  end

  # Helper method for grabbing elements from document
  #
  # Like `take_while` except when it stops
  # iterating, it also returns the line
  # that caused it to stop
  def take_while_including(range = 0..-1)
    take_next_and_stop = false
    @document[range].take_while do |line|
      # `next` yields nil (falsy), which ends the take_while
      next if take_next_and_stop

      # Remember that the block failed, but still keep this element
      take_next_and_stop = !(yield line)
      true
    end
  end
end
|
304
|
+
end
|
@@ -0,0 +1,129 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "pathname"
|
4
|
+
require "optparse"
|
5
|
+
|
6
|
+
module SyntaxSuggest
|
7
|
+
# All the logic of the exe/syntax_suggest CLI in one handy spot
|
8
|
+
#
|
9
|
+
# Cli.new(argv: ["--help"]).call
|
10
|
+
# Cli.new(argv: ["<path/to/file>.rb"]).call
|
11
|
+
# Cli.new(argv: ["<path/to/file>.rb", "--record=tmp"]).call
|
12
|
+
# Cli.new(argv: ["<path/to/file>.rb", "--terminal"]).call
|
13
|
+
#
|
14
|
+
class Cli
  attr_accessor :options

  # ARGV is Everything passed to the executable, does not include executable name
  #
  # All other inputs are dependency injection for testing:
  #
  # exit_obj - object that receives `exit` (defaults to Kernel)
  # io       - object that receives `puts` (defaults to $stdout)
  # env      - hash-like object read for SYNTAX_SUGGEST_RECORD_DIR and DEBUG
  def initialize(argv:, exit_obj: Kernel, io: $stdout, env: ENV)
    @options = {}
    @parser = nil
    options[:record_dir] = env["SYNTAX_SUGGEST_RECORD_DIR"]
    # DEBUG takes precedence over SYNTAX_SUGGEST_RECORD_DIR
    options[:record_dir] = "tmp" if env["DEBUG"]
    options[:terminal] = SyntaxSuggest::DEFAULT_VALUE

    @io = io
    @argv = argv
    @exit_obj = exit_obj
  end

  # Runs the command: parses options, validates the file argument, calls
  # `SyntaxSuggest.call` and exits 0 when the document is ok, 1 otherwise.
  #
  # Every `exit` call is followed by an explicit `return` because an
  # injected exit_obj may not actually terminate the process.
  def call
    if @argv.empty?
      # Display help if raw command
      parser.parse! %w[--help]
      return
    end

    # Mutates @argv
    parse
    return if options[:exit]

    file_name = @argv.first
    if file_name.nil?
      @io.puts "No file given"
      @exit_obj.exit(1)
      return
    end

    file = Pathname(file_name)
    unless file.exist?
      @io.puts "file not found: #{file.expand_path} "
      @exit_obj.exit(1)
      return
    end

    @io.puts "Record dir: #{options[:record_dir]}" if options[:record_dir]

    display = SyntaxSuggest.call(
      io: @io,
      source: file.read,
      filename: file.expand_path,
      terminal: options.fetch(:terminal, SyntaxSuggest::DEFAULT_VALUE),
      record_dir: options[:record_dir]
    )

    if display.document_ok?
      @exit_obj.exit(0)
    else
      @exit_obj.exit(1)
    end
  end

  # Parses @argv in place (recognized options are removed from it)
  # and returns self.
  def parse
    parser.parse!(@argv)

    self
  end

  # Lazily builds and memoizes the OptionParser for the CLI.
  def parser
    @parser ||= OptionParser.new do |opts|
      opts.banner = <<~EOM
        Usage: syntax_suggest <file> [options]

        Parses a ruby source file and searches for syntax error(s) such as
        unexpected `end', expecting end-of-input.

        Example:

          $ syntax_suggest dog.rb

          # ...

          ❯ 10 defdog
          ❯ 15 end

        ENV options:

          SYNTAX_SUGGEST_RECORD_DIR=<dir>

          Records the steps used to search for a syntax error
          to the given directory

        Options:
      EOM

      opts.version = SyntaxSuggest::VERSION

      opts.on("--help", "Help - displays this message") do
        @io.puts opts
        # Flag lets `call` stop when exit_obj does not terminate the process
        options[:exit] = true
        @exit_obj.exit
      end

      opts.on("--record <dir>", "Records the steps used to search for a syntax error to the given directory") do |v|
        options[:record_dir] = v
      end

      opts.on("--terminal", "Enable terminal highlighting") do
        options[:terminal] = true
      end

      opts.on("--no-terminal", "Disable terminal highlighting") do
        options[:terminal] = false
      end
    end
  end
end
|
129
|
+
end
|
@@ -0,0 +1,100 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module SyntaxSuggest
|
4
|
+
# Multiple lines form a singular CodeBlock
|
5
|
+
#
|
6
|
+
# Source code is made of multiple CodeBlocks.
|
7
|
+
#
|
8
|
+
# Example:
|
9
|
+
#
|
10
|
+
# code_block.to_s # =>
|
11
|
+
# # def foo
|
12
|
+
# # puts "foo"
|
13
|
+
# # end
|
14
|
+
#
|
15
|
+
# code_block.valid? # => true
|
16
|
+
# code_block.invalid? # => false
|
17
|
+
#
|
18
|
+
#
|
19
|
+
class CodeBlock
|
20
|
+
UNSET = Object.new.freeze
|
21
|
+
attr_reader :lines, :starts_at, :ends_at
|
22
|
+
|
23
|
+
def initialize(lines: [])
|
24
|
+
@lines = Array(lines)
|
25
|
+
@valid = UNSET
|
26
|
+
@deleted = false
|
27
|
+
@starts_at = @lines.first.number
|
28
|
+
@ends_at = @lines.last.number
|
29
|
+
end
|
30
|
+
|
31
|
+
def delete
|
32
|
+
@deleted = true
|
33
|
+
end
|
34
|
+
|
35
|
+
def deleted?
|
36
|
+
@deleted
|
37
|
+
end
|
38
|
+
|
39
|
+
def visible_lines
|
40
|
+
@lines.select(&:visible?).select(&:not_empty?)
|
41
|
+
end
|
42
|
+
|
43
|
+
def mark_invisible
|
44
|
+
@lines.map(&:mark_invisible)
|
45
|
+
end
|
46
|
+
|
47
|
+
def is_end?
|
48
|
+
to_s.strip == "end"
|
49
|
+
end
|
50
|
+
|
51
|
+
def hidden?
|
52
|
+
@lines.all?(&:hidden?)
|
53
|
+
end
|
54
|
+
|
55
|
+
# This is used for frontier ordering, we are searching from
|
56
|
+
# the largest indentation to the smallest. This allows us to
|
57
|
+
# populate an array with multiple code blocks then call `sort!`
|
58
|
+
# on it without having to specify the sorting criteria
|
59
|
+
def <=>(other)
|
60
|
+
out = current_indent <=> other.current_indent
|
61
|
+
return out if out != 0
|
62
|
+
|
63
|
+
# Stable sort
|
64
|
+
starts_at <=> other.starts_at
|
65
|
+
end
|
66
|
+
|
67
|
+
def current_indent
|
68
|
+
@current_indent ||= lines.select(&:not_empty?).map(&:indent).min || 0
|
69
|
+
end
|
70
|
+
|
71
|
+
def invalid?
|
72
|
+
!valid?
|
73
|
+
end
|
74
|
+
|
75
|
+
def valid?
|
76
|
+
if @valid == UNSET
|
77
|
+
# Performance optimization
|
78
|
+
#
|
79
|
+
# If all the lines were previously hidden
|
80
|
+
# and we expand to capture additional empty
|
81
|
+
# lines then the result cannot be invalid
|
82
|
+
#
|
83
|
+
# That means there's no reason to re-check all
|
84
|
+
# lines with ripper (which is expensive).
|
85
|
+
# Benchmark in commit message
|
86
|
+
@valid = if lines.all? { |l| l.hidden? || l.empty? }
|
87
|
+
true
|
88
|
+
else
|
89
|
+
SyntaxSuggest.valid?(lines.map(&:original).join)
|
90
|
+
end
|
91
|
+
else
|
92
|
+
@valid
|
93
|
+
end
|
94
|
+
end
|
95
|
+
|
96
|
+
def to_s
|
97
|
+
@lines.join
|
98
|
+
end
|
99
|
+
end
|
100
|
+
end
|
@@ -0,0 +1,178 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module SyntaxSuggest
|
4
|
+
# The main function of the frontier is to hold the edges of our search and to
|
5
|
+
# evaluate when we can stop searching.
|
6
|
+
|
7
|
+
# There are three main phases in the algorithm:
|
8
|
+
#
|
9
|
+
# 1. Sanitize/format input source
|
10
|
+
# 2. Search for invalid blocks
|
11
|
+
# 3. Format invalid blocks into something meaningful
|
12
|
+
#
|
13
|
+
# The Code frontier is a critical part of the second step
|
14
|
+
#
|
15
|
+
# ## Knowing where we've been
|
16
|
+
#
|
17
|
+
# Once a code block is generated it is added onto the frontier. Then it will be
|
18
|
+
# sorted by indentation and frontier can be filtered. Large blocks that fully enclose a
|
19
|
+
# smaller block will cause the smaller block to be evicted.
|
20
|
+
#
|
21
|
+
# CodeFrontier#<<(block) # Adds block to frontier
|
22
|
+
# CodeFrontier#pop # Removes block from frontier
|
23
|
+
#
|
24
|
+
# ## Knowing where we can go
|
25
|
+
#
|
26
|
+
# Internally the frontier keeps track of "unvisited" lines which are exposed via `next_indent_line`
|
27
|
+
# when called, this method returns a line of code with the highest indentation.
|
28
|
+
#
|
29
|
+
# The returned line of code can be used to build a CodeBlock and then that code block
|
30
|
+
# is added back to the frontier. Then, the lines are removed from the
|
31
|
+
# "unvisited" so we don't double-create the same block.
|
32
|
+
#
|
33
|
+
# CodeFrontier#next_indent_line # Shows next line
|
34
|
+
# CodeFrontier#register_indent_block(block) # Removes lines from unvisited
|
35
|
+
#
|
36
|
+
# ## Knowing when to stop
|
37
|
+
#
|
38
|
+
# The frontier knows how to check the entire document for a syntax error. When blocks
|
39
|
+
# are added onto the frontier, they're removed from the document. When all code containing
|
40
|
+
# syntax errors has been added to the frontier, the document will be parsable without a
|
41
|
+
# syntax error and the search can stop.
|
42
|
+
#
|
43
|
+
# CodeFrontier#holds_all_syntax_errors? # Returns true when frontier holds all syntax errors
|
44
|
+
#
|
45
|
+
# ## Filtering false positives
|
46
|
+
#
|
47
|
+
# Once the search is completed, the frontier may have multiple blocks that do not contain
|
48
|
+
# the syntax error. To limit the result to the smallest subset of "invalid blocks" call:
|
49
|
+
#
|
50
|
+
# CodeFrontier#detect_invalid_blocks
|
51
|
+
#
|
52
|
+
class CodeFrontier
  # code_lines - the document's code lines, used when checking whether the
  #              document is valid without the frontier's lines
  # unvisited  - tracker of lines not yet placed in a block (injectable)
  def initialize(code_lines:, unvisited: UnvisitedLines.new(code_lines: code_lines))
    @code_lines = code_lines
    @unvisited = unvisited
    @queue = PriorityEngulfQueue.new

    @check_next = true
  end

  # Number of blocks currently held by the frontier
  def count
    @queue.length
  end

  # Performance optimization
  #
  # Parsing with ripper is expensive
  # If we know we don't have any blocks with invalid
  # syntax, then we know we cannot have found
  # the incorrect syntax yet.
  #
  # When an invalid block is added onto the frontier
  # check document state
  #
  # Reading the flag also resets it, so a full check runs at most once
  # per invalid block added.
  private def can_skip_check?
    check_next = @check_next
    @check_next = false

    !check_next
  end

  # Returns true if the document is valid with all lines
  # removed. By default it checks all blocks present in
  # the frontier array, but can be used for arbitrary arrays
  # of codeblocks as well
  #
  # With `can_cache: true` the call may return false without checking
  # (see `can_skip_check?`).
  def holds_all_syntax_errors?(block_array = @queue, can_cache: true)
    return false if can_cache && can_skip_check?

    without_lines = block_array.to_a.flat_map(&:lines)

    SyntaxSuggest.valid_without?(
      without_lines: without_lines,
      code_lines: @code_lines
    )
  end

  # Returns a code block with the largest indentation possible
  def pop
    @queue.pop
  end

  # Peeks at the next unvisited line without consuming it
  def next_indent_line
    @unvisited.peek
  end

  # Whether a block on the frontier should be expanded before a new block
  # is built from the unvisited lines.
  def expand?
    return false if @queue.empty?
    return true if @unvisited.empty?

    frontier_indent = @queue.peek.current_indent
    unvisited_indent = next_indent_line.indent

    if ENV["SYNTAX_SUGGEST_DEBUG"]
      puts "```"
      puts @queue.peek.to_s
      puts "```"
      puts " @frontier indent: #{frontier_indent}"
      puts " @unvisited indent: #{unvisited_indent}"
    end

    # Expand all blocks before moving to unvisited lines
    frontier_indent >= unvisited_indent
  end

  # Keeps track of what lines have been added to blocks and which are not yet
  # visited.
  def register_indent_block(block)
    @unvisited.visit_block(block)
    self
  end

  # When one element fully encapsulates another we remove the smaller
  # block from the frontier. This prevents double expansions and all-around
  # weird behavior. However this guarantee is quite expensive to maintain
  #
  # This method body is empty: calling it performs no work and returns nil.
  # NOTE(review): engulfing is presumably handled by the queue itself;
  # confirm against PriorityEngulfQueue.
  def register_engulf_block(block)
  end

  # Add a block to the frontier
  #
  # This method ensures the frontier always remains sorted (in indentation order)
  # and that each code block's lines are removed from the indentation hash so we
  # don't re-evaluate the same line multiple times.
  def <<(block)
    @unvisited.visit_block(block)

    @queue.push(block)

    # An invalid block may complete the set of syntax errors, so force
    # the next `holds_all_syntax_errors?` call to do a real check
    @check_next = true if block.invalid?

    self
  end

  # Example:
  #
  #   combination([:a, :b, :c, :d])
  #   # => [[:a], [:b], [:c], [:d], [:a, :b], [:a, :c], [:a, :d], [:b, :c], [:b, :d], [:c, :d], [:a, :b, :c], [:a, :b, :d], [:a, :c, :d], [:b, :c, :d], [:a, :b, :c, :d]]
  def self.combination(array)
    1.upto(array.length).flat_map do |size|
      array.combination(size).to_a
    end
  end

  # Given that we know our syntax error exists somewhere in our frontier, we want to find
  # the smallest possible set of blocks that contain all the syntax errors
  #
  # Candidates are tried in the same order `CodeFrontier.combination`
  # produces them, but are enumerated one at a time so the search stops at
  # the first match instead of building every subset up front.
  #
  # Returns an Array of blocks, or [] when no combination qualifies.
  def detect_invalid_blocks
    invalid_blocks = @queue.to_a.select(&:invalid?)

    1.upto(invalid_blocks.length) do |size|
      invalid_blocks.combination(size) do |block_array|
        return block_array if holds_all_syntax_errors?(block_array, can_cache: false)
      end
    end

    []
  end
end
|
178
|
+
end
|