syntax_suggest 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.circleci/config.yml +91 -0
- data/.github/workflows/check_changelog.yml +20 -0
- data/.gitignore +14 -0
- data/.rspec +3 -0
- data/.standard.yml +1 -0
- data/CHANGELOG.md +158 -0
- data/CODE_OF_CONDUCT.md +74 -0
- data/Gemfile +14 -0
- data/Gemfile.lock +67 -0
- data/LICENSE.txt +21 -0
- data/README.md +229 -0
- data/Rakefile +8 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/dead_end.gemspec +32 -0
- data/exe/syntax_suggest +7 -0
- data/lib/syntax_suggest/api.rb +199 -0
- data/lib/syntax_suggest/around_block_scan.rb +224 -0
- data/lib/syntax_suggest/block_expand.rb +74 -0
- data/lib/syntax_suggest/capture_code_context.rb +233 -0
- data/lib/syntax_suggest/clean_document.rb +304 -0
- data/lib/syntax_suggest/cli.rb +129 -0
- data/lib/syntax_suggest/code_block.rb +100 -0
- data/lib/syntax_suggest/code_frontier.rb +178 -0
- data/lib/syntax_suggest/code_line.rb +239 -0
- data/lib/syntax_suggest/code_search.rb +139 -0
- data/lib/syntax_suggest/core_ext.rb +101 -0
- data/lib/syntax_suggest/display_code_with_line_numbers.rb +70 -0
- data/lib/syntax_suggest/display_invalid_blocks.rb +84 -0
- data/lib/syntax_suggest/explain_syntax.rb +103 -0
- data/lib/syntax_suggest/left_right_lex_count.rb +168 -0
- data/lib/syntax_suggest/lex_all.rb +55 -0
- data/lib/syntax_suggest/lex_value.rb +70 -0
- data/lib/syntax_suggest/parse_blocks_from_indent_line.rb +60 -0
- data/lib/syntax_suggest/pathname_from_message.rb +59 -0
- data/lib/syntax_suggest/priority_engulf_queue.rb +63 -0
- data/lib/syntax_suggest/priority_queue.rb +105 -0
- data/lib/syntax_suggest/ripper_errors.rb +36 -0
- data/lib/syntax_suggest/unvisited_lines.rb +36 -0
- data/lib/syntax_suggest/version.rb +5 -0
- data/lib/syntax_suggest.rb +3 -0
- metadata +88 -0
@@ -0,0 +1,304 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module SyntaxSuggest
|
4
|
+
# Parses and sanitizes source into a lexically aware document
|
5
|
+
#
|
6
|
+
# Internally the document is represented by an array with each
|
7
|
+
# index containing a CodeLine correlating to a line from the source code.
|
8
|
+
#
|
9
|
+
# There are three main phases in the algorithm:
|
10
|
+
#
|
11
|
+
# 1. Sanitize/format input source
|
12
|
+
# 2. Search for invalid blocks
|
13
|
+
# 3. Format invalid blocks into something meaningful
|
14
|
+
#
|
15
|
+
# This class handles the first part.
|
16
|
+
#
|
17
|
+
# The reason this class exists is to format input source
|
18
|
+
# for better/easier/cleaner exploration.
|
19
|
+
#
|
20
|
+
# The CodeSearch class operates at the line level so
|
21
|
+
# we must be careful to not introduce lines that look
|
22
|
+
# valid by themselves, but when removed will trigger syntax errors
|
23
|
+
# or strange behavior.
|
24
|
+
#
|
25
|
+
# ## Join Trailing slashes
|
26
|
+
#
|
27
|
+
# Code with a trailing slash is logically treated as a single line:
|
28
|
+
#
|
29
|
+
# 1 it "code can be split" \
|
30
|
+
# 2 "across multiple lines" do
|
31
|
+
#
|
32
|
+
# In this case removing line 2 would add a syntax error. We get around
|
33
|
+
# this by internally joining the two lines into a single "line" object
|
34
|
+
#
|
35
|
+
# ## Logically Consecutive lines
|
36
|
+
#
|
37
|
+
# Code that can be broken over multiple
|
38
|
+
# lines such as method calls are on different lines:
|
39
|
+
#
|
40
|
+
# 1 User.
|
41
|
+
# 2 where(name: "schneems").
|
42
|
+
# 3 first
|
43
|
+
#
|
44
|
+
# Removing line 2 can introduce a syntax error. To fix this, all lines
|
45
|
+
# are joined into one.
|
46
|
+
#
|
47
|
+
# ## Heredocs
|
48
|
+
#
|
49
|
+
# A heredoc is a way of defining a multi-line string. They can cause many
|
50
|
+
# problems. If left as a single line, Ripper would try to parse the contents
|
51
|
+
# as ruby code rather than as a string. Even without this problem, we still
|
52
|
+
# hit an issue with indentation
|
53
|
+
#
|
54
|
+
# 1 foo = <<~HEREDOC
|
55
|
+
# 2 "Be yourself; everyone else is already taken.""
|
56
|
+
# 3 ― Oscar Wilde
|
57
|
+
# 4 puts "I look like ruby code" # but i'm still a heredoc
|
58
|
+
# 5 HEREDOC
|
59
|
+
#
|
60
|
+
# If we didn't join these lines then our algorithm would think that line 4
|
61
|
+
# is separate from the rest, has a higher indentation, then look at it first
|
62
|
+
# and remove it.
|
63
|
+
#
|
64
|
+
# If the code evaluates line 5 by itself it will think line 5 is a constant,
|
65
|
+
# remove it, and introduce a syntax error.
|
66
|
+
#
|
67
|
+
# All of these problems are fixed by joining the whole heredoc into a single
|
68
|
+
# line.
|
69
|
+
#
|
70
|
+
# ## Comments and whitespace
|
71
|
+
#
|
72
|
+
# Comments can throw off the way the lexer tells us that the line
|
73
|
+
# logically belongs with the next line. This is valid ruby but
|
74
|
+
# results in a different lex output than before:
|
75
|
+
#
|
76
|
+
# 1 User.
|
77
|
+
# 2 where(name: "schneems").
|
78
|
+
# 3 # Comment here
|
79
|
+
# 4 first
|
80
|
+
#
|
81
|
+
# To handle this we can replace comment lines with empty lines
|
82
|
+
# and then re-lex the source. This removal and re-lexing preserves
|
83
|
+
# line index and document size, but generates an easier to work with
|
84
|
+
# document.
|
85
|
+
#
|
86
|
+
class CleanDocument
  # Sanitizes +source+ with #clean_sweep and hands the swept lines to
  # CodeLine.from_source, storing the result as the internal document.
  def initialize(source:)
    swept = clean_sweep(source: source)
    @document = CodeLine.from_source(swept.join, lines: swept)
  end

  # Runs every document "cleaner" in a fixed order (trailing slashes,
  # then consecutive lines, then heredocs) and returns self
  def call
    join_trailing_slash!
    join_consecutive!
    join_heredoc!

    self
  end

  # The array of CodeLines that make up the document
  def lines
    @document
  end

  # Renders the document back to a string
  def to_s
    @document.join
  end

  # Replaces comment lines and whitespace-only lines with an empty
  # newline so that line index and document size are preserved.
  #
  #   source = <<~'EOM'
  #     # Comment 1
  #     puts "hello"
  #     # Comment 2
  #     puts "world"
  #   EOM
  #
  #   lines = CleanDocument.new(source: source).lines
  #   expect(lines[0].to_s).to eq("\n")
  #   expect(lines[1].to_s).to eq(%(puts "hello"\n))
  #   expect(lines[2].to_s).to eq("\n")
  #   expect(lines[3].to_s).to eq(%(puts "world"\n))
  #
  # Important: This must be done before lexing.
  #
  # After this change is made, we lex the document because
  # removing comments can change how the doc is parsed.
  #
  # For example:
  #
  #   values = LexAll.new(source: <<~EOM)
  #     User.
  #       # comment
  #       where(name: 'schneems')
  #   EOM
  #   expect(
  #     values.count {|v| v.type == :on_ignored_nl}
  #   ).to eq(1)
  #
  # After the comment is removed:
  #
  #   values = LexAll.new(source: <<~EOM)
  #     User.
  #
  #       where(name: 'schneems')
  #   EOM
  #   expect(
  #     values.count {|v| v.type == :on_ignored_nl}
  #   ).to eq(2)
  #
  def clean_sweep(source:)
    source.lines.map do |line|
      # Lines starting with `#{` are kept: the pattern only treats `#`
      # followed by something other than `{` as a comment.
      # https://rubular.com/r/LLE10D8HKMkJvs
      line.match?(/^\s*(#[^{].*)?$/) ? $/ : line
    end
  end

  # Smushes all heredoc lines into one line
  #
  #   source = <<~'EOM'
  #     foo = <<~HEREDOC
  #        lol
  #        hehehe
  #     HEREDOC
  #   EOM
  #
  #   lines = CleanDocument.new(source: source).join_heredoc!.lines
  #   expect(lines[0].to_s).to eq(source)
  #   expect(lines[1].to_s).to eq("")
  def join_heredoc!
    open_heredocs = []
    spans = []

    lines.each do |line|
      line.lex.each do |token|
        case token.type
        when :on_heredoc_beg
          open_heredocs.push(line.index)
        when :on_heredoc_end
          # Pair this terminator with the most recently opened heredoc
          opened_at = open_heredocs.pop
          spans.push([opened_at, line.index])
        end
      end
    end

    join_groups(spans.map { |first, last| @document[first..last] })
    self
  end

  # Smushes logically "consecutive" lines
  #
  #   source = <<~'EOM'
  #     User.
  #       where(name: 'schneems').
  #       first
  #   EOM
  #
  #   lines = CleanDocument.new(source: source).join_consecutive!.lines
  #   expect(lines[0].to_s).to eq(source)
  #   expect(lines[1].to_s).to eq("")
  #
  # The one known case this doesn't handle is:
  #
  #   Ripper.lex <<~EOM
  #     a &&
  #      b ||
  #      c
  #   EOM
  #
  # For some reason this introduces `on_ignored_nl` but with BEG type
  #
  def join_consecutive!
    starters = @document.select(&:ignore_newline_not_beg?)
    groups = starters.map do |code_line|
      take_while_including(code_line.index..-1, &:ignore_newline_not_beg?)
    end

    join_groups(groups)
    self
  end

  # Join lines with a trailing slash
  #
  #   source = <<~'EOM'
  #     it "code can be split" \
  #        "across multiple lines" do
  #   EOM
  #
  #   lines = CleanDocument.new(source: source).join_trailing_slash!.lines
  #   expect(lines[0].to_s).to eq(source)
  #   expect(lines[1].to_s).to eq("")
  def join_trailing_slash!
    starters = @document.select(&:trailing_slash?)
    groups = starters.map do |code_line|
      take_while_including(code_line.index..-1, &:trailing_slash?)
    end

    join_groups(groups)
    self
  end

  # Helper method for joining "groups" of lines
  #
  # Input is expected to be type Array<Array<CodeLine>>
  #
  # The outer array holds the various "groups" while the
  # inner array holds code lines.
  #
  # All code lines are "joined" into the first line in
  # their group.
  #
  # To preserve document size, empty lines are placed
  # in the place of the lines that were "joined"
  def join_groups(groups)
    groups.each do |group|
      head = group.first
      position = head.index

      # Handle the case of multiple groups in a row:
      # if this slot was already replaced, move on
      next if @document[position].empty?

      # Join the whole group into the first line
      @document[position] = CodeLine.new(
        lex: group.map(&:lex).flatten,
        line: group.join,
        index: position
      )

      # Hide the rest of the lines. The joined line already carries their
      # newlines, so an empty line is used to avoid doubling them up.
      group.drop(1).each do |absorbed|
        @document[absorbed.index] = CodeLine.new(line: "", index: absorbed.index, lex: [])
      end
    end
    self
  end

  # Helper method for grabbing elements from document
  #
  # Like `take_while` except when it stops
  # iterating, it also returns the line
  # that caused it to stop
  def take_while_including(range = 0..-1)
    taken = []
    @document[range].each do |line|
      taken << line
      break unless yield(line)
    end
    taken
  end
end
|
304
|
+
end
|
@@ -0,0 +1,129 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "pathname"
|
4
|
+
require "optparse"
|
5
|
+
|
6
|
+
module SyntaxSuggest
|
7
|
+
# All the logic of the exe/syntax_suggest CLI in one handy spot
|
8
|
+
#
|
9
|
+
# Cli.new(argv: ["--help"]).call
|
10
|
+
# Cli.new(argv: ["<path/to/file>.rb"]).call
|
11
|
+
# Cli.new(argv: ["<path/to/file>.rb", "--record=tmp"]).call
|
12
|
+
# Cli.new(argv: ["<path/to/file>.rb", "--terminal"]).call
|
13
|
+
#
|
14
|
+
class Cli
  attr_accessor :options

  # argv is everything passed to the executable, not including the
  # executable name.
  #
  # All other inputs are dependency injection for testing
  def initialize(argv:, exit_obj: Kernel, io: $stdout, env: ENV)
    @io = io
    @argv = argv
    @exit_obj = exit_obj
    @parser = nil

    # A set DEBUG variable takes precedence over SYNTAX_SUGGEST_RECORD_DIR
    @options = {
      record_dir: env["DEBUG"] ? "tmp" : env["SYNTAX_SUGGEST_RECORD_DIR"],
      terminal: SyntaxSuggest::DEFAULT_VALUE
    }
  end

  # Parses the arguments, validates the target file and runs
  # SyntaxSuggest.call on its contents. Exits through the injected
  # exit object: 0 when the document is ok, 1 otherwise.
  def call
    if @argv.empty?
      # Display help if raw command
      parser.parse!(%w[--help])
      return
    end

    # Mutates @argv
    parse
    return if options[:exit]

    file_name = @argv.first
    return exit_with_error("No file given") if file_name.nil?

    file = Pathname(file_name)
    return exit_with_error("file not found: #{file.expand_path} ") unless file.exist?

    @io.puts "Record dir: #{options[:record_dir]}" if options[:record_dir]

    display = SyntaxSuggest.call(
      io: @io,
      source: file.read,
      filename: file.expand_path,
      terminal: options.fetch(:terminal, SyntaxSuggest::DEFAULT_VALUE),
      record_dir: options[:record_dir]
    )

    @exit_obj.exit(display.document_ok? ? 0 : 1)
  end

  # Runs the option parser over @argv (consuming recognized flags)
  # and returns self
  def parse
    parser.parse!(@argv)

    self
  end

  # Lazily builds and memoizes the OptionParser for the CLI
  def parser
    @parser ||= OptionParser.new do |opts|
      # NOTE(review): banner text is reproduced as it appears in the reviewed
      # rendering; confirm any relative indentation against the released file.
      opts.banner = <<~EOM
        Usage: syntax_suggest <file> [options]

        Parses a ruby source file and searches for syntax error(s) such as
        unexpected `end', expecting end-of-input.

        Example:

        $ syntax_suggest dog.rb

        # ...

        ❯ 10 defdog
        ❯ 15 end

        ENV options:

        SYNTAX_SUGGEST_RECORD_DIR=<dir>

        Records the steps used to search for a syntax error
        to the given directory

        Options:
      EOM

      opts.version = SyntaxSuggest::VERSION

      opts.on("--help", "Help - displays this message") do
        @io.puts opts
        options[:exit] = true
        @exit_obj.exit
      end

      opts.on("--record <dir>", "Records the steps used to search for a syntax error to the given directory") do |dir|
        options[:record_dir] = dir
      end

      opts.on("--terminal", "Enable terminal highlighting") do
        options[:terminal] = true
      end

      opts.on("--no-terminal", "Disable terminal highlighting") do
        options[:terminal] = false
      end
    end
  end

  # Prints +message+, requests exit status 1 and returns nil so `call`
  # can bail out when the exit object is a test double that does not
  # actually terminate the process.
  private def exit_with_error(message)
    @io.puts message
    @exit_obj.exit(1)
    nil
  end
end
|
129
|
+
end
|
@@ -0,0 +1,100 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module SyntaxSuggest
|
4
|
+
# Multiple lines form a singular CodeBlock
|
5
|
+
#
|
6
|
+
# Source code is made of multiple CodeBlocks.
|
7
|
+
#
|
8
|
+
# Example:
|
9
|
+
#
|
10
|
+
# code_block.to_s # =>
|
11
|
+
# # def foo
|
12
|
+
# # puts "foo"
|
13
|
+
# # end
|
14
|
+
#
|
15
|
+
# code_block.valid? # => true
|
16
|
+
# code_block.invalid? # => false
|
17
|
+
#
|
18
|
+
#
|
19
|
+
class CodeBlock
  # Sentinel meaning "validity has not been computed yet"; needed because
  # a cached result of `false` must be distinguishable from "no result".
  UNSET = Object.new.freeze
  attr_reader :lines, :starts_at, :ends_at

  # lines - a single line object or an array of them. Each is expected to
  #         respond to `number` (used for starts_at / ends_at).
  #
  # An empty block (the default) has nil for starts_at and ends_at
  # instead of raising NoMethodError on the missing first/last line.
  def initialize(lines: [])
    @lines = Array(lines)
    @valid = UNSET
    @deleted = false
    @starts_at = @lines.first&.number
    @ends_at = @lines.last&.number
  end

  # Flags the block as deleted
  def delete
    @deleted = true
  end

  def deleted?
    @deleted
  end

  # Lines that are both visible and not empty
  def visible_lines
    @lines.select { |line| line.visible? && line.not_empty? }
  end

  # Calls `mark_invisible` on every line in the block
  def mark_invisible
    @lines.map(&:mark_invisible)
  end

  # True when the block's text, ignoring surrounding whitespace,
  # is exactly the keyword `end`
  def is_end?
    to_s.strip == "end"
  end

  # True when every line in the block is hidden
  def hidden?
    @lines.all?(&:hidden?)
  end

  # This is used for frontier ordering, we are searching from
  # the largest indentation to the smallest. This allows us to
  # populate an array with multiple code blocks then call `sort!`
  # on it without having to specify the sorting criteria
  def <=>(other)
    by_indent = current_indent <=> other.current_indent
    return by_indent unless by_indent == 0

    # Tie-break on starting line so ordering is stable
    starts_at <=> other.starts_at
  end

  # Smallest indent among the non-empty lines, or 0 when there are none.
  # Memoized.
  def current_indent
    @current_indent ||= lines.select(&:not_empty?).map(&:indent).min || 0
  end

  def invalid?
    !valid?
  end

  # Whether the block's original source is valid syntax. The answer is
  # computed once and cached.
  def valid?
    return @valid unless @valid == UNSET

    # Performance optimization
    #
    # If all the lines were previously hidden
    # and we expand to capture additional empty
    # lines then the result cannot be invalid
    #
    # That means there's no reason to re-check all
    # lines with ripper (which is expensive).
    # Benchmark in commit message
    only_hidden_or_empty = lines.all? { |l| l.hidden? || l.empty? }
    @valid = only_hidden_or_empty || SyntaxSuggest.valid?(lines.map(&:original).join)
  end

  def to_s
    @lines.join
  end
end
|
100
|
+
end
|
@@ -0,0 +1,178 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module SyntaxSuggest
|
4
|
+
# The main function of the frontier is to hold the edges of our search and to
|
5
|
+
# evaluate when we can stop searching.
|
6
|
+
|
7
|
+
# There are three main phases in the algorithm:
|
8
|
+
#
|
9
|
+
# 1. Sanitize/format input source
|
10
|
+
# 2. Search for invalid blocks
|
11
|
+
# 3. Format invalid blocks into something meaningful
|
12
|
+
#
|
13
|
+
# The Code frontier is a critical part of the second step
|
14
|
+
#
|
15
|
+
# ## Knowing where we've been
|
16
|
+
#
|
17
|
+
# Once a code block is generated it is added onto the frontier. Then it will be
|
18
|
+
# sorted by indentation and frontier can be filtered. Large blocks that fully enclose a
|
19
|
+
# smaller block will cause the smaller block to be evicted.
|
20
|
+
#
|
21
|
+
# CodeFrontier#<<(block) # Adds block to frontier
|
22
|
+
# CodeFrontier#pop # Removes block from frontier
|
23
|
+
#
|
24
|
+
# ## Knowing where we can go
|
25
|
+
#
|
26
|
+
# Internally the frontier keeps track of "unvisited" lines which are exposed via `next_indent_line`
|
27
|
+
# when called, this method returns, a line of code with the highest indentation.
|
28
|
+
#
|
29
|
+
# The returned line of code can be used to build a CodeBlock and then that code block
|
30
|
+
# is added back to the frontier. Then, the lines are removed from the
|
31
|
+
# "unvisited" so we don't double-create the same block.
|
32
|
+
#
|
33
|
+
# CodeFrontier#next_indent_line # Shows next line
|
34
|
+
# CodeFrontier#register_indent_block(block) # Removes lines from unvisited
|
35
|
+
#
|
36
|
+
# ## Knowing when to stop
|
37
|
+
#
|
38
|
+
# The frontier knows how to check the entire document for a syntax error. When blocks
|
39
|
+
# are added onto the frontier, they're removed from the document. When all code containing
|
40
|
+
# syntax errors has been added to the frontier, the document will be parsable without a
|
41
|
+
# syntax error and the search can stop.
|
42
|
+
#
|
43
|
+
# CodeFrontier#holds_all_syntax_errors? # Returns true when frontier holds all syntax errors
|
44
|
+
#
|
45
|
+
# ## Filtering false positives
|
46
|
+
#
|
47
|
+
# Once the search is completed, the frontier may have multiple blocks that do not contain
|
48
|
+
# the syntax error. To limit the result to the smallest subset of "invalid blocks" call:
|
49
|
+
#
|
50
|
+
# CodeFrontier#detect_invalid_blocks
|
51
|
+
#
|
52
|
+
class CodeFrontier
  # code_lines - every line of the document being searched
  # unvisited  - tracker of lines not yet placed into a block; built
  #              from code_lines unless one is injected
  def initialize(code_lines:, unvisited: UnvisitedLines.new(code_lines: code_lines))
    @code_lines = code_lines
    @unvisited = unvisited
    @queue = PriorityEngulfQueue.new

    @check_next = true
  end

  # Number of blocks currently held on the frontier
  def count
    @queue.length
  end

  # Performance optimization
  #
  # Parsing with ripper is expensive
  # If we know we don't have any blocks with invalid
  # syntax, then we know we cannot have found
  # the incorrect syntax yet.
  #
  # When an invalid block is added onto the frontier
  # check document state
  #
  # Reading the flag also clears it, so consecutive calls can be
  # skipped until `<<` raises the flag again.
  private def can_skip_check?
    skippable = !@check_next
    @check_next = false
    skippable
  end

  # Returns true if the document is valid with all lines
  # removed. By default it checks all blocks present in
  # the frontier array, but can be used for arbitrary arrays
  # of codeblocks as well
  def holds_all_syntax_errors?(block_array = @queue, can_cache: true)
    return false if can_cache && can_skip_check?

    SyntaxSuggest.valid_without?(
      without_lines: block_array.to_a.flat_map(&:lines),
      code_lines: @code_lines
    )
  end

  # Returns a code block with the largest indentation possible
  def pop
    @queue.pop
  end

  # Peeks at the next line offered by the unvisited tracker
  def next_indent_line
    @unvisited.peek
  end

  # Whether the search should expand a block already on the frontier
  # rather than start a new one from an unvisited line
  def expand?
    return false if @queue.empty?
    return true if @unvisited.empty?

    frontier_indent = @queue.peek.current_indent
    unvisited_indent = next_indent_line.indent

    debug_expand(frontier_indent, unvisited_indent) if ENV["SYNTAX_SUGGEST_DEBUG"]

    # Expand all blocks before moving to unvisited lines
    frontier_indent >= unvisited_indent
  end

  # Keeps track of what lines have been added to blocks and which are not yet
  # visited.
  def register_indent_block(block)
    @unvisited.visit_block(block)
    self
  end

  # When one element fully encapsulates another we remove the smaller
  # block from the frontier. This prevents double expansions and all-around
  # weird behavior. However this guarantee is quite expensive to maintain
  #
  # Intentionally a no-op here; always returns nil.
  def register_engulf_block(block)
  end

  # Add a block to the frontier
  #
  # This method ensures the frontier always remains sorted (in indentation order)
  # and that each code block's lines are removed from the indentation hash so we
  # don't re-evaluate the same line multiple times.
  def <<(block)
    @unvisited.visit_block(block)

    @queue.push(block)

    # An invalid block may complete the set of syntax errors, so the
    # next holds_all_syntax_errors? call must not be skipped
    @check_next = true if block.invalid?

    self
  end

  # Example:
  #
  #   combination([:a, :b, :c, :d])
  #   # => [[:a], [:b], [:c], [:d], [:a, :b], [:a, :c], [:a, :d], [:b, :c], [:b, :d], [:c, :d], [:a, :b, :c], [:a, :b, :d], [:a, :c, :d], [:b, :c, :d], [:a, :b, :c, :d]]
  def self.combination(array)
    (1..array.length).flat_map { |size| array.combination(size).to_a }
  end

  # Given that we know our syntax error exists somewhere in our frontier, we want to find
  # the smallest possible set of blocks that contain all the syntax errors
  def detect_invalid_blocks
    candidates = @queue.to_a.select(&:invalid?)
    smallest = self.class.combination(candidates).find do |block_array|
      holds_all_syntax_errors?(block_array, can_cache: false)
    end
    smallest || []
  end

  # Prints the top frontier block and both indentation levels
  private def debug_expand(frontier_indent, unvisited_indent)
    puts "```"
    puts @queue.peek.to_s
    puts "```"
    puts " @frontier indent: #{frontier_indent}"
    puts " @unvisited indent: #{unvisited_indent}"
  end
end
|
178
|
+
end
|