lexer_kit 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE.txt +21 -0
- data/README.md +157 -0
- data/exe/lexer_kit +7 -0
- data/ext/lexer_kit_rust/Cargo.toml +17 -0
- data/ext/lexer_kit_rust/extconf.rb +6 -0
- data/ext/lexer_kit_rust/src/deserializer.rs +213 -0
- data/ext/lexer_kit_rust/src/dfa.rs +217 -0
- data/ext/lexer_kit_rust/src/fast_stream.rs +468 -0
- data/ext/lexer_kit_rust/src/lib.rs +248 -0
- data/ext/lexer_kit_rust/src/opcodes.rs +718 -0
- data/ext/lexer_kit_rust/src/safety_test.rs +498 -0
- data/ext/lexer_kit_rust/src/trie.rs +206 -0
- data/ext/lexer_kit_rust/src/types.rs +319 -0
- data/ext/lexer_kit_rust/src/vm.rs +258 -0
- data/lib/lexer_kit/builder/compiler.rb +596 -0
- data/lib/lexer_kit/builder/conflict_detector.rb +209 -0
- data/lib/lexer_kit/builder/mode_def.rb +36 -0
- data/lib/lexer_kit/builder/token_def.rb +65 -0
- data/lib/lexer_kit/builder/validator.rb +84 -0
- data/lib/lexer_kit/builder.rb +230 -0
- data/lib/lexer_kit/cli/commands.rb +389 -0
- data/lib/lexer_kit/cli.rb +88 -0
- data/lib/lexer_kit/core/diagnostic.rb +103 -0
- data/lib/lexer_kit/core/source.rb +154 -0
- data/lib/lexer_kit/core/span.rb +80 -0
- data/lib/lexer_kit/core/token.rb +120 -0
- data/lib/lexer_kit/core.rb +13 -0
- data/lib/lexer_kit/debug/disassembler.rb +143 -0
- data/lib/lexer_kit/debug/visualizer.rb +203 -0
- data/lib/lexer_kit/debug.rb +11 -0
- data/lib/lexer_kit/dfa/byte_class_builder.rb +69 -0
- data/lib/lexer_kit/dfa/case_folding.rb +45 -0
- data/lib/lexer_kit/dfa/char_class_collector.rb +81 -0
- data/lib/lexer_kit/dfa/dfa_builder.rb +95 -0
- data/lib/lexer_kit/dfa/dfa_minimizer.rb +158 -0
- data/lib/lexer_kit/dfa/nfa.rb +304 -0
- data/lib/lexer_kit/dfa/regex_ast.rb +64 -0
- data/lib/lexer_kit/dfa/regex_parser.rb +385 -0
- data/lib/lexer_kit/dfa/utf8_range.rb +175 -0
- data/lib/lexer_kit/dfa/utf8_range_pattern.rb +17 -0
- data/lib/lexer_kit/dfa.rb +37 -0
- data/lib/lexer_kit/errors.rb +76 -0
- data/lib/lexer_kit/format/lkb1/decoder.rb +126 -0
- data/lib/lexer_kit/format/lkb1.rb +199 -0
- data/lib/lexer_kit/format/lkt1.rb +111 -0
- data/lib/lexer_kit/format.rb +19 -0
- data/lib/lexer_kit/ir/compiled_program.rb +228 -0
- data/lib/lexer_kit/ir/constant_pool.rb +107 -0
- data/lib/lexer_kit/ir/dfa_table.rb +125 -0
- data/lib/lexer_kit/ir/instruction.rb +50 -0
- data/lib/lexer_kit/ir/jump_table.rb +94 -0
- data/lib/lexer_kit/ir/keyword_table.rb +168 -0
- data/lib/lexer_kit/ir/opcode.rb +96 -0
- data/lib/lexer_kit/ir/serializer.rb +249 -0
- data/lib/lexer_kit/ir.rb +16 -0
- data/lib/lexer_kit/runner.rb +114 -0
- data/lib/lexer_kit/trie.rb +170 -0
- data/lib/lexer_kit/version.rb +5 -0
- data/lib/lexer_kit.rb +155 -0
- metadata +119 -0
|
@@ -0,0 +1,389 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module LexerKit
|
|
4
|
+
module CLI
|
|
5
|
+
module Commands
|
|
6
|
+
# Compile DSL to .lkt1 or .lkb1 binary format
|
|
7
|
+
module Compile
  # Runs the `compile` subcommand: loads a builder DSL file, optionally
  # reports token conflicts, and writes the compiled lexer to disk.
  #
  # The output format follows the output file name: a path ending in
  # ".lkb1" is saved through Format::LKB1, anything else through
  # Format::LKT1. Diagnostics and the final "Compiled:" line go to stderr.
  #
  # @param argv [Array<String>] arguments following the subcommand name;
  #   consumed destructively while parsing
  # @return [Integer] 0 on success, after --help, or after --dry-run;
  #   1 when no input file is given or the input file does not exist
  def self.run(argv)
    options = { output: nil, verbose: false, dry_run: false }

    parser = OptionParser.new do |opts|
      opts.banner = "Usage: lexer_kit compile [options] <file.rb>"

      opts.on("-o", "--output FILE", "Output file (default: <input>.lkt1)") do |v|
        options[:output] = v
      end

      opts.on("--verbose", "Show conflict warnings") do
        options[:verbose] = true
      end

      opts.on("-n", "--dry-run", "Check for conflicts without generating output") do
        options[:dry_run] = true
      end

      opts.on("-h", "--help", "Show this help") do
        puts opts
        # Returns from Compile.run itself, not just from this handler.
        return 0
      end
    end

    parser.parse!(argv)

    if argv.empty?
      warn "error: No input file specified"
      warn parser.banner
      return 1
    end

    path = argv.shift
    unless File.exist?(path)
      warn "error: File not found: #{path}"
      return 1
    end

    # Load builder
    builder = LexerKit.load_builder(path)

    # Conflicts are only computed when the user asked to see them.
    if options[:verbose] || options[:dry_run]
      conflicts = builder.check_conflicts
      if conflicts.any?
        conflicts.each do |c|
          warn "warning: #{c.token1} vs #{c.token2}: #{c.description}"
        end
        warn ""
        warn "#{conflicts.size} potential conflict(s) found"
      elsif options[:dry_run]
        warn "No conflicts found."
      end
    end

    # A dry run stops before compiling or writing anything.
    return 0 if options[:dry_run]

    compiled = builder.compile

    output_path = options[:output] || default_output_path(path)
    if output_path.end_with?(".lkb1")
      Format::LKB1.save(compiled, path: output_path)
    else
      Format::LKT1.save(compiled, path: output_path)
    end
    warn "Compiled: #{output_path}"

    0
  end

  # Derives the default output path for an input DSL file.
  #
  # A trailing ".rb" is replaced by ".lkt1"; any other name gets ".lkt1"
  # appended. The result therefore never equals the input path, so the
  # compiled output cannot overwrite the DSL source.
  #
  # @param path [String] input file path
  # @return [String] output path ending in ".lkt1"
  def self.default_output_path(path)
    "#{path.sub(/\.rb\z/, '')}.lkt1"
  end
  private_class_method :default_output_path
end
|
|
83
|
+
|
|
84
|
+
# Show lexer information
|
|
85
|
+
module Info
  # Runs the `info` subcommand: loads a compiled lexer and prints a summary
  # (version, native status, tokens, modes, table counts, binary size) to
  # stdout.
  #
  # @param argv [Array<String>] arguments following the subcommand name;
  #   consumed destructively while parsing
  # @return [Integer] 0 on success or after --help, 1 when no file is given
  def self.run(argv)
    parser = OptionParser.new do |opts|
      opts.banner = "Usage: lexer_kit info <file>"

      opts.on("-h", "--help", "Show this help") do
        puts opts
        return 0
      end
    end

    parser.parse!(argv)

    if argv.empty?
      warn "error: No input file specified", parser.banner
      return 1
    end

    lexer = CLI.load_lexer(argv.shift)

    # Header
    puts "LexerKit Compiled Program"
    puts " Version: #{lexer.version}"
    native_state = LexerKit.native? ? "enabled" : "disabled"
    puts " Native: #{native_state}"
    puts

    # Token names, listed with their position in the token list
    puts "Tokens (#{lexer.tokens.size}):"
    lexer.tokens.each_with_index do |token_name, index|
      padded_index = index.to_s.rjust(3)
      puts " #{padded_index}: #{token_name}"
    end
    puts

    # Modes with their offsets
    puts "Modes (#{lexer.modes.size}):"
    lexer.modes.each do |mode_name|
      mode_offset = lexer.mode_offset(mode_name)
      puts " - #{mode_name} (offset: #{mode_offset})"
    end
    puts

    # Table and pool counts
    puts "Statistics:"
    puts " Instructions: #{lexer.instructions.size}"
    puts " DFA tables: #{lexer.dfa_tables.size}"
    puts " Jump tables: #{lexer.jump_tables.size}"
    puts " Keywords: #{lexer.keyword_tables.size}"
    puts " Constants: #{lexer.constant_pool.size}"

    # Size of the serialized program
    serialized = lexer.to_binary
    puts " Binary size: #{format_size(serialized.bytesize)}"

    0
  end

  # Formats a byte count for display.
  #
  # @param bytes [Integer] size in bytes
  # @return [String] "<n> bytes" below 1024, KB rounded to one decimal
  #   below 1024 * 1024, otherwise MB rounded to two decimals
  def self.format_size(bytes)
    return "#{bytes} bytes" if bytes < 1024
    return "#{(bytes / 1024.0).round(1)} KB" if bytes < 1024 * 1024

    "#{(bytes / 1024.0 / 1024.0).round(2)} MB"
  end
end
|
|
149
|
+
|
|
150
|
+
# Tokenize input
|
|
151
|
+
module Lex
  # Runs the `lex` subcommand: tokenizes a file (or stdin when no input file
  # is given) with a compiled lexer and prints the tokens in the chosen
  # format.
  #
  # @param argv [Array<String>] arguments following the subcommand name;
  #   consumed destructively while parsing
  # @return [Integer] 0 on success or after --help, 1 when no lexer is given
  def self.run(argv)
    options = { format: "table", color: $stdout.tty? }

    parser = OptionParser.new do |opts|
      opts.banner = "Usage: lexer_kit lex <lexer> [input]"

      opts.on("-f", "--format FORMAT", %w[table json simple], "Output format (table, json, simple)") do |v|
        options[:format] = v
      end

      opts.on("--no-color", "Disable colored output") do
        options[:color] = false
      end

      opts.on("-h", "--help", "Show this help") do
        puts opts
        return 0
      end
    end

    parser.parse!(argv)

    if argv.empty?
      warn "error: No lexer file specified", parser.banner
      return 1
    end

    lexer = CLI.load_lexer(argv.shift)

    # With no input path left on the command line, read from stdin.
    input = argv.empty? ? $stdin.read : File.read(argv.shift)

    collected = []
    source = Core::Source.new(input)
    source.line_index!

    # Each yielded triple is (token id, start offset, length); the text is
    # cut out of the input with byteslice.
    lexer.lowlevel_each(input) do |tok_id, start, len|
      line, col = source.line_col(start)
      record = { id: tok_id }
      record[:name] = lexer.token_name(tok_id)
      record[:text] = input.byteslice(start, len)
      record[:line] = line
      record[:col] = col
      record[:start] = start
      record[:len] = len
      collected << record
    end

    chosen = options[:format]
    if chosen == "json"
      output_json(collected)
    elsif chosen == "simple"
      output_simple(collected)
    else
      output_table(collected, options[:color])
    end

    0
  end

  # Prints the tokens as a pretty-printed JSON array (the numeric token id
  # is not included).
  #
  # @param tokens [Array<Hash>] token records as built by {run}
  # @return [void]
  def self.output_json(tokens)
    payload = tokens.map do |tok|
      entry = { token: tok[:name].to_s }
      entry[:text] = tok[:text]
      entry[:line] = tok[:line]
      entry[:col] = tok[:col]
      entry[:start] = tok[:start]
      entry[:len] = tok[:len]
      entry
    end
    puts JSON.pretty_generate(payload)
  end

  # Prints one "NAME <inspected text>" line per token.
  #
  # @param tokens [Array<Hash>] token records as built by {run}
  # @return [void]
  def self.output_simple(tokens)
    tokens.each { |tok| puts "#{tok[:name]} #{tok[:text].inspect}" }
  end

  # Prints the tokens as a fixed-width table followed by a token count.
  #
  # @param tokens [Array<Hash>] token records as built by {run}
  # @param color [Boolean] wrap the token name in ANSI cyan when true
  # @return [void]
  def self.output_table(tokens, color)
    puts " LINE:COL TOKEN TEXT"
    puts " -------- --------------- --------------------"

    tokens.each do |tok|
      position = "#{tok[:line]}:#{tok[:col]}".rjust(8)
      name_cell = tok[:name].to_s.ljust(15)
      name_cell = "\e[36m#{name_cell}\e[0m" if color
      shown_text = truncate(tok[:text].inspect, 40)

      puts " #{position} #{name_cell} #{shown_text}"
    end

    puts
    puts "#{tokens.size} tokens"
  end

  # Shortens a string to at most +max+ characters, ending in "..." when cut.
  #
  # @param str [String]
  # @param max [Integer] maximum length of the result
  # @return [String] +str+ itself when it already fits
  def self.truncate(str, max)
    return str unless str.length > max

    "#{str[0, max - 3]}..."
  end
end
|
|
270
|
+
|
|
271
|
+
# Disassemble a .lkt1 or .lkb1 file
|
|
272
|
+
module Disasm
  # Runs the `disasm` subcommand: prints either one selected table (DFA,
  # jump, or keyword) or a full disassembly of a compiled lexer.
  #
  # When several table options are given, --dfa takes precedence over
  # --jump, which takes precedence over --keyword.
  #
  # @param argv [Array<String>] arguments following the subcommand name;
  #   consumed destructively while parsing
  # @return [Integer] 0 on success or after --help, 1 when no file is given
  # @raise [ArgumentError] when the requested table index does not exist
  def self.run(argv)
    options = { dfa: nil, jump: nil, keyword: nil }

    parser = OptionParser.new do |opts|
      opts.banner = "Usage: lexer_kit disasm [options] <file.lkt1|file.lkb1>"

      opts.on("--dfa INDEX", Integer, "Show DFA table at INDEX") do |v|
        options[:dfa] = v
      end

      opts.on("--jump INDEX", Integer, "Show jump table at INDEX") do |v|
        options[:jump] = v
      end

      opts.on("--keyword INDEX", Integer, "Show keyword table at INDEX") do |v|
        options[:keyword] = v
      end

      opts.on("-h", "--help", "Show this help") do
        puts opts
        return 0
      end
    end

    parser.parse!(argv)

    if argv.empty?
      warn "error: No input file specified", parser.banner
      return 1
    end

    program = CLI.load_lexer(argv.shift)

    # Debug helpers are loaded lazily, only once a program is available.
    require "lexer_kit/debug"

    rendered =
      if (dfa_index = options[:dfa])
        dfa_table = program.dfa_tables[dfa_index]
        raise ArgumentError, "DFA table #{dfa_index} not found" unless dfa_table

        Debug::Visualizer.format_dfa(dfa_table, program: program)
      elsif (jump_index = options[:jump])
        jump_table = program.jump_tables[jump_index]
        raise ArgumentError, "Jump table #{jump_index} not found" unless jump_table

        Debug::Visualizer.format_jump_table(jump_table)
      elsif (keyword_index = options[:keyword])
        keyword_table = program.keyword_tables[keyword_index]
        raise ArgumentError, "Keyword table #{keyword_index} not found" unless keyword_table

        Debug::Visualizer.format_keyword_table(keyword_table, program: program)
      else
        Debug::Disassembler.new(program).disassemble
      end
    puts rendered

    0
  end
end
|
|
332
|
+
|
|
333
|
+
# Visualize DFA as Graphviz DOT
|
|
334
|
+
module Visualize
  # Runs the `visualize` subcommand: renders one DFA table of a compiled
  # lexer as Graphviz DOT, written to a file or to stdout.
  #
  # @param argv [Array<String>] arguments following the subcommand name;
  #   consumed destructively while parsing
  # @return [Integer] 0 on success or after --help, 1 when no file is given
  # @raise [ArgumentError] when the requested DFA table does not exist
  def self.run(argv)
    options = { dfa: 0, output: nil }

    parser = OptionParser.new do |opts|
      opts.banner = "Usage: lexer_kit visualize [options] <file.lkt1|file.lkb1>"

      opts.on("--dfa INDEX", Integer, "DFA table index (default: 0)") do |v|
        options[:dfa] = v
      end

      opts.on("-o", "--output FILE", "Output file (default: stdout)") do |v|
        options[:output] = v
      end

      opts.on("-h", "--help", "Show this help") do
        puts opts
        puts
        [
          "Examples:",
          " lexer_kit visualize lexer.lkt1 | dot -Tpng -o dfa.png",
          " lexer_kit visualize --dfa 1 lexer.lkt1 -o dfa.dot"
        ].each { |help_line| puts help_line }
        return 0
      end
    end

    parser.parse!(argv)

    if argv.empty?
      warn "error: No input file specified", parser.banner
      return 1
    end

    program = CLI.load_lexer(argv.shift)

    # Debug helpers are loaded lazily, only once a program is available.
    require "lexer_kit/debug"

    table_index = options[:dfa]
    dfa_table = program.dfa_tables[table_index]
    raise ArgumentError, "DFA table #{table_index} not found" unless dfa_table

    dot_text = Debug::Visualizer.dfa_to_dot(dfa_table, program: program)

    destination = options[:output]
    if destination
      File.write(destination, dot_text)
      # The confirmation goes to stderr.
      warn "Written: #{destination}"
    else
      puts dot_text
    end

    0
  end
end
|
|
387
|
+
end
|
|
388
|
+
end
|
|
389
|
+
end
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "optparse"
|
|
4
|
+
require "json"
|
|
5
|
+
|
|
6
|
+
module LexerKit
|
|
7
|
+
# Command-line interface for LexerKit
|
|
8
|
+
module CLI
  VERSION = LexerKit::VERSION

  class << self
    # Entry point of the command-line tool: handles the global help and
    # version flags, then dispatches to the matching subcommand.
    #
    # Any StandardError raised along the way is rendered to stderr and
    # turned into exit status 1.
    #
    # @param argv [Array<String>] command-line arguments; the subcommand
    #   name is shifted off before dispatch
    # @return [Integer] process exit status
    def run(argv)
      leading = argv[0]

      if argv.empty? || %w[-h --help].include?(leading)
        print_help
        return 0
      end

      if %w[-v --version].include?(leading)
        puts "lexer_kit #{VERSION}"
        return 0
      end

      command = argv.shift
      case command
      when "compile" then Commands::Compile.run(argv)
      when "lex" then Commands::Lex.run(argv)
      when "info" then Commands::Info.run(argv)
      when "disasm" then Commands::Disasm.run(argv)
      when "visualize" then Commands::Visualize.run(argv)
      else
        warn "Unknown command: #{command}", "Run 'lexer_kit --help' for usage."
        1
      end
    rescue StandardError => e
      render_error(e)
      1
    end

    # Writes an error to stderr, using the error's own `render` method when
    # it has one and a plain "error: <message>" line otherwise.
    #
    # @param error [Exception]
    # @param color [Boolean] forwarded to `error.render`
    # @return [void]
    def render_error(error, color: $stderr.tty?)
      text = error.respond_to?(:render) ? error.render(color: color) : "error: #{error.message}"
      warn text
    end

    # Prints the top-level usage text to stdout.
    #
    # @return [void]
    def print_help
      usage = <<~HELP
        Usage: lexer_kit <command> [options] [arguments]

        Commands:
        compile Compile DSL file to .lkt1 or .lkb1
        lex Tokenize input with a lexer
        info Show lexer information
        disasm Disassemble a .lkt1 or .lkb1 file
        visualize Output DFA as Graphviz DOT format

        Options:
        -h, --help Show this help
        -v, --version Show version

        Examples:
        lexer_kit compile json_lexer.rb
        lexer_kit compile json_lexer.rb --verbose # show conflict warnings
        lexer_kit compile json_lexer.rb --dry-run # check conflicts only
        lexer_kit lex json_lexer.lkt1 data.json
        lexer_kit info json_lexer.lkt1
        lexer_kit lex json_lexer.lkb1 data.json
        lexer_kit info json_lexer.lkb1
        lexer_kit visualize json_lexer.lkt1 | dot -Tpng -o dfa.png
      HELP
      puts usage
    end
  end

  # Loads a lexer from a .lkt1 or .lkb1 file by delegating to
  # LexerKit.load_lexer.
  #
  # @param path [String] path to the compiled lexer file
  # @return [Object] whatever LexerKit.load_lexer returns
  def self.load_lexer(path)
    LexerKit.load_lexer(path)
  end
end
|
|
86
|
+
end
|
|
87
|
+
|
|
88
|
+
require_relative "cli/commands"
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module LexerKit
|
|
4
|
+
module Core
|
|
5
|
+
# Diagnostic holds error information with position and notes.
|
|
6
|
+
# It can render a human-readable error message with source context.
|
|
7
|
+
class Diagnostic
  attr_reader :level, :message, :span, :notes

  LEVEL_LABELS = {
    error: "error",
    warning: "warning",
    note: "note"
  }.freeze

  LEVEL_COLORS = {
    error: "\e[1;31m", # bold red
    warning: "\e[1;33m", # bold yellow
    note: "\e[1;36m" # bold cyan
  }.freeze

  RESET = "\e[0m"
  BOLD = "\e[1m"
  BLUE = "\e[34m"

  # Builds a diagnostic; the notes array, when given, is frozen in place.
  #
  # @param level [Symbol] :error, :warning, or :note
  # @param message [String] main error message
  # @param span [Span] location in source
  # @param notes [Array<String>, nil] additional notes
  def initialize(level:, message:, span:, notes: nil)
    @level = level
    @message = message
    @span = span
    @notes = notes&.freeze
  end

  # Renders the diagnostic as text: a "file:line:col: level: message"
  # header, then the offending source line with a caret marker beneath it
  # (when the source can supply that line), then one line per note.
  #
  # @param source [Source] must respond to line_col, filename and line_slice
  # @param color [Boolean] enable ANSI colors
  # @return [String] lines joined with "\n", without a trailing newline
  def render(source, color: $stdout.tty?)
    line_num, col = source.line_col(@span.start)

    rendered = [header_line(source, line_num, col, color)]

    line_content = source.line_slice(line_num)
    rendered.concat(snippet_lines(line_content, line_num, col, color)) if line_content

    @notes&.each do |note|
      rendered << (color ? " #{BOLD}note:#{RESET} #{note}" : " note: #{note}")
    end

    rendered.join("\n")
  end

  # @return [String] compact one-line form: "level: message at span"
  def to_s
    "#{LEVEL_LABELS[@level]}: #{@message} at #{@span}"
  end

  # @return [String] debugging representation
  def inspect
    "#<LexerKit::Core::Diagnostic #{@level} #{@span}: #{@message.inspect}>"
  end

  private

  # Builds the header line; the filename prefix is omitted when the source
  # has no filename.
  def header_line(source, line_num, col, color)
    loc = source.filename ? "#{source.filename}:#{line_num}:#{col}" : "#{line_num}:#{col}"
    label = LEVEL_LABELS[@level]

    return "#{loc}: #{label}: #{@message}" unless color

    "#{BOLD}#{loc}:#{RESET} #{LEVEL_COLORS[@level]}#{label}#{RESET}: #{BOLD}#{@message}#{RESET}"
  end

  # Builds the numbered source line and the caret line beneath it.
  def snippet_lines(line_content, line_num, col, color)
    number = line_num.to_s
    gutter = " " * number.length

    # The marker never runs past the end of the line and is at least one
    # character wide.
    width = [[@span.len, line_content.length - col + 1].min, 1].max
    marker = "^#{'~' * (width - 1)}"
    indent = " " * (col - 1)

    if color
      [
        " #{BLUE}#{number} |#{RESET} #{line_content}",
        " #{gutter} #{BLUE}|#{RESET} #{indent}#{LEVEL_COLORS[@level]}#{marker}#{RESET}"
      ]
    else
      [
        " #{number} | #{line_content}",
        " #{gutter} | #{indent}#{marker}"
      ]
    end
  end
end
|
|
102
|
+
end
|
|
103
|
+
end
|