lrama 0.5.8 → 0.5.10
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/test.yaml +6 -1
- data/.gitignore +7 -4
- data/Gemfile +10 -6
- data/README.md +3 -3
- data/Rakefile +15 -7
- data/Steepfile +15 -1
- data/lib/lrama/command.rb +6 -1
- data/lib/lrama/context.rb +1 -3
- data/lib/lrama/counterexamples/path.rb +0 -46
- data/lib/lrama/counterexamples/production_path.rb +17 -0
- data/lib/lrama/counterexamples/start_path.rb +21 -0
- data/lib/lrama/counterexamples/transition_path.rb +17 -0
- data/lib/lrama/counterexamples.rb +3 -0
- data/lib/lrama/grammar/code/initial_action_code.rb +28 -0
- data/lib/lrama/grammar/code/no_reference_code.rb +24 -0
- data/lib/lrama/grammar/code/printer_code.rb +34 -0
- data/lib/lrama/grammar/code/rule_action.rb +62 -0
- data/lib/lrama/grammar/code.rb +9 -93
- data/lib/lrama/grammar/counter.rb +15 -0
- data/lib/lrama/grammar/error_token.rb +3 -3
- data/lib/lrama/grammar/parameterizing_rules/builder/base.rb +28 -0
- data/lib/lrama/grammar/parameterizing_rules/builder/list.rb +20 -0
- data/lib/lrama/grammar/parameterizing_rules/builder/nonempty_list.rb +20 -0
- data/lib/lrama/grammar/parameterizing_rules/builder/option.rb +20 -0
- data/lib/lrama/grammar/parameterizing_rules/builder/separated_list.rb +28 -0
- data/lib/lrama/grammar/parameterizing_rules/builder/separated_nonempty_list.rb +27 -0
- data/lib/lrama/grammar/parameterizing_rules/builder.rb +43 -0
- data/lib/lrama/grammar/percent_code.rb +12 -0
- data/lib/lrama/grammar/printer.rb +3 -3
- data/lib/lrama/grammar/reference.rb +7 -16
- data/lib/lrama/grammar/rule.rb +18 -2
- data/lib/lrama/grammar/rule_builder.rb +179 -0
- data/lib/lrama/grammar/symbol.rb +2 -2
- data/lib/lrama/grammar.rb +132 -302
- data/lib/lrama/lexer/location.rb +22 -0
- data/lib/lrama/lexer/token/char.rb +8 -0
- data/lib/lrama/lexer/token/ident.rb +8 -0
- data/lib/lrama/lexer/token/parameterizing.rb +34 -0
- data/lib/lrama/lexer/token/tag.rb +12 -0
- data/lib/lrama/lexer/token/user_code.rb +64 -0
- data/lib/lrama/lexer/token.rb +23 -63
- data/lib/lrama/lexer.rb +38 -37
- data/lib/lrama/option_parser.rb +2 -1
- data/lib/lrama/options.rb +2 -2
- data/lib/lrama/output.rb +11 -2
- data/lib/lrama/parser.rb +607 -488
- data/lib/lrama/report/profile.rb +1 -12
- data/lib/lrama/version.rb +1 -1
- data/parser.y +177 -96
- data/rbs_collection.lock.yaml +17 -1
- data/rbs_collection.yaml +1 -0
- data/sample/calc.y +3 -1
- data/sample/parse.y +5 -1
- data/sig/lrama/grammar/code/printer_code.rbs +15 -0
- data/sig/lrama/grammar/code.rbs +24 -0
- data/sig/lrama/grammar/counter.rbs +11 -0
- data/sig/lrama/grammar/parameterizing_rules/builder.rbs +10 -0
- data/sig/lrama/grammar/percent_code.rbs +10 -0
- data/sig/lrama/grammar/precedence.rbs +11 -0
- data/sig/lrama/grammar/printer.rbs +11 -0
- data/sig/lrama/grammar/reference.rbs +22 -0
- data/sig/lrama/grammar/rule.rbs +13 -0
- data/sig/lrama/grammar/rule_builder.rbs +41 -0
- data/sig/lrama/grammar.rbs +5 -0
- data/sig/lrama/lexer/location.rbs +14 -0
- data/sig/lrama/lexer/token/char.rbs +8 -0
- data/sig/lrama/lexer/token/ident.rbs +8 -0
- data/sig/lrama/lexer/token/parameterizing.rbs +15 -0
- data/sig/lrama/lexer/token/tag.rbs +9 -0
- data/sig/lrama/lexer/token/user_code.rbs +16 -0
- data/sig/lrama/lexer/token.rbs +22 -0
- data/sig/stdlib/strscan/string_scanner.rbs +5 -0
- data/template/bison/_yacc.h +2 -2
- data/template/bison/yacc.c +5 -2
- metadata +44 -4
- data/lib/lrama/lexer/token/type.rb +0 -8
- data/sig/lrama/lexer/token/type.rbs +0 -17
data/lib/lrama/grammar.rb
CHANGED
@@ -1,36 +1,39 @@
|
|
1
|
-
require "strscan"
|
2
|
-
|
3
1
|
require "lrama/grammar/auxiliary"
|
4
2
|
require "lrama/grammar/code"
|
3
|
+
require "lrama/grammar/counter"
|
5
4
|
require "lrama/grammar/error_token"
|
5
|
+
require "lrama/grammar/percent_code"
|
6
6
|
require "lrama/grammar/precedence"
|
7
7
|
require "lrama/grammar/printer"
|
8
8
|
require "lrama/grammar/reference"
|
9
9
|
require "lrama/grammar/rule"
|
10
|
+
require "lrama/grammar/rule_builder"
|
10
11
|
require "lrama/grammar/symbol"
|
11
12
|
require "lrama/grammar/union"
|
12
13
|
require "lrama/lexer"
|
13
14
|
require "lrama/type"
|
14
15
|
|
15
16
|
module Lrama
|
16
|
-
Token = Lrama::Lexer::Token
|
17
|
-
|
18
17
|
# Grammar is the result of parsing an input grammar file
|
19
18
|
class Grammar
|
20
|
-
attr_reader :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol, :aux
|
19
|
+
attr_reader :percent_codes, :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol, :aux
|
21
20
|
attr_accessor :union, :expect,
|
22
21
|
:printers, :error_tokens,
|
23
22
|
:lex_param, :parse_param, :initial_action,
|
24
23
|
:symbols, :types,
|
25
|
-
:rules, :
|
24
|
+
:rules, :rule_builders,
|
26
25
|
:sym_to_rules
|
27
26
|
|
28
|
-
def initialize
|
27
|
+
def initialize(rule_counter)
|
28
|
+
@rule_counter = rule_counter
|
29
|
+
|
30
|
+
# Code defined by "%code"
|
31
|
+
@percent_codes = []
|
29
32
|
@printers = []
|
30
33
|
@error_tokens = []
|
31
34
|
@symbols = []
|
32
35
|
@types = []
|
33
|
-
@
|
36
|
+
@rule_builders = []
|
34
37
|
@rules = []
|
35
38
|
@sym_to_rules = {}
|
36
39
|
@empty_symbol = nil
|
@@ -43,12 +46,16 @@ module Lrama
|
|
43
46
|
append_special_symbols
|
44
47
|
end
|
45
48
|
|
46
|
-
def
|
47
|
-
@
|
49
|
+
def add_percent_code(id:, code:)
|
50
|
+
@percent_codes << PercentCode.new(id, code)
|
48
51
|
end
|
49
52
|
|
50
|
-
def
|
51
|
-
@
|
53
|
+
def add_printer(ident_or_tags:, token_code:, lineno:)
|
54
|
+
@printers << Printer.new(ident_or_tags: ident_or_tags, token_code: token_code, lineno: lineno)
|
55
|
+
end
|
56
|
+
|
57
|
+
def add_error_token(ident_or_tags:, token_code:, lineno:)
|
58
|
+
@error_tokens << ErrorToken.new(ident_or_tags: ident_or_tags, token_code: token_code, lineno: lineno)
|
52
59
|
end
|
53
60
|
|
54
61
|
def add_term(id:, alias_name: nil, tag: nil, token_id: nil, replace: false)
|
@@ -118,21 +125,8 @@ module Lrama
|
|
118
125
|
@union = Union.new(code: code, lineno: lineno)
|
119
126
|
end
|
120
127
|
|
121
|
-
def
|
122
|
-
@
|
123
|
-
end
|
124
|
-
|
125
|
-
def build_references(token_code)
|
126
|
-
token_code.references.map! do |type, value, tag, first_column, last_column|
|
127
|
-
Reference.new(type: type, value: value, ex_tag: tag, first_column: first_column, last_column: last_column)
|
128
|
-
end
|
129
|
-
|
130
|
-
token_code
|
131
|
-
end
|
132
|
-
|
133
|
-
def build_code(type, token_code)
|
134
|
-
build_references(token_code)
|
135
|
-
Code.new(type: type, token_code: token_code)
|
128
|
+
def add_rule_builder(builder)
|
129
|
+
@rule_builders << builder
|
136
130
|
end
|
137
131
|
|
138
132
|
def prologue_first_lineno=(prologue_first_lineno)
|
@@ -162,14 +156,74 @@ module Lrama
|
|
162
156
|
fill_symbol_printer
|
163
157
|
fill_symbol_error_token
|
164
158
|
@symbols.sort_by!(&:number)
|
159
|
+
compute_nullable
|
160
|
+
compute_first_set
|
165
161
|
end
|
166
162
|
|
167
163
|
# TODO: More validation methods
|
164
|
+
#
|
165
|
+
# * Validation for no_declared_type_reference
|
168
166
|
def validate!
|
169
167
|
validate_symbol_number_uniqueness!
|
170
|
-
|
168
|
+
validate_symbol_alias_name_uniqueness!
|
169
|
+
validate_rule_lhs_is_nterm!
|
170
|
+
end
|
171
|
+
|
172
|
+
def find_symbol_by_s_value(s_value)
|
173
|
+
@symbols.find do |sym|
|
174
|
+
sym.id.s_value == s_value
|
175
|
+
end
|
176
|
+
end
|
177
|
+
|
178
|
+
def find_symbol_by_s_value!(s_value)
|
179
|
+
find_symbol_by_s_value(s_value) || (raise "Symbol not found: #{s_value}")
|
180
|
+
end
|
181
|
+
|
182
|
+
def find_symbol_by_id(id)
|
183
|
+
@symbols.find do |sym|
|
184
|
+
sym.id == id || sym.alias_name == id.s_value
|
185
|
+
end
|
186
|
+
end
|
187
|
+
|
188
|
+
def find_symbol_by_id!(id)
|
189
|
+
find_symbol_by_id(id) || (raise "Symbol not found: #{id}")
|
190
|
+
end
|
191
|
+
|
192
|
+
def find_symbol_by_number!(number)
|
193
|
+
sym = @symbols[number]
|
194
|
+
|
195
|
+
raise "Symbol not found: #{number}" unless sym
|
196
|
+
raise "[BUG] Symbol number mismatch. #{number}, #{sym}" if sym.number != number
|
197
|
+
|
198
|
+
sym
|
199
|
+
end
|
200
|
+
|
201
|
+
def find_rules_by_symbol!(sym)
|
202
|
+
find_rules_by_symbol(sym) || (raise "Rules for #{sym} not found")
|
171
203
|
end
|
172
204
|
|
205
|
+
def find_rules_by_symbol(sym)
|
206
|
+
@sym_to_rules[sym.number]
|
207
|
+
end
|
208
|
+
|
209
|
+
def terms_count
|
210
|
+
terms.count
|
211
|
+
end
|
212
|
+
|
213
|
+
def terms
|
214
|
+
@terms ||= @symbols.select(&:term?)
|
215
|
+
end
|
216
|
+
|
217
|
+
def nterms_count
|
218
|
+
nterms.count
|
219
|
+
end
|
220
|
+
|
221
|
+
def nterms
|
222
|
+
@nterms ||= @symbols.select(&:nterm?)
|
223
|
+
end
|
224
|
+
|
225
|
+
private
|
226
|
+
|
173
227
|
def compute_nullable
|
174
228
|
@rules.each do |rule|
|
175
229
|
case
|
@@ -254,167 +308,12 @@ module Lrama
|
|
254
308
|
end
|
255
309
|
end
|
256
310
|
|
257
|
-
def
|
258
|
-
@
|
259
|
-
|
260
|
-
end
|
261
|
-
end
|
262
|
-
|
263
|
-
def find_symbol_by_s_value!(s_value)
|
264
|
-
find_symbol_by_s_value(s_value) || (raise "Symbol not found: #{s_value}")
|
265
|
-
end
|
266
|
-
|
267
|
-
def find_symbol_by_id(id)
|
268
|
-
@symbols.find do |sym|
|
269
|
-
# TODO: validate uniqueness of Token#s_value and Symbol#alias_name
|
270
|
-
sym.id == id || sym.alias_name == id.s_value
|
311
|
+
def setup_rules
|
312
|
+
@rule_builders.each do |builder|
|
313
|
+
builder.setup_rules
|
271
314
|
end
|
272
315
|
end
|
273
316
|
|
274
|
-
def find_symbol_by_id!(id)
|
275
|
-
find_symbol_by_id(id) || (raise "Symbol not found: #{id}")
|
276
|
-
end
|
277
|
-
|
278
|
-
def find_symbol_by_number!(number)
|
279
|
-
sym = @symbols[number]
|
280
|
-
|
281
|
-
raise "Symbol not found: #{number}" unless sym
|
282
|
-
raise "[BUG] Symbol number mismatch. #{number}, #{sym}" if sym.number != number
|
283
|
-
|
284
|
-
sym
|
285
|
-
end
|
286
|
-
|
287
|
-
def find_rules_by_symbol!(sym)
|
288
|
-
find_rules_by_symbol(sym) || (raise "Rules for #{sym} not found")
|
289
|
-
end
|
290
|
-
|
291
|
-
def find_rules_by_symbol(sym)
|
292
|
-
@sym_to_rules[sym.number]
|
293
|
-
end
|
294
|
-
|
295
|
-
def terms_count
|
296
|
-
terms.count
|
297
|
-
end
|
298
|
-
|
299
|
-
def terms
|
300
|
-
@terms ||= @symbols.select(&:term?)
|
301
|
-
end
|
302
|
-
|
303
|
-
def nterms_count
|
304
|
-
nterms.count
|
305
|
-
end
|
306
|
-
|
307
|
-
def nterms
|
308
|
-
@nterms ||= @symbols.select(&:nterm?)
|
309
|
-
end
|
310
|
-
|
311
|
-
def scan_reference(scanner)
|
312
|
-
start = scanner.pos
|
313
|
-
case
|
314
|
-
# $ references
|
315
|
-
# It need to wrap an identifier with brackets to use ".-" for identifiers
|
316
|
-
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\$/) # $$, $<long>$
|
317
|
-
tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
|
318
|
-
return [:dollar, "$", tag, start, scanner.pos - 1]
|
319
|
-
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1
|
320
|
-
tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
|
321
|
-
return [:dollar, Integer(scanner[2]), tag, start, scanner.pos - 1]
|
322
|
-
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_][a-zA-Z0-9_]*)/) # $foo, $expr, $<long>program (named reference without brackets)
|
323
|
-
tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
|
324
|
-
return [:dollar, scanner[2], tag, start, scanner.pos - 1]
|
325
|
-
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $expr.right, $expr-right, $<long>program (named reference with brackets)
|
326
|
-
tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
|
327
|
-
return [:dollar, scanner[2], tag, start, scanner.pos - 1]
|
328
|
-
|
329
|
-
# @ references
|
330
|
-
# It need to wrap an identifier with brackets to use ".-" for identifiers
|
331
|
-
when scanner.scan(/@\$/) # @$
|
332
|
-
return [:at, "$", nil, start, scanner.pos - 1]
|
333
|
-
when scanner.scan(/@(\d+)/) # @1
|
334
|
-
return [:at, Integer(scanner[1]), nil, start, scanner.pos - 1]
|
335
|
-
when scanner.scan(/@([a-zA-Z][a-zA-Z0-9_]*)/) # @foo, @expr (named reference without brackets)
|
336
|
-
return [:at, scanner[1], nil, start, scanner.pos - 1]
|
337
|
-
when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @expr.right, @expr-right (named reference with brackets)
|
338
|
-
return [:at, scanner[1], nil, start, scanner.pos - 1]
|
339
|
-
end
|
340
|
-
end
|
341
|
-
|
342
|
-
def extract_references
|
343
|
-
unless initial_action.nil?
|
344
|
-
scanner = StringScanner.new(initial_action.s_value)
|
345
|
-
references = []
|
346
|
-
|
347
|
-
while !scanner.eos? do
|
348
|
-
if reference = scan_reference(scanner)
|
349
|
-
references << reference
|
350
|
-
else
|
351
|
-
scanner.getch
|
352
|
-
end
|
353
|
-
end
|
354
|
-
|
355
|
-
initial_action.token_code.references = references
|
356
|
-
build_references(initial_action.token_code)
|
357
|
-
end
|
358
|
-
|
359
|
-
@printers.each do |printer|
|
360
|
-
scanner = StringScanner.new(printer.code.s_value)
|
361
|
-
references = []
|
362
|
-
|
363
|
-
while !scanner.eos? do
|
364
|
-
if reference = scan_reference(scanner)
|
365
|
-
references << reference
|
366
|
-
else
|
367
|
-
scanner.getch
|
368
|
-
end
|
369
|
-
end
|
370
|
-
|
371
|
-
printer.code.token_code.references = references
|
372
|
-
build_references(printer.code.token_code)
|
373
|
-
end
|
374
|
-
|
375
|
-
@error_tokens.each do |error_token|
|
376
|
-
scanner = StringScanner.new(error_token.code.s_value)
|
377
|
-
references = []
|
378
|
-
|
379
|
-
while !scanner.eos? do
|
380
|
-
if reference = scan_reference(scanner)
|
381
|
-
references << reference
|
382
|
-
else
|
383
|
-
scanner.getch
|
384
|
-
end
|
385
|
-
end
|
386
|
-
|
387
|
-
error_token.code.token_code.references = references
|
388
|
-
build_references(error_token.code.token_code)
|
389
|
-
end
|
390
|
-
|
391
|
-
@_rules.each do |lhs, rhs, _|
|
392
|
-
rhs.each_with_index do |token, index|
|
393
|
-
next if token.class == Lrama::Grammar::Symbol || token.type != Lrama::Lexer::Token::User_code
|
394
|
-
|
395
|
-
scanner = StringScanner.new(token.s_value)
|
396
|
-
references = []
|
397
|
-
|
398
|
-
while !scanner.eos? do
|
399
|
-
case
|
400
|
-
when reference = scan_reference(scanner)
|
401
|
-
references << reference
|
402
|
-
when scanner.scan(/\/\*/)
|
403
|
-
scanner.scan_until(/\*\//)
|
404
|
-
else
|
405
|
-
scanner.getch
|
406
|
-
end
|
407
|
-
end
|
408
|
-
|
409
|
-
token.references = references
|
410
|
-
token.numberize_references(lhs, rhs)
|
411
|
-
build_references(token)
|
412
|
-
end
|
413
|
-
end
|
414
|
-
end
|
415
|
-
|
416
|
-
private
|
417
|
-
|
418
317
|
def find_nterm_by_id!(id)
|
419
318
|
nterms.find do |nterm|
|
420
319
|
nterm.id == id
|
@@ -428,33 +327,32 @@ module Lrama
|
|
428
327
|
# @empty_symbol = term
|
429
328
|
|
430
329
|
# YYEOF
|
431
|
-
term = add_term(id: Token.new(
|
330
|
+
term = add_term(id: Lrama::Lexer::Token::Ident.new(s_value: "YYEOF"), alias_name: "\"end of file\"", token_id: 0)
|
432
331
|
term.number = 0
|
433
332
|
term.eof_symbol = true
|
434
333
|
@eof_symbol = term
|
435
334
|
|
436
335
|
# YYerror
|
437
|
-
term = add_term(id: Token.new(
|
336
|
+
term = add_term(id: Lrama::Lexer::Token::Ident.new(s_value: "YYerror"), alias_name: "error")
|
438
337
|
term.number = 1
|
439
338
|
term.error_symbol = true
|
440
339
|
@error_symbol = term
|
441
340
|
|
442
341
|
# YYUNDEF
|
443
|
-
term = add_term(id: Token.new(
|
342
|
+
term = add_term(id: Lrama::Lexer::Token::Ident.new(s_value: "YYUNDEF"), alias_name: "\"invalid token\"")
|
444
343
|
term.number = 2
|
445
344
|
term.undef_symbol = true
|
446
345
|
@undef_symbol = term
|
447
346
|
|
448
347
|
# $accept
|
449
|
-
term = add_nterm(id: Token.new(
|
348
|
+
term = add_nterm(id: Lrama::Lexer::Token::Ident.new(s_value: "$accept"))
|
450
349
|
term.accept_symbol = true
|
451
350
|
@accept_symbol = term
|
452
351
|
end
|
453
352
|
|
454
353
|
# 1. Add $accept rule to the top of rules
|
455
|
-
# 2. Extract
|
456
|
-
# 3.
|
457
|
-
# 4. Append id and extract action then create Rule
|
354
|
+
# 2. Extract action in the middle of RHS into new Empty rule
|
355
|
+
# 3. Append id and extract action then create Rule
|
458
356
|
#
|
459
357
|
# Bison 3.8.2 uses different orders for symbol number and rule number
|
460
358
|
# when a rule has actions in the middle of a rule.
|
@@ -475,99 +373,37 @@ module Lrama
|
|
475
373
|
#
|
476
374
|
def normalize_rules
|
477
375
|
# 1. Add $accept rule to the top of rules
|
478
|
-
accept =
|
479
|
-
eof =
|
480
|
-
lineno = @
|
481
|
-
@rules << Rule.new(id: @
|
482
|
-
|
483
|
-
extracted_action_number = 1 # @n as nterm
|
376
|
+
accept = @accept_symbol
|
377
|
+
eof = @eof_symbol
|
378
|
+
lineno = @rule_builders.first ? @rule_builders.first.line : 0
|
379
|
+
@rules << Rule.new(id: @rule_counter.increment, _lhs: accept.id, _rhs: [@rule_builders.first.lhs, eof.id], token_code: nil, lineno: lineno)
|
484
380
|
|
485
|
-
|
486
|
-
a = []
|
487
|
-
rhs1 = []
|
488
|
-
code = nil
|
489
|
-
precedence_sym = nil
|
490
|
-
|
491
|
-
# 2. Extract precedence and last action
|
492
|
-
rhs.reverse.each do |r|
|
493
|
-
case
|
494
|
-
when r.is_a?(Symbol) # precedence_sym
|
495
|
-
precedence_sym = r
|
496
|
-
when (r.type == Token::User_code) && precedence_sym.nil? && code.nil? && rhs1.empty?
|
497
|
-
code = r
|
498
|
-
else
|
499
|
-
rhs1 << r
|
500
|
-
end
|
501
|
-
end
|
502
|
-
rhs1.reverse!
|
503
|
-
|
504
|
-
# Bison n'th component is 1-origin
|
505
|
-
(rhs1 + [code]).compact.each.with_index(1) do |token, i|
|
506
|
-
if token.type == Token::User_code
|
507
|
-
token.references.each do |ref|
|
508
|
-
# Need to keep position_in_rhs for actions in the middle of RHS
|
509
|
-
ref.position_in_rhs = i - 1
|
510
|
-
next if ref.type == :at
|
511
|
-
# $$, $n, @$, @n can be used in any actions
|
512
|
-
|
513
|
-
if ref.value == "$"
|
514
|
-
# TODO: Should be postponed after middle actions are extracted?
|
515
|
-
ref.referring_symbol = lhs
|
516
|
-
elsif ref.value.is_a?(Integer)
|
517
|
-
raise "Can not refer following component. #{ref.value} >= #{i}. #{token}" if ref.value >= i
|
518
|
-
rhs1[ref.value - 1].referred = true
|
519
|
-
ref.referring_symbol = rhs1[ref.value - 1]
|
520
|
-
elsif ref.value.is_a?(String)
|
521
|
-
target_tokens = ([lhs] + rhs1 + [code]).compact.first(i)
|
522
|
-
referring_symbol_candidate = target_tokens.filter {|token| token.referred_by?(ref.value) }
|
523
|
-
raise "Referring symbol `#{ref.value}` is duplicated. #{token}" if referring_symbol_candidate.size >= 2
|
524
|
-
raise "Referring symbol `#{ref.value}` is not found. #{token}" if referring_symbol_candidate.count == 0
|
525
|
-
|
526
|
-
referring_symbol = referring_symbol_candidate.first
|
527
|
-
referring_symbol.referred = true
|
528
|
-
ref.referring_symbol = referring_symbol
|
529
|
-
end
|
530
|
-
end
|
531
|
-
end
|
532
|
-
end
|
381
|
+
setup_rules
|
533
382
|
|
534
|
-
|
535
|
-
|
536
|
-
|
537
|
-
|
538
|
-
extracted_action_number += 1
|
539
|
-
a << [new_token, token]
|
540
|
-
new_token
|
541
|
-
else
|
542
|
-
token
|
543
|
-
end
|
383
|
+
@rule_builders.each do |builder|
|
384
|
+
# Extract actions in the middle of RHS into new rules.
|
385
|
+
builder.midrule_action_rules.each do |rule|
|
386
|
+
@rules << rule
|
544
387
|
end
|
545
388
|
|
546
|
-
|
547
|
-
|
548
|
-
|
549
|
-
@rules << Rule.new(id: @rules.count, lhs: new_token, rhs: [], code: Code.new(type: :user_code, token_code: code), lineno: code.line)
|
389
|
+
builder.rules.each do |rule|
|
390
|
+
add_nterm(id: rule._lhs)
|
391
|
+
@rules << rule
|
550
392
|
end
|
551
393
|
|
552
|
-
|
553
|
-
|
554
|
-
|
555
|
-
add_nterm(id: lhs)
|
556
|
-
a.each do |new_token, _|
|
557
|
-
add_nterm(id: new_token)
|
394
|
+
builder.midrule_action_rules.each do |rule|
|
395
|
+
add_nterm(id: rule._lhs)
|
558
396
|
end
|
559
397
|
end
|
560
398
|
end
|
561
399
|
|
562
400
|
# Collect symbols from rules
|
563
401
|
def collect_symbols
|
564
|
-
@rules.flat_map(&:
|
402
|
+
@rules.flat_map(&:_rhs).each do |s|
|
565
403
|
case s
|
566
|
-
when Token
|
567
|
-
|
568
|
-
|
569
|
-
end
|
570
|
-
when Symbol
|
404
|
+
when Lrama::Lexer::Token::Char
|
405
|
+
add_term(id: s)
|
406
|
+
when Lrama::Lexer::Token
|
571
407
|
# skip
|
572
408
|
else
|
573
409
|
raise "Unknown class: #{s}"
|
@@ -607,7 +443,7 @@ module Lrama
|
|
607
443
|
|
608
444
|
# If id is Token::Char, it uses ASCII code
|
609
445
|
if sym.term? && sym.token_id.nil?
|
610
|
-
if sym.id.
|
446
|
+
if sym.id.is_a?(Lrama::Lexer::Token::Char)
|
611
447
|
# Ignore ' on the both sides
|
612
448
|
case sym.id.s_value[1..-2]
|
613
449
|
when "\\b"
|
@@ -650,30 +486,18 @@ module Lrama
|
|
650
486
|
|
651
487
|
def replace_token_with_symbol
|
652
488
|
@rules.each do |rule|
|
653
|
-
rule.lhs = token_to_symbol(rule.
|
489
|
+
rule.lhs = token_to_symbol(rule._lhs) if rule._lhs
|
654
490
|
|
655
|
-
rule.rhs.map
|
491
|
+
rule.rhs = rule._rhs.map do |t|
|
656
492
|
token_to_symbol(t)
|
657
493
|
end
|
658
|
-
|
659
|
-
if rule.code
|
660
|
-
rule.code.references.each do |ref|
|
661
|
-
next if ref.type == :at
|
662
|
-
|
663
|
-
if ref.referring_symbol.type != Token::User_code
|
664
|
-
ref.referring_symbol = token_to_symbol(ref.referring_symbol)
|
665
|
-
end
|
666
|
-
end
|
667
|
-
end
|
668
494
|
end
|
669
495
|
end
|
670
496
|
|
671
497
|
def token_to_symbol(token)
|
672
498
|
case token
|
673
|
-
when Token
|
499
|
+
when Lrama::Lexer::Token
|
674
500
|
find_symbol_by_id!(token)
|
675
|
-
when Symbol
|
676
|
-
token
|
677
501
|
else
|
678
502
|
raise "Unknown class: #{token}"
|
679
503
|
end
|
@@ -716,10 +540,10 @@ module Lrama
|
|
716
540
|
@symbols.each do |sym|
|
717
541
|
@printers.each do |printer|
|
718
542
|
printer.ident_or_tags.each do |ident_or_tag|
|
719
|
-
case ident_or_tag
|
720
|
-
when Token::Ident
|
543
|
+
case ident_or_tag
|
544
|
+
when Lrama::Lexer::Token::Ident
|
721
545
|
sym.printer = printer if sym.id == ident_or_tag
|
722
|
-
when Token::Tag
|
546
|
+
when Lrama::Lexer::Token::Tag
|
723
547
|
sym.printer = printer if sym.tag == ident_or_tag
|
724
548
|
else
|
725
549
|
raise "Unknown token type. #{printer}"
|
@@ -733,10 +557,10 @@ module Lrama
|
|
733
557
|
@symbols.each do |sym|
|
734
558
|
@error_tokens.each do |error_token|
|
735
559
|
error_token.ident_or_tags.each do |ident_or_tag|
|
736
|
-
case ident_or_tag
|
737
|
-
when Token::Ident
|
560
|
+
case ident_or_tag
|
561
|
+
when Lrama::Lexer::Token::Ident
|
738
562
|
sym.error_token = error_token if sym.id == ident_or_tag
|
739
|
-
when Token::Tag
|
563
|
+
when Lrama::Lexer::Token::Tag
|
740
564
|
sym.error_token = error_token if sym.tag == ident_or_tag
|
741
565
|
else
|
742
566
|
raise "Unknown token type. #{error_token}"
|
@@ -756,17 +580,23 @@ module Lrama
|
|
756
580
|
raise "Symbol number is duplicated. #{invalid}"
|
757
581
|
end
|
758
582
|
|
759
|
-
def
|
583
|
+
def validate_symbol_alias_name_uniqueness!
|
584
|
+
invalid = @symbols.select(&:alias_name).group_by(&:alias_name).select do |alias_name, syms|
|
585
|
+
syms.count > 1
|
586
|
+
end
|
587
|
+
|
588
|
+
return if invalid.empty?
|
589
|
+
|
590
|
+
raise "Symbol alias name is duplicated. #{invalid}"
|
591
|
+
end
|
592
|
+
|
593
|
+
def validate_rule_lhs_is_nterm!
|
760
594
|
errors = []
|
761
595
|
|
762
596
|
rules.each do |rule|
|
763
|
-
next
|
597
|
+
next if rule.lhs.nterm?
|
764
598
|
|
765
|
-
rule.
|
766
|
-
ref.type == :dollar && !ref.tag
|
767
|
-
end.each do |ref|
|
768
|
-
errors << "$#{ref.value} of '#{rule.lhs.id.s_value}' has no declared type"
|
769
|
-
end
|
599
|
+
errors << "[BUG] LHS of #{rule} (line: #{rule.lineno}) is term. It should be nterm."
|
770
600
|
end
|
771
601
|
|
772
602
|
return if errors.empty?
|
@@ -0,0 +1,22 @@
|
|
1
|
+
module Lrama
|
2
|
+
class Lexer
|
3
|
+
class Location
|
4
|
+
attr_reader :first_line, :first_column, :last_line, :last_column
|
5
|
+
|
6
|
+
def initialize(first_line:, first_column:, last_line:, last_column:)
|
7
|
+
@first_line = first_line
|
8
|
+
@first_column = first_column
|
9
|
+
@last_line = last_line
|
10
|
+
@last_column = last_column
|
11
|
+
end
|
12
|
+
|
13
|
+
def ==(other)
|
14
|
+
self.class == other.class &&
|
15
|
+
self.first_line == other.first_line &&
|
16
|
+
self.first_column == other.first_column &&
|
17
|
+
self.last_line == other.last_line &&
|
18
|
+
self.last_column == other.last_column
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
module Lrama
|
2
|
+
class Lexer
|
3
|
+
class Token
|
4
|
+
class Parameterizing < Token
|
5
|
+
attr_accessor :args
|
6
|
+
|
7
|
+
def initialize(s_value: nil, alias_name: nil, location: nil, args: [])
|
8
|
+
super s_value: s_value, alias_name: alias_name, location: location
|
9
|
+
@args = args
|
10
|
+
end
|
11
|
+
|
12
|
+
def option?
|
13
|
+
%w(option ?).include?(self.s_value)
|
14
|
+
end
|
15
|
+
|
16
|
+
def nonempty_list?
|
17
|
+
%w(nonempty_list +).include?(self.s_value)
|
18
|
+
end
|
19
|
+
|
20
|
+
def list?
|
21
|
+
%w(list *).include?(self.s_value)
|
22
|
+
end
|
23
|
+
|
24
|
+
def separated_nonempty_list?
|
25
|
+
%w(separated_nonempty_list).include?(self.s_value)
|
26
|
+
end
|
27
|
+
|
28
|
+
def separated_list?
|
29
|
+
%w(separated_list).include?(self.s_value)
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|