lrama 0.5.9 → 0.5.11
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/test.yaml +25 -0
- data/.gitignore +7 -4
- data/Gemfile +9 -5
- data/Rakefile +13 -0
- data/Steepfile +13 -11
- data/lib/lrama/context.rb +1 -3
- data/lib/lrama/counterexamples/path.rb +0 -46
- data/lib/lrama/counterexamples/production_path.rb +17 -0
- data/lib/lrama/counterexamples/start_path.rb +21 -0
- data/lib/lrama/counterexamples/transition_path.rb +17 -0
- data/lib/lrama/counterexamples.rb +3 -0
- data/lib/lrama/grammar/code/initial_action_code.rb +28 -0
- data/lib/lrama/grammar/code/no_reference_code.rb +24 -0
- data/lib/lrama/grammar/code/printer_code.rb +34 -0
- data/lib/lrama/grammar/code/rule_action.rb +62 -0
- data/lib/lrama/grammar/code.rb +9 -93
- data/lib/lrama/grammar/counter.rb +15 -0
- data/lib/lrama/grammar/error_token.rb +3 -3
- data/lib/lrama/grammar/parameterizing_rules/builder/base.rb +36 -0
- data/lib/lrama/grammar/parameterizing_rules/builder/list.rb +28 -0
- data/lib/lrama/grammar/parameterizing_rules/builder/nonempty_list.rb +28 -0
- data/lib/lrama/grammar/parameterizing_rules/builder/option.rb +28 -0
- data/lib/lrama/grammar/parameterizing_rules/builder/separated_list.rb +39 -0
- data/lib/lrama/grammar/parameterizing_rules/builder/separated_nonempty_list.rb +34 -0
- data/lib/lrama/grammar/parameterizing_rules/builder.rb +60 -0
- data/lib/lrama/grammar/printer.rb +3 -3
- data/lib/lrama/grammar/reference.rb +7 -16
- data/lib/lrama/grammar/rule.rb +19 -2
- data/lib/lrama/grammar/rule_builder.rb +177 -0
- data/lib/lrama/grammar/symbol.rb +16 -2
- data/lib/lrama/grammar/type.rb +6 -0
- data/lib/lrama/grammar.rb +115 -325
- data/lib/lrama/lexer/location.rb +22 -0
- data/lib/lrama/lexer/token/parameterizing.rb +18 -3
- data/lib/lrama/lexer/token/tag.rb +4 -0
- data/lib/lrama/lexer/token/user_code.rb +54 -4
- data/lib/lrama/lexer/token.rb +35 -10
- data/lib/lrama/lexer.rb +32 -31
- data/lib/lrama/options.rb +1 -2
- data/lib/lrama/output.rb +2 -2
- data/lib/lrama/parser.rb +514 -424
- data/lib/lrama/report/profile.rb +1 -12
- data/lib/lrama/version.rb +1 -1
- data/lib/lrama.rb +0 -1
- data/parser.y +111 -52
- data/rbs_collection.lock.yaml +6 -8
- data/rbs_collection.yaml +1 -0
- data/sig/lrama/grammar/code/printer_code.rbs +15 -0
- data/sig/lrama/grammar/code.rbs +24 -0
- data/sig/lrama/grammar/counter.rbs +11 -0
- data/sig/lrama/grammar/error_token.rbs +11 -0
- data/sig/lrama/grammar/parameterizing_rules/builder/base.rbs +26 -0
- data/sig/lrama/grammar/parameterizing_rules/builder/list.rbs +10 -0
- data/sig/lrama/grammar/parameterizing_rules/builder/nonempty_list.rbs +10 -0
- data/sig/lrama/grammar/parameterizing_rules/builder/option.rbs +10 -0
- data/sig/lrama/grammar/parameterizing_rules/builder/separated_list.rbs +11 -0
- data/sig/lrama/grammar/parameterizing_rules/builder/separated_nonempty_list.rbs +11 -0
- data/sig/lrama/grammar/parameterizing_rules/builder.rbs +23 -0
- data/sig/lrama/grammar/precedence.rbs +11 -0
- data/sig/lrama/grammar/printer.rbs +11 -0
- data/sig/lrama/grammar/reference.rbs +6 -6
- data/sig/lrama/grammar/rule.rbs +13 -0
- data/sig/lrama/grammar/rule_builder.rbs +42 -0
- data/sig/lrama/grammar/symbol.rbs +37 -0
- data/sig/lrama/lexer/location.rbs +14 -0
- data/sig/lrama/lexer/token/parameterizing.rbs +9 -0
- data/sig/lrama/lexer/token/tag.rbs +1 -0
- data/sig/lrama/lexer/token/user_code.rbs +8 -1
- data/sig/lrama/lexer/token.rbs +9 -4
- data/sig/stdlib/strscan/string_scanner.rbs +5 -0
- data/template/bison/yacc.c +5 -2
- metadata +38 -3
- data/lib/lrama/type.rb +0 -4
data/lib/lrama/grammar.rb
CHANGED
@@ -1,17 +1,17 @@
|
|
1
|
-
require "strscan"
|
2
|
-
|
3
1
|
require "lrama/grammar/auxiliary"
|
4
2
|
require "lrama/grammar/code"
|
3
|
+
require "lrama/grammar/counter"
|
5
4
|
require "lrama/grammar/error_token"
|
6
5
|
require "lrama/grammar/percent_code"
|
7
6
|
require "lrama/grammar/precedence"
|
8
7
|
require "lrama/grammar/printer"
|
9
8
|
require "lrama/grammar/reference"
|
10
9
|
require "lrama/grammar/rule"
|
10
|
+
require "lrama/grammar/rule_builder"
|
11
11
|
require "lrama/grammar/symbol"
|
12
|
+
require "lrama/grammar/type"
|
12
13
|
require "lrama/grammar/union"
|
13
14
|
require "lrama/lexer"
|
14
|
-
require "lrama/type"
|
15
15
|
|
16
16
|
module Lrama
|
17
17
|
# Grammar is the result of parsing an input grammar file
|
@@ -21,17 +21,19 @@ module Lrama
|
|
21
21
|
:printers, :error_tokens,
|
22
22
|
:lex_param, :parse_param, :initial_action,
|
23
23
|
:symbols, :types,
|
24
|
-
:rules, :
|
24
|
+
:rules, :rule_builders,
|
25
25
|
:sym_to_rules
|
26
26
|
|
27
|
-
def initialize
|
27
|
+
def initialize(rule_counter)
|
28
|
+
@rule_counter = rule_counter
|
29
|
+
|
28
30
|
# Code defined by "%code"
|
29
31
|
@percent_codes = []
|
30
32
|
@printers = []
|
31
33
|
@error_tokens = []
|
32
34
|
@symbols = []
|
33
35
|
@types = []
|
34
|
-
@
|
36
|
+
@rule_builders = []
|
35
37
|
@rules = []
|
36
38
|
@sym_to_rules = {}
|
37
39
|
@empty_symbol = nil
|
@@ -48,12 +50,12 @@ module Lrama
|
|
48
50
|
@percent_codes << PercentCode.new(id, code)
|
49
51
|
end
|
50
52
|
|
51
|
-
def add_printer(ident_or_tags:,
|
52
|
-
@printers << Printer.new(ident_or_tags: ident_or_tags,
|
53
|
+
def add_printer(ident_or_tags:, token_code:, lineno:)
|
54
|
+
@printers << Printer.new(ident_or_tags: ident_or_tags, token_code: token_code, lineno: lineno)
|
53
55
|
end
|
54
56
|
|
55
|
-
def add_error_token(ident_or_tags:,
|
56
|
-
@error_tokens << ErrorToken.new(ident_or_tags: ident_or_tags,
|
57
|
+
def add_error_token(ident_or_tags:, token_code:, lineno:)
|
58
|
+
@error_tokens << ErrorToken.new(ident_or_tags: ident_or_tags, token_code: token_code, lineno: lineno)
|
57
59
|
end
|
58
60
|
|
59
61
|
def add_term(id:, alias_name: nil, tag: nil, token_id: nil, replace: false)
|
@@ -123,12 +125,8 @@ module Lrama
|
|
123
125
|
@union = Union.new(code: code, lineno: lineno)
|
124
126
|
end
|
125
127
|
|
126
|
-
def
|
127
|
-
@
|
128
|
-
end
|
129
|
-
|
130
|
-
def build_code(type, token_code)
|
131
|
-
Code.new(type: type, token_code: token_code)
|
128
|
+
def add_rule_builder(builder)
|
129
|
+
@rule_builders << builder
|
132
130
|
end
|
133
131
|
|
134
132
|
def prologue_first_lineno=(prologue_first_lineno)
|
@@ -148,10 +146,9 @@ module Lrama
|
|
148
146
|
end
|
149
147
|
|
150
148
|
def prepare
|
151
|
-
extract_references
|
152
149
|
normalize_rules
|
153
150
|
collect_symbols
|
154
|
-
|
151
|
+
set_lhs_and_rhs
|
155
152
|
fill_symbol_number
|
156
153
|
fill_default_precedence
|
157
154
|
fill_sym_to_rules
|
@@ -159,14 +156,74 @@ module Lrama
|
|
159
156
|
fill_symbol_printer
|
160
157
|
fill_symbol_error_token
|
161
158
|
@symbols.sort_by!(&:number)
|
159
|
+
compute_nullable
|
160
|
+
compute_first_set
|
162
161
|
end
|
163
162
|
|
164
163
|
# TODO: More validation methods
|
164
|
+
#
|
165
|
+
# * Validation for no_declared_type_reference
|
165
166
|
def validate!
|
166
167
|
validate_symbol_number_uniqueness!
|
167
|
-
|
168
|
+
validate_symbol_alias_name_uniqueness!
|
169
|
+
validate_rule_lhs_is_nterm!
|
170
|
+
end
|
171
|
+
|
172
|
+
def find_symbol_by_s_value(s_value)
|
173
|
+
@symbols.find do |sym|
|
174
|
+
sym.id.s_value == s_value
|
175
|
+
end
|
176
|
+
end
|
177
|
+
|
178
|
+
def find_symbol_by_s_value!(s_value)
|
179
|
+
find_symbol_by_s_value(s_value) || (raise "Symbol not found: #{s_value}")
|
180
|
+
end
|
181
|
+
|
182
|
+
def find_symbol_by_id(id)
|
183
|
+
@symbols.find do |sym|
|
184
|
+
sym.id == id || sym.alias_name == id.s_value
|
185
|
+
end
|
186
|
+
end
|
187
|
+
|
188
|
+
def find_symbol_by_id!(id)
|
189
|
+
find_symbol_by_id(id) || (raise "Symbol not found: #{id}")
|
190
|
+
end
|
191
|
+
|
192
|
+
def find_symbol_by_number!(number)
|
193
|
+
sym = @symbols[number]
|
194
|
+
|
195
|
+
raise "Symbol not found: #{number}" unless sym
|
196
|
+
raise "[BUG] Symbol number mismatch. #{number}, #{sym}" if sym.number != number
|
197
|
+
|
198
|
+
sym
|
199
|
+
end
|
200
|
+
|
201
|
+
def find_rules_by_symbol!(sym)
|
202
|
+
find_rules_by_symbol(sym) || (raise "Rules for #{sym} not found")
|
203
|
+
end
|
204
|
+
|
205
|
+
def find_rules_by_symbol(sym)
|
206
|
+
@sym_to_rules[sym.number]
|
207
|
+
end
|
208
|
+
|
209
|
+
def terms_count
|
210
|
+
terms.count
|
168
211
|
end
|
169
212
|
|
213
|
+
def terms
|
214
|
+
@terms ||= @symbols.select(&:term?)
|
215
|
+
end
|
216
|
+
|
217
|
+
def nterms_count
|
218
|
+
nterms.count
|
219
|
+
end
|
220
|
+
|
221
|
+
def nterms
|
222
|
+
@nterms ||= @symbols.select(&:nterm?)
|
223
|
+
end
|
224
|
+
|
225
|
+
private
|
226
|
+
|
170
227
|
def compute_nullable
|
171
228
|
@rules.each do |rule|
|
172
229
|
case
|
@@ -251,160 +308,9 @@ module Lrama
|
|
251
308
|
end
|
252
309
|
end
|
253
310
|
|
254
|
-
def
|
255
|
-
@
|
256
|
-
|
257
|
-
end
|
258
|
-
end
|
259
|
-
|
260
|
-
def find_symbol_by_s_value!(s_value)
|
261
|
-
find_symbol_by_s_value(s_value) || (raise "Symbol not found: #{s_value}")
|
262
|
-
end
|
263
|
-
|
264
|
-
def find_symbol_by_id(id)
|
265
|
-
@symbols.find do |sym|
|
266
|
-
# TODO: validate uniqueness of Token#s_value and Symbol#alias_name
|
267
|
-
sym.id == id || sym.alias_name == id.s_value
|
268
|
-
end
|
269
|
-
end
|
270
|
-
|
271
|
-
def find_symbol_by_id!(id)
|
272
|
-
find_symbol_by_id(id) || (raise "Symbol not found: #{id}")
|
273
|
-
end
|
274
|
-
|
275
|
-
def find_symbol_by_number!(number)
|
276
|
-
sym = @symbols[number]
|
277
|
-
|
278
|
-
raise "Symbol not found: #{number}" unless sym
|
279
|
-
raise "[BUG] Symbol number mismatch. #{number}, #{sym}" if sym.number != number
|
280
|
-
|
281
|
-
sym
|
282
|
-
end
|
283
|
-
|
284
|
-
def find_rules_by_symbol!(sym)
|
285
|
-
find_rules_by_symbol(sym) || (raise "Rules for #{sym} not found")
|
286
|
-
end
|
287
|
-
|
288
|
-
def find_rules_by_symbol(sym)
|
289
|
-
@sym_to_rules[sym.number]
|
290
|
-
end
|
291
|
-
|
292
|
-
def terms_count
|
293
|
-
terms.count
|
294
|
-
end
|
295
|
-
|
296
|
-
def terms
|
297
|
-
@terms ||= @symbols.select(&:term?)
|
298
|
-
end
|
299
|
-
|
300
|
-
def nterms_count
|
301
|
-
nterms.count
|
302
|
-
end
|
303
|
-
|
304
|
-
def nterms
|
305
|
-
@nterms ||= @symbols.select(&:nterm?)
|
306
|
-
end
|
307
|
-
|
308
|
-
def scan_reference(scanner)
|
309
|
-
start = scanner.pos
|
310
|
-
case
|
311
|
-
# $ references
|
312
|
-
# It need to wrap an identifier with brackets to use ".-" for identifiers
|
313
|
-
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\$/) # $$, $<long>$
|
314
|
-
tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
|
315
|
-
return Reference.new(type: :dollar, value: "$", ex_tag: tag, first_column: start, last_column: scanner.pos - 1)
|
316
|
-
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1
|
317
|
-
tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
|
318
|
-
return Reference.new(type: :dollar, value: Integer(scanner[2]), ex_tag: tag, first_column: start, last_column: scanner.pos - 1)
|
319
|
-
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_][a-zA-Z0-9_]*)/) # $foo, $expr, $<long>program (named reference without brackets)
|
320
|
-
tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
|
321
|
-
return Reference.new(type: :dollar, value: scanner[2], ex_tag: tag, first_column: start, last_column: scanner.pos - 1)
|
322
|
-
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $expr.right, $expr-right, $<long>program (named reference with brackets)
|
323
|
-
tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
|
324
|
-
return Reference.new(type: :dollar, value: scanner[2], ex_tag: tag, first_column: start, last_column: scanner.pos - 1)
|
325
|
-
|
326
|
-
# @ references
|
327
|
-
# It need to wrap an identifier with brackets to use ".-" for identifiers
|
328
|
-
when scanner.scan(/@\$/) # @$
|
329
|
-
return Reference.new(type: :at, value: "$", first_column: start, last_column: scanner.pos - 1)
|
330
|
-
when scanner.scan(/@(\d+)/) # @1
|
331
|
-
return Reference.new(type: :at, value: Integer(scanner[1]), first_column: start, last_column: scanner.pos - 1)
|
332
|
-
when scanner.scan(/@([a-zA-Z][a-zA-Z0-9_]*)/) # @foo, @expr (named reference without brackets)
|
333
|
-
return Reference.new(type: :at, value: scanner[1], first_column: start, last_column: scanner.pos - 1)
|
334
|
-
when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @expr.right, @expr-right (named reference with brackets)
|
335
|
-
return Reference.new(type: :at, value: scanner[1], first_column: start, last_column: scanner.pos - 1)
|
336
|
-
end
|
337
|
-
end
|
338
|
-
|
339
|
-
private
|
340
|
-
|
341
|
-
def extract_references
|
342
|
-
unless initial_action.nil?
|
343
|
-
scanner = StringScanner.new(initial_action.s_value)
|
344
|
-
references = []
|
345
|
-
|
346
|
-
while !scanner.eos? do
|
347
|
-
if reference = scan_reference(scanner)
|
348
|
-
references << reference
|
349
|
-
else
|
350
|
-
scanner.getch
|
351
|
-
end
|
352
|
-
end
|
353
|
-
|
354
|
-
initial_action.token_code.references = references
|
355
|
-
end
|
356
|
-
|
357
|
-
@printers.each do |printer|
|
358
|
-
scanner = StringScanner.new(printer.code.s_value)
|
359
|
-
references = []
|
360
|
-
|
361
|
-
while !scanner.eos? do
|
362
|
-
if reference = scan_reference(scanner)
|
363
|
-
references << reference
|
364
|
-
else
|
365
|
-
scanner.getch
|
366
|
-
end
|
367
|
-
end
|
368
|
-
|
369
|
-
printer.code.token_code.references = references
|
370
|
-
end
|
371
|
-
|
372
|
-
@error_tokens.each do |error_token|
|
373
|
-
scanner = StringScanner.new(error_token.code.s_value)
|
374
|
-
references = []
|
375
|
-
|
376
|
-
while !scanner.eos? do
|
377
|
-
if reference = scan_reference(scanner)
|
378
|
-
references << reference
|
379
|
-
else
|
380
|
-
scanner.getch
|
381
|
-
end
|
382
|
-
end
|
383
|
-
|
384
|
-
error_token.code.token_code.references = references
|
385
|
-
end
|
386
|
-
|
387
|
-
@_rules.each do |lhs, rhs, _|
|
388
|
-
rhs.each_with_index do |token, index|
|
389
|
-
next unless token.class == Lrama::Lexer::Token::UserCode
|
390
|
-
|
391
|
-
scanner = StringScanner.new(token.s_value)
|
392
|
-
references = []
|
393
|
-
|
394
|
-
while !scanner.eos? do
|
395
|
-
case
|
396
|
-
when reference = scan_reference(scanner)
|
397
|
-
references << reference
|
398
|
-
when scanner.scan(/\/\*/)
|
399
|
-
scanner.scan_until(/\*\//)
|
400
|
-
else
|
401
|
-
scanner.getch
|
402
|
-
end
|
403
|
-
end
|
404
|
-
|
405
|
-
token.references = references
|
406
|
-
numberize_references(lhs, rhs, token.references)
|
407
|
-
end
|
311
|
+
def setup_rules
|
312
|
+
@rule_builders.each do |builder|
|
313
|
+
builder.setup_rules
|
408
314
|
end
|
409
315
|
end
|
410
316
|
|
@@ -444,35 +350,9 @@ module Lrama
|
|
444
350
|
@accept_symbol = term
|
445
351
|
end
|
446
352
|
|
447
|
-
def numberize_references(lhs, rhs, references)
|
448
|
-
references.map! {|ref|
|
449
|
-
ref_name = ref.value
|
450
|
-
if ref_name.is_a?(::String) && ref_name != '$'
|
451
|
-
value =
|
452
|
-
if lhs.referred_by?(ref_name)
|
453
|
-
'$'
|
454
|
-
else
|
455
|
-
index = rhs.find_index {|token| token.referred_by?(ref_name) }
|
456
|
-
|
457
|
-
if index
|
458
|
-
index + 1
|
459
|
-
else
|
460
|
-
raise "'#{ref_name}' is invalid name."
|
461
|
-
end
|
462
|
-
end
|
463
|
-
|
464
|
-
ref.value = value
|
465
|
-
ref
|
466
|
-
else
|
467
|
-
ref
|
468
|
-
end
|
469
|
-
}
|
470
|
-
end
|
471
|
-
|
472
353
|
# 1. Add $accept rule to the top of rules
|
473
|
-
# 2. Extract
|
474
|
-
# 3.
|
475
|
-
# 4. Append id and extract action then create Rule
|
354
|
+
# 2. Extract action in the middle of RHS into new Empty rule
|
355
|
+
# 3. Append id and extract action then create Rule
|
476
356
|
#
|
477
357
|
# Bison 3.8.2 uses different orders for symbol number and rule number
|
478
358
|
# when a rule has actions in the middle of a rule.
|
@@ -493,127 +373,43 @@ module Lrama
|
|
493
373
|
#
|
494
374
|
def normalize_rules
|
495
375
|
# 1. Add $accept rule to the top of rules
|
496
|
-
accept =
|
497
|
-
eof =
|
498
|
-
lineno = @
|
499
|
-
@rules << Rule.new(id: @
|
376
|
+
accept = @accept_symbol
|
377
|
+
eof = @eof_symbol
|
378
|
+
lineno = @rule_builders.first ? @rule_builders.first.line : 0
|
379
|
+
@rules << Rule.new(id: @rule_counter.increment, _lhs: accept.id, _rhs: [@rule_builders.first.lhs, eof.id], token_code: nil, lineno: lineno)
|
500
380
|
|
501
|
-
|
381
|
+
setup_rules
|
502
382
|
|
503
|
-
@
|
504
|
-
|
505
|
-
|
506
|
-
|
507
|
-
precedence_sym = nil
|
508
|
-
|
509
|
-
# 2. Extract precedence and last action
|
510
|
-
rhs.reverse.each do |r|
|
511
|
-
case
|
512
|
-
when r.is_a?(Symbol) # precedence_sym
|
513
|
-
precedence_sym = r
|
514
|
-
when r.is_a?(Lrama::Lexer::Token::UserCode) && precedence_sym.nil? && code.nil? && rhs1.empty?
|
515
|
-
code = r
|
516
|
-
else
|
517
|
-
rhs1 << r
|
518
|
-
end
|
519
|
-
end
|
520
|
-
rhs1.reverse!
|
521
|
-
|
522
|
-
# Bison n'th component is 1-origin
|
523
|
-
(rhs1 + [code]).compact.each.with_index(1) do |token, i|
|
524
|
-
if token.is_a?(Lrama::Lexer::Token::UserCode)
|
525
|
-
token.references.each do |ref|
|
526
|
-
# Need to keep position_in_rhs for actions in the middle of RHS
|
527
|
-
ref.position_in_rhs = i - 1
|
528
|
-
next if ref.type == :at
|
529
|
-
# $$, $n, @$, @n can be used in any actions
|
530
|
-
|
531
|
-
if ref.value == "$"
|
532
|
-
# TODO: Should be postponed after middle actions are extracted?
|
533
|
-
ref.referring_symbol = lhs
|
534
|
-
elsif ref.value.is_a?(Integer)
|
535
|
-
raise "Can not refer following component. #{ref.value} >= #{i}. #{token}" if ref.value >= i
|
536
|
-
rhs1[ref.value - 1].referred = true
|
537
|
-
ref.referring_symbol = rhs1[ref.value - 1]
|
538
|
-
elsif ref.value.is_a?(String)
|
539
|
-
target_tokens = ([lhs] + rhs1 + [code]).compact.first(i)
|
540
|
-
referring_symbol_candidate = target_tokens.filter {|token| token.referred_by?(ref.value) }
|
541
|
-
raise "Referring symbol `#{ref.value}` is duplicated. #{token}" if referring_symbol_candidate.size >= 2
|
542
|
-
raise "Referring symbol `#{ref.value}` is not found. #{token}" if referring_symbol_candidate.count == 0
|
543
|
-
|
544
|
-
referring_symbol = referring_symbol_candidate.first
|
545
|
-
referring_symbol.referred = true
|
546
|
-
ref.referring_symbol = referring_symbol
|
547
|
-
end
|
548
|
-
end
|
549
|
-
end
|
383
|
+
@rule_builders.each do |builder|
|
384
|
+
# Extract actions in the middle of RHS into new rules.
|
385
|
+
builder.midrule_action_rules.each do |rule|
|
386
|
+
@rules << rule
|
550
387
|
end
|
551
388
|
|
552
|
-
|
553
|
-
|
554
|
-
|
555
|
-
new_token = Lrama::Lexer::Token::Ident.new(s_value: prefix + extracted_action_number.to_s)
|
556
|
-
extracted_action_number += 1
|
557
|
-
a << [new_token, token]
|
558
|
-
new_token
|
559
|
-
else
|
560
|
-
token
|
561
|
-
end
|
389
|
+
builder.rules.each do |rule|
|
390
|
+
add_nterm(id: rule._lhs)
|
391
|
+
@rules << rule
|
562
392
|
end
|
563
393
|
|
564
|
-
|
565
|
-
|
566
|
-
|
567
|
-
@rules << Rule.new(id: @rules.count, lhs: new_token, rhs: [], code: Code.new(type: :user_code, token_code: code), lineno: code.line)
|
394
|
+
builder.parameterizing_rules.each do |rule|
|
395
|
+
add_nterm(id: rule._lhs, tag: rule.lhs_tag)
|
396
|
+
@rules << rule
|
568
397
|
end
|
569
398
|
|
570
|
-
|
571
|
-
|
572
|
-
if rhs2.any? {|r| r.is_a?(Lrama::Lexer::Token::Parameterizing) }
|
573
|
-
expand_parameterizing_rules(lhs, rhs2, c, precedence_sym, lineno)
|
574
|
-
else
|
575
|
-
@rules << Rule.new(id: @rules.count, lhs: lhs, rhs: rhs2, code: c, precedence_sym: precedence_sym, lineno: lineno)
|
576
|
-
end
|
577
|
-
add_nterm(id: lhs)
|
578
|
-
a.each do |new_token, _|
|
579
|
-
add_nterm(id: new_token)
|
399
|
+
builder.midrule_action_rules.each do |rule|
|
400
|
+
add_nterm(id: rule._lhs)
|
580
401
|
end
|
581
402
|
end
|
582
403
|
end
|
583
404
|
|
584
|
-
def expand_parameterizing_rules(lhs, rhs, code, precedence_sym, lineno)
|
585
|
-
token = Lrama::Lexer::Token::Ident.new(s_value: rhs[0].s_value)
|
586
|
-
if rhs.any? {|r| r.is_a?(Lrama::Lexer::Token::Parameterizing) && r.option? }
|
587
|
-
option_token = Lrama::Lexer::Token::Ident.new(s_value: "option_#{rhs[0].s_value}")
|
588
|
-
add_term(id: option_token)
|
589
|
-
@rules << Rule.new(id: @rules.count, lhs: lhs, rhs: [option_token], code: code, precedence_sym: precedence_sym, lineno: lineno)
|
590
|
-
@rules << Rule.new(id: @rules.count, lhs: option_token, rhs: [], code: code, precedence_sym: precedence_sym, lineno: lineno)
|
591
|
-
@rules << Rule.new(id: @rules.count, lhs: option_token, rhs: [token], code: code, precedence_sym: precedence_sym, lineno: lineno)
|
592
|
-
elsif rhs.any? {|r| r.is_a?(Lrama::Lexer::Token::Parameterizing) && r.nonempty_list? }
|
593
|
-
nonempty_list_token = Lrama::Lexer::Token::Ident.new(s_value: "nonempty_list_#{rhs[0].s_value}")
|
594
|
-
add_term(id: nonempty_list_token)
|
595
|
-
@rules << Rule.new(id: @rules.count, lhs: lhs, rhs: [nonempty_list_token], code: code, precedence_sym: precedence_sym, lineno: lineno)
|
596
|
-
@rules << Rule.new(id: @rules.count, lhs: nonempty_list_token, rhs: [token], code: code, precedence_sym: precedence_sym, lineno: lineno)
|
597
|
-
@rules << Rule.new(id: @rules.count, lhs: nonempty_list_token, rhs: [nonempty_list_token, token], code: code, precedence_sym: precedence_sym, lineno: lineno)
|
598
|
-
elsif rhs.any? {|r| r.is_a?(Lrama::Lexer::Token::Parameterizing) && r.list? }
|
599
|
-
list_token = Lrama::Lexer::Token::Ident.new(s_value: "list_#{rhs[0].s_value}")
|
600
|
-
add_term(id: list_token)
|
601
|
-
@rules << Rule.new(id: @rules.count, lhs: lhs, rhs: [list_token], code: code, precedence_sym: precedence_sym, lineno: lineno)
|
602
|
-
@rules << Rule.new(id: @rules.count, lhs: list_token, rhs: [], code: code, precedence_sym: precedence_sym, lineno: lineno)
|
603
|
-
@rules << Rule.new(id: @rules.count, lhs: list_token, rhs: [list_token, token], code: code, precedence_sym: precedence_sym, lineno: lineno)
|
604
|
-
end
|
605
|
-
end
|
606
|
-
|
607
405
|
# Collect symbols from rules
|
608
406
|
def collect_symbols
|
609
|
-
@rules.flat_map(&:
|
407
|
+
@rules.flat_map(&:_rhs).each do |s|
|
610
408
|
case s
|
611
409
|
when Lrama::Lexer::Token::Char
|
612
410
|
add_term(id: s)
|
613
411
|
when Lrama::Lexer::Token
|
614
412
|
# skip
|
615
|
-
when Symbol
|
616
|
-
# skip
|
617
413
|
else
|
618
414
|
raise "Unknown class: #{s}"
|
619
415
|
end
|
@@ -693,23 +489,13 @@ module Lrama
|
|
693
489
|
end
|
694
490
|
end
|
695
491
|
|
696
|
-
def
|
492
|
+
def set_lhs_and_rhs
|
697
493
|
@rules.each do |rule|
|
698
|
-
rule.lhs = token_to_symbol(rule.
|
494
|
+
rule.lhs = token_to_symbol(rule._lhs) if rule._lhs
|
699
495
|
|
700
|
-
rule.rhs.map
|
496
|
+
rule.rhs = rule._rhs.map do |t|
|
701
497
|
token_to_symbol(t)
|
702
498
|
end
|
703
|
-
|
704
|
-
if rule.code
|
705
|
-
rule.code.references.each do |ref|
|
706
|
-
next if ref.type == :at
|
707
|
-
|
708
|
-
if !ref.referring_symbol.is_a?(Lrama::Lexer::Token::UserCode)
|
709
|
-
ref.referring_symbol = token_to_symbol(ref.referring_symbol)
|
710
|
-
end
|
711
|
-
end
|
712
|
-
end
|
713
499
|
end
|
714
500
|
end
|
715
501
|
|
@@ -717,8 +503,6 @@ module Lrama
|
|
717
503
|
case token
|
718
504
|
when Lrama::Lexer::Token
|
719
505
|
find_symbol_by_id!(token)
|
720
|
-
when Symbol
|
721
|
-
token
|
722
506
|
else
|
723
507
|
raise "Unknown class: #{token}"
|
724
508
|
end
|
@@ -801,17 +585,23 @@ module Lrama
|
|
801
585
|
raise "Symbol number is duplicated. #{invalid}"
|
802
586
|
end
|
803
587
|
|
804
|
-
def
|
588
|
+
def validate_symbol_alias_name_uniqueness!
|
589
|
+
invalid = @symbols.select(&:alias_name).group_by(&:alias_name).select do |alias_name, syms|
|
590
|
+
syms.count > 1
|
591
|
+
end
|
592
|
+
|
593
|
+
return if invalid.empty?
|
594
|
+
|
595
|
+
raise "Symbol alias name is duplicated. #{invalid}"
|
596
|
+
end
|
597
|
+
|
598
|
+
def validate_rule_lhs_is_nterm!
|
805
599
|
errors = []
|
806
600
|
|
807
601
|
rules.each do |rule|
|
808
|
-
next
|
602
|
+
next if rule.lhs.nterm?
|
809
603
|
|
810
|
-
rule.
|
811
|
-
ref.type == :dollar && !ref.tag
|
812
|
-
end.each do |ref|
|
813
|
-
errors << "$#{ref.value} of '#{rule.lhs.id.s_value}' has no declared type"
|
814
|
-
end
|
604
|
+
errors << "[BUG] LHS of #{rule} (line: #{rule.lineno}) is term. It should be nterm."
|
815
605
|
end
|
816
606
|
|
817
607
|
return if errors.empty?
|
@@ -0,0 +1,22 @@
|
|
1
|
+
module Lrama
|
2
|
+
class Lexer
|
3
|
+
class Location
|
4
|
+
attr_reader :first_line, :first_column, :last_line, :last_column
|
5
|
+
|
6
|
+
def initialize(first_line:, first_column:, last_line:, last_column:)
|
7
|
+
@first_line = first_line
|
8
|
+
@first_column = first_column
|
9
|
+
@last_line = last_line
|
10
|
+
@last_column = last_column
|
11
|
+
end
|
12
|
+
|
13
|
+
def ==(other)
|
14
|
+
self.class == other.class &&
|
15
|
+
self.first_line == other.first_line &&
|
16
|
+
self.first_column == other.first_column &&
|
17
|
+
self.last_line == other.last_line &&
|
18
|
+
self.last_column == other.last_column
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
@@ -2,16 +2,31 @@ module Lrama
|
|
2
2
|
class Lexer
|
3
3
|
class Token
|
4
4
|
class Parameterizing < Token
|
5
|
+
attr_accessor :args
|
6
|
+
|
7
|
+
def initialize(s_value:, alias_name: nil, location: nil, args: [])
|
8
|
+
super s_value: s_value, alias_name: alias_name, location: location
|
9
|
+
@args = args
|
10
|
+
end
|
11
|
+
|
5
12
|
def option?
|
6
|
-
self.s_value
|
13
|
+
%w(option ?).include?(self.s_value)
|
7
14
|
end
|
8
15
|
|
9
16
|
def nonempty_list?
|
10
|
-
self.s_value
|
17
|
+
%w(nonempty_list +).include?(self.s_value)
|
11
18
|
end
|
12
19
|
|
13
20
|
def list?
|
14
|
-
self.s_value
|
21
|
+
%w(list *).include?(self.s_value)
|
22
|
+
end
|
23
|
+
|
24
|
+
def separated_nonempty_list?
|
25
|
+
%w(separated_nonempty_list).include?(self.s_value)
|
26
|
+
end
|
27
|
+
|
28
|
+
def separated_list?
|
29
|
+
%w(separated_list).include?(self.s_value)
|
15
30
|
end
|
16
31
|
end
|
17
32
|
end
|