lrama 0.5.8 → 0.5.9
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/test.yaml +1 -1
- data/Gemfile +1 -1
- data/README.md +3 -3
- data/Rakefile +2 -7
- data/Steepfile +9 -1
- data/lib/lrama/command.rb +6 -1
- data/lib/lrama/grammar/percent_code.rb +12 -0
- data/lib/lrama/grammar/symbol.rb +2 -2
- data/lib/lrama/grammar.rb +100 -55
- data/lib/lrama/lexer/token/char.rb +8 -0
- data/lib/lrama/lexer/token/ident.rb +8 -0
- data/lib/lrama/lexer/token/parameterizing.rb +19 -0
- data/lib/lrama/lexer/token/tag.rb +8 -0
- data/lib/lrama/lexer/token/user_code.rb +14 -0
- data/lib/lrama/lexer/token.rb +9 -67
- data/lib/lrama/lexer.rb +14 -15
- data/lib/lrama/option_parser.rb +2 -1
- data/lib/lrama/options.rb +2 -1
- data/lib/lrama/output.rb +9 -0
- data/lib/lrama/parser.rb +500 -458
- data/lib/lrama/version.rb +1 -1
- data/parser.y +97 -73
- data/rbs_collection.lock.yaml +13 -1
- data/sample/calc.y +3 -1
- data/sample/parse.y +5 -1
- data/sig/lrama/grammar/percent_code.rbs +10 -0
- data/sig/lrama/grammar/reference.rbs +22 -0
- data/sig/lrama/grammar.rbs +5 -0
- data/sig/lrama/lexer/token/char.rbs +8 -0
- data/sig/lrama/lexer/token/ident.rbs +8 -0
- data/sig/lrama/lexer/token/parameterizing.rbs +8 -0
- data/sig/lrama/lexer/token/tag.rbs +8 -0
- data/sig/lrama/lexer/token/user_code.rbs +9 -0
- data/sig/lrama/lexer/token.rbs +17 -0
- data/template/bison/_yacc.h +2 -2
- data/template/bison/yacc.c +0 -2
- metadata +17 -4
- data/lib/lrama/lexer/token/type.rb +0 -8
- data/sig/lrama/lexer/token/type.rbs +0 -17
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5369d73367ea2fee299dbb73336a7801c98faa51bb3f68e66327c12f5a2d6716
|
4
|
+
data.tar.gz: b79943902e78f921d3361800cd1c371cebc594f527e940b83be3d26fcab190a9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 93fd0bb99180b74f7a98b1186273b9324ac8855f599add59a4eb86b7b38b7930975c32463568ae93ff5d829d7c605dbd8a264c8127e40d92f7f7b0bd46637b0a
|
7
|
+
data.tar.gz: 06a43beddb6b78b4a1866164c63ebff7309c134b74dc32b5272e181f88ba4b2ff2ea30d756a16016e194e6c0babf0e6ee1536984e90835798eaa57be8eb09c9f
|
data/.github/workflows/test.yaml
CHANGED
@@ -57,7 +57,7 @@ jobs:
|
|
57
57
|
|
58
58
|
- name: Check for parser.rb is up to date
|
59
59
|
run: |
|
60
|
-
bundle exec rake build:
|
60
|
+
bundle exec rake build:parser
|
61
61
|
git diff --color --no-ext-diff --ignore-submodules --exit-code lib/lrama/parser.rb
|
62
62
|
steep-check:
|
63
63
|
runs-on: ubuntu-20.04
|
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -65,13 +65,13 @@ This also requires Lrama to be able to run with only default gems because BASERU
|
|
65
65
|
|
66
66
|
## Development
|
67
67
|
|
68
|
-
### How to generate
|
68
|
+
### How to generate parser.rb
|
69
69
|
|
70
70
|
```shell
|
71
|
-
$ rake build:
|
71
|
+
$ rake build:parser
|
72
72
|
```
|
73
73
|
|
74
|
-
`
|
74
|
+
`parser.rb` is generated from `parser.y` by Racc.
|
75
75
|
Run the rake command when you update `parser.y` then commit changes of both files.
|
76
76
|
|
77
77
|
### Test
|
data/Rakefile
CHANGED
@@ -1,13 +1,8 @@
|
|
1
1
|
require "bundler/gem_tasks"
|
2
2
|
|
3
3
|
namespace "build" do
|
4
|
-
desc "build parser from parser.y
|
5
|
-
task :
|
6
|
-
sh "bundle exec racc parser.y --embedded -o lib/lrama/parser.rb"
|
7
|
-
end
|
8
|
-
|
9
|
-
desc "build parser for debugging"
|
10
|
-
task :racc_verbose_parser do
|
4
|
+
desc "build parser from parser.y"
|
5
|
+
task :parser do
|
11
6
|
sh "bundle exec racc parser.y --embedded -o lib/lrama/parser.rb -t --log-file=parser.output"
|
12
7
|
end
|
13
8
|
end
|
data/Steepfile
CHANGED
@@ -6,8 +6,16 @@ target :lib do
|
|
6
6
|
|
7
7
|
check "lib/lrama/bitmap.rb"
|
8
8
|
check "lib/lrama/digraph.rb"
|
9
|
+
check "lib/lrama/grammar/percent_code.rb"
|
10
|
+
# TODO: Include this file once Lrama::Grammar::Symbol type is defined
|
11
|
+
# check "lib/lrama/grammar/reference.rb"
|
12
|
+
check "lib/lrama/lexer/token.rb"
|
13
|
+
check "lib/lrama/lexer/token/char.rb"
|
14
|
+
check "lib/lrama/lexer/token/ident.rb"
|
15
|
+
check "lib/lrama/lexer/token/parameterizing.rb"
|
16
|
+
check "lib/lrama/lexer/token/tag.rb"
|
17
|
+
check "lib/lrama/lexer/token/user_code.rb"
|
9
18
|
check "lib/lrama/report/duration.rb"
|
10
19
|
check "lib/lrama/report/profile.rb"
|
11
|
-
check "lib/lrama/token/type.rb"
|
12
20
|
check "lib/lrama/warning.rb"
|
13
21
|
end
|
data/lib/lrama/command.rb
CHANGED
@@ -8,7 +8,7 @@ module Lrama
|
|
8
8
|
warning = Lrama::Warning.new
|
9
9
|
text = options.y.read
|
10
10
|
options.y.close if options.y != STDIN
|
11
|
-
grammar = Lrama::Parser.new(text, options.grammar_file).parse
|
11
|
+
grammar = Lrama::Parser.new(text, options.grammar_file, options.debug).parse
|
12
12
|
states = Lrama::States.new(grammar, warning, trace_state: (options.trace_opts[:automaton] || options.trace_opts[:closure]))
|
13
13
|
states.compute
|
14
14
|
context = Lrama::Context.new(states)
|
@@ -20,6 +20,11 @@ module Lrama
|
|
20
20
|
end
|
21
21
|
end
|
22
22
|
|
23
|
+
if options.trace_opts && options.trace_opts[:rules]
|
24
|
+
puts "Grammar rules:"
|
25
|
+
puts grammar.rules
|
26
|
+
end
|
27
|
+
|
23
28
|
File.open(options.outfile, "w+") do |f|
|
24
29
|
Lrama::Output.new(
|
25
30
|
out: f,
|
data/lib/lrama/grammar/symbol.rb
CHANGED
@@ -47,9 +47,9 @@ module Lrama
|
|
47
47
|
name = "YYACCEPT"
|
48
48
|
when eof_symbol?
|
49
49
|
name = "YYEOF"
|
50
|
-
when term? && id.
|
50
|
+
when term? && id.is_a?(Lrama::Lexer::Token::Char)
|
51
51
|
name = number.to_s + display_name
|
52
|
-
when term? && id.
|
52
|
+
when term? && id.is_a?(Lrama::Lexer::Token::Ident)
|
53
53
|
name = id.s_value
|
54
54
|
when nterm? && (id.s_value.include?("$") || id.s_value.include?("@"))
|
55
55
|
name = number.to_s + id.s_value
|
data/lib/lrama/grammar.rb
CHANGED
@@ -3,6 +3,7 @@ require "strscan"
|
|
3
3
|
require "lrama/grammar/auxiliary"
|
4
4
|
require "lrama/grammar/code"
|
5
5
|
require "lrama/grammar/error_token"
|
6
|
+
require "lrama/grammar/percent_code"
|
6
7
|
require "lrama/grammar/precedence"
|
7
8
|
require "lrama/grammar/printer"
|
8
9
|
require "lrama/grammar/reference"
|
@@ -13,11 +14,9 @@ require "lrama/lexer"
|
|
13
14
|
require "lrama/type"
|
14
15
|
|
15
16
|
module Lrama
|
16
|
-
Token = Lrama::Lexer::Token
|
17
|
-
|
18
17
|
# Grammar is the result of parsing an input grammar file
|
19
18
|
class Grammar
|
20
|
-
attr_reader :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol, :aux
|
19
|
+
attr_reader :percent_codes, :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol, :aux
|
21
20
|
attr_accessor :union, :expect,
|
22
21
|
:printers, :error_tokens,
|
23
22
|
:lex_param, :parse_param, :initial_action,
|
@@ -26,6 +25,8 @@ module Lrama
|
|
26
25
|
:sym_to_rules
|
27
26
|
|
28
27
|
def initialize
|
28
|
+
# Code defined by "%code"
|
29
|
+
@percent_codes = []
|
29
30
|
@printers = []
|
30
31
|
@error_tokens = []
|
31
32
|
@symbols = []
|
@@ -43,6 +44,10 @@ module Lrama
|
|
43
44
|
append_special_symbols
|
44
45
|
end
|
45
46
|
|
47
|
+
def add_percent_code(id:, code:)
|
48
|
+
@percent_codes << PercentCode.new(id, code)
|
49
|
+
end
|
50
|
+
|
46
51
|
def add_printer(ident_or_tags:, code:, lineno:)
|
47
52
|
@printers << Printer.new(ident_or_tags: ident_or_tags, code: code, lineno: lineno)
|
48
53
|
end
|
@@ -122,16 +127,7 @@ module Lrama
|
|
122
127
|
@_rules << [lhs, rhs, lineno]
|
123
128
|
end
|
124
129
|
|
125
|
-
def build_references(token_code)
|
126
|
-
token_code.references.map! do |type, value, tag, first_column, last_column|
|
127
|
-
Reference.new(type: type, value: value, ex_tag: tag, first_column: first_column, last_column: last_column)
|
128
|
-
end
|
129
|
-
|
130
|
-
token_code
|
131
|
-
end
|
132
|
-
|
133
130
|
def build_code(type, token_code)
|
134
|
-
build_references(token_code)
|
135
131
|
Code.new(type: type, token_code: token_code)
|
136
132
|
end
|
137
133
|
|
@@ -152,6 +148,7 @@ module Lrama
|
|
152
148
|
end
|
153
149
|
|
154
150
|
def prepare
|
151
|
+
extract_references
|
155
152
|
normalize_rules
|
156
153
|
collect_symbols
|
157
154
|
replace_token_with_symbol
|
@@ -314,31 +311,33 @@ module Lrama
|
|
314
311
|
# $ references
|
315
312
|
# It need to wrap an identifier with brackets to use ".-" for identifiers
|
316
313
|
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\$/) # $$, $<long>$
|
317
|
-
tag = scanner[1] ? Lrama::Lexer::Token.new(
|
318
|
-
return
|
314
|
+
tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
|
315
|
+
return Reference.new(type: :dollar, value: "$", ex_tag: tag, first_column: start, last_column: scanner.pos - 1)
|
319
316
|
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1
|
320
|
-
tag = scanner[1] ? Lrama::Lexer::Token.new(
|
321
|
-
return
|
317
|
+
tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
|
318
|
+
return Reference.new(type: :dollar, value: Integer(scanner[2]), ex_tag: tag, first_column: start, last_column: scanner.pos - 1)
|
322
319
|
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_][a-zA-Z0-9_]*)/) # $foo, $expr, $<long>program (named reference without brackets)
|
323
|
-
tag = scanner[1] ? Lrama::Lexer::Token.new(
|
324
|
-
return
|
320
|
+
tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
|
321
|
+
return Reference.new(type: :dollar, value: scanner[2], ex_tag: tag, first_column: start, last_column: scanner.pos - 1)
|
325
322
|
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $expr.right, $expr-right, $<long>program (named reference with brackets)
|
326
|
-
tag = scanner[1] ? Lrama::Lexer::Token.new(
|
327
|
-
return
|
323
|
+
tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
|
324
|
+
return Reference.new(type: :dollar, value: scanner[2], ex_tag: tag, first_column: start, last_column: scanner.pos - 1)
|
328
325
|
|
329
326
|
# @ references
|
330
327
|
# It need to wrap an identifier with brackets to use ".-" for identifiers
|
331
328
|
when scanner.scan(/@\$/) # @$
|
332
|
-
return
|
329
|
+
return Reference.new(type: :at, value: "$", first_column: start, last_column: scanner.pos - 1)
|
333
330
|
when scanner.scan(/@(\d+)/) # @1
|
334
|
-
return
|
331
|
+
return Reference.new(type: :at, value: Integer(scanner[1]), first_column: start, last_column: scanner.pos - 1)
|
335
332
|
when scanner.scan(/@([a-zA-Z][a-zA-Z0-9_]*)/) # @foo, @expr (named reference without brackets)
|
336
|
-
return
|
333
|
+
return Reference.new(type: :at, value: scanner[1], first_column: start, last_column: scanner.pos - 1)
|
337
334
|
when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @expr.right, @expr-right (named reference with brackets)
|
338
|
-
return
|
335
|
+
return Reference.new(type: :at, value: scanner[1], first_column: start, last_column: scanner.pos - 1)
|
339
336
|
end
|
340
337
|
end
|
341
338
|
|
339
|
+
private
|
340
|
+
|
342
341
|
def extract_references
|
343
342
|
unless initial_action.nil?
|
344
343
|
scanner = StringScanner.new(initial_action.s_value)
|
@@ -353,7 +352,6 @@ module Lrama
|
|
353
352
|
end
|
354
353
|
|
355
354
|
initial_action.token_code.references = references
|
356
|
-
build_references(initial_action.token_code)
|
357
355
|
end
|
358
356
|
|
359
357
|
@printers.each do |printer|
|
@@ -369,7 +367,6 @@ module Lrama
|
|
369
367
|
end
|
370
368
|
|
371
369
|
printer.code.token_code.references = references
|
372
|
-
build_references(printer.code.token_code)
|
373
370
|
end
|
374
371
|
|
375
372
|
@error_tokens.each do |error_token|
|
@@ -385,12 +382,11 @@ module Lrama
|
|
385
382
|
end
|
386
383
|
|
387
384
|
error_token.code.token_code.references = references
|
388
|
-
build_references(error_token.code.token_code)
|
389
385
|
end
|
390
386
|
|
391
387
|
@_rules.each do |lhs, rhs, _|
|
392
388
|
rhs.each_with_index do |token, index|
|
393
|
-
next
|
389
|
+
next unless token.class == Lrama::Lexer::Token::UserCode
|
394
390
|
|
395
391
|
scanner = StringScanner.new(token.s_value)
|
396
392
|
references = []
|
@@ -407,14 +403,11 @@ module Lrama
|
|
407
403
|
end
|
408
404
|
|
409
405
|
token.references = references
|
410
|
-
|
411
|
-
build_references(token)
|
406
|
+
numberize_references(lhs, rhs, token.references)
|
412
407
|
end
|
413
408
|
end
|
414
409
|
end
|
415
410
|
|
416
|
-
private
|
417
|
-
|
418
411
|
def find_nterm_by_id!(id)
|
419
412
|
nterms.find do |nterm|
|
420
413
|
nterm.id == id
|
@@ -428,29 +421,54 @@ module Lrama
|
|
428
421
|
# @empty_symbol = term
|
429
422
|
|
430
423
|
# YYEOF
|
431
|
-
term = add_term(id: Token.new(
|
424
|
+
term = add_term(id: Lrama::Lexer::Token::Ident.new(s_value: "YYEOF"), alias_name: "\"end of file\"", token_id: 0)
|
432
425
|
term.number = 0
|
433
426
|
term.eof_symbol = true
|
434
427
|
@eof_symbol = term
|
435
428
|
|
436
429
|
# YYerror
|
437
|
-
term = add_term(id: Token.new(
|
430
|
+
term = add_term(id: Lrama::Lexer::Token::Ident.new(s_value: "YYerror"), alias_name: "error")
|
438
431
|
term.number = 1
|
439
432
|
term.error_symbol = true
|
440
433
|
@error_symbol = term
|
441
434
|
|
442
435
|
# YYUNDEF
|
443
|
-
term = add_term(id: Token.new(
|
436
|
+
term = add_term(id: Lrama::Lexer::Token::Ident.new(s_value: "YYUNDEF"), alias_name: "\"invalid token\"")
|
444
437
|
term.number = 2
|
445
438
|
term.undef_symbol = true
|
446
439
|
@undef_symbol = term
|
447
440
|
|
448
441
|
# $accept
|
449
|
-
term = add_nterm(id: Token.new(
|
442
|
+
term = add_nterm(id: Lrama::Lexer::Token::Ident.new(s_value: "$accept"))
|
450
443
|
term.accept_symbol = true
|
451
444
|
@accept_symbol = term
|
452
445
|
end
|
453
446
|
|
447
|
+
def numberize_references(lhs, rhs, references)
|
448
|
+
references.map! {|ref|
|
449
|
+
ref_name = ref.value
|
450
|
+
if ref_name.is_a?(::String) && ref_name != '$'
|
451
|
+
value =
|
452
|
+
if lhs.referred_by?(ref_name)
|
453
|
+
'$'
|
454
|
+
else
|
455
|
+
index = rhs.find_index {|token| token.referred_by?(ref_name) }
|
456
|
+
|
457
|
+
if index
|
458
|
+
index + 1
|
459
|
+
else
|
460
|
+
raise "'#{ref_name}' is invalid name."
|
461
|
+
end
|
462
|
+
end
|
463
|
+
|
464
|
+
ref.value = value
|
465
|
+
ref
|
466
|
+
else
|
467
|
+
ref
|
468
|
+
end
|
469
|
+
}
|
470
|
+
end
|
471
|
+
|
454
472
|
# 1. Add $accept rule to the top of rules
|
455
473
|
# 2. Extract precedence and last action
|
456
474
|
# 3. Extract action in the middle of RHS into new Empty rule
|
@@ -493,7 +511,7 @@ module Lrama
|
|
493
511
|
case
|
494
512
|
when r.is_a?(Symbol) # precedence_sym
|
495
513
|
precedence_sym = r
|
496
|
-
when
|
514
|
+
when r.is_a?(Lrama::Lexer::Token::UserCode) && precedence_sym.nil? && code.nil? && rhs1.empty?
|
497
515
|
code = r
|
498
516
|
else
|
499
517
|
rhs1 << r
|
@@ -503,7 +521,7 @@ module Lrama
|
|
503
521
|
|
504
522
|
# Bison n'th component is 1-origin
|
505
523
|
(rhs1 + [code]).compact.each.with_index(1) do |token, i|
|
506
|
-
if token.
|
524
|
+
if token.is_a?(Lrama::Lexer::Token::UserCode)
|
507
525
|
token.references.each do |ref|
|
508
526
|
# Need to keep position_in_rhs for actions in the middle of RHS
|
509
527
|
ref.position_in_rhs = i - 1
|
@@ -532,9 +550,9 @@ module Lrama
|
|
532
550
|
end
|
533
551
|
|
534
552
|
rhs2 = rhs1.map do |token|
|
535
|
-
if token.
|
553
|
+
if token.is_a?(Lrama::Lexer::Token::UserCode)
|
536
554
|
prefix = token.referred ? "@" : "$@"
|
537
|
-
new_token = Token.new(
|
555
|
+
new_token = Lrama::Lexer::Token::Ident.new(s_value: prefix + extracted_action_number.to_s)
|
538
556
|
extracted_action_number += 1
|
539
557
|
a << [new_token, token]
|
540
558
|
new_token
|
@@ -550,8 +568,12 @@ module Lrama
|
|
550
568
|
end
|
551
569
|
|
552
570
|
c = code ? Code.new(type: :user_code, token_code: code) : nil
|
553
|
-
|
554
|
-
|
571
|
+
# Expand Parameterizing rules
|
572
|
+
if rhs2.any? {|r| r.is_a?(Lrama::Lexer::Token::Parameterizing) }
|
573
|
+
expand_parameterizing_rules(lhs, rhs2, c, precedence_sym, lineno)
|
574
|
+
else
|
575
|
+
@rules << Rule.new(id: @rules.count, lhs: lhs, rhs: rhs2, code: c, precedence_sym: precedence_sym, lineno: lineno)
|
576
|
+
end
|
555
577
|
add_nterm(id: lhs)
|
556
578
|
a.each do |new_token, _|
|
557
579
|
add_nterm(id: new_token)
|
@@ -559,14 +581,37 @@ module Lrama
|
|
559
581
|
end
|
560
582
|
end
|
561
583
|
|
584
|
+
def expand_parameterizing_rules(lhs, rhs, code, precedence_sym, lineno)
|
585
|
+
token = Lrama::Lexer::Token::Ident.new(s_value: rhs[0].s_value)
|
586
|
+
if rhs.any? {|r| r.is_a?(Lrama::Lexer::Token::Parameterizing) && r.option? }
|
587
|
+
option_token = Lrama::Lexer::Token::Ident.new(s_value: "option_#{rhs[0].s_value}")
|
588
|
+
add_term(id: option_token)
|
589
|
+
@rules << Rule.new(id: @rules.count, lhs: lhs, rhs: [option_token], code: code, precedence_sym: precedence_sym, lineno: lineno)
|
590
|
+
@rules << Rule.new(id: @rules.count, lhs: option_token, rhs: [], code: code, precedence_sym: precedence_sym, lineno: lineno)
|
591
|
+
@rules << Rule.new(id: @rules.count, lhs: option_token, rhs: [token], code: code, precedence_sym: precedence_sym, lineno: lineno)
|
592
|
+
elsif rhs.any? {|r| r.is_a?(Lrama::Lexer::Token::Parameterizing) && r.nonempty_list? }
|
593
|
+
nonempty_list_token = Lrama::Lexer::Token::Ident.new(s_value: "nonempty_list_#{rhs[0].s_value}")
|
594
|
+
add_term(id: nonempty_list_token)
|
595
|
+
@rules << Rule.new(id: @rules.count, lhs: lhs, rhs: [nonempty_list_token], code: code, precedence_sym: precedence_sym, lineno: lineno)
|
596
|
+
@rules << Rule.new(id: @rules.count, lhs: nonempty_list_token, rhs: [token], code: code, precedence_sym: precedence_sym, lineno: lineno)
|
597
|
+
@rules << Rule.new(id: @rules.count, lhs: nonempty_list_token, rhs: [nonempty_list_token, token], code: code, precedence_sym: precedence_sym, lineno: lineno)
|
598
|
+
elsif rhs.any? {|r| r.is_a?(Lrama::Lexer::Token::Parameterizing) && r.list? }
|
599
|
+
list_token = Lrama::Lexer::Token::Ident.new(s_value: "list_#{rhs[0].s_value}")
|
600
|
+
add_term(id: list_token)
|
601
|
+
@rules << Rule.new(id: @rules.count, lhs: lhs, rhs: [list_token], code: code, precedence_sym: precedence_sym, lineno: lineno)
|
602
|
+
@rules << Rule.new(id: @rules.count, lhs: list_token, rhs: [], code: code, precedence_sym: precedence_sym, lineno: lineno)
|
603
|
+
@rules << Rule.new(id: @rules.count, lhs: list_token, rhs: [list_token, token], code: code, precedence_sym: precedence_sym, lineno: lineno)
|
604
|
+
end
|
605
|
+
end
|
606
|
+
|
562
607
|
# Collect symbols from rules
|
563
608
|
def collect_symbols
|
564
609
|
@rules.flat_map(&:rhs).each do |s|
|
565
610
|
case s
|
566
|
-
when Token
|
567
|
-
|
568
|
-
|
569
|
-
|
611
|
+
when Lrama::Lexer::Token::Char
|
612
|
+
add_term(id: s)
|
613
|
+
when Lrama::Lexer::Token
|
614
|
+
# skip
|
570
615
|
when Symbol
|
571
616
|
# skip
|
572
617
|
else
|
@@ -607,7 +652,7 @@ module Lrama
|
|
607
652
|
|
608
653
|
# If id is Token::Char, it uses ASCII code
|
609
654
|
if sym.term? && sym.token_id.nil?
|
610
|
-
if sym.id.
|
655
|
+
if sym.id.is_a?(Lrama::Lexer::Token::Char)
|
611
656
|
# Ignore ' on the both sides
|
612
657
|
case sym.id.s_value[1..-2]
|
613
658
|
when "\\b"
|
@@ -660,7 +705,7 @@ module Lrama
|
|
660
705
|
rule.code.references.each do |ref|
|
661
706
|
next if ref.type == :at
|
662
707
|
|
663
|
-
if ref.referring_symbol.
|
708
|
+
if !ref.referring_symbol.is_a?(Lrama::Lexer::Token::UserCode)
|
664
709
|
ref.referring_symbol = token_to_symbol(ref.referring_symbol)
|
665
710
|
end
|
666
711
|
end
|
@@ -670,7 +715,7 @@ module Lrama
|
|
670
715
|
|
671
716
|
def token_to_symbol(token)
|
672
717
|
case token
|
673
|
-
when Token
|
718
|
+
when Lrama::Lexer::Token
|
674
719
|
find_symbol_by_id!(token)
|
675
720
|
when Symbol
|
676
721
|
token
|
@@ -716,10 +761,10 @@ module Lrama
|
|
716
761
|
@symbols.each do |sym|
|
717
762
|
@printers.each do |printer|
|
718
763
|
printer.ident_or_tags.each do |ident_or_tag|
|
719
|
-
case ident_or_tag
|
720
|
-
when Token::Ident
|
764
|
+
case ident_or_tag
|
765
|
+
when Lrama::Lexer::Token::Ident
|
721
766
|
sym.printer = printer if sym.id == ident_or_tag
|
722
|
-
when Token::Tag
|
767
|
+
when Lrama::Lexer::Token::Tag
|
723
768
|
sym.printer = printer if sym.tag == ident_or_tag
|
724
769
|
else
|
725
770
|
raise "Unknown token type. #{printer}"
|
@@ -733,10 +778,10 @@ module Lrama
|
|
733
778
|
@symbols.each do |sym|
|
734
779
|
@error_tokens.each do |error_token|
|
735
780
|
error_token.ident_or_tags.each do |ident_or_tag|
|
736
|
-
case ident_or_tag
|
737
|
-
when Token::Ident
|
781
|
+
case ident_or_tag
|
782
|
+
when Lrama::Lexer::Token::Ident
|
738
783
|
sym.error_token = error_token if sym.id == ident_or_tag
|
739
|
-
when Token::Tag
|
784
|
+
when Lrama::Lexer::Token::Tag
|
740
785
|
sym.error_token = error_token if sym.tag == ident_or_tag
|
741
786
|
else
|
742
787
|
raise "Unknown token type. #{error_token}"
|
data/lib/lrama/lexer/token.rb
CHANGED
@@ -1,84 +1,26 @@
|
|
1
|
-
require 'lrama/lexer/token/type'
|
2
|
-
|
3
1
|
module Lrama
|
4
2
|
class Lexer
|
5
|
-
class Token
|
3
|
+
class Token < Struct.new(:s_value, :alias_name, keyword_init: true)
|
6
4
|
|
7
5
|
attr_accessor :line, :column, :referred
|
8
|
-
# For User_code
|
9
|
-
attr_accessor :references
|
10
6
|
|
11
7
|
def to_s
|
12
8
|
"#{super} line: #{line}, column: #{column}"
|
13
9
|
end
|
14
10
|
|
15
11
|
def referred_by?(string)
|
16
|
-
[self.s_value, self.
|
12
|
+
[self.s_value, self.alias_name].include?(string)
|
17
13
|
end
|
18
14
|
|
19
15
|
def ==(other)
|
20
|
-
self.class == other.class && self.
|
16
|
+
self.class == other.class && self.s_value == other.s_value
|
21
17
|
end
|
22
|
-
|
23
|
-
def numberize_references(lhs, rhs)
|
24
|
-
self.references.map! {|ref|
|
25
|
-
ref_name = ref[1]
|
26
|
-
if ref_name.is_a?(::String) && ref_name != '$'
|
27
|
-
value =
|
28
|
-
if lhs.referred_by?(ref_name)
|
29
|
-
'$'
|
30
|
-
else
|
31
|
-
index = rhs.find_index {|token| token.referred_by?(ref_name) }
|
32
|
-
|
33
|
-
if index
|
34
|
-
index + 1
|
35
|
-
else
|
36
|
-
raise "'#{ref_name}' is invalid name."
|
37
|
-
end
|
38
|
-
end
|
39
|
-
[ref[0], value, ref[2], ref[3], ref[4]]
|
40
|
-
else
|
41
|
-
ref
|
42
|
-
end
|
43
|
-
}
|
44
|
-
end
|
45
|
-
|
46
|
-
@i = 0
|
47
|
-
@types = []
|
48
|
-
|
49
|
-
def self.define_type(name)
|
50
|
-
type = Type.new(id: @i, name: name.to_s)
|
51
|
-
const_set(name, type)
|
52
|
-
@types << type
|
53
|
-
@i += 1
|
54
|
-
end
|
55
|
-
|
56
|
-
# Token types
|
57
|
-
define_type(:P_expect) # %expect
|
58
|
-
define_type(:P_define) # %define
|
59
|
-
define_type(:P_printer) # %printer
|
60
|
-
define_type(:P_error_token) # %error-token
|
61
|
-
define_type(:P_lex_param) # %lex-param
|
62
|
-
define_type(:P_parse_param) # %parse-param
|
63
|
-
define_type(:P_initial_action) # %initial-action
|
64
|
-
define_type(:P_union) # %union
|
65
|
-
define_type(:P_token) # %token
|
66
|
-
define_type(:P_type) # %type
|
67
|
-
define_type(:P_nonassoc) # %nonassoc
|
68
|
-
define_type(:P_left) # %left
|
69
|
-
define_type(:P_right) # %right
|
70
|
-
define_type(:P_precedence) # %precedence
|
71
|
-
define_type(:P_prec) # %prec
|
72
|
-
define_type(:User_code) # { ... }
|
73
|
-
define_type(:Tag) # <int>
|
74
|
-
define_type(:Number) # 0
|
75
|
-
define_type(:Ident_Colon) # k_if:, k_if : (spaces can be there)
|
76
|
-
define_type(:Ident) # api.pure, tNUMBER
|
77
|
-
define_type(:Named_Ref) # [foo]
|
78
|
-
define_type(:Semicolon) # ;
|
79
|
-
define_type(:Bar) # |
|
80
|
-
define_type(:String) # "str"
|
81
|
-
define_type(:Char) # '+'
|
82
18
|
end
|
83
19
|
end
|
84
20
|
end
|
21
|
+
|
22
|
+
require 'lrama/lexer/token/char'
|
23
|
+
require 'lrama/lexer/token/ident'
|
24
|
+
require 'lrama/lexer/token/parameterizing'
|
25
|
+
require 'lrama/lexer/token/tag'
|
26
|
+
require 'lrama/lexer/token/user_code'
|