lrama 0.5.8 → 0.5.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/test.yaml +1 -1
- data/Gemfile +1 -1
- data/README.md +3 -3
- data/Rakefile +2 -7
- data/Steepfile +9 -1
- data/lib/lrama/command.rb +6 -1
- data/lib/lrama/grammar/percent_code.rb +12 -0
- data/lib/lrama/grammar/symbol.rb +2 -2
- data/lib/lrama/grammar.rb +100 -55
- data/lib/lrama/lexer/token/char.rb +8 -0
- data/lib/lrama/lexer/token/ident.rb +8 -0
- data/lib/lrama/lexer/token/parameterizing.rb +19 -0
- data/lib/lrama/lexer/token/tag.rb +8 -0
- data/lib/lrama/lexer/token/user_code.rb +14 -0
- data/lib/lrama/lexer/token.rb +9 -67
- data/lib/lrama/lexer.rb +14 -15
- data/lib/lrama/option_parser.rb +2 -1
- data/lib/lrama/options.rb +2 -1
- data/lib/lrama/output.rb +9 -0
- data/lib/lrama/parser.rb +500 -458
- data/lib/lrama/version.rb +1 -1
- data/parser.y +97 -73
- data/rbs_collection.lock.yaml +13 -1
- data/sample/calc.y +3 -1
- data/sample/parse.y +5 -1
- data/sig/lrama/grammar/percent_code.rbs +10 -0
- data/sig/lrama/grammar/reference.rbs +22 -0
- data/sig/lrama/grammar.rbs +5 -0
- data/sig/lrama/lexer/token/char.rbs +8 -0
- data/sig/lrama/lexer/token/ident.rbs +8 -0
- data/sig/lrama/lexer/token/parameterizing.rbs +8 -0
- data/sig/lrama/lexer/token/tag.rbs +8 -0
- data/sig/lrama/lexer/token/user_code.rbs +9 -0
- data/sig/lrama/lexer/token.rbs +17 -0
- data/template/bison/_yacc.h +2 -2
- data/template/bison/yacc.c +0 -2
- metadata +17 -4
- data/lib/lrama/lexer/token/type.rb +0 -8
- data/sig/lrama/lexer/token/type.rbs +0 -17
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 5369d73367ea2fee299dbb73336a7801c98faa51bb3f68e66327c12f5a2d6716
+  data.tar.gz: b79943902e78f921d3361800cd1c371cebc594f527e940b83be3d26fcab190a9
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 93fd0bb99180b74f7a98b1186273b9324ac8855f599add59a4eb86b7b38b7930975c32463568ae93ff5d829d7c605dbd8a264c8127e40d92f7f7b0bd46637b0a
+  data.tar.gz: 06a43beddb6b78b4a1866164c63ebff7309c134b74dc32b5272e181f88ba4b2ff2ea30d756a16016e194e6c0babf0e6ee1536984e90835798eaa57be8eb09c9f
data/.github/workflows/test.yaml
CHANGED
@@ -57,7 +57,7 @@ jobs:
 
       - name: Check for parser.rb is up to date
         run: |
-          bundle exec rake build:
+          bundle exec rake build:parser
           git diff --color --no-ext-diff --ignore-submodules --exit-code lib/lrama/parser.rb
   steep-check:
     runs-on: ubuntu-20.04
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -65,13 +65,13 @@ This also requires Lrama to be able to run with only default gems because BASERU
 
 ## Development
 
-### How to generate
+### How to generate parser.rb
 
 ```shell
-$ rake build:
+$ rake build:parser
 ```
 
-`
+`parser.rb` is generated from `parser.y` by Racc.
 Run the rake command when you update `parser.y` then commit changes of both files.
 
 ### Test
data/Rakefile
CHANGED
@@ -1,13 +1,8 @@
 require "bundler/gem_tasks"
 
 namespace "build" do
-  desc "build parser from parser.y"
-  task :
-    sh "bundle exec racc parser.y --embedded -o lib/lrama/parser.rb"
-  end
-
-  desc "build parser for debugging"
-  task :racc_verbose_parser do
+  desc "build parser from parser.y"
+  task :parser do
    sh "bundle exec racc parser.y --embedded -o lib/lrama/parser.rb -t --log-file=parser.output"
   end
 end
data/Steepfile
CHANGED
@@ -6,8 +6,16 @@ target :lib do
 
   check "lib/lrama/bitmap.rb"
   check "lib/lrama/digraph.rb"
+  check "lib/lrama/grammar/percent_code.rb"
+  # TODO: Include this file once Lrama::Grammar::Symbol type is defined
+  # check "lib/lrama/grammar/reference.rb"
+  check "lib/lrama/lexer/token.rb"
+  check "lib/lrama/lexer/token/char.rb"
+  check "lib/lrama/lexer/token/ident.rb"
+  check "lib/lrama/lexer/token/parameterizing.rb"
+  check "lib/lrama/lexer/token/tag.rb"
+  check "lib/lrama/lexer/token/user_code.rb"
   check "lib/lrama/report/duration.rb"
   check "lib/lrama/report/profile.rb"
-  check "lib/lrama/token/type.rb"
   check "lib/lrama/warning.rb"
 end
data/lib/lrama/command.rb
CHANGED
@@ -8,7 +8,7 @@ module Lrama
       warning = Lrama::Warning.new
       text = options.y.read
       options.y.close if options.y != STDIN
-      grammar = Lrama::Parser.new(text, options.grammar_file).parse
+      grammar = Lrama::Parser.new(text, options.grammar_file, options.debug).parse
       states = Lrama::States.new(grammar, warning, trace_state: (options.trace_opts[:automaton] || options.trace_opts[:closure]))
       states.compute
       context = Lrama::Context.new(states)
@@ -20,6 +20,11 @@ module Lrama
         end
       end
 
+      if options.trace_opts && options.trace_opts[:rules]
+        puts "Grammar rules:"
+        puts grammar.rules
+      end
+
       File.open(options.outfile, "w+") do |f|
         Lrama::Output.new(
           out: f,
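The two hunks above thread new command-line state through the pipeline: the parser now receives the debug option as a third argument, and a rules trace prints the grammar after parsing. A minimal sketch of driving the same API directly, assuming a local grammar file path and an enabled debug flag (both illustrative, not taken from the diff):

    # Sketch only: mirrors the call made by Lrama::Command above.
    require "lrama"

    text = File.read("parser.y")                                 # illustrative input path
    grammar = Lrama::Parser.new(text, "parser.y", true).parse    # third argument: debug flag
    puts grammar.rules                                           # what the new rules trace prints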
data/lib/lrama/grammar/symbol.rb
CHANGED
@@ -47,9 +47,9 @@ module Lrama
         name = "YYACCEPT"
       when eof_symbol?
         name = "YYEOF"
-      when term? && id.
+      when term? && id.is_a?(Lrama::Lexer::Token::Char)
         name = number.to_s + display_name
-      when term? && id.
+      when term? && id.is_a?(Lrama::Lexer::Token::Ident)
         name = id.s_value
       when nterm? && (id.s_value.include?("$") || id.s_value.include?("@"))
         name = number.to_s + id.s_value
data/lib/lrama/grammar.rb
CHANGED
@@ -3,6 +3,7 @@ require "strscan"
 require "lrama/grammar/auxiliary"
 require "lrama/grammar/code"
 require "lrama/grammar/error_token"
+require "lrama/grammar/percent_code"
 require "lrama/grammar/precedence"
 require "lrama/grammar/printer"
 require "lrama/grammar/reference"
@@ -13,11 +14,9 @@ require "lrama/lexer"
 require "lrama/type"
 
 module Lrama
-  Token = Lrama::Lexer::Token
-
   # Grammar is the result of parsing an input grammar file
   class Grammar
-    attr_reader :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol, :aux
+    attr_reader :percent_codes, :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol, :aux
     attr_accessor :union, :expect,
                   :printers, :error_tokens,
                   :lex_param, :parse_param, :initial_action,
@@ -26,6 +25,8 @@ module Lrama
                   :sym_to_rules
 
     def initialize
+      # Code defined by "%code"
+      @percent_codes = []
       @printers = []
       @error_tokens = []
       @symbols = []
@@ -43,6 +44,10 @@ module Lrama
       append_special_symbols
     end
 
+    def add_percent_code(id:, code:)
+      @percent_codes << PercentCode.new(id, code)
+    end
+
     def add_printer(ident_or_tags:, code:, lineno:)
       @printers << Printer.new(ident_or_tags: ident_or_tags, code: code, lineno: lineno)
     end
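The hunk above adds collection of `%code` directives: the parser is expected to call Grammar#add_percent_code for each block it sees, and the new percent_codes reader exposes them to later stages. A rough sketch of that flow, noting that the plain-String code value and any readers on PercentCode are assumptions not shown in this diff:

    # Sketch only: the constructor arguments match Grammar#add_percent_code above;
    # passing a bare String as `code` is an illustrative stand-in for the lexed token.
    grammar = Lrama::Grammar.new
    grammar.add_percent_code(id: "requires", code: "#include \"my_header.h\"")
    grammar.percent_codes.length # => 1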
@@ -122,16 +127,7 @@ module Lrama
       @_rules << [lhs, rhs, lineno]
     end
 
-    def build_references(token_code)
-      token_code.references.map! do |type, value, tag, first_column, last_column|
-        Reference.new(type: type, value: value, ex_tag: tag, first_column: first_column, last_column: last_column)
-      end
-
-      token_code
-    end
-
     def build_code(type, token_code)
-      build_references(token_code)
       Code.new(type: type, token_code: token_code)
     end
 
@@ -152,6 +148,7 @@ module Lrama
     end
 
     def prepare
+      extract_references
       normalize_rules
       collect_symbols
       replace_token_with_symbol
@@ -314,31 +311,33 @@ module Lrama
       # $ references
       # It need to wrap an identifier with brackets to use ".-" for identifiers
       when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\$/) # $$, $<long>$
-        tag = scanner[1] ? Lrama::Lexer::Token.new(
-        return
+        tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
+        return Reference.new(type: :dollar, value: "$", ex_tag: tag, first_column: start, last_column: scanner.pos - 1)
       when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1
-        tag = scanner[1] ? Lrama::Lexer::Token.new(
-        return
+        tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
+        return Reference.new(type: :dollar, value: Integer(scanner[2]), ex_tag: tag, first_column: start, last_column: scanner.pos - 1)
       when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_][a-zA-Z0-9_]*)/) # $foo, $expr, $<long>program (named reference without brackets)
-        tag = scanner[1] ? Lrama::Lexer::Token.new(
-        return
+        tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
+        return Reference.new(type: :dollar, value: scanner[2], ex_tag: tag, first_column: start, last_column: scanner.pos - 1)
       when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $expr.right, $expr-right, $<long>program (named reference with brackets)
-        tag = scanner[1] ? Lrama::Lexer::Token.new(
-        return
+        tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
+        return Reference.new(type: :dollar, value: scanner[2], ex_tag: tag, first_column: start, last_column: scanner.pos - 1)
 
       # @ references
       # It need to wrap an identifier with brackets to use ".-" for identifiers
       when scanner.scan(/@\$/) # @$
-        return
+        return Reference.new(type: :at, value: "$", first_column: start, last_column: scanner.pos - 1)
       when scanner.scan(/@(\d+)/) # @1
-        return
+        return Reference.new(type: :at, value: Integer(scanner[1]), first_column: start, last_column: scanner.pos - 1)
       when scanner.scan(/@([a-zA-Z][a-zA-Z0-9_]*)/) # @foo, @expr (named reference without brackets)
-        return
+        return Reference.new(type: :at, value: scanner[1], first_column: start, last_column: scanner.pos - 1)
       when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @expr.right, @expr-right (named reference with brackets)
-        return
+        return Reference.new(type: :at, value: scanner[1], first_column: start, last_column: scanner.pos - 1)
       end
     end
 
+    private
+
     def extract_references
       unless initial_action.nil?
         scanner = StringScanner.new(initial_action.s_value)
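With build_references removed (previous hunk), the reference scanner now returns fully formed Lrama::Grammar::Reference objects instead of raw arrays that were converted later. For illustration, the object built for a `$1` reference would look roughly like this (the column values are made up):

    # Sketch only: keyword arguments are taken from the Reference.new calls above.
    Lrama::Grammar::Reference.new(
      type: :dollar,     # "$"-style reference ("@"-style references use :at)
      value: 1,          # $1 -> the first RHS component; named references keep their name here
      ex_tag: nil,       # explicit <tag>, e.g. for $<i>1, when one was written
      first_column: 10,  # positions inside the action string (illustrative values)
      last_column: 11
    )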
@@ -353,7 +352,6 @@ module Lrama
         end
 
         initial_action.token_code.references = references
-        build_references(initial_action.token_code)
       end
 
       @printers.each do |printer|
@@ -369,7 +367,6 @@ module Lrama
         end
 
         printer.code.token_code.references = references
-        build_references(printer.code.token_code)
       end
 
       @error_tokens.each do |error_token|
@@ -385,12 +382,11 @@ module Lrama
         end
 
         error_token.code.token_code.references = references
-        build_references(error_token.code.token_code)
       end
 
       @_rules.each do |lhs, rhs, _|
         rhs.each_with_index do |token, index|
-          next
+          next unless token.class == Lrama::Lexer::Token::UserCode
 
           scanner = StringScanner.new(token.s_value)
           references = []
@@ -407,14 +403,11 @@ module Lrama
           end
 
           token.references = references
-
-          build_references(token)
+          numberize_references(lhs, rhs, token.references)
         end
       end
     end
 
-    private
-
     def find_nterm_by_id!(id)
       nterms.find do |nterm|
         nterm.id == id
@@ -428,29 +421,54 @@ module Lrama
       # @empty_symbol = term
 
       # YYEOF
-      term = add_term(id: Token.new(
+      term = add_term(id: Lrama::Lexer::Token::Ident.new(s_value: "YYEOF"), alias_name: "\"end of file\"", token_id: 0)
       term.number = 0
       term.eof_symbol = true
       @eof_symbol = term
 
       # YYerror
-      term = add_term(id: Token.new(
+      term = add_term(id: Lrama::Lexer::Token::Ident.new(s_value: "YYerror"), alias_name: "error")
       term.number = 1
       term.error_symbol = true
       @error_symbol = term
 
       # YYUNDEF
-      term = add_term(id: Token.new(
+      term = add_term(id: Lrama::Lexer::Token::Ident.new(s_value: "YYUNDEF"), alias_name: "\"invalid token\"")
       term.number = 2
       term.undef_symbol = true
       @undef_symbol = term
 
       # $accept
-      term = add_nterm(id: Token.new(
+      term = add_nterm(id: Lrama::Lexer::Token::Ident.new(s_value: "$accept"))
       term.accept_symbol = true
       @accept_symbol = term
     end
 
+    def numberize_references(lhs, rhs, references)
+      references.map! {|ref|
+        ref_name = ref.value
+        if ref_name.is_a?(::String) && ref_name != '$'
+          value =
+            if lhs.referred_by?(ref_name)
+              '$'
+            else
+              index = rhs.find_index {|token| token.referred_by?(ref_name) }
+
+              if index
+                index + 1
+              else
+                raise "'#{ref_name}' is invalid name."
+              end
+            end
+
+          ref.value = value
+          ref
+        else
+          ref
+        end
+      }
+    end
+
     # 1. Add $accept rule to the top of rules
     # 2. Extract precedence and last action
     # 3. Extract action in the middle of RHS into new Empty rule
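numberize_references, moved here from Lexer::Token (see the token.rb hunk at the end of this diff), resolves named references in an action against the rule they belong to: a name matching the LHS becomes '$', a name matching an RHS token becomes that token's 1-origin position, and anything else raises. A small illustration with assumed Ident tokens standing in for a rule like `expr: expr PLUS term`:

    # Sketch only: constructors and referred_by? come from the diff; the rule is illustrative.
    lhs = Lrama::Lexer::Token::Ident.new(s_value: "expr")
    rhs = %w[expr PLUS term].map { |name| Lrama::Lexer::Token::Ident.new(s_value: name) }

    lhs.referred_by?("expr")                               # => true, so $expr resolves to '$'
    rhs.find_index { |token| token.referred_by?("term") }  # => 2, so $term resolves to 2 + 1 = 3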
@@ -493,7 +511,7 @@ module Lrama
         case
         when r.is_a?(Symbol) # precedence_sym
           precedence_sym = r
-        when
+        when r.is_a?(Lrama::Lexer::Token::UserCode) && precedence_sym.nil? && code.nil? && rhs1.empty?
           code = r
         else
           rhs1 << r
@@ -503,7 +521,7 @@ module Lrama
 
       # Bison n'th component is 1-origin
       (rhs1 + [code]).compact.each.with_index(1) do |token, i|
-        if token.
+        if token.is_a?(Lrama::Lexer::Token::UserCode)
          token.references.each do |ref|
            # Need to keep position_in_rhs for actions in the middle of RHS
            ref.position_in_rhs = i - 1
@@ -532,9 +550,9 @@ module Lrama
       end
 
       rhs2 = rhs1.map do |token|
-        if token.
+        if token.is_a?(Lrama::Lexer::Token::UserCode)
           prefix = token.referred ? "@" : "$@"
-          new_token = Token.new(
+          new_token = Lrama::Lexer::Token::Ident.new(s_value: prefix + extracted_action_number.to_s)
           extracted_action_number += 1
           a << [new_token, token]
           new_token
@@ -550,8 +568,12 @@ module Lrama
       end
 
       c = code ? Code.new(type: :user_code, token_code: code) : nil
-
-
+      # Expand Parameterizing rules
+      if rhs2.any? {|r| r.is_a?(Lrama::Lexer::Token::Parameterizing) }
+        expand_parameterizing_rules(lhs, rhs2, c, precedence_sym, lineno)
+      else
+        @rules << Rule.new(id: @rules.count, lhs: lhs, rhs: rhs2, code: c, precedence_sym: precedence_sym, lineno: lineno)
+      end
       add_nterm(id: lhs)
       a.each do |new_token, _|
         add_nterm(id: new_token)
@@ -559,14 +581,37 @@ module Lrama
       end
     end
 
+    def expand_parameterizing_rules(lhs, rhs, code, precedence_sym, lineno)
+      token = Lrama::Lexer::Token::Ident.new(s_value: rhs[0].s_value)
+      if rhs.any? {|r| r.is_a?(Lrama::Lexer::Token::Parameterizing) && r.option? }
+        option_token = Lrama::Lexer::Token::Ident.new(s_value: "option_#{rhs[0].s_value}")
+        add_term(id: option_token)
+        @rules << Rule.new(id: @rules.count, lhs: lhs, rhs: [option_token], code: code, precedence_sym: precedence_sym, lineno: lineno)
+        @rules << Rule.new(id: @rules.count, lhs: option_token, rhs: [], code: code, precedence_sym: precedence_sym, lineno: lineno)
+        @rules << Rule.new(id: @rules.count, lhs: option_token, rhs: [token], code: code, precedence_sym: precedence_sym, lineno: lineno)
+      elsif rhs.any? {|r| r.is_a?(Lrama::Lexer::Token::Parameterizing) && r.nonempty_list? }
+        nonempty_list_token = Lrama::Lexer::Token::Ident.new(s_value: "nonempty_list_#{rhs[0].s_value}")
+        add_term(id: nonempty_list_token)
+        @rules << Rule.new(id: @rules.count, lhs: lhs, rhs: [nonempty_list_token], code: code, precedence_sym: precedence_sym, lineno: lineno)
+        @rules << Rule.new(id: @rules.count, lhs: nonempty_list_token, rhs: [token], code: code, precedence_sym: precedence_sym, lineno: lineno)
+        @rules << Rule.new(id: @rules.count, lhs: nonempty_list_token, rhs: [nonempty_list_token, token], code: code, precedence_sym: precedence_sym, lineno: lineno)
+      elsif rhs.any? {|r| r.is_a?(Lrama::Lexer::Token::Parameterizing) && r.list? }
+        list_token = Lrama::Lexer::Token::Ident.new(s_value: "list_#{rhs[0].s_value}")
+        add_term(id: list_token)
+        @rules << Rule.new(id: @rules.count, lhs: lhs, rhs: [list_token], code: code, precedence_sym: precedence_sym, lineno: lineno)
+        @rules << Rule.new(id: @rules.count, lhs: list_token, rhs: [], code: code, precedence_sym: precedence_sym, lineno: lineno)
+        @rules << Rule.new(id: @rules.count, lhs: list_token, rhs: [list_token, token], code: code, precedence_sym: precedence_sym, lineno: lineno)
+      end
+    end
+
     # Collect symbols from rules
     def collect_symbols
       @rules.flat_map(&:rhs).each do |s|
         case s
-        when Token
-
-
-
+        when Lrama::Lexer::Token::Char
+          add_term(id: s)
+        when Lrama::Lexer::Token
+          # skip
         when Symbol
           # skip
         else
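expand_parameterizing_rules, added above, backs the new parameterizing rule support (see also the parser.y and sample changes in the file list): a right-hand side containing a Parameterizing token is rewritten into ordinary rules over a generated nonterminal. Reading the Rule.new calls above, the generated rules amount to the following; `option(bar)` and friends are used here only as notation for the three forms, the concrete surface syntax lives in the updated parser.y:

    # Sketch only, derived from the Rule.new calls in expand_parameterizing_rules:
    #
    #   foo: option(bar)          #=>  foo: option_bar
    #                             #     option_bar: /* empty */ | bar
    #
    #   foo: nonempty_list(bar)   #=>  foo: nonempty_list_bar
    #                             #     nonempty_list_bar: bar | nonempty_list_bar bar
    #
    #   foo: list(bar)            #=>  foo: list_bar
    #                             #     list_bar: /* empty */ | list_bar bar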
@@ -607,7 +652,7 @@ module Lrama
 
       # If id is Token::Char, it uses ASCII code
       if sym.term? && sym.token_id.nil?
-        if sym.id.
+        if sym.id.is_a?(Lrama::Lexer::Token::Char)
           # Ignore ' on the both sides
           case sym.id.s_value[1..-2]
           when "\\b"
@@ -660,7 +705,7 @@ module Lrama
         rule.code.references.each do |ref|
           next if ref.type == :at
 
-          if ref.referring_symbol.
+          if !ref.referring_symbol.is_a?(Lrama::Lexer::Token::UserCode)
             ref.referring_symbol = token_to_symbol(ref.referring_symbol)
           end
         end
@@ -670,7 +715,7 @@ module Lrama
 
     def token_to_symbol(token)
       case token
-      when Token
+      when Lrama::Lexer::Token
         find_symbol_by_id!(token)
       when Symbol
         token
@@ -716,10 +761,10 @@ module Lrama
       @symbols.each do |sym|
         @printers.each do |printer|
           printer.ident_or_tags.each do |ident_or_tag|
-            case ident_or_tag
-            when Token::Ident
+            case ident_or_tag
+            when Lrama::Lexer::Token::Ident
               sym.printer = printer if sym.id == ident_or_tag
-            when Token::Tag
+            when Lrama::Lexer::Token::Tag
               sym.printer = printer if sym.tag == ident_or_tag
             else
               raise "Unknown token type. #{printer}"
@@ -733,10 +778,10 @@ module Lrama
       @symbols.each do |sym|
         @error_tokens.each do |error_token|
           error_token.ident_or_tags.each do |ident_or_tag|
-            case ident_or_tag
-            when Token::Ident
+            case ident_or_tag
+            when Lrama::Lexer::Token::Ident
               sym.error_token = error_token if sym.id == ident_or_tag
-            when Token::Tag
+            when Lrama::Lexer::Token::Tag
               sym.error_token = error_token if sym.tag == ident_or_tag
             else
               raise "Unknown token type. #{error_token}"
data/lib/lrama/lexer/token.rb
CHANGED
@@ -1,84 +1,26 @@
-require 'lrama/lexer/token/type'
-
 module Lrama
   class Lexer
-    class Token
+    class Token < Struct.new(:s_value, :alias_name, keyword_init: true)
 
       attr_accessor :line, :column, :referred
-      # For User_code
-      attr_accessor :references
 
       def to_s
         "#{super} line: #{line}, column: #{column}"
       end
 
       def referred_by?(string)
-        [self.s_value, self.
+        [self.s_value, self.alias_name].include?(string)
       end
 
       def ==(other)
-        self.class == other.class && self.
+        self.class == other.class && self.s_value == other.s_value
       end
-
-      def numberize_references(lhs, rhs)
-        self.references.map! {|ref|
-          ref_name = ref[1]
-          if ref_name.is_a?(::String) && ref_name != '$'
-            value =
-              if lhs.referred_by?(ref_name)
-                '$'
-              else
-                index = rhs.find_index {|token| token.referred_by?(ref_name) }
-
-                if index
-                  index + 1
-                else
-                  raise "'#{ref_name}' is invalid name."
-                end
-              end
-            [ref[0], value, ref[2], ref[3], ref[4]]
-          else
-            ref
-          end
-        }
-      end
-
-      @i = 0
-      @types = []
-
-      def self.define_type(name)
-        type = Type.new(id: @i, name: name.to_s)
-        const_set(name, type)
-        @types << type
-        @i += 1
-      end
-
-      # Token types
-      define_type(:P_expect) # %expect
-      define_type(:P_define) # %define
-      define_type(:P_printer) # %printer
-      define_type(:P_error_token) # %error-token
-      define_type(:P_lex_param) # %lex-param
-      define_type(:P_parse_param) # %parse-param
-      define_type(:P_initial_action) # %initial-action
-      define_type(:P_union) # %union
-      define_type(:P_token) # %token
-      define_type(:P_type) # %type
-      define_type(:P_nonassoc) # %nonassoc
-      define_type(:P_left) # %left
-      define_type(:P_right) # %right
-      define_type(:P_precedence) # %precedence
-      define_type(:P_prec) # %prec
-      define_type(:User_code) # { ... }
-      define_type(:Tag) # <int>
-      define_type(:Number) # 0
-      define_type(:Ident_Colon) # k_if:, k_if : (spaces can be there)
-      define_type(:Ident) # api.pure, tNUMBER
-      define_type(:Named_Ref) # [foo]
-      define_type(:Semicolon) # ;
-      define_type(:Bar) # |
-      define_type(:String) # "str"
-      define_type(:Char) # '+'
     end
   end
 end
+
+require 'lrama/lexer/token/char'
+require 'lrama/lexer/token/ident'
+require 'lrama/lexer/token/parameterizing'
+require 'lrama/lexer/token/tag'
+require 'lrama/lexer/token/user_code'