lrama 0.5.7 → 0.5.9
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/test.yaml +1 -1
- data/Gemfile +1 -1
- data/README.md +3 -3
- data/Rakefile +3 -8
- data/Steepfile +9 -1
- data/lib/lrama/command.rb +6 -1
- data/lib/lrama/grammar/percent_code.rb +12 -0
- data/lib/lrama/grammar/symbol.rb +2 -2
- data/lib/lrama/grammar.rb +127 -159
- data/lib/lrama/lexer/token/char.rb +8 -0
- data/lib/lrama/lexer/token/ident.rb +8 -0
- data/lib/lrama/lexer/token/parameterizing.rb +19 -0
- data/lib/lrama/lexer/token/tag.rb +8 -0
- data/lib/lrama/lexer/token/user_code.rb +14 -0
- data/lib/lrama/lexer/token.rb +9 -67
- data/lib/lrama/lexer.rb +14 -15
- data/lib/lrama/option_parser.rb +3 -3
- data/lib/lrama/options.rb +2 -1
- data/lib/lrama/output.rb +9 -0
- data/lib/lrama/parser.rb +540 -493
- data/lib/lrama/version.rb +1 -1
- data/parser.y +101 -71
- data/rbs_collection.lock.yaml +13 -1
- data/sample/calc.y +3 -1
- data/sample/parse.y +5 -1
- data/sig/lrama/grammar/percent_code.rbs +10 -0
- data/sig/lrama/grammar/reference.rbs +22 -0
- data/sig/lrama/grammar.rbs +5 -0
- data/sig/lrama/lexer/token/char.rbs +8 -0
- data/sig/lrama/lexer/token/ident.rbs +8 -0
- data/sig/lrama/lexer/token/parameterizing.rbs +8 -0
- data/sig/lrama/lexer/token/tag.rbs +8 -0
- data/sig/lrama/lexer/token/user_code.rbs +9 -0
- data/sig/lrama/lexer/token.rbs +17 -0
- data/template/bison/_yacc.h +2 -2
- data/template/bison/yacc.c +0 -2
- metadata +17 -4
- data/lib/lrama/lexer/token/type.rb +0 -8
- data/sig/lrama/lexer/token/type.rbs +0 -17
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5369d73367ea2fee299dbb73336a7801c98faa51bb3f68e66327c12f5a2d6716
|
4
|
+
data.tar.gz: b79943902e78f921d3361800cd1c371cebc594f527e940b83be3d26fcab190a9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 93fd0bb99180b74f7a98b1186273b9324ac8855f599add59a4eb86b7b38b7930975c32463568ae93ff5d829d7c605dbd8a264c8127e40d92f7f7b0bd46637b0a
|
7
|
+
data.tar.gz: 06a43beddb6b78b4a1866164c63ebff7309c134b74dc32b5272e181f88ba4b2ff2ea30d756a16016e194e6c0babf0e6ee1536984e90835798eaa57be8eb09c9f
|
data/.github/workflows/test.yaml
CHANGED
@@ -57,7 +57,7 @@ jobs:
|
|
57
57
|
|
58
58
|
- name: Check for parser.rb is up to date
|
59
59
|
run: |
|
60
|
-
bundle exec rake build:
|
60
|
+
bundle exec rake build:parser
|
61
61
|
git diff --color --no-ext-diff --ignore-submodules --exit-code lib/lrama/parser.rb
|
62
62
|
steep-check:
|
63
63
|
runs-on: ubuntu-20.04
|
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -65,13 +65,13 @@ This also requires Lrama to be able to run with only default gems because BASERU
|
|
65
65
|
|
66
66
|
## Development
|
67
67
|
|
68
|
-
### How to generate
|
68
|
+
### How to generate parser.rb
|
69
69
|
|
70
70
|
```shell
|
71
|
-
$ rake build:
|
71
|
+
$ rake build:parser
|
72
72
|
```
|
73
73
|
|
74
|
-
`
|
74
|
+
`parser.rb` is generated from `parser.y` by Racc.
|
75
75
|
Run the rake command when you update `parser.y` then commit changes of both files.
|
76
76
|
|
77
77
|
### Test
|
data/Rakefile
CHANGED
@@ -1,13 +1,8 @@
|
|
1
1
|
require "bundler/gem_tasks"
|
2
2
|
|
3
3
|
namespace "build" do
|
4
|
-
desc "build parser from parser.y
|
5
|
-
task :
|
6
|
-
|
7
|
-
end
|
8
|
-
|
9
|
-
desc "build parser for debugging"
|
10
|
-
task :racc_verbose_parser do
|
11
|
-
`bundle exec racc parser.y --embedded -o lib/lrama/parser.rb -t --log-file=parser.output`
|
4
|
+
desc "build parser from parser.y"
|
5
|
+
task :parser do
|
6
|
+
sh "bundle exec racc parser.y --embedded -o lib/lrama/parser.rb -t --log-file=parser.output"
|
12
7
|
end
|
13
8
|
end
|
data/Steepfile
CHANGED
@@ -6,8 +6,16 @@ target :lib do
|
|
6
6
|
|
7
7
|
check "lib/lrama/bitmap.rb"
|
8
8
|
check "lib/lrama/digraph.rb"
|
9
|
+
check "lib/lrama/grammar/percent_code.rb"
|
10
|
+
# TODO: Include this file once Lrama::Grammar::Symbol type is defined
|
11
|
+
# check "lib/lrama/grammar/reference.rb"
|
12
|
+
check "lib/lrama/lexer/token.rb"
|
13
|
+
check "lib/lrama/lexer/token/char.rb"
|
14
|
+
check "lib/lrama/lexer/token/ident.rb"
|
15
|
+
check "lib/lrama/lexer/token/parameterizing.rb"
|
16
|
+
check "lib/lrama/lexer/token/tag.rb"
|
17
|
+
check "lib/lrama/lexer/token/user_code.rb"
|
9
18
|
check "lib/lrama/report/duration.rb"
|
10
19
|
check "lib/lrama/report/profile.rb"
|
11
|
-
check "lib/lrama/token/type.rb"
|
12
20
|
check "lib/lrama/warning.rb"
|
13
21
|
end
|
data/lib/lrama/command.rb
CHANGED
@@ -8,7 +8,7 @@ module Lrama
|
|
8
8
|
warning = Lrama::Warning.new
|
9
9
|
text = options.y.read
|
10
10
|
options.y.close if options.y != STDIN
|
11
|
-
grammar = Lrama::Parser.new(text).parse
|
11
|
+
grammar = Lrama::Parser.new(text, options.grammar_file, options.debug).parse
|
12
12
|
states = Lrama::States.new(grammar, warning, trace_state: (options.trace_opts[:automaton] || options.trace_opts[:closure]))
|
13
13
|
states.compute
|
14
14
|
context = Lrama::Context.new(states)
|
@@ -20,6 +20,11 @@ module Lrama
|
|
20
20
|
end
|
21
21
|
end
|
22
22
|
|
23
|
+
if options.trace_opts && options.trace_opts[:rules]
|
24
|
+
puts "Grammar rules:"
|
25
|
+
puts grammar.rules
|
26
|
+
end
|
27
|
+
|
23
28
|
File.open(options.outfile, "w+") do |f|
|
24
29
|
Lrama::Output.new(
|
25
30
|
out: f,
|
data/lib/lrama/grammar/symbol.rb
CHANGED
@@ -47,9 +47,9 @@ module Lrama
|
|
47
47
|
name = "YYACCEPT"
|
48
48
|
when eof_symbol?
|
49
49
|
name = "YYEOF"
|
50
|
-
when term? && id.
|
50
|
+
when term? && id.is_a?(Lrama::Lexer::Token::Char)
|
51
51
|
name = number.to_s + display_name
|
52
|
-
when term? && id.
|
52
|
+
when term? && id.is_a?(Lrama::Lexer::Token::Ident)
|
53
53
|
name = id.s_value
|
54
54
|
when nterm? && (id.s_value.include?("$") || id.s_value.include?("@"))
|
55
55
|
name = number.to_s + id.s_value
|
data/lib/lrama/grammar.rb
CHANGED
@@ -3,6 +3,7 @@ require "strscan"
|
|
3
3
|
require "lrama/grammar/auxiliary"
|
4
4
|
require "lrama/grammar/code"
|
5
5
|
require "lrama/grammar/error_token"
|
6
|
+
require "lrama/grammar/percent_code"
|
6
7
|
require "lrama/grammar/precedence"
|
7
8
|
require "lrama/grammar/printer"
|
8
9
|
require "lrama/grammar/reference"
|
@@ -13,11 +14,9 @@ require "lrama/lexer"
|
|
13
14
|
require "lrama/type"
|
14
15
|
|
15
16
|
module Lrama
|
16
|
-
Token = Lrama::Lexer::Token
|
17
|
-
|
18
17
|
# Grammar is the result of parsing an input grammar file
|
19
18
|
class Grammar
|
20
|
-
attr_reader :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol, :aux
|
19
|
+
attr_reader :percent_codes, :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol, :aux
|
21
20
|
attr_accessor :union, :expect,
|
22
21
|
:printers, :error_tokens,
|
23
22
|
:lex_param, :parse_param, :initial_action,
|
@@ -26,6 +25,8 @@ module Lrama
|
|
26
25
|
:sym_to_rules
|
27
26
|
|
28
27
|
def initialize
|
28
|
+
# Code defined by "%code"
|
29
|
+
@percent_codes = []
|
29
30
|
@printers = []
|
30
31
|
@error_tokens = []
|
31
32
|
@symbols = []
|
@@ -43,6 +44,10 @@ module Lrama
|
|
43
44
|
append_special_symbols
|
44
45
|
end
|
45
46
|
|
47
|
+
def add_percent_code(id:, code:)
|
48
|
+
@percent_codes << PercentCode.new(id, code)
|
49
|
+
end
|
50
|
+
|
46
51
|
def add_printer(ident_or_tags:, code:, lineno:)
|
47
52
|
@printers << Printer.new(ident_or_tags: ident_or_tags, code: code, lineno: lineno)
|
48
53
|
end
|
@@ -122,16 +127,7 @@ module Lrama
|
|
122
127
|
@_rules << [lhs, rhs, lineno]
|
123
128
|
end
|
124
129
|
|
125
|
-
def build_references(token_code)
|
126
|
-
token_code.references.map! do |type, value, tag, first_column, last_column|
|
127
|
-
Reference.new(type: type, value: value, ex_tag: tag, first_column: first_column, last_column: last_column)
|
128
|
-
end
|
129
|
-
|
130
|
-
token_code
|
131
|
-
end
|
132
|
-
|
133
130
|
def build_code(type, token_code)
|
134
|
-
build_references(token_code)
|
135
131
|
Code.new(type: type, token_code: token_code)
|
136
132
|
end
|
137
133
|
|
@@ -152,6 +148,7 @@ module Lrama
|
|
152
148
|
end
|
153
149
|
|
154
150
|
def prepare
|
151
|
+
extract_references
|
155
152
|
normalize_rules
|
156
153
|
collect_symbols
|
157
154
|
replace_token_with_symbol
|
@@ -308,46 +305,53 @@ module Lrama
|
|
308
305
|
@nterms ||= @symbols.select(&:nterm?)
|
309
306
|
end
|
310
307
|
|
308
|
+
def scan_reference(scanner)
|
309
|
+
start = scanner.pos
|
310
|
+
case
|
311
|
+
# $ references
|
312
|
+
# It need to wrap an identifier with brackets to use ".-" for identifiers
|
313
|
+
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\$/) # $$, $<long>$
|
314
|
+
tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
|
315
|
+
return Reference.new(type: :dollar, value: "$", ex_tag: tag, first_column: start, last_column: scanner.pos - 1)
|
316
|
+
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1
|
317
|
+
tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
|
318
|
+
return Reference.new(type: :dollar, value: Integer(scanner[2]), ex_tag: tag, first_column: start, last_column: scanner.pos - 1)
|
319
|
+
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_][a-zA-Z0-9_]*)/) # $foo, $expr, $<long>program (named reference without brackets)
|
320
|
+
tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
|
321
|
+
return Reference.new(type: :dollar, value: scanner[2], ex_tag: tag, first_column: start, last_column: scanner.pos - 1)
|
322
|
+
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $expr.right, $expr-right, $<long>program (named reference with brackets)
|
323
|
+
tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
|
324
|
+
return Reference.new(type: :dollar, value: scanner[2], ex_tag: tag, first_column: start, last_column: scanner.pos - 1)
|
325
|
+
|
326
|
+
# @ references
|
327
|
+
# It need to wrap an identifier with brackets to use ".-" for identifiers
|
328
|
+
when scanner.scan(/@\$/) # @$
|
329
|
+
return Reference.new(type: :at, value: "$", first_column: start, last_column: scanner.pos - 1)
|
330
|
+
when scanner.scan(/@(\d+)/) # @1
|
331
|
+
return Reference.new(type: :at, value: Integer(scanner[1]), first_column: start, last_column: scanner.pos - 1)
|
332
|
+
when scanner.scan(/@([a-zA-Z][a-zA-Z0-9_]*)/) # @foo, @expr (named reference without brackets)
|
333
|
+
return Reference.new(type: :at, value: scanner[1], first_column: start, last_column: scanner.pos - 1)
|
334
|
+
when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @expr.right, @expr-right (named reference with brackets)
|
335
|
+
return Reference.new(type: :at, value: scanner[1], first_column: start, last_column: scanner.pos - 1)
|
336
|
+
end
|
337
|
+
end
|
338
|
+
|
339
|
+
private
|
340
|
+
|
311
341
|
def extract_references
|
312
342
|
unless initial_action.nil?
|
313
343
|
scanner = StringScanner.new(initial_action.s_value)
|
314
344
|
references = []
|
315
345
|
|
316
346
|
while !scanner.eos? do
|
317
|
-
|
318
|
-
|
319
|
-
# $ references
|
320
|
-
# It need to wrap an identifier with brackets to use ".-" for identifiers
|
321
|
-
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\$/) # $$, $<long>$
|
322
|
-
tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
|
323
|
-
references << [:dollar, "$", tag, start, scanner.pos - 1]
|
324
|
-
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1
|
325
|
-
tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
|
326
|
-
references << [:dollar, Integer(scanner[2]), tag, start, scanner.pos - 1]
|
327
|
-
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_][a-zA-Z0-9_]*)/) # $foo, $expr, $<long>program (named reference without brackets)
|
328
|
-
tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
|
329
|
-
references << [:dollar, scanner[2], tag, start, scanner.pos - 1]
|
330
|
-
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $expr.right, $expr-right, $<long>program (named reference with brackets)
|
331
|
-
tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
|
332
|
-
references << [:dollar, scanner[2], tag, start, scanner.pos - 1]
|
333
|
-
|
334
|
-
# @ references
|
335
|
-
# It need to wrap an identifier with brackets to use ".-" for identifiers
|
336
|
-
when scanner.scan(/@\$/) # @$
|
337
|
-
references << [:at, "$", nil, start, scanner.pos - 1]
|
338
|
-
when scanner.scan(/@(\d+)/) # @1
|
339
|
-
references << [:at, Integer(scanner[1]), nil, start, scanner.pos - 1]
|
340
|
-
when scanner.scan(/@([a-zA-Z][a-zA-Z0-9_]*)/) # @foo, @expr (named reference without brackets)
|
341
|
-
references << [:at, scanner[1], nil, start, scanner.pos - 1]
|
342
|
-
when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @expr.right, @expr-right (named reference with brackets)
|
343
|
-
references << [:at, scanner[1], nil, start, scanner.pos - 1]
|
347
|
+
if reference = scan_reference(scanner)
|
348
|
+
references << reference
|
344
349
|
else
|
345
350
|
scanner.getch
|
346
351
|
end
|
347
352
|
end
|
348
353
|
|
349
354
|
initial_action.token_code.references = references
|
350
|
-
build_references(initial_action.token_code)
|
351
355
|
end
|
352
356
|
|
353
357
|
@printers.each do |printer|
|
@@ -355,40 +359,14 @@ module Lrama
|
|
355
359
|
references = []
|
356
360
|
|
357
361
|
while !scanner.eos? do
|
358
|
-
|
359
|
-
|
360
|
-
# $ references
|
361
|
-
# It need to wrap an identifier with brackets to use ".-" for identifiers
|
362
|
-
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\$/) # $$, $<long>$
|
363
|
-
tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
|
364
|
-
references << [:dollar, "$", tag, start, scanner.pos - 1]
|
365
|
-
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1
|
366
|
-
tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
|
367
|
-
references << [:dollar, Integer(scanner[2]), tag, start, scanner.pos - 1]
|
368
|
-
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_][a-zA-Z0-9_]*)/) # $foo, $expr, $<long>program (named reference without brackets)
|
369
|
-
tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
|
370
|
-
references << [:dollar, scanner[2], tag, start, scanner.pos - 1]
|
371
|
-
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $expr.right, $expr-right, $<long>program (named reference with brackets)
|
372
|
-
tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
|
373
|
-
references << [:dollar, scanner[2], tag, start, scanner.pos - 1]
|
374
|
-
|
375
|
-
# @ references
|
376
|
-
# It need to wrap an identifier with brackets to use ".-" for identifiers
|
377
|
-
when scanner.scan(/@\$/) # @$
|
378
|
-
references << [:at, "$", nil, start, scanner.pos - 1]
|
379
|
-
when scanner.scan(/@(\d+)/) # @1
|
380
|
-
references << [:at, Integer(scanner[1]), nil, start, scanner.pos - 1]
|
381
|
-
when scanner.scan(/@([a-zA-Z][a-zA-Z0-9_]*)/) # @foo, @expr (named reference without brackets)
|
382
|
-
references << [:at, scanner[1], nil, start, scanner.pos - 1]
|
383
|
-
when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @expr.right, @expr-right (named reference with brackets)
|
384
|
-
references << [:at, scanner[1], nil, start, scanner.pos - 1]
|
362
|
+
if reference = scan_reference(scanner)
|
363
|
+
references << reference
|
385
364
|
else
|
386
365
|
scanner.getch
|
387
366
|
end
|
388
367
|
end
|
389
368
|
|
390
369
|
printer.code.token_code.references = references
|
391
|
-
build_references(printer.code.token_code)
|
392
370
|
end
|
393
371
|
|
394
372
|
@error_tokens.each do |error_token|
|
@@ -396,78 +374,27 @@ module Lrama
|
|
396
374
|
references = []
|
397
375
|
|
398
376
|
while !scanner.eos? do
|
399
|
-
|
400
|
-
|
401
|
-
# $ references
|
402
|
-
# It need to wrap an identifier with brackets to use ".-" for identifiers
|
403
|
-
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\$/) # $$, $<long>$
|
404
|
-
tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
|
405
|
-
references << [:dollar, "$", tag, start, scanner.pos - 1]
|
406
|
-
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1
|
407
|
-
tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
|
408
|
-
references << [:dollar, Integer(scanner[2]), tag, start, scanner.pos - 1]
|
409
|
-
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_][a-zA-Z0-9_]*)/) # $foo, $expr, $<long>program (named reference without brackets)
|
410
|
-
tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
|
411
|
-
references << [:dollar, scanner[2], tag, start, scanner.pos - 1]
|
412
|
-
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $expr.right, $expr-right, $<long>program (named reference with brackets)
|
413
|
-
tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
|
414
|
-
references << [:dollar, scanner[2], tag, start, scanner.pos - 1]
|
415
|
-
|
416
|
-
# @ references
|
417
|
-
# It need to wrap an identifier with brackets to use ".-" for identifiers
|
418
|
-
when scanner.scan(/@\$/) # @$
|
419
|
-
references << [:at, "$", nil, start, scanner.pos - 1]
|
420
|
-
when scanner.scan(/@(\d+)/) # @1
|
421
|
-
references << [:at, Integer(scanner[1]), nil, start, scanner.pos - 1]
|
422
|
-
when scanner.scan(/@([a-zA-Z][a-zA-Z0-9_]*)/) # @foo, @expr (named reference without brackets)
|
423
|
-
references << [:at, scanner[1], nil, start, scanner.pos - 1]
|
424
|
-
when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @expr.right, @expr-right (named reference with brackets)
|
425
|
-
references << [:at, scanner[1], nil, start, scanner.pos - 1]
|
377
|
+
if reference = scan_reference(scanner)
|
378
|
+
references << reference
|
426
379
|
else
|
427
380
|
scanner.getch
|
428
381
|
end
|
429
382
|
end
|
430
383
|
|
431
384
|
error_token.code.token_code.references = references
|
432
|
-
build_references(error_token.code.token_code)
|
433
385
|
end
|
434
386
|
|
435
387
|
@_rules.each do |lhs, rhs, _|
|
436
388
|
rhs.each_with_index do |token, index|
|
437
|
-
next
|
389
|
+
next unless token.class == Lrama::Lexer::Token::UserCode
|
438
390
|
|
439
391
|
scanner = StringScanner.new(token.s_value)
|
440
392
|
references = []
|
441
393
|
|
442
394
|
while !scanner.eos? do
|
443
|
-
start = scanner.pos
|
444
395
|
case
|
445
|
-
|
446
|
-
|
447
|
-
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\$/) # $$, $<long>$
|
448
|
-
tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
|
449
|
-
references << [:dollar, "$", tag, start, scanner.pos - 1]
|
450
|
-
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1
|
451
|
-
tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
|
452
|
-
references << [:dollar, Integer(scanner[2]), tag, start, scanner.pos - 1]
|
453
|
-
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_][a-zA-Z0-9_]*)/) # $foo, $expr, $<long>program (named reference without brackets)
|
454
|
-
tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
|
455
|
-
references << [:dollar, scanner[2], tag, start, scanner.pos - 1]
|
456
|
-
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $expr.right, $expr-right, $<long>program (named reference with brackets)
|
457
|
-
tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
|
458
|
-
references << [:dollar, scanner[2], tag, start, scanner.pos - 1]
|
459
|
-
|
460
|
-
# @ references
|
461
|
-
# It need to wrap an identifier with brackets to use ".-" for identifiers
|
462
|
-
when scanner.scan(/@\$/) # @$
|
463
|
-
references << [:at, "$", nil, start, scanner.pos - 1]
|
464
|
-
when scanner.scan(/@(\d+)/) # @1
|
465
|
-
references << [:at, Integer(scanner[1]), nil, start, scanner.pos - 1]
|
466
|
-
when scanner.scan(/@([a-zA-Z][a-zA-Z0-9_]*)/) # @foo, @expr (named reference without brackets)
|
467
|
-
references << [:at, scanner[1], nil, start, scanner.pos - 1]
|
468
|
-
when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @expr.right, @expr-right (named reference with brackets)
|
469
|
-
references << [:at, scanner[1], nil, start, scanner.pos - 1]
|
470
|
-
|
396
|
+
when reference = scan_reference(scanner)
|
397
|
+
references << reference
|
471
398
|
when scanner.scan(/\/\*/)
|
472
399
|
scanner.scan_until(/\*\//)
|
473
400
|
else
|
@@ -476,22 +403,11 @@ module Lrama
|
|
476
403
|
end
|
477
404
|
|
478
405
|
token.references = references
|
479
|
-
|
480
|
-
build_references(token)
|
406
|
+
numberize_references(lhs, rhs, token.references)
|
481
407
|
end
|
482
408
|
end
|
483
409
|
end
|
484
410
|
|
485
|
-
def create_token(type, s_value, line, column)
|
486
|
-
t = Token.new(type: type, s_value: s_value)
|
487
|
-
t.line = line
|
488
|
-
t.column = column
|
489
|
-
|
490
|
-
return t
|
491
|
-
end
|
492
|
-
|
493
|
-
private
|
494
|
-
|
495
411
|
def find_nterm_by_id!(id)
|
496
412
|
nterms.find do |nterm|
|
497
413
|
nterm.id == id
|
@@ -505,29 +421,54 @@ module Lrama
|
|
505
421
|
# @empty_symbol = term
|
506
422
|
|
507
423
|
# YYEOF
|
508
|
-
term = add_term(id: Token.new(
|
424
|
+
term = add_term(id: Lrama::Lexer::Token::Ident.new(s_value: "YYEOF"), alias_name: "\"end of file\"", token_id: 0)
|
509
425
|
term.number = 0
|
510
426
|
term.eof_symbol = true
|
511
427
|
@eof_symbol = term
|
512
428
|
|
513
429
|
# YYerror
|
514
|
-
term = add_term(id: Token.new(
|
430
|
+
term = add_term(id: Lrama::Lexer::Token::Ident.new(s_value: "YYerror"), alias_name: "error")
|
515
431
|
term.number = 1
|
516
432
|
term.error_symbol = true
|
517
433
|
@error_symbol = term
|
518
434
|
|
519
435
|
# YYUNDEF
|
520
|
-
term = add_term(id: Token.new(
|
436
|
+
term = add_term(id: Lrama::Lexer::Token::Ident.new(s_value: "YYUNDEF"), alias_name: "\"invalid token\"")
|
521
437
|
term.number = 2
|
522
438
|
term.undef_symbol = true
|
523
439
|
@undef_symbol = term
|
524
440
|
|
525
441
|
# $accept
|
526
|
-
term = add_nterm(id: Token.new(
|
442
|
+
term = add_nterm(id: Lrama::Lexer::Token::Ident.new(s_value: "$accept"))
|
527
443
|
term.accept_symbol = true
|
528
444
|
@accept_symbol = term
|
529
445
|
end
|
530
446
|
|
447
|
+
def numberize_references(lhs, rhs, references)
|
448
|
+
references.map! {|ref|
|
449
|
+
ref_name = ref.value
|
450
|
+
if ref_name.is_a?(::String) && ref_name != '$'
|
451
|
+
value =
|
452
|
+
if lhs.referred_by?(ref_name)
|
453
|
+
'$'
|
454
|
+
else
|
455
|
+
index = rhs.find_index {|token| token.referred_by?(ref_name) }
|
456
|
+
|
457
|
+
if index
|
458
|
+
index + 1
|
459
|
+
else
|
460
|
+
raise "'#{ref_name}' is invalid name."
|
461
|
+
end
|
462
|
+
end
|
463
|
+
|
464
|
+
ref.value = value
|
465
|
+
ref
|
466
|
+
else
|
467
|
+
ref
|
468
|
+
end
|
469
|
+
}
|
470
|
+
end
|
471
|
+
|
531
472
|
# 1. Add $accept rule to the top of rules
|
532
473
|
# 2. Extract precedence and last action
|
533
474
|
# 3. Extract action in the middle of RHS into new Empty rule
|
@@ -570,7 +511,7 @@ module Lrama
|
|
570
511
|
case
|
571
512
|
when r.is_a?(Symbol) # precedence_sym
|
572
513
|
precedence_sym = r
|
573
|
-
when
|
514
|
+
when r.is_a?(Lrama::Lexer::Token::UserCode) && precedence_sym.nil? && code.nil? && rhs1.empty?
|
574
515
|
code = r
|
575
516
|
else
|
576
517
|
rhs1 << r
|
@@ -580,7 +521,7 @@ module Lrama
|
|
580
521
|
|
581
522
|
# Bison n'th component is 1-origin
|
582
523
|
(rhs1 + [code]).compact.each.with_index(1) do |token, i|
|
583
|
-
if token.
|
524
|
+
if token.is_a?(Lrama::Lexer::Token::UserCode)
|
584
525
|
token.references.each do |ref|
|
585
526
|
# Need to keep position_in_rhs for actions in the middle of RHS
|
586
527
|
ref.position_in_rhs = i - 1
|
@@ -609,9 +550,9 @@ module Lrama
|
|
609
550
|
end
|
610
551
|
|
611
552
|
rhs2 = rhs1.map do |token|
|
612
|
-
if token.
|
553
|
+
if token.is_a?(Lrama::Lexer::Token::UserCode)
|
613
554
|
prefix = token.referred ? "@" : "$@"
|
614
|
-
new_token = Token.new(
|
555
|
+
new_token = Lrama::Lexer::Token::Ident.new(s_value: prefix + extracted_action_number.to_s)
|
615
556
|
extracted_action_number += 1
|
616
557
|
a << [new_token, token]
|
617
558
|
new_token
|
@@ -627,8 +568,12 @@ module Lrama
|
|
627
568
|
end
|
628
569
|
|
629
570
|
c = code ? Code.new(type: :user_code, token_code: code) : nil
|
630
|
-
|
631
|
-
|
571
|
+
# Expand Parameterizing rules
|
572
|
+
if rhs2.any? {|r| r.is_a?(Lrama::Lexer::Token::Parameterizing) }
|
573
|
+
expand_parameterizing_rules(lhs, rhs2, c, precedence_sym, lineno)
|
574
|
+
else
|
575
|
+
@rules << Rule.new(id: @rules.count, lhs: lhs, rhs: rhs2, code: c, precedence_sym: precedence_sym, lineno: lineno)
|
576
|
+
end
|
632
577
|
add_nterm(id: lhs)
|
633
578
|
a.each do |new_token, _|
|
634
579
|
add_nterm(id: new_token)
|
@@ -636,14 +581,37 @@ module Lrama
|
|
636
581
|
end
|
637
582
|
end
|
638
583
|
|
584
|
+
def expand_parameterizing_rules(lhs, rhs, code, precedence_sym, lineno)
|
585
|
+
token = Lrama::Lexer::Token::Ident.new(s_value: rhs[0].s_value)
|
586
|
+
if rhs.any? {|r| r.is_a?(Lrama::Lexer::Token::Parameterizing) && r.option? }
|
587
|
+
option_token = Lrama::Lexer::Token::Ident.new(s_value: "option_#{rhs[0].s_value}")
|
588
|
+
add_term(id: option_token)
|
589
|
+
@rules << Rule.new(id: @rules.count, lhs: lhs, rhs: [option_token], code: code, precedence_sym: precedence_sym, lineno: lineno)
|
590
|
+
@rules << Rule.new(id: @rules.count, lhs: option_token, rhs: [], code: code, precedence_sym: precedence_sym, lineno: lineno)
|
591
|
+
@rules << Rule.new(id: @rules.count, lhs: option_token, rhs: [token], code: code, precedence_sym: precedence_sym, lineno: lineno)
|
592
|
+
elsif rhs.any? {|r| r.is_a?(Lrama::Lexer::Token::Parameterizing) && r.nonempty_list? }
|
593
|
+
nonempty_list_token = Lrama::Lexer::Token::Ident.new(s_value: "nonempty_list_#{rhs[0].s_value}")
|
594
|
+
add_term(id: nonempty_list_token)
|
595
|
+
@rules << Rule.new(id: @rules.count, lhs: lhs, rhs: [nonempty_list_token], code: code, precedence_sym: precedence_sym, lineno: lineno)
|
596
|
+
@rules << Rule.new(id: @rules.count, lhs: nonempty_list_token, rhs: [token], code: code, precedence_sym: precedence_sym, lineno: lineno)
|
597
|
+
@rules << Rule.new(id: @rules.count, lhs: nonempty_list_token, rhs: [nonempty_list_token, token], code: code, precedence_sym: precedence_sym, lineno: lineno)
|
598
|
+
elsif rhs.any? {|r| r.is_a?(Lrama::Lexer::Token::Parameterizing) && r.list? }
|
599
|
+
list_token = Lrama::Lexer::Token::Ident.new(s_value: "list_#{rhs[0].s_value}")
|
600
|
+
add_term(id: list_token)
|
601
|
+
@rules << Rule.new(id: @rules.count, lhs: lhs, rhs: [list_token], code: code, precedence_sym: precedence_sym, lineno: lineno)
|
602
|
+
@rules << Rule.new(id: @rules.count, lhs: list_token, rhs: [], code: code, precedence_sym: precedence_sym, lineno: lineno)
|
603
|
+
@rules << Rule.new(id: @rules.count, lhs: list_token, rhs: [list_token, token], code: code, precedence_sym: precedence_sym, lineno: lineno)
|
604
|
+
end
|
605
|
+
end
|
606
|
+
|
639
607
|
# Collect symbols from rules
|
640
608
|
def collect_symbols
|
641
609
|
@rules.flat_map(&:rhs).each do |s|
|
642
610
|
case s
|
643
|
-
when Token
|
644
|
-
|
645
|
-
|
646
|
-
|
611
|
+
when Lrama::Lexer::Token::Char
|
612
|
+
add_term(id: s)
|
613
|
+
when Lrama::Lexer::Token
|
614
|
+
# skip
|
647
615
|
when Symbol
|
648
616
|
# skip
|
649
617
|
else
|
@@ -684,7 +652,7 @@ module Lrama
|
|
684
652
|
|
685
653
|
# If id is Token::Char, it uses ASCII code
|
686
654
|
if sym.term? && sym.token_id.nil?
|
687
|
-
if sym.id.
|
655
|
+
if sym.id.is_a?(Lrama::Lexer::Token::Char)
|
688
656
|
# Ignore ' on the both sides
|
689
657
|
case sym.id.s_value[1..-2]
|
690
658
|
when "\\b"
|
@@ -737,7 +705,7 @@ module Lrama
|
|
737
705
|
rule.code.references.each do |ref|
|
738
706
|
next if ref.type == :at
|
739
707
|
|
740
|
-
if ref.referring_symbol.
|
708
|
+
if !ref.referring_symbol.is_a?(Lrama::Lexer::Token::UserCode)
|
741
709
|
ref.referring_symbol = token_to_symbol(ref.referring_symbol)
|
742
710
|
end
|
743
711
|
end
|
@@ -747,7 +715,7 @@ module Lrama
|
|
747
715
|
|
748
716
|
def token_to_symbol(token)
|
749
717
|
case token
|
750
|
-
when Token
|
718
|
+
when Lrama::Lexer::Token
|
751
719
|
find_symbol_by_id!(token)
|
752
720
|
when Symbol
|
753
721
|
token
|
@@ -793,10 +761,10 @@ module Lrama
|
|
793
761
|
@symbols.each do |sym|
|
794
762
|
@printers.each do |printer|
|
795
763
|
printer.ident_or_tags.each do |ident_or_tag|
|
796
|
-
case ident_or_tag
|
797
|
-
when Token::Ident
|
764
|
+
case ident_or_tag
|
765
|
+
when Lrama::Lexer::Token::Ident
|
798
766
|
sym.printer = printer if sym.id == ident_or_tag
|
799
|
-
when Token::Tag
|
767
|
+
when Lrama::Lexer::Token::Tag
|
800
768
|
sym.printer = printer if sym.tag == ident_or_tag
|
801
769
|
else
|
802
770
|
raise "Unknown token type. #{printer}"
|
@@ -810,10 +778,10 @@ module Lrama
|
|
810
778
|
@symbols.each do |sym|
|
811
779
|
@error_tokens.each do |error_token|
|
812
780
|
error_token.ident_or_tags.each do |ident_or_tag|
|
813
|
-
case ident_or_tag
|
814
|
-
when Token::Ident
|
781
|
+
case ident_or_tag
|
782
|
+
when Lrama::Lexer::Token::Ident
|
815
783
|
sym.error_token = error_token if sym.id == ident_or_tag
|
816
|
-
when Token::Tag
|
784
|
+
when Lrama::Lexer::Token::Tag
|
817
785
|
sym.error_token = error_token if sym.tag == ident_or_tag
|
818
786
|
else
|
819
787
|
raise "Unknown token type. #{error_token}"
|