lrama 0.5.6 → 0.5.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.codespellignore +0 -0
- data/.github/workflows/codespell.yaml +16 -0
- data/.github/workflows/test.yaml +18 -2
- data/.gitignore +1 -0
- data/Gemfile +1 -0
- data/README.md +69 -3
- data/Rakefile +12 -0
- data/Steepfile +3 -0
- data/lib/lrama/command.rb +2 -1
- data/lib/lrama/context.rb +4 -4
- data/lib/lrama/digraph.rb +1 -2
- data/lib/lrama/grammar/union.rb +2 -2
- data/lib/lrama/grammar.rb +110 -1
- data/lib/lrama/lexer.rb +131 -303
- data/lib/lrama/option_parser.rb +5 -2
- data/lib/lrama/output.rb +27 -15
- data/lib/lrama/parser.rb +1764 -255
- data/lib/lrama/version.rb +1 -1
- data/parser.y +422 -0
- data/rbs_collection.lock.yaml +1 -1
- data/sample/calc.y +0 -2
- data/sample/parse.y +0 -3
- data/sig/lrama/digraph.rbs +23 -0
- data/sig/lrama/lexer/token/type.rbs +17 -0
- data/template/bison/_yacc.h +71 -0
- data/template/bison/yacc.c +6 -71
- data/template/bison/yacc.h +1 -73
- metadata +8 -3
- data/lib/lrama/parser/token_scanner.rb +0 -56
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 52e495f849079217c2cb4ee28edf873dd7c999d02f2f486acf8b9c2c1e74006f
|
4
|
+
data.tar.gz: 708aea3d87d066b71857d09c4b9fc0705c8abaedc1e8b8e465ae5f6c2a5e0315
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 779e08f5090a78347214d8fbe73e710469ed62f61e923b5fbf50ed1e4c022a4096961df035b517b4c6434fa69326d3a006d94cff144f2c87b48407093f8961c7
|
7
|
+
data.tar.gz: df1e4828906ae7613bca0f5f7dda3466091aa0fe15d7d5147e92bcd803f8ef0e58b6935a6e5061bb18d715a1e0f8b47aaab678c553edda828cb7ff1b05b2bc0f
|
data/.codespellignore
ADDED
File without changes
|
@@ -0,0 +1,16 @@
|
|
1
|
+
name: CodeSpell
|
2
|
+
on:
|
3
|
+
- pull_request
|
4
|
+
jobs:
|
5
|
+
codespell:
|
6
|
+
name: CodeSpell
|
7
|
+
runs-on: ubuntu-latest
|
8
|
+
steps:
|
9
|
+
- uses: actions/checkout@v4
|
10
|
+
- name: CodeSpell
|
11
|
+
uses: codespell-project/actions-codespell@master
|
12
|
+
with:
|
13
|
+
check_filenames: true
|
14
|
+
check_hidden: true
|
15
|
+
ignore_words_file: .codespellignore
|
16
|
+
exclude_file: lib/lrama/parser.rb
|
data/.github/workflows/test.yaml
CHANGED
@@ -38,13 +38,27 @@ jobs:
|
|
38
38
|
- run: bundle exec rspec
|
39
39
|
check-misc:
|
40
40
|
runs-on: ubuntu-20.04
|
41
|
+
strategy:
|
42
|
+
matrix:
|
43
|
+
ruby: ['head']
|
41
44
|
steps:
|
42
45
|
- uses: actions/checkout@v4
|
46
|
+
- uses: ruby/setup-ruby@v1
|
47
|
+
with:
|
48
|
+
ruby-version: ${{ matrix.ruby }}
|
49
|
+
bundler-cache: true
|
50
|
+
- run: bundle install
|
51
|
+
|
43
52
|
# Copy from https://github.com/ruby/ruby/blob/089227e94823542acfdafa68541d330eee42ffea/.github/workflows/check_misc.yml#L27
|
44
53
|
- name: Check for trailing spaces
|
45
54
|
run: |
|
46
|
-
git grep -I -n '[ ]$' -- '*.rb' '*.[chy]' '*.rs' && exit 1 || :
|
55
|
+
git grep -I -n '[ ]$' -- '*.rb' '*.[chy]' '*.rs' ':!spec/' && exit 1 || :
|
47
56
|
git grep -n '^[ ][ ]*$' -- '*.md' && exit 1 || :
|
57
|
+
|
58
|
+
- name: Check for parser.rb is up to date
|
59
|
+
run: |
|
60
|
+
bundle exec rake build:racc_parser
|
61
|
+
git diff --color --no-ext-diff --ignore-submodules --exit-code lib/lrama/parser.rb
|
48
62
|
steep-check:
|
49
63
|
runs-on: ubuntu-20.04
|
50
64
|
strategy:
|
@@ -65,7 +79,9 @@ jobs:
|
|
65
79
|
strategy:
|
66
80
|
fail-fast: false
|
67
81
|
matrix:
|
68
|
-
|
82
|
+
# '3.0' is the oldest living ruby version
|
83
|
+
# '2.5' is for BASERUBY
|
84
|
+
baseruby: ['head', '3.0', '2.5']
|
69
85
|
ruby_branch: ['master']
|
70
86
|
defaults:
|
71
87
|
run:
|
data/.gitignore
CHANGED
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -61,12 +61,78 @@ This branch generates "parse.c" compatible with Bison 3.8.2 for ruby 3.0, 3.1, 3
|
|
61
61
|
|
62
62
|
Lrama is executed with BASERUBY when building ruby from source code. Therefore Lrama needs to support BASERUBY, currently 2.5, or later version.
|
63
63
|
|
64
|
-
This also requires Lrama to be able to run with only default gems
|
64
|
+
This also requires Lrama to be able to run with only default gems because BASERUBY runs with `--disable=gems` option.
|
65
65
|
|
66
|
-
##
|
66
|
+
## Development
|
67
|
+
|
68
|
+
### How to generate lib/lrama/parser.rb
|
69
|
+
|
70
|
+
```shell
|
71
|
+
$ rake build:racc_parser
|
72
|
+
```
|
73
|
+
|
74
|
+
`lib/lrama/parser.rb` is generated from `parser.y` by Racc.
|
75
|
+
Run the rake command whenever you update `parser.y`, then commit the changes to both `parser.y` and the generated parser.
|
76
|
+
|
77
|
+
### Test
|
78
|
+
|
79
|
+
Running tests:
|
80
|
+
|
81
|
+
```shell
|
82
|
+
$ bundle install
|
83
|
+
$ bundle exec rspec
|
84
|
+
```
|
85
|
+
|
86
|
+
Running type check:
|
87
|
+
|
88
|
+
```shell
|
89
|
+
$ bundle install
|
90
|
+
$ bundle exec rbs collection install
|
91
|
+
$ bundle exec steep check
|
92
|
+
```
|
93
|
+
|
94
|
+
### Profiling Lrama
|
95
|
+
|
96
|
+
#### 1. Create parse.tmp.y in ruby/ruby
|
97
|
+
|
98
|
+
```shell
|
99
|
+
$ ruby tool/id2token.rb parse.y > parse.tmp.y
|
100
|
+
$ cp parse.tmp.y dir/lrama/tmp
|
101
|
+
```
|
102
|
+
|
103
|
+
#### 2. Enable Profiler
|
104
|
+
|
105
|
+
```diff
|
106
|
+
diff --git a/exe/lrama b/exe/lrama
|
107
|
+
index ba5fb06..2497178 100755
|
108
|
+
--- a/exe/lrama
|
109
|
+
+++ b/exe/lrama
|
110
|
+
@@ -3,4 +3,6 @@
|
111
|
+
$LOAD_PATH << File.join(__dir__, "../lib")
|
112
|
+
require "lrama"
|
113
|
+
|
114
|
+
-Lrama::Command.new.run(ARGV.dup)
|
115
|
+
+Lrama::Report::Profile.report_profile do
|
116
|
+
+ Lrama::Command.new.run(ARGV.dup)
|
117
|
+
+end
|
118
|
+
```
|
119
|
+
|
120
|
+
#### 3. Run Lrama
|
121
|
+
|
122
|
+
```shell
|
123
|
+
$ exe/lrama -o parse.tmp.c --header=parse.tmp.h tmp/parse.tmp.y
|
124
|
+
```
|
125
|
+
|
126
|
+
#### 4. Generate Flamegraph
|
127
|
+
|
128
|
+
```shell
|
129
|
+
$ stackprof --d3-flamegraph tmp/stackprof-cpu-myapp.dump > tmp/flamegraph.html
|
130
|
+
```
|
131
|
+
|
132
|
+
### Build Ruby
|
67
133
|
|
68
134
|
1. Install Lrama
|
69
|
-
2. Run `make
|
135
|
+
2. Run `make main`
|
70
136
|
|
71
137
|
## Release flow
|
72
138
|
|
data/Rakefile
CHANGED
@@ -1 +1,13 @@
|
|
1
1
|
require "bundler/gem_tasks"
|
2
|
+
|
3
|
+
# Tasks that regenerate lib/lrama/parser.rb from the Racc grammar parser.y.
namespace "build" do
  # Defined once and shared by both tasks so the plain and the verbose
  # builds always generate the parser from the same inputs.
  racc_command = "bundle exec racc parser.y --embedded -o lib/lrama/parser.rb"

  desc "build parser from parser.y by using Racc"
  task :racc_parser do
    sh racc_command
  end

  desc "build parser for debugging"
  task :racc_verbose_parser do
    # Same build with racc's `-t` and `--log-file=parser.output` flags
    # appended (see `racc --help` for their exact effect).
    sh "#{racc_command} -t --log-file=parser.output"
  end
end
|
data/Steepfile
CHANGED
@@ -1,10 +1,13 @@
|
|
1
1
|
# D = Steep::Diagnostic
|
2
2
|
#
|
3
3
|
target :lib do
|
4
|
+
repo_path '.gem_rbs_collection/'
|
4
5
|
signature "sig"
|
5
6
|
|
6
7
|
check "lib/lrama/bitmap.rb"
|
8
|
+
check "lib/lrama/digraph.rb"
|
7
9
|
check "lib/lrama/report/duration.rb"
|
8
10
|
check "lib/lrama/report/profile.rb"
|
11
|
+
check "lib/lrama/token/type.rb"
|
9
12
|
check "lib/lrama/warning.rb"
|
10
13
|
end
|
data/lib/lrama/command.rb
CHANGED
@@ -6,8 +6,9 @@ module Lrama
|
|
6
6
|
Report::Duration.enable if options.trace_opts[:time]
|
7
7
|
|
8
8
|
warning = Lrama::Warning.new
|
9
|
-
|
9
|
+
text = options.y.read
|
10
10
|
options.y.close if options.y != STDIN
|
11
|
+
grammar = Lrama::Parser.new(text, options.grammar_file).parse
|
11
12
|
states = Lrama::States.new(grammar, warning, trace_state: (options.trace_opts[:automaton] || options.trace_opts[:closure]))
|
12
13
|
states.compute
|
13
14
|
context = Lrama::Context.new(states)
|
data/lib/lrama/context.rb
CHANGED
@@ -170,7 +170,7 @@ module Lrama
|
|
170
170
|
return a
|
171
171
|
end
|
172
172
|
|
173
|
-
# Mapping from rule number to
|
173
|
+
# Mapping from rule number to length of RHS.
|
174
174
|
# Dummy rule is appended as the first element whose value is 0
|
175
175
|
# because 0 means error in yydefact.
|
176
176
|
def yyr2
|
@@ -214,7 +214,7 @@ module Lrama
|
|
214
214
|
(rule_id + 1) * -1
|
215
215
|
end
|
216
216
|
|
217
|
-
# Symbol number is
|
217
|
+
# Symbol number is assigned to term first then nterm.
|
218
218
|
# This method calculates sequence_number for nterm.
|
219
219
|
def nterm_number_to_sequence_number(nterm_number)
|
220
220
|
nterm_number - @states.terms.count
|
@@ -259,7 +259,7 @@ module Lrama
|
|
259
259
|
actions[conflict.symbol.number] = ErrorActionNumber
|
260
260
|
end
|
261
261
|
|
262
|
-
# If default_reduction_rule,
|
262
|
+
# If default_reduction_rule, replace default_reduction_rule in
|
263
263
|
# actions with zero.
|
264
264
|
if state.default_reduction_rule
|
265
265
|
actions.map! do |e|
|
@@ -272,7 +272,7 @@ module Lrama
|
|
272
272
|
end
|
273
273
|
|
274
274
|
# If no default_reduction_rule, default behavior is an
|
275
|
-
# error then
|
275
|
+
# error then replace ErrorActionNumber with zero.
|
276
276
|
if !state.default_reduction_rule
|
277
277
|
actions.map! do |e|
|
278
278
|
if e == ErrorActionNumber
|
data/lib/lrama/digraph.rb
CHANGED
data/lib/lrama/grammar/union.rb
CHANGED
data/lib/lrama/grammar.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
require "strscan"
|
2
|
+
|
1
3
|
require "lrama/grammar/auxiliary"
|
2
4
|
require "lrama/grammar/code"
|
3
5
|
require "lrama/grammar/error_token"
|
@@ -306,6 +308,111 @@ module Lrama
|
|
306
308
|
@nterms ||= @symbols.select(&:nterm?)
|
307
309
|
end
|
308
310
|
|
311
|
+
# Try to scan a single `$` or `@` reference at the scanner's current position.
#
# scanner - a StringScanner positioned inside user code.
#
# Returns nil, leaving the scanner position untouched, when no reference
# starts at the current position. Otherwise the reference is consumed and a
# five element array is returned:
#
#   [type, value, tag, first, last]
#
#   type  - :dollar for `$...` references, :at for `@...` references.
#   value - "$" for `$$` / `@$`, an Integer for positional references,
#           or the referenced name as a String.
#   tag   - a Lrama::Lexer::Token of type Tag when a `<tag>` was written
#           between `$` and the value, otherwise nil (always nil for `@`).
#   first - scanner.pos before the reference was consumed.
#   last  - scanner.pos of the last character of the reference
#           (scanner.pos after the scan, minus one).
def scan_reference(scanner)
  start = scanner.pos

  # Builds the optional `<tag>` token from capture group 1 of the most
  # recent successful scan. Only meaningful for `$` references.
  tag_token = lambda do
    scanner[1] && Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1])
  end

  case
  # $ references
  # An identifier containing "." or "-" must be wrapped in brackets.
  when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\$/) # $$, $<long>$
    return [:dollar, "$", tag_token.call, start, scanner.pos - 1]
  when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1
    return [:dollar, Integer(scanner[2]), tag_token.call, start, scanner.pos - 1]
  when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_][a-zA-Z0-9_]*)/) # $foo, $expr, $<long>program (named reference without brackets)
    return [:dollar, scanner[2], tag_token.call, start, scanner.pos - 1]
  when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $[expr.right], $[expr-right], $<long>[program] (named reference with brackets)
    return [:dollar, scanner[2], tag_token.call, start, scanner.pos - 1]

  # @ references
  # An identifier containing "." or "-" must be wrapped in brackets.
  when scanner.scan(/@\$/) # @$
    return [:at, "$", nil, start, scanner.pos - 1]
  when scanner.scan(/@(\d+)/) # @1
    return [:at, Integer(scanner[1]), nil, start, scanner.pos - 1]
  # A leading underscore is accepted here so that `@name` recognises the
  # same identifiers as `$name` and as the bracketed forms.
  when scanner.scan(/@([a-zA-Z_][a-zA-Z0-9_]*)/) # @foo, @expr, @_foo (named reference without brackets)
    return [:at, scanner[1], nil, start, scanner.pos - 1]
  when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @[expr.right], @[expr-right] (named reference with brackets)
    return [:at, scanner[1], nil, start, scanner.pos - 1]
  end
end
|
341
|
+
|
342
|
+
# Scan every piece of user code held by the grammar for `$` / `@`
# references and attach the result to the owning token.
#
# Covered, in this order: the initial action (when present), every printer,
# every error token, and every User_code token on the right hand side of a
# rule. For each of them the scanned list is assigned to `references` and
# `build_references` is called on the token; rule actions additionally get
# `numberize_references(lhs, rhs)` called before `build_references`.
#
# C style `/* ... */` comments are skipped only while scanning rule actions;
# the other code blocks are scanned character by character throughout.
def extract_references
  unless initial_action.nil?
    initial_action.token_code.references = scan_code_references(initial_action.s_value)
    build_references(initial_action.token_code)
  end

  @printers.each do |printer|
    printer.code.token_code.references = scan_code_references(printer.code.s_value)
    build_references(printer.code.token_code)
  end

  @error_tokens.each do |error_token|
    error_token.code.token_code.references = scan_code_references(error_token.code.s_value)
    build_references(error_token.code.token_code)
  end

  @_rules.each do |lhs, rhs, _|
    rhs.each do |token|
      # Only user code tokens carry references; symbols and other tokens are skipped.
      next if token.class == Lrama::Grammar::Symbol || token.type != Lrama::Lexer::Token::User_code

      token.references = scan_code_references(token.s_value, skip_comments: true)
      token.numberize_references(lhs, rhs)
      build_references(token)
    end
  end
end

# Collect every reference found by `scan_reference` in `code`, in order of
# appearance.
#
# code          - String of user code to scan.
# skip_comments - when true, text between `/*` and the next `*/` is skipped
#                 without looking for references. If no closing `*/` exists,
#                 scanning simply resumes right after the `/*`.
#
# Returns an Array of the arrays produced by `scan_reference`.
private def scan_code_references(code, skip_comments: false)
  scanner = StringScanner.new(code)
  references = []

  until scanner.eos?
    if (reference = scan_reference(scanner))
      references << reference
    elsif skip_comments && scanner.scan(/\/\*/)
      scanner.scan_until(/\*\//)
    else
      # Not a reference: advance by one character and try again.
      scanner.getch
    end
  end

  references
end
|
415
|
+
|
309
416
|
private
|
310
417
|
|
311
418
|
def find_nterm_by_id!(id)
|
@@ -470,7 +577,9 @@ module Lrama
|
|
470
577
|
|
471
578
|
# Fill #number and #token_id
|
472
579
|
def fill_symbol_number
|
473
|
-
#
|
580
|
+
# Character literal in grammar file has
|
581
|
+
# token id corresponding to ASCII code by default,
|
582
|
+
# so start token_id from 256.
|
474
583
|
token_id = 256
|
475
584
|
|
476
585
|
# YYEMPTY = -2
|