lrama 0.5.6 → 0.5.8

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e54c51af6f1d3632293cbd7f68762bf7cff63758d3ce633113805a629b92072b
4
- data.tar.gz: 5d053543f0e00c9fb20c40f5ca8236e499292f35fa8cf6fb8e85cac33b930814
3
+ metadata.gz: 52e495f849079217c2cb4ee28edf873dd7c999d02f2f486acf8b9c2c1e74006f
4
+ data.tar.gz: 708aea3d87d066b71857d09c4b9fc0705c8abaedc1e8b8e465ae5f6c2a5e0315
5
5
  SHA512:
6
- metadata.gz: 87bafe9650720b154855e055e53d700cfba67d31489dbc855c716bc150dff35e92d0fc974c14cb81a07ae68febbd69118c1720a628e2819fec1ebfa304acad55
7
- data.tar.gz: 19b0c51748cef053d205bbf13e8ff238bda2fdbb101d304f6bfe9633b4f88fe2c4a1f452627528fd2c9e78bc97955a2a5f7543212aa581c3a9f7ae96dd3d70f8
6
+ metadata.gz: 779e08f5090a78347214d8fbe73e710469ed62f61e923b5fbf50ed1e4c022a4096961df035b517b4c6434fa69326d3a006d94cff144f2c87b48407093f8961c7
7
+ data.tar.gz: df1e4828906ae7613bca0f5f7dda3466091aa0fe15d7d5147e92bcd803f8ef0e58b6935a6e5061bb18d715a1e0f8b47aaab678c553edda828cb7ff1b05b2bc0f
data/.codespellignore ADDED
File without changes
@@ -0,0 +1,16 @@
1
+ name: CodeSpell
2
+ on:
3
+ - pull_request
4
+ jobs:
5
+ codespell:
6
+ name: CodeSpell
7
+ runs-on: ubuntu-latest
8
+ steps:
9
+ - uses: actions/checkout@v4
10
+ - name: CodeSpell
11
+ uses: codespell-project/actions-codespell@master
12
+ with:
13
+ check_filenames: true
14
+ check_hidden: true
15
+ ignore_words_file: .codespellignore
16
+ exclude_file: lib/lrama/parser.rb
@@ -38,13 +38,27 @@ jobs:
38
38
  - run: bundle exec rspec
39
39
  check-misc:
40
40
  runs-on: ubuntu-20.04
41
+ strategy:
42
+ matrix:
43
+ ruby: ['head']
41
44
  steps:
42
45
  - uses: actions/checkout@v4
46
+ - uses: ruby/setup-ruby@v1
47
+ with:
48
+ ruby-version: ${{ matrix.ruby }}
49
+ bundler-cache: true
50
+ - run: bundle install
51
+
43
52
  # Copy from https://github.com/ruby/ruby/blob/089227e94823542acfdafa68541d330eee42ffea/.github/workflows/check_misc.yml#L27
44
53
  - name: Check for trailing spaces
45
54
  run: |
46
- git grep -I -n '[ ]$' -- '*.rb' '*.[chy]' '*.rs' && exit 1 || :
55
+ git grep -I -n '[ ]$' -- '*.rb' '*.[chy]' '*.rs' ':!spec/' && exit 1 || :
47
56
  git grep -n '^[ ][ ]*$' -- '*.md' && exit 1 || :
57
+
58
+ - name: Check for parser.rb is up to date
59
+ run: |
60
+ bundle exec rake build:racc_parser
61
+ git diff --color --no-ext-diff --ignore-submodules --exit-code lib/lrama/parser.rb
48
62
  steep-check:
49
63
  runs-on: ubuntu-20.04
50
64
  strategy:
@@ -65,7 +79,9 @@ jobs:
65
79
  strategy:
66
80
  fail-fast: false
67
81
  matrix:
68
- baseruby: ['3.0']
82
+ # '3.0' is the oldest living ruby version
83
+ # '2.5' is for BASERUBY
84
+ baseruby: ['head', '3.0', '2.5']
69
85
  ruby_branch: ['master']
70
86
  defaults:
71
87
  run:
data/.gitignore CHANGED
@@ -4,3 +4,4 @@
4
4
  /Gemfile.lock
5
5
  /pkg/
6
6
  coverage/
7
+ /parser.output
data/Gemfile CHANGED
@@ -6,6 +6,7 @@ gem "rspec"
6
6
  gem "pry"
7
7
  # stackprof doesn't support Windows
8
8
  gem "stackprof", platforms: [:ruby]
9
+ gem "racc"
9
10
  gem "rake"
10
11
  gem "rbs", require: false
11
12
  gem "steep", require: false
data/README.md CHANGED
@@ -61,12 +61,78 @@ This branch generates "parse.c" compatible with Bison 3.8.2 for ruby 3.0, 3.1, 3
61
61
 
62
62
  Lrama is executed with BASERUBY when building ruby from source code. Therefore Lrama needs to support BASERUBY, currently 2.5, or later version.
63
63
 
64
- This also requires Lrama to be able to run with only default gems and bundled gems.
64
+ This also requires Lrama to be able to run with only default gems because BASERUBY runs with the `--disable=gems` option.
65
65
 
66
- ## Build Ruby
66
+ ## Development
67
+
68
+ ### How to generate parser.rb
69
+
70
+ ```shell
71
+ $ rake build:racc_parser
72
+ ```
73
+
74
+ `lib/lrama/parser.rb` is generated from `parser.y` by Racc.
75
+ Run the rake command whenever you update `parser.y`, then commit the changes to both files.
76
+
77
+ ### Test
78
+
79
+ Running tests:
80
+
81
+ ```shell
82
+ $ bundle install
83
+ $ bundle exec rspec
84
+ ```
85
+
86
+ Running type check:
87
+
88
+ ```shell
89
+ $ bundle install
90
+ $ bundle exec rbs collection install
91
+ $ bundle exec steep check
92
+ ```
93
+
94
+ ### Profiling Lrama
95
+
96
+ #### 1. Create parse.tmp.y in ruby/ruby
97
+
98
+ ```shell
99
+ $ ruby tool/id2token.rb parse.y > parse.tmp.y
100
+ $ cp parse.tmp.y dir/lrama/tmp
101
+ ```
102
+
103
+ #### 2. Enable Profiler
104
+
105
+ ```diff
106
+ diff --git a/exe/lrama b/exe/lrama
107
+ index ba5fb06..2497178 100755
108
+ --- a/exe/lrama
109
+ +++ b/exe/lrama
110
+ @@ -3,4 +3,6 @@
111
+ $LOAD_PATH << File.join(__dir__, "../lib")
112
+ require "lrama"
113
+
114
+ -Lrama::Command.new.run(ARGV.dup)
115
+ +Lrama::Report::Profile.report_profile do
116
+ + Lrama::Command.new.run(ARGV.dup)
117
+ +end
118
+ ```
119
+
120
+ #### 3. Run Lrama
121
+
122
+ ```shell
123
+ $ exe/lrama -o parse.tmp.c --header=parse.tmp.h tmp/parse.tmp.y
124
+ ```
125
+
126
+ #### 4. Generate Flamegraph
127
+
128
+ ```shell
129
+ $ stackprof --d3-flamegraph tmp/stackprof-cpu-myapp.dump > tmp/flamegraph.html
130
+ ```
131
+
132
+ ### Build Ruby
67
133
 
68
134
  1. Install Lrama
69
- 2. Run `make YACC=lrama`
135
+ 2. Run `make main`
70
136
 
71
137
  ## Release flow
72
138
 
data/Rakefile CHANGED
@@ -1 +1,13 @@
1
1
  require "bundler/gem_tasks"
2
+
3
+ namespace "build" do
4
+ desc "build parser from parser.y by using Racc"
5
+ task :racc_parser do
6
+ sh "bundle exec racc parser.y --embedded -o lib/lrama/parser.rb"
7
+ end
8
+
9
+ desc "build parser for debugging"
10
+ task :racc_verbose_parser do
11
+ sh "bundle exec racc parser.y --embedded -o lib/lrama/parser.rb -t --log-file=parser.output"
12
+ end
13
+ end
data/Steepfile CHANGED
@@ -1,10 +1,13 @@
1
1
  # D = Steep::Diagnostic
2
2
  #
3
3
  target :lib do
4
+ repo_path '.gem_rbs_collection/'
4
5
  signature "sig"
5
6
 
6
7
  check "lib/lrama/bitmap.rb"
8
+ check "lib/lrama/digraph.rb"
7
9
  check "lib/lrama/report/duration.rb"
8
10
  check "lib/lrama/report/profile.rb"
11
+ check "lib/lrama/token/type.rb"
9
12
  check "lib/lrama/warning.rb"
10
13
  end
data/lib/lrama/command.rb CHANGED
@@ -6,8 +6,9 @@ module Lrama
6
6
  Report::Duration.enable if options.trace_opts[:time]
7
7
 
8
8
  warning = Lrama::Warning.new
9
- grammar = Lrama::Parser.new(options.y.read).parse
9
+ text = options.y.read
10
10
  options.y.close if options.y != STDIN
11
+ grammar = Lrama::Parser.new(text, options.grammar_file).parse
11
12
  states = Lrama::States.new(grammar, warning, trace_state: (options.trace_opts[:automaton] || options.trace_opts[:closure]))
12
13
  states.compute
13
14
  context = Lrama::Context.new(states)
data/lib/lrama/context.rb CHANGED
@@ -170,7 +170,7 @@ module Lrama
170
170
  return a
171
171
  end
172
172
 
173
- # Mapping from rule number to lenght of RHS.
173
+ # Mapping from rule number to length of RHS.
174
174
  # Dummy rule is appended as the first element whose value is 0
175
175
  # because 0 means error in yydefact.
176
176
  def yyr2
@@ -214,7 +214,7 @@ module Lrama
214
214
  (rule_id + 1) * -1
215
215
  end
216
216
 
217
- # Symbol number is assinged to term first then nterm.
217
+ # Symbol number is assigned to term first then nterm.
218
218
  # This method calculates sequence_number for nterm.
219
219
  def nterm_number_to_sequence_number(nterm_number)
220
220
  nterm_number - @states.terms.count
@@ -259,7 +259,7 @@ module Lrama
259
259
  actions[conflict.symbol.number] = ErrorActionNumber
260
260
  end
261
261
 
262
- # If default_reduction_rule, replase default_reduction_rule in
262
+ # If default_reduction_rule, replace default_reduction_rule in
263
263
  # actions with zero.
264
264
  if state.default_reduction_rule
265
265
  actions.map! do |e|
@@ -272,7 +272,7 @@ module Lrama
272
272
  end
273
273
 
274
274
  # If no default_reduction_rule, default behavior is an
275
- # error then replase ErrorActionNumber with zero.
275
+ # error then replace ErrorActionNumber with zero.
276
276
  if !state.default_reduction_rule
277
277
  actions.map! do |e|
278
278
  if e == ErrorActionNumber
data/lib/lrama/digraph.rb CHANGED
@@ -40,8 +40,7 @@ module Lrama
40
40
  end
41
41
 
42
42
  if @h[x] == d
43
- while true do
44
- z = @stack.pop
43
+ while (z = @stack.pop) do
45
44
  @h[z] = Float::INFINITY
46
45
  break if z == x
47
46
  @result[z] = @result[x] # F (Top of S) = F x
@@ -2,8 +2,8 @@ module Lrama
2
2
  class Grammar
3
3
  class Union < Struct.new(:code, :lineno, keyword_init: true)
4
4
  def braces_less_code
5
- # Remove braces
6
- code.s_value[1..-2]
5
+ # Braces are already removed by the lexer
6
+ code.s_value
7
7
  end
8
8
  end
9
9
  end
data/lib/lrama/grammar.rb CHANGED
@@ -1,3 +1,5 @@
1
+ require "strscan"
2
+
1
3
  require "lrama/grammar/auxiliary"
2
4
  require "lrama/grammar/code"
3
5
  require "lrama/grammar/error_token"
@@ -306,6 +308,111 @@ module Lrama
306
308
  @nterms ||= @symbols.select(&:nterm?)
307
309
  end
308
310
 
311
+ def scan_reference(scanner)
312
+ start = scanner.pos
313
+ case
314
+ # $ references
315
+ # An identifier must be wrapped in brackets to use "." or "-" in it
316
+ when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\$/) # $$, $<long>$
317
+ tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
318
+ return [:dollar, "$", tag, start, scanner.pos - 1]
319
+ when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1
320
+ tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
321
+ return [:dollar, Integer(scanner[2]), tag, start, scanner.pos - 1]
322
+ when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_][a-zA-Z0-9_]*)/) # $foo, $expr, $<long>program (named reference without brackets)
323
+ tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
324
+ return [:dollar, scanner[2], tag, start, scanner.pos - 1]
325
+ when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $[expr.right], $[expr-right], $<long>[program] (named reference with brackets)
326
+ tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
327
+ return [:dollar, scanner[2], tag, start, scanner.pos - 1]
328
+
329
+ # @ references
330
+ # An identifier must be wrapped in brackets to use "." or "-" in it
331
+ when scanner.scan(/@\$/) # @$
332
+ return [:at, "$", nil, start, scanner.pos - 1]
333
+ when scanner.scan(/@(\d+)/) # @1
334
+ return [:at, Integer(scanner[1]), nil, start, scanner.pos - 1]
335
+ when scanner.scan(/@([a-zA-Z][a-zA-Z0-9_]*)/) # @foo, @expr (named reference without brackets)
336
+ return [:at, scanner[1], nil, start, scanner.pos - 1]
337
+ when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @[expr.right], @[expr-right] (named reference with brackets)
338
+ return [:at, scanner[1], nil, start, scanner.pos - 1]
339
+ end
340
+ end
341
+
342
+ def extract_references
343
+ unless initial_action.nil?
344
+ scanner = StringScanner.new(initial_action.s_value)
345
+ references = []
346
+
347
+ while !scanner.eos? do
348
+ if reference = scan_reference(scanner)
349
+ references << reference
350
+ else
351
+ scanner.getch
352
+ end
353
+ end
354
+
355
+ initial_action.token_code.references = references
356
+ build_references(initial_action.token_code)
357
+ end
358
+
359
+ @printers.each do |printer|
360
+ scanner = StringScanner.new(printer.code.s_value)
361
+ references = []
362
+
363
+ while !scanner.eos? do
364
+ if reference = scan_reference(scanner)
365
+ references << reference
366
+ else
367
+ scanner.getch
368
+ end
369
+ end
370
+
371
+ printer.code.token_code.references = references
372
+ build_references(printer.code.token_code)
373
+ end
374
+
375
+ @error_tokens.each do |error_token|
376
+ scanner = StringScanner.new(error_token.code.s_value)
377
+ references = []
378
+
379
+ while !scanner.eos? do
380
+ if reference = scan_reference(scanner)
381
+ references << reference
382
+ else
383
+ scanner.getch
384
+ end
385
+ end
386
+
387
+ error_token.code.token_code.references = references
388
+ build_references(error_token.code.token_code)
389
+ end
390
+
391
+ @_rules.each do |lhs, rhs, _|
392
+ rhs.each_with_index do |token, index|
393
+ next if token.class == Lrama::Grammar::Symbol || token.type != Lrama::Lexer::Token::User_code
394
+
395
+ scanner = StringScanner.new(token.s_value)
396
+ references = []
397
+
398
+ while !scanner.eos? do
399
+ case
400
+ when reference = scan_reference(scanner)
401
+ references << reference
402
+ when scanner.scan(/\/\*/)
403
+ scanner.scan_until(/\*\//)
404
+ else
405
+ scanner.getch
406
+ end
407
+ end
408
+
409
+ token.references = references
410
+ token.numberize_references(lhs, rhs)
411
+ build_references(token)
412
+ end
413
+ end
414
+ end
415
+
309
416
  private
310
417
 
311
418
  def find_nterm_by_id!(id)
@@ -470,7 +577,9 @@ module Lrama
470
577
 
471
578
  # Fill #number and #token_id
472
579
  def fill_symbol_number
473
- # TODO: why start from 256
580
+ # Character literal in grammar file has
581
+ # token id corresponding to ASCII code by default,
582
+ # so start token_id from 256.
474
583
  token_id = 256
475
584
 
476
585
  # YYEMPTY = -2