lrama 0.5.6 → 0.5.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e54c51af6f1d3632293cbd7f68762bf7cff63758d3ce633113805a629b92072b
4
- data.tar.gz: 5d053543f0e00c9fb20c40f5ca8236e499292f35fa8cf6fb8e85cac33b930814
3
+ metadata.gz: 52e495f849079217c2cb4ee28edf873dd7c999d02f2f486acf8b9c2c1e74006f
4
+ data.tar.gz: 708aea3d87d066b71857d09c4b9fc0705c8abaedc1e8b8e465ae5f6c2a5e0315
5
5
  SHA512:
6
- metadata.gz: 87bafe9650720b154855e055e53d700cfba67d31489dbc855c716bc150dff35e92d0fc974c14cb81a07ae68febbd69118c1720a628e2819fec1ebfa304acad55
7
- data.tar.gz: 19b0c51748cef053d205bbf13e8ff238bda2fdbb101d304f6bfe9633b4f88fe2c4a1f452627528fd2c9e78bc97955a2a5f7543212aa581c3a9f7ae96dd3d70f8
6
+ metadata.gz: 779e08f5090a78347214d8fbe73e710469ed62f61e923b5fbf50ed1e4c022a4096961df035b517b4c6434fa69326d3a006d94cff144f2c87b48407093f8961c7
7
+ data.tar.gz: df1e4828906ae7613bca0f5f7dda3466091aa0fe15d7d5147e92bcd803f8ef0e58b6935a6e5061bb18d715a1e0f8b47aaab678c553edda828cb7ff1b05b2bc0f
data/.codespellignore ADDED
File without changes
@@ -0,0 +1,16 @@
1
+ name: CodeSpell
2
+ on:
3
+ - pull_request
4
+ jobs:
5
+ codespell:
6
+ name: CodeSpell
7
+ runs-on: ubuntu-latest
8
+ steps:
9
+ - uses: actions/checkout@v4
10
+ - name: CodeSpell
11
+ uses: codespell-project/actions-codespell@master
12
+ with:
13
+ check_filenames: true
14
+ check_hidden: true
15
+ ignore_words_file: .codespellignore
16
+ exclude_file: lib/lrama/parser.rb
@@ -38,13 +38,27 @@ jobs:
38
38
  - run: bundle exec rspec
39
39
  check-misc:
40
40
  runs-on: ubuntu-20.04
41
+ strategy:
42
+ matrix:
43
+ ruby: ['head']
41
44
  steps:
42
45
  - uses: actions/checkout@v4
46
+ - uses: ruby/setup-ruby@v1
47
+ with:
48
+ ruby-version: ${{ matrix.ruby }}
49
+ bundler-cache: true
50
+ - run: bundle install
51
+
43
52
  # Copy from https://github.com/ruby/ruby/blob/089227e94823542acfdafa68541d330eee42ffea/.github/workflows/check_misc.yml#L27
44
53
  - name: Check for trailing spaces
45
54
  run: |
46
- git grep -I -n '[ ]$' -- '*.rb' '*.[chy]' '*.rs' && exit 1 || :
55
+ git grep -I -n '[ ]$' -- '*.rb' '*.[chy]' '*.rs' ':!spec/' && exit 1 || :
47
56
  git grep -n '^[ ][ ]*$' -- '*.md' && exit 1 || :
57
+
58
+ - name: Check that parser.rb is up to date
59
+ run: |
60
+ bundle exec rake build:racc_parser
61
+ git diff --color --no-ext-diff --ignore-submodules --exit-code lib/lrama/parser.rb
48
62
  steep-check:
49
63
  runs-on: ubuntu-20.04
50
64
  strategy:
@@ -65,7 +79,9 @@ jobs:
65
79
  strategy:
66
80
  fail-fast: false
67
81
  matrix:
68
- baseruby: ['3.0']
82
+ # '3.0' is the oldest living ruby version
83
+ # '2.5' is for BASERUBY
84
+ baseruby: ['head', '3.0', '2.5']
69
85
  ruby_branch: ['master']
70
86
  defaults:
71
87
  run:
data/.gitignore CHANGED
@@ -4,3 +4,4 @@
4
4
  /Gemfile.lock
5
5
  /pkg/
6
6
  coverage/
7
+ /parser.output
data/Gemfile CHANGED
@@ -6,6 +6,7 @@ gem "rspec"
6
6
  gem "pry"
7
7
  # stackprof doesn't support Windows
8
8
  gem "stackprof", platforms: [:ruby]
9
+ gem "racc"
9
10
  gem "rake"
10
11
  gem "rbs", require: false
11
12
  gem "steep", require: false
data/README.md CHANGED
@@ -61,12 +61,78 @@ This branch generates "parse.c" compatible with Bison 3.8.2 for ruby 3.0, 3.1, 3
61
61
 
62
62
  Lrama is executed with BASERUBY when building ruby from source code. Therefore Lrama needs to support the BASERUBY version (currently 2.5) and later versions.
63
63
 
64
- This also requires Lrama to be able to run with only default gems and bundled gems.
64
+ This also requires Lrama to be able to run with only default gems because BASERUBY runs with the `--disable=gems` option.
65
65
 
66
- ## Build Ruby
66
+ ## Development
67
+
68
+ ### How to generate parser.rb
69
+
70
+ ```shell
71
+ $ rake build:racc_parser
72
+ ```
73
+
74
+ `lib/lrama/parser.rb` is generated from `parser.y` by Racc.
75
+ Run the rake command whenever you update `parser.y`, then commit the changes to both files.
76
+
77
+ ### Test
78
+
79
+ Running tests:
80
+
81
+ ```shell
82
+ $ bundle install
83
+ $ bundle exec rspec
84
+ ```
85
+
86
+ Running type check:
87
+
88
+ ```shell
89
+ $ bundle install
90
+ $ bundle exec rbs collection install
91
+ $ bundle exec steep check
92
+ ```
93
+
94
+ ### Profiling Lrama
95
+
96
+ #### 1. Create parse.tmp.y in ruby/ruby
97
+
98
+ ```shell
99
+ $ ruby tool/id2token.rb parse.y > parse.tmp.y
100
+ $ cp parse.tmp.y dir/lrama/tmp
101
+ ```
102
+
103
+ #### 2. Enable Profiler
104
+
105
+ ```diff
106
+ diff --git a/exe/lrama b/exe/lrama
107
+ index ba5fb06..2497178 100755
108
+ --- a/exe/lrama
109
+ +++ b/exe/lrama
110
+ @@ -3,4 +3,6 @@
111
+ $LOAD_PATH << File.join(__dir__, "../lib")
112
+ require "lrama"
113
+
114
+ -Lrama::Command.new.run(ARGV.dup)
115
+ +Lrama::Report::Profile.report_profile do
116
+ + Lrama::Command.new.run(ARGV.dup)
117
+ +end
118
+ ```
119
+
120
+ #### 3. Run Lrama
121
+
122
+ ```shell
123
+ $ exe/lrama -o parse.tmp.c --header=parse.tmp.h tmp/parse.tmp.y
124
+ ```
125
+
126
+ #### 4. Generate Flamegraph
127
+
128
+ ```shell
129
+ $ stackprof --d3-flamegraph tmp/stackprof-cpu-myapp.dump > tmp/flamegraph.html
130
+ ```
131
+
132
+ ### Build Ruby
67
133
 
68
134
  1. Install Lrama
69
- 2. Run `make YACC=lrama`
135
+ 2. Run `make main`
70
136
 
71
137
  ## Release flow
72
138
 
data/Rakefile CHANGED
@@ -1 +1,13 @@
1
1
  require "bundler/gem_tasks"
2
+
3
+ namespace "build" do
4
+ desc "build parser from parser.y by using Racc"
5
+ task :racc_parser do
6
+ sh "bundle exec racc parser.y --embedded -o lib/lrama/parser.rb"
7
+ end
8
+
9
+ desc "build parser for debugging"
10
+ task :racc_verbose_parser do
11
+ sh "bundle exec racc parser.y --embedded -o lib/lrama/parser.rb -t --log-file=parser.output"
12
+ end
13
+ end
data/Steepfile CHANGED
@@ -1,10 +1,13 @@
1
1
  # D = Steep::Diagnostic
2
2
  #
3
3
  target :lib do
4
+ repo_path '.gem_rbs_collection/'
4
5
  signature "sig"
5
6
 
6
7
  check "lib/lrama/bitmap.rb"
8
+ check "lib/lrama/digraph.rb"
7
9
  check "lib/lrama/report/duration.rb"
8
10
  check "lib/lrama/report/profile.rb"
11
+ check "lib/lrama/token/type.rb"
9
12
  check "lib/lrama/warning.rb"
10
13
  end
data/lib/lrama/command.rb CHANGED
@@ -6,8 +6,9 @@ module Lrama
6
6
  Report::Duration.enable if options.trace_opts[:time]
7
7
 
8
8
  warning = Lrama::Warning.new
9
- grammar = Lrama::Parser.new(options.y.read).parse
9
+ text = options.y.read
10
10
  options.y.close if options.y != STDIN
11
+ grammar = Lrama::Parser.new(text, options.grammar_file).parse
11
12
  states = Lrama::States.new(grammar, warning, trace_state: (options.trace_opts[:automaton] || options.trace_opts[:closure]))
12
13
  states.compute
13
14
  context = Lrama::Context.new(states)
data/lib/lrama/context.rb CHANGED
@@ -170,7 +170,7 @@ module Lrama
170
170
  return a
171
171
  end
172
172
 
173
- # Mapping from rule number to lenght of RHS.
173
+ # Mapping from rule number to length of RHS.
174
174
  # Dummy rule is appended as the first element whose value is 0
175
175
  # because 0 means error in yydefact.
176
176
  def yyr2
@@ -214,7 +214,7 @@ module Lrama
214
214
  (rule_id + 1) * -1
215
215
  end
216
216
 
217
- # Symbol number is assinged to term first then nterm.
217
+ # Symbol number is assigned to term first then nterm.
218
218
  # This method calculates sequence_number for nterm.
219
219
  def nterm_number_to_sequence_number(nterm_number)
220
220
  nterm_number - @states.terms.count
@@ -259,7 +259,7 @@ module Lrama
259
259
  actions[conflict.symbol.number] = ErrorActionNumber
260
260
  end
261
261
 
262
- # If default_reduction_rule, replase default_reduction_rule in
262
+ # If default_reduction_rule, replace default_reduction_rule in
263
263
  # actions with zero.
264
264
  if state.default_reduction_rule
265
265
  actions.map! do |e|
@@ -272,7 +272,7 @@ module Lrama
272
272
  end
273
273
 
274
274
  # If no default_reduction_rule, default behavior is an
275
- # error then replase ErrorActionNumber with zero.
275
+ # error then replace ErrorActionNumber with zero.
276
276
  if !state.default_reduction_rule
277
277
  actions.map! do |e|
278
278
  if e == ErrorActionNumber
data/lib/lrama/digraph.rb CHANGED
@@ -40,8 +40,7 @@ module Lrama
40
40
  end
41
41
 
42
42
  if @h[x] == d
43
- while true do
44
- z = @stack.pop
43
+ while (z = @stack.pop) do
45
44
  @h[z] = Float::INFINITY
46
45
  break if z == x
47
46
  @result[z] = @result[x] # F (Top of S) = F x
@@ -2,8 +2,8 @@ module Lrama
2
2
  class Grammar
3
3
  class Union < Struct.new(:code, :lineno, keyword_init: true)
4
4
  def braces_less_code
5
- # Remove braces
6
- code.s_value[1..-2]
5
+ # Braces are already removed by the lexer
6
+ code.s_value
7
7
  end
8
8
  end
9
9
  end
data/lib/lrama/grammar.rb CHANGED
@@ -1,3 +1,5 @@
1
+ require "strscan"
2
+
1
3
  require "lrama/grammar/auxiliary"
2
4
  require "lrama/grammar/code"
3
5
  require "lrama/grammar/error_token"
@@ -306,6 +308,111 @@ module Lrama
306
308
  @nterms ||= @symbols.select(&:nterm?)
307
309
  end
308
310
 
311
+ def scan_reference(scanner)
312
+ start = scanner.pos
313
+ case
314
+ # $ references
315
+ # An identifier must be wrapped in brackets to use "." or "-" in its name
316
+ when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\$/) # $$, $<long>$
317
+ tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
318
+ return [:dollar, "$", tag, start, scanner.pos - 1]
319
+ when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1
320
+ tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
321
+ return [:dollar, Integer(scanner[2]), tag, start, scanner.pos - 1]
322
+ when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_][a-zA-Z0-9_]*)/) # $foo, $expr, $<long>program (named reference without brackets)
323
+ tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
324
+ return [:dollar, scanner[2], tag, start, scanner.pos - 1]
325
+ when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $[expr.right], $[expr-right], $<long>[expr.right] (named reference with brackets)
326
+ tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
327
+ return [:dollar, scanner[2], tag, start, scanner.pos - 1]
328
+
329
+ # @ references
330
+ # An identifier must be wrapped in brackets to use "." or "-" in its name
331
+ when scanner.scan(/@\$/) # @$
332
+ return [:at, "$", nil, start, scanner.pos - 1]
333
+ when scanner.scan(/@(\d+)/) # @1
334
+ return [:at, Integer(scanner[1]), nil, start, scanner.pos - 1]
335
+ when scanner.scan(/@([a-zA-Z][a-zA-Z0-9_]*)/) # @foo, @expr (named reference without brackets)
336
+ return [:at, scanner[1], nil, start, scanner.pos - 1]
337
+ when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @[expr.right], @[expr-right] (named reference with brackets)
338
+ return [:at, scanner[1], nil, start, scanner.pos - 1]
339
+ end
340
+ end
341
+
342
+ def extract_references
343
+ unless initial_action.nil?
344
+ scanner = StringScanner.new(initial_action.s_value)
345
+ references = []
346
+
347
+ while !scanner.eos? do
348
+ if reference = scan_reference(scanner)
349
+ references << reference
350
+ else
351
+ scanner.getch
352
+ end
353
+ end
354
+
355
+ initial_action.token_code.references = references
356
+ build_references(initial_action.token_code)
357
+ end
358
+
359
+ @printers.each do |printer|
360
+ scanner = StringScanner.new(printer.code.s_value)
361
+ references = []
362
+
363
+ while !scanner.eos? do
364
+ if reference = scan_reference(scanner)
365
+ references << reference
366
+ else
367
+ scanner.getch
368
+ end
369
+ end
370
+
371
+ printer.code.token_code.references = references
372
+ build_references(printer.code.token_code)
373
+ end
374
+
375
+ @error_tokens.each do |error_token|
376
+ scanner = StringScanner.new(error_token.code.s_value)
377
+ references = []
378
+
379
+ while !scanner.eos? do
380
+ if reference = scan_reference(scanner)
381
+ references << reference
382
+ else
383
+ scanner.getch
384
+ end
385
+ end
386
+
387
+ error_token.code.token_code.references = references
388
+ build_references(error_token.code.token_code)
389
+ end
390
+
391
+ @_rules.each do |lhs, rhs, _|
392
+ rhs.each_with_index do |token, index|
393
+ next if token.class == Lrama::Grammar::Symbol || token.type != Lrama::Lexer::Token::User_code
394
+
395
+ scanner = StringScanner.new(token.s_value)
396
+ references = []
397
+
398
+ while !scanner.eos? do
399
+ case
400
+ when reference = scan_reference(scanner)
401
+ references << reference
402
+ when scanner.scan(/\/\*/)
403
+ scanner.scan_until(/\*\//)
404
+ else
405
+ scanner.getch
406
+ end
407
+ end
408
+
409
+ token.references = references
410
+ token.numberize_references(lhs, rhs)
411
+ build_references(token)
412
+ end
413
+ end
414
+ end
415
+
309
416
  private
310
417
 
311
418
  def find_nterm_by_id!(id)
@@ -470,7 +577,9 @@ module Lrama
470
577
 
471
578
  # Fill #number and #token_id
472
579
  def fill_symbol_number
473
- # TODO: why start from 256
580
+ # A character literal in the grammar file gets, by default,
581
+ # a token id equal to its ASCII code,
582
+ # so token_id numbering starts from 256.
474
583
  token_id = 256
475
584
 
476
585
  # YYEMPTY = -2