lrama 0.5.7 → 0.5.9

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c786e8955350ca51e6226aacd493073f7c31714ef7d5911dfb16259aa9415513
4
- data.tar.gz: d167b1a2df9dcbc8f31912e8842388aa7d6a788a8d3eeff0278c6d3ed5ecbac6
3
+ metadata.gz: 5369d73367ea2fee299dbb73336a7801c98faa51bb3f68e66327c12f5a2d6716
4
+ data.tar.gz: b79943902e78f921d3361800cd1c371cebc594f527e940b83be3d26fcab190a9
5
5
  SHA512:
6
- metadata.gz: c89fe932ef32b5f441b87df33c2431c8631a069c293306d0a39c858e462731e11d753117ab4e7c48b4e4e41015405328a8d8f5590fbce3544e67babdaeddd8a6
7
- data.tar.gz: 619d1affd9f09c5c1b7748f712005b60073638be709e8bae986e39801173cf9de32555cf05837dbb7450360989989f5277e76a496eec702ace75c3b4af944471
6
+ metadata.gz: 93fd0bb99180b74f7a98b1186273b9324ac8855f599add59a4eb86b7b38b7930975c32463568ae93ff5d829d7c605dbd8a264c8127e40d92f7f7b0bd46637b0a
7
+ data.tar.gz: 06a43beddb6b78b4a1866164c63ebff7309c134b74dc32b5272e181f88ba4b2ff2ea30d756a16016e194e6c0babf0e6ee1536984e90835798eaa57be8eb09c9f
@@ -57,7 +57,7 @@ jobs:
57
57
 
58
58
  - name: Check for parser.rb is up to date
59
59
  run: |
60
- bundle exec rake build:racc_parser
60
+ bundle exec rake build:parser
61
61
  git diff --color --no-ext-diff --ignore-submodules --exit-code lib/lrama/parser.rb
62
62
  steep-check:
63
63
  runs-on: ubuntu-20.04
data/Gemfile CHANGED
@@ -6,7 +6,7 @@ gem "rspec"
6
6
  gem "pry"
7
7
  # stackprof doesn't support Windows
8
8
  gem "stackprof", platforms: [:ruby]
9
- gem "racc"
9
+ gem "racc", "1.7.3"
10
10
  gem "rake"
11
11
  gem "rbs", require: false
12
12
  gem "steep", require: false
data/README.md CHANGED
@@ -65,13 +65,13 @@ This also requires Lrama to be able to run with only default gems because BASERU
65
65
 
66
66
  ## Development
67
67
 
68
- ### How to generate new_parser.rb
68
+ ### How to generate parser.rb
69
69
 
70
70
  ```shell
71
- $ rake build:racc_parser
71
+ $ rake build:parser
72
72
  ```
73
73
 
74
- `new_parser.rb` is generated from `parser.y` by Racc.
74
+ `parser.rb` is generated from `parser.y` by Racc.
75
75
  Run the rake command when you update `parser.y` then commit changes of both files.
76
76
 
77
77
  ### Test
data/Rakefile CHANGED
@@ -1,13 +1,8 @@
1
1
  require "bundler/gem_tasks"
2
2
 
3
3
  namespace "build" do
4
- desc "build parser from parser.y by using Racc"
5
- task :racc_parser do
6
- `bundle exec racc parser.y --embedded -o lib/lrama/parser.rb`
7
- end
8
-
9
- desc "build parser for debugging"
10
- task :racc_verbose_parser do
11
- `bundle exec racc parser.y --embedded -o lib/lrama/parser.rb -t --log-file=parser.output`
4
+ desc "build parser from parser.y"
5
+ task :parser do
6
+ sh "bundle exec racc parser.y --embedded -o lib/lrama/parser.rb -t --log-file=parser.output"
12
7
  end
13
8
  end
data/Steepfile CHANGED
@@ -6,8 +6,16 @@ target :lib do
6
6
 
7
7
  check "lib/lrama/bitmap.rb"
8
8
  check "lib/lrama/digraph.rb"
9
+ check "lib/lrama/grammar/percent_code.rb"
10
+ # TODO: Include this file once Lrama::Grammar::Symbol type is defined
11
+ # check "lib/lrama/grammar/reference.rb"
12
+ check "lib/lrama/lexer/token.rb"
13
+ check "lib/lrama/lexer/token/char.rb"
14
+ check "lib/lrama/lexer/token/ident.rb"
15
+ check "lib/lrama/lexer/token/parameterizing.rb"
16
+ check "lib/lrama/lexer/token/tag.rb"
17
+ check "lib/lrama/lexer/token/user_code.rb"
9
18
  check "lib/lrama/report/duration.rb"
10
19
  check "lib/lrama/report/profile.rb"
11
- check "lib/lrama/token/type.rb"
12
20
  check "lib/lrama/warning.rb"
13
21
  end
data/lib/lrama/command.rb CHANGED
@@ -8,7 +8,7 @@ module Lrama
8
8
  warning = Lrama::Warning.new
9
9
  text = options.y.read
10
10
  options.y.close if options.y != STDIN
11
- grammar = Lrama::Parser.new(text).parse
11
+ grammar = Lrama::Parser.new(text, options.grammar_file, options.debug).parse
12
12
  states = Lrama::States.new(grammar, warning, trace_state: (options.trace_opts[:automaton] || options.trace_opts[:closure]))
13
13
  states.compute
14
14
  context = Lrama::Context.new(states)
@@ -20,6 +20,11 @@ module Lrama
20
20
  end
21
21
  end
22
22
 
23
+ if options.trace_opts && options.trace_opts[:rules]
24
+ puts "Grammar rules:"
25
+ puts grammar.rules
26
+ end
27
+
23
28
  File.open(options.outfile, "w+") do |f|
24
29
  Lrama::Output.new(
25
30
  out: f,
@@ -0,0 +1,12 @@
1
+ module Lrama
2
+ class Grammar
3
+ class PercentCode
4
+ attr_reader :id, :code
5
+
6
+ def initialize(id, code)
7
+ @id = id
8
+ @code = code
9
+ end
10
+ end
11
+ end
12
+ end
@@ -47,9 +47,9 @@ module Lrama
47
47
  name = "YYACCEPT"
48
48
  when eof_symbol?
49
49
  name = "YYEOF"
50
- when term? && id.type == Token::Char
50
+ when term? && id.is_a?(Lrama::Lexer::Token::Char)
51
51
  name = number.to_s + display_name
52
- when term? && id.type == Token::Ident
52
+ when term? && id.is_a?(Lrama::Lexer::Token::Ident)
53
53
  name = id.s_value
54
54
  when nterm? && (id.s_value.include?("$") || id.s_value.include?("@"))
55
55
  name = number.to_s + id.s_value
data/lib/lrama/grammar.rb CHANGED
@@ -3,6 +3,7 @@ require "strscan"
3
3
  require "lrama/grammar/auxiliary"
4
4
  require "lrama/grammar/code"
5
5
  require "lrama/grammar/error_token"
6
+ require "lrama/grammar/percent_code"
6
7
  require "lrama/grammar/precedence"
7
8
  require "lrama/grammar/printer"
8
9
  require "lrama/grammar/reference"
@@ -13,11 +14,9 @@ require "lrama/lexer"
13
14
  require "lrama/type"
14
15
 
15
16
  module Lrama
16
- Token = Lrama::Lexer::Token
17
-
18
17
  # Grammar is the result of parsing an input grammar file
19
18
  class Grammar
20
- attr_reader :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol, :aux
19
+ attr_reader :percent_codes, :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol, :aux
21
20
  attr_accessor :union, :expect,
22
21
  :printers, :error_tokens,
23
22
  :lex_param, :parse_param, :initial_action,
@@ -26,6 +25,8 @@ module Lrama
26
25
  :sym_to_rules
27
26
 
28
27
  def initialize
28
+ # Code defined by "%code"
29
+ @percent_codes = []
29
30
  @printers = []
30
31
  @error_tokens = []
31
32
  @symbols = []
@@ -43,6 +44,10 @@ module Lrama
43
44
  append_special_symbols
44
45
  end
45
46
 
47
+ def add_percent_code(id:, code:)
48
+ @percent_codes << PercentCode.new(id, code)
49
+ end
50
+
46
51
  def add_printer(ident_or_tags:, code:, lineno:)
47
52
  @printers << Printer.new(ident_or_tags: ident_or_tags, code: code, lineno: lineno)
48
53
  end
@@ -122,16 +127,7 @@ module Lrama
122
127
  @_rules << [lhs, rhs, lineno]
123
128
  end
124
129
 
125
- def build_references(token_code)
126
- token_code.references.map! do |type, value, tag, first_column, last_column|
127
- Reference.new(type: type, value: value, ex_tag: tag, first_column: first_column, last_column: last_column)
128
- end
129
-
130
- token_code
131
- end
132
-
133
130
  def build_code(type, token_code)
134
- build_references(token_code)
135
131
  Code.new(type: type, token_code: token_code)
136
132
  end
137
133
 
@@ -152,6 +148,7 @@ module Lrama
152
148
  end
153
149
 
154
150
  def prepare
151
+ extract_references
155
152
  normalize_rules
156
153
  collect_symbols
157
154
  replace_token_with_symbol
@@ -308,46 +305,53 @@ module Lrama
308
305
  @nterms ||= @symbols.select(&:nterm?)
309
306
  end
310
307
 
308
+ def scan_reference(scanner)
309
+ start = scanner.pos
310
+ case
311
+ # $ references
312
+ # It need to wrap an identifier with brackets to use ".-" for identifiers
313
+ when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\$/) # $$, $<long>$
314
+ tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
315
+ return Reference.new(type: :dollar, value: "$", ex_tag: tag, first_column: start, last_column: scanner.pos - 1)
316
+ when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1
317
+ tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
318
+ return Reference.new(type: :dollar, value: Integer(scanner[2]), ex_tag: tag, first_column: start, last_column: scanner.pos - 1)
319
+ when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_][a-zA-Z0-9_]*)/) # $foo, $expr, $<long>program (named reference without brackets)
320
+ tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
321
+ return Reference.new(type: :dollar, value: scanner[2], ex_tag: tag, first_column: start, last_column: scanner.pos - 1)
322
+ when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $expr.right, $expr-right, $<long>program (named reference with brackets)
323
+ tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
324
+ return Reference.new(type: :dollar, value: scanner[2], ex_tag: tag, first_column: start, last_column: scanner.pos - 1)
325
+
326
+ # @ references
327
+ # It need to wrap an identifier with brackets to use ".-" for identifiers
328
+ when scanner.scan(/@\$/) # @$
329
+ return Reference.new(type: :at, value: "$", first_column: start, last_column: scanner.pos - 1)
330
+ when scanner.scan(/@(\d+)/) # @1
331
+ return Reference.new(type: :at, value: Integer(scanner[1]), first_column: start, last_column: scanner.pos - 1)
332
+ when scanner.scan(/@([a-zA-Z][a-zA-Z0-9_]*)/) # @foo, @expr (named reference without brackets)
333
+ return Reference.new(type: :at, value: scanner[1], first_column: start, last_column: scanner.pos - 1)
334
+ when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @expr.right, @expr-right (named reference with brackets)
335
+ return Reference.new(type: :at, value: scanner[1], first_column: start, last_column: scanner.pos - 1)
336
+ end
337
+ end
338
+
339
+ private
340
+
311
341
  def extract_references
312
342
  unless initial_action.nil?
313
343
  scanner = StringScanner.new(initial_action.s_value)
314
344
  references = []
315
345
 
316
346
  while !scanner.eos? do
317
- start = scanner.pos
318
- case
319
- # $ references
320
- # It need to wrap an identifier with brackets to use ".-" for identifiers
321
- when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\$/) # $$, $<long>$
322
- tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
323
- references << [:dollar, "$", tag, start, scanner.pos - 1]
324
- when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1
325
- tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
326
- references << [:dollar, Integer(scanner[2]), tag, start, scanner.pos - 1]
327
- when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_][a-zA-Z0-9_]*)/) # $foo, $expr, $<long>program (named reference without brackets)
328
- tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
329
- references << [:dollar, scanner[2], tag, start, scanner.pos - 1]
330
- when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $expr.right, $expr-right, $<long>program (named reference with brackets)
331
- tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
332
- references << [:dollar, scanner[2], tag, start, scanner.pos - 1]
333
-
334
- # @ references
335
- # It need to wrap an identifier with brackets to use ".-" for identifiers
336
- when scanner.scan(/@\$/) # @$
337
- references << [:at, "$", nil, start, scanner.pos - 1]
338
- when scanner.scan(/@(\d+)/) # @1
339
- references << [:at, Integer(scanner[1]), nil, start, scanner.pos - 1]
340
- when scanner.scan(/@([a-zA-Z][a-zA-Z0-9_]*)/) # @foo, @expr (named reference without brackets)
341
- references << [:at, scanner[1], nil, start, scanner.pos - 1]
342
- when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @expr.right, @expr-right (named reference with brackets)
343
- references << [:at, scanner[1], nil, start, scanner.pos - 1]
347
+ if reference = scan_reference(scanner)
348
+ references << reference
344
349
  else
345
350
  scanner.getch
346
351
  end
347
352
  end
348
353
 
349
354
  initial_action.token_code.references = references
350
- build_references(initial_action.token_code)
351
355
  end
352
356
 
353
357
  @printers.each do |printer|
@@ -355,40 +359,14 @@ module Lrama
355
359
  references = []
356
360
 
357
361
  while !scanner.eos? do
358
- start = scanner.pos
359
- case
360
- # $ references
361
- # It need to wrap an identifier with brackets to use ".-" for identifiers
362
- when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\$/) # $$, $<long>$
363
- tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
364
- references << [:dollar, "$", tag, start, scanner.pos - 1]
365
- when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1
366
- tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
367
- references << [:dollar, Integer(scanner[2]), tag, start, scanner.pos - 1]
368
- when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_][a-zA-Z0-9_]*)/) # $foo, $expr, $<long>program (named reference without brackets)
369
- tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
370
- references << [:dollar, scanner[2], tag, start, scanner.pos - 1]
371
- when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $expr.right, $expr-right, $<long>program (named reference with brackets)
372
- tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
373
- references << [:dollar, scanner[2], tag, start, scanner.pos - 1]
374
-
375
- # @ references
376
- # It need to wrap an identifier with brackets to use ".-" for identifiers
377
- when scanner.scan(/@\$/) # @$
378
- references << [:at, "$", nil, start, scanner.pos - 1]
379
- when scanner.scan(/@(\d+)/) # @1
380
- references << [:at, Integer(scanner[1]), nil, start, scanner.pos - 1]
381
- when scanner.scan(/@([a-zA-Z][a-zA-Z0-9_]*)/) # @foo, @expr (named reference without brackets)
382
- references << [:at, scanner[1], nil, start, scanner.pos - 1]
383
- when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @expr.right, @expr-right (named reference with brackets)
384
- references << [:at, scanner[1], nil, start, scanner.pos - 1]
362
+ if reference = scan_reference(scanner)
363
+ references << reference
385
364
  else
386
365
  scanner.getch
387
366
  end
388
367
  end
389
368
 
390
369
  printer.code.token_code.references = references
391
- build_references(printer.code.token_code)
392
370
  end
393
371
 
394
372
  @error_tokens.each do |error_token|
@@ -396,78 +374,27 @@ module Lrama
396
374
  references = []
397
375
 
398
376
  while !scanner.eos? do
399
- start = scanner.pos
400
- case
401
- # $ references
402
- # It need to wrap an identifier with brackets to use ".-" for identifiers
403
- when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\$/) # $$, $<long>$
404
- tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
405
- references << [:dollar, "$", tag, start, scanner.pos - 1]
406
- when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1
407
- tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
408
- references << [:dollar, Integer(scanner[2]), tag, start, scanner.pos - 1]
409
- when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_][a-zA-Z0-9_]*)/) # $foo, $expr, $<long>program (named reference without brackets)
410
- tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
411
- references << [:dollar, scanner[2], tag, start, scanner.pos - 1]
412
- when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $expr.right, $expr-right, $<long>program (named reference with brackets)
413
- tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
414
- references << [:dollar, scanner[2], tag, start, scanner.pos - 1]
415
-
416
- # @ references
417
- # It need to wrap an identifier with brackets to use ".-" for identifiers
418
- when scanner.scan(/@\$/) # @$
419
- references << [:at, "$", nil, start, scanner.pos - 1]
420
- when scanner.scan(/@(\d+)/) # @1
421
- references << [:at, Integer(scanner[1]), nil, start, scanner.pos - 1]
422
- when scanner.scan(/@([a-zA-Z][a-zA-Z0-9_]*)/) # @foo, @expr (named reference without brackets)
423
- references << [:at, scanner[1], nil, start, scanner.pos - 1]
424
- when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @expr.right, @expr-right (named reference with brackets)
425
- references << [:at, scanner[1], nil, start, scanner.pos - 1]
377
+ if reference = scan_reference(scanner)
378
+ references << reference
426
379
  else
427
380
  scanner.getch
428
381
  end
429
382
  end
430
383
 
431
384
  error_token.code.token_code.references = references
432
- build_references(error_token.code.token_code)
433
385
  end
434
386
 
435
387
  @_rules.each do |lhs, rhs, _|
436
388
  rhs.each_with_index do |token, index|
437
- next if token.class == Lrama::Grammar::Symbol || token.type != Lrama::Lexer::Token::User_code
389
+ next unless token.class == Lrama::Lexer::Token::UserCode
438
390
 
439
391
  scanner = StringScanner.new(token.s_value)
440
392
  references = []
441
393
 
442
394
  while !scanner.eos? do
443
- start = scanner.pos
444
395
  case
445
- # $ references
446
- # It need to wrap an identifier with brackets to use ".-" for identifiers
447
- when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\$/) # $$, $<long>$
448
- tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
449
- references << [:dollar, "$", tag, start, scanner.pos - 1]
450
- when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1
451
- tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
452
- references << [:dollar, Integer(scanner[2]), tag, start, scanner.pos - 1]
453
- when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_][a-zA-Z0-9_]*)/) # $foo, $expr, $<long>program (named reference without brackets)
454
- tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
455
- references << [:dollar, scanner[2], tag, start, scanner.pos - 1]
456
- when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $expr.right, $expr-right, $<long>program (named reference with brackets)
457
- tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
458
- references << [:dollar, scanner[2], tag, start, scanner.pos - 1]
459
-
460
- # @ references
461
- # It need to wrap an identifier with brackets to use ".-" for identifiers
462
- when scanner.scan(/@\$/) # @$
463
- references << [:at, "$", nil, start, scanner.pos - 1]
464
- when scanner.scan(/@(\d+)/) # @1
465
- references << [:at, Integer(scanner[1]), nil, start, scanner.pos - 1]
466
- when scanner.scan(/@([a-zA-Z][a-zA-Z0-9_]*)/) # @foo, @expr (named reference without brackets)
467
- references << [:at, scanner[1], nil, start, scanner.pos - 1]
468
- when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @expr.right, @expr-right (named reference with brackets)
469
- references << [:at, scanner[1], nil, start, scanner.pos - 1]
470
-
396
+ when reference = scan_reference(scanner)
397
+ references << reference
471
398
  when scanner.scan(/\/\*/)
472
399
  scanner.scan_until(/\*\//)
473
400
  else
@@ -476,22 +403,11 @@ module Lrama
476
403
  end
477
404
 
478
405
  token.references = references
479
- token.numberize_references(lhs, rhs)
480
- build_references(token)
406
+ numberize_references(lhs, rhs, token.references)
481
407
  end
482
408
  end
483
409
  end
484
410
 
485
- def create_token(type, s_value, line, column)
486
- t = Token.new(type: type, s_value: s_value)
487
- t.line = line
488
- t.column = column
489
-
490
- return t
491
- end
492
-
493
- private
494
-
495
411
  def find_nterm_by_id!(id)
496
412
  nterms.find do |nterm|
497
413
  nterm.id == id
@@ -505,29 +421,54 @@ module Lrama
505
421
  # @empty_symbol = term
506
422
 
507
423
  # YYEOF
508
- term = add_term(id: Token.new(type: Token::Ident, s_value: "YYEOF"), alias_name: "\"end of file\"", token_id: 0)
424
+ term = add_term(id: Lrama::Lexer::Token::Ident.new(s_value: "YYEOF"), alias_name: "\"end of file\"", token_id: 0)
509
425
  term.number = 0
510
426
  term.eof_symbol = true
511
427
  @eof_symbol = term
512
428
 
513
429
  # YYerror
514
- term = add_term(id: Token.new(type: Token::Ident, s_value: "YYerror"), alias_name: "error")
430
+ term = add_term(id: Lrama::Lexer::Token::Ident.new(s_value: "YYerror"), alias_name: "error")
515
431
  term.number = 1
516
432
  term.error_symbol = true
517
433
  @error_symbol = term
518
434
 
519
435
  # YYUNDEF
520
- term = add_term(id: Token.new(type: Token::Ident, s_value: "YYUNDEF"), alias_name: "\"invalid token\"")
436
+ term = add_term(id: Lrama::Lexer::Token::Ident.new(s_value: "YYUNDEF"), alias_name: "\"invalid token\"")
521
437
  term.number = 2
522
438
  term.undef_symbol = true
523
439
  @undef_symbol = term
524
440
 
525
441
  # $accept
526
- term = add_nterm(id: Token.new(type: Token::Ident, s_value: "$accept"))
442
+ term = add_nterm(id: Lrama::Lexer::Token::Ident.new(s_value: "$accept"))
527
443
  term.accept_symbol = true
528
444
  @accept_symbol = term
529
445
  end
530
446
 
447
+ def numberize_references(lhs, rhs, references)
448
+ references.map! {|ref|
449
+ ref_name = ref.value
450
+ if ref_name.is_a?(::String) && ref_name != '$'
451
+ value =
452
+ if lhs.referred_by?(ref_name)
453
+ '$'
454
+ else
455
+ index = rhs.find_index {|token| token.referred_by?(ref_name) }
456
+
457
+ if index
458
+ index + 1
459
+ else
460
+ raise "'#{ref_name}' is invalid name."
461
+ end
462
+ end
463
+
464
+ ref.value = value
465
+ ref
466
+ else
467
+ ref
468
+ end
469
+ }
470
+ end
471
+
531
472
  # 1. Add $accept rule to the top of rules
532
473
  # 2. Extract precedence and last action
533
474
  # 3. Extract action in the middle of RHS into new Empty rule
@@ -570,7 +511,7 @@ module Lrama
570
511
  case
571
512
  when r.is_a?(Symbol) # precedence_sym
572
513
  precedence_sym = r
573
- when (r.type == Token::User_code) && precedence_sym.nil? && code.nil? && rhs1.empty?
514
+ when r.is_a?(Lrama::Lexer::Token::UserCode) && precedence_sym.nil? && code.nil? && rhs1.empty?
574
515
  code = r
575
516
  else
576
517
  rhs1 << r
@@ -580,7 +521,7 @@ module Lrama
580
521
 
581
522
  # Bison n'th component is 1-origin
582
523
  (rhs1 + [code]).compact.each.with_index(1) do |token, i|
583
- if token.type == Token::User_code
524
+ if token.is_a?(Lrama::Lexer::Token::UserCode)
584
525
  token.references.each do |ref|
585
526
  # Need to keep position_in_rhs for actions in the middle of RHS
586
527
  ref.position_in_rhs = i - 1
@@ -609,9 +550,9 @@ module Lrama
609
550
  end
610
551
 
611
552
  rhs2 = rhs1.map do |token|
612
- if token.type == Token::User_code
553
+ if token.is_a?(Lrama::Lexer::Token::UserCode)
613
554
  prefix = token.referred ? "@" : "$@"
614
- new_token = Token.new(type: Token::Ident, s_value: prefix + extracted_action_number.to_s)
555
+ new_token = Lrama::Lexer::Token::Ident.new(s_value: prefix + extracted_action_number.to_s)
615
556
  extracted_action_number += 1
616
557
  a << [new_token, token]
617
558
  new_token
@@ -627,8 +568,12 @@ module Lrama
627
568
  end
628
569
 
629
570
  c = code ? Code.new(type: :user_code, token_code: code) : nil
630
- @rules << Rule.new(id: @rules.count, lhs: lhs, rhs: rhs2, code: c, precedence_sym: precedence_sym, lineno: lineno)
631
-
571
+ # Expand Parameterizing rules
572
+ if rhs2.any? {|r| r.is_a?(Lrama::Lexer::Token::Parameterizing) }
573
+ expand_parameterizing_rules(lhs, rhs2, c, precedence_sym, lineno)
574
+ else
575
+ @rules << Rule.new(id: @rules.count, lhs: lhs, rhs: rhs2, code: c, precedence_sym: precedence_sym, lineno: lineno)
576
+ end
632
577
  add_nterm(id: lhs)
633
578
  a.each do |new_token, _|
634
579
  add_nterm(id: new_token)
@@ -636,14 +581,37 @@ module Lrama
636
581
  end
637
582
  end
638
583
 
584
+ def expand_parameterizing_rules(lhs, rhs, code, precedence_sym, lineno)
585
+ token = Lrama::Lexer::Token::Ident.new(s_value: rhs[0].s_value)
586
+ if rhs.any? {|r| r.is_a?(Lrama::Lexer::Token::Parameterizing) && r.option? }
587
+ option_token = Lrama::Lexer::Token::Ident.new(s_value: "option_#{rhs[0].s_value}")
588
+ add_term(id: option_token)
589
+ @rules << Rule.new(id: @rules.count, lhs: lhs, rhs: [option_token], code: code, precedence_sym: precedence_sym, lineno: lineno)
590
+ @rules << Rule.new(id: @rules.count, lhs: option_token, rhs: [], code: code, precedence_sym: precedence_sym, lineno: lineno)
591
+ @rules << Rule.new(id: @rules.count, lhs: option_token, rhs: [token], code: code, precedence_sym: precedence_sym, lineno: lineno)
592
+ elsif rhs.any? {|r| r.is_a?(Lrama::Lexer::Token::Parameterizing) && r.nonempty_list? }
593
+ nonempty_list_token = Lrama::Lexer::Token::Ident.new(s_value: "nonempty_list_#{rhs[0].s_value}")
594
+ add_term(id: nonempty_list_token)
595
+ @rules << Rule.new(id: @rules.count, lhs: lhs, rhs: [nonempty_list_token], code: code, precedence_sym: precedence_sym, lineno: lineno)
596
+ @rules << Rule.new(id: @rules.count, lhs: nonempty_list_token, rhs: [token], code: code, precedence_sym: precedence_sym, lineno: lineno)
597
+ @rules << Rule.new(id: @rules.count, lhs: nonempty_list_token, rhs: [nonempty_list_token, token], code: code, precedence_sym: precedence_sym, lineno: lineno)
598
+ elsif rhs.any? {|r| r.is_a?(Lrama::Lexer::Token::Parameterizing) && r.list? }
599
+ list_token = Lrama::Lexer::Token::Ident.new(s_value: "list_#{rhs[0].s_value}")
600
+ add_term(id: list_token)
601
+ @rules << Rule.new(id: @rules.count, lhs: lhs, rhs: [list_token], code: code, precedence_sym: precedence_sym, lineno: lineno)
602
+ @rules << Rule.new(id: @rules.count, lhs: list_token, rhs: [], code: code, precedence_sym: precedence_sym, lineno: lineno)
603
+ @rules << Rule.new(id: @rules.count, lhs: list_token, rhs: [list_token, token], code: code, precedence_sym: precedence_sym, lineno: lineno)
604
+ end
605
+ end
606
+
639
607
  # Collect symbols from rules
640
608
  def collect_symbols
641
609
  @rules.flat_map(&:rhs).each do |s|
642
610
  case s
643
- when Token
644
- if s.type == Token::Char
645
- add_term(id: s)
646
- end
611
+ when Lrama::Lexer::Token::Char
612
+ add_term(id: s)
613
+ when Lrama::Lexer::Token
614
+ # skip
647
615
  when Symbol
648
616
  # skip
649
617
  else
@@ -684,7 +652,7 @@ module Lrama
684
652
 
685
653
  # If id is Token::Char, it uses ASCII code
686
654
  if sym.term? && sym.token_id.nil?
687
- if sym.id.type == Token::Char
655
+ if sym.id.is_a?(Lrama::Lexer::Token::Char)
688
656
  # Ignore ' on the both sides
689
657
  case sym.id.s_value[1..-2]
690
658
  when "\\b"
@@ -737,7 +705,7 @@ module Lrama
737
705
  rule.code.references.each do |ref|
738
706
  next if ref.type == :at
739
707
 
740
- if ref.referring_symbol.type != Token::User_code
708
+ if !ref.referring_symbol.is_a?(Lrama::Lexer::Token::UserCode)
741
709
  ref.referring_symbol = token_to_symbol(ref.referring_symbol)
742
710
  end
743
711
  end
@@ -747,7 +715,7 @@ module Lrama
747
715
 
748
716
  def token_to_symbol(token)
749
717
  case token
750
- when Token
718
+ when Lrama::Lexer::Token
751
719
  find_symbol_by_id!(token)
752
720
  when Symbol
753
721
  token
@@ -793,10 +761,10 @@ module Lrama
793
761
  @symbols.each do |sym|
794
762
  @printers.each do |printer|
795
763
  printer.ident_or_tags.each do |ident_or_tag|
796
- case ident_or_tag.type
797
- when Token::Ident
764
+ case ident_or_tag
765
+ when Lrama::Lexer::Token::Ident
798
766
  sym.printer = printer if sym.id == ident_or_tag
799
- when Token::Tag
767
+ when Lrama::Lexer::Token::Tag
800
768
  sym.printer = printer if sym.tag == ident_or_tag
801
769
  else
802
770
  raise "Unknown token type. #{printer}"
@@ -810,10 +778,10 @@ module Lrama
810
778
  @symbols.each do |sym|
811
779
  @error_tokens.each do |error_token|
812
780
  error_token.ident_or_tags.each do |ident_or_tag|
813
- case ident_or_tag.type
814
- when Token::Ident
781
+ case ident_or_tag
782
+ when Lrama::Lexer::Token::Ident
815
783
  sym.error_token = error_token if sym.id == ident_or_tag
816
- when Token::Tag
784
+ when Lrama::Lexer::Token::Tag
817
785
  sym.error_token = error_token if sym.tag == ident_or_tag
818
786
  else
819
787
  raise "Unknown token type. #{error_token}"
@@ -0,0 +1,8 @@
1
+ module Lrama
2
+ class Lexer
3
+ class Token
4
+ class Char < Token
5
+ end
6
+ end
7
+ end
8
+ end
@@ -0,0 +1,8 @@
1
+ module Lrama
2
+ class Lexer
3
+ class Token
4
+ class Ident < Token
5
+ end
6
+ end
7
+ end
8
+ end
@@ -0,0 +1,19 @@
1
+ module Lrama
2
+ class Lexer
3
+ class Token
4
+ class Parameterizing < Token
5
+ def option?
6
+ self.s_value == "?"
7
+ end
8
+
9
+ def nonempty_list?
10
+ self.s_value == "+"
11
+ end
12
+
13
+ def list?
14
+ self.s_value == "*"
15
+ end
16
+ end
17
+ end
18
+ end
19
+ end