lrama 0.5.8 → 0.5.10

Sign up to get free protection for your applications and to get access to all the features.
Files changed (78) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/test.yaml +6 -1
  3. data/.gitignore +7 -4
  4. data/Gemfile +10 -6
  5. data/README.md +3 -3
  6. data/Rakefile +15 -7
  7. data/Steepfile +15 -1
  8. data/lib/lrama/command.rb +6 -1
  9. data/lib/lrama/context.rb +1 -3
  10. data/lib/lrama/counterexamples/path.rb +0 -46
  11. data/lib/lrama/counterexamples/production_path.rb +17 -0
  12. data/lib/lrama/counterexamples/start_path.rb +21 -0
  13. data/lib/lrama/counterexamples/transition_path.rb +17 -0
  14. data/lib/lrama/counterexamples.rb +3 -0
  15. data/lib/lrama/grammar/code/initial_action_code.rb +28 -0
  16. data/lib/lrama/grammar/code/no_reference_code.rb +24 -0
  17. data/lib/lrama/grammar/code/printer_code.rb +34 -0
  18. data/lib/lrama/grammar/code/rule_action.rb +62 -0
  19. data/lib/lrama/grammar/code.rb +9 -93
  20. data/lib/lrama/grammar/counter.rb +15 -0
  21. data/lib/lrama/grammar/error_token.rb +3 -3
  22. data/lib/lrama/grammar/parameterizing_rules/builder/base.rb +28 -0
  23. data/lib/lrama/grammar/parameterizing_rules/builder/list.rb +20 -0
  24. data/lib/lrama/grammar/parameterizing_rules/builder/nonempty_list.rb +20 -0
  25. data/lib/lrama/grammar/parameterizing_rules/builder/option.rb +20 -0
  26. data/lib/lrama/grammar/parameterizing_rules/builder/separated_list.rb +28 -0
  27. data/lib/lrama/grammar/parameterizing_rules/builder/separated_nonempty_list.rb +27 -0
  28. data/lib/lrama/grammar/parameterizing_rules/builder.rb +43 -0
  29. data/lib/lrama/grammar/percent_code.rb +12 -0
  30. data/lib/lrama/grammar/printer.rb +3 -3
  31. data/lib/lrama/grammar/reference.rb +7 -16
  32. data/lib/lrama/grammar/rule.rb +18 -2
  33. data/lib/lrama/grammar/rule_builder.rb +179 -0
  34. data/lib/lrama/grammar/symbol.rb +2 -2
  35. data/lib/lrama/grammar.rb +132 -302
  36. data/lib/lrama/lexer/location.rb +22 -0
  37. data/lib/lrama/lexer/token/char.rb +8 -0
  38. data/lib/lrama/lexer/token/ident.rb +8 -0
  39. data/lib/lrama/lexer/token/parameterizing.rb +34 -0
  40. data/lib/lrama/lexer/token/tag.rb +12 -0
  41. data/lib/lrama/lexer/token/user_code.rb +64 -0
  42. data/lib/lrama/lexer/token.rb +23 -63
  43. data/lib/lrama/lexer.rb +38 -37
  44. data/lib/lrama/option_parser.rb +2 -1
  45. data/lib/lrama/options.rb +2 -2
  46. data/lib/lrama/output.rb +11 -2
  47. data/lib/lrama/parser.rb +607 -488
  48. data/lib/lrama/report/profile.rb +1 -12
  49. data/lib/lrama/version.rb +1 -1
  50. data/parser.y +177 -96
  51. data/rbs_collection.lock.yaml +17 -1
  52. data/rbs_collection.yaml +1 -0
  53. data/sample/calc.y +3 -1
  54. data/sample/parse.y +5 -1
  55. data/sig/lrama/grammar/code/printer_code.rbs +15 -0
  56. data/sig/lrama/grammar/code.rbs +24 -0
  57. data/sig/lrama/grammar/counter.rbs +11 -0
  58. data/sig/lrama/grammar/parameterizing_rules/builder.rbs +10 -0
  59. data/sig/lrama/grammar/percent_code.rbs +10 -0
  60. data/sig/lrama/grammar/precedence.rbs +11 -0
  61. data/sig/lrama/grammar/printer.rbs +11 -0
  62. data/sig/lrama/grammar/reference.rbs +22 -0
  63. data/sig/lrama/grammar/rule.rbs +13 -0
  64. data/sig/lrama/grammar/rule_builder.rbs +41 -0
  65. data/sig/lrama/grammar.rbs +5 -0
  66. data/sig/lrama/lexer/location.rbs +14 -0
  67. data/sig/lrama/lexer/token/char.rbs +8 -0
  68. data/sig/lrama/lexer/token/ident.rbs +8 -0
  69. data/sig/lrama/lexer/token/parameterizing.rbs +15 -0
  70. data/sig/lrama/lexer/token/tag.rbs +9 -0
  71. data/sig/lrama/lexer/token/user_code.rbs +16 -0
  72. data/sig/lrama/lexer/token.rbs +22 -0
  73. data/sig/stdlib/strscan/string_scanner.rbs +5 -0
  74. data/template/bison/_yacc.h +2 -2
  75. data/template/bison/yacc.c +5 -2
  76. metadata +44 -4
  77. data/lib/lrama/lexer/token/type.rb +0 -8
  78. data/sig/lrama/lexer/token/type.rbs +0 -17
data/lib/lrama/grammar.rb CHANGED
@@ -1,36 +1,39 @@
1
- require "strscan"
2
-
3
1
  require "lrama/grammar/auxiliary"
4
2
  require "lrama/grammar/code"
3
+ require "lrama/grammar/counter"
5
4
  require "lrama/grammar/error_token"
5
+ require "lrama/grammar/percent_code"
6
6
  require "lrama/grammar/precedence"
7
7
  require "lrama/grammar/printer"
8
8
  require "lrama/grammar/reference"
9
9
  require "lrama/grammar/rule"
10
+ require "lrama/grammar/rule_builder"
10
11
  require "lrama/grammar/symbol"
11
12
  require "lrama/grammar/union"
12
13
  require "lrama/lexer"
13
14
  require "lrama/type"
14
15
 
15
16
  module Lrama
16
- Token = Lrama::Lexer::Token
17
-
18
17
  # Grammar is the result of parsing an input grammar file
19
18
  class Grammar
20
- attr_reader :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol, :aux
19
+ attr_reader :percent_codes, :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol, :aux
21
20
  attr_accessor :union, :expect,
22
21
  :printers, :error_tokens,
23
22
  :lex_param, :parse_param, :initial_action,
24
23
  :symbols, :types,
25
- :rules, :_rules,
24
+ :rules, :rule_builders,
26
25
  :sym_to_rules
27
26
 
28
- def initialize
27
+ def initialize(rule_counter)
28
+ @rule_counter = rule_counter
29
+
30
+ # Code defined by "%code"
31
+ @percent_codes = []
29
32
  @printers = []
30
33
  @error_tokens = []
31
34
  @symbols = []
32
35
  @types = []
33
- @_rules = []
36
+ @rule_builders = []
34
37
  @rules = []
35
38
  @sym_to_rules = {}
36
39
  @empty_symbol = nil
@@ -43,12 +46,16 @@ module Lrama
43
46
  append_special_symbols
44
47
  end
45
48
 
46
- def add_printer(ident_or_tags:, code:, lineno:)
47
- @printers << Printer.new(ident_or_tags: ident_or_tags, code: code, lineno: lineno)
49
+ def add_percent_code(id:, code:)
50
+ @percent_codes << PercentCode.new(id, code)
48
51
  end
49
52
 
50
- def add_error_token(ident_or_tags:, code:, lineno:)
51
- @error_tokens << ErrorToken.new(ident_or_tags: ident_or_tags, code: code, lineno: lineno)
53
+ def add_printer(ident_or_tags:, token_code:, lineno:)
54
+ @printers << Printer.new(ident_or_tags: ident_or_tags, token_code: token_code, lineno: lineno)
55
+ end
56
+
57
+ def add_error_token(ident_or_tags:, token_code:, lineno:)
58
+ @error_tokens << ErrorToken.new(ident_or_tags: ident_or_tags, token_code: token_code, lineno: lineno)
52
59
  end
53
60
 
54
61
  def add_term(id:, alias_name: nil, tag: nil, token_id: nil, replace: false)
@@ -118,21 +125,8 @@ module Lrama
118
125
  @union = Union.new(code: code, lineno: lineno)
119
126
  end
120
127
 
121
- def add_rule(lhs:, rhs:, lineno:)
122
- @_rules << [lhs, rhs, lineno]
123
- end
124
-
125
- def build_references(token_code)
126
- token_code.references.map! do |type, value, tag, first_column, last_column|
127
- Reference.new(type: type, value: value, ex_tag: tag, first_column: first_column, last_column: last_column)
128
- end
129
-
130
- token_code
131
- end
132
-
133
- def build_code(type, token_code)
134
- build_references(token_code)
135
- Code.new(type: type, token_code: token_code)
128
+ def add_rule_builder(builder)
129
+ @rule_builders << builder
136
130
  end
137
131
 
138
132
  def prologue_first_lineno=(prologue_first_lineno)
@@ -162,14 +156,74 @@ module Lrama
162
156
  fill_symbol_printer
163
157
  fill_symbol_error_token
164
158
  @symbols.sort_by!(&:number)
159
+ compute_nullable
160
+ compute_first_set
165
161
  end
166
162
 
167
163
  # TODO: More validation methods
164
+ #
165
+ # * Validation for no_declared_type_reference
168
166
  def validate!
169
167
  validate_symbol_number_uniqueness!
170
- validate_no_declared_type_reference!
168
+ validate_symbol_alias_name_uniqueness!
169
+ validate_rule_lhs_is_nterm!
170
+ end
171
+
172
+ def find_symbol_by_s_value(s_value)
173
+ @symbols.find do |sym|
174
+ sym.id.s_value == s_value
175
+ end
176
+ end
177
+
178
+ def find_symbol_by_s_value!(s_value)
179
+ find_symbol_by_s_value(s_value) || (raise "Symbol not found: #{s_value}")
180
+ end
181
+
182
+ def find_symbol_by_id(id)
183
+ @symbols.find do |sym|
184
+ sym.id == id || sym.alias_name == id.s_value
185
+ end
186
+ end
187
+
188
+ def find_symbol_by_id!(id)
189
+ find_symbol_by_id(id) || (raise "Symbol not found: #{id}")
190
+ end
191
+
192
+ def find_symbol_by_number!(number)
193
+ sym = @symbols[number]
194
+
195
+ raise "Symbol not found: #{number}" unless sym
196
+ raise "[BUG] Symbol number mismatch. #{number}, #{sym}" if sym.number != number
197
+
198
+ sym
199
+ end
200
+
201
+ def find_rules_by_symbol!(sym)
202
+ find_rules_by_symbol(sym) || (raise "Rules for #{sym} not found")
171
203
  end
172
204
 
205
+ def find_rules_by_symbol(sym)
206
+ @sym_to_rules[sym.number]
207
+ end
208
+
209
+ def terms_count
210
+ terms.count
211
+ end
212
+
213
+ def terms
214
+ @terms ||= @symbols.select(&:term?)
215
+ end
216
+
217
+ def nterms_count
218
+ nterms.count
219
+ end
220
+
221
+ def nterms
222
+ @nterms ||= @symbols.select(&:nterm?)
223
+ end
224
+
225
+ private
226
+
173
227
  def compute_nullable
174
228
  @rules.each do |rule|
175
229
  case
@@ -254,167 +308,12 @@ module Lrama
254
308
  end
255
309
  end
256
310
 
257
- def find_symbol_by_s_value(s_value)
258
- @symbols.find do |sym|
259
- sym.id.s_value == s_value
260
- end
261
- end
262
-
263
- def find_symbol_by_s_value!(s_value)
264
- find_symbol_by_s_value(s_value) || (raise "Symbol not found: #{s_value}")
265
- end
266
-
267
- def find_symbol_by_id(id)
268
- @symbols.find do |sym|
269
- # TODO: validate uniqueness of Token#s_value and Symbol#alias_name
270
- sym.id == id || sym.alias_name == id.s_value
311
+ def setup_rules
312
+ @rule_builders.each do |builder|
313
+ builder.setup_rules
271
314
  end
272
315
  end
273
316
 
274
- def find_symbol_by_id!(id)
275
- find_symbol_by_id(id) || (raise "Symbol not found: #{id}")
276
- end
277
-
278
- def find_symbol_by_number!(number)
279
- sym = @symbols[number]
280
-
281
- raise "Symbol not found: #{number}" unless sym
282
- raise "[BUG] Symbol number mismatch. #{number}, #{sym}" if sym.number != number
283
-
284
- sym
285
- end
286
-
287
- def find_rules_by_symbol!(sym)
288
- find_rules_by_symbol(sym) || (raise "Rules for #{sym} not found")
289
- end
290
-
291
- def find_rules_by_symbol(sym)
292
- @sym_to_rules[sym.number]
293
- end
294
-
295
- def terms_count
296
- terms.count
297
- end
298
-
299
- def terms
300
- @terms ||= @symbols.select(&:term?)
301
- end
302
-
303
- def nterms_count
304
- nterms.count
305
- end
306
-
307
- def nterms
308
- @nterms ||= @symbols.select(&:nterm?)
309
- end
310
-
311
- def scan_reference(scanner)
312
- start = scanner.pos
313
- case
314
- # $ references
315
- # It need to wrap an identifier with brackets to use ".-" for identifiers
316
- when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\$/) # $$, $<long>$
317
- tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
318
- return [:dollar, "$", tag, start, scanner.pos - 1]
319
- when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1
320
- tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
321
- return [:dollar, Integer(scanner[2]), tag, start, scanner.pos - 1]
322
- when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_][a-zA-Z0-9_]*)/) # $foo, $expr, $<long>program (named reference without brackets)
323
- tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
324
- return [:dollar, scanner[2], tag, start, scanner.pos - 1]
325
- when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $expr.right, $expr-right, $<long>program (named reference with brackets)
326
- tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
327
- return [:dollar, scanner[2], tag, start, scanner.pos - 1]
328
-
329
- # @ references
330
- # It need to wrap an identifier with brackets to use ".-" for identifiers
331
- when scanner.scan(/@\$/) # @$
332
- return [:at, "$", nil, start, scanner.pos - 1]
333
- when scanner.scan(/@(\d+)/) # @1
334
- return [:at, Integer(scanner[1]), nil, start, scanner.pos - 1]
335
- when scanner.scan(/@([a-zA-Z][a-zA-Z0-9_]*)/) # @foo, @expr (named reference without brackets)
336
- return [:at, scanner[1], nil, start, scanner.pos - 1]
337
- when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @expr.right, @expr-right (named reference with brackets)
338
- return [:at, scanner[1], nil, start, scanner.pos - 1]
339
- end
340
- end
341
-
342
- def extract_references
343
- unless initial_action.nil?
344
- scanner = StringScanner.new(initial_action.s_value)
345
- references = []
346
-
347
- while !scanner.eos? do
348
- if reference = scan_reference(scanner)
349
- references << reference
350
- else
351
- scanner.getch
352
- end
353
- end
354
-
355
- initial_action.token_code.references = references
356
- build_references(initial_action.token_code)
357
- end
358
-
359
- @printers.each do |printer|
360
- scanner = StringScanner.new(printer.code.s_value)
361
- references = []
362
-
363
- while !scanner.eos? do
364
- if reference = scan_reference(scanner)
365
- references << reference
366
- else
367
- scanner.getch
368
- end
369
- end
370
-
371
- printer.code.token_code.references = references
372
- build_references(printer.code.token_code)
373
- end
374
-
375
- @error_tokens.each do |error_token|
376
- scanner = StringScanner.new(error_token.code.s_value)
377
- references = []
378
-
379
- while !scanner.eos? do
380
- if reference = scan_reference(scanner)
381
- references << reference
382
- else
383
- scanner.getch
384
- end
385
- end
386
-
387
- error_token.code.token_code.references = references
388
- build_references(error_token.code.token_code)
389
- end
390
-
391
- @_rules.each do |lhs, rhs, _|
392
- rhs.each_with_index do |token, index|
393
- next if token.class == Lrama::Grammar::Symbol || token.type != Lrama::Lexer::Token::User_code
394
-
395
- scanner = StringScanner.new(token.s_value)
396
- references = []
397
-
398
- while !scanner.eos? do
399
- case
400
- when reference = scan_reference(scanner)
401
- references << reference
402
- when scanner.scan(/\/\*/)
403
- scanner.scan_until(/\*\//)
404
- else
405
- scanner.getch
406
- end
407
- end
408
-
409
- token.references = references
410
- token.numberize_references(lhs, rhs)
411
- build_references(token)
412
- end
413
- end
414
- end
415
-
416
- private
417
-
418
317
  def find_nterm_by_id!(id)
419
318
  nterms.find do |nterm|
420
319
  nterm.id == id
@@ -428,33 +327,32 @@ module Lrama
428
327
  # @empty_symbol = term
429
328
 
430
329
  # YYEOF
431
- term = add_term(id: Token.new(type: Token::Ident, s_value: "YYEOF"), alias_name: "\"end of file\"", token_id: 0)
330
+ term = add_term(id: Lrama::Lexer::Token::Ident.new(s_value: "YYEOF"), alias_name: "\"end of file\"", token_id: 0)
432
331
  term.number = 0
433
332
  term.eof_symbol = true
434
333
  @eof_symbol = term
435
334
 
436
335
  # YYerror
437
- term = add_term(id: Token.new(type: Token::Ident, s_value: "YYerror"), alias_name: "error")
336
+ term = add_term(id: Lrama::Lexer::Token::Ident.new(s_value: "YYerror"), alias_name: "error")
438
337
  term.number = 1
439
338
  term.error_symbol = true
440
339
  @error_symbol = term
441
340
 
442
341
  # YYUNDEF
443
- term = add_term(id: Token.new(type: Token::Ident, s_value: "YYUNDEF"), alias_name: "\"invalid token\"")
342
+ term = add_term(id: Lrama::Lexer::Token::Ident.new(s_value: "YYUNDEF"), alias_name: "\"invalid token\"")
444
343
  term.number = 2
445
344
  term.undef_symbol = true
446
345
  @undef_symbol = term
447
346
 
448
347
  # $accept
449
- term = add_nterm(id: Token.new(type: Token::Ident, s_value: "$accept"))
348
+ term = add_nterm(id: Lrama::Lexer::Token::Ident.new(s_value: "$accept"))
450
349
  term.accept_symbol = true
451
350
  @accept_symbol = term
452
351
  end
453
352
 
454
353
  # 1. Add $accept rule to the top of rules
455
- # 2. Extract precedence and last action
456
- # 3. Extract action in the middle of RHS into new Empty rule
457
- # 4. Append id and extract action then create Rule
354
+ # 2. Extract action in the middle of RHS into new Empty rule
355
+ # 3. Append id and extract action then create Rule
458
356
  #
459
357
  # Bison 3.8.2 uses different orders for symbol number and rule number
460
358
  # when a rule has actions in the middle of a rule.
@@ -475,99 +373,37 @@ module Lrama
475
373
  #
476
374
  def normalize_rules
477
375
  # 1. Add $accept rule to the top of rules
478
- accept = find_symbol_by_s_value!("$accept")
479
- eof = find_symbol_by_number!(0)
480
- lineno = @_rules.first ? @_rules.first[2] : 0
481
- @rules << Rule.new(id: @rules.count, lhs: accept, rhs: [@_rules.first[0], eof], code: nil, lineno: lineno)
482
-
483
- extracted_action_number = 1 # @n as nterm
376
+ accept = @accept_symbol
377
+ eof = @eof_symbol
378
+ lineno = @rule_builders.first ? @rule_builders.first.line : 0
379
+ @rules << Rule.new(id: @rule_counter.increment, _lhs: accept.id, _rhs: [@rule_builders.first.lhs, eof.id], token_code: nil, lineno: lineno)
484
380
 
485
- @_rules.each do |lhs, rhs, lineno|
486
- a = []
487
- rhs1 = []
488
- code = nil
489
- precedence_sym = nil
490
-
491
- # 2. Extract precedence and last action
492
- rhs.reverse.each do |r|
493
- case
494
- when r.is_a?(Symbol) # precedence_sym
495
- precedence_sym = r
496
- when (r.type == Token::User_code) && precedence_sym.nil? && code.nil? && rhs1.empty?
497
- code = r
498
- else
499
- rhs1 << r
500
- end
501
- end
502
- rhs1.reverse!
503
-
504
- # Bison n'th component is 1-origin
505
- (rhs1 + [code]).compact.each.with_index(1) do |token, i|
506
- if token.type == Token::User_code
507
- token.references.each do |ref|
508
- # Need to keep position_in_rhs for actions in the middle of RHS
509
- ref.position_in_rhs = i - 1
510
- next if ref.type == :at
511
- # $$, $n, @$, @n can be used in any actions
512
-
513
- if ref.value == "$"
514
- # TODO: Should be postponed after middle actions are extracted?
515
- ref.referring_symbol = lhs
516
- elsif ref.value.is_a?(Integer)
517
- raise "Can not refer following component. #{ref.value} >= #{i}. #{token}" if ref.value >= i
518
- rhs1[ref.value - 1].referred = true
519
- ref.referring_symbol = rhs1[ref.value - 1]
520
- elsif ref.value.is_a?(String)
521
- target_tokens = ([lhs] + rhs1 + [code]).compact.first(i)
522
- referring_symbol_candidate = target_tokens.filter {|token| token.referred_by?(ref.value) }
523
- raise "Referring symbol `#{ref.value}` is duplicated. #{token}" if referring_symbol_candidate.size >= 2
524
- raise "Referring symbol `#{ref.value}` is not found. #{token}" if referring_symbol_candidate.count == 0
525
-
526
- referring_symbol = referring_symbol_candidate.first
527
- referring_symbol.referred = true
528
- ref.referring_symbol = referring_symbol
529
- end
530
- end
531
- end
532
- end
381
+ setup_rules
533
382
 
534
- rhs2 = rhs1.map do |token|
535
- if token.type == Token::User_code
536
- prefix = token.referred ? "@" : "$@"
537
- new_token = Token.new(type: Token::Ident, s_value: prefix + extracted_action_number.to_s)
538
- extracted_action_number += 1
539
- a << [new_token, token]
540
- new_token
541
- else
542
- token
543
- end
383
+ @rule_builders.each do |builder|
384
+ # Extract actions in the middle of RHS into new rules.
385
+ builder.midrule_action_rules.each do |rule|
386
+ @rules << rule
544
387
  end
545
388
 
546
- # Extract actions in the middle of RHS
547
- # into new rules.
548
- a.each do |new_token, code|
549
- @rules << Rule.new(id: @rules.count, lhs: new_token, rhs: [], code: Code.new(type: :user_code, token_code: code), lineno: code.line)
389
+ builder.rules.each do |rule|
390
+ add_nterm(id: rule._lhs)
391
+ @rules << rule
550
392
  end
551
393
 
552
- c = code ? Code.new(type: :user_code, token_code: code) : nil
553
- @rules << Rule.new(id: @rules.count, lhs: lhs, rhs: rhs2, code: c, precedence_sym: precedence_sym, lineno: lineno)
554
-
555
- add_nterm(id: lhs)
556
- a.each do |new_token, _|
557
- add_nterm(id: new_token)
394
+ builder.midrule_action_rules.each do |rule|
395
+ add_nterm(id: rule._lhs)
558
396
  end
559
397
  end
560
398
  end
561
399
 
562
400
  # Collect symbols from rules
563
401
  def collect_symbols
564
- @rules.flat_map(&:rhs).each do |s|
402
+ @rules.flat_map(&:_rhs).each do |s|
565
403
  case s
566
- when Token
567
- if s.type == Token::Char
568
- add_term(id: s)
569
- end
570
- when Symbol
404
+ when Lrama::Lexer::Token::Char
405
+ add_term(id: s)
406
+ when Lrama::Lexer::Token
571
407
  # skip
572
408
  else
573
409
  raise "Unknown class: #{s}"
@@ -607,7 +443,7 @@ module Lrama
607
443
 
608
444
  # If id is Token::Char, it uses ASCII code
609
445
  if sym.term? && sym.token_id.nil?
610
- if sym.id.type == Token::Char
446
+ if sym.id.is_a?(Lrama::Lexer::Token::Char)
611
447
  # Ignore ' on the both sides
612
448
  case sym.id.s_value[1..-2]
613
449
  when "\\b"
@@ -650,30 +486,18 @@ module Lrama
650
486
 
651
487
  def replace_token_with_symbol
652
488
  @rules.each do |rule|
653
- rule.lhs = token_to_symbol(rule.lhs)
489
+ rule.lhs = token_to_symbol(rule._lhs) if rule._lhs
654
490
 
655
- rule.rhs.map! do |t|
491
+ rule.rhs = rule._rhs.map do |t|
656
492
  token_to_symbol(t)
657
493
  end
658
-
659
- if rule.code
660
- rule.code.references.each do |ref|
661
- next if ref.type == :at
662
-
663
- if ref.referring_symbol.type != Token::User_code
664
- ref.referring_symbol = token_to_symbol(ref.referring_symbol)
665
- end
666
- end
667
- end
668
494
  end
669
495
  end
670
496
 
671
497
  def token_to_symbol(token)
672
498
  case token
673
- when Token
499
+ when Lrama::Lexer::Token
674
500
  find_symbol_by_id!(token)
675
- when Symbol
676
- token
677
501
  else
678
502
  raise "Unknown class: #{token}"
679
503
  end
@@ -716,10 +540,10 @@ module Lrama
716
540
  @symbols.each do |sym|
717
541
  @printers.each do |printer|
718
542
  printer.ident_or_tags.each do |ident_or_tag|
719
- case ident_or_tag.type
720
- when Token::Ident
543
+ case ident_or_tag
544
+ when Lrama::Lexer::Token::Ident
721
545
  sym.printer = printer if sym.id == ident_or_tag
722
- when Token::Tag
546
+ when Lrama::Lexer::Token::Tag
723
547
  sym.printer = printer if sym.tag == ident_or_tag
724
548
  else
725
549
  raise "Unknown token type. #{printer}"
@@ -733,10 +557,10 @@ module Lrama
733
557
  @symbols.each do |sym|
734
558
  @error_tokens.each do |error_token|
735
559
  error_token.ident_or_tags.each do |ident_or_tag|
736
- case ident_or_tag.type
737
- when Token::Ident
560
+ case ident_or_tag
561
+ when Lrama::Lexer::Token::Ident
738
562
  sym.error_token = error_token if sym.id == ident_or_tag
739
- when Token::Tag
563
+ when Lrama::Lexer::Token::Tag
740
564
  sym.error_token = error_token if sym.tag == ident_or_tag
741
565
  else
742
566
  raise "Unknown token type. #{error_token}"
@@ -756,17 +580,23 @@ module Lrama
756
580
  raise "Symbol number is duplicated. #{invalid}"
757
581
  end
758
582
 
759
- def validate_no_declared_type_reference!
583
+ def validate_symbol_alias_name_uniqueness!
584
+ invalid = @symbols.select(&:alias_name).group_by(&:alias_name).select do |alias_name, syms|
585
+ syms.count > 1
586
+ end
587
+
588
+ return if invalid.empty?
589
+
590
+ raise "Symbol alias name is duplicated. #{invalid}"
591
+ end
592
+
593
+ def validate_rule_lhs_is_nterm!
760
594
  errors = []
761
595
 
762
596
  rules.each do |rule|
763
- next unless rule.code
597
+ next if rule.lhs.nterm?
764
598
 
765
- rule.code.references.select do |ref|
766
- ref.type == :dollar && !ref.tag
767
- end.each do |ref|
768
- errors << "$#{ref.value} of '#{rule.lhs.id.s_value}' has no declared type"
769
- end
599
+ errors << "[BUG] LHS of #{rule} (line: #{rule.lineno}) is term. It should be nterm."
770
600
  end
771
601
 
772
602
  return if errors.empty?
@@ -0,0 +1,22 @@
1
+ module Lrama
2
+ class Lexer
3
+ class Location
4
+ attr_reader :first_line, :first_column, :last_line, :last_column
5
+
6
+ def initialize(first_line:, first_column:, last_line:, last_column:)
7
+ @first_line = first_line
8
+ @first_column = first_column
9
+ @last_line = last_line
10
+ @last_column = last_column
11
+ end
12
+
13
+ def ==(other)
14
+ self.class == other.class &&
15
+ self.first_line == other.first_line &&
16
+ self.first_column == other.first_column &&
17
+ self.last_line == other.last_line &&
18
+ self.last_column == other.last_column
19
+ end
20
+ end
21
+ end
22
+ end
@@ -0,0 +1,8 @@
1
+ module Lrama
2
+ class Lexer
3
+ class Token
4
+ class Char < Token
5
+ end
6
+ end
7
+ end
8
+ end
@@ -0,0 +1,8 @@
1
+ module Lrama
2
+ class Lexer
3
+ class Token
4
+ class Ident < Token
5
+ end
6
+ end
7
+ end
8
+ end
@@ -0,0 +1,34 @@
1
+ module Lrama
2
+ class Lexer
3
+ class Token
4
+ class Parameterizing < Token
5
+ attr_accessor :args
6
+
7
+ def initialize(s_value: nil, alias_name: nil, location: nil, args: [])
8
+ super s_value: s_value, alias_name: alias_name, location: location
9
+ @args = args
10
+ end
11
+
12
+ def option?
13
+ %w(option ?).include?(self.s_value)
14
+ end
15
+
16
+ def nonempty_list?
17
+ %w(nonempty_list +).include?(self.s_value)
18
+ end
19
+
20
+ def list?
21
+ %w(list *).include?(self.s_value)
22
+ end
23
+
24
+ def separated_nonempty_list?
25
+ %w(separated_nonempty_list).include?(self.s_value)
26
+ end
27
+
28
+ def separated_list?
29
+ %w(separated_list).include?(self.s_value)
30
+ end
31
+ end
32
+ end
33
+ end
34
+ end
@@ -0,0 +1,12 @@
1
+ module Lrama
2
+ class Lexer
3
+ class Token
4
+ class Tag < Token
5
+ # Omit "<>"
6
+ def member
7
+ s_value[1..-2] or raise "Unexpected Tag format (#{s_value})"
8
+ end
9
+ end
10
+ end
11
+ end
12
+ end