lrama 0.5.9 → 0.5.11

Sign up to get free protection for your applications and to get access to all the features.
Files changed (74) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/test.yaml +25 -0
  3. data/.gitignore +7 -4
  4. data/Gemfile +9 -5
  5. data/Rakefile +13 -0
  6. data/Steepfile +13 -11
  7. data/lib/lrama/context.rb +1 -3
  8. data/lib/lrama/counterexamples/path.rb +0 -46
  9. data/lib/lrama/counterexamples/production_path.rb +17 -0
  10. data/lib/lrama/counterexamples/start_path.rb +21 -0
  11. data/lib/lrama/counterexamples/transition_path.rb +17 -0
  12. data/lib/lrama/counterexamples.rb +3 -0
  13. data/lib/lrama/grammar/code/initial_action_code.rb +28 -0
  14. data/lib/lrama/grammar/code/no_reference_code.rb +24 -0
  15. data/lib/lrama/grammar/code/printer_code.rb +34 -0
  16. data/lib/lrama/grammar/code/rule_action.rb +62 -0
  17. data/lib/lrama/grammar/code.rb +9 -93
  18. data/lib/lrama/grammar/counter.rb +15 -0
  19. data/lib/lrama/grammar/error_token.rb +3 -3
  20. data/lib/lrama/grammar/parameterizing_rules/builder/base.rb +36 -0
  21. data/lib/lrama/grammar/parameterizing_rules/builder/list.rb +28 -0
  22. data/lib/lrama/grammar/parameterizing_rules/builder/nonempty_list.rb +28 -0
  23. data/lib/lrama/grammar/parameterizing_rules/builder/option.rb +28 -0
  24. data/lib/lrama/grammar/parameterizing_rules/builder/separated_list.rb +39 -0
  25. data/lib/lrama/grammar/parameterizing_rules/builder/separated_nonempty_list.rb +34 -0
  26. data/lib/lrama/grammar/parameterizing_rules/builder.rb +60 -0
  27. data/lib/lrama/grammar/printer.rb +3 -3
  28. data/lib/lrama/grammar/reference.rb +7 -16
  29. data/lib/lrama/grammar/rule.rb +19 -2
  30. data/lib/lrama/grammar/rule_builder.rb +177 -0
  31. data/lib/lrama/grammar/symbol.rb +16 -2
  32. data/lib/lrama/grammar/type.rb +6 -0
  33. data/lib/lrama/grammar.rb +115 -325
  34. data/lib/lrama/lexer/location.rb +22 -0
  35. data/lib/lrama/lexer/token/parameterizing.rb +18 -3
  36. data/lib/lrama/lexer/token/tag.rb +4 -0
  37. data/lib/lrama/lexer/token/user_code.rb +54 -4
  38. data/lib/lrama/lexer/token.rb +35 -10
  39. data/lib/lrama/lexer.rb +32 -31
  40. data/lib/lrama/options.rb +1 -2
  41. data/lib/lrama/output.rb +2 -2
  42. data/lib/lrama/parser.rb +514 -424
  43. data/lib/lrama/report/profile.rb +1 -12
  44. data/lib/lrama/version.rb +1 -1
  45. data/lib/lrama.rb +0 -1
  46. data/parser.y +111 -52
  47. data/rbs_collection.lock.yaml +6 -8
  48. data/rbs_collection.yaml +1 -0
  49. data/sig/lrama/grammar/code/printer_code.rbs +15 -0
  50. data/sig/lrama/grammar/code.rbs +24 -0
  51. data/sig/lrama/grammar/counter.rbs +11 -0
  52. data/sig/lrama/grammar/error_token.rbs +11 -0
  53. data/sig/lrama/grammar/parameterizing_rules/builder/base.rbs +26 -0
  54. data/sig/lrama/grammar/parameterizing_rules/builder/list.rbs +10 -0
  55. data/sig/lrama/grammar/parameterizing_rules/builder/nonempty_list.rbs +10 -0
  56. data/sig/lrama/grammar/parameterizing_rules/builder/option.rbs +10 -0
  57. data/sig/lrama/grammar/parameterizing_rules/builder/separated_list.rbs +11 -0
  58. data/sig/lrama/grammar/parameterizing_rules/builder/separated_nonempty_list.rbs +11 -0
  59. data/sig/lrama/grammar/parameterizing_rules/builder.rbs +23 -0
  60. data/sig/lrama/grammar/precedence.rbs +11 -0
  61. data/sig/lrama/grammar/printer.rbs +11 -0
  62. data/sig/lrama/grammar/reference.rbs +6 -6
  63. data/sig/lrama/grammar/rule.rbs +13 -0
  64. data/sig/lrama/grammar/rule_builder.rbs +42 -0
  65. data/sig/lrama/grammar/symbol.rbs +37 -0
  66. data/sig/lrama/lexer/location.rbs +14 -0
  67. data/sig/lrama/lexer/token/parameterizing.rbs +9 -0
  68. data/sig/lrama/lexer/token/tag.rbs +1 -0
  69. data/sig/lrama/lexer/token/user_code.rbs +8 -1
  70. data/sig/lrama/lexer/token.rbs +9 -4
  71. data/sig/stdlib/strscan/string_scanner.rbs +5 -0
  72. data/template/bison/yacc.c +5 -2
  73. metadata +38 -3
  74. data/lib/lrama/type.rb +0 -4
data/lib/lrama/grammar.rb CHANGED
@@ -1,17 +1,17 @@
1
- require "strscan"
2
-
3
1
  require "lrama/grammar/auxiliary"
4
2
  require "lrama/grammar/code"
3
+ require "lrama/grammar/counter"
5
4
  require "lrama/grammar/error_token"
6
5
  require "lrama/grammar/percent_code"
7
6
  require "lrama/grammar/precedence"
8
7
  require "lrama/grammar/printer"
9
8
  require "lrama/grammar/reference"
10
9
  require "lrama/grammar/rule"
10
+ require "lrama/grammar/rule_builder"
11
11
  require "lrama/grammar/symbol"
12
+ require "lrama/grammar/type"
12
13
  require "lrama/grammar/union"
13
14
  require "lrama/lexer"
14
- require "lrama/type"
15
15
 
16
16
  module Lrama
17
17
  # Grammar is the result of parsing an input grammar file
@@ -21,17 +21,19 @@ module Lrama
21
21
  :printers, :error_tokens,
22
22
  :lex_param, :parse_param, :initial_action,
23
23
  :symbols, :types,
24
- :rules, :_rules,
24
+ :rules, :rule_builders,
25
25
  :sym_to_rules
26
26
 
27
- def initialize
27
+ def initialize(rule_counter)
28
+ @rule_counter = rule_counter
29
+
28
30
  # Code defined by "%code"
29
31
  @percent_codes = []
30
32
  @printers = []
31
33
  @error_tokens = []
32
34
  @symbols = []
33
35
  @types = []
34
- @_rules = []
36
+ @rule_builders = []
35
37
  @rules = []
36
38
  @sym_to_rules = {}
37
39
  @empty_symbol = nil
@@ -48,12 +50,12 @@ module Lrama
48
50
  @percent_codes << PercentCode.new(id, code)
49
51
  end
50
52
 
51
- def add_printer(ident_or_tags:, code:, lineno:)
52
- @printers << Printer.new(ident_or_tags: ident_or_tags, code: code, lineno: lineno)
53
+ def add_printer(ident_or_tags:, token_code:, lineno:)
54
+ @printers << Printer.new(ident_or_tags: ident_or_tags, token_code: token_code, lineno: lineno)
53
55
  end
54
56
 
55
- def add_error_token(ident_or_tags:, code:, lineno:)
56
- @error_tokens << ErrorToken.new(ident_or_tags: ident_or_tags, code: code, lineno: lineno)
57
+ def add_error_token(ident_or_tags:, token_code:, lineno:)
58
+ @error_tokens << ErrorToken.new(ident_or_tags: ident_or_tags, token_code: token_code, lineno: lineno)
57
59
  end
58
60
 
59
61
  def add_term(id:, alias_name: nil, tag: nil, token_id: nil, replace: false)
@@ -123,12 +125,8 @@ module Lrama
123
125
  @union = Union.new(code: code, lineno: lineno)
124
126
  end
125
127
 
126
- def add_rule(lhs:, rhs:, lineno:)
127
- @_rules << [lhs, rhs, lineno]
128
- end
129
-
130
- def build_code(type, token_code)
131
- Code.new(type: type, token_code: token_code)
128
+ def add_rule_builder(builder)
129
+ @rule_builders << builder
132
130
  end
133
131
 
134
132
  def prologue_first_lineno=(prologue_first_lineno)
@@ -148,10 +146,9 @@ module Lrama
148
146
  end
149
147
 
150
148
  def prepare
151
- extract_references
152
149
  normalize_rules
153
150
  collect_symbols
154
- replace_token_with_symbol
151
+ set_lhs_and_rhs
155
152
  fill_symbol_number
156
153
  fill_default_precedence
157
154
  fill_sym_to_rules
@@ -159,14 +156,74 @@ module Lrama
159
156
  fill_symbol_printer
160
157
  fill_symbol_error_token
161
158
  @symbols.sort_by!(&:number)
159
+ compute_nullable
160
+ compute_first_set
162
161
  end
163
162
 
164
163
  # TODO: More validation methods
164
+ #
165
+ # * Validation for no_declared_type_reference
165
166
  def validate!
166
167
  validate_symbol_number_uniqueness!
167
- validate_no_declared_type_reference!
168
+ validate_symbol_alias_name_uniqueness!
169
+ validate_rule_lhs_is_nterm!
170
+ end
171
+
172
+ def find_symbol_by_s_value(s_value)
173
+ @symbols.find do |sym|
174
+ sym.id.s_value == s_value
175
+ end
176
+ end
177
+
178
+ def find_symbol_by_s_value!(s_value)
179
+ find_symbol_by_s_value(s_value) || (raise "Symbol not found: #{s_value}")
180
+ end
181
+
182
+ def find_symbol_by_id(id)
183
+ @symbols.find do |sym|
184
+ sym.id == id || sym.alias_name == id.s_value
185
+ end
186
+ end
187
+
188
+ def find_symbol_by_id!(id)
189
+ find_symbol_by_id(id) || (raise "Symbol not found: #{id}")
190
+ end
191
+
192
+ def find_symbol_by_number!(number)
193
+ sym = @symbols[number]
194
+
195
+ raise "Symbol not found: #{number}" unless sym
196
+ raise "[BUG] Symbol number mismatch. #{number}, #{sym}" if sym.number != number
197
+
198
+ sym
199
+ end
200
+
201
+ def find_rules_by_symbol!(sym)
202
+ find_rules_by_symbol(sym) || (raise "Rules for #{sym} not found")
203
+ end
204
+
205
+ def find_rules_by_symbol(sym)
206
+ @sym_to_rules[sym.number]
207
+ end
208
+
209
+ def terms_count
210
+ terms.count
168
211
  end
169
212
 
213
+ def terms
214
+ @terms ||= @symbols.select(&:term?)
215
+ end
216
+
217
+ def nterms_count
218
+ nterms.count
219
+ end
220
+
221
+ def nterms
222
+ @nterms ||= @symbols.select(&:nterm?)
223
+ end
224
+
225
+ private
226
+
170
227
  def compute_nullable
171
228
  @rules.each do |rule|
172
229
  case
@@ -251,160 +308,9 @@ module Lrama
251
308
  end
252
309
  end
253
310
 
254
- def find_symbol_by_s_value(s_value)
255
- @symbols.find do |sym|
256
- sym.id.s_value == s_value
257
- end
258
- end
259
-
260
- def find_symbol_by_s_value!(s_value)
261
- find_symbol_by_s_value(s_value) || (raise "Symbol not found: #{s_value}")
262
- end
263
-
264
- def find_symbol_by_id(id)
265
- @symbols.find do |sym|
266
- # TODO: validate uniqueness of Token#s_value and Symbol#alias_name
267
- sym.id == id || sym.alias_name == id.s_value
268
- end
269
- end
270
-
271
- def find_symbol_by_id!(id)
272
- find_symbol_by_id(id) || (raise "Symbol not found: #{id}")
273
- end
274
-
275
- def find_symbol_by_number!(number)
276
- sym = @symbols[number]
277
-
278
- raise "Symbol not found: #{number}" unless sym
279
- raise "[BUG] Symbol number mismatch. #{number}, #{sym}" if sym.number != number
280
-
281
- sym
282
- end
283
-
284
- def find_rules_by_symbol!(sym)
285
- find_rules_by_symbol(sym) || (raise "Rules for #{sym} not found")
286
- end
287
-
288
- def find_rules_by_symbol(sym)
289
- @sym_to_rules[sym.number]
290
- end
291
-
292
- def terms_count
293
- terms.count
294
- end
295
-
296
- def terms
297
- @terms ||= @symbols.select(&:term?)
298
- end
299
-
300
- def nterms_count
301
- nterms.count
302
- end
303
-
304
- def nterms
305
- @nterms ||= @symbols.select(&:nterm?)
306
- end
307
-
308
- def scan_reference(scanner)
309
- start = scanner.pos
310
- case
311
- # $ references
312
- # It need to wrap an identifier with brackets to use ".-" for identifiers
313
- when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\$/) # $$, $<long>$
314
- tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
315
- return Reference.new(type: :dollar, value: "$", ex_tag: tag, first_column: start, last_column: scanner.pos - 1)
316
- when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1
317
- tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
318
- return Reference.new(type: :dollar, value: Integer(scanner[2]), ex_tag: tag, first_column: start, last_column: scanner.pos - 1)
319
- when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_][a-zA-Z0-9_]*)/) # $foo, $expr, $<long>program (named reference without brackets)
320
- tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
321
- return Reference.new(type: :dollar, value: scanner[2], ex_tag: tag, first_column: start, last_column: scanner.pos - 1)
322
- when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $expr.right, $expr-right, $<long>program (named reference with brackets)
323
- tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
324
- return Reference.new(type: :dollar, value: scanner[2], ex_tag: tag, first_column: start, last_column: scanner.pos - 1)
325
-
326
- # @ references
327
- # It need to wrap an identifier with brackets to use ".-" for identifiers
328
- when scanner.scan(/@\$/) # @$
329
- return Reference.new(type: :at, value: "$", first_column: start, last_column: scanner.pos - 1)
330
- when scanner.scan(/@(\d+)/) # @1
331
- return Reference.new(type: :at, value: Integer(scanner[1]), first_column: start, last_column: scanner.pos - 1)
332
- when scanner.scan(/@([a-zA-Z][a-zA-Z0-9_]*)/) # @foo, @expr (named reference without brackets)
333
- return Reference.new(type: :at, value: scanner[1], first_column: start, last_column: scanner.pos - 1)
334
- when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @expr.right, @expr-right (named reference with brackets)
335
- return Reference.new(type: :at, value: scanner[1], first_column: start, last_column: scanner.pos - 1)
336
- end
337
- end
338
-
339
- private
340
-
341
- def extract_references
342
- unless initial_action.nil?
343
- scanner = StringScanner.new(initial_action.s_value)
344
- references = []
345
-
346
- while !scanner.eos? do
347
- if reference = scan_reference(scanner)
348
- references << reference
349
- else
350
- scanner.getch
351
- end
352
- end
353
-
354
- initial_action.token_code.references = references
355
- end
356
-
357
- @printers.each do |printer|
358
- scanner = StringScanner.new(printer.code.s_value)
359
- references = []
360
-
361
- while !scanner.eos? do
362
- if reference = scan_reference(scanner)
363
- references << reference
364
- else
365
- scanner.getch
366
- end
367
- end
368
-
369
- printer.code.token_code.references = references
370
- end
371
-
372
- @error_tokens.each do |error_token|
373
- scanner = StringScanner.new(error_token.code.s_value)
374
- references = []
375
-
376
- while !scanner.eos? do
377
- if reference = scan_reference(scanner)
378
- references << reference
379
- else
380
- scanner.getch
381
- end
382
- end
383
-
384
- error_token.code.token_code.references = references
385
- end
386
-
387
- @_rules.each do |lhs, rhs, _|
388
- rhs.each_with_index do |token, index|
389
- next unless token.class == Lrama::Lexer::Token::UserCode
390
-
391
- scanner = StringScanner.new(token.s_value)
392
- references = []
393
-
394
- while !scanner.eos? do
395
- case
396
- when reference = scan_reference(scanner)
397
- references << reference
398
- when scanner.scan(/\/\*/)
399
- scanner.scan_until(/\*\//)
400
- else
401
- scanner.getch
402
- end
403
- end
404
-
405
- token.references = references
406
- numberize_references(lhs, rhs, token.references)
407
- end
311
+ def setup_rules
312
+ @rule_builders.each do |builder|
313
+ builder.setup_rules
408
314
  end
409
315
  end
410
316
 
@@ -444,35 +350,9 @@ module Lrama
444
350
  @accept_symbol = term
445
351
  end
446
352
 
447
- def numberize_references(lhs, rhs, references)
448
- references.map! {|ref|
449
- ref_name = ref.value
450
- if ref_name.is_a?(::String) && ref_name != '$'
451
- value =
452
- if lhs.referred_by?(ref_name)
453
- '$'
454
- else
455
- index = rhs.find_index {|token| token.referred_by?(ref_name) }
456
-
457
- if index
458
- index + 1
459
- else
460
- raise "'#{ref_name}' is invalid name."
461
- end
462
- end
463
-
464
- ref.value = value
465
- ref
466
- else
467
- ref
468
- end
469
- }
470
- end
471
-
472
353
  # 1. Add $accept rule to the top of rules
473
- # 2. Extract precedence and last action
474
- # 3. Extract action in the middle of RHS into new Empty rule
475
- # 4. Append id and extract action then create Rule
354
+ # 2. Extract action in the middle of RHS into new Empty rule
355
+ # 3. Append id and extract action then create Rule
476
356
  #
477
357
  # Bison 3.8.2 uses different orders for symbol number and rule number
478
358
  # when a rule has actions in the middle of a rule.
@@ -493,127 +373,43 @@ module Lrama
493
373
  #
494
374
  def normalize_rules
495
375
  # 1. Add $accept rule to the top of rules
496
- accept = find_symbol_by_s_value!("$accept")
497
- eof = find_symbol_by_number!(0)
498
- lineno = @_rules.first ? @_rules.first[2] : 0
499
- @rules << Rule.new(id: @rules.count, lhs: accept, rhs: [@_rules.first[0], eof], code: nil, lineno: lineno)
376
+ accept = @accept_symbol
377
+ eof = @eof_symbol
378
+ lineno = @rule_builders.first ? @rule_builders.first.line : 0
379
+ @rules << Rule.new(id: @rule_counter.increment, _lhs: accept.id, _rhs: [@rule_builders.first.lhs, eof.id], token_code: nil, lineno: lineno)
500
380
 
501
- extracted_action_number = 1 # @n as nterm
381
+ setup_rules
502
382
 
503
- @_rules.each do |lhs, rhs, lineno|
504
- a = []
505
- rhs1 = []
506
- code = nil
507
- precedence_sym = nil
508
-
509
- # 2. Extract precedence and last action
510
- rhs.reverse.each do |r|
511
- case
512
- when r.is_a?(Symbol) # precedence_sym
513
- precedence_sym = r
514
- when r.is_a?(Lrama::Lexer::Token::UserCode) && precedence_sym.nil? && code.nil? && rhs1.empty?
515
- code = r
516
- else
517
- rhs1 << r
518
- end
519
- end
520
- rhs1.reverse!
521
-
522
- # Bison n'th component is 1-origin
523
- (rhs1 + [code]).compact.each.with_index(1) do |token, i|
524
- if token.is_a?(Lrama::Lexer::Token::UserCode)
525
- token.references.each do |ref|
526
- # Need to keep position_in_rhs for actions in the middle of RHS
527
- ref.position_in_rhs = i - 1
528
- next if ref.type == :at
529
- # $$, $n, @$, @n can be used in any actions
530
-
531
- if ref.value == "$"
532
- # TODO: Should be postponed after middle actions are extracted?
533
- ref.referring_symbol = lhs
534
- elsif ref.value.is_a?(Integer)
535
- raise "Can not refer following component. #{ref.value} >= #{i}. #{token}" if ref.value >= i
536
- rhs1[ref.value - 1].referred = true
537
- ref.referring_symbol = rhs1[ref.value - 1]
538
- elsif ref.value.is_a?(String)
539
- target_tokens = ([lhs] + rhs1 + [code]).compact.first(i)
540
- referring_symbol_candidate = target_tokens.filter {|token| token.referred_by?(ref.value) }
541
- raise "Referring symbol `#{ref.value}` is duplicated. #{token}" if referring_symbol_candidate.size >= 2
542
- raise "Referring symbol `#{ref.value}` is not found. #{token}" if referring_symbol_candidate.count == 0
543
-
544
- referring_symbol = referring_symbol_candidate.first
545
- referring_symbol.referred = true
546
- ref.referring_symbol = referring_symbol
547
- end
548
- end
549
- end
383
+ @rule_builders.each do |builder|
384
+ # Extract actions in the middle of RHS into new rules.
385
+ builder.midrule_action_rules.each do |rule|
386
+ @rules << rule
550
387
  end
551
388
 
552
- rhs2 = rhs1.map do |token|
553
- if token.is_a?(Lrama::Lexer::Token::UserCode)
554
- prefix = token.referred ? "@" : "$@"
555
- new_token = Lrama::Lexer::Token::Ident.new(s_value: prefix + extracted_action_number.to_s)
556
- extracted_action_number += 1
557
- a << [new_token, token]
558
- new_token
559
- else
560
- token
561
- end
389
+ builder.rules.each do |rule|
390
+ add_nterm(id: rule._lhs)
391
+ @rules << rule
562
392
  end
563
393
 
564
- # Extract actions in the middle of RHS
565
- # into new rules.
566
- a.each do |new_token, code|
567
- @rules << Rule.new(id: @rules.count, lhs: new_token, rhs: [], code: Code.new(type: :user_code, token_code: code), lineno: code.line)
394
+ builder.parameterizing_rules.each do |rule|
395
+ add_nterm(id: rule._lhs, tag: rule.lhs_tag)
396
+ @rules << rule
568
397
  end
569
398
 
570
- c = code ? Code.new(type: :user_code, token_code: code) : nil
571
- # Expand Parameterizing rules
572
- if rhs2.any? {|r| r.is_a?(Lrama::Lexer::Token::Parameterizing) }
573
- expand_parameterizing_rules(lhs, rhs2, c, precedence_sym, lineno)
574
- else
575
- @rules << Rule.new(id: @rules.count, lhs: lhs, rhs: rhs2, code: c, precedence_sym: precedence_sym, lineno: lineno)
576
- end
577
- add_nterm(id: lhs)
578
- a.each do |new_token, _|
579
- add_nterm(id: new_token)
399
+ builder.midrule_action_rules.each do |rule|
400
+ add_nterm(id: rule._lhs)
580
401
  end
581
402
  end
582
403
  end
583
404
 
584
- def expand_parameterizing_rules(lhs, rhs, code, precedence_sym, lineno)
585
- token = Lrama::Lexer::Token::Ident.new(s_value: rhs[0].s_value)
586
- if rhs.any? {|r| r.is_a?(Lrama::Lexer::Token::Parameterizing) && r.option? }
587
- option_token = Lrama::Lexer::Token::Ident.new(s_value: "option_#{rhs[0].s_value}")
588
- add_term(id: option_token)
589
- @rules << Rule.new(id: @rules.count, lhs: lhs, rhs: [option_token], code: code, precedence_sym: precedence_sym, lineno: lineno)
590
- @rules << Rule.new(id: @rules.count, lhs: option_token, rhs: [], code: code, precedence_sym: precedence_sym, lineno: lineno)
591
- @rules << Rule.new(id: @rules.count, lhs: option_token, rhs: [token], code: code, precedence_sym: precedence_sym, lineno: lineno)
592
- elsif rhs.any? {|r| r.is_a?(Lrama::Lexer::Token::Parameterizing) && r.nonempty_list? }
593
- nonempty_list_token = Lrama::Lexer::Token::Ident.new(s_value: "nonempty_list_#{rhs[0].s_value}")
594
- add_term(id: nonempty_list_token)
595
- @rules << Rule.new(id: @rules.count, lhs: lhs, rhs: [nonempty_list_token], code: code, precedence_sym: precedence_sym, lineno: lineno)
596
- @rules << Rule.new(id: @rules.count, lhs: nonempty_list_token, rhs: [token], code: code, precedence_sym: precedence_sym, lineno: lineno)
597
- @rules << Rule.new(id: @rules.count, lhs: nonempty_list_token, rhs: [nonempty_list_token, token], code: code, precedence_sym: precedence_sym, lineno: lineno)
598
- elsif rhs.any? {|r| r.is_a?(Lrama::Lexer::Token::Parameterizing) && r.list? }
599
- list_token = Lrama::Lexer::Token::Ident.new(s_value: "list_#{rhs[0].s_value}")
600
- add_term(id: list_token)
601
- @rules << Rule.new(id: @rules.count, lhs: lhs, rhs: [list_token], code: code, precedence_sym: precedence_sym, lineno: lineno)
602
- @rules << Rule.new(id: @rules.count, lhs: list_token, rhs: [], code: code, precedence_sym: precedence_sym, lineno: lineno)
603
- @rules << Rule.new(id: @rules.count, lhs: list_token, rhs: [list_token, token], code: code, precedence_sym: precedence_sym, lineno: lineno)
604
- end
605
- end
606
-
607
405
  # Collect symbols from rules
608
406
  def collect_symbols
609
- @rules.flat_map(&:rhs).each do |s|
407
+ @rules.flat_map(&:_rhs).each do |s|
610
408
  case s
611
409
  when Lrama::Lexer::Token::Char
612
410
  add_term(id: s)
613
411
  when Lrama::Lexer::Token
614
412
  # skip
615
- when Symbol
616
- # skip
617
413
  else
618
414
  raise "Unknown class: #{s}"
619
415
  end
@@ -693,23 +489,13 @@ module Lrama
693
489
  end
694
490
  end
695
491
 
696
- def replace_token_with_symbol
492
+ def set_lhs_and_rhs
697
493
  @rules.each do |rule|
698
- rule.lhs = token_to_symbol(rule.lhs)
494
+ rule.lhs = token_to_symbol(rule._lhs) if rule._lhs
699
495
 
700
- rule.rhs.map! do |t|
496
+ rule.rhs = rule._rhs.map do |t|
701
497
  token_to_symbol(t)
702
498
  end
703
-
704
- if rule.code
705
- rule.code.references.each do |ref|
706
- next if ref.type == :at
707
-
708
- if !ref.referring_symbol.is_a?(Lrama::Lexer::Token::UserCode)
709
- ref.referring_symbol = token_to_symbol(ref.referring_symbol)
710
- end
711
- end
712
- end
713
499
  end
714
500
  end
715
501
 
@@ -717,8 +503,6 @@ module Lrama
717
503
  case token
718
504
  when Lrama::Lexer::Token
719
505
  find_symbol_by_id!(token)
720
- when Symbol
721
- token
722
506
  else
723
507
  raise "Unknown class: #{token}"
724
508
  end
@@ -801,17 +585,23 @@ module Lrama
801
585
  raise "Symbol number is duplicated. #{invalid}"
802
586
  end
803
587
 
804
- def validate_no_declared_type_reference!
588
+ def validate_symbol_alias_name_uniqueness!
589
+ invalid = @symbols.select(&:alias_name).group_by(&:alias_name).select do |alias_name, syms|
590
+ syms.count > 1
591
+ end
592
+
593
+ return if invalid.empty?
594
+
595
+ raise "Symbol alias name is duplicated. #{invalid}"
596
+ end
597
+
598
+ def validate_rule_lhs_is_nterm!
805
599
  errors = []
806
600
 
807
601
  rules.each do |rule|
808
- next unless rule.code
602
+ next if rule.lhs.nterm?
809
603
 
810
- rule.code.references.select do |ref|
811
- ref.type == :dollar && !ref.tag
812
- end.each do |ref|
813
- errors << "$#{ref.value} of '#{rule.lhs.id.s_value}' has no declared type"
814
- end
604
+ errors << "[BUG] LHS of #{rule} (line: #{rule.lineno}) is term. It should be nterm."
815
605
  end
816
606
 
817
607
  return if errors.empty?
@@ -0,0 +1,22 @@
1
+ module Lrama
2
+ class Lexer
3
+ class Location
4
+ attr_reader :first_line, :first_column, :last_line, :last_column
5
+
6
+ def initialize(first_line:, first_column:, last_line:, last_column:)
7
+ @first_line = first_line
8
+ @first_column = first_column
9
+ @last_line = last_line
10
+ @last_column = last_column
11
+ end
12
+
13
+ def ==(other)
14
+ self.class == other.class &&
15
+ self.first_line == other.first_line &&
16
+ self.first_column == other.first_column &&
17
+ self.last_line == other.last_line &&
18
+ self.last_column == other.last_column
19
+ end
20
+ end
21
+ end
22
+ end
@@ -2,16 +2,31 @@ module Lrama
2
2
  class Lexer
3
3
  class Token
4
4
  class Parameterizing < Token
5
+ attr_accessor :args
6
+
7
+ def initialize(s_value:, alias_name: nil, location: nil, args: [])
8
+ super s_value: s_value, alias_name: alias_name, location: location
9
+ @args = args
10
+ end
11
+
5
12
  def option?
6
- self.s_value == "?"
13
+ %w(option ?).include?(self.s_value)
7
14
  end
8
15
 
9
16
  def nonempty_list?
10
- self.s_value == "+"
17
+ %w(nonempty_list +).include?(self.s_value)
11
18
  end
12
19
 
13
20
  def list?
14
- self.s_value == "*"
21
+ %w(list *).include?(self.s_value)
22
+ end
23
+
24
+ def separated_nonempty_list?
25
+ %w(separated_nonempty_list).include?(self.s_value)
26
+ end
27
+
28
+ def separated_list?
29
+ %w(separated_list).include?(self.s_value)
15
30
  end
16
31
  end
17
32
  end
@@ -2,6 +2,10 @@ module Lrama
2
2
  class Lexer
3
3
  class Token
4
4
  class Tag < Token
5
+ # Omit "<>"
6
+ def member
7
+ s_value[1..-2] or raise "Unexpected Tag format (#{s_value})"
8
+ end
5
9
  end
6
10
  end
7
11
  end