lrama 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1124 @@
1
+ require "lrama/report"
2
+
3
+ module Lrama
4
+ # Algorithm Digraph of https://dl.acm.org/doi/pdf/10.1145/69622.357187 (P. 625)
5
# Digraph algorithm from DeRemer & Pennello (P. 625 of the paper linked above).
#
# Given a set of nodes, a relation between them and a base value per node,
# computes for every node the union (via `|`) of the base values of all
# nodes reachable through the relation. Nodes that belong to the same
# strongly connected component end up sharing the same value.
class Digraph
  # sets          - nodes to start traversal from (X in the paper)
  # relation      - hash of node => array of related nodes (R in the paper)
  # base_function - hash of node => initial value (F' in the paper)
  def initialize(sets, relation, base_function)
    @sets = sets
    @relation = relation
    @base_function = base_function
    # S in the paper: nodes on the current traversal path
    @stack = []
    # N in the paper: 0 = unvisited, otherwise stack depth or INFINITY when done
    @h = Hash.new(0)
    # F in the paper
    @result = {}
  end

  # Traverses every still-unvisited node and returns the node => value hash.
  def compute
    @sets.each { |x| traverse(x) if @h[x] == 0 }
    @result
  end

  private

  def traverse(x)
    @stack.push(x)
    depth = @stack.count
    @h[x] = depth
    @result[x] = @base_function[x] # F x = F' x

    successors = @relation[x]
    if successors
      successors.each do |y|
        traverse(y) if @h[y] == 0
        @h[x] = @h[y] if @h[y] < @h[x]
        @result[x] |= @result[y] # F x = F x + F y
      end
    end

    # x is not the root of its component: leave it on the stack for the root.
    return unless @h[x] == depth

    # x is the root: pop the whole component and give every member F x.
    loop do
      z = @stack.pop
      @h[z] = Float::INFINITY
      @result[z] = @result[x] # F (Top of S) = F x
      break if z == x
    end
  end
end
55
+
56
# One parser state: its kernel and closure items, the shifts and reduces
# derived from them, and the conflicts recorded for it.
class State
  # A reduction available in a state together with its look-ahead terms.
  class Reduce
    # https://www.gnu.org/software/bison/manual/html_node/Default-Reductions.html
    attr_reader :item, :look_ahead, :not_selected_symbols
    attr_accessor :default_reduction

    def initialize(item)
      @item = item
      @look_ahead = nil
      @not_selected_symbols = []
    end

    def rule
      @item.rule
    end

    # Stores the look-ahead list; note that the given object itself is frozen.
    def look_ahead=(look_ahead)
      @look_ahead = look_ahead.freeze
    end

    def add_not_selected_symbol(sym)
      @not_selected_symbols << sym
    end

    # Look-ahead symbols minus those recorded via add_not_selected_symbol.
    # Returns [] when no look-ahead has been assigned.
    def selected_look_ahead
      return [] unless @look_ahead

      @look_ahead - @not_selected_symbols
    end
  end

  # A transition on next_sym leading to the state whose kernels are next_items.
  class Shift
    attr_reader :next_sym, :next_items
    attr_accessor :not_selected

    def initialize(next_sym, next_items)
      @next_sym = next_sym
      @next_items = next_items
    end
  end

  # * symbol: A symbol under discussion
  # * reduce: A reduce under discussion
  # * which: For which a conflict is resolved. :shift, :reduce or :error (for nonassociative)
  ResolvedConflict = Struct.new(:symbol, :reduce, :which, :same_prec, keyword_init: true) do
    # One-line description of how the conflict was resolved.
    # Raises RuntimeError when `which` is not :shift, :reduce or :error.
    def report_message
      s = symbol.display_name
      r = reduce.rule.precedence_sym.display_name
      msg =
        case which
        when :shift
          same_prec ? "resolved as #{which} (%right #{s})" : "resolved as #{which} (#{r} < #{s})"
        when :reduce
          same_prec ? "resolved as #{which} (%left #{s})" : "resolved as #{which} (#{s} < #{r})"
        when :error
          "resolved as an #{which} (%nonassoc #{s})"
        else
          raise "Unknown direction. #{self}"
        end

      "Conflict between rule #{reduce.rule.id} and token #{s} #{msg}."
    end
  end

  attr_reader :id, :accessing_symbol, :kernels, :conflicts, :resolved_conflicts,
              :default_reduction_rule
  attr_accessor :closure, :shifts, :reduces

  def initialize(id, accessing_symbol, kernels)
    @id = id
    @accessing_symbol = accessing_symbol
    @kernels = kernels.freeze
    # Manage relationships between items to state
    # to resolve next state
    @items_to_state = {}
    @conflicts = []
    @resolved_conflicts = []
    @default_reduction_rule = nil
  end

  # Kernel items followed by closure items.
  def items
    @kernels + @closure
  end

  # Reduces whose rule is not the default reduction rule.
  def non_default_reduces
    reduces.reject { |reduce| reduce.rule == @default_reduction_rule }
  end

  # Derives `shifts` (one per distinct next symbol, ordered by symbol number)
  # and `reduces` (one per item at end of rule) from `items`.
  def compute_shifts_reduces
    grouped = {}
    reduces = []

    items.each do |item|
      # TODO: Consider what should be pushed
      if item.end_of_rule?
        reduces << Reduce.new(item)
      else
        (grouped[item.next_sym] ||= []) << item.new_by_next_position
      end
    end

    shifts = grouped.sort_by { |next_sym, _| next_sym.number }.map do |next_sym, new_items|
      Shift.new(next_sym, new_items.flatten)
    end

    self.shifts = shifts.freeze
    self.reduces = reduces.freeze
  end

  # Records which state is reached through the shift whose next_items is `items`.
  def set_items_to_state(items, next_state)
    @items_to_state[items] = next_state
  end

  # Assigns `look_ahead` to the reduce of `rule`.
  # Raises RuntimeError when this state has no reduce for `rule`.
  def set_look_ahead(rule, look_ahead)
    reduce = reduces.find { |r| r.rule == rule }
    raise "reduce is not found. #{rule}, #{self}" unless reduce

    reduce.look_ahead = look_ahead
  end

  # Returns array of [shift, next_state] for shifts on nonterminals (memoized).
  def nterm_transitions
    @nterm_transitions ||= shifts.reject { |shift| shift.next_sym.term? }.map do |shift|
      [shift, @items_to_state[shift.next_items]]
    end
  end

  # Returns array of [shift, next_state] for shifts on terminals (memoized).
  def term_transitions
    @term_transitions ||= shifts.reject { |shift| shift.next_sym.nterm? }.map do |shift|
      [shift, @items_to_state[shift.next_items]]
    end
  end

  # Terminal transitions whose shift has not been marked not_selected.
  def selected_term_transitions
    term_transitions.reject { |shift, _next_state| shift.not_selected }
  end

  # Move to next state by sym
  # Raises RuntimeError when no transition on sym exists.
  def transition(sym)
    candidates = sym.term? ? term_transitions : nterm_transitions
    result = nil

    candidates.each do |shift, next_state|
      result = next_state if shift.next_sym == sym
    end

    raise "Can not transit by #{sym} #{self}" if result.nil?

    result
  end

  # Returns the reduce built from `item`, or raises RuntimeError.
  def find_reduce_by_item!(item)
    found = reduces.find { |r| r.item == item }
    # The message used to interpolate an undefined local `state`, which
    # turned this error path into a NameError.
    raise "reduce is not found. #{item}, #{self}" unless found

    found
  end

  # Stores the rule and flags every reduce of that rule as default_reduction.
  def default_reduction_rule=(default_reduction_rule)
    @default_reduction_rule = default_reduction_rule

    reduces.each do |r|
      r.default_reduction = true if r.rule == default_reduction_rule
    end
  end
end
260
+
261
# Writes a textual report of every state held by a States object.
class StatesReporter
  def initialize(states)
    @states = states
  end

  # Appends the report of all states to `io` (anything responding to `<<`).
  #
  # itemsets   - list all items (kernels + closure) instead of only kernels
  # lookaheads - append the selected look-ahead to items at end of rule
  # solved     - include messages of resolved conflicts
  # verbose    - dump the intermediate sets and relations
  # states     - accepted but not consulted by this method
  def report(io, states: false, itemsets: false, lookaheads: false, solved: false, verbose: false)
    # These accessors rebuild a whole hash on every call, so decode them
    # once per report instead of once per state (or per rule).
    sets = verbose ? decoded_sets : nil

    @states.states.each do |state|
      # Report State
      io << "State #{state.id}\n\n"

      report_items(io, state, itemsets, lookaheads)
      report_shifts(io, state)
      report_nonassoc_errors(io, state)
      report_reduces(io, state)
      report_nterm_transitions(io, state)
      report_resolved_conflicts(io, state) if solved
      report_verbose(io, state, sets) if verbose

      # End of Report State
      io << "\n"
    end
  end

  private

  # Decoded copies of the sets used by the verbose sections.
  def decoded_sets
    {
      direct_read_sets: @states.direct_read_sets,
      read_sets: @states.read_sets,
      follow_sets: @states.follow_sets,
      la: @states.la,
    }
  end

  # First nterm whose token_id matches (nil when there is none).
  def find_nterm(token_id)
    @states.nterms.find { |nterm| nterm.token_id == token_id }
  end

  # Report item
  def report_items(io, state, itemsets, lookaheads)
    last_lhs = nil
    list = itemsets ? state.items : state.kernels

    list.sort_by { |i| [i.rule_id, i.position] }.each do |item|
      rule = item.rule

      r =
        if rule.rhs.empty?
          "ε •"
        else
          rule.rhs.map(&:display_name).insert(item.position, "•").join(" ")
        end

      # Consecutive items with the same lhs print "|" instead of repeating it.
      l =
        if rule.lhs == last_lhs
          " " * rule.lhs.id.s_value.length + "|"
        else
          rule.lhs.id.s_value + ":"
        end

      la = ""
      if lookaheads && item.end_of_rule?
        look_ahead = state.find_reduce_by_item!(item).selected_look_ahead
        la = " [#{look_ahead.map(&:display_name).join(", ")}]" unless look_ahead.empty?
      end
      last_lhs = rule.lhs

      io << sprintf("%5i %s %s%s\n", rule.id, l, r, la)
    end
    io << "\n"
  end

  # Report shifts
  def report_shifts(io, state)
    selected = state.term_transitions.reject { |shift, _| shift.not_selected }
    pairs = selected.map { |shift, next_state| [shift.next_sym, next_state.id] }
    max_len = pairs.map { |term, _| term.display_name.length }.max

    pairs.each do |term, state_id|
      io << " #{term.display_name.ljust(max_len)} shift, and go to state #{state_id}\n"
    end
    io << "\n" unless pairs.empty?
  end

  # Report error caused by %nonassoc
  def report_nonassoc_errors(io, state)
    errors = state.resolved_conflicts.select { |resolved| resolved.which == :error }
    names = errors.map { |error| error.symbol.display_name }
    max_len = names.map(&:length).max

    names.each do |name|
      io << " #{name.ljust(max_len)} error (nonassociative)\n"
    end
    io << "\n" unless names.empty?
  end

  # Report reduces
  def report_reduces(io, state)
    nl = false
    reduces = state.non_default_reduces
    max_len = reduces.flat_map(&:look_ahead).compact.map(&:display_name).map(&:length).max || 0
    max_len = [max_len, "$default".length].max if state.default_reduction_rule

    @states.terms.each do |term|
      # look_ahead stays nil for reduces that never received one.
      reduce = reduces.find { |r| r.look_ahead&.include?(term) }
      next unless reduce

      rule = reduce.item.rule
      io << " #{term.display_name.ljust(max_len)} reduce using rule #{rule.id} (#{rule.lhs.display_name})\n"
      nl = true
    end

    if (r = state.default_reduction_rule)
      nl = true
      s = "$default".ljust(max_len)

      if r.initial_rule?
        io << " #{s} accept\n"
      else
        io << " #{s} reduce using rule #{r.id} (#{r.lhs.display_name})\n"
      end
    end
    io << "\n" if nl
  end

  # Report nonterminal transitions
  def report_nterm_transitions(io, state)
    pairs = state.nterm_transitions.map { |shift, next_state| [shift.next_sym, next_state.id] }
    max_len = pairs.map { |nterm, _| nterm.id.s_value.length }.max || 0
    pairs = pairs.uniq.sort_by { |nterm, _| nterm.number }

    pairs.each do |nterm, state_id|
      io << " #{nterm.id.s_value.ljust(max_len)} go to state #{state_id}\n"
    end
    io << "\n" unless pairs.empty?
  end

  # Report conflict resolutions
  def report_resolved_conflicts(io, state)
    state.resolved_conflicts.each do |resolved|
      io << " #{resolved.report_message}\n"
    end
    io << "\n" unless state.resolved_conflicts.empty?
  end

  # Dumps the sets and relations attached to `state`.
  def report_verbose(io, state, sets)
    nterms = @states.nterms
    rules = @states.rules

    # Report direct_read_sets
    io << " [Direct Read sets]\n"
    nterms.each do |nterm|
      terms = sets[:direct_read_sets][[state.id, nterm.token_id]]
      next if !terms || terms.empty?

      str = terms.map { |sym| sym.id.s_value }.join(", ")
      io << " read #{nterm.id.s_value} shift #{str}\n"
    end
    io << "\n"

    # Report reads_relation
    io << " [Reads Relation]\n"
    nterms.each do |nterm|
      pairs = @states.reads_relation[[state.id, nterm.token_id]]
      next unless pairs

      pairs.each do |state_id2, nterm_id2|
        io << " (State #{state_id2}, #{find_nterm(nterm_id2).id.s_value})\n"
      end
    end
    io << "\n"

    # Report read_sets
    io << " [Read sets]\n"
    nterms.each do |nterm|
      terms = sets[:read_sets][[state.id, nterm.token_id]]
      next if !terms || terms.empty?

      terms.each do |sym|
        io << " #{sym.id.s_value}\n"
      end
    end
    io << "\n"

    # Report includes_relation
    io << " [Includes Relation]\n"
    nterms.each do |nterm|
      pairs = @states.includes_relation[[state.id, nterm.token_id]]
      next unless pairs

      pairs.each do |state_id2, nterm_id2|
        io << " (State #{state.id}, #{nterm.id.s_value}) -> (State #{state_id2}, #{find_nterm(nterm_id2).id.s_value})\n"
      end
    end
    io << "\n"

    # Report lookback_relation
    io << " [Lookback Relation]\n"
    rules.each do |rule|
      pairs = @states.lookback_relation[[state.id, rule.id]]
      next unless pairs

      pairs.each do |state_id2, nterm_id2|
        io << " (Rule: #{rule.to_s}) -> (State #{state_id2}, #{find_nterm(nterm_id2).id.s_value})\n"
      end
    end
    io << "\n"

    # Report follow_sets
    io << " [Follow sets]\n"
    nterms.each do |nterm|
      terms = sets[:follow_sets][[state.id, nterm.token_id]]
      next unless terms

      terms.each do |sym|
        io << " #{nterm.id.s_value} -> #{sym.id.s_value}\n"
      end
    end
    io << "\n"

    # Report LA
    io << " [Look-Ahead Sets]\n"
    entries = []
    max_len = 0
    rules.each do |rule|
      syms = sets[:la][[state.id, rule.id]]
      next unless syms

      entries << [rule, syms]
      max_len = ([max_len] + syms.map { |s| s.id.s_value.length }).max
    end
    entries.each do |rule, syms|
      syms.each do |sym|
        io << " #{sym.id.s_value.ljust(max_len)} reduce using rule #{rule.id} (#{rule.lhs.id.s_value})\n"
      end
    end
    io << "\n" unless entries.empty?
  end
end
497
+
498
+ # States is passed to a template file
499
+ #
500
+ # "Efficient Computation of LALR(1) Look-Ahead Sets"
501
+ # https://dl.acm.org/doi/pdf/10.1145/69622.357187
502
+ class States
503
+ include Lrama::Report::Duration
504
+
505
+ # TODO: Validate position is not over rule rhs
506
# A rule together with a dot position inside its right-hand side.
Item = Struct.new(:rule, :position, keyword_init: true) do
  # Optimization for States#setup_state
  def hash
    [rule_id, position].hash
  end

  def rule_id
    rule.id
  end

  # Symbol right after the dot (nil when the dot is at the end).
  def next_sym
    rule.rhs[position]
  end

  def end_of_rule?
    position == rule.rhs.count
  end

  # A new item for the same rule with the dot moved one symbol forward.
  def new_by_next_position
    Item.new(rule: rule, position: position.succ)
  end

  # Symbol right before the dot. With position 0 the index is -1, so this
  # yields the last symbol of the right-hand side.
  def previous_sym
    rule.rhs[position - 1]
  end

  def display_name
    names = rule.rhs.map(&:display_name)
    names.insert(position, "•")
    "#{names.join(" ")} (rule #{rule.id})"
  end

  # Right after position
  def display_rest
    rest = rule.rhs[position..-1].map(&:display_name)
    ". #{rest.join(" ")} (rule #{rule.id})"
  end
end
543
+
544
+ attr_reader :states, :item_to_state,
545
+ :reads_relation, :includes_relation, :lookback_relation
546
+
547
# grammar     - grammar object the states are computed from
# trace_state - when true, trace_state prints its messages
def initialize(grammar, trace_state: false)
  @grammar = grammar
  @trace_state = trace_state

  @states = []
  @item_to_state = {}

  # `DR(p, A) = {t ∈ T | p -(A)-> r -(t)-> }`
  # where p is state, A is nterm, t is term.
  #
  # `@direct_read_sets` is a hash whose
  # key is [state.id, nterm.token_id],
  # value is bitmap of term.
  @direct_read_sets = {}

  # Reads relation on nonterminal transitions (pair of state and nterm)
  # `(p, A) reads (r, C) iff p -(A)-> r -(C)-> and C =>* ε`
  # where p, r are state, A, C are nterm.
  #
  # `@reads_relation` is a hash whose
  # key is [state.id, nterm.token_id],
  # value is array of [state.id, nterm.token_id].
  @reads_relation = {}

  # `@read_sets` is a hash whose
  # key is [state.id, nterm.token_id],
  # value is bitmap of term.
  @read_sets = {}

  # `(p, A) includes (p', B) iff B -> βAγ, γ =>* ε, p' -(β)-> p`
  # where p, p' are state, A, B are nterm, β, γ is sequence of symbol.
  #
  # `@includes_relation` is a hash whose
  # key is [state.id, nterm.token_id],
  # value is array of [state.id, nterm.token_id].
  @includes_relation = {}

  # `(q, A -> ω) lookback (p, A) iff p -(ω)-> q`
  # where p, q are state, A -> ω is rule, A is nterm, ω is sequence of symbol.
  #
  # `@lookback_relation` is a hash whose
  # key is [state.id, rule.id],
  # value is array of [state.id, nterm.token_id].
  @lookback_relation = {}

  # `@follow_sets` is a hash whose
  # key is [state.id, nterm.token_id] (it is computed over the
  # nonterminal transitions, like `@read_sets`),
  # value is bitmap of term.
  @follow_sets = {}

  # `LA(q, A -> ω) = ∪{Follow(p, A) | (q, A -> ω) lookback (p, A)}`
  #
  # `@la` is a hash whose
  # key is [state.id, rule.id],
  # value is bitmap of term.
  @la = {}
end
606
+
607
# Runs every computation phase in order, each wrapped in report_duration
# under the phase's own name. Returns what the last phase's
# report_duration call returns.
def compute
  # TODO: Move report_grammar to other place
  # report_grammar(@grammar)

  phases = [
    # Look Ahead Sets
    :compute_lr0_states,
    :compute_direct_read_sets,
    :compute_reads_relation,
    :compute_read_sets,
    :compute_includes_relation,
    :compute_lookback_relation,
    :compute_follow_sets,
    :compute_look_ahead_sets,
    # Conflicts
    :compute_conflicts,
    :compute_default_reduction,
  ]

  phases.map { |phase| report_duration(phase) { send(phase) } }.last
end
626
+
627
# A new StatesReporter bound to this object.
def reporter
  StatesReporter.new(self)
end

# Number of states created so far.
def states_count
  @states.size
end

# The readers below forward to the grammar given at construction.

def symbols
  @grammar.symbols
end

def terms
  @grammar.terms
end

def nterms
  @grammar.nterms
end

def rules
  @grammar.rules
end

def accept_symbol
  @grammar.accept_symbol
end

def eof_symbol
  @grammar.eof_symbol
end

def find_symbol_by_s_value!(s_value)
  @grammar.find_symbol_by_s_value!(s_value)
end
662
+
663
# Each reader below returns a fresh hash with the same keys as the
# corresponding bitmap hash and every bitmap decoded by bitmap_to_terms.

def direct_read_sets
  @direct_read_sets.transform_values { |bitmap| bitmap_to_terms(bitmap) }
end

def read_sets
  @read_sets.transform_values { |bitmap| bitmap_to_terms(bitmap) }
end

def follow_sets
  @follow_sets.transform_values { |bitmap| bitmap_to_terms(bitmap) }
end

def la
  @la.transform_values { |bitmap| bitmap_to_terms(bitmap) }
end
702
+
703
+ private
704
+
705
# Prints the rules of `grammar` to standard output. Consecutive rules with
# the same lhs are listed with "|"; a new lhs starts after a blank line.
def report_grammar(grammar)
  out = ["Grammar\n\n"]
  previous_lhs = nil

  grammar.rules.each do |rule|
    rhs_text = rule.rhs.empty? ? "ε" : rule.rhs.map(&:display_name).join(" ")
    lhs_name = rule.lhs.display_name

    if rule.lhs == previous_lhs
      out << format("%5d %s| %s\n", rule.id, " " * lhs_name.length, rhs_text)
    else
      out << "\n" << format("%5d %s: %s\n", rule.id, lhs_name, rhs_text)
    end

    previous_lhs = rule.lhs
  end

  puts out.join
end
728
+
729
# Prints msg only when tracing was enabled at construction.
def trace_state(msg)
  return unless @trace_state

  puts msg
end
732
+
733
# Returns [state, created]: the state already registered for `kernels` in
# `states_creted` with false, or a newly appended state with true.
def create_state(accessing_symbol, kernels, states_creted)
  # An item can appear in several states,
  # so `kernels` (not `kernels.first`) has to be used as the key.
  #
  # For example...
  #
  # %%
  # program: '+' strings_1
  #        | '-' strings_2
  #        ;
  #
  # strings_1: string_1
  #          ;
  #
  # strings_2: string_1
  #          | string_2
  #          ;
  #
  # string_1: string
  #         ;
  #
  # string_2: string '+'
  #         ;
  #
  # string: tSTRING
  #       ;
  # %%
  #
  # For this grammar, there are 2 states
  #
  # State A
  #   string_1: string •
  #
  # State B
  #   string_1: string •
  #   string_2: string • '+'
  #
  existing = states_creted[kernels]
  return [existing, false] if existing

  fresh = State.new(@states.count, accessing_symbol, kernels)
  @states << fresh
  states_creted[kernels] = fresh

  [fresh, true]
end
778
+
779
# Computes the closure of state's kernels, stores it on the state, derives
# the state's shifts and reduces, and registers each kernel item in
# @item_to_state.
def setup_state(state)
  # closure
  closure = []
  # Items already placed on the work list; keeps each item from being
  # expanded twice. (A second hash, `visited`, used to be filled here but
  # was never read.)
  queued = {}
  items = state.kernels.dup

  items.each do |item|
    queued[item] = true
  end

  while (item = items.shift) do
    sym = item.next_sym
    next unless sym && sym.nterm?

    @grammar.find_rules_by_symbol!(sym).each do |rule|
      i = Item.new(rule: rule, position: 0)
      next if queued[i]

      closure << i
      items << i
      queued[i] = true
    end
  end

  state.closure = closure.sort_by {|i| i.rule.id }

  # Trace
  trace_state("Closure: input\n")
  state.kernels.each do |item|
    trace_state(" #{item.display_rest}\n")
  end
  trace_state("\n\n")
  trace_state("Closure: output\n")
  state.items.each do |item|
    trace_state(" #{item.display_rest}\n")
  end
  trace_state("\n\n")

  # shift & reduce
  state.compute_shifts_reduces

  state.kernels.each do |item|
    @item_to_state[item] = state
  end
end
825
+
826
# Appends `state` to the work queue `states`, emitting a trace line that
# shows the current number of states and the symbol before the dot of the
# state's first kernel item.
def enqueue_state(states, state)
  # Trace
  previous = state.kernels.first.previous_sym
  message = format(
    "state_list_append (state = %d, symbol = %d (%s))",
    @states.count, previous.number, previous.display_name
  )
  trace_state(message)

  states << state
end
836
+
837
# Builds all states breadth-first, starting from the item of the first
# grammar rule at position 0, and links every shift to its target state.
def compute_lr0_states
  # State queue
  queue = []
  states_creted = {}

  initial_item = Item.new(rule: @grammar.rules.first, position: 0)
  state, _ = create_state(symbols.first, [initial_item], states_creted)
  enqueue_state(queue, state)

  until queue.empty?
    state = queue.shift

    # Trace
    #
    # Bison 3.8.2 renders "(reached by "end-of-input")" for State 0 but
    # I think it is not correct...
    previous = state.kernels.first.previous_sym
    trace_state("Processing state #{state.id} (reached by #{previous.display_name})\n")

    setup_state(state)

    # It seems Bison 3.8.2 iterates transitions order by symbol number
    ordered_shifts = state.shifts.sort_by { |shift| shift.next_sym.number }
    ordered_shifts.each do |shift|
      new_state, created = create_state(shift.next_sym, shift.next_items, states_creted)
      state.set_items_to_state(shift.next_items, new_state)
      enqueue_state(queue, new_state) if created
    end
  end
end
865
+
866
# Returns [state, nterm, next_state] for every nonterminal transition of
# every state, in state order.
def nterm_transitions
  @states.flat_map do |state|
    state.nterm_transitions.map do |shift, next_state|
      [state, shift.next_sym, next_state]
    end
  end
end
878
+
879
# Fills @direct_read_sets: for every nonterminal transition (state, nterm)
# stores a bitmap with bit `sym.number` set for each terminal `sym` that
# can be shifted in the state reached by that transition.
def compute_direct_read_sets
  @states.each do |state|
    state.nterm_transitions.each do |shift, next_state|
      nterm = shift.next_sym

      # Encode terms into bitmap
      bit = next_state.term_transitions.inject(0) do |acc, (term_shift, _)|
        acc | (1 << term_shift.next_sym.number)
      end

      @direct_read_sets[[state.id, nterm.token_id]] = bit
    end
  end
end
898
+
899
# Fills @reads_relation: (state, nterm) reads (next_state, nterm2) for each
# nullable nterm2 that has a transition out of the state reached by nterm.
def compute_reads_relation
  @states.each do |state|
    state.nterm_transitions.each do |shift, next_state|
      key = [state.id, shift.next_sym.token_id]

      next_state.nterm_transitions.each do |shift2, _next_state2|
        nterm2 = shift2.next_sym
        next unless nterm2.nullable

        (@reads_relation[key] ||= []) << [next_state.id, nterm2.token_id]
      end
    end
  end
end
914
+
915
# Runs Digraph over all nonterminal transitions with the reads relation,
# starting from the direct read sets, and stores the result in @read_sets.
def compute_read_sets
  nodes = nterm_transitions.map { |state, nterm, _next_state| [state.id, nterm.token_id] }
  digraph = Digraph.new(nodes, @reads_relation, @direct_read_sets)

  @read_sets = digraph.compute
end
922
+
923
+ # Execute transition of state by symbols
924
+ # then return final state.
925
# Follows `symbols` one by one from `state` and returns the state reached
# (`state` itself when `symbols` is empty).
def transition(state, symbols)
  symbols.inject(state) { |current, sym| current.transition(sym) }
end
932
+
933
# Fills @includes_relation. For every nonterminal transition (state, nterm)
# and every rule of nterm, walks the rule's rhs from the right: each
# nonterminal `sym` found there gets
#   [state2.id, sym.token_id] includes [state.id, nterm.token_id]
# where state2 is reached from `state` by the symbols before `sym`. The walk
# stops at the first terminal, or after the first non-nullable nonterminal.
def compute_includes_relation
  @states.each do |state|
    state.nterm_transitions.each do |shift, _next_state|
      nterm = shift.next_sym

      @grammar.find_rules_by_symbol!(nterm).each do |rule|
        (rule.rhs.count - 1).downto(0) do |i|
          sym = rule.rhs[i]
          break if sym.term?

          state2 = transition(state, rule.rhs[0...i])
          # p' = state, B = nterm, p = state2, A = sym
          key = [state2.id, sym.token_id]
          # TODO: need to omit if state == state2 ?
          (@includes_relation[key] ||= []) << [state.id, nterm.token_id]

          break unless sym.nullable
        end
      end
    end
  end
end
958
+
959
# Fills @lookback_relation. For every nonterminal transition (state, nterm)
# and every rule of nterm, the state reached from `state` through the whole
# rhs gets [state2.id, rule.id] lookback [state.id, nterm.token_id].
def compute_lookback_relation
  @states.each do |state|
    state.nterm_transitions.each do |shift, _next_state|
      nterm = shift.next_sym
      origin = [state.id, nterm.token_id]

      @grammar.find_rules_by_symbol!(nterm).each do |rule|
        # p = state, A = nterm, q = state2, A -> ω = rule
        state2 = transition(state, rule.rhs)
        (@lookback_relation[[state2.id, rule.id]] ||= []) << [state.id, nterm.token_id]
      end
    end
  end
end
973
+
974
# Runs Digraph over all nonterminal transitions with the includes relation,
# starting from the read sets, and stores the result in @follow_sets.
def compute_follow_sets
  nodes = nterm_transitions.map { |state, nterm, _next_state| [state.id, nterm.token_id] }
  digraph = Digraph.new(nodes, @includes_relation, @read_sets)

  @follow_sets = digraph.compute
end
981
+
982
# Fills @la: for each (state, rule) with lookback entries, ORs together the
# follow sets of those entries. The decoded terms are then handed to
# state.set_look_ahead, except for states where no conflict is possible.
def compute_look_ahead_sets
  @states.each do |state|
    rules.each do |rule|
      key = [state.id, rule.id]
      lookbacks = @lookback_relation[key]
      next unless lookbacks

      updated = false
      lookbacks.each do |state2_id, nterm_token_id|
        # q = state, A -> ω = rule, p = state2, A = nterm
        follows = @follow_sets[[state2_id, nterm_token_id]]
        next if follows == 0

        @la[key] = (@la[key] || 0) | follows
        updated = true
      end

      # Nothing was contributed: leave @la and the reduce untouched.
      next unless updated

      # No risk of conflict when
      # * the state only has single reduce
      # * the state has no term_transitions
      next if state.reduces.count == 1 && state.term_transitions.count == 0

      # Decode once with the accumulated bitmap instead of once per
      # lookback entry; the last assignment was the only one that survived.
      state.set_look_ahead(rule, bitmap_to_terms(@la[key]))
    end
  end
end
1009
+
1010
# Decodes a bitmap into symbols: for every set bit i (lowest first) the
# symbol returned by @grammar.find_symbol_by_number!(i). Values that are
# not greater than zero decode to [].
def bitmap_to_terms(bit)
  return [] unless bit > 0

  set_positions = (0...bit.bit_length).select { |i| bit[i] == 1 }
  set_positions.map { |i| @grammar.find_symbol_by_number!(i) }
end
1025
+
1026
# Detects conflicts in every state: shift/reduce first, then reduce/reduce.
def compute_conflicts
  compute_shift_reduce_conflicts
  compute_reduece_reduce_conflicts
end
1030
+
1031
# For every state, checks each shift against each reduce whose look-ahead
# contains the shifted symbol. Without precedence on both sides the pair is
# recorded in state.conflicts; otherwise it is resolved by precedence and
# then associativity and recorded in state.resolved_conflicts.
def compute_shift_reduce_conflicts
  states.each do |state|
    state.shifts.each do |shift|
      sym = shift.next_sym

      state.reduces.each do |reduce|
        look_ahead = reduce.look_ahead
        next unless look_ahead && look_ahead.include?(sym)

        # Shift/Reduce conflict
        shift_prec = sym.precedence
        reduce_prec = reduce.item.rule.precedence

        # Can resolve only when both have prec
        unless shift_prec && reduce_prec
          state.conflicts << [sym, reduce, :no_precedence]
          next
        end

        same_prec = nil
        if shift_prec < reduce_prec
          which = :reduce
        elsif shift_prec > reduce_prec
          which = :shift
        else
          # shift_prec == reduce_prec, then check associativity
          case sym.precedence.type
          when :right
            which = :shift
            same_prec = true
          when :left
            which = :reduce
            same_prec = true
          when :nonassoc
            # Can not resolve
            #
            # nonassoc creates "run-time" error, precedence creates "compile-time" error.
            # Then omit both the shift and reduce.
            #
            # https://www.gnu.org/software/bison/manual/html_node/Using-Precedence.html
            which = :error
          else
            raise "Unknown precedence type. #{sym}"
          end
        end

        state.resolved_conflicts << State::ResolvedConflict.new(symbol: sym, reduce: reduce, which: which, same_prec: same_prec)
        # :reduce drops the shift, :shift drops the symbol from the reduce,
        # :error drops both.
        shift.not_selected = true unless which == :shift
        reduce.add_not_selected_symbol(sym) unless which == :reduce
      end
    end
  end
end
1092
+
1093
# For every state, records a :reduce_reduce conflict for each reduce whose
# look-ahead overlaps the look-aheads of the reduces that precede it.
# Reduces without look-ahead are ignored.
def compute_reduece_reduce_conflicts
  states.each do |state|
    seen = []

    state.reduces.each do |reduce|
      look_ahead = reduce.look_ahead
      next if look_ahead.nil?

      overlap = seen & look_ahead
      seen += look_ahead

      next if overlap.empty?

      state.conflicts << [overlap.dup, reduce, :reduce_reduce]
    end
  end
end
1109
+
1110
# Picks the default reduction rule of every state that has reduces: the
# rule of the reduce with the most look-ahead symbols, lowest rule id on a
# tie. States that shift the grammar's error symbol are left without one.
def compute_default_reduction
  states.each do |state|
    next if state.reduces.empty?
    # Do not set, if shift with `error` exists.
    next if state.shifts.any? { |shift| shift.next_sym == @grammar.error_symbol }

    preferred = state.reduces.min_by do |r|
      [-(r.look_ahead || []).count, r.rule.id]
    end
    state.default_reduction_rule = preferred.rule
  end
end
1123
+ end
1124
+ end