lrama 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/lrama/states.rb CHANGED
@@ -1,58 +1,7 @@
1
+ require "forwardable"
1
2
  require "lrama/report"
2
3
 
3
4
  module Lrama
4
- # Algorithm Digraph of https://dl.acm.org/doi/pdf/10.1145/69622.357187 (P. 625)
5
- class Digraph
6
- def initialize(sets, relation, base_function)
7
- # X in the paper
8
- @sets = sets
9
- # R in the paper
10
- @relation = relation
11
- # F' in the paper
12
- @base_function = base_function
13
- # S in the paper
14
- @stack = []
15
- # N in the paper
16
- @h = Hash.new(0)
17
- # F in the paper
18
- @result = {}
19
- end
20
-
21
- def compute
22
- @sets.each do |x|
23
- next if @h[x] != 0
24
- traverse(x)
25
- end
26
-
27
- return @result
28
- end
29
-
30
- private
31
-
32
- def traverse(x)
33
- @stack.push(x)
34
- d = @stack.count
35
- @h[x] = d
36
- @result[x] = @base_function[x] # F x = F' x
37
-
38
- @relation[x] && @relation[x].each do |y|
39
- traverse(y) if @h[y] == 0
40
- @h[x] = [@h[x], @h[y]].min
41
- @result[x] |= @result[y] # F x = F x + F y
42
- end
43
-
44
- if @h[x] == d
45
- while true do
46
- z = @stack.pop
47
- @h[z] = Float::INFINITY
48
- @result[z] = @result[x] # F (Top of S) = F x
49
-
50
- break if z == x
51
- end
52
- end
53
- end
54
- end
55
-
56
5
  class State
57
6
  class Reduce
58
7
  # https://www.gnu.org/software/bison/manual/html_node/Default-Reductions.html
@@ -122,14 +71,17 @@ module Lrama
122
71
  end
123
72
  end
124
73
 
74
+ Conflict = Struct.new(:symbols, :reduce, :type, keyword_init: true)
75
+
125
76
  attr_reader :id, :accessing_symbol, :kernels, :conflicts, :resolved_conflicts,
126
- :default_reduction_rule
127
- attr_accessor :closure, :shifts, :reduces
77
+ :default_reduction_rule, :closure, :items
78
+ attr_accessor :shifts, :reduces
128
79
 
129
80
  def initialize(id, accessing_symbol, kernels)
130
81
  @id = id
131
82
  @accessing_symbol = accessing_symbol
132
83
  @kernels = kernels.freeze
84
+ @items = @kernels
133
85
  # Manage relationships between items to state
134
86
  # to resolve next state
135
87
  @items_to_state = {}
@@ -138,8 +90,9 @@ module Lrama
138
90
  @default_reduction_rule = nil
139
91
  end
140
92
 
141
- def items
142
- @kernels + @closure
93
+ def closure=(closure)
94
+ @closure = closure
95
+ @items = @kernels + @closure
143
96
  end
144
97
 
145
98
  def non_default_reduces
@@ -162,6 +115,7 @@ module Lrama
162
115
  end
163
116
  end
164
117
 
118
+ # It seems Bison 3.8.2 iterates transitions order by symbol number
165
119
  shifts = _shifts.sort_by do |next_sym, new_items|
166
120
  next_sym.number
167
121
  end.map do |next_sym, new_items|
@@ -256,241 +210,16 @@ module Lrama
256
210
  end
257
211
  end
258
212
  end
259
- end
260
213
 
261
- class StatesReporter
262
- def initialize(states)
263
- @states = states
214
+ def sr_conflicts
215
+ @conflicts.select do |conflict|
216
+ conflict.type == :shift_reduce
217
+ end
264
218
  end
265
219
 
266
- def report(io, states: false, itemsets: false, lookaheads: false, solved: false, verbose: false)
267
- @states.states.each do |state|
268
- # Report State
269
- io << "State #{state.id}\n\n"
270
-
271
- # Report item
272
- last_lhs = nil
273
- list = itemsets ? state.items : state.kernels
274
- list.sort_by {|i| [i.rule_id, i.position] }.each do |item|
275
- rule = item.rule
276
- position = item.position
277
- if rule.rhs.empty?
278
- r = "ε •"
279
- else
280
- r = rule.rhs.map(&:display_name).insert(position, "•").join(" ")
281
- end
282
- if rule.lhs == last_lhs
283
- l = " " * rule.lhs.id.s_value.length + "|"
284
- else
285
- l = rule.lhs.id.s_value + ":"
286
- end
287
- la = ""
288
- if lookaheads && item.end_of_rule?
289
- reduce = state.find_reduce_by_item!(item)
290
- look_ahead = reduce.selected_look_ahead
291
- if !look_ahead.empty?
292
- la = " [#{look_ahead.map(&:display_name).join(", ")}]"
293
- end
294
- end
295
- last_lhs = rule.lhs
296
-
297
- io << sprintf("%5i %s %s%s\n", rule.id, l, r, la)
298
- end
299
- io << "\n"
300
-
301
-
302
- # Report shifts
303
- tmp = state.term_transitions.select do |shift, _|
304
- !shift.not_selected
305
- end.map do |shift, next_state|
306
- [shift.next_sym, next_state.id]
307
- end
308
- max_len = tmp.map(&:first).map(&:display_name).map(&:length).max
309
- tmp.each do |term, state_id|
310
- io << " #{term.display_name.ljust(max_len)} shift, and go to state #{state_id}\n"
311
- end
312
- io << "\n" if !tmp.empty?
313
-
314
-
315
- # Report error caused by %nonassoc
316
- nl = false
317
- tmp = state.resolved_conflicts.select do |resolved|
318
- resolved.which == :error
319
- end.map do |error|
320
- error.symbol.display_name
321
- end
322
- max_len = tmp.map(&:length).max
323
- tmp.each do |name|
324
- nl = true
325
- io << " #{name.ljust(max_len)} error (nonassociative)\n"
326
- end
327
- io << "\n" if !tmp.empty?
328
-
329
-
330
- # Report reduces
331
- nl = false
332
- max_len = state.non_default_reduces.flat_map(&:look_ahead).compact.map(&:display_name).map(&:length).max || 0
333
- max_len = [max_len, "$default".length].max if state.default_reduction_rule
334
- @states.terms.each do |term|
335
- reduce = state.non_default_reduces.find do |r|
336
- r.look_ahead.include?(term)
337
- end
338
-
339
- next unless reduce
340
-
341
- rule = reduce.item.rule
342
- io << " #{term.display_name.ljust(max_len)} reduce using rule #{rule.id} (#{rule.lhs.display_name})\n"
343
- nl = true
344
- end
345
- if r = state.default_reduction_rule
346
- nl = true
347
- s = "$default".ljust(max_len)
348
-
349
- if r.initial_rule?
350
- io << " #{s} accept\n"
351
- else
352
- io << " #{s} reduce using rule #{r.id} (#{r.lhs.display_name})\n"
353
- end
354
- end
355
- io << "\n" if nl
356
-
357
-
358
- # Report nonterminal transitions
359
- tmp = []
360
- max_len = 0
361
- state.nterm_transitions.each do |shift, next_state|
362
- nterm = shift.next_sym
363
- tmp << [nterm, next_state.id]
364
- max_len = [max_len, nterm.id.s_value.length].max
365
- end
366
- tmp.uniq!
367
- tmp.sort_by! do |nterm, state_id|
368
- nterm.number
369
- end
370
- tmp.each do |nterm, state_id|
371
- io << " #{nterm.id.s_value.ljust(max_len)} go to state #{state_id}\n"
372
- end
373
- io << "\n" if !tmp.empty?
374
-
375
-
376
- if solved
377
- # Report conflict resolutions
378
- state.resolved_conflicts.each do |resolved|
379
- io << " #{resolved.report_message}\n"
380
- end
381
- io << "\n" if !state.resolved_conflicts.empty?
382
- end
383
-
384
-
385
- if verbose
386
- # Report direct_read_sets
387
- io << " [Direct Read sets]\n"
388
- direct_read_sets = @states.direct_read_sets
389
- @states.nterms.each do |nterm|
390
- terms = direct_read_sets[[state.id, nterm.token_id]]
391
- next if !terms
392
- next if terms.empty?
393
-
394
- str = terms.map {|sym| sym.id.s_value }.join(", ")
395
- io << " read #{nterm.id.s_value} shift #{str}\n"
396
- end
397
- io << "\n"
398
-
399
-
400
- # Reprot reads_relation
401
- io << " [Reads Relation]\n"
402
- @states.nterms.each do |nterm|
403
- a = @states.reads_relation[[state.id, nterm.token_id]]
404
- next if !a
405
-
406
- a.each do |state_id2, nterm_id2|
407
- n = @states.nterms.find {|n| n.token_id == nterm_id2 }
408
- io << " (State #{state_id2}, #{n.id.s_value})\n"
409
- end
410
- end
411
- io << "\n"
412
-
413
-
414
- # Reprot read_sets
415
- io << " [Read sets]\n"
416
- read_sets = @states.read_sets
417
- @states.nterms.each do |nterm|
418
- terms = read_sets[[state.id, nterm.token_id]]
419
- next if !terms
420
- next if terms.empty?
421
-
422
- terms.each do |sym|
423
- io << " #{sym.id.s_value}\n"
424
- end
425
- end
426
- io << "\n"
427
-
428
-
429
- # Reprot includes_relation
430
- io << " [Includes Relation]\n"
431
- @states.nterms.each do |nterm|
432
- a = @states.includes_relation[[state.id, nterm.token_id]]
433
- next if !a
434
-
435
- a.each do |state_id2, nterm_id2|
436
- n = @states.nterms.find {|n| n.token_id == nterm_id2 }
437
- io << " (State #{state.id}, #{nterm.id.s_value}) -> (State #{state_id2}, #{n.id.s_value})\n"
438
- end
439
- end
440
- io << "\n"
441
-
442
-
443
- # Report lookback_relation
444
- io << " [Lookback Relation]\n"
445
- @states.rules.each do |rule|
446
- a = @states.lookback_relation[[state.id, rule.id]]
447
- next if !a
448
-
449
- a.each do |state_id2, nterm_id2|
450
- n = @states.nterms.find {|n| n.token_id == nterm_id2 }
451
- io << " (Rule: #{rule.to_s}) -> (State #{state_id2}, #{n.id.s_value})\n"
452
- end
453
- end
454
- io << "\n"
455
-
456
-
457
- # Reprot follow_sets
458
- io << " [Follow sets]\n"
459
- follow_sets = @states.follow_sets
460
- @states.nterms.each do |nterm|
461
- terms = follow_sets[[state.id, nterm.token_id]]
462
-
463
- next if !terms
464
-
465
- terms.each do |sym|
466
- io << " #{nterm.id.s_value} -> #{sym.id.s_value}\n"
467
- end
468
- end
469
- io << "\n"
470
-
471
-
472
- # Report LA
473
- io << " [Look-Ahead Sets]\n"
474
- tmp = []
475
- max_len = 0
476
- @states.rules.each do |rule|
477
- syms = @states.la[[state.id, rule.id]]
478
- next if !syms
479
-
480
- tmp << [rule, syms]
481
- max_len = ([max_len] + syms.map {|s| s.id.s_value.length }).max
482
- end
483
- tmp.each do |rule, syms|
484
- syms.each do |sym|
485
- io << " #{sym.id.s_value.ljust(max_len)} reduce using rule #{rule.id} (#{rule.lhs.id.s_value})\n"
486
- end
487
- end
488
- io << "\n" if !tmp.empty?
489
- end
490
-
491
-
492
- # End of Report State
493
- io << "\n"
220
+ def rr_conflicts
221
+ @conflicts.select do |conflict|
222
+ conflict.type == :reduce_reduce
494
223
  end
495
224
  end
496
225
  end
@@ -500,8 +229,12 @@ module Lrama
500
229
  # "Efficient Computation of LALR(1) Look-Ahead Sets"
501
230
  # https://dl.acm.org/doi/pdf/10.1145/69622.357187
502
231
  class States
232
+ extend Forwardable
503
233
  include Lrama::Report::Duration
504
234
 
235
+ def_delegators "@grammar", :symbols, :terms, :nterms, :rules,
236
+ :accept_symbol, :eof_symbol, :find_symbol_by_s_value!
237
+
505
238
  # TODO: Validate position is not over rule rhs
506
239
  Item = Struct.new(:rule, :position, keyword_init: true) do
507
240
  # Optimization for States#setup_state
@@ -541,15 +274,14 @@ module Lrama
541
274
  end
542
275
  end
543
276
 
544
- attr_reader :states, :item_to_state,
545
- :reads_relation, :includes_relation, :lookback_relation
277
+ attr_reader :states, :reads_relation, :includes_relation, :lookback_relation
546
278
 
547
- def initialize(grammar, trace_state: false)
279
+ def initialize(grammar, warning, trace_state: false)
548
280
  @grammar = grammar
281
+ @warning = warning
549
282
  @trace_state = trace_state
550
283
 
551
284
  @states = []
552
- @item_to_state = {}
553
285
 
554
286
  # `DR(p, A) = {t ∈ T | p -(A)-> r -(t)-> }`
555
287
  # where p is state, A is nterm, t is term.
@@ -568,8 +300,6 @@ module Lrama
568
300
  # value is array of [state.id, nterm.token_id].
569
301
  @reads_relation = {}
570
302
 
571
- # `read_sets` is a hash whose key is [state.id, nterm.token_id]
572
- #
573
303
  # `@read_sets` is a hash whose
574
304
  # key is [state.id, nterm.token_id],
575
305
  # value is bitmap of term.
@@ -605,9 +335,6 @@ module Lrama
605
335
  end
606
336
 
607
337
  def compute
608
- # TODO: Move report_grammar to other place
609
- # report_grammar(@grammar)
610
-
611
338
  # Look Ahead Sets
612
339
  report_duration(:compute_lr0_states) { compute_lr0_states }
613
340
  report_duration(:compute_direct_read_sets) { compute_direct_read_sets }
@@ -622,6 +349,8 @@ module Lrama
622
349
  report_duration(:compute_conflicts) { compute_conflicts }
623
350
 
624
351
  report_duration(:compute_default_reduction) { compute_default_reduction }
352
+
353
+ check_conflicts
625
354
  end
626
355
 
627
356
  def reporter
@@ -632,34 +361,6 @@ module Lrama
632
361
  @states.count
633
362
  end
634
363
 
635
- def symbols
636
- @grammar.symbols
637
- end
638
-
639
- def terms
640
- @grammar.terms
641
- end
642
-
643
- def nterms
644
- @grammar.nterms
645
- end
646
-
647
- def rules
648
- @grammar.rules
649
- end
650
-
651
- def accept_symbol
652
- @grammar.accept_symbol
653
- end
654
-
655
- def eof_symbol
656
- @grammar.eof_symbol
657
- end
658
-
659
- def find_symbol_by_s_value!(s_value)
660
- @grammar.find_symbol_by_s_value!(s_value)
661
- end
662
-
663
364
  def direct_read_sets
664
365
  h = {}
665
366
 
@@ -702,32 +403,28 @@ module Lrama
702
403
 
703
404
  private
704
405
 
705
- def report_grammar(grammar)
706
- str = "Grammar\n\n"
707
- last_lhs = nil
406
+ def sr_conflicts
407
+ @states.flat_map(&:sr_conflicts)
408
+ end
708
409
 
709
- grammar.rules.each do |rule|
710
- if rule.rhs.empty?
711
- r = "ε"
712
- else
713
- r = rule.rhs.map(&:display_name).join(" ")
714
- end
410
+ def rr_conflicts
411
+ @states.flat_map(&:rr_conflicts)
412
+ end
715
413
 
716
- if rule.lhs == last_lhs
717
- str << sprintf("%5d %s| %s\n", rule.id, " " * rule.lhs.display_name.length, r)
718
- else
719
- str << "\n"
720
- str << sprintf("%5d %s: %s\n", rule.id, rule.lhs.display_name, r)
721
- end
414
+ def initial_attrs
415
+ h = {}
722
416
 
723
- last_lhs = rule.lhs
417
+ attrs.each do |attr|
418
+ h[attr.id] = false
724
419
  end
725
420
 
726
- puts str
421
+ h
727
422
  end
728
423
 
729
- def trace_state(msg)
730
- puts msg if @trace_state
424
+ def trace_state
425
+ if @trace_state
426
+ yield STDERR
427
+ end
731
428
  end
732
429
 
733
430
  def create_state(accessing_symbol, kernels, states_creted)
@@ -804,32 +501,30 @@ module Lrama
804
501
  state.closure = closure.sort_by {|i| i.rule.id }
805
502
 
806
503
  # Trace
807
- trace_state("Closure: input\n")
808
- state.kernels.each do |item|
809
- trace_state(" #{item.display_rest}\n")
810
- end
811
- trace_state("\n\n")
812
- trace_state("Closure: output\n")
813
- state.items.each do |item|
814
- trace_state(" #{item.display_rest}\n")
504
+ trace_state do |out|
505
+ out << "Closure: input\n"
506
+ state.kernels.each do |item|
507
+ out << " #{item.display_rest}\n"
508
+ end
509
+ out << "\n\n"
510
+ out << "Closure: output\n"
511
+ state.items.each do |item|
512
+ out << " #{item.display_rest}\n"
513
+ end
514
+ out << "\n\n"
815
515
  end
816
- trace_state("\n\n")
817
516
 
818
517
  # shift & reduce
819
518
  state.compute_shifts_reduces
820
-
821
- state.kernels.each do |item|
822
- @item_to_state[item] = state
823
- end
824
519
  end
825
520
 
826
521
  def enqueue_state(states, state)
827
522
  # Trace
828
523
  previous = state.kernels.first.previous_sym
829
- trace_state(
830
- sprintf("state_list_append (state = %d, symbol = %d (%s))",
524
+ trace_state do |out|
525
+ out << sprintf("state_list_append (state = %d, symbol = %d (%s))",
831
526
  @states.count, previous.number, previous.display_name)
832
- )
527
+ end
833
528
 
834
529
  states << state
835
530
  end
@@ -848,14 +543,13 @@ module Lrama
848
543
  # Bison 3.8.2 renders "(reached by "end-of-input")" for State 0 but
849
544
  # I think it is not correct...
850
545
  previous = state.kernels.first.previous_sym
851
- trace_state("Processing state #{state.id} (reached by #{previous.display_name})\n")
546
+ trace_state do |out|
547
+ out << "Processing state #{state.id} (reached by #{previous.display_name})\n"
548
+ end
852
549
 
853
550
  setup_state(state)
854
551
 
855
- # It seems Bison 3.8.2 iterates transitions order by symbol number
856
- state.shifts.sort_by do |shift|
857
- shift.next_sym.number
858
- end.each do |shift|
552
+ state.shifts.each do |shift|
859
553
  new_state, created = create_state(shift.next_sym, shift.next_items, states_creted)
860
554
  state.set_items_to_state(shift.next_items, new_state)
861
555
  enqueue_state(states, new_state) if created
@@ -880,18 +574,13 @@ module Lrama
880
574
  @states.each do |state|
881
575
  state.nterm_transitions.each do |shift, next_state|
882
576
  nterm = shift.next_sym
883
- bit = 0
884
- a = []
885
577
 
886
- next_state.term_transitions.each do |shift, _|
887
- sym = shift.next_sym
888
- # Encode terms into bitmap
889
- bit |= (1 << sym.number)
890
- a << sym
578
+ ary = next_state.term_transitions.map do |shift, _|
579
+ shift.next_sym.number
891
580
  end
892
581
 
893
582
  key = [state.id, nterm.token_id]
894
- @direct_read_sets[key] = bit
583
+ @direct_read_sets[key] = Bitmap.from_array(ary)
895
584
  end
896
585
  end
897
586
  end
@@ -941,7 +630,6 @@ module Lrama
941
630
  sym = rule.rhs[i]
942
631
 
943
632
  break if sym.term?
944
- beta = []
945
633
  state2 = transition(state, rule.rhs[0...i])
946
634
  # p' = state, B = nterm, p = state2, A = sym
947
635
  key = [state2.id, sym.token_id]
@@ -998,7 +686,7 @@ module Lrama
998
686
 
999
687
  # No risk of conflict when
1000
688
  # * the state only has single reduce
1001
- # * the state only has term_transitions (GOTO)
689
+ # * the state only has nterm_transitions (GOTO)
1002
690
  next if state.reduces.count == 1 && state.term_transitions.count == 0
1003
691
 
1004
692
  state.set_look_ahead(rule, bitmap_to_terms(look_ahead))
@@ -1008,24 +696,15 @@ module Lrama
1008
696
  end
1009
697
 
1010
698
  def bitmap_to_terms(bit)
1011
- a = []
1012
- i = 0
1013
-
1014
- while bit > 0 do
1015
- if bit & 1 == 1
1016
- a << @grammar.find_symbol_by_number!(i)
1017
- end
1018
-
1019
- i += 1
1020
- bit >>= 1
699
+ ary = Bitmap.to_array(bit)
700
+ ary.map do |i|
701
+ @grammar.find_symbol_by_number!(i)
1021
702
  end
1022
-
1023
- return a
1024
703
  end
1025
704
 
1026
705
  def compute_conflicts
1027
706
  compute_shift_reduce_conflicts
1028
- compute_reduece_reduce_conflicts
707
+ compute_reduce_reduce_conflicts
1029
708
  end
1030
709
 
1031
710
  def compute_shift_reduce_conflicts
@@ -1043,7 +722,7 @@ module Lrama
1043
722
 
1044
723
  # Can resolve only when both have prec
1045
724
  unless shift_prec && reduce_prec
1046
- state.conflicts << [sym, reduce, :no_precedence]
725
+ state.conflicts << State::Conflict.new(symbols: [sym], reduce: reduce, type: :shift_reduce)
1047
726
  next
1048
727
  end
1049
728
 
@@ -1090,7 +769,7 @@ module Lrama
1090
769
  end
1091
770
  end
1092
771
 
1093
- def compute_reduece_reduce_conflicts
772
+ def compute_reduce_reduce_conflicts
1094
773
  states.each do |state|
1095
774
  a = []
1096
775
 
@@ -1101,7 +780,7 @@ module Lrama
1101
780
  a += reduce.look_ahead
1102
781
 
1103
782
  if !intersection.empty?
1104
- state.conflicts << [intersection.dup, reduce, :reduce_reduce]
783
+ state.conflicts << State::Conflict.new(symbols: intersection.dup, reduce: reduce, type: :reduce_reduce)
1105
784
  end
1106
785
  end
1107
786
  end
@@ -1110,6 +789,8 @@ module Lrama
1110
789
  def compute_default_reduction
1111
790
  states.each do |state|
1112
791
  next if state.reduces.empty?
792
+ # Do not set, if conflict exist
793
+ next if !state.conflicts.empty?
1113
794
  # Do not set, if shift with `error` exists.
1114
795
  next if state.shifts.map(&:next_sym).include?(@grammar.error_symbol)
1115
796
 
@@ -1120,5 +801,32 @@ module Lrama
1120
801
  end.first.first
1121
802
  end
1122
803
  end
804
+
805
+ def check_conflicts
806
+ sr_count = sr_conflicts.count
807
+ rr_count = rr_conflicts.count
808
+
809
+ if @grammar.expect
810
+
811
+ expected_sr_conflicts = @grammar.expect
812
+ expected_rr_conflicts = 0
813
+
814
+ if expected_sr_conflicts != sr_count
815
+ @warning.error("shift/reduce conflicts: #{sr_count} found, #{expected_sr_conflicts} expected")
816
+ end
817
+
818
+ if expected_rr_conflicts != rr_count
819
+ @warning.error("reduce/reduce conflicts: #{rr_count} found, #{expected_rr_conflicts} expected")
820
+ end
821
+ else
822
+ if sr_count != 0
823
+ @warning.warn("shift/reduce conflicts: #{sr_count} found")
824
+ end
825
+
826
+ if rr_count != 0
827
+ @warning.warn("reduce/reduce conflicts: #{rr_count} found")
828
+ end
829
+ end
830
+ end
1123
831
  end
1124
832
  end