lrama 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.github/workflows/test.yaml +72 -0
- data/.gitignore +4 -0
- data/.rspec +2 -0
- data/Gemfile +8 -0
- data/LEGAL.md +26 -0
- data/MIT +21 -0
- data/README.md +32 -0
- data/Rakefile +1 -0
- data/doc/TODO.md +50 -0
- data/exe/lrama +7 -0
- data/lib/lrama/command.rb +129 -0
- data/lib/lrama/context.rb +510 -0
- data/lib/lrama/grammar.rb +850 -0
- data/lib/lrama/lexer.rb +349 -0
- data/lib/lrama/output.rb +268 -0
- data/lib/lrama/parser.rb +321 -0
- data/lib/lrama/report.rb +35 -0
- data/lib/lrama/states.rb +1124 -0
- data/lib/lrama/version.rb +3 -0
- data/lib/lrama.rb +9 -0
- data/lrama.gemspec +22 -0
- data/template/bison/yacc.c +1750 -0
- data/template/bison/yacc.h +112 -0
- metadata +67 -0
data/lib/lrama/states.rb
ADDED
@@ -0,0 +1,1124 @@
|
|
1
|
+
require "lrama/report"
|
2
|
+
|
3
|
+
module Lrama
|
4
|
+
# Algorithm Digraph of https://dl.acm.org/doi/pdf/10.1145/69622.357187 (P. 625)
|
5
|
+
# Implementation of the "Digraph" procedure: given a set of nodes, a relation
# between them and an initial value per node, it computes for every node the
# union (`|`) of the initial values of all nodes reachable through the relation.
# Nodes that sit on a common cycle end up sharing the same result.
class Digraph
  # sets          - enumerable of nodes (X in the paper)
  # relation      - object indexed by node returning related nodes or nil (R)
  # base_function - object indexed by node returning its initial value (F')
  def initialize(sets, relation, base_function)
    @sets = sets
    @relation = relation
    @base_function = base_function
    # S in the paper: nodes currently being traversed
    @stack = []
    # N in the paper: 0 = not visited, Float::INFINITY = finished
    @h = Hash.new(0)
    # F in the paper: the values computed so far
    @result = {}
  end

  # Traverses every not-yet-visited node and returns the hash node => value.
  def compute
    @sets.each do |x|
      traverse(x) if @h[x] == 0
    end

    @result
  end

  private

  # Depth-first visit of x; folds the values of related nodes into x.
  def traverse(x)
    @stack.push(x)
    depth = @stack.count
    @h[x] = depth
    @result[x] = @base_function[x] # F x = F' x

    (@relation[x] || []).each do |y|
      traverse(y) if @h[y] == 0
      @h[x] = @h[y] if @h[y] < @h[x]
      @result[x] |= @result[y] # F x = F x + F y
    end

    # x is not the root of its component: leave it on the stack for the root.
    return unless @h[x] == depth

    # x is the root: every node popped up to and including x gets x's value.
    loop do
      z = @stack.pop
      @h[z] = Float::INFINITY
      @result[z] = @result[x] # F (Top of S) = F x

      break if z == x
    end
  end
end
|
55
|
+
|
56
|
+
class State
|
57
|
+
# A reduce action of a state: the completed item plus the look-ahead symbols
# attached to it.
class Reduce
  # https://www.gnu.org/software/bison/manual/html_node/Default-Reductions.html
  attr_accessor :default_reduction
  attr_reader :item, :look_ahead, :not_selected_symbols

  # item - the item this reduce was created from
  def initialize(item)
    @not_selected_symbols = []
    @look_ahead = nil
    @item = item
  end

  # The rule of the underlying item.
  def rule
    @item.rule
  end

  # Stores the look-ahead collection; the given object is frozen in place.
  def look_ahead=(look_ahead)
    @look_ahead = look_ahead.freeze
  end

  # Records a symbol for which this reduce was not chosen.
  def add_not_selected_symbol(sym)
    @not_selected_symbols << sym
  end

  # Look-ahead symbols minus the not-selected ones; [] when no look-ahead is set.
  def selected_look_ahead
    return [] unless @look_ahead

    @look_ahead - @not_selected_symbols
  end
end
|
88
|
+
|
89
|
+
# A shift action of a state: the symbol to shift and the items that result
# from advancing over that symbol.
class Shift
  attr_accessor :not_selected
  attr_reader :next_sym, :next_items

  # next_sym   - symbol being shifted
  # next_items - items after the shift
  def initialize(next_sym, next_items)
    @next_sym, @next_items = next_sym, next_items
  end
end
|
98
|
+
|
99
|
+
# * symbol: A symbol under discussion
|
100
|
+
# * reduce: A reduce under discussion
|
101
|
+
# * which: For which a conflict is resolved. :shift, :reduce or :error (for nonassociative)
|
102
|
+
# Record of one resolved conflict. Members: symbol, reduce, which
# (:shift, :reduce or :error) and same_prec (truthy when the resolution was
# decided by associativity rather than by differing precedence).
ResolvedConflict = Struct.new(:symbol, :reduce, :which, :same_prec, keyword_init: true) do
  # Returns the one-line description of the resolution.
  # Raises RuntimeError when `which` is none of :shift, :reduce, :error.
  def report_message
    s = symbol.display_name
    r = reduce.rule.precedence_sym.display_name

    msg =
      case which
      when :shift
        same_prec ? "resolved as #{which} (%right #{s})" : "resolved as #{which} (#{r} < #{s})"
      when :reduce
        same_prec ? "resolved as #{which} (%left #{s})" : "resolved as #{which} (#{s} < #{r})"
      when :error
        "resolved as an #{which} (%nonassoc #{s})"
      else
        raise "Unknown direction. #{self}"
      end

    "Conflict between rule #{reduce.rule.id} and token #{s} #{msg}."
  end
end
|
124
|
+
|
125
|
+
attr_reader :id, :accessing_symbol, :kernels, :conflicts, :resolved_conflicts,
|
126
|
+
:default_reduction_rule
|
127
|
+
attr_accessor :closure, :shifts, :reduces
|
128
|
+
|
129
|
+
# id               - identifier of the state
# accessing_symbol - symbol stored as the state's accessing symbol
# kernels          - kernel items; the given array is frozen in place
def initialize(id, accessing_symbol, kernels)
  @id = id
  @accessing_symbol = accessing_symbol
  @kernels = kernels.freeze

  # Maps the items reached by a shift to the state they lead to,
  # used to resolve the next state of a transition.
  @items_to_state = {}

  @default_reduction_rule = nil
  @resolved_conflicts = []
  @conflicts = []
end
|
140
|
+
|
141
|
+
# All items of the state: kernel items followed by closure items.
# Returns a new array on every call.
def items
  @kernels + @closure
end
|
144
|
+
|
145
|
+
# Reduces whose rule differs from the state's default reduction rule.
def non_default_reduces
  reduces.select { |candidate| candidate.rule != @default_reduction_rule }
end
|
150
|
+
|
151
|
+
# Derives the state's shifts and reduces from its items.
#
# Items at the end of their rule become Reduce objects; the remaining items
# are grouped by their next symbol, advanced by one position, and turned into
# one Shift per symbol, ordered by symbol number. Both resulting arrays are
# frozen before being assigned.
def compute_shifts_reduces
  # TODO: Consider what should be pushed
  finished, unfinished = items.partition(&:end_of_rule?)

  grouped = unfinished.group_by(&:next_sym)
  ordered = grouped.sort_by { |next_sym, _| next_sym.number }
  new_shifts = ordered.map do |next_sym, group|
    Shift.new(next_sym, group.map(&:new_by_next_position).flatten)
  end
  new_reduces = finished.map { |item| Reduce.new(item) }

  self.shifts = new_shifts.freeze
  self.reduces = new_reduces.freeze
end
|
173
|
+
|
174
|
+
# Remembers that shifting to `items` leads to `next_state`.
def set_items_to_state(items, next_state)
  @items_to_state.store(items, next_state)
end
|
177
|
+
|
178
|
+
#
|
179
|
+
# Assigns `look_ahead` to the first reduce of this state whose rule equals
# `rule`. Fails with NoMethodError when no such reduce exists.
def set_look_ahead(rule, look_ahead)
  target = reduces.find { |candidate| candidate.rule == rule }

  target.look_ahead = look_ahead
end
|
186
|
+
|
187
|
+
# Returns array of [shift, next_state] for shifts over nonterminal symbols
|
188
|
+
# Returns array of [shift, next_state] for every shift whose symbol is not a
# terminal. The array is built on first use and cached.
def nterm_transitions
  @nterm_transitions ||= shifts
    .reject { |shift| shift.next_sym.term? }
    .map { |shift| [shift, @items_to_state[shift.next_items]] }
end
|
201
|
+
|
202
|
+
# Returns array of [shift, next_state] for shifts over terminal symbols
|
203
|
+
# Returns array of [shift, next_state] for every shift whose symbol is not a
# nonterminal. The array is built on first use and cached.
def term_transitions
  @term_transitions ||= shifts
    .reject { |shift| shift.next_sym.nterm? }
    .map { |shift| [shift, @items_to_state[shift.next_items]] }
end
|
216
|
+
|
217
|
+
# Terminal transitions whose shift has not been flagged as not selected.
def selected_term_transitions
  term_transitions.reject { |shift, _next_state| shift.not_selected }
end
|
222
|
+
|
223
|
+
# Move to next state by sym
|
224
|
+
# Move to next state by sym.
#
# Looks the symbol up among the terminal transitions when `sym.term?` is
# truthy, otherwise among the nonterminal transitions. When several entries
# match, the last one wins. Raises RuntimeError when nothing matches.
def transition(sym)
  candidates = sym.term? ? term_transitions : nterm_transitions

  result = nil
  candidates.each do |shift, next_state|
    result = next_state if shift.next_sym == sym
  end

  raise "Can not transit by #{sym} #{self}" if result.nil?

  result
end
|
243
|
+
|
244
|
+
# Returns the first reduce of this state whose item equals `item`.
#
# Raises RuntimeError when there is none. The message interpolates `self`;
# the previous version interpolated an undefined `state`, which turned the
# failure path into a NameError instead of the intended error.
def find_reduce_by_item!(item)
  found = reduces.find { |r| r.item == item }
  raise "reduce is not found. #{item}, #{self}" unless found

  found
end
|
249
|
+
|
250
|
+
# Stores the default reduction rule and flags every reduce of this state
# whose rule equals it with `default_reduction = true`.
def default_reduction_rule=(default_reduction_rule)
  @default_reduction_rule = default_reduction_rule

  reduces.each do |r|
    next unless r.rule == default_reduction_rule

    r.default_reduction = true
  end
end
|
259
|
+
end
|
260
|
+
|
261
|
+
class StatesReporter
|
262
|
+
# states - the object whose states are reported; `report` reads `states`,
#          `terms`, `nterms`, `rules` and the computed sets/relations from it.
def initialize(states)
  @states = states
end
|
265
|
+
|
266
|
+
# Writes a textual report of every state to `io` (anything supporting `<<`).
#
# states     - accepted for interface compatibility; not read here
# itemsets   - when true list all items of a state, otherwise only kernels
# lookaheads - when true append the selected look-ahead of completed items
# solved     - when true list the resolved conflicts of each state
# verbose    - when true dump the sets and relations computed for each state
#
# Changes from the previous version: the per-section code lives in private
# helpers, a reduce without look-ahead no longer raises while listing
# reduces, and the decoded look-ahead hash is fetched once per state
# instead of once per rule.
def report(io, states: false, itemsets: false, lookaheads: false, solved: false, verbose: false)
  @states.states.each do |state|
    # Report State
    io << "State #{state.id}\n\n"

    report_items(io, state, itemsets, lookaheads)
    report_shifts(io, state)
    report_nonassoc_errors(io, state)
    report_reduces(io, state)
    report_nterm_transitions(io, state)
    report_resolved_conflicts(io, state) if solved
    report_verbose(io, state) if verbose

    # End of Report State
    io << "\n"
  end
end

private

# Report item: one line per item, ordered by rule id and position.
def report_items(io, state, itemsets, lookaheads)
  last_lhs = nil
  list = itemsets ? state.items : state.kernels

  list.sort_by { |i| [i.rule_id, i.position] }.each do |item|
    rule = item.rule

    r = if rule.rhs.empty?
      "ε •"
    else
      rule.rhs.map(&:display_name).insert(item.position, "•").join(" ")
    end

    # Consecutive items with the same lhs show "|" instead of repeating it.
    l = if rule.lhs == last_lhs
      " " * rule.lhs.id.s_value.length + "|"
    else
      rule.lhs.id.s_value + ":"
    end

    la = ""
    if lookaheads && item.end_of_rule?
      look_ahead = state.find_reduce_by_item!(item).selected_look_ahead
      la = " [#{look_ahead.map(&:display_name).join(", ")}]" unless look_ahead.empty?
    end
    last_lhs = rule.lhs

    io << sprintf("%5i %s %s%s\n", rule.id, l, r, la)
  end
  io << "\n"
end

# Report shifts: terminal transitions whose shift is still selected.
def report_shifts(io, state)
  rows = state.term_transitions
    .reject { |shift, _| shift.not_selected }
    .map { |shift, next_state| [shift.next_sym, next_state.id] }
  return if rows.empty?

  width = rows.map { |term, _| term.display_name.length }.max
  rows.each do |term, state_id|
    io << " #{term.display_name.ljust(width)} shift, and go to state #{state_id}\n"
  end
  io << "\n"
end

# Report error caused by %nonassoc
def report_nonassoc_errors(io, state)
  names = state.resolved_conflicts
    .select { |resolved| resolved.which == :error }
    .map { |error| error.symbol.display_name }
  return if names.empty?

  width = names.map(&:length).max
  names.each do |name|
    io << " #{name.ljust(width)} error (nonassociative)\n"
  end
  io << "\n"
end

# Report reduces: one line per term that appears in the look-ahead of a
# non-default reduce, followed by the default reduction if there is one.
def report_reduces(io, state)
  printed = false
  default_rule = state.default_reduction_rule
  reduces = state.non_default_reduces

  width = reduces.flat_map(&:look_ahead).compact.map { |sym| sym.display_name.length }.max || 0
  width = [width, "$default".length].max if default_rule

  @states.terms.each do |term|
    # look_ahead is nil for reduces that never received one; skip those.
    reduce = reduces.find { |r| r.look_ahead&.include?(term) }
    next unless reduce

    rule = reduce.item.rule
    io << " #{term.display_name.ljust(width)} reduce using rule #{rule.id} (#{rule.lhs.display_name})\n"
    printed = true
  end

  if default_rule
    printed = true
    s = "$default".ljust(width)

    if default_rule.initial_rule?
      io << " #{s} accept\n"
    else
      io << " #{s} reduce using rule #{default_rule.id} (#{default_rule.lhs.display_name})\n"
    end
  end
  io << "\n" if printed
end

# Report nonterminal transitions, de-duplicated and ordered by symbol number.
def report_nterm_transitions(io, state)
  rows = state.nterm_transitions.map { |shift, next_state| [shift.next_sym, next_state.id] }
  return if rows.empty?

  width = rows.map { |nterm, _| nterm.id.s_value.length }.max
  rows.uniq.sort_by { |nterm, _| nterm.number }.each do |nterm, state_id|
    io << " #{nterm.id.s_value.ljust(width)} go to state #{state_id}\n"
  end
  io << "\n"
end

# Report conflict resolutions
def report_resolved_conflicts(io, state)
  state.resolved_conflicts.each do |resolved|
    io << " #{resolved.report_message}\n"
  end
  io << "\n" if !state.resolved_conflicts.empty?
end

# Looks up a nonterminal by token id among @states.nterms.
def find_nterm_by_token_id(token_id)
  @states.nterms.find { |nterm| nterm.token_id == token_id }
end

# Dumps the sets and relations stored for `state`.
def report_verbose(io, state)
  # Report direct_read_sets
  io << " [Direct Read sets]\n"
  direct_read_sets = @states.direct_read_sets
  @states.nterms.each do |nterm|
    terms = direct_read_sets[[state.id, nterm.token_id]]
    next if !terms
    next if terms.empty?

    str = terms.map { |sym| sym.id.s_value }.join(", ")
    io << " read #{nterm.id.s_value} shift #{str}\n"
  end
  io << "\n"

  # Report reads_relation
  io << " [Reads Relation]\n"
  @states.nterms.each do |nterm|
    a = @states.reads_relation[[state.id, nterm.token_id]]
    next if !a

    a.each do |state_id2, nterm_id2|
      n = find_nterm_by_token_id(nterm_id2)
      io << " (State #{state_id2}, #{n.id.s_value})\n"
    end
  end
  io << "\n"

  # Report read_sets
  io << " [Read sets]\n"
  read_sets = @states.read_sets
  @states.nterms.each do |nterm|
    terms = read_sets[[state.id, nterm.token_id]]
    next if !terms
    next if terms.empty?

    terms.each do |sym|
      io << " #{sym.id.s_value}\n"
    end
  end
  io << "\n"

  # Report includes_relation
  io << " [Includes Relation]\n"
  @states.nterms.each do |nterm|
    a = @states.includes_relation[[state.id, nterm.token_id]]
    next if !a

    a.each do |state_id2, nterm_id2|
      n = find_nterm_by_token_id(nterm_id2)
      io << " (State #{state.id}, #{nterm.id.s_value}) -> (State #{state_id2}, #{n.id.s_value})\n"
    end
  end
  io << "\n"

  # Report lookback_relation
  io << " [Lookback Relation]\n"
  @states.rules.each do |rule|
    a = @states.lookback_relation[[state.id, rule.id]]
    next if !a

    a.each do |state_id2, nterm_id2|
      n = find_nterm_by_token_id(nterm_id2)
      io << " (Rule: #{rule.to_s}) -> (State #{state_id2}, #{n.id.s_value})\n"
    end
  end
  io << "\n"

  # Report follow_sets
  io << " [Follow sets]\n"
  follow_sets = @states.follow_sets
  @states.nterms.each do |nterm|
    terms = follow_sets[[state.id, nterm.token_id]]

    next if !terms

    terms.each do |sym|
      io << " #{nterm.id.s_value} -> #{sym.id.s_value}\n"
    end
  end
  io << "\n"

  # Report LA
  io << " [Look-Ahead Sets]\n"
  la = @states.la # fetched once: each call rebuilds the whole hash
  rows = []
  @states.rules.each do |rule|
    syms = la[[state.id, rule.id]]
    rows << [rule, syms] if syms
  end
  width = rows.flat_map { |_, syms| syms.map { |s| s.id.s_value.length } }.max || 0
  rows.each do |rule, syms|
    syms.each do |sym|
      io << " #{sym.id.s_value.ljust(width)} reduce using rule #{rule.id} (#{rule.lhs.id.s_value})\n"
    end
  end
  io << "\n" if !rows.empty?
end
|
496
|
+
end
|
497
|
+
|
498
|
+
# States is passed to a template file
|
499
|
+
#
|
500
|
+
# "Efficient Computation of LALR(1) Look-Ahead Sets"
|
501
|
+
# https://dl.acm.org/doi/pdf/10.1145/69622.357187
|
502
|
+
class States
|
503
|
+
include Lrama::Report::Duration
|
504
|
+
|
505
|
+
# TODO: Validate position is not over rule rhs
|
506
|
+
# A rule together with a position inside its right-hand side.
Item = Struct.new(:rule, :position, keyword_init: true) do
  # Optimization for States#setup_state: hash on rule id and position only.
  def hash
    [rule.id, position].hash
  end

  # Id of the item's rule.
  def rule_id
    rule.id
  end

  # Symbol at the position (nil when the position is past the rhs).
  def next_sym
    rule.rhs[position]
  end

  # True when the position equals the length of the rhs.
  def end_of_rule?
    position == rule.rhs.count
  end

  # A new item for the same rule with the position advanced by one.
  def new_by_next_position
    Item.new(rule: rule, position: position.succ)
  end

  # Symbol just before the position. Note that at position 0 this indexes
  # rhs[-1], i.e. the last symbol of the rhs.
  def previous_sym
    rule.rhs[position - 1]
  end

  # All rhs symbols with "•" inserted at the position, plus the rule id.
  def display_name
    names = rule.rhs.map(&:display_name)
    names.insert(position, "•")
    "#{names.join(" ")} (rule #{rule.id})"
  end

  # Right after position
  def display_rest
    rest = rule.rhs[position..-1].map(&:display_name)
    ". #{rest.join(" ")} (rule #{rule.id})"
  end
end
|
543
|
+
|
544
|
+
attr_reader :states, :item_to_state,
|
545
|
+
:reads_relation, :includes_relation, :lookback_relation
|
546
|
+
|
547
|
+
# grammar     - the grammar the states are computed from
# trace_state - when truthy, trace messages are printed while computing
def initialize(grammar, trace_state: false)
  @grammar = grammar
  @trace_state = trace_state

  @states = []
  @item_to_state = {}

  # Direct read sets:
  #   `DR(p, A) = {t ∈ T | p -(A)-> r -(t)-> }`
  #   where p is state, A is nterm, t is term.
  #
  # key is [state.id, nterm.token_id], value is bitmap of term.
  @direct_read_sets = {}

  # Reads relation on nonterminal transitions (pair of state and nterm):
  #   `(p, A) reads (r, C) iff p -(A)-> r -(C)-> and C =>* ε`
  #   where p, r are state, A, C are nterm.
  #
  # key is [state.id, nterm.token_id],
  # value is array of [state.id, nterm.token_id].
  @reads_relation = {}

  # Read sets.
  #
  # key is [state.id, nterm.token_id], value is bitmap of term.
  @read_sets = {}

  # Includes relation:
  #   `(p, A) includes (p', B) iff B -> βAγ, γ =>* ε, p' -(β)-> p`
  #   where p, p' are state, A, B are nterm, β, γ are sequences of symbols.
  #
  # key is [state.id, nterm.token_id],
  # value is array of [state.id, nterm.token_id].
  @includes_relation = {}

  # Lookback relation:
  #   `(q, A -> ω) lookback (p, A) iff p -(ω)-> q`
  #   where p, q are state, A -> ω is rule, A is nterm, ω is sequence of symbols.
  #
  # key is [state.id, rule.id],
  # value is array of [state.id, nterm.token_id].
  @lookback_relation = {}

  # Follow sets.
  #
  # key is [state.id, nterm.token_id], value is bitmap of term.
  @follow_sets = {}

  # Look-ahead sets:
  #   `LA(q, A -> ω) = ∪{Follow(p, A) | (q, A -> ω) lookback (p, A)}`
  #
  # key is [state.id, rule.id], value is bitmap of term.
  @la = {}
end
|
606
|
+
|
607
|
+
# Runs every computation step in order, each wrapped in `report_duration`
# under the step's own name. Returns whatever the final `report_duration`
# call returns.
def compute
  # TODO: Move report_grammar to other place
  # report_grammar(@grammar)

  steps = %i[
    compute_lr0_states
    compute_direct_read_sets
    compute_reads_relation
    compute_read_sets
    compute_includes_relation
    compute_lookback_relation
    compute_follow_sets
    compute_look_ahead_sets
    compute_conflicts
  ]

  # Look Ahead Sets, then Conflicts
  steps.each do |step|
    report_duration(step) { send(step) }
  end

  report_duration(:compute_default_reduction) { compute_default_reduction }
end
|
626
|
+
|
627
|
+
# Builds a new StatesReporter for this object.
def reporter
  StatesReporter.new(self)
end
|
630
|
+
|
631
|
+
# Number of states created so far.
def states_count
  @states.count
end
|
634
|
+
|
635
|
+
# Delegates to the grammar's `symbols`.
def symbols
  @grammar.symbols
end
|
638
|
+
|
639
|
+
# Delegates to the grammar's `terms`.
def terms
  @grammar.terms
end
|
642
|
+
|
643
|
+
# Delegates to the grammar's `nterms`.
def nterms
  @grammar.nterms
end
|
646
|
+
|
647
|
+
# Delegates to the grammar's `rules`.
def rules
  @grammar.rules
end
|
650
|
+
|
651
|
+
# Delegates to the grammar's `accept_symbol`.
def accept_symbol
  @grammar.accept_symbol
end
|
654
|
+
|
655
|
+
# Delegates to the grammar's `eof_symbol`.
def eof_symbol
  @grammar.eof_symbol
end
|
658
|
+
|
659
|
+
# Delegates to the grammar's `find_symbol_by_s_value!` with the same argument.
def find_symbol_by_s_value!(s_value)
  @grammar.find_symbol_by_s_value!(s_value)
end
|
662
|
+
|
663
|
+
# Returns a new hash with the keys of @direct_read_sets and each bitmap
# decoded through `bitmap_to_terms`.
def direct_read_sets
  @direct_read_sets.transform_values { |bitmap| bitmap_to_terms(bitmap) }
end
|
672
|
+
|
673
|
+
# Returns a new hash with the keys of @read_sets and each bitmap decoded
# through `bitmap_to_terms`.
def read_sets
  @read_sets.transform_values { |bitmap| bitmap_to_terms(bitmap) }
end
|
682
|
+
|
683
|
+
# Returns a new hash with the keys of @follow_sets and each bitmap decoded
# through `bitmap_to_terms`.
def follow_sets
  @follow_sets.transform_values { |bitmap| bitmap_to_terms(bitmap) }
end
|
692
|
+
|
693
|
+
# Returns a new hash with the keys of @la and each bitmap decoded through
# `bitmap_to_terms`. The hash is rebuilt on every call.
def la
  @la.transform_values { |bitmap| bitmap_to_terms(bitmap) }
end
|
702
|
+
|
703
|
+
private
|
704
|
+
|
705
|
+
# Prints the rules of `grammar` to standard output. Consecutive rules with
# the same lhs are listed with "|" instead of repeating the lhs; a blank
# line is printed before each new lhs.
def report_grammar(grammar)
  str = "Grammar\n\n"
  previous_lhs = nil

  grammar.rules.each do |rule|
    r = rule.rhs.empty? ? "ε" : rule.rhs.map(&:display_name).join(" ")

    if rule.lhs == previous_lhs
      padding = " " * rule.lhs.display_name.length
      str << sprintf("%5d %s| %s\n", rule.id, padding, r)
    else
      str << "\n"
      str << sprintf("%5d %s: %s\n", rule.id, rule.lhs.display_name, r)
    end

    previous_lhs = rule.lhs
  end

  puts str
end
|
728
|
+
|
729
|
+
# Prints `msg` with `puts` when tracing is enabled; otherwise does nothing.
def trace_state(msg)
  return unless @trace_state

  puts msg
end
|
732
|
+
|
733
|
+
# Returns [state, created] for the given kernel items.
#
# When `states_creted` already holds a state for `kernels`, that state is
# returned with created = false. Otherwise a new State is built (its id is
# the current number of states), appended to @states, registered in
# `states_creted`, and returned with created = true.
def create_state(accessing_symbol, kernels, states_creted)
  # An item can appear in several states, so the whole `kernels` array
  # (not `kernels.first`) has to be used as the key.
  #
  # For example...
  #
  # %%
  # program: '+' strings_1
  #        | '-' strings_2
  #        ;
  #
  # strings_1: string_1
  #          ;
  #
  # strings_2: string_1
  #          | string_2
  #          ;
  #
  # string_1: string
  #         ;
  #
  # string_2: string '+'
  #         ;
  #
  # string: tSTRING
  #       ;
  # %%
  #
  # For this grammar, there are 2 states
  #
  # State A
  #   string_1: string •
  #
  # State B
  #   string_1: string •
  #   string_2: string • '+'
  #
  existing = states_creted[kernels]
  return [existing, false] if existing

  state = State.new(@states.count, accessing_symbol, kernels)
  @states << state
  states_creted[kernels] = state

  [state, true]
end
|
778
|
+
|
779
|
+
# Fills in a freshly created state:
#
# 1. computes the closure of its kernel items (stored sorted by rule id),
# 2. emits trace output for the closure input and output,
# 3. asks the state to compute its shifts and reduces,
# 4. registers every kernel item in @item_to_state.
#
# The unused `visited` bookkeeping of the previous version was removed;
# `queued` alone decides whether an item still has to be processed.
def setup_state(state)
  # closure
  closure = []
  queued = {}
  # kernels may be frozen, so work on a copy that can be consumed.
  pending = state.kernels.dup

  pending.each do |item|
    queued[item] = true
  end

  while (item = pending.shift) do
    sym = item.next_sym
    next unless sym && sym.nterm?

    # Every rule of the nonterminal after the dot contributes a position-0 item.
    @grammar.find_rules_by_symbol!(sym).each do |rule|
      candidate = Item.new(rule: rule, position: 0)
      next if queued[candidate]

      closure << candidate
      pending << candidate
      queued[candidate] = true
    end
  end

  state.closure = closure.sort_by {|i| i.rule.id }

  # Trace
  trace_state("Closure: input\n")
  state.kernels.each do |item|
    trace_state(" #{item.display_rest}\n")
  end
  trace_state("\n\n")
  trace_state("Closure: output\n")
  state.items.each do |item|
    trace_state(" #{item.display_rest}\n")
  end
  trace_state("\n\n")

  # shift & reduce
  state.compute_shifts_reduces

  state.kernels.each do |item|
    @item_to_state[item] = state
  end
end
|
825
|
+
|
826
|
+
# Emits a trace line for `state` and appends it to the `states` queue.
# The trace shows the current number of created states and the symbol just
# before the position of the state's first kernel item.
def enqueue_state(states, state)
  # Trace
  reached_by = state.kernels.first.previous_sym
  message = sprintf(
    "state_list_append (state = %d, symbol = %d (%s))",
    @states.count, reached_by.number, reached_by.display_name
  )
  trace_state(message)

  states << state
end
|
836
|
+
|
837
|
+
# Builds all states with a work queue: starting from the state whose only
# kernel item is the first grammar rule at position 0, each dequeued state
# is set up and one target state per shift is created (or reused) and
# linked; newly created states are queued in turn.
def compute_lr0_states
  # State queue
  queue = []
  states_creted = {}

  first_item = Item.new(rule: @grammar.rules.first, position: 0)
  initial_state, _ = create_state(symbols.first, [first_item], states_creted)
  enqueue_state(queue, initial_state)

  until queue.empty?
    state = queue.shift

    # Trace
    #
    # Bison 3.8.2 renders "(reached by "end-of-input")" for State 0 but
    # I think it is not correct...
    reached_by = state.kernels.first.previous_sym
    trace_state("Processing state #{state.id} (reached by #{reached_by.display_name})\n")

    setup_state(state)

    # It seems Bison 3.8.2 iterates transitions order by symbol number
    ordered_shifts = state.shifts.sort_by { |shift| shift.next_sym.number }
    ordered_shifts.each do |shift|
      new_state, created = create_state(shift.next_sym, shift.next_items, states_creted)
      state.set_items_to_state(shift.next_items, new_state)
      enqueue_state(queue, new_state) if created
    end
  end
end
|
865
|
+
|
866
|
+
# Returns array of [state, nterm, next_state] covering the nonterminal
# transitions of every state, in state order.
def nterm_transitions
  @states.flat_map do |state|
    state.nterm_transitions.map do |shift, next_state|
      [state, shift.next_sym, next_state]
    end
  end
end
|
878
|
+
|
879
|
+
# Fills @direct_read_sets: for every nonterminal transition (state, nterm)
# the entry [state.id, nterm.token_id] becomes a bitmap with bit
# `sym.number` set for each symbol of the target state's terminal transitions.
#
# The previous version also collected the symbols into an array that was
# never read and reused the name `shift` for the inner block parameter;
# both were dropped.
def compute_direct_read_sets
  @states.each do |state|
    state.nterm_transitions.each do |shift, next_state|
      nterm = shift.next_sym

      # Encode terms into bitmap
      bitmap = next_state.term_transitions.reduce(0) do |bits, (term_shift, _)|
        bits | (1 << term_shift.next_sym.number)
      end

      @direct_read_sets[[state.id, nterm.token_id]] = bitmap
    end
  end
end
|
898
|
+
|
899
|
+
# Fills @reads_relation: for every nonterminal transition (state, nterm)
# leading to next_state, each nullable nonterminal that next_state can move
# over adds [next_state.id, that nterm's token_id] under the key
# [state.id, nterm.token_id].
def compute_reads_relation
  @states.each do |state|
    state.nterm_transitions.each do |shift, next_state|
      key = [state.id, shift.next_sym.token_id]

      next_state.nterm_transitions.each do |following_shift, _|
        following = following_shift.next_sym
        next unless following.nullable

        (@reads_relation[key] ||= []) << [next_state.id, following.token_id]
      end
    end
  end
end
|
914
|
+
|
915
|
+
# Computes @read_sets by running Digraph over all nonterminal transitions,
# with @reads_relation as the relation and @direct_read_sets as base values.
def compute_read_sets
  keys = nterm_transitions.map { |state, nterm, _next_state| [state.id, nterm.token_id] }

  @read_sets = Digraph.new(keys, @reads_relation, @direct_read_sets).compute
end
|
922
|
+
|
923
|
+
# Execute transition of state by symbols
|
924
|
+
# then return final state.
|
925
|
+
# Execute transition of state by symbols
# then return final state.
# With an empty `symbols` the given state itself is returned.
def transition(state, symbols)
  symbols.reduce(state) { |current, sym| current.transition(sym) }
end
|
932
|
+
|
933
|
+
# Fills @includes_relation.
#
# For every nonterminal transition (state, nterm) and every rule of nterm,
# the rhs is scanned from its last symbol backwards. For each nonterminal
# `sym` at index i, with state2 the state reached from `state` by rhs[0...i],
# [state.id, nterm.token_id] is added under [state2.id, sym.token_id].
# The scan stops at a terminal, and after the first non-nullable nonterminal.
#
# The previous version allocated an unused `beta` array and walked the
# index with a manual `while` loop; both are gone.
def compute_includes_relation
  @states.each do |state|
    state.nterm_transitions.each do |shift, _next_state|
      nterm = shift.next_sym

      @grammar.find_rules_by_symbol!(nterm).each do |rule|
        (rule.rhs.count - 1).downto(0) do |i|
          sym = rule.rhs[i]

          break if sym.term?

          state2 = transition(state, rule.rhs[0...i])
          # p' = state, B = nterm, p = state2, A = sym
          key = [state2.id, sym.token_id]
          # TODO: need to omit if state == state2 ?
          (@includes_relation[key] ||= []) << [state.id, nterm.token_id]

          # Symbols further left only count while everything to the right is nullable.
          break if !sym.nullable
        end
      end
    end
  end
end
|
958
|
+
|
959
|
+
# Fills @lookback_relation: for every nonterminal transition (state, nterm)
# and every rule of nterm, with state2 the state reached from `state` by the
# rule's whole rhs, [state.id, nterm.token_id] is added under
# [state2.id, rule.id].
def compute_lookback_relation
  @states.each do |state|
    state.nterm_transitions.each do |shift, _next_state|
      nterm = shift.next_sym
      origin = [state.id, nterm.token_id]

      @grammar.find_rules_by_symbol!(nterm).each do |rule|
        # p = state, A = nterm, q = state2, A -> ω = rule
        state2 = transition(state, rule.rhs)
        (@lookback_relation[[state2.id, rule.id]] ||= []) << origin
      end
    end
  end
end
|
973
|
+
|
974
|
+
# Computes @follow_sets by running Digraph over all nonterminal transitions,
# with @includes_relation as the relation and @read_sets as base values.
def compute_follow_sets
  keys = nterm_transitions.map { |state, nterm, _next_state| [state.id, nterm.token_id] }

  @follow_sets = Digraph.new(keys, @includes_relation, @read_sets).compute
end
|
981
|
+
|
982
|
+
# Fills @la and attaches look-ahead terms to the reduces of each state.
#
# For every (state, rule) that has a lookback entry, the follow-set bitmaps
# of all lookback targets are OR-ed into @la[[state.id, rule.id]]. Targets
# whose follow set is 0 are skipped. After each contribution the
# accumulated set is decoded and handed to the state, except for states
# with exactly one reduce and no terminal transitions.
#
# Compared with the previous version, the bitmap is OR-ed once instead of
# twice (same result), and the comment on the skip condition now matches
# the condition.
def compute_look_ahead_sets
  @states.each do |state|
    rules.each do |rule|
      ary = @lookback_relation[[state.id, rule.id]]
      next if !ary

      ary.each do |state2_id, nterm_token_id|
        # q = state, A -> ω = rule, p = state2, A = nterm
        follows = @follow_sets[[state2_id, nterm_token_id]]

        next if follows == 0

        key = [state.id, rule.id]
        look_ahead = (@la[key] || 0) | follows
        @la[key] = look_ahead

        # No risk of conflict when
        #   * the state only has single reduce
        #   * the state has no term_transitions (no shift over a terminal)
        # so the look-ahead is not stored on the reduce in that case.
        next if state.reduces.count == 1 && state.term_transitions.count == 0

        state.set_look_ahead(rule, bitmap_to_terms(look_ahead))
      end
    end
  end
end
|
1009
|
+
|
1010
|
+
# Decodes a bitmap into symbols: for every set bit i (lowest first) the
# grammar's symbol with number i is looked up. Returns [] for 0 and for
# negative values.
def bitmap_to_terms(bit)
  return [] unless bit > 0

  set_positions = (0...bit.bit_length).select { |i| bit[i] == 1 }
  set_positions.map { |i| @grammar.find_symbol_by_number!(i) }
end
|
1025
|
+
|
1026
|
+
# Detects conflicts: shift/reduce conflicts first, then reduce/reduce ones.
def compute_conflicts
  compute_shift_reduce_conflicts
  compute_reduece_reduce_conflicts
end
|
1030
|
+
|
1031
|
+
# Examines every (shift, reduce) pair of every state where the shifted
# symbol is in the reduce's look-ahead.
#
# * If the symbol or the rule has no precedence, [sym, reduce, :no_precedence]
#   is appended to state.conflicts.
# * Otherwise a ResolvedConflict is appended to state.resolved_conflicts:
#   lower symbol precedence selects the reduce, higher selects the shift,
#   and otherwise the type of the symbol's precedence decides (:right shift,
#   :left reduce, :nonassoc error).
# The losing side is marked: the shift via `not_selected = true`, the reduce
# via `add_not_selected_symbol(sym)`; for :error both are marked.
# Raises RuntimeError for any other precedence type.
def compute_shift_reduce_conflicts
  states.each do |state|
    state.shifts.each do |shift|
      state.reduces.each do |reduce|
        sym = shift.next_sym

        next unless reduce.look_ahead
        next unless reduce.look_ahead.include?(sym)

        # Shift/Reduce conflict
        shift_prec = sym.precedence
        reduce_prec = reduce.item.rule.precedence

        # Can resolve only when both have prec
        if !(shift_prec && reduce_prec)
          state.conflicts << [sym, reduce, :no_precedence]
          next
        end

        which, same_prec =
          if shift_prec < reduce_prec
            # Reduce is selected
            [:reduce, nil]
          elsif shift_prec > reduce_prec
            # Shift is selected
            [:shift, nil]
          else
            # shift_prec == reduce_prec, then check associativity
            case sym.precedence.type
            when :right
              # Shift is selected
              [:shift, true]
            when :left
              # Reduce is selected
              [:reduce, true]
            when :nonassoc
              # Can not resolve
              #
              # nonassoc creates "run-time" error, precedence creates "compile-time" error.
              # Then omit both the shift and reduce.
              #
              # https://www.gnu.org/software/bison/manual/html_node/Using-Precedence.html
              [:error, nil]
            else
              raise "Unknown precedence type. #{sym}"
            end
          end

        state.resolved_conflicts << State::ResolvedConflict.new(symbol: sym, reduce: reduce, which: which, same_prec: same_prec)
        # The shift loses unless it was selected; the reduce loses unless it was selected.
        shift.not_selected = true if which != :shift
        reduce.add_not_selected_symbol(sym) if which != :reduce
      end
    end
  end
end
|
1092
|
+
|
1093
|
+
# Detects reduce/reduce conflicts: walking the reduces of each state in
# order, a reduce whose look-ahead overlaps the look-ahead symbols gathered
# from earlier reduces of that state adds
# [overlapping symbols, reduce, :reduce_reduce] to state.conflicts.
# Reduces without look-ahead are ignored.
def compute_reduece_reduce_conflicts
  states.each do |state|
    seen = []

    state.reduces.each do |reduce|
      look_ahead = reduce.look_ahead
      next if look_ahead.nil?

      overlap = seen & look_ahead
      seen += look_ahead

      next if overlap.empty?

      state.conflicts << [overlap.dup, reduce, :reduce_reduce]
    end
  end
end
|
1109
|
+
|
1110
|
+
# Chooses the default reduction rule of every state that has reduces and
# no shift over the grammar's error symbol: the rule of the reduce with the
# most look-ahead symbols (a missing look-ahead counts as none), ties
# broken by the smaller rule id.
def compute_default_reduction
  states.each do |state|
    next if state.reduces.empty?
    # Do not set, if shift with `error` exists.
    next if state.shifts.map(&:next_sym).include?(@grammar.error_symbol)

    preferred = state.reduces.min_by do |r|
      [-(r.look_ahead || []).count, r.rule.id]
    end

    state.default_reduction_rule = preferred.rule
  end
end
|
1123
|
+
end
|
1124
|
+
end
|