aurum 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/example/expression/expression.rb +29 -0
- data/lib/aurum.rb +10 -0
- data/lib/aurum/engine.rb +173 -0
- data/lib/aurum/grammar.rb +234 -0
- data/lib/aurum/lexical_table_generator.rb +423 -0
- data/lib/aurum/parsing_table_generator.rb +445 -0
- data/test/engine/lexer_test.rb +52 -0
- data/test/engine/semantic_attributes_test.rb +15 -0
- data/test/grammar_definition/character_class_definition_test.rb +28 -0
- data/test/grammar_definition/grammar_definition_test.rb +54 -0
- data/test/grammar_definition/lexical_definition_test.rb +56 -0
- data/test/grammar_definition/operator_precedence_definition_test.rb +35 -0
- data/test/grammar_definition/production_definition_test.rb +60 -0
- data/test/lexical_table_generator/automata_test.rb +74 -0
- data/test/lexical_table_generator/character_set_test.rb +73 -0
- data/test/lexical_table_generator/interval_test.rb +36 -0
- data/test/lexical_table_generator/pattern_test.rb +109 -0
- data/test/lexical_table_generator/subset_determinizer_test.rb +19 -0
- data/test/lexical_table_generator/table_generator_test.rb +126 -0
- data/test/parsing_table_generator/augmented_grammar_test.rb +45 -0
- data/test/parsing_table_generator/lalr_n_computation_test.rb +89 -0
- data/test/parsing_table_generator/lr_0_automata_test.rb +91 -0
- data/test/parsing_table_generator/lr_item_test.rb +33 -0
- data/test/parsing_table_generator/parsing_table_state_test.rb +39 -0
- data/test/parsing_table_generator/precedence_table_test.rb +28 -0
- data/test/parsing_table_generator/production_test.rb +9 -0
- data/test/test_helper.rb +103 -0
- metadata +78 -0
@@ -0,0 +1,423 @@
|
|
1
|
+
module Aurum
|
2
|
+
# Value objects describing what the lexer does when a pattern is accepted.
# Each one is a plain Struct; the generator in this file only stores and
# compares them.
RecognizeTokenAction = Struct.new(:token)
ChangeStateAction = Struct.new(:state)
UserDefinedAction = Struct.new(:action)
RecognizeTokenAndChangeStateAction = Struct.new(:token, :state)

# A RecognizeTokenAction carrying the reserved token name '$ignore'.
IgnoreAction = RecognizeTokenAction.new('$ignore')
|
5
|
+
|
6
|
+
# Builds the lexer transition table from a lexical specification.
#
# The specification is a hash of lexical-state name => {pattern => action};
# the patterns stored under :all are merged into every other lexical state.
class LexicalTableGenerator
  attr_reader :lexical_states

  def initialize specification
    @specification = specification
    @accept_states = {}
    @lexical_states = @specification.keys - [:all]
    @patterns_for_all = specification[:all] || {}
  end

  # Returns the pair [transition_table, accept_states]. The minimized
  # automaton is built only when refinement produced fewer partitions than
  # the deterministic automaton has states.
  def lexical_table
    construct_automata
    make_initial_partitions
    refine_partitions
    if @partitions.size < @lexical_automata.table.size
      construct_minimize_automata
    else
      [@lexical_automata.table, @accept_states]
    end
  end

  private

  # Merges every pattern of every lexical state into one automaton and
  # determinizes it. State 0 reaches the entry of the index-th lexical state
  # through the negative pseudo-character -(index + 1).
  def construct_automata
    automata = Automata.new(1)
    accepts = {}
    @lexical_states.each_with_index do |lexical_state, index|
      state_entry = automata.new_state
      automata.connect 0, CharacterSet::Interval.new(-index - 1).to_char_set, state_entry
      @patterns_for_all.merge(@specification[lexical_state]).each do |pattern, action|
        pattern_entry = automata.merge! pattern.automata
        automata.connect state_entry, Epsilon, pattern_entry
        accepts[pattern_entry + pattern.accept] = action
      end
    end
    @lexical_automata, deterministic_accepts = automata.determinize accepts.keys
    deterministic_accepts.each do |dfa_state, nfa_states|
      @accept_states[dfa_state] = nfa_states.map { |nfa_state| accepts[nfa_state] }
    end
  end

  # Initial partitions: the start state, the non-accepting states, and one
  # group per distinct list of accept actions. Empty groups are dropped.
  def make_initial_partitions
    grouped = {}
    @accept_states.each do |state, action|
      (grouped[action] ||= []) << state
    end
    non_accepting = @lexical_automata.all_states - @accept_states.keys - [0]
    @partitions = [[0], non_accepting] + grouped.values
    @partitions.delete []
  end

  # Splits partitions until no splitter on the working list separates any
  # partition. grep_each is defined outside this file, so the statement order
  # inside its block is kept exactly as it was.
  def refine_partitions
    reverse_automata = @lexical_automata.reverse
    working_list = @partitions.dup
    until working_list.empty?
      reverse_automata.alphabet(working_list.pop) do |ia, symbols|
        @partitions.grep_each 'x.size > 1' do |r|
          r1, r2 = r & ia, r - ia
          if !r2.empty? && r2 != r
            replace @partitions, r => [r1, r2]
            if working_list.include? r
              replace working_list, r => [r1, r2]
            else
              working_list << (r1.size <= r2.size ? r1 : r2)
            end
            working_list.uniq!
          end
        end
      end
    end
  end

  # Builds the automaton with one state per partition, copying the
  # transitions and accept actions of each partition's representative.
  def construct_minimize_automata
    minimized = Automata.new(@partitions.size)
    accepts = {}
    choose_representatives do |representative, index|
      @lexical_automata.table[representative].each do |transition|
        minimized.connect index, transition.symbols, partition_contains(transition.destination)
      end
      accepts[index] = @accept_states[representative] if @accept_states.has_key? representative
    end
    [minimized.table, accepts]
  end

  # Yields the first state of every partition together with the partition index.
  def choose_representatives
    @partitions.each_with_index do |partition, index|
      yield partition.first, index
    end
  end

  # Index of the partition that holds +state+.
  def partition_contains state
    @partitions.each_with_index do |partition, index|
      return index if partition.include? state
    end
  end

  # For each old => new_elements pair, removes +old+ from +array+ and appends
  # the new elements.
  def replace array, replacements
    replacements.each do |old, new|
      array.delete old
      array.concat new
    end
  end
end
|
93
|
+
|
94
|
+
# A regular-expression fragment represented as a nondeterministic automaton.
# State 0 is the start state and +accept+ is the index of the single
# accepting state inside +automata+.
class Pattern
  attr_reader :automata, :accept

  # Pattern matching +literal+ byte by byte.
  def self.from_string literal
    # One transition is added per byte, so the state count must follow the
    # byte size; the character count is too small for multi-byte literals.
    automata = Automata.new(literal.bytesize + 1)
    state = 0
    literal.each_byte do |byte|
      automata.connect state, CharacterSet::Interval.new(byte).to_char_set, state + 1
      state += 1
    end
    new automata, state
  end

  # Pattern matching exactly one character out of +set+.
  def self.from_char_set set
    automata = Automata.new 2
    automata.connect 0, set, 1
    new automata, 1
  end

  # Pattern matching the given patterns one after another.
  def self.concat *patterns
    automata = Automata.new
    # With no operands (e.g. pattern[0]) the result must still own a start
    # state, otherwise accept state 0 would not exist in the automaton.
    automata.new_state if patterns.empty?
    index = 0
    patterns.each do |pattern|
      start = automata.merge! pattern.automata
      automata.connect index, Epsilon, start
      index = start + pattern.accept
    end
    new automata, index
  end

  def initialize automata, accept
    @automata, @accept = automata, accept
  end

  # Zero or more repetitions.
  def kleene
    repeated = @automata.dup
    repeated.connect 0, Epsilon, @accept
    repeated.connect @accept, Epsilon, 0
    Pattern.new repeated, @accept
  end
  alias :zero_or_more :kleene

  # One or more repetitions.
  def iterate
    repeated = @automata.dup
    repeated.connect @accept, Epsilon, 0
    Pattern.new repeated, @accept
  end
  alias :one_or_more :iterate

  # Zero or one occurrence.
  def opt
    optional = @automata.dup
    optional.connect 0, Epsilon, @accept
    Pattern.new optional, @accept
  end
  alias :zero_or_one :opt

  # Complement: determinizes the pattern, sends every character a state does
  # not handle to a looping sink state, then adds a new accept state reached by
  # epsilon from every state that was not accepting.
  def negate
    deterministic, accepts = automata.determinize [@accept]
    sink = deterministic.new_state
    deterministic.connect sink, CharacterSet.any, sink
    sink.times do |state|
      joint = CharacterSet.any
      deterministic.table[state].each { |tran| joint.delete tran.symbols }
      deterministic.connect state, joint, sink unless joint.empty?
    end
    accept = deterministic.new_state
    accept.times do |state|
      deterministic.connect state, Epsilon, accept unless accepts.include? state
    end
    Pattern.new deterministic, accept
  end
  alias :not :negate

  # Between +least+ and +most+ repetitions (exactly +least+ by default).
  def [] least, most = least
    Pattern.concat(*(Array.new(least, self) + Array.new(most - least, self.opt)))
  end

  # Alternation of this pattern and +other+.
  def | other
    automata = Automata.new 2
    [self, other].each do |pattern|
      start = automata.merge! pattern.automata
      automata.connect 0, Epsilon, start
      automata.connect start + pattern.accept, Epsilon, 1
    end
    Pattern.new automata, 1
  end

  # Concatenation of the complement of (any* self any*) with self.
  # NOTE(review): presumably "everything up to the first occurrence of self" - confirm.
  def ~
    any = Pattern.from_char_set(CharacterSet.any).kleene
    Pattern.concat(Pattern.concat(any, self, any).negate, self)
  end
end
|
174
|
+
|
175
|
+
# A finite automaton stored as an adjacency table: table[state] is the list of
# Transition structs leaving that state.
class Automata
  attr_reader :table
  Transition = Struct.new(:symbols, :destination)

  # +table+ is either an existing transition table (used as is) or the number
  # of empty states to create.
  def initialize(table=[])
    case table
    when Array
      @table = table
    when Integer # Fixnum no longer exists on Ruby 3.2+
      @table = []
      table.times { @table << [] }
    end
  end

  # Adds a transition and returns +destination+.
  def connect start, symbols, destination
    @table[start] << Transition.new(symbols, destination)
    destination
  end

  # Appends a copy of +other+'s states, shifting their destinations, and
  # returns the index at which the copied start state now lives.
  def merge! other
    start = @table.length
    other.table.each do |transitions|
      @table << transitions.map { |tran| Transition.new(tran.symbols, tran.destination + start) }
    end
    start
  end

  # New automaton with every transition pointing the other way.
  def reverse
    reversed = []
    @table.length.times { reversed << [] }
    @table.each_with_index do |transitions, source|
      transitions.each do |tran|
        reversed[tran.destination] << Transition.new(tran.symbols, source)
      end
    end
    Automata.new reversed
  end

  # Copy with its own per-state transition lists; the Transition structs
  # themselves are shared.
  def dup
    Automata.new(@table.map { |transitions| transitions.dup })
  end

  # Sweeps over the interval end points of all transitions leaving +states+
  # and yields (destinations, symbols) for every maximal range of characters
  # that leads to the same set of destinations.
  def alphabet states
    points = []
    states.each do |state|
      @table[state].each { |tran| points.concat tran.symbols.to_points(tran.destination) }
    end
    # Order by character; at the same character starts come before ends.
    points.sort! do |x, y|
      if x.char != y.char
        x.char < y.char ? -1 : 1
      elsif x.is_start == y.is_start
        0
      else
        x.is_start ? -1 : 1
      end
    end
    reachable_states = []
    points.each_with_index do |point, index|
      if point.is_start
        reachable_states << point.destination
      else
        # Close exactly one interval. Array#delete would drop every copy of
        # the destination and lose the tail of an overlapping interval that
        # leads to the same destination.
        closed = reachable_states.index point.destination
        reachable_states.delete_at closed if closed
        next if reachable_states.empty?
      end
      symbols = range(point, points[index + 1])
      yield reachable_states.uniq, symbols if symbols
    end
  end

  # Subset construction; returns [deterministic_automaton, accept_states].
  def determinize accepts
    SubsetDeterminizer.new(self, accepts).determinize
  end

  # Appends an empty state and returns its index.
  def new_state
    @table << []
    @table.length - 1
  end

  def all_states
    (0..table.length - 1).to_a
  end

  private

  # Character set strictly between two consecutive sweep points, or nil when
  # that range is empty.
  def range point_a, point_b
    start_point = point_a.is_start ? point_a.char : (point_a.char + 1)
    end_point = point_b.is_start ? point_b.char - 1 : point_b.char
    start_point > end_point ? nil : CharacterSet::Interval.new(start_point, end_point).to_char_set
  end
end
|
259
|
+
|
260
|
+
# Subset construction: turns a nondeterministic Automata into a deterministic
# one. Every deterministic state stands for a sorted list of nondeterministic
# states (kept in @dstates under the same index).
class SubsetDeterminizer
  def initialize nondeterministic, accepts
    @nondeterministic = nondeterministic
    @accepts = accepts
    @deterministic = Automata.new
    @unmarked = []
    @dstates = []
    @accept_states = {}
    unmark closure([0])
  end

  # Returns [deterministic_automaton, accept_states], where accept_states maps
  # a deterministic state to the accepting nondeterministic states it contains.
  def determinize
    until @unmarked.empty?
      start = @unmarked.pop
      @nondeterministic.alphabet(@dstates[start]) do |states, symbols|
        target = closure(states)
        destination = @dstates.index(target) || unmark(target)
        @deterministic.connect start, symbols, destination
      end
    end
    [@deterministic, @accept_states]
  end

  private

  # Registers a new deterministic state for +states+, queues it for
  # processing and returns its index.
  def unmark states
    @dstates << states
    state = @deterministic.new_state
    @unmarked.push state
    accepting = states.select { |x| @accepts.include? x }
    @accept_states[state] = accepting unless accepting.empty?
    state
  end

  # Sorted epsilon closure of +states+.
  def closure states
    reached = states.dup
    pending = states.dup
    until pending.empty?
      @nondeterministic.table[pending.pop].each do |tran|
        next unless tran.symbols == Epsilon
        next if reached.include?(tran.destination)
        reached << tran.destination
        pending << tran.destination
      end
    end
    reached.sort!
  end
end
|
300
|
+
|
301
|
+
# A set of character codes kept as a list of inclusive Interval structs.
class CharacterSet
  # Interval is an inclusive first..last range of character codes; Point is
  # one end of an interval tagged with the transition destination it belongs
  # to (see #to_points).
  Interval = Struct.new(:first, :last)
  Point = Struct.new(:char, :is_start, :destination)

  Interval.class_eval do
    # A single argument builds a one-character interval.
    def initialize first, last = first
      super first, last
    end

    # Accepts a character code or a String (its first byte is used).
    def include? char
      # String#[] returns a String on Ruby 1.9+, which cannot be compared with
      # the integer bounds; getbyte keeps the byte-based semantics.
      char = char.getbyte(0) if char.kind_of? String
      self.first <= char && char <= self.last
    end

    # Grows this interval to cover +other+ when the two overlap or are
    # adjacent (including when +other+ completely contains this interval).
    # Returns true if merged, false otherwise.
    def merge! other
      return false unless other.first <= self.last + 1 && self.first <= other.last + 1
      self.first = [self.first, other.first].min
      self.last = [self.last, other.last].max
      true
    end

    def to_char_set
      CharacterSet.new self
    end
  end

  attr_reader :intervals

  # The set of all character codes 0..65535.
  def self.any
    Interval.new(0, 65535).to_char_set
  end

  def initialize *intervals
    @intervals = intervals
  end

  # Union with another CharacterSet, or with the bytes of other.to_s.
  def + other
    result = dup
    if other.kind_of? CharacterSet
      other.intervals.each { |interval| result.add_interval interval.first, interval.last }
    else
      other.to_s.each_byte { |byte| result.add_interval byte }
    end
    result
  end

  # Difference with another CharacterSet, or with the bytes of other.to_s.
  def - other
    result = dup
    if other.kind_of? CharacterSet
      other.intervals.each { |interval| result.delete_interval interval.first, interval.last }
    else
      other.to_s.each_byte { |byte| result.delete_interval byte }
    end
    result
  end

  # Adds a Range of codes, or every byte of obj.to_s, in place.
  def << obj
    obj.kind_of?(Range) ? add_interval(obj.first, obj.last) : obj.to_s.each_byte { |x| add_interval x }
  end

  # Removes a Range, a CharacterSet, or every byte of obj.to_s, in place.
  def delete obj
    case obj
    when Range
      delete_interval obj.first, obj.last
    when CharacterSet
      obj.intervals.each { |interval| delete_interval interval.first, interval.last }
    else
      obj.to_s.each_byte { |x| delete_interval x }
    end
  end

  def include? char
    @intervals.any? { |x| x.include? char }
  end

  def empty?
    @intervals.empty?
  end

  # Start and end Points of every interval, tagged with +destination+.
  def to_points destination
    @intervals.inject([]) do |points, interval|
      points << Point.new(interval.first, true, destination)
      points << Point.new(interval.last, false, destination)
    end
  end

  # Copy with its own Interval structs.
  def dup
    CharacterSet.new(*@intervals.map { |interval| interval.dup })
  end

  protected

  def add_interval first, last = first
    interval = Interval.new first, last
    @intervals << interval unless @intervals.any? { |x| x.merge! interval }
  end

  # Removes first..last from the set. Every stored interval that overlaps the
  # range is cut, including intervals that lie completely inside it.
  def delete_interval first, last = first
    doomed = Interval.new first, last
    affected, untouched = @intervals.partition do |x|
      x.first <= doomed.last && doomed.first <= x.last
    end
    return if affected.empty?
    @intervals.replace untouched
    affected.each do |x|
      add_new_interval x.first, doomed.first - 1
      add_new_interval doomed.last + 1, x.last
    end
  end

  def add_new_interval first, last
    @intervals << Interval.new(first, last) if first <= last
  end
end
|
421
|
+
|
422
|
+
# The empty character set, used as the label of epsilon transitions.
# CharacterSet defines no ==, so `symbols == Epsilon` checks compare against
# this one object by identity.
Epsilon = CharacterSet.new
|
423
|
+
end
|
@@ -0,0 +1,445 @@
|
|
1
|
+
require 'set'
|
2
|
+
|
3
|
+
module Aurum
|
4
|
+
# Grammar value objects. Note that inside this module the constant Symbol
# refers to this Struct, not to Ruby's core Symbol class.
Symbol = Struct.new(:name, :is_terminal)
Production = Struct.new(:nonterminal, :symbols)

# Parser actions stored in the parsing table.
ShiftAction = Struct.new(:state, :is_lookahead_shift)
ReduceAction = Struct.new(:handle, :is_read_reduce)

# Reserved grammar symbols: the augmented start nonterminal and the
# end-of-input terminal.
START = Symbol.new('$start', false)
EOF = Symbol.new('$eof', true)
|
8
|
+
|
9
|
+
class ParsingTableGenerator
|
10
|
+
attr_reader :symbols, :productions
|
11
|
+
|
12
|
+
DEFAULT_ASSOCIATIVITIES = {:left => [], :right => []}
|
13
|
+
|
14
|
+
# definition      - object answering [] with a nonterminal and returning its productions
# precedences     - list of terminal groups, handed to PrecedenceTable
# associativities - hash with :left and :right terminal lists
def initialize(definition, precedences = [], associativities = DEFAULT_ASSOCIATIVITIES)
  @precedence_table = PrecedenceTable.new(precedences, associativities)
  @definition = definition
end
|
18
|
+
|
19
|
+
# Augments the grammar with $start -> start, collects the symbols and
# productions reachable from +start+, and computes nullable symbols and
# FIRST sets. Returns self so calls can be chained.
def start_from start
  @start_production = Aurum::Production.new START, [start]
  @symbols = []
  @productions = [@start_production]
  @nullables = []
  @first_sets = { START => [] }
  find_all_used_symbols_and_productions start
  compute_nullable_symbols
  compute_first_sets
  self
end
|
27
|
+
|
28
|
+
# Builds the parsing table and returns [table, lookahead_level]. The table
# has one Hash per state, mapping a symbol to the first action recorded for
# it and falling back to the state's default action for other symbols.
def parsing_table
  construct_LR0_automata
  unless @states.all? { |state| state.consistent? }
    compute_LALR_1_lookahead
    compute_LALR_n_lookahead if @states.any? { |state| state.conflicted? }
  end
  table = @states.map do |state|
    row = Hash.new default_action(state)
    state.actions.each { |symbol, actions| row[symbol] = actions.to_a.first }
    row
  end
  [table, @lookahead_level]
end
|
42
|
+
|
43
|
+
private
|
44
|
+
# Walks the grammar from +start+ and records every reachable symbol in
# @symbols, every production of a reachable nonterminal in @productions, and
# seeds @first_sets (a terminal's FIRST set is itself, a nonterminal's starts
# empty).
def find_all_used_symbols_and_productions start
  unvisited = [start]
  until unvisited.empty?
    visiting = unvisited.pop
    # A symbol can be queued several times before its first visit (e.g. the
    # production S -> A A); visiting it again would duplicate its entries in
    # @symbols and @productions.
    next if @symbols.include? visiting
    @symbols << visiting
    @first_sets[visiting] = visiting.is_terminal ? [visiting] : []
    next if visiting.is_terminal
    @definition[visiting].each do |production|
      @productions << production
      production.symbols.each do |symbol|
        unvisited << symbol unless @symbols.include? symbol
      end
    end
  end
end
|
58
|
+
|
59
|
+
# Fixed-point computation of @nullables: a nonterminal is nullable as soon as
# one of its productions consists only of nullable symbols (an empty
# right-hand side qualifies).
def compute_nullable_symbols
  loop do
    changed = false
    @productions.each do |production|
      next if nullable? production.nonterminal
      next unless production.symbols.all? { |symbol| nullable? symbol }
      @nullables << production.nonterminal
      changed = true
    end
    break unless changed
  end
end
|
70
|
+
|
71
|
+
# Fixed-point computation of FIRST sets: FIRST(nonterminal) receives the
# FIRST set of each leading symbol of its productions, moving past a symbol
# only while it is nullable. The stored arrays are updated in place.
def compute_first_sets
  loop do
    changed = false
    @productions.each do |production|
      first_set = @first_sets[production.nonterminal]
      production.symbols.each do |symbol|
        size_before = first_set.length
        first_set.replace(first_set | @first_sets[symbol])
        changed = true if first_set.length != size_before
        break unless nullable? symbol
      end
    end
    break unless changed
  end
end
|
83
|
+
|
84
|
+
# True when +symbol+ has been recorded in @nullables.
def nullable?(symbol)
  @nullables.include?(symbol)
end
|
87
|
+
|
88
|
+
# Builds the LR(0) states in @states, starting from the closure of the
# augmented start item. For each item whose dot is not at the end, the goto
# state is computed; a state consisting of a single completed item becomes a
# read-reduce action on the source state instead of a separate state.
def construct_LR0_automata
  @lookahead_level = 0
  start_state = State.new closure([LRItem.new(@start_production, 0)])
  @states = [start_state]
  unvisited = [start_state]
  until unvisited.empty?
    visiting = unvisited.pop
    visiting.grep_each '!x.handle?' do |item|
      symbol = item.dot_symbol
      target = goto visiting, symbol
      read_reduce = target.read_reduce
      if read_reduce
        visiting[symbol] << ReduceAction.new(@productions.index(read_reduce), true)
      else
        index = @states.index(target)
        if index
          # Reuse the already registered, equal state.
          target = @states[index]
        else
          @states << target
          unvisited << target
          index = @states.length - 1
        end
        visiting[symbol] << ShiftAction.new(index, false)
        target.predsucceors << visiting
      end
    end
  end
end
|
112
|
+
|
113
|
+
# Adds reduce actions to every inconsistent state: for each completed item,
# a reduce is recorded under every lookahead in the follow set computed from
# the states reached by walking the production backwards. When the lookahead
# currently has only shift actions and is a declared operator, the precedence
# table decides: the reduce replaces the shift if compare(...) >= 0, otherwise
# the shift is kept untouched.
def compute_LALR_1_lookahead
  @lookahead_level = 1
  @lookahead_config_stack = []
  @lookahead_indicitor = {}
  @lookahead_result = {}
  @states.grep_each '!x.consistent?' do |state|
    state.grep_each 'x.handle?' do |handle|
      production = handle.production
      state.predsucceors(production.symbols.reverse).each do |predsucceor|
        compute_follow_set(predsucceor, production.nonterminal).each do |lookahead|
          operator_shift = state.only_shift?(lookahead) && @precedence_table.operator?(lookahead)
          if !operator_shift
            state[lookahead] << ReduceAction.new(@productions.index(production), false)
          elsif @precedence_table.compare(production.operator, lookahead) >= 0
            state[lookahead].clear
            state[lookahead] << ReduceAction.new(@productions.index(production), false)
          end
        end
      end
    end
  end
end
|
134
|
+
|
135
|
+
# For every state that still has several actions under one lookahead,
# collects for each action the set of state stacks ("sources") it originates
# from and hands them to resolve_conficts. @lookahead_level ends up as the
# largest level reached while resolving.
def compute_LALR_n_lookahead
  @stack_seen = []
  @states.grep_each 'x.conflicted?' do |state|
    @current_lookahead_level = 1
    state.conflicted_actions.each do |lookahead, actions|
      sources = {}
      actions.each do |action|
        if action.kind_of? ShiftAction
          sources[action] = Set.new([[state]])
          next
        end
        handle = @productions[action.handle]
        sources[action] = action.is_read_reduce ? Set.new([[state]]) : Set.new
        state.predsucceors(handle.symbols.reverse).each do |predsucceor|
          @follow_sources_visited = []
          sources[action] |= follow_sources [predsucceor], handle.nonterminal, lookahead
        end
      end
      resolve_conficts state, lookahead, sources
    end
    @lookahead_level = [@current_lookahead_level, @lookahead_level].max
  end
end
|
158
|
+
|
159
|
+
# LR(0) closure: returns a copy of +items+ extended with a dot-at-start item
# for every production of each nonterminal that appears right after a dot.
def closure items
  result = items.dup
  pending = items.dup
  until pending.empty?
    current = pending.pop
    next if current.handle? || current.dot_symbol.is_terminal
    @definition[current.dot_symbol].each do |production|
      item = LRItem.new production, 0
      next if result.include? item
      result << item
      pending << item
    end
  end
  result
end
|
170
|
+
|
171
|
+
# Closure of the items of +items+ whose dot stands before +symbol+, with the
# dot moved one position to the right.
def goto items, symbol
  advanced = State.new []
  items.each do |item|
    next unless item.dot_symbol == symbol
    advanced << LRItem.new(item.production, item.position + 1)
  end
  closure advanced
end
|
176
|
+
|
177
|
+
# Lookahead terminals that may follow +nonterminal+ after a transition out of
# +state+. Accepts either (state, nonterminal) or a single Configuration.
#
# Results are kept in @lookahead_result. @lookahead_config_stack and
# @lookahead_indicitor track the configurations currently being computed so
# that mutually dependent configurations end up with the same result.
# NOTE(review): this looks like the digraph traversal used for LALR(1)
# lookahead computation - confirm against the intended algorithm.
def compute_follow_set state, nonterminal = nil
  if state.kind_of? Configuration
    config = state
    state = config.state
    nonterminal = config.symbol
  else
    config = Configuration.new state, nonterminal
  end

  if START == nonterminal
    # Only end-of-input can follow the augmented start symbol.
    @lookahead_indicitor[config] = 65535
    @lookahead_result[config] = [EOF]
    return @lookahead_result[config]
  end

  @lookahead_config_stack.push config
  depth = @lookahead_config_stack.length
  @lookahead_indicitor[config] = depth
  @lookahead_result[config] = read_set state, nonterminal
  each_included_by state, nonterminal do |p, b|
    included = Configuration.new p, b
    compute_follow_set included unless @lookahead_indicitor[included]
    @lookahead_indicitor[config] = [@lookahead_indicitor[config], @lookahead_indicitor[included]].min
    @lookahead_result[config] |= @lookahead_result[included]
  end
  if @lookahead_indicitor[config] == depth
    # Pop everything stacked above (and including) this configuration; all of
    # them receive a copy of this result and are marked as finished.
    loop do
      connected = @lookahead_config_stack.pop
      @lookahead_result[connected] = @lookahead_result[config].dup
      @lookahead_indicitor[connected] = 65535
      break if connected == config
    end
  end
  @lookahead_result[config]
end
|
206
|
+
|
207
|
+
# Collects the state stacks from which +lookahead+ can be read after
# +nonterminal+ has been recognized on top of +stack+. Returns a Set of
# stacks, or an empty Array when a single-state stack has already been
# visited for this nonterminal (tracked in @follow_sources_visited).
def follow_sources stack, nonterminal, lookahead
  top = stack.last
  if stack.length == 1
    config = Configuration.new top, nonterminal
    return [] if @follow_sources_visited.include?(config)
    @follow_sources_visited |= [config]
  end
  stacks = Set.new
  target_index = top.goto(nonterminal)
  if target_index
    target = @states[target_index]
    stacks = Set.new([stack + [target]]) if target.direct_read.include?(lookahead)
  end
  each_read_by(top, nonterminal) do |q, y|
    stacks |= follow_sources(stack + [q], y, lookahead) unless y.is_terminal
  end
  waiting_items = lambda {|x| x.kernel? && !x.start? && x.dot_symbol == nonterminal}
  top.grep_each(waiting_items) do |item|
    c = item.production.nonterminal
    if item.position < stack.length
      # The production started inside the known stack: cut the stack back.
      stacks |= follow_sources stack.slice(0..-item.position-1), c, lookahead
    else
      # The production started below the known stack: continue from the
      # predecessors reached by walking back over the missing prefix.
      first_part = item.production.symbols.slice 0..-stack.length-1
      stack[0].predsucceors(first_part).reverse.each do |q|
        stacks |= follow_sources [q], c, lookahead
      end
    end
  end
  stacks
end
|
231
|
+
|
232
|
+
# Replaces the conflicting actions of +state+ under +lookahead+ by a
# lookahead-shift into a fresh state, then distributes the original actions
# inside that state according to the next lookahead of each source stack.
# Conflicts remaining in the new state are resolved recursively.
#
# sources - Hash of action => Set of state stacks the action originates from.
#
# Raises 'not LALR(n)' when a source stack is encountered a second time.
def resolve_conficts state, lookahead, sources
  @current_lookahead_level += 1
  lookahead_state = State.new([])
  @states << lookahead_state
  state[lookahead].replace [ShiftAction.new((@states.length - 1), true)]
  sources.each do |action, stacks|
    stacks.each do |stk|
      raise 'not LALR(n)' if @stack_seen.include? stk
      @stack_seen << stk
      next_lookaheads(stk, lookahead).each { |a| lookahead_state[a] << action }
    end
  end
  lookahead_state.conflicted_actions.each do |next_lookahead, actions|
    new_sources = {}
    actions.each do |action|
      new_sources[action] = Set.new
      sources[action].each do |stk|
        @follow_sources_visited = []
        new_sources[action] |= follow_sources stk, lookahead, next_lookahead
      end
    end
    # Recurse only after the sources of every conflicting action are known;
    # the call rewrites the action set that was just iterated.
    resolve_conficts lookahead_state, next_lookahead, new_sources
  end
end
|
257
|
+
|
258
|
+
# Terminals that can follow +lookahead+ when the parser stack ends with
# +stack+. End-of-input is only ever followed by end-of-input.
def next_lookaheads stack, lookahead
  return [EOF] if EOF == lookahead
  top = stack.last
  lookaheads = read_set top, lookahead
  waiting_items = lambda {|x| x.kernel? && !x.start? && x.dot_symbol == lookahead}
  top.grep_each(waiting_items) do |item|
    c = item.production.nonterminal
    if item.position < stack.length
      # The production started inside the known stack: cut the stack back.
      lookaheads |= next_lookaheads stack.slice(0..-item.position-1), c
    else
      # The production started below the known stack: use the follow sets of
      # the predecessors reached by walking back over the missing prefix.
      first_part = item.production.symbols.slice 0..-stack.length-1
      stack[0].predsucceors(first_part).reverse.each do |q|
        lookaheads |= compute_follow_set q, c
      end
    end
  end
  lookaheads
end
|
273
|
+
|
274
|
+
# Union of the FIRST sets of every symbol yielded by each_read_by for
# (+state+, +symbol+).
def read_set state, symbol
  terminals = []
  each_read_by(state, symbol) do |_state, read_symbol|
    terminals |= @first_sets[read_symbol]
  end
  terminals
end
|
279
|
+
|
280
|
+
# Looks up the state reached from +state+ by shifting +symbol+ and, for every
# item of that state, yields (+state+, s) for the symbols s after the dot,
# stopping after the first one that is not nullable. Yields nothing when
# +state+ has no shift on +symbol+.
# NOTE(review): the yielded state is the source +state+, not the state that
# was reached - confirm this is what the callers expect.
def each_read_by state, symbol
  index = state.goto symbol
  return unless index
  @states[index].each do |item|
    item.second_part.each do |read_symbol|
      yield state, read_symbol
      break unless nullable? read_symbol
    end
  end
end
|
289
|
+
|
290
|
+
# For every item of +state+, scans the item's production from its last
# symbol backwards while the symbols already passed are nullable. Whenever
# the scanned symbol equals +nonterminal+, yields (predecessor, left-hand
# side) for each state reached by walking back from +state+ over the symbols
# that precede it in the production. Does nothing when +state+ is nil.
def each_included_by state, nonterminal
  return unless state
  state.each do |item|
    symbols = item.production.symbols
    symbols.reverse.each_with_index do |symbol, index|
      prefix = symbols.slice 0, symbols.length - index - 1
      if nonterminal == symbol
        state.predsucceors(prefix.reverse).each { |s| yield s, item.production.nonterminal }
      end
      break unless nullable? symbol
    end
  end
end
|
300
|
+
|
301
|
+
# Action used for symbols that have no explicit entry in a state's row.
#
# For a state with items: a plain reduce by the completed production with the
# shortest right-hand side (the augmented start production is never chosen),
# or nil when the state has no such item.
# For an item-less state (the lookahead states created while resolving
# conflicts): the recorded non-read reduce whose production has the shortest
# right-hand side, or nil when there is none.
def default_action state
  if !state.empty?
    shortest = nil
    state.each do |item|
      production = item.production
      next unless item.handle? && production.nonterminal != START
      if shortest.nil? || shortest.production.symbols.length > production.symbols.length
        shortest = item
      end
    end
    shortest ? ReduceAction.new(@productions.index(shortest.production), false) : nil
  else
    candidates = state.actions.values.inject([]) do |found, actions|
      found | actions.find_all { |x| x.kind_of?(ReduceAction) && !x.is_read_reduce }
    end
    # ReduceAction#handle is an index into @productions.
    candidates.min do |x, y|
      @productions[x.handle].symbols.length <=> @productions[y.handle].symbols.length
    end
  end
end
|
317
|
+
|
318
|
+
# Operator precedence and associativity lookup.
#
# +precedences+ is a list of terminal groups; groups listed earlier receive a
# higher level. +associativities+ is a hash whose :right entry lists the
# right-associative terminals.
class PrecedenceTable
  def initialize precedences, associativities
    @associativities = associativities
    @precedence_table = {}
    precedences.reverse.each_with_index do |terminals, level|
      terminals.each { |terminal| @precedence_table[terminal] = level }
    end
  end

  # True when +symbol+ was given a precedence level.
  def operator? symbol
    @precedence_table.has_key? symbol
  end

  # Compares the level of +current+ with that of +lookahead+ using <=>.
  # On equal levels the result is -1 when both are right-associative and 1
  # otherwise.
  def compare current, lookahead
    current_level = @precedence_table[current]
    lookahead_level = @precedence_table[lookahead]
    return current_level <=> lookahead_level unless current_level == lookahead_level
    right = @associativities[:right]
    right.include?(current) && right.include?(lookahead) ? -1 : 1
  end
end
|
340
|
+
|
341
|
+
# Extends the Production struct with a settable +action+ and with the lookup
# of the production's operator: its last terminal symbol (nil when the
# right-hand side contains no terminal).
Production.class_eval do
  attr_accessor :action

  def operator
    symbols.reverse_each.find { |symbol| symbol.is_terminal }
  end
end
|
347
|
+
|
348
|
+
# A parser state: an Array of LR items plus the actions recorded per symbol
# and the list of states with a transition into this one.
class State < Array
  attr_reader :actions

  def initialize elements
    super elements
    @actions = {}
    @predsucceors = []
  end

  # Set of actions recorded under +symbol+; an empty Set is created and stored
  # on first access. Note that this replaces Array#[] for states.
  def [] symbol
    @actions[symbol] ||= Set.new
  end

  # False as soon as the items seen so far contain two completed items, or one
  # completed item together with another kernel item.
  def consistent?
    handles = 0
    kernels = 0
    each do |item|
      handles += 1 if item.handle?
      kernels += 1 if item.kernel?
      return false if handles > 1 || (handles == 1 && handles != kernels)
    end
    true
  end

  # True for an inconsistent state that has several actions under one symbol.
  def conflicted?
    return false if consistent?
    @actions.any? { |symbol, actions| actions.length > 1 }
  end

  # [symbol, actions] pairs that hold more than one action.
  def conflicted_actions
    @actions.find_all { |symbol, actions| actions.length > 1 }
  end

  # True when +symbol+ has at least one action and all of them are shifts.
  def only_shift? symbol
    return false if self[symbol].empty?
    @actions[symbol].all? { |action| action.kind_of? ShiftAction }
  end

  # The production to reduce when this state consists of exactly one item
  # and that item is completed; nil otherwise.
  def read_reduce
    return nil unless length == 1 && first.handle?
    first.production
  end

  # Index of the state shifted to on +symbol+, or nil without a shift action.
  def goto symbol
    shift = self[symbol].find { |action| action.kind_of? ShiftAction }
    shift && shift.state
  end

  # Without an argument: the list of direct predecessor states.
  # With +symbols+: the states reached by stepping back once per symbol,
  # keeping at each step only predecessors that have an item with the dot in
  # front of that symbol.
  def predsucceors(symbols = nil)
    return @predsucceors unless symbols
    symbols.inject([self]) do |frontier, symbol|
      frontier.inject([]) do |found, state|
        found | state.predsucceors.find_all do |predsucceor|
          predsucceor.any? { |item| item.dot_symbol == symbol }
        end
      end
    end
  end

  # Distinct symbols that stand right after a dot in this state's items.
  def direct_read
    inject([]) do |symbols, item|
      dot_symbol = item.dot_symbol
      dot_symbol ? symbols | [dot_symbol] : symbols
    end
  end

  # States are equal when they hold the same items, regardless of order.
  def == other
    return false unless other.kind_of? State
    return true if equal? other
    length == other.length && all? { |item| other.include? item }
  end
end
|
416
|
+
|
417
|
+
# LRItem: a production with a dot position. Configuration: a (state, symbol)
# pair used as the key of the lookahead tables.
LRItem = Struct.new(:production, :position)
Configuration = Struct.new(:state, :symbol)

LRItem.class_eval do
  # Symbol right after the dot, or nil when the dot is at the end.
  def dot_symbol
    production.symbols[position]
  end

  # True for items of the augmented start production.
  def start?
    production.nonterminal == START
  end

  # True when the dot is at the end of the production.
  def handle?
    production.symbols.length <= position
  end

  # True for completed items and for items whose dot is not at the beginning.
  def kernel?
    position != 0 || handle?
  end

  # Symbols before the dot.
  def first_part
    production.symbols.slice(0, position)
  end

  # Symbols from the dot to the end (empty for a completed item).
  def second_part
    return [] if handle?
    production.symbols.slice(position..-1)
  end
end
|
444
|
+
end
|
445
|
+
end
|