aurum 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/example/expression/expression.rb +29 -0
- data/lib/aurum.rb +10 -0
- data/lib/aurum/engine.rb +173 -0
- data/lib/aurum/grammar.rb +234 -0
- data/lib/aurum/lexical_table_generator.rb +423 -0
- data/lib/aurum/parsing_table_generator.rb +445 -0
- data/test/engine/lexer_test.rb +52 -0
- data/test/engine/semantic_attributes_test.rb +15 -0
- data/test/grammar_definition/character_class_definition_test.rb +28 -0
- data/test/grammar_definition/grammar_definition_test.rb +54 -0
- data/test/grammar_definition/lexical_definition_test.rb +56 -0
- data/test/grammar_definition/operator_precedence_definition_test.rb +35 -0
- data/test/grammar_definition/production_definition_test.rb +60 -0
- data/test/lexical_table_generator/automata_test.rb +74 -0
- data/test/lexical_table_generator/character_set_test.rb +73 -0
- data/test/lexical_table_generator/interval_test.rb +36 -0
- data/test/lexical_table_generator/pattern_test.rb +109 -0
- data/test/lexical_table_generator/subset_determinizer_test.rb +19 -0
- data/test/lexical_table_generator/table_generator_test.rb +126 -0
- data/test/parsing_table_generator/augmented_grammar_test.rb +45 -0
- data/test/parsing_table_generator/lalr_n_computation_test.rb +89 -0
- data/test/parsing_table_generator/lr_0_automata_test.rb +91 -0
- data/test/parsing_table_generator/lr_item_test.rb +33 -0
- data/test/parsing_table_generator/parsing_table_state_test.rb +39 -0
- data/test/parsing_table_generator/precedence_table_test.rb +28 -0
- data/test/parsing_table_generator/production_test.rb +9 -0
- data/test/test_helper.rb +103 -0
- metadata +78 -0
@@ -0,0 +1,423 @@
|
|
1
|
+
module Aurum
|
2
|
+
# Lexer actions attached to accepting states of the lexical automaton.
RecognizeTokenAction = Struct.new(:token)
ChangeStateAction = Struct.new(:state)
UserDefinedAction = Struct.new(:action)
RecognizeTokenAndChangeStateAction = Struct.new(:token, :state)

# Token-recognition action carrying the reserved '$ignore' token name.
IgnoreAction = RecognizeTokenAction.new('$ignore')
|
5
|
+
|
6
|
+
# Turns a lexical specification (lexical state => {pattern => action}) into a
# deterministic transition table plus a map of accepting states to actions.
# Patterns stored under the :all key are merged into every lexical state.
class LexicalTableGenerator
  attr_reader :lexical_states

  # specification: hash keyed by lexical state; the :all entry is optional.
  def initialize specification
    @specification = specification
    @accept_states = {}
    @lexical_states = @specification.keys - [:all]
    @patterns_for_all = specification[:all] || {}
  end

  # Returns [table, accept_states]. The partition-refined table is used only
  # when refinement produced fewer partitions than there are states.
  def lexical_table
    construct_automata
    make_initial_partitions
    refine_partitions
    return construct_minimize_automata if @partitions.size < @lexical_automata.table.size
    [@lexical_automata.table, @accept_states]
  end

  private

  # Builds one combined automaton: state 0 branches to each lexical state on a
  # negative pseudo-character (-1, -2, ...), and each lexical state has an
  # epsilon edge to every one of its patterns. The result is then determinized.
  def construct_automata
    automata = Automata.new(1)
    accepts = {}
    @lexical_states.each_with_index do |lexical_state, index|
      lexical_state_start = automata.new_state
      selector = CharacterSet::Interval.new(-index - 1).to_char_set
      automata.connect 0, selector, lexical_state_start
      patterns = @patterns_for_all.merge(@specification[lexical_state])
      patterns.each do |pattern, action|
        pattern_start = automata.merge! pattern.automata
        automata.connect lexical_state_start, Epsilon, pattern_start
        accepts[pattern_start + pattern.accept] = action
      end
    end
    @lexical_automata, deterministic_accepts = automata.determinize accepts.keys
    deterministic_accepts.each do |deterministic_state, original_states|
      @accept_states[deterministic_state] = original_states.map { |x| accepts[x] }
    end
  end

  # Initial partitions: the start state, all non-accepting states, and one
  # group per distinct action list. Empty groups are dropped.
  def make_initial_partitions
    by_action = {}
    @accept_states.each { |state, action| (by_action[action] ||= []) << state }
    non_accepting = @lexical_automata.all_states - @accept_states.keys - [0]
    @partitions = [[0], non_accepting] + by_action.values
    @partitions.delete []
  end

  # Splits partitions against the predecessors of each pending group until the
  # work list is empty.
  def refine_partitions
    reversed = @lexical_automata.reverse
    pending = @partitions.dup
    until pending.empty?
      reversed.alphabet(pending.pop) do |sources, _symbols|
        @partitions.grep_each 'x.size > 1' do |group|
          inside, outside = group & sources, group - sources
          if !outside.empty? && outside != group
            replace @partitions, group => [inside, outside]
            if pending.include? group
              replace pending, group => [inside, outside]
            else
              pending << (inside.size <= outside.size ? inside : outside)
            end
            pending.uniq!
          end
        end
      end
    end
  end

  # Builds the reduced table with one state per partition, copying the
  # transitions and accept actions of each partition's first member.
  def construct_minimize_automata
    minimized = Automata.new(@partitions.size)
    accepts = {}
    choose_representatives do |representative, index|
      @lexical_automata.table[representative].each do |transition|
        minimized.connect index, transition.symbols, partition_contains(transition.destination)
      end
      accepts[index] = @accept_states[representative] if @accept_states.has_key? representative
    end
    [minimized.table, accepts]
  end

  # Yields (first member, partition index) for every partition.
  def choose_representatives
    @partitions.each_with_index { |partition, index| yield partition.first, index }
  end

  # Index of the partition that holds +state+.
  def partition_contains state
    @partitions.each_with_index do |partition, index|
      return index if partition.include? state
    end
  end

  # For each old => [new, ...] pair, removes +old+ from +array+ and appends the
  # replacements.
  def replace array, replacements
    replacements.each do |old, new|
      array.delete old
      array.concat new
    end
  end
end
|
93
|
+
|
94
|
+
# A regular-expression fragment: an automaton whose start state is 0 together
# with the index of its single accepting state.
class Pattern
  attr_reader :automata, :accept

  # Pattern matching +literal+ byte by byte. States are allocated per byte
  # (not per character) so multi-byte literals get one state for every byte
  # that each_byte yields.
  def self.from_string literal
    automata = Automata.new(1)
    index = 0
    literal.each_byte do |byte|
      symbols = CharacterSet::Interval.new(byte).to_char_set
      index = automata.connect index, symbols, automata.new_state
    end
    new automata, index
  end

  # Pattern matching exactly one character out of +set+.
  def self.from_char_set set
    automata = Automata.new 2
    automata.connect 0, set, 1
    new automata, 1
  end

  # Pattern matching the given patterns one after another; each accepting
  # state is joined to the start of the next pattern by an epsilon edge.
  def self.concat *patterns
    automata, index = Automata.new, 0
    patterns.each do |pattern|
      start = automata.merge!(pattern.automata)
      automata.connect index, Epsilon, start
      index = start + pattern.accept
    end
    new automata, index
  end

  def initialize automata, accept
    @automata, @accept = automata, accept
  end

  # Zero or more repetitions.
  def kleene
    kleene_automata = @automata.dup
    kleene_automata.connect 0, Epsilon, @accept
    kleene_automata.connect @accept, Epsilon, 0
    Pattern.new kleene_automata, @accept
  end
  alias :zero_or_more :kleene

  # One or more repetitions.
  def iterate
    iterate_automata = @automata.dup
    iterate_automata.connect @accept, Epsilon, 0
    Pattern.new iterate_automata, @accept
  end
  alias :one_or_more :iterate

  # Zero or one occurrence.
  def opt
    opt_automata = @automata.dup
    opt_automata.connect 0, Epsilon, @accept
    Pattern.new opt_automata, @accept
  end
  alias :zero_or_one :opt

  # Complement: determinizes the pattern, routes every character not handled
  # by a state to a looping sink state, then adds a new accepting state
  # reachable by epsilon from every state that was not accepting.
  def negate
    deterministic, accepts = automata.determinize [@accept]
    sink = deterministic.new_state
    deterministic.connect sink, CharacterSet.any, sink
    sink.times do |state|
      joint = CharacterSet.any
      deterministic.table[state].each { |tran| joint.delete tran.symbols }
      deterministic.connect state, joint, sink unless joint.empty?
    end
    accept = deterministic.new_state
    accept.times do |state|
      deterministic.connect state, Epsilon, accept unless accepts.include? state
    end
    Pattern.new deterministic, accept
  end
  alias :not :negate

  # Between +least+ and +most+ repetitions (exactly +least+ by default).
  def [] least, most = least
    Pattern.concat(*([self] * least + [self.opt] * (most - least)))
  end

  # Alternation of this pattern and +other+.
  def | other
    automata = Automata.new 2
    [self, other].each do |pattern|
      start = automata.connect(0, Epsilon, automata.merge!(pattern.automata))
      automata.connect start + pattern.accept, Epsilon, 1
    end
    Pattern.new automata, 1
  end

  # Any text that does not contain this pattern, followed by this pattern.
  def ~
    any = Pattern.from_char_set(CharacterSet.any).kleene
    return Pattern.concat(Pattern.concat(any, self, any).negate, self)
  end
end
|
174
|
+
|
175
|
+
# Transition table of a finite automaton. @table[state] is the list of
# Transition structs leaving +state+; states are plain integer indexes.
class Automata
  attr_reader :table
  Transition = Struct.new(:symbols, :destination)

  # table: an existing table (Array) to wrap, or the number of empty states to
  # create. Integer is matched instead of Fixnum, which no longer exists on
  # current Ruby versions.
  def initialize(table=[])
    case table
    when Array
      @table = table
    when Integer
      @table = []
      table.times { @table << [] }
    end
  end

  # Adds a transition and returns +destination+ so calls can be chained.
  def connect start, symbols, destination
    @table[start] << Transition.new(symbols, destination)
    destination
  end

  # Appends a shifted copy of +other+'s states and returns the index that
  # other's state 0 received.
  def merge! other
    start = @table.length
    other.table.each do |transitions|
      @table << transitions.map { |tran| Transition.new(tran.symbols, tran.destination + start) }
    end
    start
  end

  # New automaton with every transition reversed.
  def reverse
    reversed = @table.map { [] }
    @table.each_with_index do |transitions, index|
      transitions.each { |tran| reversed[tran.destination] << Transition.new(tran.symbols, index) }
    end
    Automata.new reversed
  end

  # Copy whose per-state transition lists are duplicated (Transition structs
  # themselves are shared).
  def dup
    Automata.new(@table.map { |transitions| transitions.dup })
  end

  # Cuts the symbols leaving +states+ into disjoint ranges and yields
  # (destinations, symbols) for each range. Interval boundaries are swept in
  # character order while tracking the destinations currently open.
  def alphabet states
    points = states.inject([]) do |result, state|
      @table[state].inject(result) { |r, s| r + s.symbols.to_points(s.destination) }
    end
    points.sort! do |x, y|
      x.char == y.char ? (x.is_start ? (y.is_start ? 0 : -1) : (y.is_start ? 1 : 0)) : (x.char < y.char ? -1 : 1)
    end
    reachable_states = []
    points.each_with_index do |point, index|
      if point.is_start
        reachable_states << point.destination
      else
        # Close only ONE open interval for this destination: when two
        # overlapping intervals lead to the same state the other one is still
        # open and its remaining characters must still be reported.
        position = reachable_states.index(point.destination)
        reachable_states.delete_at(position) if position
        next if reachable_states.empty?
      end
      symbols = range(point, points[index + 1])
      yield reachable_states.uniq, symbols if symbols
    end
  end

  # Delegates subset construction to SubsetDeterminizer.
  def determinize accepts
    SubsetDeterminizer.new(self, accepts).determinize
  end

  # Appends an empty state and returns its index.
  def new_state
    @table << []
    @table.length - 1
  end

  def all_states
    (0..table.length - 1).to_a
  end

  private

  # Character set strictly between two consecutive sweep points, or nil when
  # the points leave no character between them.
  def range point_a, point_b
    start_point = point_a.is_start ? point_a.char : (point_a.char + 1)
    end_point = point_b.is_start ? point_b.char - 1 : point_b.char
    start_point > end_point ? nil : CharacterSet::Interval.new(start_point, end_point).to_char_set
  end
end
|
259
|
+
|
260
|
+
# Subset construction: converts a nondeterministic automaton into a
# deterministic one while tracking which of the original accepting states
# each new state contains.
class SubsetDeterminizer
  # nondeterministic: source automaton (start state 0).
  # accepts: the source states that count as accepting.
  def initialize nondeterministic, accepts
    @nondeterministic = nondeterministic
    @accepts = accepts
    @deterministic = Automata.new
    @unmarked = []
    @dstates = []
    @accept_states = {}
    unmark closure([0])
  end

  # Returns [deterministic automaton, {new state => [original accepting states]}].
  def determinize
    until @unmarked.empty?
      start = @unmarked.pop
      @nondeterministic.alphabet(@dstates[start]) do |states, symbols|
        subset = closure(states)
        destination = @dstates.index(subset) || unmark(subset)
        @deterministic.connect start, symbols, destination
      end
    end
    [@deterministic, @accept_states]
  end

  private

  # Registers a new subset as a deterministic state still to be expanded and
  # returns that state's index.
  def unmark states
    @dstates << states
    state = @deterministic.new_state
    @unmarked.push state
    accepting = states.find_all { |x| @accepts.include? x }
    @accept_states[state] = accepting unless accepting.empty?
    state
  end

  # Sorted list of +states+ plus everything reachable from them over Epsilon
  # transitions only.
  def closure states
    reached = states.dup
    frontier = states.dup
    until frontier.empty?
      @nondeterministic.table[frontier.pop].each do |tran|
        next unless tran.symbols == Epsilon && !reached.include?(tran.destination)
        reached << tran.destination
        frontier << tran.destination
      end
    end
    reached.sort!
  end
end
|
300
|
+
|
301
|
+
# A set of character codes stored as a list of closed integer intervals.
class CharacterSet
  # Interval: closed range [first, last]. Point: one interval boundary tagged
  # with the destination state of the transition it belongs to.
  Interval = Struct.new(:first, :last)
  Point = Struct.new(:char, :is_start, :destination)

  Interval.class_eval do
    # A single-argument interval covers exactly one character.
    def initialize first, last = first
      super first, last
    end

    # +char+ may be a character code or a String; a String is represented by
    # its first byte, matching the each_byte handling used elsewhere in the
    # set. An empty String is never included.
    def include? char
      char = char.unpack('C').first if char.kind_of? String
      return false if char.nil?
      self.first <= char && char <= self.last
    end

    # Widens this interval to cover +other+ when the two overlap, touch, or
    # one contains the other. Returns true when a merge happened.
    def merge! other
      return false unless other.first <= self.last + 1 && self.first <= other.last + 1
      self.first = [self.first, other.first].min
      self.last = [self.last, other.last].max
      true
    end

    def to_char_set
      CharacterSet.new self
    end
  end

  attr_reader :intervals

  # The full range of character codes this library handles (0..65535).
  def self.any
    Interval.new(0, 65535).to_char_set
  end

  # Union with another CharacterSet, or with the bytes of other.to_s.
  # Returns a new set; the receiver is not modified.
  def + other
    result = dup
    if other.kind_of? CharacterSet
      other.intervals.each { |interval| result.add_interval interval.first, interval.last }
    else
      other.to_s.each_byte { |byte| result.add_interval byte }
    end
    result
  end

  # Difference with another CharacterSet, or with the bytes of other.to_s.
  # Returns a new set; the receiver is not modified.
  def - other
    result = dup
    if other.kind_of? CharacterSet
      other.intervals.each { |interval| result.delete_interval interval.first, interval.last }
    else
      other.to_s.each_byte { |byte| result.delete_interval byte }
    end
    result
  end

  def initialize *intervals
    @intervals = intervals
  end

  # Adds a Range of character codes, or every byte of obj.to_s, in place.
  def << obj
    obj.kind_of?(Range) ? add_interval(obj.first, obj.last) : obj.to_s.each_byte { |x| add_interval x }
  end

  # Removes a Range, another CharacterSet, or every byte of obj.to_s, in place.
  def delete obj
    case obj
    when Range
      delete_interval obj.first, obj.last
    when CharacterSet
      obj.intervals.each { |interval| delete_interval interval.first, interval.last }
    else
      obj.to_s.each_byte { |x| delete_interval x }
    end
  end

  def include? char
    @intervals.any? { |x| x.include? char }
  end

  def empty?
    @intervals.empty?
  end

  # Start and end Points of every interval, all tagged with +destination+.
  def to_points destination
    points = []
    @intervals.each do |interval|
      points << Point.new(interval.first, true, destination)
      points << Point.new(interval.last, false, destination)
    end
    points
  end

  # Copy with duplicated Interval structs.
  def dup
    CharacterSet.new(*@intervals.map { |interval| interval.dup })
  end

  protected

  # Inserts [first, last], absorbing every stored interval that overlaps or
  # touches it so the stored intervals stay disjoint.
  def add_interval first, last = first
    merged = Interval.new first, last
    @intervals.reject! { |existing| merged.merge! existing }
    @intervals << merged
  end

  # Removes [first, last] from every stored interval it intersects, keeping
  # whatever remains on either side of the removed range.
  def delete_interval first, last = first
    removed = Interval.new first, last
    overlapping, untouched = @intervals.partition do |x|
      x.first <= removed.last && removed.first <= x.last
    end
    @intervals.replace untouched
    overlapping.each do |x|
      add_new_interval x.first, removed.first - 1
      add_new_interval removed.last + 1, x.last
    end
  end

  # Appends [first, last] unless the range is empty.
  def add_new_interval first, last
    @intervals << Interval.new(first, last) if first <= last
  end
end
|
421
|
+
|
422
|
+
# Label for epsilon transitions: an empty CharacterSet. SubsetDeterminizer#closure
# recognises it with `tran.symbols == Epsilon`; CharacterSet defines no `==` in
# this file, so that check presumably falls back to object identity.
Epsilon = CharacterSet.new
|
423
|
+
end
|
@@ -0,0 +1,445 @@
|
|
1
|
+
require 'set'
|
2
|
+
|
3
|
+
module Aurum
|
4
|
+
# Grammar building blocks.
Symbol = Struct.new(:name, :is_terminal)
Production = Struct.new(:nonterminal, :symbols)

# Parsing-table actions. ShiftAction#state is a state index; ReduceAction#handle
# is an index into the generator's production list.
ShiftAction = Struct.new(:state, :is_lookahead_shift)
ReduceAction = Struct.new(:handle, :is_read_reduce)

# Reserved symbols: the augmented start nonterminal and the end-of-input terminal.
START = Symbol.new('$start', false)
EOF = Symbol.new('$eof', true)
|
8
|
+
|
9
|
+
class ParsingTableGenerator
|
10
|
+
attr_reader :symbols, :productions
|
11
|
+
|
12
|
+
# Associativity table used when the caller supplies none: no terminal is
# declared left- or right-associative.
DEFAULT_ASSOCIATIVITIES = {:left => [], :right => []}
|
13
|
+
|
14
|
+
# definition: looked up by nonterminal to obtain that nonterminal's productions.
# precedences / associativities: handed to PrecedenceTable unchanged.
def initialize(definition, precedences = [], associativities = DEFAULT_ASSOCIATIVITIES)
  @precedence_table = PrecedenceTable.new(precedences, associativities)
  @definition = definition
end
|
18
|
+
|
19
|
+
# Selects the grammar's start symbol, resets all derived data, then collects
# the reachable symbols and productions, the nullable nonterminals and the
# first sets. Returns self so calls can be chained.
def start_from start
  @start_production = Aurum::Production.new START, [start]
  @symbols = []
  @productions = [@start_production]
  @nullables = []
  @first_sets = {START => []}
  find_all_used_symbols_and_productions start
  compute_nullable_symbols
  compute_first_sets
  self
end
|
27
|
+
|
28
|
+
# Builds the parsing table. Lookahead computation runs only when some state is
# not consistent, and the LALR(n) pass only when conflicts remain after that.
# Returns [table, lookahead_level]; each table row is a Hash from symbol to
# action whose default value is the state's default action.
def parsing_table
  construct_LR0_automata
  unless @states.all? { |state| state.consistent? }
    compute_LALR_1_lookahead
    compute_LALR_n_lookahead if @states.any? { |state| state.conflicted? }
  end
  table = @states.map do |state|
    row = Hash.new default_action(state)
    state.actions.each { |symbol, choices| row[symbol] = choices.to_a.first }
    row
  end
  [table, @lookahead_level]
end
|
42
|
+
|
43
|
+
private
|
44
|
+
# Walks the grammar from +start+, recording every reachable symbol in @symbols,
# every production of a reachable nonterminal in @productions, and seeding
# @first_sets (a terminal's first set is itself, a nonterminal's starts empty).
#
# A symbol is queued only when it is neither already visited nor already
# waiting in the work list, so each symbol is visited exactly once. Without
# the second check, a symbol appearing twice before its first visit was
# processed twice and its productions were appended to @productions twice.
def find_all_used_symbols_and_productions start
  unvisited = [start]
  until unvisited.empty?
    visiting = unvisited.pop
    @symbols << visiting
    @first_sets[visiting] = visiting.is_terminal ? [visiting] : []
    next if visiting.is_terminal
    @definition[visiting].each do |production|
      @productions << production
      production.symbols.each do |symbol|
        unvisited << symbol unless @symbols.include?(symbol) || unvisited.include?(symbol)
      end
    end
  end
end
|
58
|
+
|
59
|
+
# Fixed-point computation of @nullables: a nonterminal is added as soon as one
# of its productions consists only of nullable symbols (an empty right-hand
# side qualifies). Repeats until a full pass adds nothing.
def compute_nullable_symbols
  loop do
    grew = false
    @productions.each do |production|
      next if nullable? production.nonterminal
      next unless production.symbols.all? { |symbol| nullable? symbol }
      @nullables << production.nonterminal
      grew = true
    end
    break unless grew
  end
end
|
70
|
+
|
71
|
+
# Fixed-point computation of @first_sets: for every production, the first set
# of each leading symbol is merged into the nonterminal's set, stopping at the
# first symbol that is not nullable. Repeats until no set grows.
def compute_first_sets
  loop do
    grew = false
    @productions.each do |production|
      target = @first_sets[production.nonterminal]
      production.symbols.each do |symbol|
        before = target.length
        target.replace(target | @first_sets[symbol])
        grew = true if target.length != before
        break unless nullable? symbol
      end
    end
    break unless grew
  end
end
|
83
|
+
|
84
|
+
# True when +symbol+ has been recorded in @nullables.
def nullable? symbol
  @nullables.any? { |known| known == symbol }
end
|
87
|
+
|
88
|
+
# Builds the LR(0) states with a work list, starting from the closure of the
# start item. For every item that is not a handle the goto state is computed:
# when that state is a single completed item a read-reduce action is recorded
# instead of a new state; otherwise the state is reused or added, a shift is
# recorded, and the current state is stored as one of its predecessors.
def construct_LR0_automata
  @lookahead_level = 0
  initial = State.new closure([LRItem.new(@start_production, 0)])
  @states = [initial]
  pending = [initial]
  until pending.empty?
    current = pending.pop
    current.grep_each '!x.handle?' do |item|
      symbol = item.dot_symbol
      target = goto current, symbol
      reduced = target.read_reduce
      if reduced
        current[symbol] << ReduceAction.new(@productions.index(reduced), true)
      else
        index = @states.index(target)
        if index
          target = @states[index]
        else
          @states << target
          pending << target
          index = @states.length - 1
        end
        current[symbol] << ShiftAction.new(index, false)
        target.predsucceors << current
      end
    end
  end
end
|
112
|
+
|
113
|
+
# Adds reduce actions to every inconsistent state: for each completed item,
# the follow set is computed from each predecessor reached by walking the
# production backwards. When the lookahead so far only shifts and is a
# declared operator, the precedence table decides: the reduce replaces the
# shift when compare(...) >= 0, otherwise the shift is kept. In all other
# cases the reduce is simply added next to the existing actions.
def compute_LALR_1_lookahead
  @lookahead_level = 1
  @lookahead_config_stack = []
  @lookahead_indicitor = {}
  @lookahead_result = {}
  @states.grep_each '!x.consistent?' do |state|
    state.grep_each 'x.handle?' do |handle|
      production = handle.production
      state.predsucceors(production.symbols.reverse).each do |predecessor|
        compute_follow_set(predecessor, production.nonterminal).each do |lookahead|
          if state.only_shift?(lookahead) && @precedence_table.operator?(lookahead)
            next unless @precedence_table.compare(production.operator, lookahead) >= 0
            state[lookahead].clear
          end
          state[lookahead] << ReduceAction.new(@productions.index(production), false)
        end
      end
    end
  end
end
|
134
|
+
|
135
|
+
# For every conflicted state and conflicting lookahead, gathers the state
# stacks ("sources") behind each competing action and hands them to
# resolve_conficts. A shift or read-reduce starts from the state itself; a
# reduce additionally collects follow sources from each predecessor reached by
# walking its production backwards. @lookahead_level ends up as the largest
# level any state needed.
def compute_LALR_n_lookahead
  @stack_seen = []
  @states.grep_each 'x.conflicted?' do |state|
    @current_lookahead_level = 1
    state.conflicted_actions.each do |lookahead, actions|
      sources = {}
      actions.each do |action|
        if action.kind_of? ShiftAction
          sources[action] = Set.new([[state]])
        else
          handle = @productions[action.handle]
          sources[action] = action.is_read_reduce ? Set.new([[state]]) : Set.new
          state.predsucceors(handle.symbols.reverse).each do |predecessor|
            @follow_sources_visited = []
            sources[action] |= follow_sources [predecessor], handle.nonterminal, lookahead
          end
        end
      end
      resolve_conficts state, lookahead, sources
    end
    @lookahead_level = [@current_lookahead_level, @lookahead_level].max
  end
end
|
158
|
+
|
159
|
+
# Copy of +items+ extended with an initial item (dot at position 0) for every
# production of each nonterminal that appears right after a dot. Completed
# items and items whose dot symbol is a terminal add nothing.
def closure items
  result = items.dup
  pending = items.dup
  until pending.empty?
    item = pending.pop
    next if item.handle? || item.dot_symbol.is_terminal
    @definition[item.dot_symbol].each do |production|
      candidate = LRItem.new production, 0
      next if result.include? candidate
      result << candidate
      pending << candidate
    end
  end
  result
end
|
170
|
+
|
171
|
+
# State reached from +items+ over +symbol+: every item with +symbol+ after its
# dot is advanced by one position, and the closure of the result is returned.
def goto items, symbol
  advanced = State.new []
  items.each do |item|
    next unless item.dot_symbol == symbol
    advanced << LRItem.new(item.production, item.position + 1)
  end
  closure advanced
end
|
176
|
+
|
177
|
+
# Lookahead set for the transition (state, nonterminal); also accepts a ready
# made Configuration as the only argument. Results are memoised in
# @lookahead_result. The set starts as read_set(state, nonterminal) and is
# unioned with the sets of every configuration yielded by each_included_by.
# @lookahead_config_stack and @lookahead_indicitor track the traversal depth:
# when a configuration's indicator is still its own depth after visiting its
# includes, everything stacked above it receives the same result and is
# marked finished (65535). NOTE(review): this looks like the usual
# stack-based traversal for relations with cycles - confirm.
def compute_follow_set state, nonterminal = nil
  config = state.kind_of?(Configuration) ? state : Configuration.new(state, nonterminal)
  state, nonterminal = config.state, config.symbol

  if START == nonterminal
    # The start symbol can only be followed by end of input.
    @lookahead_indicitor[config] = 65535
    @lookahead_result[config] = [EOF]
    return @lookahead_result[config]
  end

  @lookahead_config_stack.push config
  depth = @lookahead_config_stack.length
  @lookahead_indicitor[config] = depth
  @lookahead_result[config] = read_set state, nonterminal
  each_included_by state, nonterminal do |p, b|
    included = Configuration.new p, b
    compute_follow_set included unless @lookahead_indicitor[included]
    @lookahead_indicitor[config] = [@lookahead_indicitor[config], @lookahead_indicitor[included]].min
    @lookahead_result[config] |= @lookahead_result[included]
  end

  if @lookahead_indicitor[config] == depth
    loop do
      member = @lookahead_config_stack.pop
      @lookahead_result[member] = @lookahead_result[config].dup
      @lookahead_indicitor[member] = 65535
      break if member == config
    end
  end
  @lookahead_result[config]
end
|
206
|
+
|
207
|
+
# Collects, as a Set of state stacks, the places from which +lookahead+ can be
# read after the transition on +nonterminal+ out of the top of +stack+.
# Single-state stacks are remembered in @follow_sources_visited and yield []
# when seen again, which stops endless recursion. Three sources contribute:
# the goto state itself when it reads +lookahead+ directly; nonterminals
# reported by each_read_by; and kernel items of the top state with
# +nonterminal+ after the dot, which are followed back down the stack or,
# when the stack is too short, into the bottom state's predecessors.
def follow_sources stack, nonterminal, lookahead
  top = stack.last
  if stack.length == 1
    config = Configuration.new top, nonterminal
    return [] if @follow_sources_visited.include?(config)
    @follow_sources_visited |= [config]
  end

  stacks = Set.new
  target_index = top.goto(nonterminal)
  if target_index
    target = @states[target_index]
    stacks = Set.new([stack + [target]]) if target.direct_read.include?(lookahead)
  end

  each_read_by(top, nonterminal) do |reader, symbol|
    stacks |= follow_sources(stack + [reader], symbol, lookahead) unless symbol.is_terminal
  end

  selector = lambda { |x| x.kernel? && !x.start? && x.dot_symbol == nonterminal }
  top.grep_each(selector) do |item|
    owner = item.production.nonterminal
    if item.position < stack.length
      stacks |= follow_sources stack.slice(0..(-item.position - 1)), owner, lookahead
    else
      prefix = item.production.symbols.slice 0..(-stack.length - 1)
      stack[0].predsucceors(prefix).reverse.each { |q| stacks |= follow_sources [q], owner, lookahead }
    end
  end
  stacks
end
|
231
|
+
|
232
|
+
# Resolves a conflict on +lookahead+ in +state+ by looking one symbol further.
# A fresh lookahead state is appended to @states and +state+'s actions on
# +lookahead+ are replaced by a lookahead shift into it. Every stack in
# +sources+ ({action => Set of state stacks}) then contributes its action
# under each symbol that can follow +lookahead+. Lookaheads that still
# conflict in the new state are resolved recursively.
#
# Raises 'not LALR(n)' when the same stack is reached twice.
#
# Two defects are fixed here: the follow-up sources were read from an
# undefined local (`source`), and the recursive call ran inside the
# per-action loop, i.e. on a partially built +new_sources+ and once per
# action. It now runs once per conflicting lookahead after all actions have
# been collected, matching how compute_LALR_n_lookahead calls this method.
def resolve_conficts state, lookahead, sources
  @current_lookahead_level += 1
  lookahead_state = State.new([])
  @states << lookahead_state
  state[lookahead].replace [ShiftAction.new(@states.length - 1, true)]

  sources.each do |action, stacks|
    stacks.each do |stk|
      raise 'not LALR(n)' if @stack_seen.include? stk
      @stack_seen << stk
      next_lookaheads(stk, lookahead).each { |a| lookahead_state[a] << action }
    end
  end

  lookahead_state.conflicted_actions.each do |next_lookahead, actions|
    new_sources = {}
    actions.each do |action|
      new_sources[action] = Set.new
      sources[action].each do |stk|
        @follow_sources_visited = []
        new_sources[action] |= follow_sources stk, lookahead, next_lookahead
      end
    end
    resolve_conficts lookahead_state, next_lookahead, new_sources
  end
end
|
257
|
+
|
258
|
+
# Symbols that can follow +lookahead+ when read from the top of +stack+.
# EOF is only ever followed by EOF. Otherwise the read set of the top state
# is extended through every kernel item with +lookahead+ after its dot:
# either by recursing on a shortened stack or, when the stack is too short,
# by taking follow sets from the bottom state's predecessors.
def next_lookaheads stack, lookahead
  return [EOF] if EOF == lookahead
  top = stack.last
  lookaheads = read_set top, lookahead
  selector = lambda { |x| x.kernel? && !x.start? && x.dot_symbol == lookahead }
  top.grep_each(selector) do |item|
    owner = item.production.nonterminal
    if item.position < stack.length
      lookaheads |= next_lookaheads stack.slice(0..(-item.position - 1)), owner
    else
      prefix = item.production.symbols.slice 0..(-stack.length - 1)
      stack[0].predsucceors(prefix).reverse.each { |q| lookaheads |= compute_follow_set q, owner }
    end
  end
  lookaheads
end
|
273
|
+
|
274
|
+
# Union of the first sets of every symbol that each_read_by reports for the
# transition (state, symbol).
def read_set state, symbol
  terminals = []
  each_read_by(state, symbol) { |_state, read| terminals |= @first_sets[read] }
  terminals
end
|
279
|
+
|
280
|
+
# For the state reached from +state+ over +symbol+ (nothing happens when there
# is no such shift), walks every item's symbols after the dot and yields
# (state, following symbol), stopping at the first symbol that is not
# nullable.
# NOTE(review): the state yielded is the ORIGINAL +state+, not the goto state;
# follow_sources names this block argument `q` - confirm which is intended.
def each_read_by state, symbol
  index = state.goto symbol
  return unless index
  @states[index].each do |item|
    item.second_part.each do |following|
      yield state, following
      break unless nullable? following
    end
  end
end
|
289
|
+
|
290
|
+
# For every item of +state+, scans its production from the right while the
# symbols are nullable. Each time +nonterminal+ is met, the predecessors
# reached by walking back over the symbols to its left are yielded together
# with the production's own nonterminal. Does nothing when +state+ is nil.
def each_included_by state, nonterminal
  return unless state
  state.each do |item|
    symbols = item.production.symbols
    symbols.reverse.each_with_index do |symbol, index|
      if nonterminal == symbol
        prefix = symbols.slice 0, symbols.length - index - 1
        state.predsucceors(prefix.reverse).each { |s| yield s, item.production.nonterminal }
      end
      break unless nullable? symbol
    end
  end
end
|
300
|
+
|
301
|
+
# Action a table row falls back to for symbols without an explicit entry.
#
# * A state with items: reduce by its shortest completed production, ignoring
#   the augmented START production; nil when it has no such item.
# * A state without items (the lookahead states appended by resolve_conficts):
#   the ordinary (non read-reduce) ReduceAction among its actions whose
#   production is shortest; nil when there is none.
#
# The second branch used to index @productions with the ReduceAction itself
# and call a non-existent `handle` on the production; it now goes through
# ReduceAction#handle (the production index) and compares right-hand-side
# lengths, mirroring the first branch.
def default_action state
  if state.empty?
    reductions = state.actions.values.inject([]) do |found, actions|
      found | actions.find_all { |x| x.kind_of?(ReduceAction) && !x.is_read_reduce }
    end
    reductions.min do |x, y|
      @productions[x.handle].symbols.length <=> @productions[y.handle].symbols.length
    end
  else
    shortest = nil
    state.each do |item|
      production = item.production
      next unless item.handle? && production.nonterminal != START
      if shortest.nil? || shortest.production.symbols.length > production.symbols.length
        shortest = item
      end
    end
    shortest ? ReduceAction.new(@productions.index(shortest.production), false) : nil
  end
end
|
317
|
+
|
318
|
+
# Operator precedence and associativity lookup.
class PrecedenceTable
  # precedences: list of terminal groups; the LAST group receives level 0 and
  # each earlier group one level more.
  # associativities: hash whose :right entry lists the right-associative
  # terminals.
  def initialize precedences, associativities
    @associativities = associativities
    @precedence_table = {}
    precedences.reverse.each_with_index do |terminals, level|
      terminals.each { |terminal| @precedence_table[terminal] = level }
    end
  end

  # True when +symbol+ has a declared precedence.
  def operator? symbol
    @precedence_table.has_key? symbol
  end

  # Compares the levels of +current+ and +lookahead+ with <=>. On equal levels
  # the result is -1 when both are right-associative and 1 otherwise.
  def compare current, lookahead
    current_level = @precedence_table[current]
    lookahead_level = @precedence_table[lookahead]
    return current_level <=> lookahead_level unless current_level == lookahead_level
    right = @associativities[:right]
    right.include?(current) && right.include?(lookahead) ? -1 : 1
  end
end
|
340
|
+
|
341
|
+
Production.class_eval do
  # Semantic action attached to the production.
  attr_accessor :action

  # The rightmost terminal of the right-hand side, or nil when there is none.
  # compute_LALR_1_lookahead passes it to the precedence table.
  def operator
    symbols.reverse_each { |symbol| return symbol if symbol.is_terminal }
    nil
  end
end
|
347
|
+
|
348
|
+
# A parser state: an Array of LR items that also carries the actions recorded
# per symbol and the states that have a shift into this one.
class State < Array
  attr_reader :actions

  def initialize elements
    super elements
    @actions = {}
    @predsucceors = []
  end

  # Set of actions recorded for +symbol+. NOTE: this replaces Array#[] and
  # creates an empty Set on first access.
  def [] symbol
    @actions.fetch(symbol) { @actions[symbol] = Set.new([]) }
  end

  # False as soon as the items seen so far contain more than one handle, or
  # exactly one handle together with a different number of kernel items.
  def consistent?
    handles = kernels = 0
    each do |item|
      handles += 1 if item.handle?
      kernels += 1 if item.kernel?
      return false if handles > 1 || (handles == 1 && handles != kernels)
    end
    true
  end

  # True for an inconsistent state with more than one action on some symbol.
  def conflicted?
    !consistent? && @actions.any? { |symbol, actions| actions.length > 1 }
  end

  # [symbol, actions] pairs that hold more than one action.
  def conflicted_actions
    @actions.find_all { |symbol, actions| actions.length > 1 }
  end

  # True when +symbol+ has at least one action and all of them are shifts.
  def only_shift? symbol
    candidates = self[symbol]
    !candidates.empty? && candidates.all? { |x| x.kind_of? ShiftAction }
  end

  # The production of the only item when that item is completed, else nil.
  def read_reduce
    return nil unless length == 1 && first.handle?
    first.production
  end

  # Index of the state shifted to on +symbol+, or nil without such a shift.
  def goto symbol
    shift = self[symbol].find { |x| x.kind_of? ShiftAction }
    shift && shift.state
  end

  # Without an argument: the recorded predecessor list itself. With a list of
  # symbols: the states reached by stepping backwards once per symbol, keeping
  # at each step only predecessors that have an item with that symbol after
  # the dot. An empty list yields [self].
  def predsucceors(symbols = nil)
    return @predsucceors unless symbols
    symbols.inject([self]) do |frontier, symbol|
      frontier.inject([]) do |found, state|
        found | state.predsucceors.find_all do |candidate|
          candidate.any? { |item| item.dot_symbol == symbol }
        end
      end
    end
  end

  # Distinct symbols that appear right after a dot in this state's items.
  def direct_read
    symbols = []
    each { |item| symbols |= [item.dot_symbol] if item.dot_symbol }
    symbols
  end

  # States are equal when they hold the same items, in any order.
  def == other
    return false unless other.kind_of? State
    return true if equal? other
    return false unless length == other.length
    all? { |x| other.include? x }
  end
end
|
416
|
+
|
417
|
+
# LRItem: a production with a dot position.
# Configuration: a (state, symbol) pair used as a key by the lookahead code.
LRItem = Struct.new(:production, :position)
Configuration = Struct.new(:state, :symbol)

LRItem.class_eval do
  # Symbol right after the dot, or nil once the dot is at the end.
  def dot_symbol
    production.symbols[position]
  end

  # True for items of the augmented START production.
  def start?
    production.nonterminal == START
  end

  # True when the dot has passed every symbol of the production.
  def handle?
    position >= production.symbols.length
  end

  # True for completed items and for items whose dot is not at position 0.
  def kernel?
    handle? || position != 0
  end

  # Symbols before the dot.
  def first_part
    production.symbols.slice(0, position)
  end

  # Symbols from the dot onwards; empty for a completed item.
  def second_part
    return [] if handle?
    production.symbols.slice(position..-1)
  end
end
|
444
|
+
end
|
445
|
+
end
|