aurum 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (28) hide show
  1. data/example/expression/expression.rb +29 -0
  2. data/lib/aurum.rb +10 -0
  3. data/lib/aurum/engine.rb +173 -0
  4. data/lib/aurum/grammar.rb +234 -0
  5. data/lib/aurum/lexical_table_generator.rb +423 -0
  6. data/lib/aurum/parsing_table_generator.rb +445 -0
  7. data/test/engine/lexer_test.rb +52 -0
  8. data/test/engine/semantic_attributes_test.rb +15 -0
  9. data/test/grammar_definition/character_class_definition_test.rb +28 -0
  10. data/test/grammar_definition/grammar_definition_test.rb +54 -0
  11. data/test/grammar_definition/lexical_definition_test.rb +56 -0
  12. data/test/grammar_definition/operator_precedence_definition_test.rb +35 -0
  13. data/test/grammar_definition/production_definition_test.rb +60 -0
  14. data/test/lexical_table_generator/automata_test.rb +74 -0
  15. data/test/lexical_table_generator/character_set_test.rb +73 -0
  16. data/test/lexical_table_generator/interval_test.rb +36 -0
  17. data/test/lexical_table_generator/pattern_test.rb +109 -0
  18. data/test/lexical_table_generator/subset_determinizer_test.rb +19 -0
  19. data/test/lexical_table_generator/table_generator_test.rb +126 -0
  20. data/test/parsing_table_generator/augmented_grammar_test.rb +45 -0
  21. data/test/parsing_table_generator/lalr_n_computation_test.rb +89 -0
  22. data/test/parsing_table_generator/lr_0_automata_test.rb +91 -0
  23. data/test/parsing_table_generator/lr_item_test.rb +33 -0
  24. data/test/parsing_table_generator/parsing_table_state_test.rb +39 -0
  25. data/test/parsing_table_generator/precedence_table_test.rb +28 -0
  26. data/test/parsing_table_generator/production_test.rb +9 -0
  27. data/test/test_helper.rb +103 -0
  28. metadata +78 -0
@@ -0,0 +1,423 @@
1
+ module Aurum
2
# Actions that can be attached to an accepting state of the lexical table.
RecognizeTokenAction = Struct.new(:token)
ChangeStateAction = Struct.new(:state)
UserDefinedAction = Struct.new(:action)
RecognizeTokenAndChangeStateAction = Struct.new(:token, :state)
# A RecognizeTokenAction carrying the '$ignore' token name.
IgnoreAction = RecognizeTokenAction.new('$ignore')
5
+
6
# Builds the lexical (scanner) table for a specification of the form
#   { lexical_state => { pattern => action } }
# Patterns stored under the :all key are added to every lexical state.
class LexicalTableGenerator
  attr_reader :lexical_states

  # specification:: Hash mapping lexical state names to { Pattern => action }.
  def initialize(specification)
    @specification = specification
    @accept_states = {}
    @lexical_states = @specification.keys - [:all]
    @patterns_for_all = specification[:all] || {}
  end

  # Returns [transition_table, accept_states]. The minimized automaton is
  # returned only when partition refinement actually merged some states.
  def lexical_table
    construct_automata
    make_initial_partitions
    refine_partitions
    if @partitions.size < @lexical_automata.table.size
      construct_minimize_automata
    else
      [@lexical_automata.table, @accept_states]
    end
  end

  private

  # Merges every pattern automaton into one automaton, determinizes it and
  # records, per deterministic accepting state, the list of accepted actions.
  def construct_automata
    automata, accepts = Automata.new(1), {}
    @lexical_states.each_with_index do |lexical_state, index|
      lexical_state_start = automata.new_state
      # Each lexical state is entered from state 0 through a negative pseudo character.
      automata.connect 0, CharacterSet::Interval.new(-index - 1).to_char_set, lexical_state_start
      @patterns_for_all.merge(@specification[lexical_state]).each do |pattern, action|
        pattern_start = automata.merge! pattern.automata
        automata.connect lexical_state_start, Epsilon, pattern_start
        accepts[pattern_start + pattern.accept] = action
      end
    end
    @lexical_automata, deterministic_accepts = automata.determinize accepts.keys
    deterministic_accepts.each do |deterministic_state, nondeterministic_states|
      @accept_states[deterministic_state] = nondeterministic_states.map { |x| accepts[x] }
    end
  end

  # Initial partitions: the start state, all non-accepting states, and one
  # group per distinct list of accepted actions.
  def make_initial_partitions
    partitions = {}
    @accept_states.each do |state, action|
      (partitions[action] ||= []) << state
    end
    @partitions = [[0], @lexical_automata.all_states - @accept_states.keys - [0]] + partitions.values
    @partitions.delete []
  end

  # Splits partitions until no partition can be split any further.
  # Relies on Array#grep_each, which is defined outside this file.
  def refine_partitions
    reverse_automata, working_list = @lexical_automata.reverse, @partitions.dup
    until working_list.empty?
      reverse_automata.alphabet(working_list.pop) do |ia, symbols|
        @partitions.grep_each 'x.size > 1' do |r|
          r1, r2 = r & ia, r - ia
          next if r2.empty? || r2 == r
          replace @partitions, r => [r1, r2]
          if working_list.include? r
            replace working_list, r => [r1, r2]
          else
            working_list << (r1.size <= r2.size ? r1 : r2)
          end
          working_list.uniq!
        end
      end
    end
  end

  # Builds the automaton that has one state per partition.
  def construct_minimize_automata
    automata, accepts = Automata.new(@partitions.size), {}
    choose_representatives do |representative, index|
      @lexical_automata.table[representative].each do |transition|
        automata.connect index, transition.symbols, partition_contains(transition.destination)
      end
      accepts[index] = @accept_states[representative] if @accept_states.has_key? representative
    end
    return automata.table, accepts
  end

  # Yields the first state of each partition together with the partition index.
  def choose_representatives
    @partitions.each_with_index { |partition, index| yield partition.first, index }
  end

  # Index of the partition containing +state+, or nil when there is none
  # (previously the whole partition list leaked out as the result).
  def partition_contains(state)
    @partitions.index { |partition| partition.include? state }
  end

  # For every old => new_elements pair: removes old from +array+ and appends new_elements.
  def replace(array, replacements)
    replacements.each do |old, new|
      array.delete old
      array.concat new
    end
  end
end
93
+
94
# A regular pattern represented as a nondeterministic automaton whose start
# state is 0 and whose single accepting state is +accept+.
class Pattern
  attr_reader :automata, :accept

  # Pattern matching exactly the bytes of +literal+.
  # The automaton is sized by byte count because the transitions are built per
  # byte; sizing by character count left multi-byte literals short of states.
  def self.from_string(literal)
    automata = Automata.new(literal.bytesize + 1)
    state = 0
    literal.each_byte do |byte|
      automata.connect state, CharacterSet::Interval.new(byte).to_char_set, state + 1
      state += 1
    end
    new automata, state
  end

  # Pattern matching a single character out of +set+.
  def self.from_char_set(set)
    automata = Automata.new 2
    automata.connect 0, set, 1
    new automata, 1
  end

  # Pattern matching the given patterns one after another.
  def self.concat(*patterns)
    automata, tail = Automata.new, 0
    patterns.each do |pattern|
      # merge! must run before connect: for the first pattern it creates state 0.
      head = automata.merge!(pattern.automata)
      automata.connect tail, Epsilon, head
      tail = head + pattern.accept
    end
    new automata, tail
  end

  def initialize(automata, accept)
    @automata, @accept = automata, accept
  end

  # Zero or more repetitions.
  def kleene
    kleene_automata = @automata.dup
    kleene_automata.connect 0, Epsilon, @accept
    kleene_automata.connect @accept, Epsilon, 0
    Pattern.new kleene_automata, @accept
  end
  alias :zero_or_more :kleene

  # One or more repetitions.
  def iterate
    iterate_automata = @automata.dup
    iterate_automata.connect @accept, Epsilon, 0
    Pattern.new iterate_automata, @accept
  end
  alias :one_or_more :iterate

  # Zero or one occurrence.
  def opt
    opt_automata = @automata.dup
    opt_automata.connect 0, Epsilon, @accept
    Pattern.new opt_automata, @accept
  end
  alias :zero_or_one :opt

  # Complement: determinizes, routes every missing transition to a sink state
  # and accepts in every state that was not accepting before.
  def negate
    deterministic, accepts = automata.determinize [@accept]
    sink = deterministic.new_state
    deterministic.connect sink, CharacterSet.any, sink
    sink.times do |state|
      joint = CharacterSet.any
      deterministic.table[state].each { |tran| joint.delete tran.symbols }
      deterministic.connect state, joint, sink unless joint.empty?
    end
    accept = deterministic.new_state
    accept.times { |state| deterministic.connect state, Epsilon, accept unless accepts.include? state }
    Pattern.new deterministic, accept
  end
  alias :not :negate

  # Between +least+ and +most+ repetitions (exactly +least+ by default).
  def [](least, most = least)
    Pattern.concat(*([self] * least + [opt] * (most - least)))
  end

  # Alternation of this pattern and +other+.
  def |(other)
    automata = Automata.new 2
    [self, other].each do |pattern|
      branch_start = automata.merge!(pattern.automata)
      automata.connect 0, Epsilon, branch_start
      automata.connect branch_start + pattern.accept, Epsilon, 1
    end
    Pattern.new automata, 1
  end

  # Shortest text ending with this pattern: anything not containing the
  # pattern, followed by the pattern itself.
  def ~
    any = Pattern.from_char_set(CharacterSet.any).kleene
    Pattern.concat(Pattern.concat(any, self, any).negate, self)
  end
end
174
+
175
# A finite automaton stored as a transition table: table[state] is the list of
# Transition structs leaving that state.
class Automata
  attr_reader :table
  Transition = Struct.new(:symbols, :destination)

  # table:: either an existing transition table (Array) or the number of
  #         empty states to create (Integer).
  # Raises ArgumentError for anything else instead of leaving the table unset.
  def initialize(table = [])
    case table
    when Array
      @table = table
    when Integer # Fixnum no longer exists on current Ruby versions
      @table = Array.new(table) { [] }
    else
      raise ArgumentError, "expected an Array or an Integer, got #{table.class}"
    end
  end

  # Adds a transition and returns +destination+.
  def connect(start, symbols, destination)
    @table[start] << Transition.new(symbols, destination)
    destination
  end

  # Appends the states of +other+ (destinations shifted accordingly) and
  # returns the index that state 0 of +other+ received.
  def merge!(other)
    start = @table.length
    other.table.each do |trans|
      @table << trans.map { |tran| Transition.new(tran.symbols, tran.destination + start) }
    end
    start
  end

  # New automaton in which every transition is reversed.
  def reverse
    reversed = Array.new(@table.length) { [] }
    @table.each_with_index do |trans, index|
      trans.each { |tran| reversed[tran.destination] << Transition.new(tran.symbols, index) }
    end
    Automata.new reversed
  end

  # Copy with independent transition lists (the Transition structs are shared).
  def dup
    Automata.new(@table.map { |trans| trans.dup })
  end

  # Splits the symbols leaving +states+ into disjoint ranges and yields, for
  # each range, the destinations reachable on it together with the range.
  def alphabet(states)
    points = []
    states.each do |state|
      @table[state].each { |tran| points.concat tran.symbols.to_points(tran.destination) }
    end
    # Order by character; at the same character start points come before end points.
    points.sort! do |x, y|
      [x.char, x.is_start ? 0 : 1] <=> [y.char, y.is_start ? 0 : 1]
    end
    reachable_states = []
    points.each_with_index do |point, index|
      if point.is_start
        reachable_states << point.destination
      else
        # Remove a single occurrence only: another, overlapping range may still
        # lead to the same destination.
        position = reachable_states.index(point.destination)
        reachable_states.delete_at(position) if position
        next if reachable_states.empty?
      end
      symbols = range(point, points[index + 1])
      yield reachable_states.uniq, symbols if symbols
    end
  end

  # Returns [deterministic_automaton, accept_states] as built by SubsetDeterminizer.
  def determinize(accepts)
    SubsetDeterminizer.new(self, accepts).determinize
  end

  # Adds an empty state and returns its index.
  def new_state
    @table << []
    @table.length - 1
  end

  def all_states
    (0..table.length - 1).to_a
  end

  private

  # Character set strictly between two neighbouring points, or nil when empty.
  def range(point_a, point_b)
    start_point = point_a.is_start ? point_a.char : point_a.char + 1
    end_point = point_b.is_start ? point_b.char - 1 : point_b.char
    start_point > end_point ? nil : CharacterSet::Interval.new(start_point, end_point).to_char_set
  end
end
259
+
260
# Subset construction: turns a nondeterministic automaton into a deterministic
# one and reports which nondeterministic accepting states each deterministic
# state contains.
class SubsetDeterminizer
  # nondeterministic:: the automaton to convert (start state 0).
  # accepts:: collection of accepting states of that automaton.
  def initialize(nondeterministic, accepts)
    @nondeterministic = nondeterministic
    @accepts = accepts
    @deterministic = Automata.new
    @unmarked, @dstates, @accept_states = [], [], {}
    unmark closure([0])
  end

  # Returns [deterministic_automaton, { deterministic_state => accepted nondeterministic states }].
  def determinize
    until @unmarked.empty?
      start = @unmarked.pop
      @nondeterministic.alphabet(@dstates[start]) do |states, symbols|
        subset = closure(states)
        # Reuse the state already created for this subset, if any.
        destination = @dstates.index(subset) || unmark(subset)
        @deterministic.connect start, symbols, destination
      end
    end
    return @deterministic, @accept_states
  end

  private

  # Registers +states+ as a new, not yet processed deterministic state and
  # returns its index.
  def unmark(states)
    @dstates << states
    state = @deterministic.new_state
    @unmarked.push state
    accepted = states.find_all { |x| @accepts.include? x }
    @accept_states[state] = accepted unless accepted.empty?
    state
  end

  # Sorted set of states reachable from +states+ through Epsilon transitions only.
  def closure(states)
    reached, pending = states.dup, states.dup
    until pending.empty?
      @nondeterministic.table[pending.pop].each do |tran|
        next unless tran.symbols == Epsilon
        next if reached.include?(tran.destination)
        reached << tran.destination
        pending << tran.destination
      end
    end
    reached.sort!
  end
end
300
+
301
# A set of character codes stored as a list of closed intervals.
class CharacterSet
  # Closed interval [first, last] of character codes.
  Interval = Struct.new(:first, :last) do
    def initialize(first, last = first)
      super(first, last)
    end

    # True when +char+ (a code, or a String whose first byte is used) lies in the interval.
    def include?(char)
      # String#[] with an index returns a String on Ruby 1.9+, so read the byte explicitly.
      char = char.getbyte(0) if char.kind_of? String
      first <= char && char <= last
    end

    # Extends this interval to cover +other+ when the two overlap or are
    # adjacent (including the case where +other+ completely contains this
    # interval). Returns whether a merge took place.
    def merge!(other)
      return false if other.first > last + 1 || first > other.last + 1
      self.first = [first, other.first].min
      self.last = [last, other.last].max
      true
    end

    def to_char_set
      CharacterSet.new self
    end
  end

  # Boundary of an interval, tagged with the destination state it belongs to.
  Point = Struct.new(:char, :is_start, :destination)

  attr_reader :intervals

  # The set of all codes from 0 to 65535.
  def self.any
    Interval.new(0, 65535).to_char_set
  end

  def initialize(*intervals)
    @intervals = intervals
  end

  # Union with another CharacterSet, or with the bytes of other.to_s.
  def +(other)
    result = dup
    if other.kind_of? CharacterSet
      other.intervals.each { |interval| result.add_interval interval.first, interval.last }
    else
      other.to_s.each_byte { |byte| result.add_interval byte }
    end
    result
  end

  # Difference with another CharacterSet, or with the bytes of other.to_s.
  def -(other)
    result = dup
    if other.kind_of? CharacterSet
      other.intervals.each { |interval| result.delete_interval interval.first, interval.last }
    else
      other.to_s.each_byte { |byte| result.delete_interval byte }
    end
    result
  end

  # Adds a Range of codes, or every byte of obj.to_s, to this set.
  def <<(obj)
    obj.kind_of?(Range) ? add_interval(obj.first, obj.last) : obj.to_s.each_byte { |x| add_interval x }
  end

  # Removes a Range, a CharacterSet, or every byte of obj.to_s from this set.
  def delete(obj)
    case obj
    when Range
      delete_interval obj.first, obj.last
    when CharacterSet
      obj.intervals.each { |interval| delete_interval interval.first, interval.last }
    else
      obj.to_s.each_byte { |x| delete_interval x }
    end
  end

  def include?(char)
    @intervals.any? { |x| x.include? char }
  end

  def empty?
    @intervals.empty?
  end

  # Start and end Points of every interval, all tagged with +destination+.
  def to_points(destination)
    @intervals.inject([]) do |points, interval|
      points << Point.new(interval.first, true, destination)
      points << Point.new(interval.last, false, destination)
    end
  end

  # Copy whose intervals can be changed without affecting this set.
  def dup
    CharacterSet.new(*@intervals.map { |interval| interval.dup })
  end

  protected

  # Adds [first, last], absorbing every stored interval it overlaps or touches
  # so that the stored intervals stay disjoint.
  def add_interval(first, last = first)
    merged = Interval.new first, last
    @intervals.reject! { |existing| merged.merge! existing }
    @intervals << merged
  end

  # Removes [first, last] from every stored interval it intersects, keeping
  # the parts that stick out on either side.
  def delete_interval(first, last = first)
    remaining = []
    @intervals.each do |existing|
      if existing.last < first || existing.first > last
        remaining << existing
      else
        remaining << Interval.new(existing.first, first - 1) if existing.first < first
        remaining << Interval.new(last + 1, existing.last) if existing.last > last
      end
    end
    @intervals.replace remaining
  end

  # Appends [first, last] unless it is empty.
  def add_new_interval(first, last)
    @intervals << Interval.new(first, last) if first <= last
  end
end
421
+
422
+ Epsilon = CharacterSet.new
423
+ end
@@ -0,0 +1,445 @@
1
+ require 'set'
2
+
3
+ module Aurum
4
# Grammar symbol and production. Note that Symbol here is Aurum::Symbol.
Symbol = Struct.new(:name, :is_terminal)
Production = Struct.new(:nonterminal, :symbols)

# Parser actions stored in the parsing table.
ShiftAction = Struct.new(:state, :is_lookahead_shift)
ReduceAction = Struct.new(:handle, :is_read_reduce)

# Symbols added by the generator: the augmented start symbol and end of input.
START = Symbol.new('$start', false)
EOF = Symbol.new('$eof', true)
8
+
9
+ class ParsingTableGenerator
10
+ attr_reader :symbols, :productions
11
+
12
+ DEFAULT_ASSOCIATIVITIES = {:left => [], :right => []}
13
+
14
# definition:: grammar, indexed by nonterminal to obtain its productions.
# precedences:: list of terminal groups, handed to PrecedenceTable.
# associativities:: Hash with :left and :right terminal lists.
def initialize(definition, precedences = [], associativities = DEFAULT_ASSOCIATIVITIES)
  @precedence_table = PrecedenceTable.new(precedences, associativities)
  @definition = definition
end
18
+
19
# Augments the grammar with START -> start, collects every symbol and
# production reachable from +start+ and computes nullable symbols and first
# sets. Returns self so that calls can be chained.
def start_from(start)
  @start_production = Aurum::Production.new(START, [start])
  @symbols = []
  @productions = [@start_production]
  @nullables = []
  @first_sets = { START => [] }
  find_all_used_symbols_and_productions(start)
  compute_nullable_symbols
  compute_first_sets
  self
end
27
+
28
# Builds the LR(0) automaton, adds lookahead where states are inconsistent and
# returns [table, lookahead_level]. Each table row is a Hash from symbol to
# action whose default value is the state's default action.
def parsing_table
  construct_LR0_automata
  unless @states.all? { |state| state.consistent? }
    compute_LALR_1_lookahead
    compute_LALR_n_lookahead if @states.any? { |state| state.conflicted? }
  end
  table = @states.map do |state|
    row = Hash.new(default_action(state))
    state.actions.each { |symbol, actions| row[symbol] = actions.to_a.first }
    row
  end
  return table, @lookahead_level
end
42
+
43
+ private
44
# Collects into @symbols and @productions everything reachable from +start+
# and initializes the first set of every symbol found.
def find_all_used_symbols_and_productions(start)
  unvisited = [start]
  until unvisited.empty?
    visiting = unvisited.pop
    # A symbol may have been pushed several times before its first visit;
    # visiting it again would duplicate the symbol and its productions.
    next if @symbols.include?(visiting)
    @symbols << visiting
    @first_sets[visiting] = visiting.is_terminal ? [visiting] : []
    next if visiting.is_terminal
    @definition[visiting].each do |production|
      @productions << production
      production.symbols.each do |symbol|
        unvisited << symbol unless @symbols.include?(symbol)
      end
    end
  end
end
58
+
59
# Adds to @nullables every nonterminal that has a production consisting only
# of nullable symbols; repeats until a pass adds nothing.
def compute_nullable_symbols
  loop do
    grew = false
    @productions.each do |production|
      next if @nullables.include?(production.nonterminal)
      next unless production.symbols.all? { |s| @nullables.include?(s) }
      @nullables << production.nonterminal
      grew = true
    end
    break unless grew
  end
end
70
+
71
# Grows @first_sets in place: for each production, the first sets of its
# leading symbols are merged in up to and including the first non-nullable
# symbol. Repeats until a pass adds nothing.
def compute_first_sets
  loop do
    grew = false
    @productions.each do |production|
      first_set = @first_sets[production.nonterminal]
      production.symbols.each do |symbol|
        size_before = first_set.length
        first_set.replace(first_set | @first_sets[symbol])
        grew = true if first_set.length != size_before
        break unless @nullables.include?(symbol)
      end
    end
    break unless grew
  end
end
83
+
84
# True when +symbol+ has been recorded in @nullables.
def nullable?(symbol)
  @nullables.include?(symbol)
end
87
+
88
# Builds @states, the LR(0) automaton, starting from the closure of the
# augmented start item. For every item with a symbol after the dot the goto
# state is computed; it becomes either a read-reduce action or a shift to a
# (possibly new) state. Relies on grep_each, which is defined outside this file.
def construct_LR0_automata
  @lookahead_level = 0
  start_state = State.new(closure([LRItem.new(@start_production, 0)]))
  @states = [start_state]
  unvisited = [start_state]
  until unvisited.empty?
    visiting = unvisited.pop
    visiting.grep_each '!x.handle?' do |item|
      symbol = item.dot_symbol
      new_state = goto(visiting, symbol)
      read_reduce = new_state.read_reduce
      if read_reduce
        visiting[symbol] << ReduceAction.new(@productions.index(read_reduce), true)
        next
      end
      index = @states.index(new_state)
      if index
        # An equal state already exists: link to that one instead.
        new_state = @states[index]
      else
        @states << new_state
        unvisited << new_state
        index = @states.length - 1
      end
      visiting[symbol] << ShiftAction.new(index, false)
      new_state.predsucceors << visiting
    end
  end
end
112
+
113
# Adds reduce actions, keyed by lookahead symbol, to every inconsistent state.
# When the lookahead so far only has shift actions and is a declared operator,
# the precedence table decides: the shift is replaced by the reduction when
# compare(...) >= 0 and kept otherwise.
def compute_LALR_1_lookahead
  @lookahead_level = 1
  @lookahead_config_stack, @lookahead_indicitor, @lookahead_result = [], {}, {}
  @states.grep_each '!x.consistent?' do |state|
    state.grep_each 'x.handle?' do |handle|
      production = handle.production
      state.predsucceors(production.symbols.reverse).each do |predsucceor|
        compute_follow_set(predsucceor, production.nonterminal).each do |lookahead|
          if state.only_shift?(lookahead) && @precedence_table.operator?(lookahead)
            next if @precedence_table.compare(production.operator, lookahead) < 0
            state[lookahead].clear
          end
          state[lookahead] << ReduceAction.new(@productions.index(production), false)
        end
      end
    end
  end
end
134
+
135
# For every conflicted state and every conflicting lookahead, collects per
# action the set of state stacks the action can originate from and hands them
# to resolve_conficts. @lookahead_level ends up as the deepest level reached.
def compute_LALR_n_lookahead
  @stack_seen = []
  @states.grep_each 'x.conflicted?' do |state|
    @current_lookahead_level = 1
    state.conflicted_actions.each do |lookahead, actions|
      sources = {}
      actions.each do |action|
        if action.kind_of? ShiftAction
          sources[action] = [[state]].to_set
          next
        end
        handle = @productions[action.handle]
        sources[action] = action.is_read_reduce ? [[state]].to_set : [].to_set
        state.predsucceors(handle.symbols.reverse).each do |predsucceor|
          @follow_sources_visited = []
          sources[action] |= follow_sources([predsucceor], handle.nonterminal, lookahead)
        end
      end
      resolve_conficts state, lookahead, sources
    end
    @lookahead_level = [@current_lookahead_level, @lookahead_level].max
  end
end
158
+
159
# Returns a copy of +items+ extended with an initial item (dot at position 0)
# for every production of each nonterminal that appears right after a dot.
def closure(items)
  result, unmarked = items.dup, items.dup
  until unmarked.empty?
    visiting = unmarked.pop
    next if visiting.handle? || visiting.dot_symbol.is_terminal
    @definition[visiting.dot_symbol].each do |production|
      item = LRItem.new(production, 0)
      next if result.include?(item)
      result << item
      unmarked << item
    end
  end
  result
end
170
+
171
# State reached from +items+ on +symbol+: every item with +symbol+ after the
# dot is advanced by one position, then the closure is taken.
def goto(items, symbol)
  kernel = State.new []
  items.each do |item|
    next unless item.dot_symbol == symbol
    kernel << LRItem.new(item.production, item.position + 1)
  end
  closure kernel
end
176
+
177
# Returns the lookahead symbols for the pair (state, nonterminal) and stores
# them in @lookahead_result. Can also be called with a single Configuration,
# whose state and symbol are then used.
# NOTE(review): the stack / index bookkeeping below looks like the
# DeRemer-Pennello "digraph" traversal over the includes relation — confirm.
def compute_follow_set state, nonterminal = nil
  if state.kind_of? Configuration
    config = state
    state, nonterminal = config.state, config.symbol
  else
    config = Configuration.new state, nonterminal
  end
  if START == nonterminal
    # The augmented start symbol is followed by end of input only.
    # 65535 is also written below when a result is finished; presumably it
    # acts as "infinity" for the min computation.
    @lookahead_indicitor[config] = 65535
    @lookahead_result[config] = [EOF]
  else
    @lookahead_config_stack.push config
    # d is the stack depth at which this configuration was entered.
    @lookahead_indicitor[config] = (d = @lookahead_config_stack.length)
    @lookahead_result[config] = read_set state, nonterminal
    each_included_by state, nonterminal do |p, b|
      new_config = Configuration.new p, b
      # Recurse only into configurations that have no indicator yet.
      compute_follow_set new_config unless @lookahead_indicitor[new_config]
      @lookahead_indicitor[config] = [@lookahead_indicitor[config], @lookahead_indicitor[new_config]].min
      @lookahead_result[config] |= @lookahead_result[new_config]
    end
    connected = nil
    # Runs only when the indicator still equals the entry depth: everything
    # stacked on top of config (and config itself) receives a copy of the result.
    until connected == config
      connected = @lookahead_config_stack.pop
      @lookahead_result[connected] = @lookahead_result[config].dup
      @lookahead_indicitor[connected] = 65535
    end if @lookahead_indicitor[config] == d
  end
  @lookahead_result[config]
end
206
+
207
# Returns a Set of state stacks (Arrays of states) from which +lookahead+ can
# be read after +nonterminal+ has been processed on top of +stack+.
# Callers reset @follow_sources_visited to [] before each top-level call.
# NOTE(review): presumably the "follow sources" step of an LALR(n) lookahead
# computation — confirm against the algorithm this was taken from.
def follow_sources stack, nonterminal, lookahead
  top = stack.last
  if stack.length == 1
    # Single-state stacks are tracked so the same (state, nonterminal) pair is
    # expanded only once.
    config = Configuration.new top, nonterminal
    @follow_sources_visited.include?(config) and return []
    @follow_sources_visited |= [config]
  end
  stacks = [].to_set
  if q_index = top.goto(nonterminal)
    q = @states[q_index]
    # The goto state has lookahead directly after a dot: stack + [q] is a source.
    stacks = [stack + [q]].to_set if q.direct_read.include?(lookahead)
  end
  # Continue through the nonterminals yielded by each_read_by.
  each_read_by(top, nonterminal) {|q, y| stacks |= follow_sources stack+[q], y, lookahead unless y.is_terminal}
  # Kernel items (other than the start item) with the dot in front of nonterminal.
  top.grep_each(lambda {|x| x.kernel? && !x.start? && x.dot_symbol == nonterminal}) do |item|
    c = item.production.nonterminal
    if item.position < stack.length
      # The stack is deep enough: drop the states covering the part before the dot.
      stacks |= follow_sources stack.slice(0..-item.position-1), c, lookahead
    else
      # Otherwise restart from the predecessors of the bottom state.
      first_part = item.production.symbols.slice 0..-stack.length-1
      stack[0].predsucceors(first_part).reverse.each {|q| stacks |= follow_sources [q], c, lookahead }
    end
  end
  stacks
end
231
+
232
# Replaces the conflicting actions of +state+ on +lookahead+ by a lookahead
# shift into a freshly created state in which the original actions are keyed
# by the following lookahead symbol. Conflicts remaining in the new state are
# resolved recursively, one lookahead level deeper.
#
# sources:: Hash from action to the Set of state stacks it originates from.
# Raises 'not LALR(n)' when a stack is met a second time.
def resolve_conficts(state, lookahead, sources)
  @current_lookahead_level += 1
  @states << (lookahead_state = State.new([]))
  state[lookahead].replace [ShiftAction.new(@states.length - 1, true)]
  sources.each do |action, stacks|
    stacks.each do |stk|
      raise 'not LALR(n)' if @stack_seen.include? stk
      @stack_seen << stk
      next_lookaheads(stk, lookahead).each { |a| lookahead_state[a] << action }
    end
  end
  lookahead_state.conflicted_actions.each do |next_lookahead, actions|
    new_sources = {}
    actions.each do |action|
      new_sources[action] = [].to_set
      # Was `source[action]`, an undefined name.
      sources[action].each do |stk|
        @follow_sources_visited = []
        new_sources[action] |= follow_sources(stk, lookahead, next_lookahead)
      end
    end
    # Recurse once per conflicting lookahead, after the sources of all its
    # actions are known (the call used to sit inside the loop above).
    resolve_conficts lookahead_state, next_lookahead, new_sources
  end
end
257
+
258
# Returns the symbols that can follow +lookahead+ when the parser is in the
# configuration described by +stack+ (an Array of states, top last).
def next_lookaheads stack, lookahead
  # Nothing but end of input follows end of input.
  EOF == lookahead and return [EOF]
  top = stack.last
  lookaheads = read_set top, lookahead
  # Kernel items (other than the start item) with the dot in front of lookahead.
  top.grep_each(lambda {|x| x.kernel? && !x.start? && x.dot_symbol == lookahead}) do |item|
    c = item.production.nonterminal
    if item.position < stack.length
      # The stack is deep enough: drop the states covering the part before the dot.
      lookaheads |= next_lookaheads stack.slice(0..-item.position-1), c
    else
      # Otherwise fall back to the follow sets of the predecessors of the bottom state.
      first_part = item.production.symbols.slice 0..-stack.length-1
      stack[0].predsucceors(first_part).reverse.each {|q| lookaheads |= compute_follow_set q, c }
    end
  end
  lookaheads
end
273
+
274
# Union of the first sets of all symbols that each_read_by yields for
# (state, symbol).
def read_set(state, symbol)
  first_symbols = []
  each_read_by(state, symbol) { |_state, read_symbol| first_symbols |= @first_sets[read_symbol] }
  first_symbols
end
279
+
280
# For every item of the state reached from +state+ on +symbol+, yields +state+
# together with each symbol after the dot, stopping at (and including) the
# first symbol that is not nullable. Yields nothing when there is no such
# transition.
# NOTE(review): the state yielded is the source state, not the goto target —
# confirm that this is what follow_sources expects.
def each_read_by(state, symbol)
  index = state.goto symbol
  return unless index
  @states[index].each do |item|
    item.second_part.each do |read_symbol|
      yield state, read_symbol
      break unless @nullables.include?(read_symbol)
    end
  end
end
289
+
290
# For every item of +state+ whose production contains +nonterminal+ followed
# only by nullable symbols, yields each state found by walking back over the
# symbols in front of that occurrence, together with the production's left-hand
# side. Does nothing when +state+ is nil.
def each_included_by(state, nonterminal)
  return unless state
  state.each do |item|
    production = item.production
    rhs = production.symbols
    # Scan the right-hand side from its end towards its beginning.
    rhs.reverse.each_with_index do |symbol, offset|
      if nonterminal == symbol
        prefix = rhs.slice(0, rhs.length - offset - 1)
        state.predsucceors(prefix.reverse).each { |s| yield s, production.nonterminal }
      end
      break unless nullable?(symbol)
    end
  end
end
300
+
301
# Action used for symbols that have no explicit entry in a state's row.
#
# * State with items: a reduction by the shortest completed production other
#   than the start production, or nil when there is none.
# * State without items (the lookahead states created by resolve_conficts):
#   the ordinary (non read-reduce) reduce action whose production has the
#   shortest right-hand side, or nil when there is none.
def default_action(state)
  if state.empty?
    candidates = state.actions.values.inject([]) do |found, actions|
      found | actions.find_all { |x| x.kind_of?(ReduceAction) && !x.is_read_reduce }
    end
    # ReduceAction#handle is an index into @productions; the comparison used
    # to index @productions with the action itself.
    candidates.min do |x, y|
      @productions[x.handle].symbols.length <=> @productions[y.handle].symbols.length
    end
  else
    handle = nil
    state.each do |item|
      production = item.production
      next unless item.handle? && production.nonterminal != START
      handle = item if !handle || handle.production.symbols.length > production.symbols.length
    end
    handle ? ReduceAction.new(@productions.index(handle.production), false) : nil
  end
end
317
+
318
# Operator precedence and associativity lookup.
class PrecedenceTable
  # precedences:: list of terminal groups; groups listed first rank highest.
  # associativities:: Hash with :left and :right terminal lists.
  def initialize(precedences, associativities)
    @precedence_table, @associativities = {}, associativities
    precedences.reverse.each_with_index do |terminals, index|
      terminals.each { |terminal| @precedence_table[terminal] = index }
    end
  end

  # True when +symbol+ was listed in the precedences.
  def operator?(symbol)
    @precedence_table.has_key? symbol
  end

  # Compares the operator of a production (+current+) with a +lookahead+
  # terminal. A positive or zero result favours the reduction, a negative one
  # the shift. Equal rank yields -1 when both are right associative and 1
  # otherwise. When only one of the two has a rank the result is -1 (shift)
  # instead of nil, which made the caller's `>= 0` test fail.
  def compare(current, lookahead)
    current_rank = @precedence_table[current]
    lookahead_rank = @precedence_table[lookahead]
    if current_rank == lookahead_rank
      right = @associativities[:right]
      return right.include?(current) && right.include?(lookahead) ? -1 : 1
    end
    return -1 if current_rank.nil? || lookahead_rank.nil?
    current_rank <=> lookahead_rank
  end
end
340
+
341
Production.class_eval do
  attr_accessor :action

  # The last terminal of the right-hand side, or nil when there is none.
  def operator
    symbols.reverse.detect { |symbol| symbol.is_terminal }
  end
end
347
+
348
# A parser state: an Array of LR items plus the actions recorded per symbol
# and the list of states that have a transition into this one.
class State < Array
  attr_reader :actions

  def initialize(elements)
    super(elements)
    @actions = {}
    @predsucceors = []
  end

  # Set of actions recorded for +symbol+; an empty Set is created on first
  # access. Note that this replaces Array#[] for states.
  def [](symbol)
    @actions.fetch(symbol) { @actions[symbol] = Set.new([]) }
  end

  # False as soon as, while scanning the items in order, more than one handle
  # has been seen, or exactly one handle together with a different number of
  # kernel items.
  def consistent?
    handles = kernels = 0
    each do |item|
      handles += 1 if item.handle?
      kernels += 1 if item.kernel?
      return false if handles > 1 || (handles == 1 && handles != kernels)
    end
    true
  end

  # True for an inconsistent state with more than one action on some symbol.
  def conflicted?
    !consistent? && @actions.any? { |_symbol, actions| actions.length > 1 }
  end

  # Array of [symbol, actions] pairs that have more than one action.
  def conflicted_actions
    @actions.find_all { |_symbol, actions| actions.length > 1 }
  end

  # True when +symbol+ has at least one action and all of them are shifts.
  def only_shift?(symbol)
    actions = self[symbol]
    !actions.empty? && actions.all? { |x| x.kind_of? ShiftAction }
  end

  # The production to reduce when this state consists of one completed item, else nil.
  def read_reduce
    first.production if length == 1 && first.handle?
  end

  # Index of the state shifted to on +symbol+, or nil when there is no shift.
  def goto(symbol)
    shift = self[symbol].find { |x| x.kind_of? ShiftAction }
    shift && shift.state
  end

  # Without argument: the states with a transition into this one.
  # With a list of symbols: the states reached by walking backwards from this
  # state, one symbol at a time, keeping at each step only the predecessors
  # that have an item with that symbol after the dot.
  def predsucceors(symbols = nil)
    return @predsucceors unless symbols
    symbols.inject([self]) do |frontier, symbol|
      frontier.inject([]) do |found, state|
        found | state.predsucceors.find_all { |candidate| candidate.any? { |item| item.dot_symbol == symbol } }
      end
    end
  end

  # Distinct symbols that appear right after a dot in this state.
  def direct_read
    symbols = []
    each { |item| symbols |= [item.dot_symbol] if item.dot_symbol }
    symbols
  end

  # Two states are equal when they hold the same number of items and every
  # item of this state is included in the other, regardless of order.
  def ==(other)
    return false unless other.kind_of? State
    return true if equal? other
    return false unless length == other.length
    all? { |x| other.include? x }
  end
end
416
+
417
# An LR item: a production together with the position of the dot in its
# right-hand side.
LRItem = Struct.new(:production, :position) do
  # The symbol right after the dot, or nil when the dot is at the end.
  def dot_symbol
    production.symbols[position]
  end

  # True for items of the augmented start production.
  def start?
    production.nonterminal == START
  end

  # True when the dot has reached the end of the right-hand side.
  def handle?
    position >= production.symbols.length
  end

  # True for completed items and for items whose dot is not at the beginning.
  def kernel?
    handle? || position != 0
  end

  # The symbols before the dot.
  def first_part
    production.symbols.slice(0, position)
  end

  # The symbols after the dot (empty for a completed item).
  def second_part
    handle? ? [] : production.symbols.slice(position..-1)
  end
end

# A (state, symbol) pair used as a key by the lookahead computation.
Configuration = Struct.new(:state, :symbol)
444
+ end
445
+ end