aurum 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28) hide show
  1. data/example/expression/expression.rb +29 -0
  2. data/lib/aurum.rb +10 -0
  3. data/lib/aurum/engine.rb +173 -0
  4. data/lib/aurum/grammar.rb +234 -0
  5. data/lib/aurum/lexical_table_generator.rb +423 -0
  6. data/lib/aurum/parsing_table_generator.rb +445 -0
  7. data/test/engine/lexer_test.rb +52 -0
  8. data/test/engine/semantic_attributes_test.rb +15 -0
  9. data/test/grammar_definition/character_class_definition_test.rb +28 -0
  10. data/test/grammar_definition/grammar_definition_test.rb +54 -0
  11. data/test/grammar_definition/lexical_definition_test.rb +56 -0
  12. data/test/grammar_definition/operator_precedence_definition_test.rb +35 -0
  13. data/test/grammar_definition/production_definition_test.rb +60 -0
  14. data/test/lexical_table_generator/automata_test.rb +74 -0
  15. data/test/lexical_table_generator/character_set_test.rb +73 -0
  16. data/test/lexical_table_generator/interval_test.rb +36 -0
  17. data/test/lexical_table_generator/pattern_test.rb +109 -0
  18. data/test/lexical_table_generator/subset_determinizer_test.rb +19 -0
  19. data/test/lexical_table_generator/table_generator_test.rb +126 -0
  20. data/test/parsing_table_generator/augmented_grammar_test.rb +45 -0
  21. data/test/parsing_table_generator/lalr_n_computation_test.rb +89 -0
  22. data/test/parsing_table_generator/lr_0_automata_test.rb +91 -0
  23. data/test/parsing_table_generator/lr_item_test.rb +33 -0
  24. data/test/parsing_table_generator/parsing_table_state_test.rb +39 -0
  25. data/test/parsing_table_generator/precedence_table_test.rb +28 -0
  26. data/test/parsing_table_generator/production_test.rb +9 -0
  27. data/test/test_helper.rb +103 -0
  28. metadata +78 -0
@@ -0,0 +1,423 @@
1
+ module Aurum
2
# Action records stored against accepting states of the lexical automata.
# Each is a plain Struct carrying the single piece of data named by its member.
RecognizeTokenAction = Struct.new(:token)
ChangeStateAction = Struct.new(:state)
UserDefinedAction = Struct.new(:action)

# Combined action carrying both a token and a lexical state.
RecognizeTokenAndChangeStateAction = Struct.new(:token, :state)

# Pre-built token action whose token is the literal '$ignore'.
IgnoreAction = RecognizeTokenAction.new('$ignore')
5
+
6
# Turns a lexical specification into a deterministic transition table.
#
# The specification is a Hash mapping a lexical state name to a Hash of
# {pattern => action}. Patterns stored under the :all key are merged into
# every other lexical state. Patterns must respond to #automata and #accept.
class LexicalTableGenerator
  attr_reader :lexical_states

  def initialize(specification)
    @specification, @accept_states = specification, {}
    @lexical_states = @specification.keys - [:all]
    @patterns_for_all = specification[:all] || {}
  end

  # Returns [transition_table, accept_states]. The minimized automata is used
  # only when partition refinement actually merged some states.
  def lexical_table
    construct_automata
    make_initial_partitions
    refine_partitions
    return construct_minimize_automata if @partitions.size < @lexical_automata.table.size
    [@lexical_automata.table, @accept_states]
  end

  private

  # Builds one nondeterministic automata covering every lexical state and
  # determinizes it. State 0 is the shared entry; lexical state number i is
  # entered from state 0 through the pseudo character -(i + 1).
  def construct_automata
    automata, accepts = Automata.new(1), {}
    @lexical_states.each_with_index do |lexical_state, index|
      lexical_state_start = automata.new_state
      automata.connect 0, CharacterSet::Interval.new(-index - 1).to_char_set, lexical_state_start
      @patterns_for_all.merge(@specification[lexical_state]).each do |pattern, action|
        pattern_start = automata.merge! pattern.automata
        automata.connect lexical_state_start, Epsilon, pattern_start
        accepts[pattern_start + pattern.accept] = action
      end
    end
    @lexical_automata, deterministic_accepts = automata.determinize accepts.keys
    # Each deterministic accept state keeps the actions of all the
    # nondeterministic accept states it contains.
    deterministic_accepts.each do |deterministic, nondeterministic|
      @accept_states[deterministic] = nondeterministic.map { |state| accepts[state] }
    end
  end

  # Initial partitions: the entry state, all non-accepting states, and one
  # group per distinct list of actions.
  def make_initial_partitions
    by_action = {}
    @accept_states.each { |state, action| (by_action[action] ||= []) << state }
    @partitions = [[0], @lexical_automata.all_states - @accept_states.keys - [0]] + by_action.values
    @partitions.delete []
  end

  # Splits partitions until no pending splitter distinguishes states that
  # share a partition. `grep_each` is provided outside this file; it is
  # called here with a predicate string and a block.
  def refine_partitions
    reverse_automata, working_list = @lexical_automata.reverse, @partitions.dup
    until working_list.empty?
      reverse_automata.alphabet(working_list.pop) do |ia, _symbols|
        @partitions.grep_each 'x.size > 1' do |r|
          r1, r2 = r & ia, r - ia
          unless r2.empty? || r2 == r
            replace @partitions, r => [r1, r2]
            if working_list.include? r
              replace working_list, r => [r1, r2]
            else
              # Only the smaller half needs to be queued as a new splitter.
              working_list << (r1.size <= r2.size ? r1 : r2)
            end
            working_list.uniq!
          end
        end
      end
    end
  end

  # Builds the automata whose states are the partitions; the first member of
  # each partition supplies its outgoing transitions and its actions.
  def construct_minimize_automata
    automata, accepts = Automata.new(@partitions.size), {}
    choose_representatives do |representative, index|
      @lexical_automata.table[representative].each do |transition|
        automata.connect index, transition.symbols, partition_contains(transition.destination)
      end
      accepts[index] = @accept_states[representative] if @accept_states.key? representative
    end
    return automata.table, accepts
  end

  # Yields the first state of every partition together with the partition index.
  def choose_representatives
    @partitions.each_with_index { |partition, index| yield partition.first, index }
  end

  # Index of the partition holding `state`, or nil when no partition holds it
  # (the original fell through and returned the partitions array itself).
  def partition_contains(state)
    @partitions.each_with_index { |partition, index| return index if partition.include? state }
    nil
  end

  # For each old => [new, ...] pair, removes `old` from `array` and appends
  # the new elements at the end.
  def replace(array, replacements)
    replacements.each do |old, new|
      array.delete old
      array.concat new
    end
  end
end
93
+
94
# A regular pattern represented as a nondeterministic automata plus the index
# of its single accept state. State 0 is always the start state.
class Pattern
  attr_reader :automata, :accept

  # Pattern matching exactly the bytes of `literal`.
  # One state per byte is required, so the automata is sized with #bytesize:
  # #length counts characters and is too small for multibyte literals.
  def self.from_string(literal)
    automata, index = Automata.new(literal.bytesize + 1), 0
    literal.each_byte do |byte|
      automata.connect index, CharacterSet::Interval.new(byte).to_char_set, index + 1
      index += 1
    end
    new automata, index
  end

  # Pattern matching a single symbol drawn from `set`.
  def self.from_char_set(set)
    automata = Automata.new 2
    automata.connect 0, set, 1
    new automata, 1
  end

  # Pattern matching the given patterns one after another. With no patterns
  # the result is a one-state automata accepting the empty input.
  def self.concat(*patterns)
    return new(Automata.new([[]]), 0) if patterns.empty?
    automata, index = Automata.new, 0
    patterns.each do |pattern|
      # merge! runs first (argument evaluation), so state `index` exists by
      # the time it is connected to the merged pattern's start state.
      start = automata.merge!(pattern.automata)
      index = automata.connect(index, Epsilon, start) + pattern.accept
    end
    new automata, index
  end

  def initialize(automata, accept)
    @automata, @accept = automata, accept
  end

  # Zero or more repetitions: epsilon edges in both directions between the
  # start and accept states.
  def kleene
    kleene_automata = @automata.dup
    kleene_automata.connect 0, Epsilon, @accept
    kleene_automata.connect @accept, Epsilon, 0
    Pattern.new kleene_automata, @accept
  end
  alias :zero_or_more :kleene

  # One or more repetitions: epsilon edge from accept back to start.
  def iterate
    iterate_automata = @automata.dup
    iterate_automata.connect @accept, Epsilon, 0
    Pattern.new iterate_automata, @accept
  end
  alias :one_or_more :iterate

  # Zero or one occurrence: epsilon edge from start to accept.
  def opt
    opt_automata = @automata.dup
    opt_automata.connect 0, Epsilon, @accept
    Pattern.new opt_automata, @accept
  end
  alias :zero_or_one :opt

  # Complement of this pattern. The automata is determinized, every state gets
  # a transition to a sink for the symbols it does not already handle, and
  # every state that was not accepting is wired to a fresh accept state.
  def negate
    deterministic, accepts = automata.determinize [@accept]
    sink = deterministic.new_state
    deterministic.connect sink, CharacterSet.any, sink
    sink.times do |state|
      joint = CharacterSet.any
      deterministic.table[state].each { |tran| joint.delete tran.symbols }
      deterministic.connect state, joint, sink unless joint.empty?
    end
    accept = deterministic.new_state
    # `accepts` is keyed by deterministic state, so include? is a key test.
    accept.times { |state| deterministic.connect state, Epsilon, accept unless accepts.include? state }
    Pattern.new deterministic, accept
  end
  alias :not :negate

  # Between `least` and `most` repetitions (exactly `least` by default).
  # Raises ArgumentError for negative or inverted bounds.
  def [](least, most = least)
    raise ArgumentError, "invalid repetition bounds #{least}..#{most}" if least < 0 || most < least
    Pattern.concat(*([self] * least + [self.opt] * (most - least)))
  end

  # Alternation: either this pattern or `other`.
  def |(other)
    automata = Automata.new 2
    [self, other].each do |pattern|
      start = automata.merge!(pattern.automata)
      branch_accept = automata.connect(0, Epsilon, start) + pattern.accept
      automata.connect branch_accept, Epsilon, 1
    end
    Pattern.new automata, 1
  end

  # Builds concat(negate(any* self any*), self): input containing no
  # occurrence of this pattern, followed by this pattern.
  def ~
    any = Pattern.from_char_set(CharacterSet.any).kleene
    Pattern.concat(Pattern.concat(any, self, any).negate, self)
  end
end
174
+
175
# A finite automata stored as an adjacency table: @table[state] is the list
# of Transition records leaving that state.
class Automata
  attr_reader :table
  Transition = Struct.new(:symbols, :destination)

  # table: an existing transition table (Array, used as-is) or a state count
  # (Integer, creates that many states with no transitions).
  # Raises ArgumentError for any other argument.
  def initialize(table = [])
    @table =
      case table
      when Array then table
      # Integer rather than Fixnum: the Fixnum constant no longer exists in
      # current Ruby, and Integer matches on old versions as well.
      when Integer then Array.new(table) { [] }
      else raise ArgumentError, "expected an Array or an Integer, got #{table.class}"
      end
  end

  # Adds a transition and returns its destination so calls can be chained.
  def connect(start, symbols, destination)
    @table[start] << Transition.new(symbols, destination)
    destination
  end

  # Appends a copy of every state of `other`, shifting its destinations by the
  # current state count. Returns the index `other`'s state 0 received.
  def merge!(other)
    start = @table.length
    other.table.each do |transitions|
      @table << transitions.map { |tran| Transition.new(tran.symbols, tran.destination + start) }
    end
    start
  end

  # New automata with every transition reversed.
  def reverse
    reversed = Array.new(@table.length) { [] }
    @table.each_with_index do |transitions, index|
      transitions.each { |tran| reversed[tran.destination] << Transition.new(tran.symbols, index) }
    end
    Automata.new reversed
  end

  # Copies the table and each state's transition list; the Transition records
  # themselves are shared with the original.
  def dup
    Automata.new(@table.map { |transitions| transitions.dup })
  end

  # Splits the symbols leaving `states` into disjoint ranges and yields, for
  # each range, the destinations reachable on it and the range as a character
  # set. Epsilon transitions contribute no points and so are never yielded.
  def alphabet(states)
    points = states.inject([]) do |result, state|
      @table[state].inject(result) { |r, tran| r + tran.symbols.to_points(tran.destination) }
    end
    # Order by character; at the same character interval starts come first.
    points.sort! { |x, y| [x.char, x.is_start ? 0 : 1] <=> [y.char, y.is_start ? 0 : 1] }
    # Number of currently open intervals per destination. Counting (instead of
    # a plain list) keeps a destination reachable until the last of several
    # overlapping intervals leading to it has closed.
    open_intervals = {}
    points.each_with_index do |point, index|
      destination = point.destination
      if point.is_start
        open_intervals[destination] = (open_intervals[destination] || 0) + 1
      else
        remaining = (open_intervals[destination] || 0) - 1
        if remaining > 0
          open_intervals[destination] = remaining
        else
          open_intervals.delete destination
        end
        next if open_intervals.empty?
      end
      symbols = range(point, points[index + 1])
      yield open_intervals.keys, symbols if symbols
    end
  end

  # Subset construction; returns [deterministic_automata, accept_states].
  def determinize(accepts)
    SubsetDeterminizer.new(self, accepts).determinize
  end

  # Appends a state with no transitions and returns its index.
  def new_state
    @table << []
    @table.length - 1
  end

  def all_states
    (0...table.length).to_a
  end

  private

  # Character set strictly between two consecutive points, or nil when the
  # gap between them is empty.
  def range(point_a, point_b)
    start_point = point_a.is_start ? point_a.char : (point_a.char + 1)
    end_point = point_b.is_start ? point_b.char - 1 : point_b.char
    start_point > end_point ? nil : CharacterSet::Interval.new(start_point, end_point).to_char_set
  end
end
259
+
260
# Subset construction: converts a nondeterministic automata (start state 0)
# into a deterministic one, and records which nondeterministic accept states
# each deterministic state contains.
class SubsetDeterminizer
  # nondeterministic: automata responding to #table and #alphabet.
  # accepts: collection of accepting state indices of that automata.
  def initialize(nondeterministic, accepts)
    @unmarked, @dstates, @accepts = [], [], accepts
    # Subset (sorted state list) => deterministic state index. Replaces a
    # linear search through @dstates for every transition.
    @dstate_index = {}
    @nondeterministic, @deterministic, @accept_states = nondeterministic, Automata.new, {}
    unmark closure([0])
  end

  # Returns [deterministic_automata, accept_states], where accept_states maps
  # a deterministic state to the nondeterministic accept states inside it.
  def determinize
    until @unmarked.empty?
      start = @unmarked.pop
      @nondeterministic.alphabet(@dstates[start]) do |states, symbols|
        subset = closure(states)
        destination = @dstate_index[subset] || unmark(subset)
        @deterministic.connect start, symbols, destination
      end
    end
    return @deterministic, @accept_states
  end

  private

  # Registers a newly found subset as a deterministic state, queues it for
  # processing and returns its index.
  def unmark(states)
    state = @deterministic.new_state
    @dstates << states
    @dstate_index[states] = state
    @unmarked.push state
    accepts = states.find_all { |x| @accepts.include? x }
    @accept_states[state] = accepts unless accepts.empty?
    state
  end

  # Sorted list of the given states plus everything reachable from them
  # through Epsilon transitions only.
  def closure(states)
    reached, unvisited = states.dup, states.dup
    until unvisited.empty?
      @nondeterministic.table[unvisited.pop].each do |tran|
        next unless tran.symbols == Epsilon
        next if reached.include? tran.destination
        reached << tran.destination
        unvisited << tran.destination
      end
    end
    reached.sort!
  end
end
300
+
301
# A set of character codes stored as a list of inclusive intervals.
class CharacterSet
  # Inclusive range of character codes; `last` defaults to `first`.
  Interval = Struct.new(:first, :last) do
    def initialize(first, last = first)
      super first, last
    end

    # Accepts a character code, or a String whose first byte is tested.
    def include?(char)
      # String#[] with an index yields a String on current Ruby, so the byte
      # value is extracted explicitly before comparing.
      char = char.unpack('C').first if char.kind_of? String
      first <= char && char <= last
    end

    # Grows this interval to cover `other` when the two overlap or are
    # directly adjacent (including when `other` contains this interval).
    # Returns true when merged, false when left untouched.
    def merge!(other)
      return false if other.first > last + 1 || other.last < first - 1
      self.first = [first, other.first].min
      self.last = [last, other.last].max
      true
    end

    def to_char_set
      CharacterSet.new self
    end
  end

  # Start or end marker of an interval leading to `destination`.
  Point = Struct.new(:char, :is_start, :destination)

  attr_reader :intervals

  # Set covering the codes 0 through 65535.
  def self.any
    Interval.new(0, 65535).to_char_set
  end

  def initialize(*intervals)
    @intervals = intervals
  end

  # New set containing this set plus `other` (a CharacterSet, or anything
  # whose #to_s bytes are added one by one).
  def +(other)
    result = dup
    if other.kind_of? CharacterSet
      other.intervals.each { |interval| result.add_interval interval.first, interval.last }
    else
      other.to_s.each_byte { |byte| result.add_interval byte }
    end
    result
  end

  # New set containing this set minus `other` (same argument forms as #+).
  def -(other)
    result = dup
    if other.kind_of? CharacterSet
      other.intervals.each { |interval| result.delete_interval interval.first, interval.last }
    else
      other.to_s.each_byte { |byte| result.delete_interval byte }
    end
    result
  end

  # Adds a Range of codes, or the bytes of obj.to_s, to this set in place.
  def <<(obj)
    obj.kind_of?(Range) ? add_interval(obj.first, obj.last) : obj.to_s.each_byte { |x| add_interval x }
  end

  # Removes a Range, another CharacterSet, or the bytes of obj.to_s in place.
  def delete(obj)
    case obj
    when Range
      delete_interval obj.first, obj.last
    when CharacterSet
      obj.intervals.each { |interval| delete_interval interval.first, interval.last }
    else
      obj.to_s.each_byte { |x| delete_interval x }
    end
  end

  def include?(char)
    @intervals.any? { |x| x.include? char }
  end

  def empty?
    @intervals.empty?
  end

  # Start and end Points of every interval, each tagged with `destination`.
  def to_points(destination)
    @intervals.inject([]) do |points, interval|
      points << Point.new(interval.first, true, destination)
      points << Point.new(interval.last, false, destination)
    end
  end

  # Copy with its own Interval objects, so changes to one set never reach
  # the other.
  def dup
    CharacterSet.new(*@intervals.map { |interval| interval.dup })
  end

  protected

  # Adds first..last, merging it into an existing interval when they touch
  # and then absorbing any other intervals the grown interval now reaches.
  def add_interval(first, last = first)
    interval = Interval.new first, last
    target = @intervals.find { |x| x.merge! interval }
    if target
      @intervals.reject! { |x| !x.equal?(target) && target.merge!(x) }
    else
      @intervals << interval
    end
    @intervals
  end

  # Removes first..last from every interval it overlaps. Untouched intervals
  # keep their order; remaining pieces of cut intervals are appended last.
  def delete_interval(first, last = first)
    untouched, pieces = [], []
    @intervals.each do |x|
      if x.last < first || last < x.first
        untouched << x
      else
        pieces << Interval.new(x.first, first - 1) if x.first < first
        pieces << Interval.new(last + 1, x.last) if last < x.last
      end
    end
    @intervals.replace(untouched + pieces)
  end

  # Appends first..last unless it is empty.
  def add_new_interval(first, last)
    @intervals << Interval.new(first, last) if first <= last
  end
end
421
+
422
# Character set with no intervals, used as the label of epsilon transitions.
# SubsetDeterminizer#closure follows a transition only when its symbols
# compare equal to this constant.
+ Epsilon = CharacterSet.new
423
+ end
@@ -0,0 +1,445 @@
1
+ require 'set'
2
+
3
+ module Aurum
4
# Grammar symbol: a name plus a flag telling terminals from nonterminals.
Symbol = Struct.new(:name, :is_terminal)
# Production: the nonterminal being defined and its right-hand side symbols.
Production = Struct.new(:nonterminal, :symbols)

# Parser actions. ShiftAction carries a target state index; ReduceAction
# carries the index of the production to reduce by.
ShiftAction = Struct.new(:state, :is_lookahead_shift)
ReduceAction = Struct.new(:handle, :is_read_reduce)

# Synthetic start nonterminal and end-of-input terminal.
START = Symbol.new('$start', false)
EOF = Symbol.new('$eof', true)
8
+
9
# Builds an LR parsing table for the part of a grammar reachable from a start
# symbol. The grammar definition maps a nonterminal to its productions.
# `grep_each` (used on arrays and states below) is provided outside this file;
# it is called with a predicate (String or lambda) and a block.
class ParsingTableGenerator
  attr_reader :symbols, :productions

  DEFAULT_ASSOCIATIVITIES = {:left => [], :right => []}

  # definition: object indexed by nonterminal, returning its productions.
  # precedences: list of terminal groups, passed to PrecedenceTable.
  # associativities: Hash with :left and :right lists of terminals.
  def initialize(definition, precedences = [], associativities = DEFAULT_ASSOCIATIVITIES)
    @definition = definition
    @precedence_table = PrecedenceTable.new precedences, associativities
  end

  # Collects the symbols and productions reachable from `start` and computes
  # nullable symbols and first sets. Returns self so calls can be chained.
  def start_from(start)
    @start_production = Aurum::Production.new START, [start]
    @symbols, @productions, @nullables, @first_sets = [], [@start_production], [], {START => []}
    find_all_used_symbols_and_productions start
    compute_nullable_symbols
    compute_first_sets
    self
  end

  # Returns [table, lookahead_level]. The table holds one Hash per state,
  # mapping a symbol to an action, with the state's default action as the
  # Hash default. Lookahead is computed only when some state needs it.
  def parsing_table
    construct_LR0_automata
    if @states.any? { |x| !x.consistent? }
      compute_LALR_1_lookahead
      compute_LALR_n_lookahead if @states.any? { |x| x.conflicted? }
    end
    table = @states.map do |state|
      actions = Hash.new default_action(state)
      state.actions.each { |symbol, action| actions[symbol] = action.to_a.first }
      actions
    end
    return table, @lookahead_level
  end

  private

  # Depth-first walk over the grammar starting at `start`.
  def find_all_used_symbols_and_productions(start)
    unvisited = [start]
    until unvisited.empty?
      visiting = unvisited.pop
      # A symbol can be queued several times before its first visit; skipping
      # repeats keeps @symbols and @productions free of duplicates.
      next if @symbols.include? visiting
      @symbols << visiting
      @first_sets[visiting] = visiting.is_terminal ? [visiting] : []
      next if visiting.is_terminal
      @definition[visiting].each do |production|
        @productions << production
        production.symbols.each { |symbol| unvisited << symbol unless @symbols.include? symbol }
      end
    end
  end

  # Repeats until stable: a nonterminal is nullable when all symbols of one of
  # its productions are nullable (vacuously true for an empty right-hand side).
  def compute_nullable_symbols
    loop do
      changed = false
      @productions.each do |production|
        next if nullable? production.nonterminal
        if production.symbols.all? { |s| nullable? s }
          @nullables << production.nonterminal
          changed = true
        end
      end
      break unless changed
    end
  end

  # Repeats until stable: adds the first set of each leading symbol of a
  # production to its nonterminal, stopping at the first non-nullable symbol.
  def compute_first_sets
    loop do
      changed = false
      @productions.each do |production|
        set = @first_sets[production.nonterminal]
        production.symbols.each do |symbol|
          size_before = set.length
          set.replace(set | @first_sets[symbol])
          changed = true if set.length != size_before
          break unless nullable? symbol
        end
      end
      break unless changed
    end
  end

  def nullable?(symbol)
    @nullables.include? symbol
  end

  # Builds the LR(0) states. A transition whose target consists of a single
  # completed item is recorded as a read-reduce action instead of creating a
  # state; every other transition becomes a shift and records the source
  # state as a predecessor of the target.
  def construct_LR0_automata
    @lookahead_level = 0
    start_state = State.new closure([LRItem.new(@start_production, 0)])
    @states, unvisited = [start_state], [start_state]
    until unvisited.empty?
      visiting = unvisited.pop
      visiting.grep_each '!x.handle?' do |item|
        symbol = item.dot_symbol
        new_state = goto visiting, symbol
        if (read_reduce = new_state.read_reduce)
          visiting[symbol] << ReduceAction.new(@productions.index(read_reduce), true)
        else
          if (index = @states.index(new_state))
            new_state = @states[index]
          else
            [@states, unvisited].each { |x| x << new_state }
            index = @states.length - 1
          end
          visiting[symbol] << ShiftAction.new(index, false)
          new_state.predsucceors << visiting
        end
      end
    end
  end

  # Adds reduce actions to every inconsistent state for each lookahead in the
  # follow set of the handle's nonterminal. When the lookahead is an operator
  # that currently only shifts, the precedence table decides whether the
  # reduce replaces the shift or is dropped.
  # NOTE(review): production.operator is nil for a production without
  # terminals; PrecedenceTable#compare would then fail on the nil result of
  # <=> -- confirm whether that case can reach this branch.
  def compute_LALR_1_lookahead
    @lookahead_level = 1
    @lookahead_config_stack, @lookahead_indicitor, @lookahead_result = [], {}, {}
    @states.grep_each '!x.consistent?' do |state|
      state.grep_each 'x.handle?' do |handle|
        production = handle.production
        state.predsucceors(production.symbols.reverse).each do |predsucceor|
          compute_follow_set(predsucceor, production.nonterminal).each do |lookahead|
            if state.only_shift?(lookahead) && @precedence_table.operator?(lookahead)
              if @precedence_table.compare(production.operator, lookahead) >= 0
                state[lookahead].clear
                state[lookahead] << ReduceAction.new(@productions.index(production), false)
              end
            else
              state[lookahead] << ReduceAction.new(@productions.index(production), false)
            end
          end
        end
      end
    end
  end

  # For every conflicted state, gathers per action the state stacks from which
  # the conflicting lookahead can follow, then resolves the conflict with
  # additional lookahead states.
  def compute_LALR_n_lookahead
    @stack_seen = []
    @states.grep_each 'x.conflicted?' do |state|
      @current_lookahead_level = 1
      state.conflicted_actions.each do |lookahead, actions|
        sources = {}
        actions.each do |action|
          if action.kind_of? ShiftAction
            sources[action] = [[state]].to_set
          else
            handle = @productions[action.handle]
            sources[action] = action.is_read_reduce ? [[state]].to_set : [].to_set
            state.predsucceors(handle.symbols.reverse).each do |predsucceor|
              @follow_sources_visited = []
              sources[action] |= follow_sources [predsucceor], handle.nonterminal, lookahead
            end
          end
        end
        resolve_conficts state, lookahead, sources
      end
      @lookahead_level = [@current_lookahead_level, @lookahead_level].max
    end
  end

  # Item-set closure: adds an initial item for every production of each
  # nonterminal that appears right after a dot.
  def closure(items)
    result, unmarked = items.dup, items.dup
    until unmarked.empty?
      visiting = unmarked.pop
      next if visiting.handle? || visiting.dot_symbol.is_terminal
      @definition[visiting.dot_symbol].each do |production|
        item = LRItem.new production, 0
        [result, unmarked].each { |x| x << item } unless result.include? item
      end
    end
    result
  end

  # Closure of the items of `items` advanced over `symbol`.
  def goto(items, symbol)
    result = State.new []
    items.each { |item| result << LRItem.new(item.production, item.position + 1) if item.dot_symbol == symbol }
    closure result
  end

  # Follow set of `nonterminal` in `state` (or of a Configuration passed as
  # the only argument). Configurations are kept on a stack and numbered by
  # stack depth; once a configuration's number is back to its own depth,
  # everything above it on the stack receives the same result and is marked
  # finished with 65535.
  def compute_follow_set(state, nonterminal = nil)
    if state.kind_of? Configuration
      config = state
      state, nonterminal = config.state, config.symbol
    else
      config = Configuration.new state, nonterminal
    end
    if START == nonterminal
      @lookahead_indicitor[config] = 65535
      @lookahead_result[config] = [EOF]
    else
      @lookahead_config_stack.push config
      @lookahead_indicitor[config] = (d = @lookahead_config_stack.length)
      @lookahead_result[config] = read_set state, nonterminal
      each_included_by state, nonterminal do |p, b|
        new_config = Configuration.new p, b
        compute_follow_set new_config unless @lookahead_indicitor[new_config]
        @lookahead_indicitor[config] = [@lookahead_indicitor[config], @lookahead_indicitor[new_config]].min
        @lookahead_result[config] |= @lookahead_result[new_config]
      end
      if @lookahead_indicitor[config] == d
        connected = nil
        until connected == config
          connected = @lookahead_config_stack.pop
          @lookahead_result[connected] = @lookahead_result[config].dup
          @lookahead_indicitor[connected] = 65535
        end
      end
    end
    @lookahead_result[config]
  end

  # Set of state stacks from which `lookahead` can be read after
  # `nonterminal` has been recognised on top of `stack`.
  def follow_sources(stack, nonterminal, lookahead)
    top = stack.last
    if stack.length == 1
      # Single-state stacks are remembered so cyclic grammars terminate.
      config = Configuration.new top, nonterminal
      @follow_sources_visited.include?(config) and return []
      @follow_sources_visited |= [config]
    end
    stacks = [].to_set
    if (q_index = top.goto(nonterminal))
      q = @states[q_index]
      stacks = [stack + [q]].to_set if q.direct_read.include?(lookahead)
    end
    each_read_by(top, nonterminal) { |q, y| stacks |= follow_sources stack + [q], y, lookahead unless y.is_terminal }
    top.grep_each(lambda { |x| x.kernel? && !x.start? && x.dot_symbol == nonterminal }) do |item|
      c = item.production.nonterminal
      if item.position < stack.length
        stacks |= follow_sources stack.slice(0..(-item.position - 1)), c, lookahead
      else
        first_part = item.production.symbols.slice 0..(-stack.length - 1)
        stack[0].predsucceors(first_part).reverse.each { |q| stacks |= follow_sources [q], c, lookahead }
      end
    end
    stacks
  end

  # Replaces the conflicting actions of `state` on `lookahead` with a shift
  # into a new lookahead state, files each action under the symbols that can
  # come next, and recurses on whatever still conflicts.
  # Raises 'not LALR(n)' when a stack is met a second time.
  def resolve_conficts(state, lookahead, sources)
    @current_lookahead_level += 1
    @states << (lookahead_state = State.new([]))
    state[lookahead].replace [ShiftAction.new((@states.length - 1), true)]
    sources.each do |action, stacks|
      stacks.each do |stk|
        raise 'not LALR(n)' if @stack_seen.include? stk
        @stack_seen << stk
        next_lookaheads(stk, lookahead).each { |a| lookahead_state[a] << action }
      end
    end
    lookahead_state.conflicted_actions.each do |next_lookahead, actions|
      new_sources = {}
      actions.each do |action|
        new_sources[action] = [].to_set
        sources[action].each do |stk|
          @follow_sources_visited = []
          new_sources[action] |= follow_sources stk, lookahead, next_lookahead
        end
      end
      # Recurse once per conflicting symbol, after the sources of every
      # action involved have been collected.
      resolve_conficts lookahead_state, next_lookahead, new_sources
    end
  end

  # Symbols that can follow `lookahead` when it is read on top of `stack`.
  def next_lookaheads(stack, lookahead)
    EOF == lookahead and return [EOF]
    top = stack.last
    lookaheads = read_set top, lookahead
    top.grep_each(lambda { |x| x.kernel? && !x.start? && x.dot_symbol == lookahead }) do |item|
      c = item.production.nonterminal
      if item.position < stack.length
        lookaheads |= next_lookaheads stack.slice(0..(-item.position - 1)), c
      else
        first_part = item.production.symbols.slice 0..(-stack.length - 1)
        stack[0].predsucceors(first_part).reverse.each { |q| lookaheads |= compute_follow_set q, c }
      end
    end
    lookaheads
  end

  # Union of the first sets of the symbols yielded by each_read_by.
  def read_set(state, symbol)
    result = []
    each_read_by(state, symbol) { |_q, y| result |= @first_sets[y] }
    result
  end

  # For every item of the state reached from `state` over `symbol`, yields
  # `state` together with each symbol after the dot, stopping at the first
  # symbol that is not nullable.
  # NOTE(review): the state yielded is the source `state`, not the state
  # reached over `symbol`; follow_sources pushes it onto its stack -- confirm
  # that this is intended.
  def each_read_by(state, symbol)
    index = state.goto symbol
    return unless index
    @states[index].each do |item|
      item.second_part.each do |following|
        yield state, following
        nullable? following or break
      end
    end
  end

  # For every item of `state` whose production has `nonterminal` followed only
  # by nullable symbols, yields each state reached by walking back over the
  # symbols in front of it, together with the production's nonterminal.
  def each_included_by(state, nonterminal)
    return unless state
    state.each do |item|
      symbols = item.production.symbols
      symbols.reverse.each_with_index do |symbol, index|
        first_part = symbols.slice 0, symbols.length - index - 1
        if nonterminal == symbol
          state.predsucceors(first_part.reverse).each { |s| yield s, item.production.nonterminal }
        end
        nullable? symbol or break
      end
    end
  end

  # Action used for symbols without an explicit entry. For a state with items:
  # reduce by the shortest completed production other than the start
  # production (nil when there is none). For a state without items: the
  # non-read-reduce ReduceAction whose production is shortest.
  def default_action(state)
    unless state.empty?
      handle = nil
      state.each do |x|
        p = x.production
        shorter = !handle || handle.production.symbols.length > p.symbols.length
        handle = x if x.handle? && p.nonterminal != START && shorter
      end
      return handle ? ReduceAction.new(@productions.index(handle.production), false) : nil
    end
    candidates = state.actions.values.inject([]) do |found, actions|
      found | actions.find_all { |x| x.kind_of?(Aurum::ReduceAction) && !x.is_read_reduce }
    end
    # Candidates are ReduceActions; their production is looked up through
    # the production index stored in #handle.
    candidates.min { |x, y| @productions[x.handle].symbols.length <=> @productions[y.handle].symbols.length }
  end

  # Operator precedence lookup. Groups listed earlier in `precedences` receive
  # a higher number, because indices are assigned over the reversed list.
  class PrecedenceTable
    def initialize(precedences, associativities)
      @precedence_table, @associativities = {}, associativities
      precedences.reverse.each_with_index do |terminals, index|
        terminals.each { |terminal| @precedence_table[terminal] = index }
      end
    end

    def operator?(symbol)
      @precedence_table.key? symbol
    end

    # Compares the precedence numbers of `current` and `lookahead`. On a tie
    # the result is -1 when both are right associative and 1 otherwise.
    def compare(current, lookahead)
      if @precedence_table[current] == @precedence_table[lookahead]
        both_right = @associativities[:right].include?(current) && @associativities[:right].include?(lookahead)
        return both_right ? -1 : 1
      end
      @precedence_table[current] <=> @precedence_table[lookahead]
    end
  end

  Production.class_eval do
    attr_accessor :action

    # Last terminal on the right-hand side, or nil when there is none.
    def operator
      symbols.reverse.find { |x| x.is_terminal }
    end
  end

  # A parser state: an Array of LR items plus its actions per symbol and the
  # states that lead into it. Note that #[] looks up actions by symbol rather
  # than items by position.
  class State < Array
    attr_reader :actions

    def initialize(elements)
      super elements
      @actions, @predsucceors = {}, []
    end

    # Set of actions for `symbol`, created empty on first access.
    def [](symbol)
      @actions[symbol] ||= Set.new([])
    end

    # False when the state has more than one completed item, or exactly one
    # completed item next to other kernel items.
    def consistent?
      handles = select { |item| item.handle? }.length
      kernels = select { |item| item.kernel? }.length
      !(handles > 1 || (handles == 1 && handles != kernels))
    end

    def conflicted?
      !consistent? && @actions.any? { |_symbol, actions| actions.length > 1 }
    end

    # [symbol, actions] pairs holding more than one action.
    def conflicted_actions
      @actions.find_all { |_symbol, actions| actions.length > 1 }
    end

    def only_shift?(symbol)
      !self[symbol].empty? && @actions[symbol].all? { |x| x.kind_of? ShiftAction }
    end

    # The production of the state's only item when that item is completed,
    # otherwise nil.
    def read_reduce
      length == 1 && first.handle? ? first.production : nil
    end

    # Index of the state shifted to on `symbol`, or nil without a shift.
    def goto(symbol)
      shift = self[symbol].find { |x| x.kind_of? Aurum::ShiftAction }
      shift.state if shift
    end

    # Without argument: the directly recorded predecessor states. With a list
    # of symbols: the states reached by stepping back once per symbol, keeping
    # only predecessors that have an item with that symbol after the dot.
    def predsucceors(symbols = nil)
      symbols or return @predsucceors
      result = [self]
      symbols.each do |symbol|
        stepped_back = result.inject([]) do |sum, x|
          sum | x.predsucceors.find_all { |predsucceor| predsucceor.any? { |item| item.dot_symbol == symbol } }
        end
        result.replace stepped_back
      end
      result
    end

    # Distinct symbols that appear directly after a dot in this state.
    def direct_read
      inject([]) do |result, item|
        item.dot_symbol ? result | [item.dot_symbol] : result
      end
    end

    # States are equal when they hold the same items, in any order.
    def ==(other)
      other.kind_of? State or return false
      equal? other and return true
      length == other.length or return false
      all? { |x| other.include? x }
    end
  end

  LRItem, Configuration = Struct.new(:production, :position), Struct.new(:state, :symbol)

  LRItem.class_eval do
    # Symbol right after the dot, or nil when the dot is at the end.
    def dot_symbol
      production.symbols[position]
    end

    def start?
      production.nonterminal == START
    end

    # True when the dot is at the end of the production.
    def handle?
      position >= production.symbols.length
    end

    def kernel?
      handle? || position != 0
    end

    # Symbols before the dot.
    def first_part
      production.symbols.slice(0, position)
    end

    # Symbols from the dot onwards (empty for a completed item).
    def second_part
      handle? ? [] : production.symbols.slice(position..-1)
    end
  end
end
445
+ end