aurum 0.1.1 → 0.2.0

Files changed (77)
  1. data/Rakefile +29 -0
  2. data/examples/dangling_else/grammar.rb +23 -0
  3. data/examples/expression/grammar.rb +28 -0
  4. data/examples/smalltalk/grammar.rb +151 -0
  5. data/examples/smalltalk/interpreter.rb +70 -0
  6. data/examples/yacc/grammar.rb +72 -0
  7. data/lib/aurum.rb +1 -9
  8. data/lib/aurum/engine.rb +39 -175
  9. data/lib/aurum/engine/parsing_facility.rb +107 -0
  10. data/lib/aurum/engine/tokenization_facility.rb +86 -0
  11. data/lib/aurum/grammar.rb +52 -219
  12. data/lib/aurum/grammar/automata.rb +194 -0
  13. data/lib/aurum/grammar/builder/augmented_grammar.rb +83 -0
  14. data/lib/aurum/grammar/builder/dot_logger.rb +66 -0
  15. data/lib/aurum/grammar/builder/lexical_table_builder.rb +55 -0
  16. data/lib/aurum/grammar/builder/parsing_table_builder.rb +238 -0
  17. data/lib/aurum/grammar/builder/set_of_items.rb +190 -0
  18. data/lib/aurum/grammar/compiled_tables.rb +20 -0
  19. data/lib/aurum/grammar/dsl/lexical_definition.rb +94 -0
  20. data/lib/aurum/grammar/dsl/syntax_definition.rb +79 -0
  21. data/lib/aurum/grammar/lexical_rules.rb +224 -0
  22. data/lib/aurum/grammar/metalang/grammar.rb +47 -0
  23. data/lib/aurum/grammar/syntax_rules.rb +95 -0
  24. data/spec/builder/dsl_definition/aurum_grammar_spec.rb +33 -0
  25. data/spec/engine/lexer_spec.rb +59 -0
  26. data/spec/engine/parser_spec.rb +90 -0
  27. data/spec/examples/dangling_else_example.rb +30 -0
  28. data/spec/examples/expression_example.rb +48 -0
  29. data/spec/examples/smalltalk_example.rb +50 -0
  30. data/spec/examples/yacc_spec.rb +30 -0
  31. data/spec/grammar/builder/lexical_table/automata_spec.rb +55 -0
  32. data/spec/grammar/builder/lexical_table/builder_spec.rb +78 -0
  33. data/spec/grammar/builder/lexical_table/character_set_spec.rb +100 -0
  34. data/spec/grammar/builder/lexical_table/pattern_spec.rb +11 -0
  35. data/spec/grammar/builder/lexical_table/regular_expression.rb +40 -0
  36. data/spec/grammar/builder/parsing_table/augmented_grammar_spec.rb +36 -0
  37. data/spec/grammar/builder/parsing_table/builder_spec.rb +152 -0
  38. data/spec/grammar/builder/parsing_table/digraph_traverser_spec.rb +42 -0
  39. data/spec/grammar/builder/parsing_table/item_spec.rb +51 -0
  40. data/spec/grammar/builder/parsing_table/sources_spec.rb +66 -0
  41. data/spec/grammar/builder/parsing_table/state_spec.rb +82 -0
  42. data/spec/grammar/dsl/character_classes_builder_spec.rb +50 -0
  43. data/spec/grammar/dsl/lexical_rules_builder_spec.rb +181 -0
  44. data/spec/grammar/dsl/precedence_builder_spec.rb +64 -0
  45. data/spec/grammar/dsl/productions_builder_spec.rb +78 -0
  46. data/spec/grammar/metalang/metalang_spec.rb +0 -0
  47. data/spec/grammar/precedence_spec.rb +42 -0
  48. data/spec/grammar/syntax_rules_spec.rb +31 -0
  49. data/spec/parser_matcher.rb +69 -0
  50. data/spec/pattern_matcher.rb +123 -0
  51. data/spec/spec_helper.rb +133 -0
  52. metadata +70 -36
  53. data/example/expression/expression.rb +0 -35
  54. data/example/expression/lisp.rb +0 -26
  55. data/lib/aurum/lexical_table_generator.rb +0 -429
  56. data/lib/aurum/parsing_table_generator.rb +0 -464
  57. data/test/engine/lexer_test.rb +0 -59
  58. data/test/engine/semantic_attributes_test.rb +0 -15
  59. data/test/grammar_definition/character_class_definition_test.rb +0 -28
  60. data/test/grammar_definition/grammar_definition_test.rb +0 -55
  61. data/test/grammar_definition/lexical_definition_test.rb +0 -56
  62. data/test/grammar_definition/operator_precedence_definition_test.rb +0 -35
  63. data/test/grammar_definition/production_definition_test.rb +0 -60
  64. data/test/lexical_table_generator/automata_test.rb +0 -74
  65. data/test/lexical_table_generator/character_set_test.rb +0 -73
  66. data/test/lexical_table_generator/interval_test.rb +0 -36
  67. data/test/lexical_table_generator/pattern_test.rb +0 -115
  68. data/test/lexical_table_generator/subset_determinizer_test.rb +0 -19
  69. data/test/lexical_table_generator/table_generator_test.rb +0 -126
  70. data/test/parsing_table_generator/augmented_grammar_test.rb +0 -45
  71. data/test/parsing_table_generator/lalr_n_computation_test.rb +0 -92
  72. data/test/parsing_table_generator/lr_0_automata_test.rb +0 -94
  73. data/test/parsing_table_generator/lr_item_test.rb +0 -27
  74. data/test/parsing_table_generator/parsing_table_state_test.rb +0 -39
  75. data/test/parsing_table_generator/precedence_table_test.rb +0 -28
  76. data/test/parsing_table_generator/production_test.rb +0 -9
  77. data/test/test_helper.rb +0 -103

data/lib/aurum/grammar/automata.rb
@@ -0,0 +1,194 @@
+ module Aurum
+   class Grammar
+     class LexicalRules
+       class Automata
+         Transition = Struct.new(:character_set, :destination)
+         attr_reader :table
+         def initialize(table=[])
+           @table = table.is_a?(Array) ? table : Array.new(table){[]}
+         end
+
+         def connect start, character_set, destination
+           @table[start] << Transition.new(character_set, destination)
+           destination
+         end
+
+         def merge! other
+           start = @table.length
+           for trans in other.table
+             @table << trans.map {|tran| Transition.new(tran.character_set, tran.destination + start)}
+           end
+           start
+         end
+
+         def new_state
+           @table << []
+           @table.size - 1
+         end
+
+         def all_states
+           (0..@table.size - 1).to_a
+         end
+
+         def alphabet states
+           points, reachable_states = [], []
+           for state in states
+             @table[state].each {|tran| points.concat(tran.character_set.to_points(tran.destination))}
+           end
+           points.sort!
+           points.each_with_index do |point, index|
+             if point.is_start
+               reachable_states << point.destination
+             else
+               reachable_states.delete point.destination
+               next if reachable_states.empty?
+             end
+             character_set = range(point, points[index + 1])
+             yield(reachable_states.uniq, character_set) if character_set
+           end
+         end
+
+         def dup
+           Automata.new(@table.map {|x| x.dup})
+         end
+
+         def reverse
+           reverse = Automata.new(@table.size)
+           @table.each_with_index do |state, index|
+             state.each{|tran| reverse.connect(tran.destination, tran.character_set, index)}
+           end
+           reverse
+         end
+
+         def remove_dead_states accepts
+           dead_states = []
+           @table.each_with_index do |state, index|
+             next if accepts.include?(index) || state.any? {|tran| tran.destination != index }
+             dead_states << index
+           end
+           unless dead_states.empty?
+             @table.each_with_index do |state, index|
+               state.delete_if {|tran| dead_states.include?(tran.destination) }
+             end
+           end
+         end
+
+         private
+         def range point_a, point_b
+           start_point = point_a.is_start ? point_a.char : (point_a.char + 1)
+           end_point = point_b.is_start ? point_b.char - 1 : point_b.char
+           start_point > end_point ? nil : CharacterSet::Interval.new(start_point, end_point).to_char_set
+         end
+       end
+
+       class SubsetDeterminizer
+         def initialize nondeterministic, accepts
+           @unmarked, @dstates, @accepts = [], [], accepts
+           @nondeterministic, @deterministic, @accept_states = nondeterministic, Automata.new, {}
+         end
+
+         def determinize
+           unmark(closure([0]))
+           until @unmarked.empty?
+             start = @unmarked.pop
+             @nondeterministic.alphabet(@dstates[start]) do |states, character_set|
+               destination_state = closure(states)
+               destination = unmark(destination_state) unless destination = @dstates.index(destination_state)
+               @deterministic.connect(start, character_set, destination)
+             end
+           end
+           return @deterministic, @accept_states
+         end
+         private
+         def unmark states
+           @dstates << states
+           @unmarked.push(@deterministic.new_state)
+           accepts = states.find_all {|x| @accepts.include? x}
+           @accept_states[@unmarked.last] = accepts unless accepts.empty?
+           @unmarked.last
+         end
+
+         def closure states
+           closure, unvisited = states.dup, states.dup
+           until unvisited.empty? do
+             @nondeterministic.table[unvisited.pop].each do |tran|
+               if tran.character_set == Epsilon && !closure.include?(tran.destination)
+                 closure << tran.destination
+                 unvisited << tran.destination
+               end
+             end
+           end
+           closure.sort!
+         end
+       end
+
+       class HopcroftMinimizer
+         def initialize deterministic, accepts
+           @deterministic, @accepts = deterministic, accepts
+         end
+
+         def minimize
+           make_initial_partitions
+           refine_partitions
+           automata, accepts = Aurum::Grammar::LexicalRules::Automata.new(@partitions.size), {}
+           choose_representatives do |representative, index|
+             @deterministic.table[representative].each do |transition|
+               automata.connect(index, transition.character_set, partition_contains(transition.destination))
+             end
+             accepts[index] = @accepts[representative] if @accepts.has_key? representative
+           end
+           automata.remove_dead_states(accepts.keys)
+           return automata, accepts
+         end
+
+         private
+         def make_initial_partitions
+           partitions = {}
+           @accepts.each do |state, action|
+             partitions[action] = [] unless partitions.has_key? action
+             partitions[action] << state
+           end
+           @partitions = [[0], @deterministic.all_states - @accepts.keys - [0]] + partitions.values
+           @partitions.delete []
+         end
+
+         def refine_partitions
+           reverse_automata, working_list = @deterministic.reverse, @partitions.dup
+           until working_list.empty?
+             reverse_automata.alphabet(working_list.pop) do |ia, symbols|
+               @partitions.each do |r|
+                 unless r.empty?
+                   r1, r2 = r & ia, r - ia
+                   unless r2.empty? || r2 == r
+                     replace(@partitions, r => [r1, r2])
+                     if working_list.include?(r)
+                       replace(working_list, r => [r1, r2])
+                     else
+                       working_list << (r1.size <= r2.size ? r1 : r2)
+                     end
+                     working_list.uniq!
+                   end
+                 end
+               end
+             end
+           end
+         end
+
+         def choose_representatives
+           @partitions.each_with_index {|partition, index| yield partition.first, index}
+         end
+
+         def partition_contains state
+           @partitions.each_with_index {|partition, index| return index if partition.include? state}
+         end
+
+         def replace array, replacements
+           replacements.each do |old, new|
+             array.delete old
+             new.each {|x| array << x}
+           end
+         end
+       end
+     end
+   end
+ end
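
Aside: the three classes above form the NFA → DFA → minimal-DFA pipeline of the lexer generator. The following standalone sketch shows the same subset construction that SubsetDeterminizer#determinize performs, on toy data with plain string labels standing in for the gem's CharacterSet and Epsilon objects (illustration only, not aurum API):

# Toy NFA: state => { label => [destinations] }; :eps marks epsilon edges.
nfa = {
  0 => { :eps => [1, 3] },
  1 => { 'a' => [2] },
  2 => {},
  3 => { 'b' => [4] },
  4 => {}
}
accepts = [2, 4]

# Epsilon-closure of a set of NFA states.
closure = lambda do |states|
  todo, result = states.dup, states.dup
  until todo.empty?
    (nfa[todo.pop][:eps] || []).each do |s|
      next if result.include?(s)
      result << s
      todo << s
    end
  end
  result.sort
end

dstates  = [closure.call([0])]  # DFA state i is the NFA-state subset dstates[i]
dfa      = {}                   # DFA state => { label => DFA state }
unmarked = [0]
until unmarked.empty?
  from  = unmarked.pop
  moves = Hash.new { |h, k| h[k] = [] }
  dstates[from].each do |s|
    nfa[s].each { |label, dests| moves[label].concat(dests) unless label == :eps }
  end
  dfa[from] = {}
  moves.each do |label, dests|
    subset = closure.call(dests.uniq)
    to = dstates.index(subset)
    unless to                   # unseen subset: register it and mark it for expansion
      dstates << subset
      to = dstates.size - 1
      unmarked << to
    end
    dfa[from][label] = to
  end
end

dfa_accepts = (0...dstates.size).select { |i| (dstates[i] & accepts).any? }
# dfa         => {0 => {"a" => 1, "b" => 2}, 2 => {}, 1 => {}}
# dfa_accepts => [1, 2]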

data/lib/aurum/grammar/builder/augmented_grammar.rb
@@ -0,0 +1,83 @@
+ module Aurum
+   module Builder
+     StartSymbol = Aurum::Grammar.nonterminal('$start')
+
+     def Builder.working_list unvisited
+       yield unvisited, unvisited.pop while !unvisited.empty?
+     end
+
+     class AugmentedGrammar
+       attr_reader :start_production, :all_productions
+       def initialize syntax_rules, start_symbol
+         @syntax_rules, @start_symbol = syntax_rules, start_symbol
+         @start_production = Aurum::Grammar.production(StartSymbol, [start_symbol])
+         @all_productions = [@start_production]
+         @nonterminals = [StartSymbol, @start_symbol].to_set
+         @nullables = {StartSymbol => false, @start_symbol => false}
+         @first_sets = {StartSymbol => [].to_set, @start_symbol => [].to_set}
+         initialize_for_used_symbols
+       end
+
+       def nullable? symbol
+         @nullables[symbol]
+       end
+
+       def first_set symbol
+         @first_sets[symbol]
+       end
+
+       def productions(nonterminal = nil)
+         return [@start_production] if nonterminal == StartSymbol
+         @syntax_rules.productions(nonterminal)
+       end
+
+       def compute_nullables
+         fixed_point do |nonterminal|
+           break true if nullable? nonterminal
+           @nullables[nonterminal] = productions(nonterminal).any? {|prod| production_nullable?(prod)}
+         end
+       end
+
+       def compute_first_sets
+         fixed_point do |nonterminal|
+           for production in productions(nonterminal)
+             for symbol in production.symbols
+               is_updated = @first_sets[symbol].inject(false) {|r, s| r |= @first_sets[nonterminal].add?(s) }
+               break unless nullable?(symbol) and !is_updated
+             end
+           end
+           is_updated
+         end
+       end
+
+       private
+       def fixed_point
+         changed = true
+         while changed
+           changed = false
+           @nonterminals.each {| nonterminal | changed |= yield(nonterminal)}
+         end
+       end
+
+       def initialize_for_used_symbols
+         Builder.working_list [@start_symbol] do |unvisited, visiting|
+           for production in @syntax_rules.productions(visiting)
+             @all_productions << production
+             for symbol in production.symbols
+               if symbol.is_terminal
+                 @nullables[symbol], @first_sets[symbol] = false, [symbol].to_set unless @nullables.has_key?(symbol)
+               elsif @nonterminals.add?(symbol)
+                 unvisited << symbol
+                 @nullables[symbol], @first_sets[symbol] = false, [].to_set
+               end
+             end
+           end
+         end
+       end
+
+       def production_nullable? production
+         production.symbols.all? {|symbol| nullable? symbol}
+       end
+     end
+   end
+ end
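
Aside: compute_nullables and compute_first_sets above are plain fixed-point iterations over the grammar. A standalone sketch of the same idea on a toy grammar, using strings instead of the gem's symbol and production objects (illustration only, not aurum API):

require 'set'

# Toy grammar: E -> T E' ; E' -> '+' T E' | (empty) ; T -> 'id'
productions = {
  'E'  => [['T', "E'"]],
  "E'" => [['+', 'T', "E'"], []],   # [] is an empty (epsilon) body
  'T'  => [['id']]
}
nonterminals = productions.keys
terminal = lambda { |sym| !nonterminals.include?(sym) }

nullable = Hash.new(false)
first    = Hash.new { |h, k| h[k] = terminal.call(k) ? Set[k] : Set.new }

changed = true
while changed
  changed = false
  productions.each do |lhs, bodies|
    bodies.each do |body|
      # NULLABLE(lhs) once some body consists only of nullable symbols
      if !nullable[lhs] && body.all? { |s| nullable[s] }
        nullable[lhs] = true
        changed = true
      end
      # FIRST(lhs) absorbs FIRST of each symbol in the leading nullable prefix
      body.each do |sym|
        before = first[lhs].size
        first[lhs].merge(first[sym])
        changed = true if first[lhs].size != before
        break unless nullable[sym]
      end
    end
  end
end

# nullable    => only "E'" is true
# first['E']  => {"id"},  first["E'"] => {"+"},  first['T'] => {"id"}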

data/lib/aurum/grammar/builder/dot_logger.rb
@@ -0,0 +1,66 @@
+ module Aurum
+   module Builder
+     class DotLogger
+       attr_reader :dot_graphs
+       def initialize
+         @dot_graphs = {}
+       end
+
+       def productions productions
+         @productions = productions.to_a
+       end
+
+       def parsing_table_states name, states
+         dot = %Q_digraph grammar {\nrankdir=LR;\n_
+         @productions.each_with_index {|production, index| dot << production_to_dot(production, index)}
+         states.each {|state| dot << state_to_dot(state)}
+         dot << "}\n"
+         @dot_graphs[name] = dot
+       end
+
+       def lexical_table_automata name, automata, accepts
+         dot = %Q_digraph grammar {\nrankdir=LR;\n_
+         accepts.each {|state, action| dot << lexical_action_to_dot(action, state)}
+         automata.table.each_with_index do |transitions, index|
+           dot << %Q{#{index} [shape = circle, color = palegreen, style = filled]\n}
+           dot << %Q{#{index} -> action_#{index}\n} if accepts.has_key?(index)
+           transitions.each {|transition| dot << %Q{#{index} -> #{transition.destination} [label="#{transition.character_set.inspect}"]\n} }
+         end
+         dot << "}\n"
+         @dot_graphs[name] = dot
+       end
+
+       private
+       def state_to_dot state
+         dot = %Q{state_#{state.index} [label = "#{state.empty? ? 'lookahead' : state.all_items.join('\n')}"]\n}
+         dot << %Q{state_#{state.index} [shape = circle, color = #{state.conflict? ? 'maroon' : (state.inconsistent? ? 'yellow' : (state.empty? ? 'lightblue' : 'palegreen'))}, style = filled]\n}
+         for symbol, actions in state.actions
+           for action in actions do
+             dot << %Q{state_#{state.index} -> #{action_to_dot(action)} [label="#{symbol.inspect}"];\n}
+           end
+         end
+         dot
+       end
+
+       def production_to_dot production, index
+         %Q{production_#{index} [label = "#{production.inspect}", color = lightpink, style = filled, shape = octagon];\n}
+       end
+
+       def action_to_dot action
+         if action.is_a?(Aurum::ParsingTable::ShiftAction)
+           "state_#{action.state.index}"
+         else
+           "production_#{@productions.index(action.handle)}"
+         end
+       end
+
+       def lexical_action_to_dot action, state
+         action_attr = []
+         action_attr << "recognize '#{action.token}'" if action.token
+         action_attr << "shift to '#{action.state}'\n" if action.state
+         action_attr << "notify event '#{action.event}'\n" if action.event
+         %Q{action_#{state} [label = "#{action_attr.join("\n")}", color = lightpink, style = filled, shape = octagon];\n}
+       end
+     end
+   end
+ end
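
Aside: DotLogger only collects Graphviz source strings in dot_graphs; it never writes or renders them. A minimal sketch of dumping the collected graphs to disk, assuming aurum 0.2.0 is on the load path, that the logger has been handed to the table builders (the logger parameters in the builder files suggest that wiring), and that the graph names are usable as file names:

require 'aurum'

logger = Aurum::Builder::DotLogger.new
# ... pass `logger` to the lexical/parsing table builders, then:
logger.dot_graphs.each do |name, dot|
  File.open("#{name}.dot", 'w') { |f| f.write(dot) }
end
# Render afterwards with Graphviz, e.g.: dot -Tpng syntax.dot -o syntax.png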

data/lib/aurum/grammar/builder/lexical_table_builder.rb
@@ -0,0 +1,55 @@
+ module Aurum
+   module Builder
+     def Builder.build_lexical_table(lexical_rules, states, literals, logger = nil)
+       LexicalTableBuilder.new(lexical_rules, states, literals).build
+     end
+
+     class LexicalTableBuilder
+       def initialize lexical_rules, states, literals
+         @patterns, @literals = lexical_rules.patterns, literals
+         @lexical_states = @patterns.keys.to_a - ['all']
+         @literal_states, @literal_patterns, @common_patterns = states, {}, @patterns['all'] || {}
+         for literal in literals
+           pattern = Aurum::Grammar::LexicalRules::Pattern.string(literal.to_s)
+           @literal_patterns[pattern] = Aurum::LexicalTable::Action.new("$literal_#{literal}")
+         end
+       end
+
+       def build
+         automata = Aurum::Grammar::LexicalRules::Automata.new
+         start, accepts, actions = automata.new_state, {}, {}
+         @lexical_states.each_with_index do |lexical_state, index|
+           lexical_state_start = automata.new_state
+           automata.connect(start, Aurum::Grammar::LexicalRules::CharacterSet.range(-index - 1), lexical_state_start)
+           [@patterns[lexical_state], @common_patterns, literals_in(lexical_state)].each do |patterns|
+             for pattern, action in patterns
+               pattern_start = automata.merge!(pattern.automata)
+               automata.connect(lexical_state_start, Aurum::Grammar::LexicalRules::Epsilon, pattern_start)
+               accepts[pattern_start + pattern.accept] = action
+             end
+           end
+         end
+         automata, dfa_accepts = Aurum::Grammar::LexicalRules::SubsetDeterminizer.new(automata, accepts.keys).determinize
+         for dfa_accept, nfa_accepts in dfa_accepts
+           lexical_actions = nfa_accepts.inject([]) {|result, state| result << accepts[state]}
+           actions[dfa_accept] = resolve(lexical_actions.uniq)
+         end
+         automata, actions = Aurum::Grammar::LexicalRules::HopcroftMinimizer.new(automata, actions).minimize
+         Aurum::LexicalTable.new(automata.table, @lexical_states, actions)
+       end
+
+       private
+       def resolve lexical_actions
+         return lexical_actions.first if lexical_actions.size == 1
+         candidates = lexical_actions.find_all {|action| action.token =~ /^\$literal_/}
+         return candidates.first if candidates.size == 1
+         lexical_actions.each {|action| p action}
+         raise 'conflict'
+       end
+
+       def literals_in state
+         (@literal_states.include?('all') || @literal_states.include?(state)) ? @literal_patterns : {}
+       end
+     end
+   end
+ end
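
Aside: build above splices every pattern's NFA into one combined automaton via Automata#merge!, which appends the pattern's transition table and offsets its state numbers by the table length at splice time; the pattern's accept state is offset the same way (accepts[pattern_start + pattern.accept]). A standalone sketch of that offset merge on plain arrays, not the gem's Transition structs:

# Each automaton is an array indexed by state; each state is a list of
# [destination, label] pairs.
host    = [ [[1, :eps]], [] ]               # states 0..1
pattern = [ [[1, 'a']], [[2, 'b']], [] ]    # states 0..2, accept state = 2
pattern_accept = 2

offset = host.length                        # 2: where the pattern is spliced in
pattern.each do |transitions|
  host << transitions.map { |dest, label| [dest + offset, label] }
end

# Connect the host's start to the spliced-in pattern with an epsilon edge,
# as build does with Epsilon transitions from each lexical-state start.
host[0] << [offset, :eps]

accepting_state = offset + pattern_accept   # => 4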

data/lib/aurum/grammar/builder/parsing_table_builder.rb
@@ -0,0 +1,238 @@
+ require File.join(File.dirname(__FILE__), 'augmented_grammar')
+ require File.join(File.dirname(__FILE__), 'set_of_items')
+
+ module Aurum
+   module Builder
+     def Builder.build_parsing_table(syntax_rules, start_symbol, logger = nil)
+       augmented_grammar = AugmentedGrammar.new(syntax_rules, start_symbol)
+       ParsingTableBuilder.new(augmented_grammar, logger).build
+     end
+
+     class ParsingTableBuilder
+       def initialize augmented_grammar, logger
+         @augmented_grammar, @logger = augmented_grammar, logger
+         @lookahead_level, @states = 0, []
+         @inconsistent_states = []
+       end
+
+       def build
+         construct_automata
+         unless @inconsistent_states.empty?
+           @augmented_grammar.compute_nullables
+           @augmented_grammar.compute_first_sets
+           @conflict_states = [].to_set
+           @follow_set_calculator = DigraphTraverser.new do |config|
+             (config.symbol == StartSymbol) ? [false, [Aurum::Grammar::EOF], 65535] : [true, nil, nil]
+           end
+           for inconsistent_state in @inconsistent_states do
+             determine_lookaheads_for(inconsistent_state)
+             @conflict_states << inconsistent_state if inconsistent_state.conflict?
+           end
+           unless @conflict_states.empty?
+             for conflict_state in @conflict_states do
+               for lookahead in conflict_state.conflicts do
+                 @state_lookahead_level = 2
+                 resolve_conflicts_for(conflict_state, lookahead, Sources.new(conflict_state, lookahead))
+                 @lookahead_level = @state_lookahead_level if @state_lookahead_level > @lookahead_level
+               end
+             end
+           end
+         end
+         construct_parsing_table
+       end
+
+       private
+       def construct_automata
+         item = LRItem.new(@augmented_grammar.start_production)
+         add_state(start_state = State.new(@augmented_grammar, [item]))
+         Builder.working_list [start_state] do |unvisited, visiting|
+           for symbol in visiting.expect_symbols
+             new_state = visiting.goto(symbol)
+             if production = new_state.read_reduce
+               visiting.add_read_reduce(symbol, production)
+             else
+               is_new_state, new_state = add_state(new_state)
+               unvisited << new_state if is_new_state
+               visiting.add_shift(symbol, new_state)
+             end
+           end
+         end
+       end
+
+       def determine_lookaheads_for state
+         @lookahead_level = 1
+         for item in state.handles
+           lookahead_each(state, item.production) do |production, symbol|
+             if symbol.precedence
+               if production.precedence > symbol.precedence
+                 state.actions[symbol].clear
+                 state.add_reduce(symbol, production)
+               end
+             else
+               state.add_reduce(symbol, production)
+             end
+           end
+         end
+       end
+
+       def resolve_conflicts_for state, lookahead, sources
+         lookahead_state = add_state(State.new(@augmented_grammar, [])).last
+         state.add_lookahead_shift(lookahead, lookahead_state)
+         sources.each_pair do |action, stacks|
+           for stack in stacks
+             raise 'not LALR(n)' if sources.stack_seen.include? stack
+             sources.stack_seen << stack
+             next_lookaheads(stack, lookahead).each {|next_la| lookahead_state.add_action(next_la, action)}
+           end
+         end
+         for symbol in lookahead_state.conflicts
+           @state_lookahead_level += 1
+           resolve_conflicts_for(lookahead_state, symbol, Sources.new(lookahead_state, lookahead, sources, symbol))
+         end if lookahead_state.conflict?
+       end
+
+       def construct_parsing_table
+         action_table = @states.map do |state|
+           actions = {}
+           state.actions.each { |symbol, action| actions[symbol.name] = action.first}
+           actions
+         end
+         Aurum::ParsingTable.new(@augmented_grammar.all_productions, action_table, @lookahead_level)
+       end
+
+       def next_lookaheads stack, lookahead
+         Aurum::Grammar::EOF == lookahead and return [Aurum::Grammar::EOF]
+         top = stack.last
+         lookaheads = top.read_set(lookahead)
+         top.reducable_items(lookahead) do |production, position|
+           c = production.nonterminal
+           if position < stack.length
+             lookaheads |= next_lookaheads stack.slice(0..stack.length-position-1), c
+           else
+             first_part = production.symbols.slice 0..-stack.length-1
+             stack[0].predecessors(first_part).each {|q| lookaheads |= @follow_set_calculator.traverse(Configuration.new(q, c))}
+           end unless c.name == '$start'
+         end
+         lookaheads
+       end
+
+       def lookahead_each state, production
+         for predecessor in state.predecessors(production.symbols)
+           config = Configuration.new(predecessor, production.nonterminal)
+           @follow_set_calculator.traverse(config).each {|symbol| yield production, symbol}
+         end
+       end
+
+       def add_state state
+         index = @states.index(state)
+         return [false, @states[index]] if index && !state.empty?
+         state.index = @states.size
+         @states << state
+         @inconsistent_states << state if state.inconsistent?
+         [true, state]
+       end
+     end
+
+     class Configuration < Struct.new(:state, :symbol)
+       def map_to_set
+         state.read_set(symbol)
+       end
+
+       def result_each
+         state.include_each(symbol) {|state, symbol| yield Configuration.new(state, symbol)}
+       end
+     end
+
+     class DigraphTraverser
+       def initialize &condition
+         @indicitor, @result, @stack = {}, {}, []
+         @condition = condition
+       end
+
+       def traverse node
+         return @result[node] if @indicitor[node]
+         to_be_continued, @result[node], @indicitor[node] = @condition.call(node)
+         if to_be_continued
+           @stack.push(node)
+           @indicitor[node] = (cycle_indicitor = @stack.length)
+           @result[node] = node.map_to_set
+           node.result_each do |new_node|
+             traverse(new_node) unless @indicitor[new_node]
+             @indicitor[node] = [@indicitor[node], @indicitor[new_node]].min
+             @result[node] |= @result[new_node]
+           end
+           node_in_cycle = nil
+           until node_in_cycle == node
+             node_in_cycle = @stack.pop
+             @result[node_in_cycle] = @result[node].dup
+             @indicitor[node_in_cycle] = 65535
+           end if @indicitor[node] == cycle_indicitor
+         end
+         @result[node]
+       end
+     end
+
+     class Sources < DelegateClass(Hash)
+       attr_reader :stack_seen
+       def initialize(state, symbol, sources = nil, lookahead = nil)
+         @sources, @stack_seen = {}, []
+         for action in state.actions[symbol]
+           @sources[action] = [].to_set
+           if sources
+             @sources[action] = initialize_lookahead_state(sources[action], symbol, lookahead)
+           else
+             @sources[action] = initialize_state(state, action, symbol)
+           end
+         end
+         super(@sources)
+       end
+
+       private
+       def initialize_state state, action, symbol
+         if action.shift_action? || action.read_reduce?
+           stacks = [[state]].to_set
+         else
+           stacks = [].to_set
+           for predecessor in state.predecessors(action.value.symbols)
+             @visited = [].to_set
+             stacks |= follow_sources([predecessor], action.value.nonterminal, symbol)
+           end
+         end
+         stacks
+       end
+       def initialize_lookahead_state stacks, nonterminal, lookahead
+         new_stacks = [].to_set
+         for stack in stacks
+           @visited = [].to_set
+           new_stacks |= follow_sources(stack, nonterminal, lookahead)
+         end
+         new_stacks
+       end
+       def follow_sources(stack, nonterminal, lookahead)
+         top = stack.last
+         if stack.length == 1
+           config = Configuration.new(top, nonterminal)
+           @visited.include?(config) and return []
+           @visited << config
+         end
+         stacks = [].to_set
+         top.read(nonterminal) do |state, direct, indirect|
+           stacks << [state] if direct.include?(lookahead)
+           for indirect_read in indirect
+             stacks |= follow_sources(stack + [state], indirect_read, lookahead)
+           end
+         end
+         top.read_reduce_items(nonterminal) do |production, position|
+           c = production.nonterminal
+           if position < stack.length
+             stacks |= follow_sources stack.slice(0..stack.length-position-1), c, lookahead
+           else
+             first_part = production.symbols.slice 0..-stack.length-1
+             stack[0].predecessors(first_part).each {|q| stacks |= follow_sources([q], c, lookahead)}
+           end unless c == StartSymbol
+         end
+         stacks
+       end
+     end
+   end
+ end
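
Aside: DigraphTraverser above is a DeRemer/Pennello-style digraph traversal, the piece that turns the read/includes relations into LALR lookahead sets: union base sets along the relation and collapse cycles so every node of a strongly connected component ends up with the same set. A standalone sketch with plain symbols and an explicit edge map in place of Configuration objects (illustration only, not aurum API):

require 'set'

edges = { a: [:b], b: [:c], c: [:a], d: [:a] }     # c -> a closes a cycle
base  = { a: Set[1], b: Set[2], c: Set[3], d: Set[4] }

INFINITY = 1 << 30
depth, result, stack = {}, {}, []

traverse = lambda do |node|
  return result[node] if depth[node]
  stack.push(node)
  depth[node]  = (mark = stack.length)
  result[node] = base[node].dup
  (edges[node] || []).each do |succ|
    traverse.call(succ) unless depth[succ]
    depth[node]  = [depth[node], depth[succ]].min  # lowest stack depth reachable
    result[node] |= result[succ]
  end
  if depth[node] == mark                           # node is the root of its SCC
    loop do
      popped = stack.pop
      depth[popped]  = INFINITY                    # plays the role of 65535 above
      result[popped] = result[node].dup            # whole SCC shares one set
      break if popped == node
    end
  end
  result[node]
end

traverse.call(:d)
# result => :a, :b, :c all map to {1, 2, 3}; :d maps to {1, 2, 3, 4}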