aurum 0.1.1 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +29 -0
- data/examples/dangling_else/grammar.rb +23 -0
- data/examples/expression/grammar.rb +28 -0
- data/examples/smalltalk/grammar.rb +151 -0
- data/examples/smalltalk/interpreter.rb +70 -0
- data/examples/yacc/grammar.rb +72 -0
- data/lib/aurum.rb +1 -9
- data/lib/aurum/engine.rb +39 -175
- data/lib/aurum/engine/parsing_facility.rb +107 -0
- data/lib/aurum/engine/tokenization_facility.rb +86 -0
- data/lib/aurum/grammar.rb +52 -219
- data/lib/aurum/grammar/automata.rb +194 -0
- data/lib/aurum/grammar/builder/augmented_grammar.rb +83 -0
- data/lib/aurum/grammar/builder/dot_logger.rb +66 -0
- data/lib/aurum/grammar/builder/lexical_table_builder.rb +55 -0
- data/lib/aurum/grammar/builder/parsing_table_builder.rb +238 -0
- data/lib/aurum/grammar/builder/set_of_items.rb +190 -0
- data/lib/aurum/grammar/compiled_tables.rb +20 -0
- data/lib/aurum/grammar/dsl/lexical_definition.rb +94 -0
- data/lib/aurum/grammar/dsl/syntax_definition.rb +79 -0
- data/lib/aurum/grammar/lexical_rules.rb +224 -0
- data/lib/aurum/grammar/metalang/grammar.rb +47 -0
- data/lib/aurum/grammar/syntax_rules.rb +95 -0
- data/spec/builder/dsl_definition/aurum_grammar_spec.rb +33 -0
- data/spec/engine/lexer_spec.rb +59 -0
- data/spec/engine/parser_spec.rb +90 -0
- data/spec/examples/dangling_else_example.rb +30 -0
- data/spec/examples/expression_example.rb +48 -0
- data/spec/examples/smalltalk_example.rb +50 -0
- data/spec/examples/yacc_spec.rb +30 -0
- data/spec/grammar/builder/lexical_table/automata_spec.rb +55 -0
- data/spec/grammar/builder/lexical_table/builder_spec.rb +78 -0
- data/spec/grammar/builder/lexical_table/character_set_spec.rb +100 -0
- data/spec/grammar/builder/lexical_table/pattern_spec.rb +11 -0
- data/spec/grammar/builder/lexical_table/regular_expression.rb +40 -0
- data/spec/grammar/builder/parsing_table/augmented_grammar_spec.rb +36 -0
- data/spec/grammar/builder/parsing_table/builder_spec.rb +152 -0
- data/spec/grammar/builder/parsing_table/digraph_traverser_spec.rb +42 -0
- data/spec/grammar/builder/parsing_table/item_spec.rb +51 -0
- data/spec/grammar/builder/parsing_table/sources_spec.rb +66 -0
- data/spec/grammar/builder/parsing_table/state_spec.rb +82 -0
- data/spec/grammar/dsl/character_classes_builder_spec.rb +50 -0
- data/spec/grammar/dsl/lexical_rules_builder_spec.rb +181 -0
- data/spec/grammar/dsl/precedence_builder_spec.rb +64 -0
- data/spec/grammar/dsl/productions_builder_spec.rb +78 -0
- data/spec/grammar/metalang/metalang_spec.rb +0 -0
- data/spec/grammar/precedence_spec.rb +42 -0
- data/spec/grammar/syntax_rules_spec.rb +31 -0
- data/spec/parser_matcher.rb +69 -0
- data/spec/pattern_matcher.rb +123 -0
- data/spec/spec_helper.rb +133 -0
- metadata +70 -36
- data/example/expression/expression.rb +0 -35
- data/example/expression/lisp.rb +0 -26
- data/lib/aurum/lexical_table_generator.rb +0 -429
- data/lib/aurum/parsing_table_generator.rb +0 -464
- data/test/engine/lexer_test.rb +0 -59
- data/test/engine/semantic_attributes_test.rb +0 -15
- data/test/grammar_definition/character_class_definition_test.rb +0 -28
- data/test/grammar_definition/grammar_definition_test.rb +0 -55
- data/test/grammar_definition/lexical_definition_test.rb +0 -56
- data/test/grammar_definition/operator_precedence_definition_test.rb +0 -35
- data/test/grammar_definition/production_definition_test.rb +0 -60
- data/test/lexical_table_generator/automata_test.rb +0 -74
- data/test/lexical_table_generator/character_set_test.rb +0 -73
- data/test/lexical_table_generator/interval_test.rb +0 -36
- data/test/lexical_table_generator/pattern_test.rb +0 -115
- data/test/lexical_table_generator/subset_determinizer_test.rb +0 -19
- data/test/lexical_table_generator/table_generator_test.rb +0 -126
- data/test/parsing_table_generator/augmented_grammar_test.rb +0 -45
- data/test/parsing_table_generator/lalr_n_computation_test.rb +0 -92
- data/test/parsing_table_generator/lr_0_automata_test.rb +0 -94
- data/test/parsing_table_generator/lr_item_test.rb +0 -27
- data/test/parsing_table_generator/parsing_table_state_test.rb +0 -39
- data/test/parsing_table_generator/precedence_table_test.rb +0 -28
- data/test/parsing_table_generator/production_test.rb +0 -9
- data/test/test_helper.rb +0 -103
@@ -0,0 +1,194 @@
|
|
1
|
+
module Aurum
  class Grammar
    class LexicalRules
      # A finite automaton stored as an adjacency table:
      # @table[state] is an Array of Transition structs leaving that state.
      # States are plain Integer indexes into @table.
      class Automata
        # One labelled edge: +character_set+ is the edge label (may be Epsilon),
        # +destination+ the target state index.
        Transition = Struct.new(:character_set, :destination)
        attr_reader :table
        # table: either a ready-made adjacency Array, or an Integer state
        # count, in which case that many empty transition lists are created.
        def initialize(table=[])
          @table = table.is_a?(Array) ? table : Array.new(table){[]}
        end

        # Adds an edge start --character_set--> destination. Returns
        # +destination+ so calls can be chained while threading a state along.
        def connect start, character_set, destination
          @table[start] << Transition.new(character_set, destination)
          destination
        end

        # Appends all states of +other+ into this automaton, shifting the
        # copied destinations by the previous table length. Returns the index
        # at which other's state 0 now lives.
        def merge! other
          start = @table.length
          for trans in other.table
            @table << trans.map {|tran| Transition.new(tran.character_set, tran.destination + start)}
          end
          start
        end

        # Allocates a fresh state with no outgoing transitions; returns its index.
        def new_state
          @table << []
          @table.size - 1
        end

        # Every state index currently in the table.
        def all_states
          (0..@table.size - 1).to_a
        end

        # Sweeps the combined outgoing edges of +states+ and yields
        # (reachable_states, character_set) for each maximal character range
        # over which the same destinations are reachable. Implemented as a
        # sweep over sorted interval endpoints ("points"): a start point adds
        # its destination to the live set, an end point removes it.
        def alphabet states
          points, reachable_states = [], []
          for state in states
            @table[state].each {|tran| points.concat(tran.character_set.to_points(tran.destination))}
          end
          points.sort!
          points.each_with_index do |point, index|
            if point.is_start
              reachable_states << point.destination
            else
              reachable_states.delete point.destination
              next if reachable_states.empty?
            end
            # Character range between this point and the next; nil when empty.
            character_set = range(point, points[index + 1])
            yield(reachable_states.uniq, character_set) if character_set
          end
        end

        # Per-state shallow copy: transition lists are duplicated, the
        # Transition structs themselves are shared.
        def dup
          Automata.new(@table.map {|x| x.dup})
        end

        # New automaton with every edge direction flipped (used by the
        # minimizer to walk predecessors).
        def reverse
          reverse = Automata.new(@table.size)
          @table.each_with_index do |state, index|
            state.each{|tran| reverse.connect(tran.destination, tran.character_set, index)}
          end
          reverse
        end

        # Removes transitions into "dead" states: non-accepting states whose
        # every edge is a self loop (including states with no edges at all).
        # The dead states themselves stay in the table; only edges to them go.
        def remove_dead_states accepts
          dead_states = []
          @table.each_with_index do |state, index|
            next if accepts.include?(index) || state.any? {|tran| tran.destination != index }
            dead_states << index
          end
          unless dead_states.empty?
            @table.each_with_index do |state, index|
              state.delete_if {|tran| dead_states.include?(tran.destination) }
            end
          end
        end

        private
        # Character range lying between two sweep points, adjusted so a start
        # point is inclusive and an end point exclusive on the relevant side.
        # Returns nil when the adjusted range is empty.
        def range point_a, point_b
          start_point = point_a.is_start ? point_a.char : (point_a.char + 1)
          end_point = point_b.is_start ? point_b.char - 1 : point_b.char
          start_point > end_point ? nil : CharacterSet::Interval.new(start_point, end_point).to_char_set
        end
      end

      # Classic subset construction: converts an NFA (with Epsilon edges)
      # into a DFA whose states correspond to sorted sets of NFA states.
      class SubsetDeterminizer
        def initialize nondeterministic, accepts
          # @dstates[i] is the NFA state-set behind DFA state i;
          # @unmarked is the work list of DFA states still to be expanded.
          @unmarked, @dstates, @accepts = [], [], accepts
          @nondeterministic, @deterministic, @accept_states = nondeterministic, Automata.new, {}
        end

        # Runs the construction from the closure of NFA state 0. Returns
        # [dfa, accept_states] where accept_states maps a DFA state index to
        # the NFA accept states it contains.
        def determinize
          unmark(closure([0]))
          until @unmarked.empty?
            start = @unmarked.pop
            @nondeterministic.alphabet(@dstates[start]) do |states, character_set|
              destination_state = closure(states)
              # Reuse the existing DFA state for an already-seen subset,
              # otherwise register a fresh one on the work list.
              destination = unmark(destination_state) unless destination = @dstates.index(destination_state)
              @deterministic.connect(start, character_set, destination)
            end
          end
          return @deterministic, @accept_states
        end
        private
        # Registers a new DFA state for the NFA subset +states+ and queues it.
        # Records which contained NFA states are accepting. Returns the index.
        def unmark states
          @dstates << states
          @unmarked.push(@deterministic.new_state)
          accepts = states.find_all {|x| @accepts.include? x}
          @accept_states[@unmarked.last] = accepts unless accepts.empty?
          @unmarked.last
        end

        # Epsilon-closure of +states+, returned sorted so that equal subsets
        # compare equal in @dstates.index above.
        def closure states
          closure, unvisited = states.dup, states.dup
          until unvisited.empty? do
            @nondeterministic.table[unvisited.pop].each do |tran|
              if tran.character_set == Epsilon && !closure.include?(tran.destination)
                closure << tran.destination
                unvisited << tran.destination
              end
            end
          end
          closure.sort!
        end
      end

      # DFA minimization by partition refinement (Hopcroft-style): states are
      # grouped into equivalence classes which are split until stable, then
      # each class collapses to one state.
      class HopcroftMinimizer
        def initialize deterministic, accepts
          @deterministic, @accepts = deterministic, accepts
        end

        # Returns [minimized_automata, accepts] where each partition becomes a
        # single state represented by its first member.
        def minimize
          make_initial_partitions
          refine_partitions
          automata, accepts = Aurum::Grammar::LexicalRules::Automata.new(@partitions.size), {}
          choose_representatives do |representative, index|
            @deterministic.table[representative].each do |transition|
              automata.connect(index, transition.character_set, partition_contains(transition.destination))
            end
            accepts[index] = @accepts[representative] if @accepts.has_key? representative
          end
          automata.remove_dead_states(accepts.keys)
          return automata, accepts
        end

        private
        # Seed partitions: the start state alone, all non-accepting states,
        # and one group per distinct accept action (states sharing an action
        # are candidates for merging).
        def make_initial_partitions
          partitions = {}
          @accepts.each do |state, action|
            partitions[action] = [] unless partitions.has_key? action
            partitions[action] << state
          end
          @partitions = [[0], @deterministic.all_states - @accepts.keys - [0]] + partitions.values
          @partitions.delete []
        end

        # Splits partitions against predecessor sets (ia) computed on the
        # reversed automaton until no partition can be split further.
        def refine_partitions
          reverse_automata, working_list = @deterministic.reverse, @partitions.dup
          until working_list.empty?
            reverse_automata.alphabet(working_list.pop) do |ia, symbols|
              @partitions.each do |r|
                unless r.empty?
                  r1, r2 = r & ia, r - ia
                  # Split only when both halves are non-empty
                  # (r2 == r means r1 is empty).
                  unless r2.empty? || r2 == r
                    replace(@partitions, r => [r1, r2])
                    if working_list.include?(r)
                      replace(working_list, r => [r1, r2])
                    else
                      # Hopcroft's trick: only the smaller half is requeued.
                      working_list << (r1.size <= r2.size ? r1 : r2)
                    end
                    working_list.uniq!
                  end
                end
              end
            end
          end
        end

        # Yields (representative_state, partition_index) for every partition.
        def choose_representatives
          @partitions.each_with_index {|partition, index| yield partition.first, index}
        end

        # Index of the partition containing +state+. NOTE(review): falls
        # through (returning the each_with_index result) if the state is in no
        # partition — callers are expected to pass states from the same DFA.
        def partition_contains state
          @partitions.each_with_index {|partition, index| return index if partition.include? state}
        end

        # In-place substitution: removes +old+ from +array+ and appends the
        # replacement partitions.
        def replace array, replacements
          replacements.each do |old, new|
            array.delete old
            new.each {|x| array << x}
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,83 @@
|
|
1
|
+
module Aurum
  module Builder
    # Synthetic start symbol used to augment every grammar with
    # "$start -> <user start symbol>".
    StartSymbol = Aurum::Grammar.nonterminal('$start')

    # Drives a work-list loop: pops one element at a time and yields both the
    # list (so the block can push more work) and the popped element.
    def Builder.working_list unvisited
      yield unvisited, unvisited.pop while !unvisited.empty?
    end

    # Wraps a set of syntax rules with the augmented start production and
    # computes the classic grammar analyses (nullable nonterminals, FIRST
    # sets) over the symbols actually reachable from the start symbol.
    class AugmentedGrammar
      attr_reader :start_production, :all_productions
      def initialize syntax_rules, start_symbol
        @syntax_rules, @start_symbol = syntax_rules, start_symbol
        @start_production = Aurum::Grammar.production(StartSymbol, [start_symbol])
        @all_productions = [@start_production]
        @nonterminals = [StartSymbol, @start_symbol].to_set
        @nullables = {StartSymbol => false, @start_symbol => false}
        @first_sets = {StartSymbol => [].to_set, @start_symbol => [].to_set}
        initialize_for_used_symbols
      end

      # True when +symbol+ can derive the empty string
      # (meaningful only after compute_nullables has run).
      def nullable? symbol
        @nullables[symbol]
      end

      # FIRST set of +symbol+ (meaningful only after compute_first_sets).
      def first_set symbol
        @first_sets[symbol]
      end

      # Productions of +nonterminal+; the synthetic start symbol owns exactly
      # the augmented start production.
      def productions(nonterminal = nil)
        return [@start_production] if nonterminal == StartSymbol
        @syntax_rules.productions(nonterminal)
      end

      # Iterates to a fixed point, marking a nonterminal nullable when any of
      # its productions is nullable.
      def compute_nullables
        fixed_point do |nonterminal|
          # FIX: was `break true if nullable? nonterminal`. `break` inside
          # this block returns from fixed_point itself, aborting the whole
          # computation the first time an already-nullable nonterminal is
          # encountered. Once nullable a symbol stays nullable, so report
          # "no change" for it and keep scanning the remaining nonterminals.
          next false if nullable? nonterminal
          # Assignment value doubles as the "changed" flag: it is true
          # exactly when the nonterminal transitions to nullable.
          @nullables[nonterminal] = productions(nonterminal).any? {|prod| production_nullable?(prod)}
        end
      end

      # Iterates to a fixed point propagating FIRST(symbol) into
      # FIRST(nonterminal) across each production, scanning a production's
      # symbols only while the prefix so far is nullable.
      def compute_first_sets
        fixed_point do |nonterminal|
          # FIX: the original returned only the update flag of the last
          # symbol examined, so an update made while scanning an earlier
          # production could be clobbered by a later one and the fixed point
          # could terminate before convergence. Accumulate updates across all
          # productions instead.
          updated = false
          for production in productions(nonterminal)
            for symbol in production.symbols
              # Set#add? returns nil when the element was already present,
              # so this folds to true exactly when something new was added.
              symbol_updated = @first_sets[symbol].inject(false) {|r, s| r | !!@first_sets[nonterminal].add?(s) }
              updated ||= symbol_updated
              # Stop at the first non-nullable symbol; also stop early after
              # an update (the outer fixed point runs another pass anyway).
              break unless nullable?(symbol) && !symbol_updated
            end
          end
          updated
        end
      end

      private
      # Repeats the block over every nonterminal until one full pass reports
      # no change. The block must return truthy iff it changed something.
      def fixed_point
        changed = true
        while changed
          changed = false
          @nonterminals.each {| nonterminal | changed |= yield(nonterminal)}
        end
      end

      # Walks the grammar from the start symbol, collecting every reachable
      # production and seeding @nullables/@first_sets for each used symbol
      # (a terminal's FIRST set is just itself).
      def initialize_for_used_symbols
        Builder.working_list [@start_symbol] do |unvisited, visiting|
          for production in @syntax_rules.productions(visiting)
            @all_productions << production
            for symbol in production.symbols
              if symbol.is_terminal
                @nullables[symbol], @first_sets[symbol] = false, [symbol].to_set unless @nullables.has_key?(symbol)
              elsif @nonterminals.add?(symbol)
                unvisited << symbol
                @nullables[symbol], @first_sets[symbol] = false, [].to_set
              end
            end
          end
        end
      end

      # A production is nullable when every right-hand-side symbol is
      # (vacuously true for an empty right-hand side).
      def production_nullable? production
        production.symbols.all? {|symbol| nullable? symbol}
      end
    end
  end
end
|
@@ -0,0 +1,66 @@
|
|
1
|
+
module Aurum
  module Builder
    # Collects Graphviz "dot" renderings of the structures the builders
    # produce (parser state machines and lexical DFAs), keyed by name in
    # #dot_graphs.
    class DotLogger
      attr_reader :dot_graphs
      def initialize
        @dot_graphs = {}
      end

      # Records the production list so reduce actions can later point at
      # their production_<index> nodes.
      def productions productions
        @productions = productions.to_a
      end

      # Renders the parser states (plus all productions) as one left-to-right
      # digraph and stores it under +name+.
      def parsing_table_states name, states
        dot = %Q_digraph grammar {\nrankdir=LR;\n_
        @productions.each_with_index {|production, index| dot << production_to_dot(production, index)}
        states.each {|state| dot << state_to_dot(state)}
        dot << "}\n"
        @dot_graphs[name] = dot
      end

      # Renders a lexical automaton: one circle per state, an octagon per
      # accept action, and one edge per transition labelled with its
      # character set.
      def lexical_table_automata name, automata, accepts
        dot = %Q_digraph grammar {\nrankdir=LR;\n_
        accepts.each {|state, action| dot << lexical_action_to_dot(action, state)}
        automata.table.each_with_index do |transitions, index|
          dot << %Q{#{index} [shape = circle, color = palegreen, style = filled]\n}
          dot << %Q{#{index} -> action_#{index}\n} if accepts.has_key?(index)
          transitions.each {|transition| dot << %Q{#{index} -> #{transition.destination} [label="#{transition.character_set.inspect}"]\n} }
        end
        dot << "}\n"
        @dot_graphs[name] = dot
      end

      private
      # One parser state node. Colour encodes status: maroon = conflict,
      # yellow = inconsistent, lightblue = synthetic lookahead state,
      # palegreen = plain. One edge is emitted per (symbol, action) pair.
      def state_to_dot state
        dot = %Q{state_#{state.index} [label = "#{state.empty? ? 'lookahead' : state.all_items.join('\n')}"]\n}
        dot << %Q{state_#{state.index} [shape = circle, color = #{state.conflict? ? 'maroon' : (state.inconsistent? ? 'yellow' : (state.empty? ? 'lightblue' : 'palegreen'))}, style = filled]\n}
        for symbol, actions in state.actions
          for action in actions do
            dot << %Q{state_#{state.index} -> #{action_to_dot(action)} [label="#{symbol.inspect}"];\n}
          end
        end
        dot
      end

      # One production node, referenced by reduce-action edges.
      def production_to_dot production, index
        %Q{production_#{index} [label = "#{production.inspect}", color = lightpink, style = filled, shape = octagon];\n}
      end

      # Edge target for an action: the destination state for shifts, the
      # matching production node otherwise.
      def action_to_dot action
        if action.is_a?(Aurum::ParsingTable::ShiftAction)
          "state_#{action.state.index}"
        else
          "production_#{@productions.index(action.handle)}"
        end
      end

      # Accept-action node for a lexical state, summarising what the action
      # does (token recognised, lexical-state shift, event notification).
      def lexical_action_to_dot action, state
        action_attr = []
        action_attr << "recognize '#{action.token}'" if action.token
        action_attr << "shift to '#{action.state}'\n" if action.state
        action_attr << "notify event '#{action.event}'\n" if action.event
        %Q{action_#{state} [label = "#{action_attr.join("\n")}", color = lightpink, style = filled, shape = octagon];\n}
      end
    end
  end
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
module Aurum
  module Builder
    # Entry point: builds the lexical (tokenizer) table from the DSL rules.
    # NOTE(review): +logger+ is accepted but currently never used or passed
    # through — kept for interface compatibility with build_parsing_table.
    def Builder.build_lexical_table(lexical_rules, states, literals, logger = nil)
      LexicalTableBuilder.new(lexical_rules, states, literals).build
    end

    # Assembles one NFA out of every pattern/literal of every lexical state,
    # determinizes it, resolves accept-action conflicts, and minimizes the
    # result into an Aurum::LexicalTable.
    class LexicalTableBuilder
      def initialize lexical_rules, states, literals
        @patterns, @literals = lexical_rules.patterns, literals
        # 'all' is a pseudo-state whose patterns apply in every lexical state.
        @lexical_states = @patterns.keys.to_a - ['all']
        @literal_states, @literal_patterns, @common_patterns = states, {}, @patterns['all'] || {}
        for literal in literals
          pattern = Aurum::Grammar::LexicalRules::Pattern.string(literal.to_s)
          @literal_patterns[pattern] = Aurum::LexicalTable::Action.new("$literal_#{literal}")
        end
      end

      # Builds the final table. Each lexical state gets its own NFA start
      # state, reachable from the global start via a reserved negative
      # character code (-index - 1), so a single DFA serves every lexical
      # state.
      def build
        automata = Aurum::Grammar::LexicalRules::Automata.new
        start, accepts, actions = automata.new_state, {}, {}
        @lexical_states.each_with_index do |lexical_state, index|
          lexical_state_start = automata.new_state
          automata.connect(start, Aurum::Grammar::LexicalRules::CharacterSet.range(-index - 1), lexical_state_start)
          [@patterns[lexical_state], @common_patterns, literals_in(lexical_state)].each do |patterns|
            for pattern, action in patterns
              pattern_start = automata.merge!(pattern.automata)
              automata.connect(lexical_state_start, Aurum::Grammar::LexicalRules::Epsilon, pattern_start)
              # merge! shifted the pattern's states, so its accept state
              # moves by the same offset.
              accepts[pattern_start + pattern.accept] = action
            end
          end
        end
        automata, dfa_accepts = Aurum::Grammar::LexicalRules::SubsetDeterminizer.new(automata, accepts.keys).determinize
        for dfa_accept, nfa_accepts in dfa_accepts
          lexical_actions = nfa_accepts.inject([]) {|result, state| result << accepts[state]}
          actions[dfa_accept] = resolve(lexical_actions.uniq)
        end
        automata, actions = Aurum::Grammar::LexicalRules::HopcroftMinimizer.new(automata, actions).minimize
        Aurum::LexicalTable.new(automata.table, @lexical_states, actions)
      end

      private
      # Picks the single action for a DFA accept state that covers several
      # NFA accept states. A lone action wins; otherwise a unique literal
      # action beats pattern actions (keywords shadow identifiers). Anything
      # else is a genuine ambiguity in the grammar.
      # FIX: removed the leftover debug output (`p action` on every action)
      # and raise an error that names the conflicting actions instead of a
      # bare 'conflict'.
      def resolve lexical_actions
        return lexical_actions.first if lexical_actions.size == 1
        candidates = lexical_actions.find_all {|action| action.token =~ /^\$literal_/}
        return candidates.first if candidates.size == 1
        raise "lexical conflict between actions: #{lexical_actions.inspect}"
      end

      # Literal patterns apply in +state+ only when literals were enabled for
      # it (or for every state via 'all').
      def literals_in state
        (@literal_states.include?('all') || @literal_states.include?(state)) ? @literal_patterns : {}
      end
    end
  end
end
|
@@ -0,0 +1,238 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), 'augmented_grammar')
|
2
|
+
require File.join(File.dirname(__FILE__), 'set_of_items')
|
3
|
+
|
4
|
+
module Aurum
  module Builder
    # Entry point: builds an LALR(n) parsing table for +syntax_rules+ rooted
    # at +start_symbol+. +logger+ is stored by the builder (e.g. a DotLogger).
    def Builder.build_parsing_table(syntax_rules, start_symbol, logger = nil)
      augmented_grammar = AugmentedGrammar.new(syntax_rules, start_symbol)
      ParsingTableBuilder.new(augmented_grammar, logger).build
    end

    # Constructs the LR(0) automaton, adds LALR(1) lookaheads for the
    # inconsistent states, then — when one token of lookahead is not enough —
    # grows synthetic lookahead states (LALR(n)) until every conflict is
    # resolved or the grammar is rejected as not LALR(n).
    class ParsingTableBuilder
      def initialize augmented_grammar, logger
        @augmented_grammar, @logger = augmented_grammar, logger
        # @lookahead_level tracks the maximum n needed anywhere in the table.
        @lookahead_level, @states = 0, []
        @inconsistent_states = []
      end

      # Runs the three phases and returns the finished Aurum::ParsingTable.
      def build
        construct_automata
        unless @inconsistent_states.empty?
          @augmented_grammar.compute_nullables
          @augmented_grammar.compute_first_sets
          @conflict_states = [].to_set
          # Follow sets are computed lazily over the follow digraph. The
          # start symbol's follow set is just EOF and is never expanded
          # (65535 marks a node permanently finished — see DigraphTraverser).
          @follow_set_calculator = DigraphTraverser.new do |config|
            (config.symbol == StartSymbol) ? [false, [Aurum::Grammar::EOF], 65535] : [true, nil, nil]
          end
          for inconsistent_state in @inconsistent_states do
            determine_lookaheads_for(inconsistent_state)
            @conflict_states << inconsistent_state if inconsistent_state.conflict?
          end
          unless @conflict_states.empty?
            for conflict_state in @conflict_states do
              for lookahead in conflict_state.conflicts do
                @state_lookahead_level = 2
                resolve_conflicts_for(conflict_state, lookahead, Sources.new(conflict_state, lookahead))
                @lookahead_level = @state_lookahead_level if @state_lookahead_level > @lookahead_level
              end
            end
          end
        end
        construct_parsing_table
      end

      private
      # Phase 1: classic LR(0) item-set construction via a work list. A goto
      # target consisting of a single completed item becomes a read-reduce
      # action instead of a real state.
      def construct_automata
        item = LRItem.new(@augmented_grammar.start_production)
        add_state(start_state = State.new(@augmented_grammar, [item]))
        Builder.working_list [start_state] do |unvisited, visiting|
          for symbol in visiting.expect_symbols
            new_state = visiting.goto(symbol)
            if production = new_state.read_reduce
              visiting.add_read_reduce(symbol, production)
            else
              is_new_state, new_state = add_state(new_state)
              unvisited << new_state if is_new_state
              visiting.add_shift(symbol, new_state)
            end
          end
        end
      end

      # Phase 2: LALR(1) — for every handle (completed item) of an
      # inconsistent state, add a reduce action on each follow symbol.
      # Operator precedence arbitrates: a higher-precedence production
      # replaces the existing actions on that symbol; without precedence the
      # reduce is simply added (possibly creating a conflict resolved later).
      def determine_lookaheads_for state
        @lookahead_level = 1
        for item in state.handles
          lookahead_each(state, item.production) do |production, symbol|
            if symbol.precedence
              if production.precedence > symbol.precedence
                state.actions[symbol].clear
                state.add_reduce(symbol, production)
              end
            else
              state.add_reduce(symbol, production)
            end
          end
        end
      end

      # Phase 3: LALR(n) — replace the conflicting actions on +lookahead+
      # with a shift into a synthetic lookahead state whose actions are keyed
      # by the NEXT input symbol, recursing while conflicts remain. Seeing
      # the same source stack twice means no finite n can disambiguate.
      def resolve_conflicts_for state, lookahead, sources
        lookahead_state = add_state(State.new(@augmented_grammar, [])).last
        state.add_lookahead_shift(lookahead, lookahead_state)
        sources.each_pair do |action, stacks|
          for stack in stacks
            raise 'not LALR(n)' if sources.stack_seen.include? stack
            sources.stack_seen << stack
            next_lookaheads(stack, lookahead).each {|next_la| lookahead_state.add_action(next_la, action)}
          end
        end
        for symbol in lookahead_state.conflicts
          @state_lookahead_level += 1
          resolve_conflicts_for(lookahead_state, symbol, Sources.new(lookahead_state, lookahead, sources, symbol))
        end if lookahead_state.conflict?
      end

      # Flattens each state's action lists into name-keyed single actions and
      # wraps everything in an Aurum::ParsingTable.
      def construct_parsing_table
        action_table = @states.map do |state|
          actions = {}
          state.actions.each { |symbol, action| actions[symbol.name] = action.first}
          actions
        end
        Aurum::ParsingTable.new(@augmented_grammar.all_productions, action_table, @lookahead_level)
      end

      # Symbols that can follow +lookahead+ when the parser sits on +stack+:
      # the direct read set of the top state, plus, for every item reducible
      # on +lookahead+, the lookaheads after the reduction — popping the
      # simulated stack when it is deep enough, otherwise walking the bottom
      # state's predecessors and consulting the follow-set calculator.
      def next_lookaheads stack, lookahead
        Aurum::Grammar::EOF == lookahead and return [Aurum::Grammar::EOF]
        top = stack.last
        lookaheads = top.read_set(lookahead)
        top.reducable_items(lookahead) do |production, position|
          c = production.nonterminal
          if position < stack.length
            lookaheads |= next_lookaheads stack.slice(0..stack.length-position-1), c
          else
            # Production is longer than the simulated stack: unwind through
            # the predecessors of the bottom state.
            first_part = production.symbols.slice 0..-stack.length-1
            stack[0].predecessors(first_part).each {|q| lookaheads |= @follow_set_calculator.traverse(Configuration.new(q, c))}
          end unless c.name == '$start'
        end
        lookaheads
      end

      # Yields (production, follow_symbol) for every symbol in the follow set
      # of +production+'s left-hand side as seen from +state+.
      def lookahead_each state, production
        for predecessor in state.predecessors(production.symbols)
          config = Configuration.new(predecessor, production.nonterminal)
          @follow_set_calculator.traverse(config).each {|symbol| yield production, symbol}
        end
      end

      # Interns +state+: returns [false, existing] for a duplicate, otherwise
      # assigns the next index, registers it (tracking inconsistency) and
      # returns [true, state]. Empty (synthetic lookahead) states are never
      # deduplicated.
      def add_state state
        index = @states.index(state)
        return [false, @states[index]] if index && !state.empty?
        state.index = @states.size
        @states << state
        @inconsistent_states << state if state.inconsistent?
        [true, state]
      end
    end

    # A (state, symbol) node of the follow-set digraph.
    class Configuration < Struct.new(:state, :symbol)
      # Seed value for this node: the read set of +symbol+ at +state+.
      def map_to_set
        state.read_set(symbol)
      end

      # Yields the neighbouring configurations whose results this node's
      # result must include.
      def result_each
        state.include_each(symbol) {|state, symbol| yield Configuration.new(state, symbol)}
      end
    end

    # SCC-aware digraph traversal (the standard algorithm behind LALR
    # follow-set computation): unions each node's set with its successors'
    # sets and copies the final set to every member of a strongly connected
    # component. NOTE(review): "indicitor" is a historical misspelling of
    # "indicator", kept as-is here.
    class DigraphTraverser
      # +condition+ decides per node whether to traverse. It returns a triple
      # [continue?, preset_result, preset_indicator]; when continue? is false
      # the preset result is used verbatim (e.g. EOF for the start symbol).
      def initialize &condition
        @indicitor, @result, @stack = {}, {}, []
        @condition = condition
      end

      # Returns the completed set for +node+, computing it (and its whole
      # SCC) on first visit. @indicitor holds the Tarjan-style low-link;
      # 65535 marks a node as finished.
      def traverse node
        return @result[node] if @indicitor[node]
        to_be_continued, @result[node], @indicitor[node] = @condition.call(node)
        if to_be_continued
          @stack.push(node)
          @indicitor[node] = (cycle_indicitor = @stack.length)
          @result[node] = node.map_to_set
          node.result_each do |new_node|
            traverse(new_node) unless @indicitor[new_node]
            @indicitor[node] = [@indicitor[node], @indicitor[new_node]].min
            @result[node] |= @result[new_node]
          end
          # This node is the root of an SCC: every member still on the stack
          # above it gets a copy of the same result and is marked finished.
          node_in_cycle = nil
          until node_in_cycle == node
            node_in_cycle = @stack.pop
            @result[node_in_cycle] = @result[node].dup
            @indicitor[node_in_cycle] = 65535
          end if @indicitor[node] == cycle_indicitor
        end
        @result[node]
      end
    end

    # Maps each conflicting action of a state to the set of "source" stacks
    # (state sequences) from which the parser could be performing it — the
    # raw material for computing the next level of lookahead.
    class Sources < DelegateClass(Hash)
      # Stacks already consumed by resolve_conflicts_for; revisiting one
      # means the conflict is not LALR(n)-resolvable.
      attr_reader :stack_seen
      def initialize(state, symbol, sources = nil, lookahead = nil)
        @sources, @stack_seen = {}, []
        for action in state.actions[symbol]
          # NOTE(review): this seed assignment is immediately overwritten by
          # both branches below (dead store, kept as-is).
          @sources[action] = [].to_set
          if sources
            # Deeper lookahead level: advance the previous level's stacks.
            @sources[action] = initialize_lookahead_state(sources[action], symbol, lookahead)
          else
            @sources[action] = initialize_state(state, action, symbol)
          end
        end
        super(@sources)
      end

      private
      # Level-1 sources: a shift/read-reduce originates in the state itself;
      # a reduce originates in every predecessor reached by unwinding the
      # production's handle.
      def initialize_state state, action, symbol
        if action.shift_action? || action.read_reduce?
          stacks = [[state]].to_set
        else
          stacks = [].to_set
          for predecessor in state.predecessors(action.value.symbols)
            @visited = [].to_set
            stacks |= follow_sources([predecessor], action.value.nonterminal, symbol)
          end
        end
        stacks
      end
      # Level-n sources: follow each existing stack one lookahead further.
      def initialize_lookahead_state stacks, nonterminal, lookahead
        new_stacks = [].to_set
        for stack in stacks
          @visited = [].to_set
          new_stacks |= follow_sources(stack, nonterminal, lookahead)
        end
        new_stacks
      end
      # Collects the stacks from which reading +nonterminal+ then +lookahead+
      # is possible, chasing indirect reads and completed items. @visited
      # guards against cycles once the stack is fully unwound (length 1).
      def follow_sources(stack, nonterminal, lookahead)
        top = stack.last
        if stack.length == 1
          config = Configuration.new(top, nonterminal)
          @visited.include?(config) and return []
          @visited << config
        end
        stacks = [].to_set
        top.read(nonterminal) do |state, direct, indirect|
          stacks << [state] if direct.include?(lookahead)
          for indirect_read in indirect
            stacks |= follow_sources(stack + [state], indirect_read, lookahead)
          end
        end
        top.read_reduce_items(nonterminal) do |production, position|
          c = production.nonterminal
          if position < stack.length
            stacks |= follow_sources stack.slice(0..stack.length-position-1), c, lookahead
          else
            first_part = production.symbols.slice 0..-stack.length-1
            stack[0].predecessors(first_part).each {|q| stacks |= follow_sources([q], c, lookahead)}
          end unless c == StartSymbol
        end
        stacks
      end
    end
  end
end
|