aurum 0.1.1 → 0.2.0
This diff shows the changes between publicly released versions of this package as published to its registry. It is provided for informational purposes only.
- data/Rakefile +29 -0
- data/examples/dangling_else/grammar.rb +23 -0
- data/examples/expression/grammar.rb +28 -0
- data/examples/smalltalk/grammar.rb +151 -0
- data/examples/smalltalk/interpreter.rb +70 -0
- data/examples/yacc/grammar.rb +72 -0
- data/lib/aurum.rb +1 -9
- data/lib/aurum/engine.rb +39 -175
- data/lib/aurum/engine/parsing_facility.rb +107 -0
- data/lib/aurum/engine/tokenization_facility.rb +86 -0
- data/lib/aurum/grammar.rb +52 -219
- data/lib/aurum/grammar/automata.rb +194 -0
- data/lib/aurum/grammar/builder/augmented_grammar.rb +83 -0
- data/lib/aurum/grammar/builder/dot_logger.rb +66 -0
- data/lib/aurum/grammar/builder/lexical_table_builder.rb +55 -0
- data/lib/aurum/grammar/builder/parsing_table_builder.rb +238 -0
- data/lib/aurum/grammar/builder/set_of_items.rb +190 -0
- data/lib/aurum/grammar/compiled_tables.rb +20 -0
- data/lib/aurum/grammar/dsl/lexical_definition.rb +94 -0
- data/lib/aurum/grammar/dsl/syntax_definition.rb +79 -0
- data/lib/aurum/grammar/lexical_rules.rb +224 -0
- data/lib/aurum/grammar/metalang/grammar.rb +47 -0
- data/lib/aurum/grammar/syntax_rules.rb +95 -0
- data/spec/builder/dsl_definition/aurum_grammar_spec.rb +33 -0
- data/spec/engine/lexer_spec.rb +59 -0
- data/spec/engine/parser_spec.rb +90 -0
- data/spec/examples/dangling_else_example.rb +30 -0
- data/spec/examples/expression_example.rb +48 -0
- data/spec/examples/smalltalk_example.rb +50 -0
- data/spec/examples/yacc_spec.rb +30 -0
- data/spec/grammar/builder/lexical_table/automata_spec.rb +55 -0
- data/spec/grammar/builder/lexical_table/builder_spec.rb +78 -0
- data/spec/grammar/builder/lexical_table/character_set_spec.rb +100 -0
- data/spec/grammar/builder/lexical_table/pattern_spec.rb +11 -0
- data/spec/grammar/builder/lexical_table/regular_expression.rb +40 -0
- data/spec/grammar/builder/parsing_table/augmented_grammar_spec.rb +36 -0
- data/spec/grammar/builder/parsing_table/builder_spec.rb +152 -0
- data/spec/grammar/builder/parsing_table/digraph_traverser_spec.rb +42 -0
- data/spec/grammar/builder/parsing_table/item_spec.rb +51 -0
- data/spec/grammar/builder/parsing_table/sources_spec.rb +66 -0
- data/spec/grammar/builder/parsing_table/state_spec.rb +82 -0
- data/spec/grammar/dsl/character_classes_builder_spec.rb +50 -0
- data/spec/grammar/dsl/lexical_rules_builder_spec.rb +181 -0
- data/spec/grammar/dsl/precedence_builder_spec.rb +64 -0
- data/spec/grammar/dsl/productions_builder_spec.rb +78 -0
- data/spec/grammar/metalang/metalang_spec.rb +0 -0
- data/spec/grammar/precedence_spec.rb +42 -0
- data/spec/grammar/syntax_rules_spec.rb +31 -0
- data/spec/parser_matcher.rb +69 -0
- data/spec/pattern_matcher.rb +123 -0
- data/spec/spec_helper.rb +133 -0
- metadata +70 -36
- data/example/expression/expression.rb +0 -35
- data/example/expression/lisp.rb +0 -26
- data/lib/aurum/lexical_table_generator.rb +0 -429
- data/lib/aurum/parsing_table_generator.rb +0 -464
- data/test/engine/lexer_test.rb +0 -59
- data/test/engine/semantic_attributes_test.rb +0 -15
- data/test/grammar_definition/character_class_definition_test.rb +0 -28
- data/test/grammar_definition/grammar_definition_test.rb +0 -55
- data/test/grammar_definition/lexical_definition_test.rb +0 -56
- data/test/grammar_definition/operator_precedence_definition_test.rb +0 -35
- data/test/grammar_definition/production_definition_test.rb +0 -60
- data/test/lexical_table_generator/automata_test.rb +0 -74
- data/test/lexical_table_generator/character_set_test.rb +0 -73
- data/test/lexical_table_generator/interval_test.rb +0 -36
- data/test/lexical_table_generator/pattern_test.rb +0 -115
- data/test/lexical_table_generator/subset_determinizer_test.rb +0 -19
- data/test/lexical_table_generator/table_generator_test.rb +0 -126
- data/test/parsing_table_generator/augmented_grammar_test.rb +0 -45
- data/test/parsing_table_generator/lalr_n_computation_test.rb +0 -92
- data/test/parsing_table_generator/lr_0_automata_test.rb +0 -94
- data/test/parsing_table_generator/lr_item_test.rb +0 -27
- data/test/parsing_table_generator/parsing_table_state_test.rb +0 -39
- data/test/parsing_table_generator/precedence_table_test.rb +0 -28
- data/test/parsing_table_generator/production_test.rb +0 -9
- data/test/test_helper.rb +0 -103
data/lib/aurum/grammar/automata.rb
@@ -0,0 +1,194 @@
+module Aurum
+  class Grammar
+    class LexicalRules
+      class Automata
+        Transition = Struct.new(:character_set, :destination)
+        attr_reader :table
+        def initialize(table=[])
+          @table = table.is_a?(Array) ? table : Array.new(table){[]}
+        end
+
+        def connect start, character_set, destination
+          @table[start] << Transition.new(character_set, destination)
+          destination
+        end
+
+        def merge! other
+          start = @table.length
+          for trans in other.table
+            @table << trans.map {|tran| Transition.new(tran.character_set, tran.destination + start)}
+          end
+          start
+        end
+
+        def new_state
+          @table << []
+          @table.size - 1
+        end
+
+        def all_states
+          (0..@table.size - 1).to_a
+        end
+
+        def alphabet states
+          points, reachable_states = [], []
+          for state in states
+            @table[state].each {|tran| points.concat(tran.character_set.to_points(tran.destination))}
+          end
+          points.sort!
+          points.each_with_index do |point, index|
+            if point.is_start
+              reachable_states << point.destination
+            else
+              reachable_states.delete point.destination
+              next if reachable_states.empty?
+            end
+            character_set = range(point, points[index + 1])
+            yield(reachable_states.uniq, character_set) if character_set
+          end
+        end
+
+        def dup
+          Automata.new(@table.map {|x| x.dup})
+        end
+
+        def reverse
+          reverse = Automata.new(@table.size)
+          @table.each_with_index do |state, index|
+            state.each{|tran| reverse.connect(tran.destination, tran.character_set, index)}
+          end
+          reverse
+        end
+
+        def remove_dead_states accepts
+          dead_states = []
+          @table.each_with_index do |state, index|
+            next if accepts.include?(index) || state.any? {|tran| tran.destination != index }
+            dead_states << index
+          end
+          unless dead_states.empty?
+            @table.each_with_index do |state, index|
+              state.delete_if {|tran| dead_states.include?(tran.destination) }
+            end
+          end
+        end
+
+        private
+        def range point_a, point_b
+          start_point = point_a.is_start ? point_a.char : (point_a.char + 1)
+          end_point = point_b.is_start ? point_b.char - 1 : point_b.char
+          start_point > end_point ? nil : CharacterSet::Interval.new(start_point, end_point).to_char_set
+        end
+      end
+
+      class SubsetDeterminizer
+        def initialize nondeterministic, accepts
+          @unmarked, @dstates, @accepts = [], [], accepts
+          @nondeterministic, @deterministic, @accept_states = nondeterministic, Automata.new, {}
+        end
+
+        def determinize
+          unmark(closure([0]))
+          until @unmarked.empty?
+            start = @unmarked.pop
+            @nondeterministic.alphabet(@dstates[start]) do |states, character_set|
+              destination_state = closure(states)
+              destination = unmark(destination_state) unless destination = @dstates.index(destination_state)
+              @deterministic.connect(start, character_set, destination)
+            end
+          end
+          return @deterministic, @accept_states
+        end
+        private
+        def unmark states
+          @dstates << states
+          @unmarked.push(@deterministic.new_state)
+          accepts = states.find_all {|x| @accepts.include? x}
+          @accept_states[@unmarked.last] = accepts unless accepts.empty?
+          @unmarked.last
+        end
+
+        def closure states
+          closure, unvisited = states.dup, states.dup
+          until unvisited.empty? do
+            @nondeterministic.table[unvisited.pop].each do |tran|
+              if tran.character_set == Epsilon && !closure.include?(tran.destination)
+                closure << tran.destination
+                unvisited << tran.destination
+              end
+            end
+          end
+          closure.sort!
+        end
+      end
+
+      class HopcroftMinimizer
+        def initialize deterministic, accepts
+          @deterministic, @accepts = deterministic, accepts
+        end
+
+        def minimize
+          make_initial_partitions
+          refine_partitions
+          automata, accepts = Aurum::Grammar::LexicalRules::Automata.new(@partitions.size), {}
+          choose_representatives do |representative, index|
+            @deterministic.table[representative].each do |transition|
+              automata.connect(index, transition.character_set, partition_contains(transition.destination))
+            end
+            accepts[index] = @accepts[representative] if @accepts.has_key? representative
+          end
+          automata.remove_dead_states(accepts.keys)
+          return automata, accepts
+        end
+
+        private
+        def make_initial_partitions
+          partitions = {}
+          @accepts.each do |state, action|
+            partitions[action] = [] unless partitions.has_key? action
+            partitions[action] << state
+          end
+          @partitions = [[0], @deterministic.all_states - @accepts.keys - [0]] + partitions.values
+          @partitions.delete []
+        end
+
+        def refine_partitions
+          reverse_automata, working_list = @deterministic.reverse, @partitions.dup
+          until working_list.empty?
+            reverse_automata.alphabet(working_list.pop) do |ia, symbols|
+              @partitions.each do |r|
+                unless r.empty?
+                  r1, r2 = r & ia, r - ia
+                  unless r2.empty? || r2 == r
+                    replace(@partitions, r => [r1, r2])
+                    if working_list.include?(r)
+                      replace(working_list, r => [r1, r2])
+                    else
+                      working_list << (r1.size <= r2.size ? r1 : r2)
+                    end
+                    working_list.uniq!
+                  end
+                end
+              end
+            end
+          end
+        end
+
+        def choose_representatives
+          @partitions.each_with_index {|partition, index| yield partition.first, index}
+        end
+
+        def partition_contains state
+          @partitions.each_with_index {|partition, index| return index if partition.include? state}
+        end
+
+        def replace array, replacements
+          replacements.each do |old, new|
+            array.delete old
+            new.each {|x| array << x}
+          end
+        end
+      end
+    end
+  end
+end
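The `Automata` above is essentially a growable transition table indexed by state number; `SubsetDeterminizer` and `HopcroftMinimizer` then run the usual NFA-to-DFA and DFA-minimization passes over it. A rough usage sketch of the structural methods (assuming aurum 0.2.0 is installed and `require 'aurum'` loads these classes; the string `'a'` stands in for a real `CharacterSet`, which lives in lexical_rules.rb and is not shown in this hunk):

```ruby
require 'aurum'

Automata = Aurum::Grammar::LexicalRules::Automata

# Build a two-state automaton 0 --'a'--> 1, then flip its edges.
automata = Automata.new
start    = automata.new_state        # => 0
accept   = automata.new_state        # => 1
automata.connect(start, 'a', accept) # connect just stores the character-set object

reversed = automata.reverse
p automata.table[start].map(&:destination)   # => [1]
p reversed.table[accept].map(&:destination)  # => [0]
```

Determinization and minimization (`alphabet`, `determinize`, `minimize`) additionally need character sets that answer `to_points`, so they are not exercised here.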
data/lib/aurum/grammar/builder/augmented_grammar.rb
@@ -0,0 +1,83 @@
+module Aurum
+  module Builder
+    StartSymbol = Aurum::Grammar.nonterminal('$start')
+
+    def Builder.working_list unvisited
+      yield unvisited, unvisited.pop while !unvisited.empty?
+    end
+
+    class AugmentedGrammar
+      attr_reader :start_production, :all_productions
+      def initialize syntax_rules, start_symbol
+        @syntax_rules, @start_symbol = syntax_rules, start_symbol
+        @start_production = Aurum::Grammar.production(StartSymbol, [start_symbol])
+        @all_productions = [@start_production]
+        @nonterminals = [StartSymbol, @start_symbol].to_set
+        @nullables = {StartSymbol => false, @start_symbol => false}
+        @first_sets = {StartSymbol => [].to_set, @start_symbol => [].to_set}
+        initialize_for_used_symbols
+      end
+
+      def nullable? symbol
+        @nullables[symbol]
+      end
+
+      def first_set symbol
+        @first_sets[symbol]
+      end
+
+      def productions(nonterminal = nil)
+        return [@start_production] if nonterminal == StartSymbol
+        @syntax_rules.productions(nonterminal)
+      end
+
+      def compute_nullables
+        fixed_point do |nonterminal|
+          break true if nullable? nonterminal
+          @nullables[nonterminal] = productions(nonterminal).any? {|prod| production_nullable?(prod)}
+        end
+      end
+
+      def compute_first_sets
+        fixed_point do |nonterminal|
+          for production in productions(nonterminal)
+            for symbol in production.symbols
+              is_updated = @first_sets[symbol].inject(false) {|r, s| r |= @first_sets[nonterminal].add?(s) }
+              break unless nullable?(symbol) and !is_updated
+            end
+          end
+          is_updated
+        end
+      end
+
+      private
+      def fixed_point
+        changed = true
+        while changed
+          changed = false
+          @nonterminals.each {| nonterminal | changed |= yield(nonterminal)}
+        end
+      end
+
+      def initialize_for_used_symbols
+        Builder.working_list [@start_symbol] do |unvisited, visiting|
+          for production in @syntax_rules.productions(visiting)
+            @all_productions << production
+            for symbol in production.symbols
+              if symbol.is_terminal
+                @nullables[symbol], @first_sets[symbol] = false, [symbol].to_set unless @nullables.has_key?(symbol)
+              elsif @nonterminals.add?(symbol)
+                unvisited << symbol
+                @nullables[symbol], @first_sets[symbol] = false, [].to_set
+              end
+            end
+          end
+        end
+      end
+
+      def production_nullable? production
+        production.symbols.all? {|symbol| nullable? symbol}
+      end
+    end
+  end
+end
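`compute_nullables` and `compute_first_sets` iterate over the grammar's nonterminals until neither table changes. The following standalone sketch (plain Ruby, not Aurum's API) shows the same fixed-point idea on a toy grammar, `S -> A "a"` and `A -> "b" | ε`:

```ruby
require 'set'

# Standalone illustration of the fixed-point computation behind
# compute_nullables / compute_first_sets (not Aurum's API).
# Toy grammar: S -> A "a" ; A -> "b" | (empty). Strings are terminals.
grammar  = { S: [[:A, 'a']], A: [['b'], []] }
terminal = ->(sym) { sym.is_a?(String) }

nullable = Hash.new(false)
first    = Hash.new { |h, k| h[k] = Set.new }
grammar.each_key { |nt| first[nt] }

changed = true
while changed                       # iterate until neither table changes
  changed = false
  grammar.each do |nt, productions|
    productions.each do |symbols|
      # a production is nullable when every symbol in it is a nullable nonterminal
      if !nullable[nt] && symbols.all? { |s| !terminal.(s) && nullable[s] }
        nullable[nt] = changed = true
      end
      symbols.each do |s|
        contribution = terminal.(s) ? Set[s] : first[s]
        changed = true unless (contribution - first[nt]).empty?
        first[nt] |= contribution
        break unless !terminal.(s) && nullable[s]   # stop at the first non-nullable symbol
      end
    end
  end
end

p nullable[:A]     # => true
p first[:S].to_a   # => ["b", "a"]
p first[:A].to_a   # => ["b"]
```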
data/lib/aurum/grammar/builder/dot_logger.rb
@@ -0,0 +1,66 @@
+module Aurum
+  module Builder
+    class DotLogger
+      attr_reader :dot_graphs
+      def initialize
+        @dot_graphs = {}
+      end
+
+      def productions productions
+        @productions = productions.to_a
+      end
+
+      def parsing_table_states name, states
+        dot = %Q_digraph grammar {\nrankdir=LR;\n_
+        @productions.each_with_index {|production, index| dot << production_to_dot(production, index)}
+        states.each {|state| dot << state_to_dot(state)}
+        dot << "}\n"
+        @dot_graphs[name] = dot
+      end
+
+      def lexical_table_automata name, automata, accepts
+        dot = %Q_digraph grammar {\nrankdir=LR;\n_
+        accepts.each {|state, action| dot << lexical_action_to_dot(action, state)}
+        automata.table.each_with_index do |transitions, index|
+          dot << %Q{#{index} [shape = circle, color = palegreen, style = filled]\n}
+          dot << %Q{#{index} -> action_#{index}\n} if accepts.has_key?(index)
+          transitions.each {|transition| dot << %Q{#{index} -> #{transition.destination} [label="#{transition.character_set.inspect}"]\n} }
+        end
+        dot << "}\n"
+        @dot_graphs[name] = dot
+      end
+
+      private
+      def state_to_dot state
+        dot = %Q{state_#{state.index} [label = "#{state.empty? ? 'lookahead' : state.all_items.join('\n')}"]\n}
+        dot << %Q{state_#{state.index} [shape = circle, color = #{state.conflict? ? 'maroon' : (state.inconsistent? ? 'yellow' : (state.empty? ? 'lightblue' : 'palegreen'))}, style = filled]\n}
+        for symbol, actions in state.actions
+          for action in actions do
+            dot << %Q{state_#{state.index} -> #{action_to_dot(action)} [label="#{symbol.inspect}"];\n}
+          end
+        end
+        dot
+      end
+
+      def production_to_dot production, index
+        %Q{production_#{index} [label = "#{production.inspect}", color = lightpink, style = filled, shape = octagon];\n}
+      end
+
+      def action_to_dot action
+        if action.is_a?(Aurum::ParsingTable::ShiftAction)
+          "state_#{action.state.index}"
+        else
+          "production_#{@productions.index(action.handle)}"
+        end
+      end
+
+      def lexical_action_to_dot action, state
+        action_attr = []
+        action_attr << "recognize '#{action.token}'" if action.token
+        action_attr << "shift to '#{action.state}'\n" if action.state
+        action_attr << "notify event '#{action.event}'\n" if action.event
+        %Q{action_#{state} [label = "#{action_attr.join("\n")}", color = lightpink, style = filled, shape = octagon];\n}
+      end
+    end
+  end
+end
data/lib/aurum/grammar/builder/lexical_table_builder.rb
@@ -0,0 +1,55 @@
+module Aurum
+  module Builder
+    def Builder.build_lexical_table(lexical_rules, states, literals, logger = nil)
+      LexicalTableBuilder.new(lexical_rules, states, literals).build
+    end
+
+    class LexicalTableBuilder
+      def initialize lexical_rules, states, literals
+        @patterns, @literals = lexical_rules.patterns, literals
+        @lexical_states = @patterns.keys.to_a - ['all']
+        @literal_states, @literal_patterns, @common_patterns = states, {}, @patterns['all'] || {}
+        for literal in literals
+          pattern = Aurum::Grammar::LexicalRules::Pattern.string(literal.to_s)
+          @literal_patterns[pattern] = Aurum::LexicalTable::Action.new("$literal_#{literal}")
+        end
+      end
+
+      def build
+        automata = Aurum::Grammar::LexicalRules::Automata.new
+        start, accepts, actions = automata.new_state, {}, {}
+        @lexical_states.each_with_index do |lexical_state, index|
+          lexical_state_start = automata.new_state
+          automata.connect(start, Aurum::Grammar::LexicalRules::CharacterSet.range(-index - 1), lexical_state_start)
+          [@patterns[lexical_state], @common_patterns, literals_in(lexical_state)].each do |patterns|
+            for pattern, action in patterns
+              pattern_start = automata.merge!(pattern.automata)
+              automata.connect(lexical_state_start, Aurum::Grammar::LexicalRules::Epsilon, pattern_start)
+              accepts[pattern_start + pattern.accept] = action
+            end
+          end
+        end
+        automata, dfa_accepts = Aurum::Grammar::LexicalRules::SubsetDeterminizer.new(automata, accepts.keys).determinize
+        for dfa_accept, nfa_accepts in dfa_accepts
+          lexical_actions = nfa_accepts.inject([]) {|result, state| result << accepts[state]}
+          actions[dfa_accept] = resolve(lexical_actions.uniq)
+        end
+        automata, actions = Aurum::Grammar::LexicalRules::HopcroftMinimizer.new(automata, actions).minimize
+        Aurum::LexicalTable.new(automata.table, @lexical_states, actions)
+      end
+
+      private
+      def resolve lexical_actions
+        return lexical_actions.first if lexical_actions.size == 1
+        candidates = lexical_actions.find_all {|action| action.token =~ /^\$literal_/}
+        return candidates.first if candidates.size == 1
+        lexical_actions.each {|action| p action}
+        raise 'conflict'
+      end
+
+      def literals_in state
+        (@literal_states.include?('all') || @literal_states.include?(state)) ? @literal_patterns : {}
+      end
+    end
+  end
+end
data/lib/aurum/grammar/builder/parsing_table_builder.rb
@@ -0,0 +1,238 @@
+require File.join(File.dirname(__FILE__), 'augmented_grammar')
+require File.join(File.dirname(__FILE__), 'set_of_items')
+
+module Aurum
+  module Builder
+    def Builder.build_parsing_table(syntax_rules, start_symbol, logger = nil)
+      augmented_grammar = AugmentedGrammar.new(syntax_rules, start_symbol)
+      ParsingTableBuilder.new(augmented_grammar, logger).build
+    end
+
+    class ParsingTableBuilder
+      def initialize augmented_grammar, logger
+        @augmented_grammar, @logger = augmented_grammar, logger
+        @lookahead_level, @states = 0, []
+        @inconsistent_states = []
+      end
+
+      def build
+        construct_automata
+        unless @inconsistent_states.empty?
+          @augmented_grammar.compute_nullables
+          @augmented_grammar.compute_first_sets
+          @conflict_states = [].to_set
+          @follow_set_calculator = DigraphTraverser.new do |config|
+            (config.symbol == StartSymbol) ? [false, [Aurum::Grammar::EOF], 65535] : [true, nil, nil]
+          end
+          for inconsistent_state in @inconsistent_states do
+            determine_lookaheads_for(inconsistent_state)
+            @conflict_states << inconsistent_state if inconsistent_state.conflict?
+          end
+          unless @conflict_states.empty?
+            for conflict_state in @conflict_states do
+              for lookahead in conflict_state.conflicts do
+                @state_lookahead_level = 2
+                resolve_conflicts_for(conflict_state, lookahead, Sources.new(conflict_state, lookahead))
+                @lookahead_level = @state_lookahead_level if @state_lookahead_level > @lookahead_level
+              end
+            end
+          end
+        end
+        construct_parsing_table
+      end
+
+      private
+      def construct_automata
+        item = LRItem.new(@augmented_grammar.start_production)
+        add_state(start_state = State.new(@augmented_grammar, [item]))
+        Builder.working_list [start_state] do |unvisited, visiting|
+          for symbol in visiting.expect_symbols
+            new_state = visiting.goto(symbol)
+            if production = new_state.read_reduce
+              visiting.add_read_reduce(symbol, production)
+            else
+              is_new_state, new_state = add_state(new_state)
+              unvisited << new_state if is_new_state
+              visiting.add_shift(symbol, new_state)
+            end
+          end
+        end
+      end
+
+      def determine_lookaheads_for state
+        @lookahead_level = 1
+        for item in state.handles
+          lookahead_each(state, item.production) do |production, symbol|
+            if symbol.precedence
+              if production.precedence > symbol.precedence
+                state.actions[symbol].clear
+                state.add_reduce(symbol, production)
+              end
+            else
+              state.add_reduce(symbol, production)
+            end
+          end
+        end
+      end
+
+      def resolve_conflicts_for state, lookahead, sources
+        lookahead_state = add_state(State.new(@augmented_grammar, [])).last
+        state.add_lookahead_shift(lookahead, lookahead_state)
+        sources.each_pair do |action, stacks|
+          for stack in stacks
+            raise 'not LALR(n)' if sources.stack_seen.include? stack
+            sources.stack_seen << stack
+            next_lookaheads(stack, lookahead).each {|next_la| lookahead_state.add_action(next_la, action)}
+          end
+        end
+        for symbol in lookahead_state.conflicts
+          @state_lookahead_level += 1
+          resolve_conflicts_for(lookahead_state, symbol, Sources.new(lookahead_state, lookahead, sources, symbol))
+        end if lookahead_state.conflict?
+      end
+
+      def construct_parsing_table
+        action_table = @states.map do |state|
+          actions = {}
+          state.actions.each { |symbol, action| actions[symbol.name] = action.first}
+          actions
+        end
+        Aurum::ParsingTable.new(@augmented_grammar.all_productions, action_table, @lookahead_level)
+      end
+
+      def next_lookaheads stack, lookahead
+        Aurum::Grammar::EOF == lookahead and return [Aurum::Grammar::EOF]
+        top = stack.last
+        lookaheads = top.read_set(lookahead)
+        top.reducable_items(lookahead) do |production, position|
+          c = production.nonterminal
+          if position < stack.length
+            lookaheads |= next_lookaheads stack.slice(0..stack.length-position-1), c
+          else
+            first_part = production.symbols.slice 0..-stack.length-1
+            stack[0].predecessors(first_part).each {|q| lookaheads |= @follow_set_calculator.traverse(Configuration.new(q, c))}
+          end unless c.name == '$start'
+        end
+        lookaheads
+      end
+
+      def lookahead_each state, production
+        for predecessor in state.predecessors(production.symbols)
+          config = Configuration.new(predecessor, production.nonterminal)
+          @follow_set_calculator.traverse(config).each {|symbol| yield production, symbol}
+        end
+      end
+
+      def add_state state
+        index = @states.index(state)
+        return [false, @states[index]] if index && !state.empty?
+        state.index = @states.size
+        @states << state
+        @inconsistent_states << state if state.inconsistent?
+        [true, state]
+      end
+    end
+
+    class Configuration < Struct.new(:state, :symbol)
+      def map_to_set
+        state.read_set(symbol)
+      end
+
+      def result_each
+        state.include_each(symbol) {|state, symbol| yield Configuration.new(state, symbol)}
+      end
+    end
+
+    class DigraphTraverser
+      def initialize &condition
+        @indicitor, @result, @stack = {}, {}, []
+        @condition = condition
+      end
+
+      def traverse node
+        return @result[node] if @indicitor[node]
+        to_be_continued, @result[node], @indicitor[node] = @condition.call(node)
+        if to_be_continued
+          @stack.push(node)
+          @indicitor[node] = (cycle_indicitor = @stack.length)
+          @result[node] = node.map_to_set
+          node.result_each do |new_node|
+            traverse(new_node) unless @indicitor[new_node]
+            @indicitor[node] = [@indicitor[node], @indicitor[new_node]].min
+            @result[node] |= @result[new_node]
+          end
+          node_in_cycle = nil
+          until node_in_cycle == node
+            node_in_cycle = @stack.pop
+            @result[node_in_cycle] = @result[node].dup
+            @indicitor[node_in_cycle] = 65535
+          end if @indicitor[node] == cycle_indicitor
+        end
+        @result[node]
+      end
+    end
+
+    class Sources < DelegateClass(Hash)
+      attr_reader :stack_seen
+      def initialize(state, symbol, sources = nil, lookahead = nil)
+        @sources, @stack_seen = {}, []
+        for action in state.actions[symbol]
+          @sources[action] = [].to_set
+          if sources
+            @sources[action] = initialize_lookahead_state(sources[action], symbol, lookahead)
+          else
+            @sources[action] = initialize_state(state, action, symbol)
+          end
+        end
+        super(@sources)
+      end
+
+      private
+      def initialize_state state, action, symbol
+        if action.shift_action? || action.read_reduce?
+          stacks = [[state]].to_set
+        else
+          stacks = [].to_set
+          for predecessor in state.predecessors(action.value.symbols)
+            @visited = [].to_set
+            stacks |= follow_sources([predecessor], action.value.nonterminal, symbol)
+          end
+        end
+        stacks
+      end
+      def initialize_lookahead_state stacks, nonterminal, lookahead
+        new_stacks = [].to_set
+        for stack in stacks
+          @visited = [].to_set
+          new_stacks |= follow_sources(stack, nonterminal, lookahead)
+        end
+        new_stacks
+      end
+      def follow_sources(stack, nonterminal, lookahead)
+        top = stack.last
+        if stack.length == 1
+          config = Configuration.new(top, nonterminal)
+          @visited.include?(config) and return []
+          @visited << config
+        end
+        stacks = [].to_set
+        top.read(nonterminal) do |state, direct, indirect|
+          stacks << [state] if direct.include?(lookahead)
+          for indirect_read in indirect
+            stacks |= follow_sources(stack + [state], indirect_read, lookahead)
+          end
+        end
+        top.read_reduce_items(nonterminal) do |production, position|
+          c = production.nonterminal
+          if position < stack.length
+            stacks |= follow_sources stack.slice(0..stack.length-position-1), c, lookahead
+          else
+            first_part = production.symbols.slice 0..-stack.length-1
+            stack[0].predecessors(first_part).each {|q| stacks |= follow_sources([q], c, lookahead)}
+          end unless c == StartSymbol
+        end
+        stacks
+      end
+    end
+  end
+end
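`DigraphTraverser` is the set-propagating depth-first traversal the builder uses for its follow/read sets: each node contributes `map_to_set`, results are unioned along `result_each`, and nodes in the same cycle end up sharing one result. A rough sketch with a hypothetical `Node` type standing in for `Configuration` (assuming aurum 0.2.0 is installed and the builder classes are loaded, e.g. via `require 'aurum'`):

```ruby
require 'aurum'   # assumed to load the builder classes in this release

# Hypothetical Node type for illustration only. DigraphTraverser expects nodes
# that answer #map_to_set (the node's own contribution) and #result_each
# (yielding the nodes whose results get unioned in), like Configuration above.
class Node
  attr_reader :base
  attr_accessor :successors

  def initialize(base)
    @base, @successors = base, []
  end

  def map_to_set
    base.dup
  end

  def result_each(&block)
    successors.each(&block)
  end
end

a, b, c = Node.new(['x']), Node.new(['y']), Node.new(['z'])
a.successors = [b]
b.successors = [c, a]   # a and b form a cycle; c is a sink

# The condition block marks every node as "traverse normally".
traverser = Aurum::Builder::DigraphTraverser.new { |node| [true, nil, nil] }
p traverser.traverse(a)   # => ["x", "y", "z"]
p traverser.traverse(b)   # => ["x", "y", "z"]  (same cycle as a, shares its result)
p traverser.traverse(c)   # => ["z"]
```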