rusa 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE.txt +21 -0
- data/README.md +175 -0
- data/Rakefile +26 -0
- data/Steepfile +9 -0
- data/examples/calc.rb +29 -0
- data/examples/json.rb +55 -0
- data/examples/mini_lang.rb +52 -0
- data/exe/rusa +6 -0
- data/lib/rusa/analysis/automaton.rb +60 -0
- data/lib/rusa/analysis/conflict_resolver.rb +211 -0
- data/lib/rusa/analysis/first_follow.rb +106 -0
- data/lib/rusa/analysis/item.rb +51 -0
- data/lib/rusa/analysis/item_set.rb +64 -0
- data/lib/rusa/analysis/lalr_table.rb +460 -0
- data/lib/rusa/analysis/parse_action.rb +81 -0
- data/lib/rusa/cli.rb +188 -0
- data/lib/rusa/errors.rb +12 -0
- data/lib/rusa/generator/code_generator.rb +334 -0
- data/lib/rusa/grammar/action_capture.rb +128 -0
- data/lib/rusa/grammar/dsl.rb +123 -0
- data/lib/rusa/grammar/grammar.rb +212 -0
- data/lib/rusa/grammar/precedence.rb +29 -0
- data/lib/rusa/grammar/rule.rb +55 -0
- data/lib/rusa/grammar/symbol.rb +71 -0
- data/lib/rusa/version.rb +5 -0
- data/lib/rusa.rb +31 -0
- data/sig/generated/rusa/analysis/automaton.rbs +25 -0
- data/sig/generated/rusa/analysis/conflict_resolver.rbs +57 -0
- data/sig/generated/rusa/analysis/first_follow.rbs +33 -0
- data/sig/generated/rusa/analysis/item.rbs +35 -0
- data/sig/generated/rusa/analysis/item_set.rbs +31 -0
- data/sig/generated/rusa/analysis/lalr_table.rbs +182 -0
- data/sig/generated/rusa/analysis/parse_action.rbs +58 -0
- data/sig/generated/rusa/cli.rbs +68 -0
- data/sig/generated/rusa/errors.rbs +24 -0
- data/sig/generated/rusa/generator/code_generator.rbs +82 -0
- data/sig/generated/rusa/grammar/action_capture.rbs +46 -0
- data/sig/generated/rusa/grammar/dsl.rbs +62 -0
- data/sig/generated/rusa/grammar/grammar.rbs +103 -0
- data/sig/generated/rusa/grammar/precedence.rbs +23 -0
- data/sig/generated/rusa/grammar/rule.rbs +35 -0
- data/sig/generated/rusa/grammar/symbol.rbs +51 -0
- data/sig/generated/rusa/version.rbs +5 -0
- data/sig/generated/rusa.rbs +6 -0
- data/test/test_automaton.rb +27 -0
- data/test/test_code_generator.rb +74 -0
- data/test/test_dsl.rb +77 -0
- data/test/test_e2e.rb +134 -0
- data/test/test_first_follow.rb +70 -0
- data/test/test_grammar_model.rb +60 -0
- data/test/test_helper.rb +6 -0
- data/test/test_lalr_table.rb +64 -0
- metadata +96 -0
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "set"
|
|
4
|
+
|
|
5
|
+
module Rusa
|
|
6
|
+
module Analysis
|
|
7
|
+
# FIRST/FOLLOW sets drive lookahead computation for parser construction.
|
|
8
|
+
# Computes FIRST and FOLLOW sets for a grammar.
#
# Both tables are filled eagerly by the constructor using fixpoint
# iteration and are exposed through +first_sets+ and +follow_sets+.
# The EMPTY marker stands for the empty string inside FIRST sets.
class FirstFollow
  EMPTY = :$empty

  attr_reader :first_sets #: Hash[Symbol, Set[Symbol]]
  attr_reader :follow_sets #: Hash[Symbol, Set[Symbol]]

  # Builds both tables for +grammar+.
  #
  # Raises NoStartSymbolError when the grammar has no start symbol.
  #: (Grammar::Grammar) -> void
  def initialize(grammar)
    @grammar = grammar
    @first_sets = {} #: Hash[Symbol, Set[Symbol]]
    @follow_sets = {} #: Hash[Symbol, Set[Symbol]]
    initialize_sets
    compute_first_sets
    compute_follow_sets
  end

  # Returns FIRST of a sequence of grammar symbols.
  #
  # The result contains EMPTY only when every symbol in the sequence can
  # derive the empty string (an empty sequence trivially does). A symbol
  # with no entry in +first_sets+ is treated as deriving only itself.
  #: (Array[Symbol]) -> Set[Symbol]
  def first_of_sequence(symbols)
    return Set[EMPTY] if symbols.empty?

    result = Set.new

    symbols.each do |symbol|
      first = first_sets.fetch(symbol) { Set[symbol] }
      result.merge(first - Set[EMPTY])
      # A symbol that cannot vanish hides everything after it.
      return result unless first.include?(EMPTY)
    end

    result.add(EMPTY)
    result
  end

  private

  attr_reader :grammar #: Grammar::Grammar

  # Seeds FIRST(t) = {t} for terminals and empty FIRST/FOLLOW sets for
  # nonterminals.
  #: () -> void
  def initialize_sets
    grammar.terminals.each_key do |name|
      first_sets[name] = Set[name]
    end

    grammar.nonterminals.each_key do |name|
      first_sets[name] = Set.new
      follow_sets[name] = Set.new
    end
  end

  # Iterates over all productions until no FIRST set grows any further.
  #: () -> void
  def compute_first_sets
    loop do
      changed = false

      grammar.productions.each do |production|
        additions = production.empty? ? Set[EMPTY] : first_of_sequence(production.rhs)

        # Sets only ever grow here, so a size change is an exact change
        # detector and avoids copying the set on every production.
        target = first_sets[production.lhs]
        size_before = target.size
        target.merge(additions)
        changed = true if target.size != size_before
      end

      break unless changed
    end
  end

  # Seeds FOLLOW(start) with the end-of-input marker, then iterates over
  # all productions until no FOLLOW set grows any further.
  #: () -> void
  def compute_follow_sets
    start_symbol = grammar.start_symbol || raise(NoStartSymbolError, "start symbol is not set")
    follow_sets[start_symbol].add(Grammar::Grammar::END_OF_INPUT)

    loop do
      changed = false

      grammar.productions.each do |production|
        production.rhs.each_with_index do |symbol, index|
          next unless grammar.nonterminal?(symbol)

          suffix = production.rhs[(index + 1)..] || []
          suffix_first = first_of_sequence(suffix)

          target = follow_sets[symbol]
          size_before = target.size
          target.merge(suffix_first - Set[EMPTY])
          # An empty suffix also yields EMPTY, so this single check covers
          # both "nothing follows" and "everything that follows can vanish".
          target.merge(follow_sets[production.lhs]) if suffix_first.include?(EMPTY)
          changed = true if target.size != size_before
        end
      end

      break unless changed
    end
  end
end
|
|
105
|
+
end
|
|
106
|
+
end
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Rusa
|
|
4
|
+
module Analysis
|
|
5
|
+
# Item represents a single LR(0) production with a dot position.
|
|
6
|
+
# An LR(0) item: a production together with a dot position marking how
# much of the right-hand side has been recognised so far.
#
# Two items are equal (and hash alike) when they share the same production
# id and dot position, so they can be stored in sets and used as hash keys.
class Item
  attr_reader :production #: Grammar::Production
  attr_reader :dot #: Integer

  # +dot+ defaults to 0, i.e. nothing recognised yet.
  #: (Grammar::Production, ?Integer) -> void
  def initialize(production, dot = 0)
    @production = production
    @dot = dot
  end

  # The symbol immediately after the dot, or nil when the dot is at the end.
  #: () -> Symbol?
  def next_symbol
    production.rhs[dot]
  end

  # A new item for the same production with the dot moved one symbol right.
  #: () -> Item
  def advance
    self.class.new(production, dot + 1)
  end

  # True when the dot has reached (or passed) the end of the right-hand side.
  #: () -> bool
  def reduce?
    dot >= production.rhs.length
  end

  #: (Object) -> bool
  def ==(other)
    other.is_a?(self.class) && other.production.id == production.id && other.dot == dot
  end
  alias eql? ==

  #: () -> Integer
  def hash
    [production.id, dot].hash
  end

  # Renders the item as "lhs -> a · b". For an empty right-hand side the
  # inserted marker is the only element, so the result is "lhs -> ·".
  #: () -> String
  def to_s
    # Work on a copy so the production's own rhs array is never modified.
    rhs = production.rhs.dup
    rhs.insert(dot, :"·")
    rendered_rhs = rhs.join(" ")
    "#{production.lhs} -> #{rendered_rhs}"
  end
end
|
|
50
|
+
end
|
|
51
|
+
end
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "set"
|
|
4
|
+
|
|
5
|
+
module Rusa
|
|
6
|
+
module Analysis
|
|
7
|
+
# ItemSet is a parser state identified by its LR(0) kernel items.
|
|
8
|
+
# A parser state: a set of kernel items plus their LR(0) closure.
#
# Identity (==, eql?, hash) is defined by the kernel items alone; the id
# and the closure do not take part in comparisons.
class ItemSet
  attr_reader :id #: Integer
  attr_reader :items #: Set[Item]
  attr_reader :kernel_items #: Set[Item]

  # Stores frozen copies of the kernel and of its closure over +grammar+.
  #: (Integer, Enumerable[Item], Grammar::Grammar) -> void
  def initialize(id, kernel_items, grammar)
    @id = id
    # Always copy: Set#to_set returns the receiver itself, so freezing its
    # result would freeze a Set that still belongs to the caller.
    @kernel_items = Set.new(kernel_items).freeze
    @items = self.class.closure(@kernel_items, grammar).freeze
  end

  #: (Object) -> bool
  def ==(other)
    other.is_a?(self.class) && other.kernel_items == kernel_items
  end
  alias eql? ==

  #: () -> Integer
  def hash
    kernel_items.hash
  end

  # "State <id>" followed by one line per item, ordered by production id
  # and then dot position.
  #: () -> String
  def to_s
    lines = items.to_a.sort_by { |item| [item.production.id, item.dot] }.map(&:to_s)
    "State #{id}\n#{lines.join("\n")}"
  end

  # Returns a new set holding +kernel_items+ and every item reachable by
  # repeatedly expanding the nonterminal that follows a dot. The argument
  # itself is not modified.
  #: (Enumerable[Item], Grammar::Grammar) -> Set[Item]
  def self.closure(kernel_items, grammar)
    result = Set.new(kernel_items)
    # FIFO worklist: every item is expanded exactly once, in the order it
    # was added, instead of rescanning the whole set on each pass.
    pending = result.to_a

    until pending.empty?
      item = pending.shift
      symbol = item.next_symbol
      next unless symbol && grammar.nonterminal?(symbol)

      grammar.productions_for(symbol).each do |production|
        candidate = Item.new(production, 0)
        # Set#add? returns nil when the item was already present.
        pending << candidate if result.add?(candidate)
      end
    end

    result
  end
end
|
|
63
|
+
end
|
|
64
|
+
end
|
|
@@ -0,0 +1,460 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "set"
|
|
4
|
+
|
|
5
|
+
module Rusa
|
|
6
|
+
module Analysis
|
|
7
|
+
# LALRTable merges canonical LR(1) states with the same LR(0) core.
|
|
8
|
+
class LALRTable
|
|
9
|
+
class LR1Item
|
|
10
|
+
attr_reader :production #: Grammar::Production
|
|
11
|
+
attr_reader :dot #: Integer
|
|
12
|
+
attr_reader :lookahead #: Symbol
|
|
13
|
+
|
|
14
|
+
#: (Grammar::Production, Integer, Symbol) -> void
|
|
15
|
+
def initialize(production, dot, lookahead)
|
|
16
|
+
@production = production
|
|
17
|
+
@dot = dot
|
|
18
|
+
@lookahead = lookahead
|
|
19
|
+
freeze
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
#: () -> Symbol?
|
|
23
|
+
def next_symbol
|
|
24
|
+
production.rhs[dot]
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
#: () -> LR1Item
|
|
28
|
+
def advance
|
|
29
|
+
self.class.new(production, dot + 1, lookahead)
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
#: () -> bool
|
|
33
|
+
def reduce?
|
|
34
|
+
dot >= production.rhs.length
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
#: () -> [Integer?, Integer]
|
|
38
|
+
def core
|
|
39
|
+
[production.id, dot]
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
#: (Object) -> bool
|
|
43
|
+
def ==(other)
|
|
44
|
+
other.is_a?(self.class) &&
|
|
45
|
+
other.production.id == production.id &&
|
|
46
|
+
other.dot == dot &&
|
|
47
|
+
other.lookahead == lookahead
|
|
48
|
+
end
|
|
49
|
+
alias eql? ==
|
|
50
|
+
|
|
51
|
+
#: () -> Integer
|
|
52
|
+
def hash
|
|
53
|
+
[production.id, dot, lookahead].hash
|
|
54
|
+
end
|
|
55
|
+
end
|
|
56
|
+
|
|
57
|
+
class MergedItem
|
|
58
|
+
attr_reader :production #: Grammar::Production
|
|
59
|
+
attr_reader :dot #: Integer
|
|
60
|
+
attr_reader :lookaheads #: Set[Symbol]
|
|
61
|
+
|
|
62
|
+
#: (Grammar::Production, Integer, Set[Symbol]) -> void
|
|
63
|
+
def initialize(production, dot, lookaheads)
|
|
64
|
+
@production = production
|
|
65
|
+
@dot = dot
|
|
66
|
+
@lookaheads = lookaheads
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
#: () -> Symbol?
|
|
70
|
+
def next_symbol
|
|
71
|
+
production.rhs[dot]
|
|
72
|
+
end
|
|
73
|
+
|
|
74
|
+
#: () -> bool
|
|
75
|
+
def reduce?
|
|
76
|
+
dot >= production.rhs.length
|
|
77
|
+
end
|
|
78
|
+
|
|
79
|
+
#: () -> [Integer?, Integer]
|
|
80
|
+
def core
|
|
81
|
+
[production.id, dot]
|
|
82
|
+
end
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
class MergedState
|
|
86
|
+
attr_reader :id #: Integer
|
|
87
|
+
attr_reader :items #: Array[MergedItem]
|
|
88
|
+
|
|
89
|
+
#: (Integer, Array[MergedItem]) -> void
|
|
90
|
+
def initialize(id, items)
|
|
91
|
+
@id = id
|
|
92
|
+
@items = items.freeze
|
|
93
|
+
freeze
|
|
94
|
+
end
|
|
95
|
+
end
|
|
96
|
+
|
|
97
|
+
attr_reader :action_table #: Hash[Integer, Hash[Symbol, Shift | Reduce | Accept]]
|
|
98
|
+
attr_reader :goto_table #: Hash[Integer, Hash[Symbol, Integer]]
|
|
99
|
+
attr_reader :conflicts #: Array[ConflictReport]
|
|
100
|
+
attr_reader :states #: Array[MergedState]
|
|
101
|
+
attr_reader :transitions #: Hash[Integer, Hash[Symbol, Integer]]
|
|
102
|
+
|
|
103
|
+
#: (Grammar::Grammar, ?first_follow: FirstFollow) -> void
|
|
104
|
+
def initialize(grammar, first_follow: FirstFollow.new(grammar))
|
|
105
|
+
@grammar = grammar
|
|
106
|
+
@first_follow = first_follow
|
|
107
|
+
@resolver = ConflictResolver.new
|
|
108
|
+
@action_table = Hash.new do |hash, key|
|
|
109
|
+
hash[key] = {} #: Hash[Symbol, Shift | Reduce | Accept]
|
|
110
|
+
end
|
|
111
|
+
@goto_table = Hash.new do |hash, key|
|
|
112
|
+
hash[key] = {} #: Hash[Symbol, Integer]
|
|
113
|
+
end
|
|
114
|
+
@conflicts = []
|
|
115
|
+
@states = []
|
|
116
|
+
@transitions = Hash.new do |hash, key|
|
|
117
|
+
hash[key] = {} #: Hash[Symbol, Integer]
|
|
118
|
+
end
|
|
119
|
+
build_tables
|
|
120
|
+
end
|
|
121
|
+
|
|
122
|
+
#: () -> String
|
|
123
|
+
def report
|
|
124
|
+
lines = [] #: Array[String]
|
|
125
|
+
lines << "states: #{states.length}"
|
|
126
|
+
conflicts.each do |conflict|
|
|
127
|
+
lines << conflict_message(conflict)
|
|
128
|
+
end
|
|
129
|
+
lines.join("\n")
|
|
130
|
+
end
|
|
131
|
+
|
|
132
|
+
private
|
|
133
|
+
|
|
134
|
+
attr_reader :grammar #: Grammar::Grammar
|
|
135
|
+
attr_reader :first_follow #: FirstFollow
|
|
136
|
+
attr_reader :resolver #: ConflictResolver
|
|
137
|
+
|
|
138
|
+
#: () -> void
|
|
139
|
+
def build_tables
|
|
140
|
+
canonical_states, canonical_transitions = build_canonical_automaton
|
|
141
|
+
merge_states(canonical_states, canonical_transitions)
|
|
142
|
+
populate_tables
|
|
143
|
+
end
|
|
144
|
+
|
|
145
|
+
#: () -> [Array[Set[LR1Item]], Hash[Integer, Hash[Symbol, Integer]]]
|
|
146
|
+
def build_canonical_automaton
|
|
147
|
+
state0 = initial_lr1_state
|
|
148
|
+
states = [state0] #: Array[Set[LR1Item]]
|
|
149
|
+
transitions = empty_transition_table
|
|
150
|
+
known = { lr1_state_key(state0) => 0 }
|
|
151
|
+
worklist = [0]
|
|
152
|
+
|
|
153
|
+
until worklist.empty?
|
|
154
|
+
state_id = worklist.shift
|
|
155
|
+
build_canonical_transitions_for(
|
|
156
|
+
states,
|
|
157
|
+
transitions,
|
|
158
|
+
known,
|
|
159
|
+
worklist,
|
|
160
|
+
state_id
|
|
161
|
+
)
|
|
162
|
+
end
|
|
163
|
+
|
|
164
|
+
[states, transitions]
|
|
165
|
+
end
|
|
166
|
+
|
|
167
|
+
#: (Enumerable[LR1Item]) -> Set[LR1Item]
|
|
168
|
+
def lr1_closure(items)
|
|
169
|
+
result = items.to_set
|
|
170
|
+
|
|
171
|
+
loop do
|
|
172
|
+
changed = false
|
|
173
|
+
|
|
174
|
+
result.to_a.each do |item|
|
|
175
|
+
symbol = item.next_symbol
|
|
176
|
+
next unless symbol && grammar.nonterminal?(symbol)
|
|
177
|
+
|
|
178
|
+
changed ||= add_closure_items(result, item, symbol)
|
|
179
|
+
end
|
|
180
|
+
|
|
181
|
+
break unless changed
|
|
182
|
+
end
|
|
183
|
+
|
|
184
|
+
result
|
|
185
|
+
end
|
|
186
|
+
|
|
187
|
+
#: (Enumerable[LR1Item]) -> Array[Array[Integer | nil | Symbol]]
|
|
188
|
+
def lr1_state_key(items)
|
|
189
|
+
items.map { |item| [item.production.id, item.dot, item.lookahead] }.sort.freeze
|
|
190
|
+
end
|
|
191
|
+
|
|
192
|
+
#: (Array[Set[LR1Item]], Hash[Integer, Hash[Symbol, Integer]]) -> void
|
|
193
|
+
def merge_states(canonical_states, canonical_transitions)
|
|
194
|
+
merged_ids = {} #: Hash[Integer, Integer]
|
|
195
|
+
grouped_states = group_states_by_lr0_core(canonical_states)
|
|
196
|
+
|
|
197
|
+
grouped_states.each_with_index do |(_, canonical_indexes), merged_id|
|
|
198
|
+
states << merge_state_group(
|
|
199
|
+
canonical_states,
|
|
200
|
+
canonical_indexes,
|
|
201
|
+
merged_ids,
|
|
202
|
+
merged_id
|
|
203
|
+
)
|
|
204
|
+
end
|
|
205
|
+
|
|
206
|
+
merge_transitions(canonical_transitions, merged_ids)
|
|
207
|
+
end
|
|
208
|
+
|
|
209
|
+
#: (Enumerable[LR1Item]) -> Array[Array[Integer?]]
|
|
210
|
+
def lr0_core_key(items)
|
|
211
|
+
items.map(&:core).sort.freeze
|
|
212
|
+
end
|
|
213
|
+
|
|
214
|
+
#: () -> void
|
|
215
|
+
def populate_tables
|
|
216
|
+
productions_by_id = index_productions_by_id
|
|
217
|
+
|
|
218
|
+
states.each do |state|
|
|
219
|
+
populate_actions_for(state, productions_by_id)
|
|
220
|
+
populate_gotos_for(state)
|
|
221
|
+
end
|
|
222
|
+
end
|
|
223
|
+
|
|
224
|
+
#: (Integer, Symbol, Shift | Reduce | Accept, Hash[Integer?, Grammar::Production]) -> void
|
|
225
|
+
def add_action(state_id, lookahead, action, productions_by_id)
|
|
226
|
+
existing = action_table[state_id][lookahead]
|
|
227
|
+
if existing.nil?
|
|
228
|
+
action_table[state_id][lookahead] = action
|
|
229
|
+
return
|
|
230
|
+
end
|
|
231
|
+
|
|
232
|
+
return if existing == action
|
|
233
|
+
|
|
234
|
+
resolved, report = resolver.resolve(
|
|
235
|
+
existing,
|
|
236
|
+
action,
|
|
237
|
+
lookahead,
|
|
238
|
+
grammar,
|
|
239
|
+
state_id: state_id,
|
|
240
|
+
productions_by_id: productions_by_id
|
|
241
|
+
)
|
|
242
|
+
conflicts << report
|
|
243
|
+
if resolved.nil?
|
|
244
|
+
action_table[state_id].delete(lookahead)
|
|
245
|
+
else
|
|
246
|
+
action_table[state_id][lookahead] = resolved
|
|
247
|
+
end
|
|
248
|
+
end
|
|
249
|
+
|
|
250
|
+
#: () -> Set[LR1Item]
|
|
251
|
+
def initial_lr1_state
|
|
252
|
+
start_item = LR1Item.new(
|
|
253
|
+
grammar.augmented_production,
|
|
254
|
+
0,
|
|
255
|
+
Grammar::Grammar::END_OF_INPUT
|
|
256
|
+
)
|
|
257
|
+
lr1_closure(Set[start_item])
|
|
258
|
+
end
|
|
259
|
+
|
|
260
|
+
#: () -> Hash[Integer, Hash[Symbol, Integer]]
|
|
261
|
+
def empty_transition_table
|
|
262
|
+
Hash.new do |hash, key|
|
|
263
|
+
hash[key] = {} #: Hash[Symbol, Integer]
|
|
264
|
+
end
|
|
265
|
+
end
|
|
266
|
+
|
|
267
|
+
#: (Array[Set[LR1Item]], Hash[Integer, Hash[Symbol, Integer]], Hash[Array[Array[Integer | nil | Symbol]], Integer], Array[Integer], Integer) -> void
|
|
268
|
+
def build_canonical_transitions_for(states, transitions, known, worklist, state_id)
|
|
269
|
+
state = states.fetch(state_id)
|
|
270
|
+
|
|
271
|
+
next_symbols_for(state).each do |symbol|
|
|
272
|
+
target = target_state_for(state, symbol)
|
|
273
|
+
target_id = register_state(states, known, worklist, target)
|
|
274
|
+
transitions[state_id][symbol] = target_id
|
|
275
|
+
end
|
|
276
|
+
end
|
|
277
|
+
|
|
278
|
+
#: (Set[LR1Item]) -> Array[Symbol]
|
|
279
|
+
def next_symbols_for(state)
|
|
280
|
+
state.map(&:next_symbol).compact.uniq
|
|
281
|
+
end
|
|
282
|
+
|
|
283
|
+
#: (Set[LR1Item], Symbol) -> Set[LR1Item]
|
|
284
|
+
def target_state_for(state, symbol)
|
|
285
|
+
advanced_items = state
|
|
286
|
+
.select { |item| item.next_symbol == symbol }
|
|
287
|
+
.map(&:advance)
|
|
288
|
+
.to_set
|
|
289
|
+
|
|
290
|
+
lr1_closure(advanced_items)
|
|
291
|
+
end
|
|
292
|
+
|
|
293
|
+
#: (Array[Set[LR1Item]], Hash[Array[Array[Integer | nil | Symbol]], Integer], Array[Integer], Set[LR1Item]) -> Integer
|
|
294
|
+
def register_state(states, known, worklist, target)
|
|
295
|
+
key = lr1_state_key(target)
|
|
296
|
+
target_id = known[key]
|
|
297
|
+
return target_id if target_id
|
|
298
|
+
|
|
299
|
+
target_id = states.length
|
|
300
|
+
states << target
|
|
301
|
+
known[key] = target_id
|
|
302
|
+
worklist << target_id
|
|
303
|
+
target_id
|
|
304
|
+
end
|
|
305
|
+
|
|
306
|
+
#: (Set[LR1Item], LR1Item, Symbol) -> bool
|
|
307
|
+
def add_closure_items(result, item, symbol)
|
|
308
|
+
lookaheads = closure_lookaheads_for(item)
|
|
309
|
+
changed = false
|
|
310
|
+
|
|
311
|
+
grammar.productions_for(symbol).each do |production|
|
|
312
|
+
lookaheads.each do |lookahead|
|
|
313
|
+
candidate = LR1Item.new(production, 0, lookahead)
|
|
314
|
+
next if result.include?(candidate)
|
|
315
|
+
|
|
316
|
+
result.add(candidate)
|
|
317
|
+
changed = true
|
|
318
|
+
end
|
|
319
|
+
end
|
|
320
|
+
|
|
321
|
+
changed
|
|
322
|
+
end
|
|
323
|
+
|
|
324
|
+
#: (LR1Item) -> Set[Symbol]
|
|
325
|
+
def closure_lookaheads_for(item)
|
|
326
|
+
beta = item.production.rhs[(item.dot + 1)..] || []
|
|
327
|
+
symbols = beta + [item.lookahead]
|
|
328
|
+
|
|
329
|
+
first_follow.first_of_sequence(symbols) - Set[FirstFollow::EMPTY]
|
|
330
|
+
end
|
|
331
|
+
|
|
332
|
+
#: (Array[Set[LR1Item]]) -> Hash[Array[Array[Integer?]], Array[Integer]]
|
|
333
|
+
def group_states_by_lr0_core(canonical_states)
|
|
334
|
+
canonical_states
|
|
335
|
+
.each_with_index
|
|
336
|
+
.with_object(grouped_state_index) do |(items, index), grouped_states|
|
|
337
|
+
grouped_states[lr0_core_key(items)] << index
|
|
338
|
+
end
|
|
339
|
+
end
|
|
340
|
+
|
|
341
|
+
#: () -> Hash[Array[Array[Integer?]], Array[Integer]]
|
|
342
|
+
def grouped_state_index
|
|
343
|
+
Hash.new do |hash, key|
|
|
344
|
+
hash[key] = [] #: Array[Integer]
|
|
345
|
+
end
|
|
346
|
+
end
|
|
347
|
+
|
|
348
|
+
#: (Array[Set[LR1Item]], Array[Integer], Hash[Integer, Integer], Integer) -> MergedState
|
|
349
|
+
def merge_state_group(canonical_states, canonical_indexes, merged_ids, merged_id)
|
|
350
|
+
merged_items = {} #: Hash[[Integer?, Integer], MergedItem]
|
|
351
|
+
|
|
352
|
+
canonical_indexes.each do |canonical_id|
|
|
353
|
+
merged_ids[canonical_id] = merged_id
|
|
354
|
+
merge_items_from_state(merged_items, canonical_states.fetch(canonical_id))
|
|
355
|
+
end
|
|
356
|
+
|
|
357
|
+
MergedState.new(merged_id, sorted_merged_items(merged_items))
|
|
358
|
+
end
|
|
359
|
+
|
|
360
|
+
#: (Hash[[Integer?, Integer], MergedItem], Set[LR1Item]) -> void
|
|
361
|
+
def merge_items_from_state(merged_items, items)
|
|
362
|
+
items.each do |item|
|
|
363
|
+
merged_items[item.core] ||= MergedItem.new(item.production, item.dot, Set.new)
|
|
364
|
+
merged_items[item.core].lookaheads.add(item.lookahead)
|
|
365
|
+
end
|
|
366
|
+
end
|
|
367
|
+
|
|
368
|
+
#: (Hash[[Integer?, Integer], MergedItem]) -> Array[MergedItem]
|
|
369
|
+
def sorted_merged_items(merged_items)
|
|
370
|
+
merged_items.values.sort_by { |item| [item.production.id, item.dot] }
|
|
371
|
+
end
|
|
372
|
+
|
|
373
|
+
#: (Hash[Integer, Hash[Symbol, Integer]], Hash[Integer, Integer]) -> void
|
|
374
|
+
def merge_transitions(canonical_transitions, merged_ids)
|
|
375
|
+
canonical_transitions.each do |from_id, edges|
|
|
376
|
+
edges.each do |symbol, to_id|
|
|
377
|
+
transitions[merged_ids.fetch(from_id)][symbol] = merged_ids.fetch(to_id)
|
|
378
|
+
end
|
|
379
|
+
end
|
|
380
|
+
end
|
|
381
|
+
|
|
382
|
+
#: () -> Hash[Integer?, Grammar::Production]
|
|
383
|
+
def index_productions_by_id
|
|
384
|
+
productions_by_id = {} #: Hash[Integer?, Grammar::Production]
|
|
385
|
+
|
|
386
|
+
grammar.productions.each_with_object(productions_by_id) do |production, indexed_productions|
|
|
387
|
+
indexed_productions[production.id] = production
|
|
388
|
+
end
|
|
389
|
+
end
|
|
390
|
+
|
|
391
|
+
#: (MergedState, Hash[Integer?, Grammar::Production]) -> void
|
|
392
|
+
def populate_actions_for(state, productions_by_id)
|
|
393
|
+
state.items.each do |item|
|
|
394
|
+
symbol = item.next_symbol
|
|
395
|
+
if symbol && grammar.terminal?(symbol)
|
|
396
|
+
add_shift_action(state.id, symbol, productions_by_id)
|
|
397
|
+
next
|
|
398
|
+
end
|
|
399
|
+
|
|
400
|
+
next unless item.reduce?
|
|
401
|
+
|
|
402
|
+
populate_reduce_actions(state.id, item, productions_by_id)
|
|
403
|
+
end
|
|
404
|
+
end
|
|
405
|
+
|
|
406
|
+
#: (Integer, Symbol, Hash[Integer?, Grammar::Production]) -> void
|
|
407
|
+
def add_shift_action(state_id, symbol, productions_by_id)
|
|
408
|
+
target_state = transitions.fetch(state_id).fetch(symbol)
|
|
409
|
+
add_action(state_id, symbol, Shift.new(target_state), productions_by_id)
|
|
410
|
+
end
|
|
411
|
+
|
|
412
|
+
#: (Integer, MergedItem, Hash[Integer?, Grammar::Production]) -> void
|
|
413
|
+
def populate_reduce_actions(state_id, item, productions_by_id)
|
|
414
|
+
if item.production.lhs == grammar.augmented_start
|
|
415
|
+
add_accept_action(state_id, productions_by_id)
|
|
416
|
+
return
|
|
417
|
+
end
|
|
418
|
+
|
|
419
|
+
item.lookaheads.each do |lookahead|
|
|
420
|
+
add_reduce_action(state_id, lookahead, item, productions_by_id)
|
|
421
|
+
end
|
|
422
|
+
end
|
|
423
|
+
|
|
424
|
+
#: (Integer, Hash[Integer?, Grammar::Production]) -> void
|
|
425
|
+
def add_accept_action(state_id, productions_by_id)
|
|
426
|
+
add_action(
|
|
427
|
+
state_id,
|
|
428
|
+
Grammar::Grammar::END_OF_INPUT,
|
|
429
|
+
Accept.new,
|
|
430
|
+
productions_by_id
|
|
431
|
+
)
|
|
432
|
+
end
|
|
433
|
+
|
|
434
|
+
#: (Integer, Symbol, MergedItem, Hash[Integer?, Grammar::Production]) -> void
|
|
435
|
+
def add_reduce_action(state_id, lookahead, item, productions_by_id)
|
|
436
|
+
add_action(
|
|
437
|
+
state_id,
|
|
438
|
+
lookahead,
|
|
439
|
+
Reduce.new(item.production.id),
|
|
440
|
+
productions_by_id
|
|
441
|
+
)
|
|
442
|
+
end
|
|
443
|
+
|
|
444
|
+
#: (MergedState) -> void
|
|
445
|
+
def populate_gotos_for(state)
|
|
446
|
+
transitions[state.id].each do |symbol, target_state|
|
|
447
|
+
next unless grammar.nonterminal?(symbol)
|
|
448
|
+
|
|
449
|
+
goto_table[state.id][symbol] = target_state
|
|
450
|
+
end
|
|
451
|
+
end
|
|
452
|
+
|
|
453
|
+
#: (ConflictReport) -> String
|
|
454
|
+
def conflict_message(conflict)
|
|
455
|
+
"state #{conflict.state_id}, " \
|
|
456
|
+
"lookahead #{conflict.lookahead}: #{conflict.message}"
|
|
457
|
+
end
|
|
458
|
+
end
|
|
459
|
+
end
|
|
460
|
+
end
|