rly 0.1.0 → 0.2.0
- data/README.md +2 -2
- data/assets/ply_dump.erb +15 -0
- data/lib/rly.rb +2 -0
- data/lib/rly/lex.rb +54 -25
- data/lib/rly/lex_token.rb +8 -0
- data/lib/rly/parse/grammar.rb +211 -0
- data/lib/rly/parse/lr_item.rb +32 -0
- data/lib/rly/parse/lr_table.rb +529 -0
- data/lib/rly/parse/ply_dump.rb +52 -0
- data/lib/rly/parse/production.rb +38 -0
- data/lib/rly/parse/rule_parser.rb +68 -0
- data/lib/rly/parse/yacc_production.rb +11 -0
- data/lib/rly/parse/yacc_symbol.rb +6 -0
- data/lib/rly/version.rb +2 -1
- data/lib/rly/yacc.rb +355 -0
- data/spec/lex/{lexer_spec.rb → lex_spec.rb} +45 -24
- data/spec/parse/calc_spec.rb +95 -0
- data/spec/parse/grammar_spec.rb +239 -0
- data/spec/parse/lr_table_spec.rb +212 -0
- data/spec/parse/production_spec.rb +18 -0
- data/spec/parse/rule_parser_spec.rb +20 -0
- data/spec/parse/yacc_spec.rb +57 -0
- data/spec/spec_helper.rb +5 -0
- metadata +26 -4
data/README.md
CHANGED
@@ -2,8 +2,8 @@
 
 # Rly
 
-Rly is a lexer and parser generator for ruby, based on ideas and solutions of
-Python's [Ply](http://www.dabeaz.com/ply/).
+Rly is a lexer and parser generator for ruby (O RLY?), based on ideas and solutions of
+Python's [Ply](http://www.dabeaz.com/ply/) (in some places it's a total rip off actually).
 
 ## Installation
 
data/assets/ply_dump.erb
ADDED
@@ -0,0 +1,15 @@
+Created by PLY version 3.4 (http://www.dabeaz.com/ply)
+
+Grammar
+
+<% for p in g.productions %>Rule <%= sprintf("%-5d", p.index) %> <%= p %>
+<% end %>
+Terminals, with rules where they appear
+
+<% @t = g.terminals.keys.map {|k| k.to_s }.sort; for t in @t %><%= sprintf("%-20s : %s", t, (g.terminals[t] || g.terminals[t.to_sym]).join(' ')) %>
+<% end %>
+Nonterminals, with rules where they appear
+
+<% @t = g.nonterminals.keys.sort; for t in @t %><%= sprintf("%-20s : %s", t, g.nonterminals[t].join(' ')) %>
+<% end %>
+<%= backlog %>
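The template expects two names in its binding: a grammar object `g` (anything responding to #productions, #terminals and #nonterminals) and a trailing `backlog` string. In the gem it is wrapped by Rly::PlyDump (data/lib/rly/parse/ply_dump.rb, +52 above, not shown here); the following is only a hand-rolled rendering sketch, and the helper name is ours rather than the gem's:

```ruby
require "erb"

# Hypothetical helper (not part of the gem): renders the ply_dump.erb
# template with the two values it reads from its binding.
def render_ply_dump(g, backlog = "")
  template = File.read("data/assets/ply_dump.erb")
  ERB.new(template).result(binding) # `g` and `backlog` are picked up from this binding
end
```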
data/lib/rly.rb
CHANGED
data/lib/rly/lex.rb
CHANGED
@@ -12,10 +12,10 @@ module Rly
 # configuration (check the methods documentation for details).
 #
 # Once you got your lexer configured, you can create its instances passing a
-# String to be tokenized. You can then use
-#
+# String to be tokenized. You can then use {#next} method to get tokens. If you
+# have more string to tokenize, you can append it to input buffer at any time with
+# {#input}.
 class Lex
-include Enumerable
 
 # Tracks the current line number for generated tokens
 #
@@ -55,21 +55,46 @@ module Rly
 # end
 #
 # lex = MyLexer.new("hello WORLD")
-# lex.
-#
-#
-#
+# t = lex.next
+# puts "#{tok.type} -> #{tok.value}" #=> "LOWERS -> hello"
+# t = lex.next
+# puts "#{tok.type} -> #{tok.value}" #=> "UPPERS -> WORLD"
+# t = lex.next # => nil
 def initialize(input="")
 @input = input
 @pos = 0
 @lineno = 0
 end
 
+def inspect
+"#<#{self.class} pos=#{@pos} len=#{@input.length} lineno=#{@lineno}>"
+end
+
+# Appends string to input buffer
+#
+# The given string is appended to input buffer, further {#next} calls will
+# tokenize it as usual.
+#
+# @api public
+#
+# @example
+# lex = MyLexer.new("hello")
+#
+# t = lex.next
+# puts "#{tok.type} -> #{tok.value}" #=> "LOWERS -> hello"
+# t = lex.next # => nil
+# lex.input("WORLD")
+# t = lex.next
+# puts "#{tok.type} -> #{tok.value}" #=> "UPPERS -> WORLD"
+# t = lex.next # => nil
+def input(input)
+@input << input
+end
+
 # Processes the next token in input
 #
-# This is the main interface to lexer.
-#
-# returns an Enumerator object.
+# This is the main interface to lexer. It returns next available token or **nil**
+# if there are no more tokens available in the input string.
 #
 # {#each} Raises {LexError} if the input cannot be processed. This happens if
 # there were no matches by 'token' rules and no matches by 'literals' rule.
@@ -78,23 +103,19 @@ module Rly
 # after returning from error handler is still unchanged.
 #
 # @api public
-# @yieldparam tok [LexToken] next processed token
 # @raise [LexError] if the input cannot be processed
-# @return [
-# @return [nil] if
+# @return [LexToken] if the next chunk of input was processed successfully
+# @return [nil] if there are no more tokens available in input
 #
 # @example
 # lex = MyLexer.new("hello WORLD")
 #
-# lex.
-#
-# lex.
-#
-#
-
-def each
-return self.to_enum unless block_given?
-
+# t = lex.next
+# puts "#{tok.type} -> #{tok.value}" #=> "LOWERS -> hello"
+# t = lex.next
+# puts "#{tok.type} -> #{tok.value}" #=> "UPPERS -> WORLD"
+# t = lex.next # => nil
+def next
 while @pos < @input.length
 if self.class.ignores_list[@input[@pos]]
 @pos += 1
@@ -105,15 +126,17 @@ module Rly
 self.class.tokens.each do |type, rule, block|
 m = rule.match(@input, @pos)
 next unless m
+next unless m.begin(0) == @pos
 
 tok = LexToken.new(type, m[0], self)
 
 matched = true
 
 tok = block.call(tok) if block
-yield tok if tok.type
 
 @pos = m.end(0)
+
+return tok if tok.type
 end
 
 unless matched
@@ -121,8 +144,10 @@ module Rly
 tok = LexToken.new(@input[@pos], @input[@pos], self)
 
 matched = true
-
+
 @pos += 1
+
+return tok
 end
 end
 
@@ -134,16 +159,20 @@ module Rly
 if pos == @pos
 raise LexError.new("Illegal character '#{@input[@pos]}' at index #{@pos}")
 else
-
+return tok if tok && tok.type
 end
 else
 raise LexError.new("Illegal character '#{@input[@pos]}' at index #{@pos}")
 end
 end
 end
+return nil
 end
 
 class << self
+def terminals
+self.tokens.map { |t,r,b| t }.compact + self.literals_list.chars.to_a
+end
 # Returns the list of registered tokens
 #
 # @api private
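Taken together these hunks drop the Enumerable-based interface: instead of iterating with #each, you now pull tokens one at a time with #next (nil once the buffer is exhausted) and can append more text with #input. A minimal usage sketch, assuming the class-level token/ignore rule DSL that Rly::Lex already had in 0.1.0; the rule names are illustrative:

```ruby
require "rly"

class MyLexer < Rly::Lex
  token :LOWERS, /[a-z]+/
  token :UPPERS, /[A-Z]+/
  ignore " \t\n"
end

lex = MyLexer.new("hello WORLD")
while tok = lex.next                  # pull-based: one LexToken per call
  puts "#{tok.type} -> #{tok.value}"  #=> "LOWERS -> hello", then "UPPERS -> WORLD"
end

lex.input("again")                    # append to the input buffer at any time
lex.next.value                        #=> "again"
```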
data/lib/rly/lex_token.rb
CHANGED
data/lib/rly/parse/grammar.rb
ADDED
@@ -0,0 +1,211 @@
+require "rly/parse/production"
+require "rly/parse/lr_item"
+
+module Rly
+class Grammar
+attr_reader :terminals, :nonterminals, :productions, :prodnames, :start, :precedence
+
+def initialize(terminals)
+@productions = [nil]
+@prodnames = {}
+@prodmap = {}
+
+@terminals = {}
+terminals.each do |t|
+raise ArgumentError unless t.upcase == t
+@terminals[t] = []
+end
+@terminals[:error] = []
+
+@nonterminals = {}
+@first = {}
+@follow = {}
+@precedence = {}
+@used_precedence = {}
+@start = nil
+end
+
+def add_production(name, symbols, &block)
+raise ArgumentError unless name.downcase == name
+raise ArgumentError if name == :error
+
+symbols.each do |sym|
+if sym.is_a?(String)
+raise ArgumentError unless sym.length == 1
+@terminals[sym] = [] unless @terminals[sym]
+end
+end
+
+precedence = prec_for_rightmost_terminal(symbols)
+
+mapname = "#{name.to_s} -> #{symbols.to_s}"
+raise ArgumentError if @prodmap[mapname]
+
+index = @productions.count
+@nonterminals[name] = [] unless @nonterminals[name]
+
+symbols.each do |sym|
+if @terminals[sym]
+@terminals[sym] << index
+else
+@nonterminals[sym] = [] unless @nonterminals[sym]
+@nonterminals[sym] << index
+end
+end
+
+p = Production.new(index, name, symbols, precedence, block)
+
+@productions << p
+@prodmap[mapname] = p
+
+@prodnames[name] = [] unless @prodnames[name]
+@prodnames[name] << p
+
+p
+end
+
+def set_precedence(term, assoc, level)
+raise RuntimeError if @productions != [nil]
+raise ArgumentError if @precedence[term]
+raise ArgumentError unless [:left, :right, :noassoc].include?(assoc)
+
+@precedence[term] = [assoc, level]
+end
+
+def set_start(symbol=nil)
+symbol = @productions[1].name unless symbol
+raise ArgumentError unless @nonterminals[symbol]
+@productions[0] = Production.new(0, :"S'", [symbol])
+@nonterminals[symbol] << 0
+@start = symbol
+end
+
+def build_lritems
+@productions.each do |p|
+lastlri = p
+i = 0
+lr_items = []
+while true do
+if i > p.length
+lri = nil
+else
+lri = LRItem.new(p,i)
+lri.lr_after = @prodnames[lri.prod[i+1]] || []
+lri.lr_before = lri.prod[i-1] || nil
+end
+
+lastlri.lr_next = lri
+break unless lri
+lr_items << lri
+lastlri = lri
+i += 1
+end
+p.lr_items = lr_items
+end
+end
+
+def compute_first
+return @first unless @first.empty?
+
+@terminals.keys.each { |t| @first[t] = [t] }
+@first[:'$end'] = [:'$end']
+@nonterminals.keys.each { |n| @first[n] = [] }
+while true
+any_changes = false
+nonterminals.keys.each do |n|
+@prodnames[n].each do |p|
+_first(p.prod).each do |f|
+unless @first[n].include?(f)
+@first[n] << f
+any_changes = true
+end
+end
+end
+end
+break unless any_changes
+end
+
+@first
+end
+
+def compute_follow(start=nil)
+return @follow unless @follow.empty?
+
+compute_first if @first.empty?
+
+@nonterminals.keys.each { |n| @follow[n] = [] }
+
+start = @productions[1].name unless start
+
+@follow[start] = [:'$end']
+
+while true
+didadd = false
+@productions[1..-1].each do |p|
+p.prod.length.times do |i|
+b = p.prod[i]
+next unless @nonterminals.include?(b)
+
+fst = _first(p.prod[i+1..-1])
+hasempty = false
+fst.each do |f|
+if f != :'<empty>' && !@follow[b].include?(f)
+@follow[b] << f
+didadd = true
+end
+hasempty = true if f == :'<empty>'
+end
+if hasempty || i == p.prod.length - 1
+@follow[p.name].each do |f|
+unless @follow[b].include?(f)
+@follow[b] << f
+didadd = true
+end
+end
+end
+end
+end
+break unless didadd
+end
+
+@follow
+end
+
+private
+def _first(beta)
+result = []
+should_add_empty = true
+
+beta.each do |x|
+x_produces_empty = false
+
+@first[x].each do |f|
+if f == :'<empty>'
+x_produces_empty = true
+else
+result << f unless result.include?(f)
+end
+end
+
+if x_produces_empty
+next
+else
+should_add_empty = false
+break
+end
+end
+result << :'<empty>' if should_add_empty
+
+result
+end
+
+def prec_for_rightmost_terminal(symbols)
+symbols.reverse_each do |sym|
+next unless @terminals[sym]
+
+return @precedence[sym] || [:right, 0]
+end
+[:right, 0]
+end
+end
+end
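The Grammar class is normally fed by Rly::Yacc (data/lib/rly/yacc.rb, +355 above), but it can also be driven directly. A short sketch using only the methods added in this file; the rule and token names are made up for illustration:

```ruby
require "rly/parse/grammar"

g = Rly::Grammar.new([:NUMBER])      # terminal names must be uppercase
g.set_precedence('+', :left, 1)      # precedences must be declared before any production

g.add_production(:statement, [:expression])
g.add_production(:expression, [:expression, '+', :term]) # one-char strings become literal terminals
g.add_production(:expression, [:term])
g.add_production(:term, [:NUMBER])

g.set_start                          # defaults to the name of the first production (:statement)
g.build_lritems                      # attach dotted LR items to every production
g.compute_first                      # e.g. FIRST(expression) == [:NUMBER]
g.compute_follow                     # e.g. FOLLOW(expression) includes '+' and :'$end'
```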
data/lib/rly/parse/lr_item.rb
ADDED
@@ -0,0 +1,32 @@
+module Rly
+class LRItem
+attr_accessor :lr_after, :lr_before, :lr_next
+attr_reader :prod, :name, :usyms, :lr_index, :length, :lookaheads, :index
+
+def initialize(p, n)
+@name = p.name
+@prod = p.prod.dup
+@index = p.index
+@lr_index = n
+@lookaheads = {}
+@prod.insert(n, :'.')
+@length = @prod.length
+@usyms = p.usyms
+
+@lr_items = []
+@lr_next = nil
+end
+
+def to_s
+if @prod
+"#{@name} -> #{@prod.join(' ')}"
+else
+"#{@name} -> <empty>"
+end
+end
+
+def inspect
+"#<LRItem #{to_s}>"
+end
+end
+end
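An LRItem is just a production with the current parse position marked by the :'.' symbol; build_lritems creates one item per dot position. Continuing the hypothetical grammar sketch above:

```ruby
p = g.productions[2]                 # expression -> expression + term
Rly::LRItem.new(p, 0).to_s           #=> "expression -> . expression + term"
Rly::LRItem.new(p, 2).to_s           #=> "expression -> expression + . term"
```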
data/lib/rly/parse/lr_table.rb
ADDED
@@ -0,0 +1,529 @@
+require "set"
+require "rly/parse/ply_dump"
+
+module Rly
+class LRTable
+MAXINT = (2**(0.size * 8 -2) -1)
+
+attr_reader :lr_action, :lr_goto, :lr_productions
+
+def initialize(grammar, method=:LALR)
+raise ArgumentError unless [:LALR, :SLR].include?(method)
+
+@grammar = grammar
+@lr_method = method
+
+@lr_action = {}
+@lr_goto = {}
+@lr_productions = grammar.productions
+@lr_goto_cache = {}
+@lr0_cidhash = {}
+
+@add_count = 0
+
+@sr_conflict = 0
+@rr_conflict = 0
+@conflicts = []
+
+@sr_conflicts = []
+@rr_conflicts = []
+
+grammar.build_lritems
+grammar.compute_first
+grammar.compute_follow
+end
+
+def parse_table(log=PlyDump.stub)
+productions = @grammar.productions
+precedence = @grammar.precedence
+
+actionp = {}
+
+log.info("Parsing method: %s", @lr_method)
+
+c = lr0_items
+
+add_lalr_lookaheads(c) if @lr_method == :LALR
+
+# Build the parser table, state by state
+st = 0
+c.each do |i|
+# Loop over each production in I
+actlist = [] # List of actions
+st_action = {}
+st_actionp = {}
+st_goto = {}
+log.info("")
+log.info("state %d", st)
+log.info("")
+i.each { |p| log.info(" (%d) %s", p.index, p.to_s) }
+log.info("")
+
+i.each do |p|
+if p.length == p.lr_index + 1
+if p.name == :"S'"
+# Start symbol. Accept!
+st_action[:"$end"] = 0
+st_actionp[:"$end"] = p
+else
+# We are at the end of a production. Reduce!
+if @lr_method == :LALR
+laheads = p.lookaheads[st]
+else
+laheads = @grammar.follow[p.name]
+end
+laheads.each do |a|
+actlist << [a, p, sprintf("reduce using rule %d (%s)", p.index, p)]
+r = st_action[a]
+if r
+# Whoa. Have a shift/reduce or reduce/reduce conflict
+if r > 0
+# Need to decide on shift or reduce here
+# By default we favor shifting. Need to add
+# some precedence rules here.
+sprec, slevel = productions[st_actionp[a].number].prec
+rprec, rlevel = precedence[a] || [:right, 0]
+if (slevel < rlevel) || ((slevel == rlevel) && (rprec == :left))
+# We really need to reduce here.
+st_action[a] = -p.number
+st_actionp[a] = p
+if ! slevel && ! rlevel
+log.info(" ! shift/reduce conflict for %s resolved as reduce",a)
+@sr_conflicts << [st, a, 'reduce']
+end
+productions[p.number].reduced += 1
+elsif (slevel == rlevel) && (rprec == :nonassoc)
+st_action[a] = nil
+else
+# Hmmm. Guess we'll keep the shift
+unless rlevel
+log.info(" ! shift/reduce conflict for %s resolved as shift",a)
+@sr_conflicts << [st,a,'shift']
+end
+end
+elsif r < 0
+# Reduce/reduce conflict. In this case, we favor the rule
+# that was defined first in the grammar file
+oldp = productions[-r]
+pp = productions[p.number]
+if oldp.line > pp.line
+st_action[a] = -p.number
+st_actionp[a] = p
+chosenp = pp
+rejectp = oldp
+productions[p.number].reduced += 1
+productions[oldp.number].reduced -= 1
+else
+chosenp,rejectp = oldp,pp
+end
+@rr_conflicts << [st, chosenp, rejectp]
+log.info(" ! reduce/reduce conflict for %s resolved using rule %d (%s)", a, st_actionp[a].number, st_actionp[a])
+else
+raise RuntimeError("Unknown conflict in state #{st}")
+end
+else
+st_action[a] = -p.index
+st_actionp[a] = p
+productions[p.index].reduced += 1
+end
+end
+end
+else # <-- level ok
+# i = p.lr_index
+a = p.prod[p.lr_index+1] # Get symbol right after the "."
+if @grammar.terminals.include?(a)
+g = lr0_goto(i, a)
+j = @lr0_cidhash[g.hash] || -1
+if j >= 0
+# We are in a shift state
+actlist << [a, p, sprintf("shift and go to state %d", j)]
+r = st_action[a]
+if r
+# Whoa have a shift/reduce or shift/shift conflict
+if r > 0
+if r != j
+raise RuntimeError("Shift/shift conflict in state #{st}")
+end
+elsif r < 0
+# Do a precedence check.
+# - if precedence of reduce rule is higher, we reduce.
+# - if precedence of reduce is same and left assoc, we reduce.
+# - otherwise we shift
+rprec, rlevel = productions[st_actionp[a].index].precedence
+sprec, slevel = precedence[a] || [:right, 0]
+if (slevel > rlevel) || ((slevel == rlevel) && (rprec == :right))
+# We decide to shift here... highest precedence to shift
+productions[st_actionp[a].index].reduced -= 1
+st_action[a] = j
+st_actionp[a] = p
+unless rlevel
+log.info(" ! shift/reduce conflict for %s resolved as shift",a)
+@sr_conflicts << [st, a, 'shift']
+end
+elsif (slevel == rlevel) && (rprec == :nonassoc)
+st_action[a] = nil
+else
+# Hmmm. Guess we'll keep the reduce
+if ! slevel && ! rlevel
+log.info(" ! shift/reduce conflict for %s resolved as reduce",a)
+@sr_conflicts << [st, a, 'reduce']
+end
+end
+else
+raise RuntimeError("Unknown conflict in state #{st}")
+end
+else
+st_action[a] = j
+st_actionp[a] = p
+end
+end
+end
+end
+end
+
+# Print the actions associated with each terminal
+_actprint = {}
+actlist.each do |a, p, m|
+if st_action[a]
+if p == st_actionp[a]
+log.info(" %-15s %s",a,m)
+_actprint[[a,m]] = 1
+end
+end
+end
+log.info("")
+# Print the actions that were not used. (debugging)
+not_used = false
+actlist.each do |a, p, m|
+if st_action[a]
+unless p == st_actionp[a]
+unless _actprint[[a,m]]
+log.debug(" ! %-15s [ %s ]", a, m)
+not_used = true
+_actprint[[a,m]] = 1
+end
+end
+end
+end
+log.debug("") if not_used
+
+# Construct the goto table for this state
+
+nkeys = {}
+i.each do |ii|
+ii.usyms.each do |s|
+nkeys[s] = nil if @grammar.nonterminals.include?(s)
+end
+end
+nkeys.each do |n, _|
+g = lr0_goto(i, n)
+j = @lr0_cidhash[g.hash] || -1
+if j >= 0
+st_goto[n] = j
+log.info(" %-30s shift and go to state %d",n,j)
+end
+end
+
+@lr_action[st] = st_action
+actionp[st] = st_actionp
+@lr_goto[st] = st_goto
+st += 1
+end
+end
+
+private
+def add_lalr_lookaheads(c)
+nullable = compute_nullable_nonterminals
+trans = find_nonterminal_transitions(c)
+readsets = compute_read_sets(c, trans, nullable)
+lookd, included = compute_lookback_includes(c, trans, nullable)
+followsets = compute_follow_sets(trans, readsets, included)
+add_lookaheads(lookd, followsets)
+end
+
+def lr0_closure(i)
+@add_count += 1
+
+# Add everything in I to J
+j = i.dup
+didadd = true
+while didadd
+didadd = false
+j.each do |k|
+k.lr_after.each do |x|
+next if x.lr0_added == @add_count
+# Add B --> .G to J
+j << x.lr_next
+x.lr0_added = @add_count
+didadd = true
+end
+end
+end
+j
+end
+
+def lr0_goto(i, x)
+g = @lr_goto_cache[[i.hash, x]]
+return g if g
+
+s = @lr_goto_cache[x]
+unless s
+s = {}
+@lr_goto_cache[x] = s
+end
+
+gs = []
+i.each do |p|
+n = p.lr_next
+if n and n.lr_before == x
+s1 = s[n.hash]
+unless s1
+s1 = {}
+s[n.hash] = s1
+end
+gs << n
+s = s1
+end
+end
+g = s[:'$end']
+unless g
+if gs
+g = lr0_closure(gs)
+s[:'$end'] = g
+else
+s[:'$end'] = gs
+end
+end
+@lr_goto_cache[[i.hash,x]] = g
+g
+end
+
+def lr0_items
+c = [ lr0_closure([@grammar.productions[0].lr_next]) ]
+
+c.each_with_index { |c_i, j| @lr0_cidhash[c_i.hash] = j }
+
+i = 0
+while i < c.length
+c_i = c[i]
+i += 1
+
+asyms = Set.new
+c_i.each { |ii| ii.usyms.each { |s| asyms << s } }
+
+asyms.each do |x|
+g = lr0_goto(c_i, x)
+next if g.empty?
+next if @lr0_cidhash[g.hash]
+@lr0_cidhash[g.hash] = c.length
+c << g
+end
+end
+c
+end
+
+def compute_nullable_nonterminals
+nullable = {}
+num_nullable = 0
+while true
+@grammar.productions[1..-1].each do |p|
+if p.length == 0
+nullable[p.name] = 1
+next
+end
+found_t = false
+p.prod.each do |t|
+unless nullable[t]
+found_t = true
+break
+end
+end
+nullable[p.name] = 1 unless found_t
+end
+break if nullable.length == num_nullable
+num_nullable = nullable.length
+end
+nullable
+end
+
+def find_nonterminal_transitions(c)
+trans = []
+c.each_with_index do |a, state|
+a.each do |p|
+if p.lr_index < p.length - 1
+next_prod = p.prod[p.lr_index+1]
+if @grammar.nonterminals[next_prod]
+t = [state, next_prod]
+trans << t unless trans.include?(t)
+end
+end
+end
+end
+trans
+end
+
+def compute_read_sets(c, ntrans, nullable)
+fp = lambda { |x| dr_relation(c, x, nullable) }
+r = lambda { |x| reads_relation(c, x, nullable) }
+digraph(ntrans, r, fp)
+end
+
+def dr_relation(c, trans, nullable)
+dr_set = {}
+state, n = trans
+terms = []
+
+g = lr0_goto(c[state], n)
+g.each do |p|
+if p.lr_index < p.length - 1
+a = p.prod[p.lr_index+1]
+if @grammar.terminals.include?(a)
+terms << a unless terms.include?(a)
+end
+end
+end
+
+terms << :'$end' if state == 0 && n == @grammar.productions[0].prod[0]
+
+terms
+end
+
+def reads_relation(c, trans, empty)
+rel = []
+state, n = trans
+
+g = lr0_goto(c[state], n)
+j = @lr0_cidhash[g.hash] || -1
+g.each do |p|
+if p.lr_index < p.length - 1
+a = p.prod[p.lr_index + 1]
+rel << [j, a] if empty.include?(a)
+end
+end
+
+rel
+end
+
+def digraph(x, r, fp)
+n = {}
+x.each { |xx| n[xx] = 0 }
+stack = []
+f = {}
+x.each do |xx|
+traverse(xx, n, stack, f, x, r, fp) if n[xx] == 0
+end
+f
+end
+
+def traverse(xx, n, stack, f, x, r, fp)
+stack.push(xx)
+d = stack.length
+n[xx] = d
+f[xx] = fp.call(xx)
+
+rel = r.call(xx)
+rel.each do |y|
+traverse(y, n, stack, f, x, r, fp) if n[y] == 0
+
+n[xx] = [n[xx], n[y]].min
+
+arr = f[y] || []
+arr.each do |a|
+f[xx] << a unless f[xx].include?(a)
+end
+end
+if n[xx] == d
+n[stack[-1]] = MAXINT
+f[stack[-1]] = f[xx]
+element = stack.pop()
+while element != xx
+n[stack[-1]] = MAXINT
+f[stack[-1]] = f[xx]
+element = stack.pop()
+end
+end
+end
+
+def compute_lookback_includes(c, trans, nullable)
+lookdict = {}
+includedict = {}
+
+dtrans = trans.each_with_object({}) { |k, h| h[k] = 1 }
+
+trans.each do |state, n|
+lookb = []
+includes = []
+c[state].each do |p|
+next unless p.name == n
+
+lr_index = p.lr_index
+j = state
+while lr_index < p.length - 1
+lr_index = lr_index + 1
+t = p.prod[lr_index]
+
+if dtrans.include?([j,t])
+li = lr_index + 1
+escaped = false
+while li < p.length
+if @grammar.terminals[p.prod[li]]
+escaped = true
+break
+end
+unless nullable[p.prod[li]]
+escaped = true
+break
+end
+li = li + 1
+end
+includes << [j,t] unless escaped
+end
+
+g = lr0_goto(c[j],t)
+j = @lr0_cidhash[g.hash] || -1
+end
+
+c[j].each do |r|
+next unless r.name == p.name
+next unless r.length == p.length
+i = 0
+escaped = false
+while i < r.lr_index
+unless r.prod[i] == p.prod[i+1]
+escaped = true
+break
+end
+i = i + 1
+end
+lookb << [j,r] unless escaped
+end
+end
+includes.each do |i|
+includedict[i] = [] unless includedict[i]
+includedict[i] << [state, n]
+end
+lookdict[[state,n]] = lookb
+end
+
+[lookdict, includedict]
+end
+
+def compute_follow_sets(ntrans, readsets, inclsets)
+fp = lambda { |x| readsets[x] }
+r = lambda { |x| inclsets[x] || [] }
+digraph(ntrans, r, fp)
+end
+
+def add_lookaheads(lookbacks, followset)
+lookbacks.each do |trans, lb|
+lb.each do |state, p|
+p.lookaheads[state] = [] unless p.lookaheads[state]
+f = followset[trans] || []
+f.each do |a|
+p.lookaheads[state] << a unless p.lookaheads[state].include?(a)
+end
+end
+end
+end
+end
+end
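The table builder ties the pieces together: it takes a Grammar, builds the LR(0) item sets, adds LALR lookaheads, and fills the action and goto tables that Rly::Yacc (data/lib/rly/yacc.rb, +355 above, not shown in this section) consumes at parse time. A hedged sketch, again reusing the hypothetical grammar from the Grammar example:

```ruby
require "rly/parse/lr_table"

table = Rly::LRTable.new(g, :LALR)   # :SLR is the other supported method
table.parse_table                    # default log is PlyDump.stub, so no dump is written

table.lr_action[0]                   # per state: terminal => state to shift to, -rule index to reduce, 0 to accept
table.lr_goto[0]                     # per state: nonterminal => state to enter after a reduce
```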