rly 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +2 -2
- data/assets/ply_dump.erb +15 -0
- data/lib/rly.rb +2 -0
- data/lib/rly/lex.rb +54 -25
- data/lib/rly/lex_token.rb +8 -0
- data/lib/rly/parse/grammar.rb +211 -0
- data/lib/rly/parse/lr_item.rb +32 -0
- data/lib/rly/parse/lr_table.rb +529 -0
- data/lib/rly/parse/ply_dump.rb +52 -0
- data/lib/rly/parse/production.rb +38 -0
- data/lib/rly/parse/rule_parser.rb +68 -0
- data/lib/rly/parse/yacc_production.rb +11 -0
- data/lib/rly/parse/yacc_symbol.rb +6 -0
- data/lib/rly/version.rb +2 -1
- data/lib/rly/yacc.rb +355 -0
- data/spec/lex/{lexer_spec.rb → lex_spec.rb} +45 -24
- data/spec/parse/calc_spec.rb +95 -0
- data/spec/parse/grammar_spec.rb +239 -0
- data/spec/parse/lr_table_spec.rb +212 -0
- data/spec/parse/production_spec.rb +18 -0
- data/spec/parse/rule_parser_spec.rb +20 -0
- data/spec/parse/yacc_spec.rb +57 -0
- data/spec/spec_helper.rb +5 -0
- metadata +26 -4
data/README.md
CHANGED
@@ -2,8 +2,8 @@
 
 # Rly
 
-Rly is a lexer and parser generator for ruby, based on ideas and solutions of
-Python's [Ply](http://www.dabeaz.com/ply/).
+Rly is a lexer and parser generator for ruby (O RLY?), based on ideas and solutions of
+Python's [Ply](http://www.dabeaz.com/ply/) (in some places it's a total rip off actually).
 
 ## Installation
 
data/assets/ply_dump.erb
ADDED
@@ -0,0 +1,15 @@
+Created by PLY version 3.4 (http://www.dabeaz.com/ply)
+
+Grammar
+
+<% for p in g.productions %>Rule <%= sprintf("%-5d", p.index) %> <%= p %>
+<% end %>
+Terminals, with rules where they appear
+
+<% @t = g.terminals.keys.map {|k| k.to_s }.sort; for t in @t %><%= sprintf("%-20s : %s", t, (g.terminals[t] || g.terminals[t.to_sym]).join(' ')) %>
+<% end %>
+Nonterminals, with rules where they appear
+
+<% @t = g.nonterminals.keys.sort; for t in @t %><%= sprintf("%-20s : %s", t, g.nonterminals[t].join(' ')) %>
+<% end %>
+<%= backlog %>
data/lib/rly.rb
CHANGED
data/lib/rly/lex.rb
CHANGED
@@ -12,10 +12,10 @@ module Rly
   # configuration (check the methods documentation for details).
   #
   # Once you got your lexer configured, you can create its instances passing a
-  # String to be tokenized. You can then use
-  #
+  # String to be tokenized. You can then use {#next} method to get tokens. If you
+  # have more string to tokenize, you can append it to input buffer at any time with
+  # {#input}.
   class Lex
-    include Enumerable
 
     # Tracks the current line number for generated tokens
     #
@@ -55,21 +55,46 @@ module Rly
     #   end
     #
     #   lex = MyLexer.new("hello WORLD")
-    #   lex.
-    #
-    #
-    #
+    #   t = lex.next
+    #   puts "#{tok.type} -> #{tok.value}" #=> "LOWERS -> hello"
+    #   t = lex.next
+    #   puts "#{tok.type} -> #{tok.value}" #=> "UPPERS -> WORLD"
+    #   t = lex.next # => nil
     def initialize(input="")
       @input = input
       @pos = 0
       @lineno = 0
     end
 
+    def inspect
+      "#<#{self.class} pos=#{@pos} len=#{@input.length} lineno=#{@lineno}>"
+    end
+
+    # Appends string to input buffer
+    #
+    # The given string is appended to input buffer, further {#next} calls will
+    # tokenize it as usual.
+    #
+    # @api public
+    #
+    # @example
+    #   lex = MyLexer.new("hello")
+    #
+    #   t = lex.next
+    #   puts "#{tok.type} -> #{tok.value}" #=> "LOWERS -> hello"
+    #   t = lex.next # => nil
+    #   lex.input("WORLD")
+    #   t = lex.next
+    #   puts "#{tok.type} -> #{tok.value}" #=> "UPPERS -> WORLD"
+    #   t = lex.next # => nil
+    def input(input)
+      @input << input
+    end
+
     # Processes the next token in input
     #
-    # This is the main interface to lexer.
-    #
-    # returns an Enumerator object.
+    # This is the main interface to lexer. It returns next available token or **nil**
+    # if there are no more tokens available in the input string.
     #
     # {#each} Raises {LexError} if the input cannot be processed. This happens if
     # there were no matches by 'token' rules and no matches by 'literals' rule.
@@ -78,23 +103,19 @@ module Rly
     # after returning from error handler is still unchanged.
     #
     # @api public
-    # @yieldparam tok [LexToken] next processed token
     # @raise [LexError] if the input cannot be processed
-    # @return [
-    # @return [nil] if
+    # @return [LexToken] if the next chunk of input was processed successfully
+    # @return [nil] if there are no more tokens available in input
     #
     # @example
     #   lex = MyLexer.new("hello WORLD")
     #
-    #   lex.
-    #
-    #   lex.
-    #
-    #
-
-    def each
-      return self.to_enum unless block_given?
-
+    #   t = lex.next
+    #   puts "#{tok.type} -> #{tok.value}" #=> "LOWERS -> hello"
+    #   t = lex.next
+    #   puts "#{tok.type} -> #{tok.value}" #=> "UPPERS -> WORLD"
+    #   t = lex.next # => nil
+    def next
       while @pos < @input.length
         if self.class.ignores_list[@input[@pos]]
           @pos += 1
@@ -105,15 +126,17 @@ module Rly
         self.class.tokens.each do |type, rule, block|
           m = rule.match(@input, @pos)
          next unless m
+          next unless m.begin(0) == @pos
 
          tok = LexToken.new(type, m[0], self)
 
          matched = true
 
          tok = block.call(tok) if block
-          yield tok if tok.type
 
          @pos = m.end(0)
+
+          return tok if tok.type
        end
 
        unless matched
@@ -121,8 +144,10 @@ module Rly
             tok = LexToken.new(@input[@pos], @input[@pos], self)
 
            matched = true
-
+
            @pos += 1
+
+            return tok
          end
        end
 
@@ -134,16 +159,20 @@ module Rly
             if pos == @pos
              raise LexError.new("Illegal character '#{@input[@pos]}' at index #{@pos}")
            else
-
+              return tok if tok && tok.type
            end
          else
            raise LexError.new("Illegal character '#{@input[@pos]}' at index #{@pos}")
          end
        end
      end
+      return nil
    end
 
    class << self
+      def terminals
+        self.tokens.map { |t,r,b| t }.compact + self.literals_list.chars.to_a
+      end
      # Returns the list of registered tokens
      #
      # @api private
data/lib/rly/lex_token.rb
CHANGED
data/lib/rly/parse/grammar.rb
ADDED
@@ -0,0 +1,211 @@
+require "rly/parse/production"
+require "rly/parse/lr_item"
+
+module Rly
+  class Grammar
+    attr_reader :terminals, :nonterminals, :productions, :prodnames, :start, :precedence
+
+    def initialize(terminals)
+      @productions = [nil]
+      @prodnames = {}
+      @prodmap = {}
+
+      @terminals = {}
+      terminals.each do |t|
+        raise ArgumentError unless t.upcase == t
+        @terminals[t] = []
+      end
+      @terminals[:error] = []
+
+      @nonterminals = {}
+      @first = {}
+      @follow = {}
+      @precedence = {}
+      @used_precedence = {}
+      @start = nil
+    end
+
+    def add_production(name, symbols, &block)
+      raise ArgumentError unless name.downcase == name
+      raise ArgumentError if name == :error
+
+      symbols.each do |sym|
+        if sym.is_a?(String)
+          raise ArgumentError unless sym.length == 1
+          @terminals[sym] = [] unless @terminals[sym]
+        end
+      end
+
+      precedence = prec_for_rightmost_terminal(symbols)
+
+      mapname = "#{name.to_s} -> #{symbols.to_s}"
+      raise ArgumentError if @prodmap[mapname]
+
+      index = @productions.count
+      @nonterminals[name] = [] unless @nonterminals[name]
+
+      symbols.each do |sym|
+        if @terminals[sym]
+          @terminals[sym] << index
+        else
+          @nonterminals[sym] = [] unless @nonterminals[sym]
+          @nonterminals[sym] << index
+        end
+      end
+
+      p = Production.new(index, name, symbols, precedence, block)
+
+      @productions << p
+      @prodmap[mapname] = p
+
+      @prodnames[name] = [] unless @prodnames[name]
+      @prodnames[name] << p
+
+      p
+    end
+
+    def set_precedence(term, assoc, level)
+      raise RuntimeError if @productions != [nil]
+      raise ArgumentError if @precedence[term]
+      raise ArgumentError unless [:left, :right, :noassoc].include?(assoc)
+
+      @precedence[term] = [assoc, level]
+    end
+
+    def set_start(symbol=nil)
+      symbol = @productions[1].name unless symbol
+      raise ArgumentError unless @nonterminals[symbol]
+      @productions[0] = Production.new(0, :"S'", [symbol])
+      @nonterminals[symbol] << 0
+      @start = symbol
+    end
+
+    def build_lritems
+      @productions.each do |p|
+        lastlri = p
+        i = 0
+        lr_items = []
+        while true do
+          if i > p.length
+            lri = nil
+          else
+            lri = LRItem.new(p,i)
+            lri.lr_after = @prodnames[lri.prod[i+1]] || []
+            lri.lr_before = lri.prod[i-1] || nil
+          end
+
+          lastlri.lr_next = lri
+          break unless lri
+          lr_items << lri
+          lastlri = lri
+          i += 1
+        end
+        p.lr_items = lr_items
+      end
+    end
+
+    def compute_first
+      return @first unless @first.empty?
+
+      @terminals.keys.each { |t| @first[t] = [t] }
+      @first[:'$end'] = [:'$end']
+      @nonterminals.keys.each { |n| @first[n] = [] }
+      while true
+        any_changes = false
+        nonterminals.keys.each do |n|
+          @prodnames[n].each do |p|
+            _first(p.prod).each do |f|
+              unless @first[n].include?(f)
+                @first[n] << f
+                any_changes = true
+              end
+            end
+          end
+        end
+        break unless any_changes
+      end
+
+      @first
+    end
+
+    def compute_follow(start=nil)
+      return @follow unless @follow.empty?
+
+      compute_first if @first.empty?
+
+      @nonterminals.keys.each { |n| @follow[n] = [] }
+
+      start = @productions[1].name unless start
+
+      @follow[start] = [:'$end']
+
+      while true
+        didadd = false
+        @productions[1..-1].each do |p|
+          p.prod.length.times do |i|
+            b = p.prod[i]
+            next unless @nonterminals.include?(b)
+
+            fst = _first(p.prod[i+1..-1])
+            hasempty = false
+            fst.each do |f|
+              if f != :'<empty>' && !@follow[b].include?(f)
+                @follow[b] << f
+                didadd = true
+              end
+              hasempty = true if f == :'<empty>'
+            end
+            if hasempty || i == p.prod.length - 1
+              @follow[p.name].each do |f|
+                unless @follow[b].include?(f)
+                  @follow[b] << f
+                  didadd = true
+                end
+              end
+            end
+          end
+        end
+        break unless didadd
+      end
+
+      @follow
+    end
+
+    private
+    def _first(beta)
+      result = []
+      should_add_empty = true
+
+      beta.each do |x|
+        x_produces_empty = false
+
+        @first[x].each do |f|
+          if f == :'<empty>'
+            x_produces_empty = true
+          else
+            result << f unless result.include?(f)
+          end
+        end
+
+        if x_produces_empty
+          next
+        else
+          should_add_empty = false
+          break
+        end
+      end
+      result << :'<empty>' if should_add_empty
+
+      result
+    end
+
+    def prec_for_rightmost_terminal(symbols)
+      symbols.reverse_each do |sym|
+        next unless @terminals[sym]
+
+        return @precedence[sym] || [:right, 0]
+      end
+      [:right, 0]
+    end
+  end
+end
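For orientation, a hedged sketch of how the Grammar API added above hangs together; the grammar below is invented for illustration and only uses methods visible in this file:

    require "rly/parse/grammar"

    # Terminals must be uppercase symbols; single-character string literals
    # used in productions are registered as terminals automatically.
    g = Rly::Grammar.new([:NAME, :NUMBER])

    g.add_production(:assignment, [:NAME, '=', :expression])
    g.add_production(:expression, [:NUMBER])
    g.set_start(:assignment)

    g.build_lritems      # attach LR items to every production
    g.compute_first      # FIRST sets, memoized in @first
    g.compute_follow     # FOLLOW sets, seeded with :'$end' for the start symbol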
data/lib/rly/parse/lr_item.rb
ADDED
@@ -0,0 +1,32 @@
+module Rly
+  class LRItem
+    attr_accessor :lr_after, :lr_before, :lr_next
+    attr_reader :prod, :name, :usyms, :lr_index, :length, :lookaheads, :index
+
+    def initialize(p, n)
+      @name = p.name
+      @prod = p.prod.dup
+      @index = p.index
+      @lr_index = n
+      @lookaheads = {}
+      @prod.insert(n, :'.')
+      @length = @prod.length
+      @usyms = p.usyms
+
+      @lr_items = []
+      @lr_next = nil
+    end
+
+    def to_s
+      if @prod
+        "#{@name} -> #{@prod.join(' ')}"
+      else
+        "#{@name} -> <empty>"
+      end
+    end
+
+    def inspect
+      "#<LRItem #{to_s}>"
+    end
+  end
+end
data/lib/rly/parse/lr_table.rb
ADDED
@@ -0,0 +1,529 @@
+require "set"
+require "rly/parse/ply_dump"
+
+module Rly
+  class LRTable
+    MAXINT = (2**(0.size * 8 -2) -1)
+
+    attr_reader :lr_action, :lr_goto, :lr_productions
+
+    def initialize(grammar, method=:LALR)
+      raise ArgumentError unless [:LALR, :SLR].include?(method)
+
+      @grammar = grammar
+      @lr_method = method
+
+      @lr_action = {}
+      @lr_goto = {}
+      @lr_productions = grammar.productions
+      @lr_goto_cache = {}
+      @lr0_cidhash = {}
+
+      @add_count = 0
+
+      @sr_conflict = 0
+      @rr_conflict = 0
+      @conflicts = []
+
+      @sr_conflicts = []
+      @rr_conflicts = []
+
+      grammar.build_lritems
+      grammar.compute_first
+      grammar.compute_follow
+    end
+
+    def parse_table(log=PlyDump.stub)
+      productions = @grammar.productions
+      precedence = @grammar.precedence
+
+      actionp = {}
+
+      log.info("Parsing method: %s", @lr_method)
+
+      c = lr0_items
+
+      add_lalr_lookaheads(c) if @lr_method == :LALR
+
+      # Build the parser table, state by state
+      st = 0
+      c.each do |i|
+        # Loop over each production in I
+        actlist = [] # List of actions
+        st_action = {}
+        st_actionp = {}
+        st_goto = {}
+        log.info("")
+        log.info("state %d", st)
+        log.info("")
+        i.each { |p| log.info(" (%d) %s", p.index, p.to_s) }
+        log.info("")
+
+        i.each do |p|
+          if p.length == p.lr_index + 1
+            if p.name == :"S'"
+              # Start symbol. Accept!
+              st_action[:"$end"] = 0
+              st_actionp[:"$end"] = p
+            else
+              # We are at the end of a production. Reduce!
+              if @lr_method == :LALR
+                laheads = p.lookaheads[st]
+              else
+                laheads = @grammar.follow[p.name]
+              end
+              laheads.each do |a|
+                actlist << [a, p, sprintf("reduce using rule %d (%s)", p.index, p)]
+                r = st_action[a]
+                if r
+                  # Whoa. Have a shift/reduce or reduce/reduce conflict
+                  if r > 0
+                    # Need to decide on shift or reduce here
+                    # By default we favor shifting. Need to add
+                    # some precedence rules here.
+                    sprec, slevel = productions[st_actionp[a].number].prec
+                    rprec, rlevel = precedence[a] || [:right, 0]
+                    if (slevel < rlevel) || ((slevel == rlevel) && (rprec == :left))
+                      # We really need to reduce here.
+                      st_action[a] = -p.number
+                      st_actionp[a] = p
+                      if ! slevel && ! rlevel
+                        log.info("  ! shift/reduce conflict for %s resolved as reduce",a)
+                        @sr_conflicts << [st, a, 'reduce']
+                      end
+                      productions[p.number].reduced += 1
+                    elsif (slevel == rlevel) && (rprec == :nonassoc)
+                      st_action[a] = nil
+                    else
+                      # Hmmm. Guess we'll keep the shift
+                      unless rlevel
+                        log.info("  ! shift/reduce conflict for %s resolved as shift",a)
+                        @sr_conflicts << [st,a,'shift']
+                      end
+                    end
+                  elsif r < 0
+                    # Reduce/reduce conflict. In this case, we favor the rule
+                    # that was defined first in the grammar file
+                    oldp = productions[-r]
+                    pp = productions[p.number]
+                    if oldp.line > pp.line
+                      st_action[a] = -p.number
+                      st_actionp[a] = p
+                      chosenp = pp
+                      rejectp = oldp
+                      productions[p.number].reduced += 1
+                      productions[oldp.number].reduced -= 1
+                    else
+                      chosenp,rejectp = oldp,pp
+                    end
+                    @rr_conflicts << [st, chosenp, rejectp]
+                    log.info("  ! reduce/reduce conflict for %s resolved using rule %d (%s)", a, st_actionp[a].number, st_actionp[a])
+                  else
+                    raise RuntimeError("Unknown conflict in state #{st}")
+                  end
+                else
+                  st_action[a] = -p.index
+                  st_actionp[a] = p
+                  productions[p.index].reduced += 1
+                end
+              end
+            end
+          else # <-- level ok
+            # i = p.lr_index
+            a = p.prod[p.lr_index+1] # Get symbol right after the "."
+            if @grammar.terminals.include?(a)
+              g = lr0_goto(i, a)
+              j = @lr0_cidhash[g.hash] || -1
+              if j >= 0
+                # We are in a shift state
+                actlist << [a, p, sprintf("shift and go to state %d", j)]
+                r = st_action[a]
+                if r
+                  # Whoa have a shift/reduce or shift/shift conflict
+                  if r > 0
+                    if r != j
+                      raise RuntimeError("Shift/shift conflict in state #{st}")
+                    end
+                  elsif r < 0
+                    # Do a precedence check.
+                    #   -  if precedence of reduce rule is higher, we reduce.
+                    #   -  if precedence of reduce is same and left assoc, we reduce.
+                    #   -  otherwise we shift
+                    rprec, rlevel = productions[st_actionp[a].index].precedence
+                    sprec, slevel = precedence[a] || [:right, 0]
+                    if (slevel > rlevel) || ((slevel == rlevel) && (rprec == :right))
+                      # We decide to shift here... highest precedence to shift
+                      productions[st_actionp[a].index].reduced -= 1
+                      st_action[a] = j
+                      st_actionp[a] = p
+                      unless rlevel
+                        log.info("  ! shift/reduce conflict for %s resolved as shift",a)
+                        @sr_conflicts << [st, a, 'shift']
+                      end
+                    elsif (slevel == rlevel) && (rprec == :nonassoc)
+                      st_action[a] = nil
+                    else
+                      # Hmmm. Guess we'll keep the reduce
+                      if ! slevel && ! rlevel
+                        log.info("  ! shift/reduce conflict for %s resolved as reduce",a)
+                        @sr_conflicts << [st, a, 'reduce']
+                      end
+                    end
+                  else
+                    raise RuntimeError("Unknown conflict in state #{st}")
+                  end
+                else
+                  st_action[a] = j
+                  st_actionp[a] = p
+                end
+              end
+            end
+          end
+        end
+
+        # Print the actions associated with each terminal
+        _actprint = {}
+        actlist.each do |a, p, m|
+          if st_action[a]
+            if p == st_actionp[a]
+              log.info("    %-15s %s",a,m)
+              _actprint[[a,m]] = 1
+            end
+          end
+        end
+        log.info("")
+        # Print the actions that were not used. (debugging)
+        not_used = false
+        actlist.each do |a, p, m|
+          if st_action[a]
+            unless p == st_actionp[a]
+              unless _actprint[[a,m]]
+                log.debug("  ! %-15s [ %s ]", a, m)
+                not_used = true
+                _actprint[[a,m]] = 1
+              end
+            end
+          end
+        end
+        log.debug("") if not_used
+
+        # Construct the goto table for this state
+
+        nkeys = {}
+        i.each do |ii|
+          ii.usyms.each do |s|
+            nkeys[s] = nil if @grammar.nonterminals.include?(s)
+          end
+        end
+        nkeys.each do |n, _|
+          g = lr0_goto(i, n)
+          j = @lr0_cidhash[g.hash] || -1
+          if j >= 0
+            st_goto[n] = j
+            log.info("    %-30s shift and go to state %d",n,j)
+          end
+        end
+
+        @lr_action[st] = st_action
+        actionp[st] = st_actionp
+        @lr_goto[st] = st_goto
+        st += 1
+      end
+    end
+
+    private
+    def add_lalr_lookaheads(c)
+      nullable = compute_nullable_nonterminals
+      trans = find_nonterminal_transitions(c)
+      readsets = compute_read_sets(c, trans, nullable)
+      lookd, included = compute_lookback_includes(c, trans, nullable)
+      followsets = compute_follow_sets(trans, readsets, included)
+      add_lookaheads(lookd, followsets)
+    end
+
+    def lr0_closure(i)
+      @add_count += 1
+
+      # Add everything in I to J
+      j = i.dup
+      didadd = true
+      while didadd
+        didadd = false
+        j.each do |k|
+          k.lr_after.each do |x|
+            next if x.lr0_added == @add_count
+            # Add B --> .G to J
+            j << x.lr_next
+            x.lr0_added = @add_count
+            didadd = true
+          end
+        end
+      end
+      j
+    end
+
+    def lr0_goto(i, x)
+      g = @lr_goto_cache[[i.hash, x]]
+      return g if g
+
+      s = @lr_goto_cache[x]
+      unless s
+        s = {}
+        @lr_goto_cache[x] = s
+      end
+
+      gs = []
+      i.each do |p|
+        n = p.lr_next
+        if n and n.lr_before == x
+          s1 = s[n.hash]
+          unless s1
+            s1 = {}
+            s[n.hash] = s1
+          end
+          gs << n
+          s = s1
+        end
+      end
+      g = s[:'$end']
+      unless g
+        if gs
+          g = lr0_closure(gs)
+          s[:'$end'] = g
+        else
+          s[:'$end'] = gs
+        end
+      end
+      @lr_goto_cache[[i.hash,x]] = g
+      g
+    end
+
+    def lr0_items
+      c = [ lr0_closure([@grammar.productions[0].lr_next]) ]
+
+      c.each_with_index { |c_i, j| @lr0_cidhash[c_i.hash] = j }
+
+      i = 0
+      while i < c.length
+        c_i = c[i]
+        i += 1
+
+        asyms = Set.new
+        c_i.each { |ii| ii.usyms.each { |s| asyms << s } }
+
+        asyms.each do |x|
+          g = lr0_goto(c_i, x)
+          next if g.empty?
+          next if @lr0_cidhash[g.hash]
+          @lr0_cidhash[g.hash] = c.length
+          c << g
+        end
+      end
+      c
+    end
+
+    def compute_nullable_nonterminals
+      nullable = {}
+      num_nullable = 0
+      while true
+        @grammar.productions[1..-1].each do |p|
+          if p.length == 0
+            nullable[p.name] = 1
+            next
+          end
+          found_t = false
+          p.prod.each do |t|
+            unless nullable[t]
+              found_t = true
+              break
+            end
+          end
+          nullable[p.name] = 1 unless found_t
+        end
+        break if nullable.length == num_nullable
+        num_nullable = nullable.length
+      end
+      nullable
+    end
+
+    def find_nonterminal_transitions(c)
+      trans = []
+      c.each_with_index do |a, state|
+        a.each do |p|
+          if p.lr_index < p.length - 1
+            next_prod = p.prod[p.lr_index+1]
+            if @grammar.nonterminals[next_prod]
+              t = [state, next_prod]
+              trans << t unless trans.include?(t)
+            end
+          end
+        end
+      end
+      trans
+    end
+
+    def compute_read_sets(c, ntrans, nullable)
+      fp = lambda { |x| dr_relation(c, x, nullable) }
+      r = lambda { |x| reads_relation(c, x, nullable) }
+      digraph(ntrans, r, fp)
+    end
+
+    def dr_relation(c, trans, nullable)
+      dr_set = {}
+      state, n = trans
+      terms = []
+
+      g = lr0_goto(c[state], n)
+      g.each do |p|
+        if p.lr_index < p.length - 1
+          a = p.prod[p.lr_index+1]
+          if @grammar.terminals.include?(a)
+            terms << a unless terms.include?(a)
+          end
+        end
+      end
+
+      terms << :'$end' if state == 0 && n == @grammar.productions[0].prod[0]
+
+      terms
+    end
+
+    def reads_relation(c, trans, empty)
+      rel = []
+      state, n = trans
+
+      g = lr0_goto(c[state], n)
+      j = @lr0_cidhash[g.hash] || -1
+      g.each do |p|
+        if p.lr_index < p.length - 1
+          a = p.prod[p.lr_index + 1]
+          rel << [j, a] if empty.include?(a)
+        end
+      end
+
+      rel
+    end
+
+    def digraph(x, r, fp)
+      n = {}
+      x.each { |xx| n[xx] = 0 }
+      stack = []
+      f = {}
+      x.each do |xx|
+        traverse(xx, n, stack, f, x, r, fp) if n[xx] == 0
+      end
+      f
+    end
+
+    def traverse(xx, n, stack, f, x, r, fp)
+      stack.push(xx)
+      d = stack.length
+      n[xx] = d
+      f[xx] = fp.call(xx)
+
+      rel = r.call(xx)
+      rel.each do |y|
+        traverse(y, n, stack, f, x, r, fp) if n[y] == 0
+
+        n[xx] = [n[xx], n[y]].min
+
+        arr = f[y] || []
+        arr.each do |a|
+          f[xx] << a unless f[xx].include?(a)
+        end
+      end
+      if n[xx] == d
+        n[stack[-1]] = MAXINT
+        f[stack[-1]] = f[xx]
+        element = stack.pop()
+        while element != xx
+          n[stack[-1]] = MAXINT
+          f[stack[-1]] = f[xx]
+          element = stack.pop()
+        end
+      end
+    end
+
+    def compute_lookback_includes(c, trans, nullable)
+      lookdict = {}
+      includedict = {}
+
+      dtrans = trans.each_with_object({}) { |k, h| h[k] = 1 }
+
+      trans.each do |state, n|
+        lookb = []
+        includes = []
+        c[state].each do |p|
+          next unless p.name == n
+
+          lr_index = p.lr_index
+          j = state
+          while lr_index < p.length - 1
+            lr_index = lr_index + 1
+            t = p.prod[lr_index]
+
+            if dtrans.include?([j,t])
+              li = lr_index + 1
+              escaped = false
+              while li < p.length
+                if @grammar.terminals[p.prod[li]]
+                  escaped = true
+                  break
+                end
+                unless nullable[p.prod[li]]
+                  escaped = true
+                  break
+                end
+                li = li + 1
+              end
+              includes << [j,t] unless escaped
+            end
+
+            g = lr0_goto(c[j],t)
+            j = @lr0_cidhash[g.hash] || -1
+          end
+
+          c[j].each do |r|
+            next unless r.name == p.name
+            next unless r.length == p.length
+            i = 0
+            escaped = false
+            while i < r.lr_index
+              unless r.prod[i] == p.prod[i+1]
+                escaped = true
+                break
+              end
+              i = i + 1
+            end
+            lookb << [j,r] unless escaped
+          end
+        end
+        includes.each do |i|
+          includedict[i] = [] unless includedict[i]
+          includedict[i] << [state, n]
+        end
+        lookdict[[state,n]] = lookb
+      end
+
+      [lookdict, includedict]
+    end
+
+    def compute_follow_sets(ntrans, readsets, inclsets)
+      fp = lambda { |x| readsets[x] }
+      r = lambda { |x| inclsets[x] || [] }
+      digraph(ntrans, r, fp)
+    end
+
+    def add_lookaheads(lookbacks, followset)
+      lookbacks.each do |trans, lb|
+        lb.each do |state, p|
+          p.lookaheads[state] = [] unless p.lookaheads[state]
+          f = followset[trans] || []
+          f.each do |a|
+            p.lookaheads[state] << a unless p.lookaheads[state].include?(a)
+          end
+        end
+      end
+    end
+  end
+end
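And a matching, hedged sketch of feeding a grammar to the table generator added above (`g` is the Rly::Grammar instance from the earlier sketch; this is an illustration of the API shown in this file, not documented usage):

    require "rly/parse/lr_table"

    # LRTable#initialize calls build_lritems/compute_first/compute_follow on
    # the grammar itself, so a grammar with a start symbol set is all it needs.
    table = Rly::LRTable.new(g)     # :LALR by default, :SLR is also accepted
    table.parse_table               # fills #lr_action and #lr_goto, state by state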