rly 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +2 -2
- data/assets/ply_dump.erb +15 -0
- data/lib/rly.rb +2 -0
- data/lib/rly/lex.rb +54 -25
- data/lib/rly/lex_token.rb +8 -0
- data/lib/rly/parse/grammar.rb +211 -0
- data/lib/rly/parse/lr_item.rb +32 -0
- data/lib/rly/parse/lr_table.rb +529 -0
- data/lib/rly/parse/ply_dump.rb +52 -0
- data/lib/rly/parse/production.rb +38 -0
- data/lib/rly/parse/rule_parser.rb +68 -0
- data/lib/rly/parse/yacc_production.rb +11 -0
- data/lib/rly/parse/yacc_symbol.rb +6 -0
- data/lib/rly/version.rb +2 -1
- data/lib/rly/yacc.rb +355 -0
- data/spec/lex/{lexer_spec.rb → lex_spec.rb} +45 -24
- data/spec/parse/calc_spec.rb +95 -0
- data/spec/parse/grammar_spec.rb +239 -0
- data/spec/parse/lr_table_spec.rb +212 -0
- data/spec/parse/production_spec.rb +18 -0
- data/spec/parse/rule_parser_spec.rb +20 -0
- data/spec/parse/yacc_spec.rb +57 -0
- data/spec/spec_helper.rb +5 -0
- metadata +26 -4
@@ -0,0 +1,52 @@
|
|
1
|
+
require "erb"
require "rly/parse/lr_table"
|
2
|
+
|
3
|
+
module Rly
  # Collects the diagnostic lines emitted while an LR table is built and
  # renders them, together with the grammar, through the bundled
  # assets/ply_dump.erb template.
  #
  # An instance is handed to LRTable#parse_table as its logger; the table
  # builder reports through #info and #debug.
  class PlyDump
    # @return [String] every logged line so far, each terminated by "\n"
    attr_reader :backlog

    # @param grammar [Object, nil] grammar to build an LR table for; when nil
    #   no table is built and the dump only records what is logged to it
    def initialize(grammar)
      @grammar = grammar
      # dup so the buffer is mutable even if string literals are frozen
      @backlog = "".dup
      return unless grammar

      @t = Rly::LRTable.new(grammar)
      @t.parse_table(self)
    end

    # Renders the dump through the ERB template shipped with the gem.
    #
    # @return [String] the rendered template
    def to_s
      fn = File.join(File.dirname(__FILE__), '..', '..', '..', 'assets', 'ply_dump.erb')
      # File.read closes the handle; the previous open(fn).read leaked it
      template = ERB.new(File.read(fn))
      template.result(TinyContext.new(g: @grammar, backlog: @backlog).get_binding)
    end

    # Builds a dump that is not attached to any grammar.
    #
    # @return [PlyDump]
    def self.stub
      new(nil)
    end

    # Appends a formatted line to the backlog.
    #
    # @param args [Array] format string followed by its arguments (as for sprintf)
    # @return [String] the backlog
    def info(*args)
      append_line(*args)
    end

    # Appends a formatted line to the backlog. Debug and info output share the
    # same buffer.
    #
    # @param args [Array] format string followed by its arguments (as for sprintf)
    # @return [String] the backlog
    def debug(*args)
      append_line(*args)
    end

    # Minimal evaluation context for the ERB template: every key of the hash
    # passed to the constructor can be read as a bare name inside the template.
    class TinyContext
      # @param ctx [Hash{Symbol => Object}] names exposed to the template
      def initialize(ctx)
        @ctx = ctx
      end

      # @return [Binding] binding whose self is this context
      def get_binding
        binding()
      end

      # Resolves argument-less calls to keys of the context hash. Anything else
      # is a genuine missing method and is reported as such instead of
      # silently evaluating to nil.
      def method_missing(name, *args)
        return super unless args.empty? && @ctx.key?(name)

        @ctx[name]
      end

      # Keeps respond_to? consistent with method_missing.
      def respond_to_missing?(name, include_private = false)
        @ctx.key?(name) || super
      end
    end

    private

    # Formats the arguments with sprintf and appends the line in place, so the
    # backlog is not copied for every logged line.
    def append_line(*args)
      @backlog << sprintf(*args) << "\n"
    end
  end
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
module Rly
  # One grammar production: a left-hand-side name plus the ordered list of
  # right-hand-side symbols, with the bookkeeping slots used while the LR
  # items and tables are generated.
  class Production
    attr_reader :index, :name, :prod, :precedence, :block, :usyms, :line
    attr_accessor :lr_items, :lr_next
    attr_writer :lr0_added, :reduced

    # FIXME line!!!
    #
    # @param index [Integer] position of the production in the grammar
    # @param name [Symbol] left-hand-side symbol
    # @param prod [Array] right-hand-side symbols, in order
    # @param precedence [Array] associativity and level pair
    # @param block [Proc, nil] action attached to the production
    def initialize(index, name, prod, precedence=[:right, 0], block=nil)
      @index, @name, @prod = index, name, prod
      @precedence, @block = precedence, block

      # Distinct right-hand-side symbols, in order of first appearance.
      @usyms = prod.each_with_object([]) do |sym, seen|
        seen << sym unless seen.include?(sym)
      end
    end

    # @return [String] "name -> sym sym ..." rendering of the production
    def to_s
      rhs = @prod.map(&:to_s).join(' ')
      "#{name} -> #{rhs}"
    end

    # @return [String] short form wrapping #to_s
    def inspect
      "#<Production #{to_s}>"
    end

    # @return [Integer] number of right-hand-side symbols
    def length
      @prod.length
    end

    # @return [Object] value assigned via lr0_added=, 0 until it is set
    def lr0_added
      @lr0_added ||= 0
    end

    # @return [Object] value assigned via reduced=, 0 until it is set
    def reduced
      @reduced ||= 0
    end
  end
end
|
@@ -0,0 +1,68 @@
|
|
1
|
+
require "rly/lex"
|
2
|
+
require "rly/parse/grammar"
|
3
|
+
require "rly/parse/lr_table"
|
4
|
+
|
5
|
+
module Rly
  # Parser for textual rule descriptions of the form
  # "name : sym sym | sym ...". Parsing a description yields a list of
  # [name, symbols] pairs, one per alternative.
  class RuleParser < Yacc
    # Lexer class for rule descriptions; created on first use and cached on
    # the class.
    def self.lexer_class
      @lexer_class ||= Class.new(Lex) do
        token :ID, /[a-zA-Z_][a-zA-Z_0-9]*/
        # A quoted single character; the token value is the bare character.
        token :LITERAL, /"."|'.'/ do |tok|
          tok.value = tok.value[1]
          tok
        end
        literals ":|"
        ignore " \t\n"
      end
    end

    # Builds (once) the hard-wired grammar of rule descriptions and the LR
    # table that goes with it.
    #
    # @return [Grammar] the cached grammar
    def grammar
      unless @grammar
        @grammar = Grammar.new(self.class.lexer_class.terminals)

        # grammar : ID ':' rules  -- pair every alternative with the rule name
        @grammar.add_production(:grammar, [:ID, ':', :rules]) do |result, rule_name, _colon, alternatives|
          pairs = []
          alternatives.value.each { |symbols| pairs.push([rule_name.value.to_sym, symbols]) }
          result.value = pairs
        end

        # rules : rule '|' rules
        @grammar.add_production(:rules, [:rule, '|', :rules]) do |result, head, _bar, tail|
          result.value = [head.value] + tail.value
        end

        # rules : rule
        @grammar.add_production(:rules, [:rule]) do |result, only|
          result.value = [only.value]
        end

        # rule : tokens
        @grammar.add_production(:rule, [:tokens]) do |result, symbols|
          result.value = symbols.value
        end

        # tokens : ID tokens  -- identifiers become symbols
        @grammar.add_production(:tokens, [:ID, :tokens]) do |result, ident, rest|
          result.value = [ident.value.to_sym] + rest.value
        end

        # tokens : LITERAL tokens  -- literals stay as one-character strings
        @grammar.add_production(:tokens, [:LITERAL, :tokens]) do |result, literal, rest|
          result.value = [literal.value] + rest.value
        end

        # tokens : ID
        @grammar.add_production(:tokens, [:ID]) do |result, ident|
          result.value = [ident.value.to_sym]
        end

        # tokens : LITERAL
        @grammar.add_production(:tokens, [:LITERAL]) do |result, literal|
          result.value = [literal.value]
        end

        @grammar.set_start
        @grammar.build_lritems

        @lr_table = LRTable.new(@grammar)
        @lr_table.parse_table
      end

      @grammar
    end
  end
end
|
data/lib/rly/version.rb
CHANGED
data/lib/rly/yacc.rb
ADDED
@@ -0,0 +1,355 @@
|
|
1
|
+
require "rly/lex"
|
2
|
+
require "rly/parse/grammar"
|
3
|
+
require "rly/parse/yacc_production"
|
4
|
+
require "rly/parse/yacc_symbol"
|
5
|
+
|
6
|
+
module Rly
  # Raised from inside a rule action to make the parser enter error recovery.
  class YaccError < RuntimeError; end

  # Table-driven LR parser. Subclasses describe the grammar with the class
  # level DSL (.rule, .precedence, .lexer); an instance builds the grammar and
  # its LR table on construction and runs it with #parse.
  class Yacc
    attr_reader :lex, :lr_table

    # @param lex [Object, nil] lexer instance; when nil an instance of the
    #   class-level lexer_class is created
    # @raise [ArgumentError] if neither a lexer nor a lexer_class is available
    def initialize(lex=nil)
      if lex.nil? && self.class.lexer_class.nil?
        raise ArgumentError, "No lexer available"
      end

      @lex = lex || self.class.lexer_class.new

      # Building the grammar also builds @lr_table.
      @grammar = grammar
    end

    # Runs the LR automaton over the token stream of the lexer.
    #
    # @param input [String, nil] text handed to the lexer before parsing; when
    #   nil the lexer is used as it is
    # @return [Object, nil] value of the start symbol, or nil when the input
    #   could not be parsed
    def parse(input=nil)
      actions = @lr_table.lr_action
      goto = @lr_table.lr_goto
      prod = @lr_table.lr_productions

      lookahead = nil
      lookaheadstack = []
      errorcount = 0

      # If input was supplied, pass to lexer
      @lex.input(input) if input

      # The start state is assumed to be (0,$end)
      @statestack = [0]
      @symstack = [yacc_symbol(:"$end")]
      state = 0

      # Deliberately not Kernel#loop: loop would swallow a StopIteration
      # raised by the lexer or by a rule action.
      while true
        # Get the next symbol on the input. If a lookahead symbol is already
        # set, we just use that. Otherwise, we'll pull the next token off of
        # the lookaheadstack or from the lexer; a nil token means end of input.
        unless lookahead
          lookahead = lookaheadstack.empty? ? @lex.next : lookaheadstack.pop
          lookahead ||= yacc_symbol(:"$end")
        end

        # Check the action table
        t = actions[state][lookahead.type]

        if t
          if t > 0
            # shift a symbol on the stack
            @statestack.push(t)
            state = t
            @symstack.push(lookahead)
            lookahead = nil

            # Decrease error count on successful shift
            errorcount -= 1 if errorcount > 0
            next
          end

          if t < 0
            # reduce a symbol on the stack, emit a production
            production = prod[-t]
            pname = production.name
            plen = production.length

            sym = yacc_symbol(pname)
            sym.value = nil

            # The action receives the result symbol followed by the popped
            # right-hand-side symbols. pop(0) yields [], so empty productions
            # need no separate code path.
            targ = @symstack.pop(plen)
            targ.unshift(sym)

            begin
              @statestack.pop(plen)
              instance_exec(*targ, &production.block)

              @symstack.push(sym)
              state = goto[@statestack[-1]][pname]
              @statestack.push(state)
            rescue YaccError
              # The action flagged an error. Enter error recovery state
              lookaheadstack.push(lookahead)
              @symstack.pop # FIXME: this is definitely broken
              @statestack.pop
              state = @statestack[-1]
              sym.type = :error
              lookahead = sym
              errorcount = self.class.error_count
              @errorok = false
            end
            next
          end

          # t == 0: accept
          return @symstack[-1].value if t == 0
        end

        # Only a missing table entry is a syntax error; any other value that
        # got here means the table itself is corrupt.
        raise RuntimeError, "yacc: internal parser error!!!" unless t.nil?

        # We have some kind of parsing error here. To handle this, we are
        # going to push the current token onto the lookaheadstack and replace
        # it with an 'error' token. If there are any synchronization rules,
        # they may catch it.
        #
        # The class-level error handler is only called for the first syntax
        # error of a burst (errorcount == 0) or after recovery was confirmed.
        if errorcount == 0 || @errorok
          errorcount = self.class.error_count
          @errorok = false
          errtoken = lookahead.type == :"$end" ? nil : lookahead
          handler = self.class.error_handler

          if handler
            # Give the handler access to the lexer when the token supports it.
            errtoken.lex = @lex if errtoken.respond_to?(:lex=)

            tok = handler.call(errtoken)

            if @errorok
              # User must have done some kind of panic mode recovery on their
              # own. The returned token is the next lookahead
              lookahead = tok
              next
            end
          elsif errtoken.nil?
            # Parse error at end of input and nobody to report it to.
            return nil
          end
          # TODO: without a handler a syntax error on a real token is not
          # reported anywhere; recovery just continues below.
        else
          errorcount = self.class.error_count
        end

        # case 1: the @statestack only has 1 entry on it. If we're in this
        # state, the entire parse has been rolled back and we're completely
        # hosed. The token is discarded and we just keep going.
        if @statestack.length <= 1 && lookahead.type != :"$end"
          lookahead = nil
          state = 0
          # Nuke the pushback stack
          lookaheadstack.clear
          next
        end

        # case 2: the @statestack has a couple of entries on it, but we're at
        # the end of the file. Whoa. We're really hosed here. Bail out
        return nil if lookahead.type == :"$end"

        if lookahead.type != :error
          if @symstack[-1].type == :error
            # Hmmm. Error is on top of stack, we'll just nuke input symbol and
            # continue
            lookahead = nil
            next
          end

          # Replace the offending token by an error symbol that wraps it and
          # keep the token itself for a later retry.
          error_sym = yacc_symbol(:error)
          error_sym.value = lookahead
          lookaheadstack.push(lookahead)
          lookahead = error_sym
        else
          # The error symbol itself is not acceptable here: unwind one state.
          @symstack.pop
          @statestack.pop
          state = @statestack[-1] # Potential bug fix
        end
      end
    end

    protected

    # Builds (once) the grammar from the class-level precedence and rule
    # declarations, then the LR table for it.
    #
    # @return [Grammar] the cached grammar
    def grammar
      return @grammar if @grammar

      @grammar = Grammar.new(@lex.class.terminals)

      # Each declaration stores its terms reversed; a term's level is its
      # index in that stored list.
      self.class.prec_rules.each do |assoc, terms|
        terms.each_with_index do |term, i|
          @grammar.set_precedence(term, assoc, i)
        end
      end

      self.class.parsed_rules.each do |prod, block|
        @grammar.add_production(*prod, &block)
      end

      @grammar.set_start
      @grammar.build_lritems

      @lr_table = LRTable.new(@grammar)
      @lr_table.parse_table

      @grammar
    end

    private

    # Creates a parser symbol of the given type.
    def yacc_symbol(type)
      sym = YaccSymbol.new
      sym.type = type
      sym
    end

    class << self
      # error_handler is an optional callable invoked with the offending token
      # (nil at end of input) on the first syntax error of a burst.
      attr_accessor :grammar, :lexer_class, :error_handler
      attr_writer :rules, :prec_rules

      # Declares one rule: a textual description plus the action run when any
      # of its alternatives is reduced.
      def rule(desc, &block)
        # A new rule makes the cached parse of the rule list stale.
        @parsed_rules = nil
        rules << [desc, block]
      end

      # Defines the lexer as an anonymous subclass of Lex built from the block.
      def lexer(&block)
        @lexer_class = Class.new(Lex, &block)
      end

      # @return [Array] declared [description, block] pairs
      def rules
        @rules ||= []
      end

      # Declares a precedence group: the associativity followed by its terms.
      def precedence(*prec)
        assoc, *terms = prec
        prec_rules << [assoc, terms.reverse]
      end

      # @return [Array] declared [assoc, reversed terms] pairs
      def prec_rules
        @prec_rules ||= []
      end

      # @return [Integer] number of successful shifts needed after a syntax
      #   error before the error handler is consulted again
      def error_count
        3
      end

      # Parses every declared rule description with RuleParser. The result is
      # cached, so the rule parser is built once per declaration set instead of
      # once per parser instance.
      #
      # @return [Array] [production, block] pairs, one per alternative
      # @raise [YaccError] if a rule description cannot be parsed
      def parsed_rules
        return @parsed_rules if @parsed_rules

        rp = RuleParser.new
        @parsed_rules = rules.flat_map do |desc, block|
          productions = rp.parse(desc)
          raise YaccError, "Cannot parse rule description: #{desc.inspect}" unless productions

          productions.map { |prod| [prod, block] }
        end
      end
    end
  end
end
|