rly 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +2 -2
- data/assets/ply_dump.erb +15 -0
- data/lib/rly.rb +2 -0
- data/lib/rly/lex.rb +54 -25
- data/lib/rly/lex_token.rb +8 -0
- data/lib/rly/parse/grammar.rb +211 -0
- data/lib/rly/parse/lr_item.rb +32 -0
- data/lib/rly/parse/lr_table.rb +529 -0
- data/lib/rly/parse/ply_dump.rb +52 -0
- data/lib/rly/parse/production.rb +38 -0
- data/lib/rly/parse/rule_parser.rb +68 -0
- data/lib/rly/parse/yacc_production.rb +11 -0
- data/lib/rly/parse/yacc_symbol.rb +6 -0
- data/lib/rly/version.rb +2 -1
- data/lib/rly/yacc.rb +355 -0
- data/spec/lex/{lexer_spec.rb → lex_spec.rb} +45 -24
- data/spec/parse/calc_spec.rb +95 -0
- data/spec/parse/grammar_spec.rb +239 -0
- data/spec/parse/lr_table_spec.rb +212 -0
- data/spec/parse/production_spec.rb +18 -0
- data/spec/parse/rule_parser_spec.rb +20 -0
- data/spec/parse/yacc_spec.rb +57 -0
- data/spec/spec_helper.rb +5 -0
- metadata +26 -4
@@ -0,0 +1,52 @@
|
|
1
|
+
require "erb"
require "rly/parse/lr_table"
|
2
|
+
|
3
|
+
module Rly
|
4
|
+
|
5
|
+
# Accumulates the messages emitted while an LR table is built and renders
# them, together with the grammar, through the `ply_dump.erb` template.
#
# The instance passes itself to LRTable#parse_table and exposes sprintf-style
# #info and #debug methods (presumably used by the table builder as a logger;
# confirm against LRTable).
class PlyDump
  # @return [String] every message logged so far, one per line
  attr_reader :backlog

  # @param grammar [Object, nil] grammar to build the LR table for; when nil
  #   no table is built and the backlog stays empty (see PlyDump.stub)
  def initialize(grammar)
    @grammar = grammar
    @backlog = ""
    return unless grammar

    @t = Rly::LRTable.new(grammar)
    @t.parse_table(self)
  end

  # Renders the grammar and the collected backlog through the ERB template
  # located at `assets/ply_dump.erb`, three directories above this file.
  #
  # @return [String] the rendered dump
  def to_s
    fn = File.join(File.dirname(__FILE__), '..', '..', '..', 'assets', 'ply_dump.erb')
    # File.read closes the file itself; the previous open(fn).read left the
    # handle open until garbage collection.
    template = ERB.new(File.read(fn))
    template.result(TinyContext.new(g: @grammar, backlog: @backlog).get_binding)
  end

  # @return [PlyDump] a dump that is not attached to any grammar
  def self.stub
    new(nil)
  end

  # Appends a sprintf-formatted line to the backlog.
  #
  # @param args [Array] format string followed by its arguments
  # @return [String] the updated backlog
  def info(*args)
    append_line(*args)
  end

  # Same as #info; both levels end up in the same backlog.
  #
  # @param args [Array] format string followed by its arguments
  # @return [String] the updated backlog
  def debug(*args)
    append_line(*args)
  end

  # Minimal evaluation context for the ERB template: every key of the hash
  # given to the constructor can be read as a method call inside the template.
  class TinyContext
    # @param ctx [Hash] values to expose, keyed by method name (Symbol)
    def initialize(ctx)
      @ctx = ctx
    end

    # @return [Binding] binding whose self is this context
    def get_binding
      binding
    end

    # Resolves argument-less calls through the context hash. Unknown names
    # still evaluate to whatever the hash returns for a missing key, so
    # templates written against the previous behaviour keep rendering.
    def method_missing(name, *args, &block)
      return super unless args.empty?

      @ctx[name]
    end

    # Keeps respond_to? consistent with the names served by method_missing.
    def respond_to_missing?(name, include_private = false)
      @ctx.key?(name) || super
    end
  end

  private

  # Formats the arguments with sprintf and stores the result as a new line.
  # A new string is assigned instead of mutating in place so that strings
  # previously obtained through #backlog are not changed behind the caller.
  def append_line(*args)
    @backlog += sprintf(*args) + "\n"
  end
end
|
51
|
+
|
52
|
+
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
module Rly
|
2
|
+
# A single grammar production `name -> prod[0] prod[1] ...`.
#
# Besides the immutable description of the rule, the object carries a few
# mutable slots (lr_items, lr_next, lr0_added, reduced) that are written
# from outside this class.
class Production
  # NOTE: :line is exposed but never assigned in this class (see FIXME on
  # #initialize), so it reads as nil.
  attr_reader :index, :name, :prod, :precedence, :block, :usyms, :line
  attr_accessor :lr_items, :lr_next
  # Readers for these two are written by hand below because they default to
  # 0; declaring only the writers avoids "method redefined" warnings.
  attr_writer :lr0_added, :reduced

  # FIXME line!!!
  #
  # @param index [Integer] position of the production in the grammar
  # @param name [Symbol] left-hand side of the rule
  # @param prod [Array] right-hand side symbols
  # @param precedence [Array] pair of associativity and level
  # @param block [Proc, nil] action attached to the rule
  def initialize(index, name, prod, precedence=[:right, 0], block=nil)
    @index = index
    @name = name
    @prod = prod
    @precedence = precedence
    @block = block

    # Distinct right-hand side symbols, in order of first appearance.
    @usyms = prod.uniq
  end

  # @return [String] the rule in `name -> sym sym ...` notation
  def to_s
    "#{name} -> #{@prod.map(&:to_s).join(' ')}"
  end

  # @return [String] short debugging representation
  def inspect
    "#<Production #{self}>"
  end

  # @return [Integer] number of symbols on the right-hand side
  def length
    @prod.length
  end

  # @return [Object] stored value, initialised to 0 on first read
  def lr0_added
    @lr0_added ||= 0
  end

  # @return [Object] stored value, initialised to 0 on first read
  def reduced
    @reduced ||= 0
  end
end
|
38
|
+
end
|
@@ -0,0 +1,68 @@
|
|
1
|
+
require "rly/lex"
|
2
|
+
require "rly/parse/grammar"
|
3
|
+
require "rly/parse/lr_table"
|
4
|
+
|
5
|
+
module Rly
|
6
|
+
# Parser for textual rule descriptions such as `name : A B | 'c' D`.
# Parsing a description yields a list of `[rule_name, symbols]` pairs, one
# per alternative, where identifiers become Symbols and quoted literals
# become one-character Strings.
class RuleParser < Yacc
  # Lexer class for rule descriptions; built on first use and then cached
  # on the class.
  def self.lexer_class
    @lexer_class ||= Class.new(Lex) do
      token :ID, /[a-zA-Z_][a-zA-Z_0-9]*/
      token :LITERAL, /"."|'.'/ do |tok|
        # keep only the character between the quotes
        tok.value = tok.value[1]
        tok
      end
      literals ":|"
      ignore " \t\n"
    end
  end

  # Builds (once) the hard-wired grammar of the rule language together with
  # its LR table, which is stored in @lr_table.
  def grammar
    return @grammar if @grammar

    @grammar = Grammar.new(self.class.lexer_class.terminals)

    # grammar : ID ':' rules  -- pair every alternative with the rule name
    @grammar.add_production(:grammar, [:ID, ':', :rules]) do |result, head, _, alternatives|
      rule_name = head.value.to_sym
      result.value = alternatives.value.map { |symbols| [rule_name, symbols] }
    end

    # rules : rule '|' rules | rule
    @grammar.add_production(:rules, [:rule, '|', :rules]) do |result, first, _, rest|
      result.value = [first.value] + rest.value
    end
    @grammar.add_production(:rules, [:rule]) do |result, only|
      result.value = [only.value]
    end

    # rule : tokens
    @grammar.add_production(:rule, [:tokens]) do |result, symbols|
      result.value = symbols.value
    end

    # tokens : ID tokens | LITERAL tokens | ID | LITERAL
    @grammar.add_production(:tokens, [:ID, :tokens]) do |result, id, rest|
      result.value = [id.value.to_sym] + rest.value
    end
    @grammar.add_production(:tokens, [:LITERAL, :tokens]) do |result, literal, rest|
      result.value = [literal.value] + rest.value
    end
    @grammar.add_production(:tokens, [:ID]) do |result, id|
      result.value = [id.value.to_sym]
    end
    @grammar.add_production(:tokens, [:LITERAL]) do |result, literal|
      result.value = [literal.value]
    end

    @grammar.set_start
    @grammar.build_lritems

    @lr_table = LRTable.new(@grammar)
    @lr_table.parse_table

    @grammar
  end
end
|
68
|
+
end
|
data/lib/rly/version.rb
CHANGED
data/lib/rly/yacc.rb
ADDED
@@ -0,0 +1,355 @@
|
|
1
|
+
require "rly/lex"
|
2
|
+
require "rly/parse/grammar"
|
3
|
+
require "rly/parse/yacc_production"
|
4
|
+
require "rly/parse/yacc_symbol"
|
5
|
+
|
6
|
+
module Rly
|
7
|
+
# Error type of the parser. Yacc#parse rescues it around grammar-rule
# blocks and switches to error recovery when a rule raises it.
class YaccError < RuntimeError
end
|
8
|
+
|
9
|
+
# Table-driven LR parser.
#
# Subclasses describe their language with the class-level DSL (`lexer`,
# `precedence`, `rule`); creating an instance builds the grammar and its LR
# table, and #parse then runs the shift/reduce loop over the lexer output.
class Yacc
  # NOTE: the `grammar` reader is defined by hand in the protected section
  # below; it is not listed here to avoid a "method redefined" warning.
  attr_reader :lex, :lr_table

  # @param lex [Object, nil] lexer instance; when nil an instance of the
  #   class-level lexer_class is created
  # @raise [ArgumentError] if neither a lexer nor a lexer class is available
  def initialize(lex=nil)
    if lex.nil? && self.class.lexer_class.nil?
      raise ArgumentError, "No lexer available"
    end
    @lex = lex || self.class.lexer_class.new

    # Building the grammar also builds @lr_table.
    @grammar = grammar
  end

  # Runs the LR parse loop.
  #
  # @param input [Object, nil] passed to the lexer's #input when given;
  #   otherwise the lexer is used in its current state
  # @return [Object, nil] value of the symbol on top of the stack when the
  #   accept action is reached, or nil when parsing is abandoned after a
  #   syntax error
  # @raise [RuntimeError] if the action table yields an unusable entry
  def parse(input=nil)
    lookahead = nil
    lookaheadstack = []
    actions = @lr_table.lr_action
    goto = @lr_table.lr_goto
    prod = @lr_table.lr_productions
    pslice = YaccProduction.new(nil)
    errorcount = 0

    # Set up the lexer and parser objects on pslice
    pslice.lexer = @lex
    pslice.parser = self

    # If input was supplied, pass to lexer
    @lex.input(input) if input

    # Set up the state and symbol stacks
    @statestack = []
    @symstack = []

    pslice.stack = @symstack
    errtoken = nil

    # The start state is assumed to be (0,$end)
    @statestack.push(0)
    sym = YaccSymbol.new
    sym.type = :"$end"
    @symstack.push(sym)
    state = 0

    # `while true` rather than Kernel#loop: loop would silently swallow a
    # StopIteration raised by the lexer or by a rule block.
    while true
      # Get the next symbol on the input. If a lookahead symbol is already
      # set, we just use that. Otherwise, we'll pull the next token off of
      # the lookaheadstack or from the lexer.
      unless lookahead
        lookahead = lookaheadstack.empty? ? @lex.next : lookaheadstack.pop
        unless lookahead
          # input exhausted: synthesize the end marker
          lookahead = YaccSymbol.new
          lookahead.type = :"$end"
        end
      end

      # Check the action table
      ltype = lookahead.type
      t = actions[state][ltype]

      if t
        if t > 0
          # shift a symbol on the stack
          @statestack.push(t)
          state = t

          @symstack.push(lookahead)
          lookahead = nil

          # Decrease error count on successful shift
          errorcount -= 1 if errorcount > 0
          next
        end

        if t < 0
          # reduce a symbol on the stack, emit a production
          p = prod[-t]
          pname = p.name
          plen = p.length

          sym = YaccSymbol.new
          sym.type = pname
          sym.value = nil

          # The rule block receives the result symbol followed by the plen
          # symbols being reduced. pop(0) returns [], so empty productions
          # need no special case (the former `if plen ... else` branch could
          # never run because 0 is truthy in Ruby).
          targ = @symstack.pop(plen)
          targ.insert(0, sym)

          pslice.slice = targ

          begin
            # Call the grammar rule with our special slice object
            @statestack.pop(plen)
            instance_exec(*targ, &p.block)

            @symstack.push(sym)
            state = goto[@statestack[-1]][pname]
            @statestack.push(state)
          rescue YaccError
            # If an error was set. Enter error recovery state
            lookaheadstack.push(lookahead)
            @symstack.pop # FIXME: this is definitely broken
            @statestack.pop
            state = @statestack[-1]
            sym.type = :error
            lookahead = sym
            errorcount = self.class.error_count
            @errorok = false
          end
          next
        end

        if t == 0
          # accept: the parse result is the value on top of the symbol stack
          return @symstack[-1].value
        end
      end

      if t.nil?
        # We have some kind of parsing error here. To handle this, we are
        # going to push the current token onto the tokenstack and replace it
        # with an 'error' token. If there are any synchronization rules,
        # they may catch it.
        #
        # In addition to pushing the error token, we call the class-level
        # error handler if this is the first syntax error. It is only called
        # if errorcount == 0.
        if errorcount == 0 || @errorok == true
          # error_count lives on the class; calling it on the instance used
          # to raise NameError on every syntax error.
          errorcount = self.class.error_count
          @errorok = false
          errtoken = lookahead
          errtoken = nil if errtoken.type == :"$end"

          handler = self.class.error_handler
          if handler
            errtoken.lex = @lex if errtoken

            tok = handler.call(errtoken)

            if @errorok
              # User must have done some kind of panic mode recovery on
              # their own. The returned token is the next lookahead
              lookahead = tok
              errtoken = nil
              next
            end
          else
            # Syntax error at end of input and nobody to report it to.
            return nil unless errtoken
            # TODO: report "Syntax error, token=<type>" (with line number
            # when the token carries one) once an output channel exists.
          end
        else
          errorcount = self.class.error_count
        end

        # case 1: the @statestack only has 1 entry on it. If we're in this
        # state, the entire parse has been rolled back and we're completely
        # hosed. The token is discarded and we just keep going.
        if @statestack.length <= 1 && lookahead.type != :"$end"
          lookahead = nil
          errtoken = nil
          state = 0
          # Nuke the pushback stack
          lookaheadstack = []
          next
        end

        # case 2: the @statestack has a couple of entries on it, but we're
        # at the end of the file. Whoa. We're really hosed here. Bail out
        return nil if lookahead.type == :"$end"

        if lookahead.type != :error
          sym = @symstack[-1]
          if sym.type == :error
            # Hmmm. Error is on top of stack, we'll just nuke input
            # symbol and continue
            lookahead = nil
            next
          end
          # Replace the offending token by an error symbol that wraps it;
          # the token itself is pushed back for later.
          t = YaccSymbol.new
          t.type = :error
          t.value = lookahead
          lookaheadstack.push(lookahead)
          lookahead = t
        else
          # Already looking at an error symbol: unwind one stack entry.
          @symstack.pop
          @statestack.pop
          state = @statestack[-1] # Potential bug fix
        end

        next
      end

      # Call an error function here
      raise RuntimeError, "yacc: internal parser error!!!"
    end
  end

  protected

  # Builds (once) the grammar from the class-level precedence and rule
  # declarations and derives the LR table, stored in @lr_table.
  #
  # @return [Grammar]
  def grammar
    return @grammar if @grammar

    @grammar = Grammar.new(@lex.class.terminals)

    self.class.prec_rules.each do |assoc, terms|
      # the level of a term is its index within its own declaration
      terms.each_with_index do |term, level|
        @grammar.set_precedence(term, assoc, level)
      end
    end

    self.class.parsed_rules.each do |prod, block|
      @grammar.add_production(*prod, &block)
    end

    @grammar.set_start
    @grammar.build_lritems

    @lr_table = LRTable.new(@grammar)
    @lr_table.parse_table

    @grammar
  end

  class << self
    # error_handler: optional callable invoked by #parse with the offending
    # token (or nil at end of input) on the first syntax error; nil when
    # unset.
    attr_accessor :grammar, :lexer_class, :error_handler
    # The matching readers are defined below with lazy defaults.
    attr_writer :rules, :prec_rules

    # Registers a rule description together with its action block.
    #
    # @param desc [String] textual rule, parsed later by RuleParser
    # @return [Array] the list of registered rules
    def rule(desc, &block)
      rules << [desc, block]
    end

    # Defines the lexer for this parser as an anonymous Lex subclass whose
    # body is the given block.
    def lexer(&block)
      @lexer_class = Class.new(Lex, &block)
    end

    # @return [Array] registered `[description, block]` pairs
    def rules
      @rules ||= []
    end

    # Declares precedence: the first argument is the associativity, the
    # rest are the terms. Terms are stored in reverse order.
    def precedence(*prec)
      assoc, *terms = prec
      prec_rules << [assoc, terms.reverse]
    end

    # @return [Array] registered `[assoc, terms]` pairs
    def prec_rules
      @prec_rules ||= []
    end

    # @return [Integer] value errorcount is reset to after a syntax error
    def error_count
      3
    end

    # Parses every registered rule description into productions.
    #
    # The result is recomputed on each call, so rules added after an earlier
    # call are picked up and callers always receive fresh arrays (the former
    # `@parsed_rules if @parsed_rules` line had no effect and was dropped).
    #
    # @return [Array] `[production, block]` pairs
    def parsed_rules
      rule_parser = RuleParser.new
      @parsed_rules = rules.flat_map do |desc, block|
        rule_parser.parse(desc).map { |prod| [prod, block] }
      end
    end
  end
end
|
355
|
+
end
|