rly 0.2.1 → 0.2.2
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/rly.rb +1 -0
- data/lib/rly/file_lex.rb +45 -0
- data/lib/rly/lex.rb +81 -36
- data/lib/rly/lex_token.rb +4 -3
- data/lib/rly/parse/grammar.rb +3 -1
- data/lib/rly/parse/lr_table.rb +9 -10
- data/lib/rly/parse/production.rb +4 -0
- data/lib/rly/parse/rule_parser.rb +3 -0
- data/lib/rly/parse/yacc_symbol.rb +1 -1
- data/lib/rly/version.rb +1 -1
- data/lib/rly/yacc.rb +21 -22
- data/spec/parse/rule_parser_spec.rb +11 -0
- metadata +3 -2
data/lib/rly.rb
CHANGED
data/lib/rly/file_lex.rb
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
require "rly/lex"
|
2
|
+
|
3
|
+
module Rly
  # Lexer that takes its input from files and supports nested inclusion:
  # {#push_file} suspends the file currently being lexed and starts on another
  # one, and {#next} resumes the suspended file once the nested one runs out
  # of tokens.
  class FileLex < Lex
    # @param fn [String, nil] path of the first file to lex; when nil, no
    #   input is loaded until {#push_file} is called
    #
    # NOTE(review): the parent initializer is not invoked (there is no
    # `super` call), so whatever state Lex#initialize sets up is absent
    # here -- confirm this is intended.
    def initialize(fn=nil)
      @inputstack = []
      push_file(fn) if fn
    end

    # Starts lexing +fn+ from its beginning, remembering the current file and
    # position (if a file is active) so that {#pop_file} can resume it.
    #
    # @param fn [String] path of the file to read
    # @raise [SystemCallError] if the file cannot be read
    def push_file(fn)
      @inputstack.push([@input, @pos, @filename]) if @filename

      @filename = fn
      # File.read closes the handle as soon as the content is loaded and,
      # unlike Kernel#open, never treats a name starting with "|" as a
      # command to execute.
      @input = File.read(fn)
      @pos = 0
    end

    # Restores the most recently suspended file as the active input.
    # With an empty stack all three fields become nil.
    def pop_file
      @input, @pos, @filename = @inputstack.pop
    end

    # Returns the next token, falling back to the suspended files when the
    # active one is exhausted.
    #
    # @return [LexToken, nil] the next token, or nil when the active file and
    #   every suspended file have been fully consumed
    def next
      until (tok = super)
        return nil if @inputstack.empty?
        pop_file
      end
      tok
    end

    # Builds a token stamped with the current position, line number and the
    # name of the file being lexed.
    #
    # @param type [Symbol, String, nil] token type
    # @param value [String] matched text
    # @return [LexToken]
    def build_token(type, value)
      LexToken.new(type, value, self, @pos, @lineno, @filename)
    end
  end
end
|
data/lib/rly/lex.rb
CHANGED
@@ -119,61 +119,106 @@ module Rly
|
|
119
119
|
def next
|
120
120
|
while @pos < @input.length
|
121
121
|
if self.class.ignores_list[@input[@pos]]
|
122
|
-
|
122
|
+
ignore_symbol
|
123
123
|
next
|
124
124
|
end
|
125
125
|
|
126
|
-
|
127
|
-
self.class.tokens.each do |type, rule, block|
|
128
|
-
m = rule.match(@input, @pos)
|
129
|
-
next unless m
|
130
|
-
next unless m.begin(0) == @pos
|
131
|
-
|
132
|
-
tok = LexToken.new(type, m[0], self)
|
133
|
-
|
134
|
-
matched = true
|
135
|
-
|
136
|
-
tok = block.call(tok) if block
|
137
|
-
|
138
|
-
@pos = m.end(0)
|
139
|
-
|
140
|
-
return tok if tok.type
|
141
|
-
end
|
142
|
-
|
143
|
-
unless matched
|
144
|
-
if self.class.literals_list[@input[@pos]]
|
145
|
-
tok = LexToken.new(@input[@pos], @input[@pos], self)
|
126
|
+
m = self.class.token_regexps.match(@input[@pos..-1])
|
146
127
|
|
147
|
-
|
128
|
+
if m && ! m[0].empty?
|
129
|
+
val = nil
|
130
|
+
type = nil
|
131
|
+
resolved_type = nil
|
132
|
+
m.names.each do |n|
|
133
|
+
if m[n]
|
134
|
+
type = n.to_sym
|
135
|
+
resolved_type = (n.start_with?('__anonymous_') ? nil : type)
|
136
|
+
val = m[n]
|
137
|
+
break
|
138
|
+
end
|
139
|
+
end
|
148
140
|
|
149
|
-
|
141
|
+
if type
|
142
|
+
tok = build_token(resolved_type, val)
|
143
|
+
@pos += m.end(0)
|
144
|
+
tok = self.class.callables[type].call(tok) if self.class.callables[type]
|
150
145
|
|
151
|
-
|
146
|
+
if tok && tok.type
|
147
|
+
return tok
|
148
|
+
else
|
149
|
+
next
|
150
|
+
end
|
152
151
|
end
|
153
152
|
end
|
153
|
+
|
154
|
+
if self.class.literals_list[@input[@pos]]
|
155
|
+
tok = build_token(@input[@pos], @input[@pos])
|
156
|
+
matched = true
|
157
|
+
@pos += 1
|
158
|
+
return tok
|
159
|
+
end
|
154
160
|
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
if pos == @pos
|
161
|
-
raise LexError.new("Illegal character '#{@input[@pos]}' at index #{@pos}")
|
162
|
-
else
|
163
|
-
return tok if tok && tok.type
|
164
|
-
end
|
165
|
-
else
|
161
|
+
if self.class.error_hander
|
162
|
+
pos = @pos
|
163
|
+
tok = build_token(:error, @input[@pos])
|
164
|
+
tok = self.class.error_hander.call(tok)
|
165
|
+
if pos == @pos
|
166
166
|
raise LexError.new("Illegal character '#{@input[@pos]}' at index #{@pos}")
|
167
|
+
else
|
168
|
+
return tok if tok && tok.type
|
167
169
|
end
|
170
|
+
else
|
171
|
+
raise LexError.new("Illegal character '#{@input[@pos]}' at index #{@pos}")
|
168
172
|
end
|
173
|
+
|
169
174
|
end
|
170
175
|
return nil
|
171
176
|
end
|
172
177
|
|
178
|
+
def build_token(type, value)
|
179
|
+
LexToken.new(type, value, self, @pos, @lineno)
|
180
|
+
end
|
181
|
+
|
182
|
+
def ignore_symbol
|
183
|
+
@pos += 1
|
184
|
+
end
|
185
|
+
|
173
186
|
class << self
|
174
187
|
def terminals
|
175
|
-
self.tokens.map { |t,r,b| t }.compact + self.literals_list.chars.to_a
|
188
|
+
self.tokens.map { |t,r,b| t }.compact + self.literals_list.chars.to_a + self.metatokens_list
|
189
|
+
end
|
190
|
+
|
191
|
+
def callables
|
192
|
+
@callables ||= {}
|
193
|
+
end
|
194
|
+
|
195
|
+
# Builds (once) and returns the single combined Regexp used to recognise
# tokens.
#
# Every registered token rule becomes one alternative of the form
# `\A(?<name>rule)`, so the name of the capture group that is set after a
# match identifies the rule that fired. Rules registered without a name get a
# synthetic `__anonymous_<n>` group name. As a side effect each rule's block
# is stored in `callables` under the same name.
#
# @return [Regexp] the memoized combined expression
def token_regexps
  return @token_regexps if @token_regexps

  alternatives = self.tokens.each_with_index.map do |(name, rx, block), idx|
    # Group names may only contain word characters. Deriving the name from
    # block.hash could embed a minus sign (Object#hash is often negative) and
    # make Regexp.new raise, so anonymous rules are numbered by position.
    name ||= "__anonymous_#{idx}".to_sym

    self.callables[name] = block

    "\\A(?<#{name}>#{rx.to_s})"
  end

  @token_regexps = Regexp.new(alternatives.join('|'))
end
|
213
|
+
|
214
|
+
def metatokens_list
|
215
|
+
@metatokens_list ||= []
|
216
|
+
end
|
217
|
+
|
218
|
+
def metatokens(*args)
|
219
|
+
@metatokens_list = args
|
220
|
+
end
|
221
|
+
|
177
222
|
# Returns the list of registered tokens
|
178
223
|
#
|
179
224
|
# @api private
|
data/lib/rly/lex_token.rb
CHANGED
@@ -1,13 +1,14 @@
|
|
1
1
|
module Rly
|
2
2
|
|
3
3
|
class LexToken
|
4
|
-
attr_accessor :value
|
5
|
-
attr_reader :
|
4
|
+
attr_accessor :value, :type, :location_info
|
5
|
+
attr_reader :lexer
|
6
6
|
|
7
|
-
def initialize(type, value, lexer)
|
7
|
+
def initialize(type, value, lexer, pos=0, lineno=0, filename=nil)
|
8
8
|
@type = type
|
9
9
|
@value = value
|
10
10
|
@lexer = lexer
|
11
|
+
@location_info = { pos: pos, lineno: lineno, filename: filename }
|
11
12
|
end
|
12
13
|
|
13
14
|
def to_s
|
data/lib/rly/parse/grammar.rb
CHANGED
@@ -45,7 +45,7 @@ module Rly
|
|
45
45
|
end
|
46
46
|
|
47
47
|
mapname = "#{name.to_s} -> #{symbols.to_s}"
|
48
|
-
raise ArgumentError if @prodmap[mapname]
|
48
|
+
raise ArgumentError.new("Production #{mapname} is already defined!") if @prodmap[mapname]
|
49
49
|
|
50
50
|
index = @productions.count
|
51
51
|
@nonterminals[name] = [] unless @nonterminals[name]
|
@@ -79,6 +79,7 @@ module Rly
|
|
79
79
|
end
|
80
80
|
|
81
81
|
def set_start(symbol=nil)
|
82
|
+
raise RuntimeError.new("No productions defined in #{self}") if @productions.empty?
|
82
83
|
symbol = @productions[1].name unless symbol
|
83
84
|
raise ArgumentError unless @nonterminals[symbol]
|
84
85
|
@productions[0] = Production.new(0, :"S'", [symbol])
|
@@ -119,6 +120,7 @@ module Rly
|
|
119
120
|
while true
|
120
121
|
any_changes = false
|
121
122
|
nonterminals.keys.each do |n|
|
123
|
+
raise RuntimeError.new("Unefined production '#{n}'") unless @prodnames[n]
|
122
124
|
@prodnames[n].each do |p|
|
123
125
|
_first(p.prod).each do |f|
|
124
126
|
unless @first[n].include?(f)
|
data/lib/rly/parse/lr_table.rb
CHANGED
@@ -81,17 +81,17 @@ module Rly
|
|
81
81
|
# Need to decide on shift or reduce here
|
82
82
|
# By default we favor shifting. Need to add
|
83
83
|
# some precedence rules here.
|
84
|
-
sprec, slevel = productions[st_actionp[a].
|
84
|
+
sprec, slevel = productions[st_actionp[a].index].precedence
|
85
85
|
rprec, rlevel = precedence[a] || [:right, 0]
|
86
86
|
if (slevel < rlevel) || ((slevel == rlevel) && (rprec == :left))
|
87
87
|
# We really need to reduce here.
|
88
|
-
st_action[a] = -p.
|
88
|
+
st_action[a] = -p.index
|
89
89
|
st_actionp[a] = p
|
90
90
|
if ! slevel && ! rlevel
|
91
91
|
log.info(" ! shift/reduce conflict for %s resolved as reduce",a)
|
92
92
|
@sr_conflicts << [st, a, 'reduce']
|
93
93
|
end
|
94
|
-
productions[p.
|
94
|
+
productions[p.index].reduced += 1
|
95
95
|
elsif (slevel == rlevel) && (rprec == :nonassoc)
|
96
96
|
st_action[a] = nil
|
97
97
|
else
|
@@ -105,19 +105,19 @@ module Rly
|
|
105
105
|
# Reduce/reduce conflict. In this case, we favor the rule
|
106
106
|
# that was defined first in the grammar file
|
107
107
|
oldp = productions[-r]
|
108
|
-
pp = productions[p.
|
108
|
+
pp = productions[p.index]
|
109
109
|
if oldp.line > pp.line
|
110
|
-
st_action[a] = -p.
|
110
|
+
st_action[a] = -p.index
|
111
111
|
st_actionp[a] = p
|
112
112
|
chosenp = pp
|
113
113
|
rejectp = oldp
|
114
|
-
productions[p.
|
115
|
-
productions[oldp.
|
114
|
+
productions[p.index].reduced += 1
|
115
|
+
productions[oldp.index].reduced -= 1
|
116
116
|
else
|
117
117
|
chosenp,rejectp = oldp,pp
|
118
118
|
end
|
119
119
|
@rr_conflicts << [st, chosenp, rejectp]
|
120
|
-
log.info(" ! reduce/reduce conflict for %s resolved using rule %d (%s)", a, st_actionp[a].
|
120
|
+
log.info(" ! reduce/reduce conflict for %s resolved using rule %d (%s)", a, st_actionp[a].index, st_actionp[a])
|
121
121
|
else
|
122
122
|
raise RuntimeError("Unknown conflict in state #{st}")
|
123
123
|
end
|
@@ -128,8 +128,7 @@ module Rly
|
|
128
128
|
end
|
129
129
|
end
|
130
130
|
end
|
131
|
-
else
|
132
|
-
# i = p.lr_index
|
131
|
+
else
|
133
132
|
a = p.prod[p.lr_index+1] # Get symbol right after the "."
|
134
133
|
if @grammar.terminals.include?(a)
|
135
134
|
g = lr0_goto(i, a)
|
data/lib/rly/parse/production.rb
CHANGED
data/lib/rly/version.rb
CHANGED
data/lib/rly/yacc.rb
CHANGED
@@ -22,6 +22,8 @@ module Rly
|
|
22
22
|
end
|
23
23
|
|
24
24
|
def parse(input=nil, trace=false)
|
25
|
+
@trace = trace
|
26
|
+
|
25
27
|
lookahead = nil
|
26
28
|
lookaheadstack = []
|
27
29
|
actions = @lr_table.lr_action
|
@@ -56,7 +58,7 @@ module Rly
|
|
56
58
|
# is already set, we just use that. Otherwise, we'll pull
|
57
59
|
# the next token off of the lookaheadstack or from the lexer
|
58
60
|
|
59
|
-
puts "State : #{state}" if trace
|
61
|
+
puts "State : #{state}" if @trace
|
60
62
|
|
61
63
|
unless lookahead
|
62
64
|
if lookaheadstack.empty?
|
@@ -70,7 +72,7 @@ module Rly
|
|
70
72
|
end
|
71
73
|
end
|
72
74
|
|
73
|
-
puts "Stack : #{(@symstack[1..-1].map{|s|s.type}.join(' ') + ' ' + lookahead.inspect).lstrip}" if trace
|
75
|
+
puts "Stack : #{(@symstack[1..-1].map{|s|s.type}.join(' ') + ' ' + lookahead.inspect).lstrip}" if @trace
|
74
76
|
|
75
77
|
# Check the action table
|
76
78
|
ltype = lookahead.type
|
@@ -82,7 +84,7 @@ module Rly
|
|
82
84
|
@statestack.push(t)
|
83
85
|
state = t
|
84
86
|
|
85
|
-
puts "Action : Shift and goto state #{t}" if trace
|
87
|
+
puts "Action : Shift and goto state #{t}" if @trace
|
86
88
|
|
87
89
|
@symstack.push(lookahead)
|
88
90
|
lookahead = nil
|
@@ -103,9 +105,9 @@ module Rly
|
|
103
105
|
sym.type = pname
|
104
106
|
sym.value = nil
|
105
107
|
|
106
|
-
if trace
|
108
|
+
if @trace
|
107
109
|
if plen
|
108
|
-
puts "Action : Reduce rule [#{p}] with [#{@symstack[-plen..@symstack.length].map{|s|s.
|
110
|
+
puts "Action : Reduce rule [#{p}] with [#{@symstack[-plen..@symstack.length].map{|s|s.inspect}.join(', ')}] and goto state #{-t}"
|
109
111
|
else
|
110
112
|
puts "Action : Reduce rule [#{p}] with [] and goto state #{-t}"
|
111
113
|
end
|
@@ -127,7 +129,7 @@ module Rly
|
|
127
129
|
@statestack.pop(plen)
|
128
130
|
instance_exec(*targ, &p.block)
|
129
131
|
|
130
|
-
puts "Result : #{targ[0].
|
132
|
+
puts "Result : #{targ[0].inspect}" if @trace
|
131
133
|
|
132
134
|
@symstack.push(sym)
|
133
135
|
state = goto[@statestack[-1]][pname]
|
@@ -160,7 +162,7 @@ module Rly
|
|
160
162
|
@statestack.pop(plen)
|
161
163
|
pslice[0] = instance_exec(*pslice, &p.block)
|
162
164
|
|
163
|
-
puts "Result : #{targ[0].value}" if trace
|
165
|
+
puts "Result : #{targ[0].value}" if @trace
|
164
166
|
|
165
167
|
@symstack.push(sym)
|
166
168
|
state = goto[@statestack[-1]][pname]
|
@@ -185,7 +187,7 @@ module Rly
|
|
185
187
|
n = @symstack[-1]
|
186
188
|
result = n.value
|
187
189
|
|
188
|
-
puts "Done : Returning #{result}" if trace
|
190
|
+
puts "Done : Returning #{result}" if @trace
|
189
191
|
|
190
192
|
return result
|
191
193
|
end
|
@@ -209,12 +211,7 @@ module Rly
|
|
209
211
|
errtoken = nil if errtoken.type == :"$end"
|
210
212
|
|
211
213
|
if self.class.error_handler
|
212
|
-
|
213
|
-
token = @lex.next
|
214
|
-
restart = @restart
|
215
|
-
errtoken.lex = @lex if errtoken
|
216
|
-
|
217
|
-
tok = self.class.error_handler.call(errtoken)
|
214
|
+
tok = self.instance_exec(errtoken, &self.class.error_handler)
|
218
215
|
|
219
216
|
if @errorok
|
220
217
|
# User must have done some kind of panic
|
@@ -226,14 +223,10 @@ module Rly
|
|
226
223
|
end
|
227
224
|
else
|
228
225
|
if errtoken
|
229
|
-
|
230
|
-
|
231
|
-
#if lineno:
|
232
|
-
# sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
|
233
|
-
#else:
|
234
|
-
# sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type)
|
226
|
+
location_info = lookahead.location_info
|
227
|
+
puts "Fail : Syntax error at #{location_info}, token='#{errtoken}'" if @trace
|
235
228
|
else
|
236
|
-
|
229
|
+
puts "Fail : Parse error in input. EOF" if @trace
|
237
230
|
return nil
|
238
231
|
end
|
239
232
|
end
|
@@ -361,13 +354,19 @@ module Rly
|
|
361
354
|
3
|
362
355
|
end
|
363
356
|
|
357
|
+
def on_error(lambda)
|
358
|
+
@error_handler = lambda
|
359
|
+
end
|
360
|
+
|
364
361
|
def parsed_rules
|
365
362
|
return @parsed_rules if @parsed_rules
|
366
363
|
|
367
364
|
@parsed_rules = []
|
368
365
|
rp = RuleParser.new
|
369
366
|
self.rules.each do |desc, block|
|
370
|
-
rp.parse(desc)
|
367
|
+
rules = rp.parse(desc)
|
368
|
+
raise RuntimeError.new("Failed to parse rules: #{desc}") unless rules
|
369
|
+
rules.each do |(pname, p, prec)|
|
371
370
|
@parsed_rules << [pname, p, prec, block]
|
372
371
|
end
|
373
372
|
end
|
@@ -17,4 +17,15 @@ describe Rly::RuleParser do
|
|
17
17
|
productions[2].should == [:expression, [:expression, '*', :expression], nil]
|
18
18
|
productions[3].should == [:expression, [:expression, '/', :expression], nil]
|
19
19
|
end
|
20
|
+
|
21
|
+
it "tokenizes the rule correctly" do
|
22
|
+
s = 'maybe_superclasses : ":" superclasses |'
|
23
|
+
l = Rly::RuleParser.lexer_class.new(s)
|
24
|
+
|
25
|
+
l.next.type.should == :ID
|
26
|
+
l.next.type.should == ':'
|
27
|
+
l.next.type.should == :LITERAL
|
28
|
+
l.next.type.should == :ID
|
29
|
+
l.next.type.should == '|'
|
30
|
+
end
|
20
31
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rly
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.1
|
4
|
+
version: 0.2.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-
|
12
|
+
date: 2012-12-30 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rspec
|
@@ -43,6 +43,7 @@ files:
|
|
43
43
|
- Rakefile
|
44
44
|
- assets/ply_dump.erb
|
45
45
|
- lib/rly.rb
|
46
|
+
- lib/rly/file_lex.rb
|
46
47
|
- lib/rly/lex.rb
|
47
48
|
- lib/rly/lex_token.rb
|
48
49
|
- lib/rly/parse/grammar.rb
|