rly 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/rly.rb +1 -0
- data/lib/rly/file_lex.rb +45 -0
- data/lib/rly/lex.rb +81 -36
- data/lib/rly/lex_token.rb +4 -3
- data/lib/rly/parse/grammar.rb +3 -1
- data/lib/rly/parse/lr_table.rb +9 -10
- data/lib/rly/parse/production.rb +4 -0
- data/lib/rly/parse/rule_parser.rb +3 -0
- data/lib/rly/parse/yacc_symbol.rb +1 -1
- data/lib/rly/version.rb +1 -1
- data/lib/rly/yacc.rb +21 -22
- data/spec/parse/rule_parser_spec.rb +11 -0
- metadata +3 -2
data/lib/rly.rb
CHANGED
data/lib/rly/file_lex.rb
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
require "rly/lex"
|
2
|
+
|
3
|
+
module Rly
|
4
|
+
class FileLex < Lex
|
5
|
+
def initialize(fn=nil)
|
6
|
+
@inputstack = []
|
7
|
+
push_file(fn) if fn
|
8
|
+
end
|
9
|
+
|
10
|
+
def push_file(fn)
|
11
|
+
@inputstack.push([@input, @pos, @filename]) if @filename
|
12
|
+
|
13
|
+
@filename = fn
|
14
|
+
@input = open(fn).read
|
15
|
+
@pos = 0
|
16
|
+
end
|
17
|
+
|
18
|
+
def pop_file
|
19
|
+
(@input, @pos, @filename) = @inputstack.pop
|
20
|
+
end
|
21
|
+
|
22
|
+
def next
|
23
|
+
begin
|
24
|
+
tok = super
|
25
|
+
|
26
|
+
if tok
|
27
|
+
return tok
|
28
|
+
else
|
29
|
+
if @inputstack.empty?
|
30
|
+
return nil
|
31
|
+
else
|
32
|
+
pop_file
|
33
|
+
redo
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end until tok
|
37
|
+
end
|
38
|
+
|
39
|
+
def build_token(type, value)
|
40
|
+
tok = LexToken.new(type, value, self, @pos, @lineno)
|
41
|
+
tok.location_info[:filename] = @filename
|
42
|
+
tok
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
data/lib/rly/lex.rb
CHANGED
@@ -119,61 +119,106 @@ module Rly
|
|
119
119
|
def next
|
120
120
|
while @pos < @input.length
|
121
121
|
if self.class.ignores_list[@input[@pos]]
|
122
|
-
|
122
|
+
ignore_symbol
|
123
123
|
next
|
124
124
|
end
|
125
125
|
|
126
|
-
|
127
|
-
self.class.tokens.each do |type, rule, block|
|
128
|
-
m = rule.match(@input, @pos)
|
129
|
-
next unless m
|
130
|
-
next unless m.begin(0) == @pos
|
131
|
-
|
132
|
-
tok = LexToken.new(type, m[0], self)
|
133
|
-
|
134
|
-
matched = true
|
135
|
-
|
136
|
-
tok = block.call(tok) if block
|
137
|
-
|
138
|
-
@pos = m.end(0)
|
139
|
-
|
140
|
-
return tok if tok.type
|
141
|
-
end
|
142
|
-
|
143
|
-
unless matched
|
144
|
-
if self.class.literals_list[@input[@pos]]
|
145
|
-
tok = LexToken.new(@input[@pos], @input[@pos], self)
|
126
|
+
m = self.class.token_regexps.match(@input[@pos..-1])
|
146
127
|
|
147
|
-
|
128
|
+
if m && ! m[0].empty?
|
129
|
+
val = nil
|
130
|
+
type = nil
|
131
|
+
resolved_type = nil
|
132
|
+
m.names.each do |n|
|
133
|
+
if m[n]
|
134
|
+
type = n.to_sym
|
135
|
+
resolved_type = (n.start_with?('__anonymous_') ? nil : type)
|
136
|
+
val = m[n]
|
137
|
+
break
|
138
|
+
end
|
139
|
+
end
|
148
140
|
|
149
|
-
|
141
|
+
if type
|
142
|
+
tok = build_token(resolved_type, val)
|
143
|
+
@pos += m.end(0)
|
144
|
+
tok = self.class.callables[type].call(tok) if self.class.callables[type]
|
150
145
|
|
151
|
-
|
146
|
+
if tok && tok.type
|
147
|
+
return tok
|
148
|
+
else
|
149
|
+
next
|
150
|
+
end
|
152
151
|
end
|
153
152
|
end
|
153
|
+
|
154
|
+
if self.class.literals_list[@input[@pos]]
|
155
|
+
tok = build_token(@input[@pos], @input[@pos])
|
156
|
+
matched = true
|
157
|
+
@pos += 1
|
158
|
+
return tok
|
159
|
+
end
|
154
160
|
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
if pos == @pos
|
161
|
-
raise LexError.new("Illegal character '#{@input[@pos]}' at index #{@pos}")
|
162
|
-
else
|
163
|
-
return tok if tok && tok.type
|
164
|
-
end
|
165
|
-
else
|
161
|
+
if self.class.error_hander
|
162
|
+
pos = @pos
|
163
|
+
tok = build_token(:error, @input[@pos])
|
164
|
+
tok = self.class.error_hander.call(tok)
|
165
|
+
if pos == @pos
|
166
166
|
raise LexError.new("Illegal character '#{@input[@pos]}' at index #{@pos}")
|
167
|
+
else
|
168
|
+
return tok if tok && tok.type
|
167
169
|
end
|
170
|
+
else
|
171
|
+
raise LexError.new("Illegal character '#{@input[@pos]}' at index #{@pos}")
|
168
172
|
end
|
173
|
+
|
169
174
|
end
|
170
175
|
return nil
|
171
176
|
end
|
172
177
|
|
178
|
+
def build_token(type, value)
|
179
|
+
LexToken.new(type, value, self, @pos, @lineno)
|
180
|
+
end
|
181
|
+
|
182
|
+
def ignore_symbol
|
183
|
+
@pos += 1
|
184
|
+
end
|
185
|
+
|
173
186
|
class << self
|
174
187
|
def terminals
|
175
|
-
self.tokens.map { |t,r,b| t }.compact + self.literals_list.chars.to_a
|
188
|
+
self.tokens.map { |t,r,b| t }.compact + self.literals_list.chars.to_a + self.metatokens_list
|
189
|
+
end
|
190
|
+
|
191
|
+
def callables
|
192
|
+
@callables ||= {}
|
193
|
+
end
|
194
|
+
|
195
|
+
def token_regexps
|
196
|
+
return @token_regexps if @token_regexps
|
197
|
+
|
198
|
+
collector = []
|
199
|
+
self.tokens.each do |name, rx, block|
|
200
|
+
name = "__anonymous_#{block.hash}".to_sym unless name
|
201
|
+
|
202
|
+
self.callables[name] = block
|
203
|
+
|
204
|
+
rxs = rx.to_s
|
205
|
+
named_rxs = "\\A(?<#{name}>#{rxs})"
|
206
|
+
|
207
|
+
collector << named_rxs
|
208
|
+
end
|
209
|
+
|
210
|
+
rxss = collector.join('|')
|
211
|
+
@token_regexps = Regexp.new(rxss)
|
176
212
|
end
|
213
|
+
|
214
|
+
def metatokens_list
|
215
|
+
@metatokens_list ||= []
|
216
|
+
end
|
217
|
+
|
218
|
+
def metatokens(*args)
|
219
|
+
@metatokens_list = args
|
220
|
+
end
|
221
|
+
|
177
222
|
# Returns the list of registered tokens
|
178
223
|
#
|
179
224
|
# @api private
|
data/lib/rly/lex_token.rb
CHANGED
@@ -1,13 +1,14 @@
|
|
1
1
|
module Rly
|
2
2
|
|
3
3
|
class LexToken
|
4
|
-
attr_accessor :value
|
5
|
-
attr_reader :
|
4
|
+
attr_accessor :value, :type, :location_info
|
5
|
+
attr_reader :lexer
|
6
6
|
|
7
|
-
def initialize(type, value, lexer)
|
7
|
+
def initialize(type, value, lexer, pos=0, lineno=0, filename=nil)
|
8
8
|
@type = type
|
9
9
|
@value = value
|
10
10
|
@lexer = lexer
|
11
|
+
@location_info = { pos: pos, lineno: lineno, filename: filename }
|
11
12
|
end
|
12
13
|
|
13
14
|
def to_s
|
data/lib/rly/parse/grammar.rb
CHANGED
@@ -45,7 +45,7 @@ module Rly
|
|
45
45
|
end
|
46
46
|
|
47
47
|
mapname = "#{name.to_s} -> #{symbols.to_s}"
|
48
|
-
raise ArgumentError if @prodmap[mapname]
|
48
|
+
raise ArgumentError.new("Production #{mapname} is already defined!") if @prodmap[mapname]
|
49
49
|
|
50
50
|
index = @productions.count
|
51
51
|
@nonterminals[name] = [] unless @nonterminals[name]
|
@@ -79,6 +79,7 @@ module Rly
|
|
79
79
|
end
|
80
80
|
|
81
81
|
def set_start(symbol=nil)
|
82
|
+
raise RuntimeError.new("No productions defined in #{self}") if @productions.empty?
|
82
83
|
symbol = @productions[1].name unless symbol
|
83
84
|
raise ArgumentError unless @nonterminals[symbol]
|
84
85
|
@productions[0] = Production.new(0, :"S'", [symbol])
|
@@ -119,6 +120,7 @@ module Rly
|
|
119
120
|
while true
|
120
121
|
any_changes = false
|
121
122
|
nonterminals.keys.each do |n|
|
123
|
+
raise RuntimeError.new("Unefined production '#{n}'") unless @prodnames[n]
|
122
124
|
@prodnames[n].each do |p|
|
123
125
|
_first(p.prod).each do |f|
|
124
126
|
unless @first[n].include?(f)
|
data/lib/rly/parse/lr_table.rb
CHANGED
@@ -81,17 +81,17 @@ module Rly
|
|
81
81
|
# Need to decide on shift or reduce here
|
82
82
|
# By default we favor shifting. Need to add
|
83
83
|
# some precedence rules here.
|
84
|
-
sprec, slevel = productions[st_actionp[a].
|
84
|
+
sprec, slevel = productions[st_actionp[a].index].precedence
|
85
85
|
rprec, rlevel = precedence[a] || [:right, 0]
|
86
86
|
if (slevel < rlevel) || ((slevel == rlevel) && (rprec == :left))
|
87
87
|
# We really need to reduce here.
|
88
|
-
st_action[a] = -p.
|
88
|
+
st_action[a] = -p.index
|
89
89
|
st_actionp[a] = p
|
90
90
|
if ! slevel && ! rlevel
|
91
91
|
log.info(" ! shift/reduce conflict for %s resolved as reduce",a)
|
92
92
|
@sr_conflicts << [st, a, 'reduce']
|
93
93
|
end
|
94
|
-
productions[p.
|
94
|
+
productions[p.index].reduced += 1
|
95
95
|
elsif (slevel == rlevel) && (rprec == :nonassoc)
|
96
96
|
st_action[a] = nil
|
97
97
|
else
|
@@ -105,19 +105,19 @@ module Rly
|
|
105
105
|
# Reduce/reduce conflict. In this case, we favor the rule
|
106
106
|
# that was defined first in the grammar file
|
107
107
|
oldp = productions[-r]
|
108
|
-
pp = productions[p.
|
108
|
+
pp = productions[p.index]
|
109
109
|
if oldp.line > pp.line
|
110
|
-
st_action[a] = -p.
|
110
|
+
st_action[a] = -p.index
|
111
111
|
st_actionp[a] = p
|
112
112
|
chosenp = pp
|
113
113
|
rejectp = oldp
|
114
|
-
productions[p.
|
115
|
-
productions[oldp.
|
114
|
+
productions[p.index].reduced += 1
|
115
|
+
productions[oldp.index].reduced -= 1
|
116
116
|
else
|
117
117
|
chosenp,rejectp = oldp,pp
|
118
118
|
end
|
119
119
|
@rr_conflicts << [st, chosenp, rejectp]
|
120
|
-
log.info(" ! reduce/reduce conflict for %s resolved using rule %d (%s)", a, st_actionp[a].
|
120
|
+
log.info(" ! reduce/reduce conflict for %s resolved using rule %d (%s)", a, st_actionp[a].index, st_actionp[a])
|
121
121
|
else
|
122
122
|
raise RuntimeError("Unknown conflict in state #{st}")
|
123
123
|
end
|
@@ -128,8 +128,7 @@ module Rly
|
|
128
128
|
end
|
129
129
|
end
|
130
130
|
end
|
131
|
-
else
|
132
|
-
# i = p.lr_index
|
131
|
+
else
|
133
132
|
a = p.prod[p.lr_index+1] # Get symbol right after the "."
|
134
133
|
if @grammar.terminals.include?(a)
|
135
134
|
g = lr0_goto(i, a)
|
data/lib/rly/parse/production.rb
CHANGED
data/lib/rly/version.rb
CHANGED
data/lib/rly/yacc.rb
CHANGED
@@ -22,6 +22,8 @@ module Rly
|
|
22
22
|
end
|
23
23
|
|
24
24
|
def parse(input=nil, trace=false)
|
25
|
+
@trace = trace
|
26
|
+
|
25
27
|
lookahead = nil
|
26
28
|
lookaheadstack = []
|
27
29
|
actions = @lr_table.lr_action
|
@@ -56,7 +58,7 @@ module Rly
|
|
56
58
|
# is already set, we just use that. Otherwise, we'll pull
|
57
59
|
# the next token off of the lookaheadstack or from the lexer
|
58
60
|
|
59
|
-
puts "State : #{state}" if trace
|
61
|
+
puts "State : #{state}" if @trace
|
60
62
|
|
61
63
|
unless lookahead
|
62
64
|
if lookaheadstack.empty?
|
@@ -70,7 +72,7 @@ module Rly
|
|
70
72
|
end
|
71
73
|
end
|
72
74
|
|
73
|
-
puts "Stack : #{(@symstack[1..-1].map{|s|s.type}.join(' ') + ' ' + lookahead.inspect).lstrip}" if trace
|
75
|
+
puts "Stack : #{(@symstack[1..-1].map{|s|s.type}.join(' ') + ' ' + lookahead.inspect).lstrip}" if @trace
|
74
76
|
|
75
77
|
# Check the action table
|
76
78
|
ltype = lookahead.type
|
@@ -82,7 +84,7 @@ module Rly
|
|
82
84
|
@statestack.push(t)
|
83
85
|
state = t
|
84
86
|
|
85
|
-
puts "Action : Shift and goto state #{t}" if trace
|
87
|
+
puts "Action : Shift and goto state #{t}" if @trace
|
86
88
|
|
87
89
|
@symstack.push(lookahead)
|
88
90
|
lookahead = nil
|
@@ -103,9 +105,9 @@ module Rly
|
|
103
105
|
sym.type = pname
|
104
106
|
sym.value = nil
|
105
107
|
|
106
|
-
if trace
|
108
|
+
if @trace
|
107
109
|
if plen
|
108
|
-
puts "Action : Reduce rule [#{p}] with [#{@symstack[-plen..@symstack.length].map{|s|s.
|
110
|
+
puts "Action : Reduce rule [#{p}] with [#{@symstack[-plen..@symstack.length].map{|s|s.inspect}.join(', ')}] and goto state #{-t}"
|
109
111
|
else
|
110
112
|
puts "Action : Reduce rule [#{p}] with [] and goto state #{-t}"
|
111
113
|
end
|
@@ -127,7 +129,7 @@ module Rly
|
|
127
129
|
@statestack.pop(plen)
|
128
130
|
instance_exec(*targ, &p.block)
|
129
131
|
|
130
|
-
puts "Result : #{targ[0].
|
132
|
+
puts "Result : #{targ[0].inspect}" if @trace
|
131
133
|
|
132
134
|
@symstack.push(sym)
|
133
135
|
state = goto[@statestack[-1]][pname]
|
@@ -160,7 +162,7 @@ module Rly
|
|
160
162
|
@statestack.pop(plen)
|
161
163
|
pslice[0] = instance_exec(*pslice, &p.block)
|
162
164
|
|
163
|
-
puts "Result : #{targ[0].value}" if trace
|
165
|
+
puts "Result : #{targ[0].value}" if @trace
|
164
166
|
|
165
167
|
@symstack.push(sym)
|
166
168
|
state = goto[@statestack[-1]][pname]
|
@@ -185,7 +187,7 @@ module Rly
|
|
185
187
|
n = @symstack[-1]
|
186
188
|
result = n.value
|
187
189
|
|
188
|
-
puts "Done : Returning #{result}" if trace
|
190
|
+
puts "Done : Returning #{result}" if @trace
|
189
191
|
|
190
192
|
return result
|
191
193
|
end
|
@@ -209,12 +211,7 @@ module Rly
|
|
209
211
|
errtoken = nil if errtoken.type == :"$end"
|
210
212
|
|
211
213
|
if self.class.error_handler
|
212
|
-
|
213
|
-
token = @lex.next
|
214
|
-
restart = @restart
|
215
|
-
errtoken.lex = @lex if errtoken
|
216
|
-
|
217
|
-
tok = self.class.error_handler.call(errtoken)
|
214
|
+
tok = self.instance_exec(errtoken, &self.class.error_handler)
|
218
215
|
|
219
216
|
if @errorok
|
220
217
|
# User must have done some kind of panic
|
@@ -226,14 +223,10 @@ module Rly
|
|
226
223
|
end
|
227
224
|
else
|
228
225
|
if errtoken
|
229
|
-
|
230
|
-
|
231
|
-
#if lineno:
|
232
|
-
# sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
|
233
|
-
#else:
|
234
|
-
# sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type)
|
226
|
+
location_info = lookahead.location_info
|
227
|
+
puts "Fail : Syntax error at #{location_info}, token='#{errtoken}'" if @trace
|
235
228
|
else
|
236
|
-
|
229
|
+
puts "Fail : Parse error in input. EOF" if @trace
|
237
230
|
return nil
|
238
231
|
end
|
239
232
|
end
|
@@ -361,13 +354,19 @@ module Rly
|
|
361
354
|
3
|
362
355
|
end
|
363
356
|
|
357
|
+
def on_error(lambda)
|
358
|
+
@error_handler = lambda
|
359
|
+
end
|
360
|
+
|
364
361
|
def parsed_rules
|
365
362
|
return @parsed_rules if @parsed_rules
|
366
363
|
|
367
364
|
@parsed_rules = []
|
368
365
|
rp = RuleParser.new
|
369
366
|
self.rules.each do |desc, block|
|
370
|
-
rp.parse(desc)
|
367
|
+
rules = rp.parse(desc)
|
368
|
+
raise RuntimeError.new("Failed to parse rules: #{desc}") unless rules
|
369
|
+
rules.each do |(pname, p, prec)|
|
371
370
|
@parsed_rules << [pname, p, prec, block]
|
372
371
|
end
|
373
372
|
end
|
@@ -17,4 +17,15 @@ describe Rly::RuleParser do
|
|
17
17
|
productions[2].should == [:expression, [:expression, '*', :expression], nil]
|
18
18
|
productions[3].should == [:expression, [:expression, '/', :expression], nil]
|
19
19
|
end
|
20
|
+
|
21
|
+
it "tokenizes the rule correctly" do
|
22
|
+
s = 'maybe_superclasses : ":" superclasses |'
|
23
|
+
l = Rly::RuleParser.lexer_class.new(s)
|
24
|
+
|
25
|
+
l.next.type.should == :ID
|
26
|
+
l.next.type.should == ':'
|
27
|
+
l.next.type.should == :LITERAL
|
28
|
+
l.next.type.should == :ID
|
29
|
+
l.next.type.should == '|'
|
30
|
+
end
|
20
31
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rly
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.1
|
4
|
+
version: 0.2.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-
|
12
|
+
date: 2012-12-30 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rspec
|
@@ -43,6 +43,7 @@ files:
|
|
43
43
|
- Rakefile
|
44
44
|
- assets/ply_dump.erb
|
45
45
|
- lib/rly.rb
|
46
|
+
- lib/rly/file_lex.rb
|
46
47
|
- lib/rly/lex.rb
|
47
48
|
- lib/rly/lex_token.rb
|
48
49
|
- lib/rly/parse/grammar.rb
|