rly 0.2.1 → 0.2.2

Sign up to get free protection for your applications and to get access to all the features.
data/lib/rly.rb CHANGED
@@ -1,5 +1,6 @@
1
1
  require "rly/version"
2
2
  require "rly/lex"
3
+ require "rly/file_lex"
3
4
  require "rly/yacc"
4
5
  require "rly/parse/rule_parser"
5
6
 
@@ -0,0 +1,45 @@
1
+ require "rly/lex"
2
+
3
+ module Rly
4
+ class FileLex < Lex
5
+ def initialize(fn=nil)
6
+ @inputstack = []
7
+ push_file(fn) if fn
8
+ end
9
+
10
+ def push_file(fn)
11
+ @inputstack.push([@input, @pos, @filename]) if @filename
12
+
13
+ @filename = fn
14
+ @input = open(fn).read
15
+ @pos = 0
16
+ end
17
+
18
+ def pop_file
19
+ (@input, @pos, @filename) = @inputstack.pop
20
+ end
21
+
22
+ def next
23
+ begin
24
+ tok = super
25
+
26
+ if tok
27
+ return tok
28
+ else
29
+ if @inputstack.empty?
30
+ return nil
31
+ else
32
+ pop_file
33
+ redo
34
+ end
35
+ end
36
+ end until tok
37
+ end
38
+
39
+ def build_token(type, value)
40
+ tok = LexToken.new(type, value, self, @pos, @lineno)
41
+ tok.location_info[:filename] = @filename
42
+ tok
43
+ end
44
+ end
45
+ end
@@ -119,61 +119,106 @@ module Rly
119
119
  def next
120
120
  while @pos < @input.length
121
121
  if self.class.ignores_list[@input[@pos]]
122
- @pos += 1
122
+ ignore_symbol
123
123
  next
124
124
  end
125
125
 
126
- matched = false
127
- self.class.tokens.each do |type, rule, block|
128
- m = rule.match(@input, @pos)
129
- next unless m
130
- next unless m.begin(0) == @pos
131
-
132
- tok = LexToken.new(type, m[0], self)
133
-
134
- matched = true
135
-
136
- tok = block.call(tok) if block
137
-
138
- @pos = m.end(0)
139
-
140
- return tok if tok.type
141
- end
142
-
143
- unless matched
144
- if self.class.literals_list[@input[@pos]]
145
- tok = LexToken.new(@input[@pos], @input[@pos], self)
126
+ m = self.class.token_regexps.match(@input[@pos..-1])
146
127
 
147
- matched = true
128
+ if m && ! m[0].empty?
129
+ val = nil
130
+ type = nil
131
+ resolved_type = nil
132
+ m.names.each do |n|
133
+ if m[n]
134
+ type = n.to_sym
135
+ resolved_type = (n.start_with?('__anonymous_') ? nil : type)
136
+ val = m[n]
137
+ break
138
+ end
139
+ end
148
140
 
149
- @pos += 1
141
+ if type
142
+ tok = build_token(resolved_type, val)
143
+ @pos += m.end(0)
144
+ tok = self.class.callables[type].call(tok) if self.class.callables[type]
150
145
 
151
- return tok
146
+ if tok && tok.type
147
+ return tok
148
+ else
149
+ next
150
+ end
152
151
  end
153
152
  end
153
+
154
+ if self.class.literals_list[@input[@pos]]
155
+ tok = build_token(@input[@pos], @input[@pos])
156
+ matched = true
157
+ @pos += 1
158
+ return tok
159
+ end
154
160
 
155
- unless matched
156
- if self.class.error_hander
157
- pos = @pos
158
- tok = LexToken.new(:error, @input[@pos], self)
159
- tok = self.class.error_hander.call(tok)
160
- if pos == @pos
161
- raise LexError.new("Illegal character '#{@input[@pos]}' at index #{@pos}")
162
- else
163
- return tok if tok && tok.type
164
- end
165
- else
161
+ if self.class.error_hander
162
+ pos = @pos
163
+ tok = build_token(:error, @input[@pos])
164
+ tok = self.class.error_hander.call(tok)
165
+ if pos == @pos
166
166
  raise LexError.new("Illegal character '#{@input[@pos]}' at index #{@pos}")
167
+ else
168
+ return tok if tok && tok.type
167
169
  end
170
+ else
171
+ raise LexError.new("Illegal character '#{@input[@pos]}' at index #{@pos}")
168
172
  end
173
+
169
174
  end
170
175
  return nil
171
176
  end
172
177
 
178
+ def build_token(type, value)
179
+ LexToken.new(type, value, self, @pos, @lineno)
180
+ end
181
+
182
+ def ignore_symbol
183
+ @pos += 1
184
+ end
185
+
173
186
  class << self
174
187
  def terminals
175
- self.tokens.map { |t,r,b| t }.compact + self.literals_list.chars.to_a
188
+ self.tokens.map { |t,r,b| t }.compact + self.literals_list.chars.to_a + self.metatokens_list
189
+ end
190
+
191
+ def callables
192
+ @callables ||= {}
193
+ end
194
+
195
+ def token_regexps
196
+ return @token_regexps if @token_regexps
197
+
198
+ collector = []
199
+ self.tokens.each do |name, rx, block|
200
+ name = "__anonymous_#{block.hash}".to_sym unless name
201
+
202
+ self.callables[name] = block
203
+
204
+ rxs = rx.to_s
205
+ named_rxs = "\\A(?<#{name}>#{rxs})"
206
+
207
+ collector << named_rxs
208
+ end
209
+
210
+ rxss = collector.join('|')
211
+ @token_regexps = Regexp.new(rxss)
176
212
  end
213
+
214
+ def metatokens_list
215
+ @metatokens_list ||= []
216
+ end
217
+
218
+ def metatokens(*args)
219
+ @metatokens_list = args
220
+ end
221
+
177
222
  # Returns the list of registered tokens
178
223
  #
179
224
  # @api private
@@ -1,13 +1,14 @@
1
1
  module Rly
2
2
 
3
3
  class LexToken
4
- attr_accessor :value
5
- attr_reader :type, :lexer
4
+ attr_accessor :value, :type, :location_info
5
+ attr_reader :lexer
6
6
 
7
- def initialize(type, value, lexer)
7
+ def initialize(type, value, lexer, pos=0, lineno=0, filename=nil)
8
8
  @type = type
9
9
  @value = value
10
10
  @lexer = lexer
11
+ @location_info = { pos: pos, lineno: lineno, filename: filename }
11
12
  end
12
13
 
13
14
  def to_s
@@ -45,7 +45,7 @@ module Rly
45
45
  end
46
46
 
47
47
  mapname = "#{name.to_s} -> #{symbols.to_s}"
48
- raise ArgumentError if @prodmap[mapname]
48
+ raise ArgumentError.new("Production #{mapname} is already defined!") if @prodmap[mapname]
49
49
 
50
50
  index = @productions.count
51
51
  @nonterminals[name] = [] unless @nonterminals[name]
@@ -79,6 +79,7 @@ module Rly
79
79
  end
80
80
 
81
81
  def set_start(symbol=nil)
82
+ raise RuntimeError.new("No productions defined in #{self}") if @productions.empty?
82
83
  symbol = @productions[1].name unless symbol
83
84
  raise ArgumentError unless @nonterminals[symbol]
84
85
  @productions[0] = Production.new(0, :"S'", [symbol])
@@ -119,6 +120,7 @@ module Rly
119
120
  while true
120
121
  any_changes = false
121
122
  nonterminals.keys.each do |n|
123
+ raise RuntimeError.new("Unefined production '#{n}'") unless @prodnames[n]
122
124
  @prodnames[n].each do |p|
123
125
  _first(p.prod).each do |f|
124
126
  unless @first[n].include?(f)
@@ -81,17 +81,17 @@ module Rly
81
81
  # Need to decide on shift or reduce here
82
82
  # By default we favor shifting. Need to add
83
83
  # some precedence rules here.
84
- sprec, slevel = productions[st_actionp[a].number].prec
84
+ sprec, slevel = productions[st_actionp[a].index].precedence
85
85
  rprec, rlevel = precedence[a] || [:right, 0]
86
86
  if (slevel < rlevel) || ((slevel == rlevel) && (rprec == :left))
87
87
  # We really need to reduce here.
88
- st_action[a] = -p.number
88
+ st_action[a] = -p.index
89
89
  st_actionp[a] = p
90
90
  if ! slevel && ! rlevel
91
91
  log.info(" ! shift/reduce conflict for %s resolved as reduce",a)
92
92
  @sr_conflicts << [st, a, 'reduce']
93
93
  end
94
- productions[p.number].reduced += 1
94
+ productions[p.index].reduced += 1
95
95
  elsif (slevel == rlevel) && (rprec == :nonassoc)
96
96
  st_action[a] = nil
97
97
  else
@@ -105,19 +105,19 @@ module Rly
105
105
  # Reduce/reduce conflict. In this case, we favor the rule
106
106
  # that was defined first in the grammar file
107
107
  oldp = productions[-r]
108
- pp = productions[p.number]
108
+ pp = productions[p.index]
109
109
  if oldp.line > pp.line
110
- st_action[a] = -p.number
110
+ st_action[a] = -p.index
111
111
  st_actionp[a] = p
112
112
  chosenp = pp
113
113
  rejectp = oldp
114
- productions[p.number].reduced += 1
115
- productions[oldp.number].reduced -= 1
114
+ productions[p.index].reduced += 1
115
+ productions[oldp.index].reduced -= 1
116
116
  else
117
117
  chosenp,rejectp = oldp,pp
118
118
  end
119
119
  @rr_conflicts << [st, chosenp, rejectp]
120
- log.info(" ! reduce/reduce conflict for %s resolved using rule %d (%s)", a, st_actionp[a].number, st_actionp[a])
120
+ log.info(" ! reduce/reduce conflict for %s resolved using rule %d (%s)", a, st_actionp[a].index, st_actionp[a])
121
121
  else
122
122
  raise RuntimeError("Unknown conflict in state #{st}")
123
123
  end
@@ -128,8 +128,7 @@ module Rly
128
128
  end
129
129
  end
130
130
  end
131
- else # <-- level ok
132
- # i = p.lr_index
131
+ else
133
132
  a = p.prod[p.lr_index+1] # Get symbol right after the "."
134
133
  if @grammar.terminals.include?(a)
135
134
  g = lr0_goto(i, a)
@@ -27,6 +27,10 @@ module Rly
27
27
  @prod.length
28
28
  end
29
29
 
30
+ def line
31
+ @line ||= 0
32
+ end
33
+
30
34
  def lr0_added
31
35
  @lr0_added ||= 0
32
36
  end
@@ -49,6 +49,9 @@ module Rly
49
49
  @grammar.add_production(:rules, [:rule]) do |rl, r|
50
50
  rl.value = [r.value]
51
51
  end
52
+ @grammar.add_production(:rules, []) do |rl|
53
+ rl.value = [[]]
54
+ end
52
55
  @grammar.add_production(:rule, [:tokens]) do |r, tok|
53
56
  r.value = tok.value
54
57
  end
@@ -7,7 +7,7 @@ module Rly
7
7
  end
8
8
 
9
9
  def inspect
10
- "#<YaccSymbol #{@type} '#{@value}'>"
10
+ "#<YaccSymbol #{@type} '#{to_s}'>"
11
11
  end
12
12
  end
13
13
  end
@@ -1,4 +1,4 @@
1
1
  module Rly
2
- VERSION = "0.2.1"
2
+ VERSION = "0.2.2"
3
3
  PLY_BASE_VERSION = "3.4"
4
4
  end
@@ -22,6 +22,8 @@ module Rly
22
22
  end
23
23
 
24
24
  def parse(input=nil, trace=false)
25
+ @trace = trace
26
+
25
27
  lookahead = nil
26
28
  lookaheadstack = []
27
29
  actions = @lr_table.lr_action
@@ -56,7 +58,7 @@ module Rly
56
58
  # is already set, we just use that. Otherwise, we'll pull
57
59
  # the next token off of the lookaheadstack or from the lexer
58
60
 
59
- puts "State : #{state}" if trace
61
+ puts "State : #{state}" if @trace
60
62
 
61
63
  unless lookahead
62
64
  if lookaheadstack.empty?
@@ -70,7 +72,7 @@ module Rly
70
72
  end
71
73
  end
72
74
 
73
- puts "Stack : #{(@symstack[1..-1].map{|s|s.type}.join(' ') + ' ' + lookahead.inspect).lstrip}" if trace
75
+ puts "Stack : #{(@symstack[1..-1].map{|s|s.type}.join(' ') + ' ' + lookahead.inspect).lstrip}" if @trace
74
76
 
75
77
  # Check the action table
76
78
  ltype = lookahead.type
@@ -82,7 +84,7 @@ module Rly
82
84
  @statestack.push(t)
83
85
  state = t
84
86
 
85
- puts "Action : Shift and goto state #{t}" if trace
87
+ puts "Action : Shift and goto state #{t}" if @trace
86
88
 
87
89
  @symstack.push(lookahead)
88
90
  lookahead = nil
@@ -103,9 +105,9 @@ module Rly
103
105
  sym.type = pname
104
106
  sym.value = nil
105
107
 
106
- if trace
108
+ if @trace
107
109
  if plen
108
- puts "Action : Reduce rule [#{p}] with [#{@symstack[-plen..@symstack.length].map{|s|s.value}.join(', ')}] and goto state #{-t}"
110
+ puts "Action : Reduce rule [#{p}] with [#{@symstack[-plen..@symstack.length].map{|s|s.inspect}.join(', ')}] and goto state #{-t}"
109
111
  else
110
112
  puts "Action : Reduce rule [#{p}] with [] and goto state #{-t}"
111
113
  end
@@ -127,7 +129,7 @@ module Rly
127
129
  @statestack.pop(plen)
128
130
  instance_exec(*targ, &p.block)
129
131
 
130
- puts "Result : #{targ[0].value}" if trace
132
+ puts "Result : #{targ[0].inspect}" if @trace
131
133
 
132
134
  @symstack.push(sym)
133
135
  state = goto[@statestack[-1]][pname]
@@ -160,7 +162,7 @@ module Rly
160
162
  @statestack.pop(plen)
161
163
  pslice[0] = instance_exec(*pslice, &p.block)
162
164
 
163
- puts "Result : #{targ[0].value}" if trace
165
+ puts "Result : #{targ[0].value}" if @trace
164
166
 
165
167
  @symstack.push(sym)
166
168
  state = goto[@statestack[-1]][pname]
@@ -185,7 +187,7 @@ module Rly
185
187
  n = @symstack[-1]
186
188
  result = n.value
187
189
 
188
- puts "Done : Returning #{result}" if trace
190
+ puts "Done : Returning #{result}" if @trace
189
191
 
190
192
  return result
191
193
  end
@@ -209,12 +211,7 @@ module Rly
209
211
  errtoken = nil if errtoken.type == :"$end"
210
212
 
211
213
  if self.class.error_handler
212
- errok = @errok
213
- token = @lex.next
214
- restart = @restart
215
- errtoken.lex = @lex if errtoken
216
-
217
- tok = self.class.error_handler.call(errtoken)
214
+ tok = self.instance_exec(errtoken, &self.class.error_handler)
218
215
 
219
216
  if @errorok
220
217
  # User must have done some kind of panic
@@ -226,14 +223,10 @@ module Rly
226
223
  end
227
224
  else
228
225
  if errtoken
229
- # if hasattr(errtoken,"lineno"): lineno = lookahead.lineno
230
- # else: lineno = 0
231
- #if lineno:
232
- # sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
233
- #else:
234
- # sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type)
226
+ location_info = lookahead.location_info
227
+ puts "Fail : Syntax error at #{location_info}, token='#{errtoken}'" if @trace
235
228
  else
236
- # sys.stderr.write("yacc: Parse error in input. EOF\n")
229
+ puts "Fail : Parse error in input. EOF" if @trace
237
230
  return nil
238
231
  end
239
232
  end
@@ -361,13 +354,19 @@ module Rly
361
354
  3
362
355
  end
363
356
 
357
+ def on_error(lambda)
358
+ @error_handler = lambda
359
+ end
360
+
364
361
  def parsed_rules
365
362
  return @parsed_rules if @parsed_rules
366
363
 
367
364
  @parsed_rules = []
368
365
  rp = RuleParser.new
369
366
  self.rules.each do |desc, block|
370
- rp.parse(desc).each do |(pname, p, prec)|
367
+ rules = rp.parse(desc)
368
+ raise RuntimeError.new("Failed to parse rules: #{desc}") unless rules
369
+ rules.each do |(pname, p, prec)|
371
370
  @parsed_rules << [pname, p, prec, block]
372
371
  end
373
372
  end
@@ -17,4 +17,15 @@ describe Rly::RuleParser do
17
17
  productions[2].should == [:expression, [:expression, '*', :expression], nil]
18
18
  productions[3].should == [:expression, [:expression, '/', :expression], nil]
19
19
  end
20
+
21
+ it "tokenizes the rule correctly" do
22
+ s = 'maybe_superclasses : ":" superclasses |'
23
+ l = Rly::RuleParser.lexer_class.new(s)
24
+
25
+ l.next.type.should == :ID
26
+ l.next.type.should == ':'
27
+ l.next.type.should == :LITERAL
28
+ l.next.type.should == :ID
29
+ l.next.type.should == '|'
30
+ end
20
31
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rly
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.2.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-11-18 00:00:00.000000000 Z
12
+ date: 2012-12-30 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rspec
@@ -43,6 +43,7 @@ files:
43
43
  - Rakefile
44
44
  - assets/ply_dump.erb
45
45
  - lib/rly.rb
46
+ - lib/rly/file_lex.rb
46
47
  - lib/rly/lex.rb
47
48
  - lib/rly/lex_token.rb
48
49
  - lib/rly/parse/grammar.rb