rly 0.2.1 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/rly.rb CHANGED
@@ -1,5 +1,6 @@
1
1
  require "rly/version"
2
2
  require "rly/lex"
3
+ require "rly/file_lex"
3
4
  require "rly/yacc"
4
5
  require "rly/parse/rule_parser"
5
6
 
@@ -0,0 +1,45 @@
1
+ require "rly/lex"
2
+
3
module Rly
  # A lexer that takes its input from files and supports nested inclusion.
  #
  # Input files form a stack: {#push_file} suspends the file currently being
  # lexed and switches to a new one; once the new file is exhausted, {#next}
  # resumes the suspended file at the position where it was interrupted.
  class FileLex < Lex
    # @param fn [String, nil] path of the first file to lex; when nil, no input
    #   is loaded until {#push_file} is called
    #
    # NOTE(review): Lex#initialize is not invoked, so any state it sets up
    # (e.g. @lineno) is left unset here -- confirm this is intended.
    def initialize(fn=nil)
      @inputstack = []
      push_file(fn) if fn
    end

    # Starts lexing +fn+ from its beginning. If another file is currently
    # being lexed, its input, position and name are saved so that lexing
    # resumes there once +fn+ is exhausted.
    #
    # @param fn [String] path of the file to read
    # @raise [SystemCallError] if the file cannot be read; the lexer state is
    #   left untouched in that case
    def push_file(fn)
      # Read before touching any state so a failed read cannot leave a bogus
      # frame on the stack. File.read closes the handle and, unlike
      # Kernel#open, never interprets a leading "|" as a command to run.
      contents = File.read(fn)

      @inputstack.push([@input, @pos, @filename]) if @filename

      @filename = fn
      @input = contents
      @pos = 0
    end

    # Restores the most recently suspended file as the current input.
    # With an empty stack, input, position and filename all become nil.
    def pop_file
      @input, @pos, @filename = @inputstack.pop
    end

    # Returns the next token, falling back to the suspended files when the
    # current one runs out.
    #
    # @return [LexToken, nil] the next token, or nil once every file on the
    #   stack has been exhausted
    def next
      until (tok = super)
        return nil if @inputstack.empty?

        pop_file
      end

      tok
    end

    # Builds a token stamped with the current position, line number and the
    # name of the file being lexed.
    def build_token(type, value)
      LexToken.new(type, value, self, @pos, @lineno, @filename)
    end
  end
end
@@ -119,61 +119,106 @@ module Rly
119
119
# Returns the next token from the input, or nil once the input is exhausted.
#
# At each position the lexer tries, in order:
# 1. characters from the class's ignore list, which are skipped;
# 2. the combined token regexp (see token_regexps); the first named group
#    that captured identifies the matching rule, and the rule's callable,
#    if any, may transform or drop the token;
# 3. single-character literals;
# 4. the class-level error handler, which must advance @pos itself.
#
# Tokens without a type (anonymous rules, or tokens whose callable returned
# nil or cleared the type) are dropped and lexing continues.
#
# @return [LexToken, nil]
# @raise [LexError] on a character no rule accepts when there is no error
#   handler, or when the handler did not advance the position
def next
  while @pos < @input.length
    if self.class.ignores_list[@input[@pos]]
      ignore_symbol
      next
    end

    # Every alternative is anchored with \A, hence the match against the
    # remainder of the input rather than against the whole string.
    m = self.class.token_regexps.match(@input[@pos..-1])

    if m && !m[0].empty?
      name = m.names.find { |n| m[n] }

      if name
        type = name.to_sym
        # Anonymous rules yield type-less tokens, which are never returned.
        resolved_type = name.start_with?('__anonymous_') ? nil : type

        # The token records the position before the match is consumed.
        tok = build_token(resolved_type, m[name])
        @pos += m.end(0)

        handler = self.class.callables[type]
        tok = handler.call(tok) if handler

        return tok if tok && tok.type

        next
      end
    end

    if self.class.literals_list[@input[@pos]]
      tok = build_token(@input[@pos], @input[@pos])
      @pos += 1
      return tok
    end

    on_error = self.class.error_hander
    unless on_error
      raise LexError, "Illegal character '#{@input[@pos]}' at index #{@pos}"
    end

    pos = @pos
    tok = on_error.call(build_token(:error, @input[@pos]))

    # A handler that did not move the position would loop forever.
    if pos == @pos
      raise LexError, "Illegal character '#{@input[@pos]}' at index #{@pos}"
    end

    return tok if tok && tok.type
  end

  nil
end
172
177
 
178
# Creates a token of the given type and value, stamped with this lexer and
# its current position and line number.
def build_token(type, value)
  position = @pos
  line_number = @lineno
  LexToken.new(type, value, self, position, line_number)
end
181
+
182
# Skips the ignored character at the current position by advancing one step.
def ignore_symbol
  @pos = @pos + 1
end
185
+
173
186
  class << self
174
187
# Lists every terminal symbol: the named tokens (anonymous rules are left
# out), each literal character, and the declared metatokens, in that order.
def terminals
  token_names = tokens.map { |name, _rx, _handler| name }.compact
  literal_chars = literals_list.chars.to_a
  token_names + literal_chars + metatokens_list
end
190
+
191
# Lazily created hash mapping a rule name to the callable stored for it.
def callables
  @callables = {} unless @callables
  @callables
end
194
+
195
# Compiles all token rules into one regexp and memoizes it.
#
# Each rule becomes an alternative of the form \A(?<name>pattern), so the
# regexp only matches at the start of the string it is given and the named
# group that captured tells which rule matched. As a side effect, the
# callable of every rule is stored in callables under the rule's name.
#
# Rules without a name are given a synthetic one, "__anonymous_<index>",
# where index is the rule's position in tokens. The index is used because a
# group name must consist of word characters only: a name derived from
# Object#hash can contain a minus sign, and would also be identical for all
# anonymous rules that have no callable.
#
# @return [Regexp]
def token_regexps
  return @token_regexps if @token_regexps

  alternatives = tokens.each_with_index.map do |(name, rx, block), index|
    name ||= :"__anonymous_#{index}"
    callables[name] = block

    "\\A(?<#{name}>#{rx})"
  end

  @token_regexps = Regexp.new(alternatives.join('|'))
end
213
+
214
# Returns the declared metatokens; an empty list until metatokens is called.
def metatokens_list
  @metatokens_list || (@metatokens_list = [])
end
217
+
218
# Declares the metatokens, replacing any previous declaration. They are
# appended to the list returned by terminals.
#
# @param args [Array] the metatoken names
# @return [Array] the stored list
def metatokens(*args)
  @metatokens_list = args
end
221
+
177
222
  # Returns the list of registered tokens
178
223
  #
179
224
  # @api private
@@ -1,13 +1,14 @@
1
1
  module Rly
2
2
 
3
3
  class LexToken
4
- attr_accessor :value
5
- attr_reader :type, :lexer
4
+ attr_accessor :value, :type, :location_info
5
+ attr_reader :lexer
6
6
 
7
# @param type     token type
# @param value    token value
# @param lexer    the lexer that produced the token
# @param pos      position recorded in location_info (0 by default)
# @param lineno   line number recorded in location_info (0 by default)
# @param filename file name recorded in location_info (nil by default)
def initialize(type, value, lexer, pos=0, lineno=0, filename=nil)
  @type, @value, @lexer = type, value, lexer
  @location_info = { pos: pos, lineno: lineno, filename: filename }
end
12
13
 
13
14
  def to_s
@@ -45,7 +45,7 @@ module Rly
45
45
  end
46
46
 
47
47
  mapname = "#{name.to_s} -> #{symbols.to_s}"
48
- raise ArgumentError if @prodmap[mapname]
48
+ raise ArgumentError.new("Production #{mapname} is already defined!") if @prodmap[mapname]
49
49
 
50
50
  index = @productions.count
51
51
  @nonterminals[name] = [] unless @nonterminals[name]
@@ -79,6 +79,7 @@ module Rly
79
79
  end
80
80
 
81
81
  def set_start(symbol=nil)
82
+ raise RuntimeError.new("No productions defined in #{self}") if @productions.empty?
82
83
  symbol = @productions[1].name unless symbol
83
84
  raise ArgumentError unless @nonterminals[symbol]
84
85
  @productions[0] = Production.new(0, :"S'", [symbol])
@@ -119,6 +120,7 @@ module Rly
119
120
  while true
120
121
  any_changes = false
121
122
  nonterminals.keys.each do |n|
123
+ raise RuntimeError.new("Unefined production '#{n}'") unless @prodnames[n]
122
124
  @prodnames[n].each do |p|
123
125
  _first(p.prod).each do |f|
124
126
  unless @first[n].include?(f)
@@ -81,17 +81,17 @@ module Rly
81
81
  # Need to decide on shift or reduce here
82
82
  # By default we favor shifting. Need to add
83
83
  # some precedence rules here.
84
- sprec, slevel = productions[st_actionp[a].number].prec
84
+ sprec, slevel = productions[st_actionp[a].index].precedence
85
85
  rprec, rlevel = precedence[a] || [:right, 0]
86
86
  if (slevel < rlevel) || ((slevel == rlevel) && (rprec == :left))
87
87
  # We really need to reduce here.
88
- st_action[a] = -p.number
88
+ st_action[a] = -p.index
89
89
  st_actionp[a] = p
90
90
  if ! slevel && ! rlevel
91
91
  log.info(" ! shift/reduce conflict for %s resolved as reduce",a)
92
92
  @sr_conflicts << [st, a, 'reduce']
93
93
  end
94
- productions[p.number].reduced += 1
94
+ productions[p.index].reduced += 1
95
95
  elsif (slevel == rlevel) && (rprec == :nonassoc)
96
96
  st_action[a] = nil
97
97
  else
@@ -105,19 +105,19 @@ module Rly
105
105
  # Reduce/reduce conflict. In this case, we favor the rule
106
106
  # that was defined first in the grammar file
107
107
  oldp = productions[-r]
108
- pp = productions[p.number]
108
+ pp = productions[p.index]
109
109
  if oldp.line > pp.line
110
- st_action[a] = -p.number
110
+ st_action[a] = -p.index
111
111
  st_actionp[a] = p
112
112
  chosenp = pp
113
113
  rejectp = oldp
114
- productions[p.number].reduced += 1
115
- productions[oldp.number].reduced -= 1
114
+ productions[p.index].reduced += 1
115
+ productions[oldp.index].reduced -= 1
116
116
  else
117
117
  chosenp,rejectp = oldp,pp
118
118
  end
119
119
  @rr_conflicts << [st, chosenp, rejectp]
120
- log.info(" ! reduce/reduce conflict for %s resolved using rule %d (%s)", a, st_actionp[a].number, st_actionp[a])
120
+ log.info(" ! reduce/reduce conflict for %s resolved using rule %d (%s)", a, st_actionp[a].index, st_actionp[a])
121
121
  else
122
122
  raise RuntimeError("Unknown conflict in state #{st}")
123
123
  end
@@ -128,8 +128,7 @@ module Rly
128
128
  end
129
129
  end
130
130
  end
131
- else # <-- level ok
132
- # i = p.lr_index
131
+ else
133
132
  a = p.prod[p.lr_index+1] # Get symbol right after the "."
134
133
  if @grammar.terminals.include?(a)
135
134
  g = lr0_goto(i, a)
@@ -27,6 +27,10 @@ module Rly
27
27
  @prod.length
28
28
  end
29
29
 
30
# Returns the stored line number, defaulting to (and memoizing) 0 when none
# has been set.
def line
  @line = 0 unless @line
  @line
end
33
+
30
34
  def lr0_added
31
35
  @lr0_added ||= 0
32
36
  end
@@ -49,6 +49,9 @@ module Rly
49
49
  @grammar.add_production(:rules, [:rule]) do |rl, r|
50
50
  rl.value = [r.value]
51
51
  end
52
+ @grammar.add_production(:rules, []) do |rl|
53
+ rl.value = [[]]
54
+ end
52
55
  @grammar.add_production(:rule, [:tokens]) do |r, tok|
53
56
  r.value = tok.value
54
57
  end
@@ -7,7 +7,7 @@ module Rly
7
7
  end
8
8
 
9
9
# Debug representation: the symbol's type followed by its to_s rendering.
def inspect
  text = to_s
  "#<YaccSymbol #{@type} '#{text}'>"
end
12
12
  end
13
13
  end
@@ -1,4 +1,4 @@
1
1
  module Rly
2
- VERSION = "0.2.1"
2
+ VERSION = "0.2.2"
3
3
  PLY_BASE_VERSION = "3.4"
4
4
  end
@@ -22,6 +22,8 @@ module Rly
22
22
  end
23
23
 
24
24
  def parse(input=nil, trace=false)
25
+ @trace = trace
26
+
25
27
  lookahead = nil
26
28
  lookaheadstack = []
27
29
  actions = @lr_table.lr_action
@@ -56,7 +58,7 @@ module Rly
56
58
  # is already set, we just use that. Otherwise, we'll pull
57
59
  # the next token off of the lookaheadstack or from the lexer
58
60
 
59
- puts "State : #{state}" if trace
61
+ puts "State : #{state}" if @trace
60
62
 
61
63
  unless lookahead
62
64
  if lookaheadstack.empty?
@@ -70,7 +72,7 @@ module Rly
70
72
  end
71
73
  end
72
74
 
73
- puts "Stack : #{(@symstack[1..-1].map{|s|s.type}.join(' ') + ' ' + lookahead.inspect).lstrip}" if trace
75
+ puts "Stack : #{(@symstack[1..-1].map{|s|s.type}.join(' ') + ' ' + lookahead.inspect).lstrip}" if @trace
74
76
 
75
77
  # Check the action table
76
78
  ltype = lookahead.type
@@ -82,7 +84,7 @@ module Rly
82
84
  @statestack.push(t)
83
85
  state = t
84
86
 
85
- puts "Action : Shift and goto state #{t}" if trace
87
+ puts "Action : Shift and goto state #{t}" if @trace
86
88
 
87
89
  @symstack.push(lookahead)
88
90
  lookahead = nil
@@ -103,9 +105,9 @@ module Rly
103
105
  sym.type = pname
104
106
  sym.value = nil
105
107
 
106
- if trace
108
+ if @trace
107
109
  if plen
108
- puts "Action : Reduce rule [#{p}] with [#{@symstack[-plen..@symstack.length].map{|s|s.value}.join(', ')}] and goto state #{-t}"
110
+ puts "Action : Reduce rule [#{p}] with [#{@symstack[-plen..@symstack.length].map{|s|s.inspect}.join(', ')}] and goto state #{-t}"
109
111
  else
110
112
  puts "Action : Reduce rule [#{p}] with [] and goto state #{-t}"
111
113
  end
@@ -127,7 +129,7 @@ module Rly
127
129
  @statestack.pop(plen)
128
130
  instance_exec(*targ, &p.block)
129
131
 
130
- puts "Result : #{targ[0].value}" if trace
132
+ puts "Result : #{targ[0].inspect}" if @trace
131
133
 
132
134
  @symstack.push(sym)
133
135
  state = goto[@statestack[-1]][pname]
@@ -160,7 +162,7 @@ module Rly
160
162
  @statestack.pop(plen)
161
163
  pslice[0] = instance_exec(*pslice, &p.block)
162
164
 
163
- puts "Result : #{targ[0].value}" if trace
165
+ puts "Result : #{targ[0].value}" if @trace
164
166
 
165
167
  @symstack.push(sym)
166
168
  state = goto[@statestack[-1]][pname]
@@ -185,7 +187,7 @@ module Rly
185
187
  n = @symstack[-1]
186
188
  result = n.value
187
189
 
188
- puts "Done : Returning #{result}" if trace
190
+ puts "Done : Returning #{result}" if @trace
189
191
 
190
192
  return result
191
193
  end
@@ -209,12 +211,7 @@ module Rly
209
211
  errtoken = nil if errtoken.type == :"$end"
210
212
 
211
213
  if self.class.error_handler
212
- errok = @errok
213
- token = @lex.next
214
- restart = @restart
215
- errtoken.lex = @lex if errtoken
216
-
217
- tok = self.class.error_handler.call(errtoken)
214
+ tok = self.instance_exec(errtoken, &self.class.error_handler)
218
215
 
219
216
  if @errorok
220
217
  # User must have done some kind of panic
@@ -226,14 +223,10 @@ module Rly
226
223
  end
227
224
  else
228
225
  if errtoken
229
- # if hasattr(errtoken,"lineno"): lineno = lookahead.lineno
230
- # else: lineno = 0
231
- #if lineno:
232
- # sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
233
- #else:
234
- # sys.stderr.write("yacc: Syntax error, token=%s" % errtoken.type)
226
+ location_info = lookahead.location_info
227
+ puts "Fail : Syntax error at #{location_info}, token='#{errtoken}'" if @trace
235
228
  else
236
- # sys.stderr.write("yacc: Parse error in input. EOF\n")
229
+ puts "Fail : Parse error in input. EOF" if @trace
237
230
  return nil
238
231
  end
239
232
  end
@@ -361,13 +354,19 @@ module Rly
361
354
  3
362
355
  end
363
356
 
357
# Registers the handler stored in @error_handler.
#
# @param lambda the handler to store
# @return the handler that was stored
def on_error(lambda)
  @error_handler = lambda
end
360
+
364
361
  def parsed_rules
365
362
  return @parsed_rules if @parsed_rules
366
363
 
367
364
  @parsed_rules = []
368
365
  rp = RuleParser.new
369
366
  self.rules.each do |desc, block|
370
- rp.parse(desc).each do |(pname, p, prec)|
367
+ rules = rp.parse(desc)
368
+ raise RuntimeError.new("Failed to parse rules: #{desc}") unless rules
369
+ rules.each do |(pname, p, prec)|
371
370
  @parsed_rules << [pname, p, prec, block]
372
371
  end
373
372
  end
@@ -17,4 +17,15 @@ describe Rly::RuleParser do
17
17
  productions[2].should == [:expression, [:expression, '*', :expression], nil]
18
18
  productions[3].should == [:expression, [:expression, '/', :expression], nil]
19
19
  end
20
+
21
+ it "tokenizes the rule correctly" do
22
+ s = 'maybe_superclasses : ":" superclasses |'
23
+ l = Rly::RuleParser.lexer_class.new(s)
24
+
25
+ l.next.type.should == :ID
26
+ l.next.type.should == ':'
27
+ l.next.type.should == :LITERAL
28
+ l.next.type.should == :ID
29
+ l.next.type.should == '|'
30
+ end
20
31
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rly
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.2.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-11-18 00:00:00.000000000 Z
12
+ date: 2012-12-30 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rspec
@@ -43,6 +43,7 @@ files:
43
43
  - Rakefile
44
44
  - assets/ply_dump.erb
45
45
  - lib/rly.rb
46
+ - lib/rly/file_lex.rb
46
47
  - lib/rly/lex.rb
47
48
  - lib/rly/lex_token.rb
48
49
  - lib/rly/parse/grammar.rb