rly 0.1.0 → 0.2.0

data/README.md CHANGED
@@ -2,8 +2,8 @@
 
 # Rly
 
-Rly is a lexer and parser generator for ruby, based on ideas and solutions of
-Python's [Ply](http://www.dabeaz.com/ply/).
+Rly is a lexer and parser generator for ruby (O RLY?), based on ideas and solutions of
+Python's [Ply](http://www.dabeaz.com/ply/) (in some places it's a total rip off actually).
 
 ## Installation
 
@@ -0,0 +1,15 @@
+Created by PLY version 3.4 (http://www.dabeaz.com/ply)
+
+Grammar
+
+<% for p in g.productions %>Rule <%= sprintf("%-5d", p.index) %> <%= p %>
+<% end %>
+Terminals, with rules where they appear
+
+<% @t = g.terminals.keys.map {|k| k.to_s }.sort; for t in @t %><%= sprintf("%-20s : %s", t, (g.terminals[t] || g.terminals[t.to_sym]).join(' ')) %>
+<% end %>
+Nonterminals, with rules where they appear
+
+<% @t = g.nonterminals.keys.sort; for t in @t %><%= sprintf("%-20s : %s", t, g.nonterminals[t].join(' ')) %>
+<% end %>
+<%= backlog %>
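The new file above is an ERB template that renders a PLY-style grammar report (rules, terminals, nonterminals) from a grammar object bound as g, followed by a backlog string. As a rough sketch of how such a template might be rendered with the standard library's ERB: the template path, the require path, and the way g/backlog are bound are assumptions here, since the actual rendering code (the PlyDump class referenced later in this diff) is not shown.

    require "erb"
    require "rly"   # assumption: Rly::Grammar is loaded via the new rly/yacc require

    # Build a tiny grammar to dump (Grammar's API appears later in this diff).
    g = Rly::Grammar.new([:NUMBER])
    g.add_production(:expression, [:expression, '+', :NUMBER])
    g.add_production(:expression, [:NUMBER])
    g.set_start

    backlog  = ""                               # extra report text, appended at the end
    template = File.read("grammar_report.erb")  # hypothetical path to the template above
    puts ERB.new(template).result(binding)      # prints the Grammar / Terminals / Nonterminals sections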
data/lib/rly.rb CHANGED
@@ -1,5 +1,7 @@
 require "rly/version"
 require "rly/lex"
+require "rly/yacc"
+require "rly/parse/rule_parser"
 
 module Rly
   # Your code goes here...
@@ -12,10 +12,10 @@ module Rly
   # configuration (check the methods documentation for details).
   #
   # Once you got your lexer configured, you can create its instances passing a
-  # String to be tokenized. You can then use either {#each} method or common
-  # *Enumerable* methods to get the processed tokens.
+  # String to be tokenized. You can then use the {#next} method to get tokens. If you
+  # have more input to tokenize, you can append it to the input buffer at any time with
+  # {#input}.
   class Lex
-    include Enumerable
 
     # Tracks the current line number for generated tokens
     #
@@ -55,21 +55,46 @@ module Rly
     #   end
     #
     #   lex = MyLexer.new("hello WORLD")
-    #   lex.each do |tok|
-    #     puts "#{tok.type} -> #{tok.value}" #=> "LOWERS -> hello"
-    #                                        #=> "UPPERS -> WORLD"
-    #   end
+    #   t = lex.next
+    #   puts "#{t.type} -> #{t.value}" #=> "LOWERS -> hello"
+    #   t = lex.next
+    #   puts "#{t.type} -> #{t.value}" #=> "UPPERS -> WORLD"
+    #   t = lex.next # => nil
     def initialize(input="")
       @input = input
       @pos = 0
       @lineno = 0
     end
 
+    def inspect
+      "#<#{self.class} pos=#{@pos} len=#{@input.length} lineno=#{@lineno}>"
+    end
+
+    # Appends a string to the input buffer
+    #
+    # The given string is appended to the input buffer; further {#next} calls
+    # will tokenize it as usual.
+    #
+    # @api public
+    #
+    # @example
+    #   lex = MyLexer.new("hello")
+    #
+    #   t = lex.next
+    #   puts "#{t.type} -> #{t.value}" #=> "LOWERS -> hello"
+    #   t = lex.next # => nil
+    #   lex.input("WORLD")
+    #   t = lex.next
+    #   puts "#{t.type} -> #{t.value}" #=> "UPPERS -> WORLD"
+    #   t = lex.next # => nil
+    def input(input)
+      @input << input
+    end
+
     # Processes the next token in input
     #
-    # This is the main interface to lexer. If block is given, {#each} behaves like
-    # an usual enumerator, yielding the next token. If there is no block, {#each}
-    # returns an Enumerator object.
+    # This is the main interface to the lexer. It returns the next available token,
+    # or **nil** if there are no more tokens available in the input string.
     #
     # {#each} Raises {LexError} if the input cannot be processed. This happens if
     # there were no matches by 'token' rules and no matches by 'literals' rule.
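The examples in these comments rely on a MyLexer class that is never defined in the diff. For illustration only, here is a sketch of what such a lexer and the new pull-style API might look like; the token and ignore class-level helpers are assumptions based on the tokens and ignores_list accessors used further down in lex.rb and may differ from the gem's actual DSL.

    class MyLexer < Rly::Lex
      token :LOWERS, /[a-z]+/    # assumed DSL: emit LOWERS tokens for lowercase runs
      token :UPPERS, /[A-Z]+/
      ignore " \t\n"             # assumed DSL: whitespace is skipped, not tokenized
    end

    lex = MyLexer.new("hello")
    lex.next              # => #<LexToken LOWERS 'hello'>
    lex.next              # => nil, the buffer is exhausted

    lex.input(" WORLD")   # append more text to the same lexer instance
    lex.next              # => #<LexToken UPPERS 'WORLD'>
    lex.next              # => nil

Compared to 0.1.0's Enumerable-based #each, this makes the lexer usable as an incremental token source that can be fed more input between calls.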
@@ -78,23 +103,19 @@ module Rly
     # after returning from error handler is still unchanged.
     #
     # @api public
-    # @yieldparam tok [LexToken] next processed token
     # @raise [LexError] if the input cannot be processed
-    # @return [Enumerator] if block is not given
-    # @return [nil] if block is given
+    # @return [LexToken] if the next chunk of input was processed successfully
+    # @return [nil] if there are no more tokens available in the input
     #
     # @example
     #   lex = MyLexer.new("hello WORLD")
     #
-    #   lex.each #=> #<Enumerator: ...>
-    #
-    #   lex.each do |tok|
-    #     puts "#{tok.type} -> #{tok.value}" #=> "LOWERS -> hello"
-    #                                        #=> "UPPERS -> WORLD"
-    #   end
-    def each
-      return self.to_enum unless block_given?
-
+    #   t = lex.next
+    #   puts "#{t.type} -> #{t.value}" #=> "LOWERS -> hello"
+    #   t = lex.next
+    #   puts "#{t.type} -> #{t.value}" #=> "UPPERS -> WORLD"
+    #   t = lex.next # => nil
+    def next
       while @pos < @input.length
         if self.class.ignores_list[@input[@pos]]
           @pos += 1
@@ -105,15 +126,17 @@ module Rly
         self.class.tokens.each do |type, rule, block|
           m = rule.match(@input, @pos)
           next unless m
+          next unless m.begin(0) == @pos
 
           tok = LexToken.new(type, m[0], self)
 
           matched = true
 
           tok = block.call(tok) if block
-          yield tok if tok.type
 
           @pos = m.end(0)
+
+          return tok if tok.type
         end
 
         unless matched
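The new next unless m.begin(0) == @pos guard deserves a note: Regexp#match(str, pos) returns the first match at or after pos, not only a match anchored at pos, so without the guard a rule could silently skip over characters that should be reported as illegal. A plain-Ruby illustration of the behavior the guard rejects:

    rule  = /[a-z]+/
    input = "!!!hello"
    pos   = 0

    m = rule.match(input, pos)
    m[0]               # => "hello"
    m.begin(0)         # => 3, the match starts past the scan position
    m.begin(0) == pos  # => false, so 0.2.0 discards this match and the '!' at
                       #    position 0 is reported as an illegal character instead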
@@ -121,8 +144,10 @@ module Rly
            tok = LexToken.new(@input[@pos], @input[@pos], self)
 
            matched = true
-            yield tok
+
            @pos += 1
+
+            return tok
          end
        end
 
@@ -134,16 +159,20 @@ module Rly
            if pos == @pos
              raise LexError.new("Illegal character '#{@input[@pos]}' at index #{@pos}")
            else
-              yield tok if tok && tok.type
+              return tok if tok && tok.type
            end
          else
            raise LexError.new("Illegal character '#{@input[@pos]}' at index #{@pos}")
          end
        end
      end
+      return nil
    end
 
    class << self
+      def terminals
+        self.tokens.map { |t,r,b| t }.compact + self.literals_list.chars.to_a
+      end
      # Returns the list of registered tokens
      #
      # @api private
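The new class-level terminals helper collects the declared token types plus the single-character literals, which is the shape Grammar.new expects later in this diff. A sketch of how the two sides could be wired together; as above, the token/literals/ignore DSL names are assumptions inferred from the tokens and literals_list accessors:

    class CalcLexer < Rly::Lex
      token :NUMBER, /\d+/
      literals "+-*/"      # assumed DSL behind literals_list
      ignore " \t"
    end

    CalcLexer.terminals               # => [:NUMBER, "+", "-", "*", "/"]
    g = Rly::Grammar.new(CalcLexer.terminals)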
@@ -9,5 +9,13 @@ module Rly
       @value = value
       @lexer = lexer
     end
+
+    def to_s
+      @value.to_s
+    end
+
+    def inspect
+      "#<LexToken #{@type} '#{@value}'>"
+    end
   end
 end
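The added to_s and inspect make tokens far more readable in irb and in debug output, roughly:

    tok = Rly::LexToken.new(:LOWERS, "hello", lex)   # lex: any Rly::Lex instance
    tok.to_s       # => "hello"
    tok.inspect    # => "#<LexToken LOWERS 'hello'>"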
@@ -0,0 +1,211 @@
1
+ require "rly/parse/production"
2
+ require "rly/parse/lr_item"
3
+
4
+ module Rly
5
+ class Grammar
6
+ attr_reader :terminals, :nonterminals, :productions, :prodnames, :start, :precedence
7
+
8
+ def initialize(terminals)
9
+ @productions = [nil]
10
+ @prodnames = {}
11
+ @prodmap = {}
12
+
13
+ @terminals = {}
14
+ terminals.each do |t|
15
+ raise ArgumentError unless t.upcase == t
16
+ @terminals[t] = []
17
+ end
18
+ @terminals[:error] = []
19
+
20
+ @nonterminals = {}
21
+ @first = {}
22
+ @follow = {}
23
+ @precedence = {}
24
+ @used_precedence = {}
25
+ @start = nil
26
+ end
27
+
28
+ def add_production(name, symbols, &block)
29
+ raise ArgumentError unless name.downcase == name
30
+ raise ArgumentError if name == :error
31
+
32
+ symbols.each do |sym|
33
+ if sym.is_a?(String)
34
+ raise ArgumentError unless sym.length == 1
35
+ @terminals[sym] = [] unless @terminals[sym]
36
+ end
37
+ end
38
+
39
+ precedence = prec_for_rightmost_terminal(symbols)
40
+
41
+ mapname = "#{name.to_s} -> #{symbols.to_s}"
42
+ raise ArgumentError if @prodmap[mapname]
43
+
44
+ index = @productions.count
45
+ @nonterminals[name] = [] unless @nonterminals[name]
46
+
47
+ symbols.each do |sym|
48
+ if @terminals[sym]
49
+ @terminals[sym] << index
50
+ else
51
+ @nonterminals[sym] = [] unless @nonterminals[sym]
52
+ @nonterminals[sym] << index
53
+ end
54
+ end
55
+
56
+ p = Production.new(index, name, symbols, precedence, block)
57
+
58
+ @productions << p
59
+ @prodmap[mapname] = p
60
+
61
+ @prodnames[name] = [] unless @prodnames[name]
62
+ @prodnames[name] << p
63
+
64
+ p
65
+ end
66
+
67
+ def set_precedence(term, assoc, level)
68
+ raise RuntimeError if @productions != [nil]
69
+ raise ArgumentError if @precedence[term]
70
+ raise ArgumentError unless [:left, :right, :noassoc].include?(assoc)
71
+
72
+ @precedence[term] = [assoc, level]
73
+ end
74
+
75
+ def set_start(symbol=nil)
76
+ symbol = @productions[1].name unless symbol
77
+ raise ArgumentError unless @nonterminals[symbol]
78
+ @productions[0] = Production.new(0, :"S'", [symbol])
79
+ @nonterminals[symbol] << 0
80
+ @start = symbol
81
+ end
82
+
83
+ def build_lritems
84
+ @productions.each do |p|
85
+ lastlri = p
86
+ i = 0
87
+ lr_items = []
88
+ while true do
89
+ if i > p.length
90
+ lri = nil
91
+ else
92
+ lri = LRItem.new(p,i)
93
+ lri.lr_after = @prodnames[lri.prod[i+1]] || []
94
+ lri.lr_before = lri.prod[i-1] || nil
95
+ end
96
+
97
+ lastlri.lr_next = lri
98
+ break unless lri
99
+ lr_items << lri
100
+ lastlri = lri
101
+ i += 1
102
+ end
103
+ p.lr_items = lr_items
104
+ end
105
+ end
106
+
107
+ def compute_first
108
+ return @first unless @first.empty?
109
+
110
+ @terminals.keys.each { |t| @first[t] = [t] }
111
+ @first[:'$end'] = [:'$end']
112
+ @nonterminals.keys.each { |n| @first[n] = [] }
113
+ while true
114
+ any_changes = false
115
+ nonterminals.keys.each do |n|
116
+ @prodnames[n].each do |p|
117
+ _first(p.prod).each do |f|
118
+ unless @first[n].include?(f)
119
+ @first[n] << f
120
+ any_changes = true
121
+ end
122
+ end
123
+ end
124
+ end
125
+ break unless any_changes
126
+ end
127
+
128
+ @first
129
+ end
130
+
131
+ def compute_follow(start=nil)
132
+ return @follow unless @follow.empty?
133
+
134
+ compute_first if @first.empty?
135
+
136
+ @nonterminals.keys.each { |n| @follow[n] = [] }
137
+
138
+ start = @productions[1].name unless start
139
+
140
+ @follow[start] = [:'$end']
141
+
142
+ while true
143
+ didadd = false
144
+ @productions[1..-1].each do |p|
145
+ p.prod.length.times do |i|
146
+ b = p.prod[i]
147
+ next unless @nonterminals.include?(b)
148
+
149
+ fst = _first(p.prod[i+1..-1])
150
+ hasempty = false
151
+ fst.each do |f|
152
+ if f != :'<empty>' && !@follow[b].include?(f)
153
+ @follow[b] << f
154
+ didadd = true
155
+ end
156
+ hasempty = true if f == :'<empty>'
157
+ end
158
+ if hasempty || i == p.prod.length - 1
159
+ @follow[p.name].each do |f|
160
+ unless @follow[b].include?(f)
161
+ @follow[b] << f
162
+ didadd = true
163
+ end
164
+ end
165
+ end
166
+ end
167
+ end
168
+ break unless didadd
169
+ end
170
+
171
+ @follow
172
+ end
173
+
174
+ private
175
+ def _first(beta)
176
+ result = []
177
+ should_add_empty = true
178
+
179
+ beta.each do |x|
180
+ x_produces_empty = false
181
+
182
+ @first[x].each do |f|
183
+ if f == :'<empty>'
184
+ x_produces_empty = true
185
+ else
186
+ result << f unless result.include?(f)
187
+ end
188
+ end
189
+
190
+ if x_produces_empty
191
+ next
192
+ else
193
+ should_add_empty = false
194
+ break
195
+ end
196
+ end
197
+ result << :'<empty>' if should_add_empty
198
+
199
+ result
200
+ end
201
+
202
+ def prec_for_rightmost_terminal(symbols)
203
+ symbols.reverse_each do |sym|
204
+ next unless @terminals[sym]
205
+
206
+ return @precedence[sym] || [:right, 0]
207
+ end
208
+ [:right, 0]
209
+ end
210
+ end
211
+ end
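Grammar is the core data structure of the new parser generator: add_production registers rules and records where each terminal and nonterminal is used, set_precedence/set_start configure the grammar, and compute_first/compute_follow implement the standard FIRST and FOLLOW set constructions. A sketch of the public API as defined above; the require path is an assumption, since this file's header is not shown in the diff.

    require "rly"   # assumption: Rly::Grammar is reachable once rly/yacc is required

    # Toy grammar:  expression -> expression '+' NUMBER | NUMBER
    g = Rly::Grammar.new([:NUMBER])

    g.set_precedence('+', :left, 1)   # must be called before the first add_production
    g.add_production(:expression, [:expression, '+', :NUMBER])
    g.add_production(:expression, [:NUMBER])
    g.set_start                       # defaults to the name of the first production

    g.start                           # => :expression
    g.compute_first[:expression]      # => [:NUMBER]
    g.compute_follow[:expression]     # => [:"$end", "+"]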
@@ -0,0 +1,32 @@
+module Rly
+  class LRItem
+    attr_accessor :lr_after, :lr_before, :lr_next
+    attr_reader :prod, :name, :usyms, :lr_index, :length, :lookaheads, :index
+
+    def initialize(p, n)
+      @name = p.name
+      @prod = p.prod.dup
+      @index = p.index
+      @lr_index = n
+      @lookaheads = {}
+      @prod.insert(n, :'.')
+      @length = @prod.length
+      @usyms = p.usyms
+
+      @lr_items = []
+      @lr_next = nil
+    end
+
+    def to_s
+      if @prod
+        "#{@name} -> #{@prod.join(' ')}"
+      else
+        "#{@name} -> <empty>"
+      end
+    end
+
+    def inspect
+      "#<LRItem #{to_s}>"
+    end
+  end
+end
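An LRItem is a production with a dot marking how much of it has been recognized; build_lritems in the Grammar class above chains the items via lr_next/lr_before/lr_after for the table construction that follows. Production itself lives in rly/parse/production, which is not part of this diff, so the sketch below uses a stand-in object exposing just the attributes LRItem reads:

    require "rly/parse/lr_item"

    FakeProduction = Struct.new(:name, :prod, :index, :usyms)
    stub = FakeProduction.new(:expression, [:expression, '+', :NUMBER], 1, [])

    Rly::LRItem.new(stub, 0).to_s   # => "expression -> . expression + NUMBER"
    Rly::LRItem.new(stub, 2).to_s   # => "expression -> expression + . NUMBER"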
@@ -0,0 +1,529 @@
1
+ require "set"
2
+ require "rly/parse/ply_dump"
3
+
4
+ module Rly
5
+ class LRTable
6
+ MAXINT = (2**(0.size * 8 -2) -1)
7
+
8
+ attr_reader :lr_action, :lr_goto, :lr_productions
9
+
10
+ def initialize(grammar, method=:LALR)
11
+ raise ArgumentError unless [:LALR, :SLR].include?(method)
12
+
13
+ @grammar = grammar
14
+ @lr_method = method
15
+
16
+ @lr_action = {}
17
+ @lr_goto = {}
18
+ @lr_productions = grammar.productions
19
+ @lr_goto_cache = {}
20
+ @lr0_cidhash = {}
21
+
22
+ @add_count = 0
23
+
24
+ @sr_conflict = 0
25
+ @rr_conflict = 0
26
+ @conflicts = []
27
+
28
+ @sr_conflicts = []
29
+ @rr_conflicts = []
30
+
31
+ grammar.build_lritems
32
+ grammar.compute_first
33
+ grammar.compute_follow
34
+ end
35
+
36
+ def parse_table(log=PlyDump.stub)
37
+ productions = @grammar.productions
38
+ precedence = @grammar.precedence
39
+
40
+ actionp = {}
41
+
42
+ log.info("Parsing method: %s", @lr_method)
43
+
44
+ c = lr0_items
45
+
46
+ add_lalr_lookaheads(c) if @lr_method == :LALR
47
+
48
+ # Build the parser table, state by state
49
+ st = 0
50
+ c.each do |i|
51
+ # Loop over each production in I
52
+ actlist = [] # List of actions
53
+ st_action = {}
54
+ st_actionp = {}
55
+ st_goto = {}
56
+ log.info("")
57
+ log.info("state %d", st)
58
+ log.info("")
59
+ i.each { |p| log.info(" (%d) %s", p.index, p.to_s) }
60
+ log.info("")
61
+
62
+ i.each do |p|
63
+ if p.length == p.lr_index + 1
64
+ if p.name == :"S'"
65
+ # Start symbol. Accept!
66
+ st_action[:"$end"] = 0
67
+ st_actionp[:"$end"] = p
68
+ else
69
+ # We are at the end of a production. Reduce!
70
+ if @lr_method == :LALR
71
+ laheads = p.lookaheads[st]
72
+ else
73
+ laheads = @grammar.follow[p.name]
74
+ end
75
+ laheads.each do |a|
76
+ actlist << [a, p, sprintf("reduce using rule %d (%s)", p.index, p)]
77
+ r = st_action[a]
78
+ if r
79
+ # Whoa. Have a shift/reduce or reduce/reduce conflict
80
+ if r > 0
81
+ # Need to decide on shift or reduce here
82
+ # By default we favor shifting. Need to add
83
+ # some precedence rules here.
84
+ sprec, slevel = productions[st_actionp[a].number].prec
85
+ rprec, rlevel = precedence[a] || [:right, 0]
86
+ if (slevel < rlevel) || ((slevel == rlevel) && (rprec == :left))
87
+ # We really need to reduce here.
88
+ st_action[a] = -p.number
89
+ st_actionp[a] = p
90
+ if ! slevel && ! rlevel
91
+ log.info(" ! shift/reduce conflict for %s resolved as reduce",a)
92
+ @sr_conflicts << [st, a, 'reduce']
93
+ end
94
+ productions[p.number].reduced += 1
95
+ elsif (slevel == rlevel) && (rprec == :noassoc)
96
+ st_action[a] = nil
97
+ else
98
+ # Hmmm. Guess we'll keep the shift
99
+ unless rlevel
100
+ log.info(" ! shift/reduce conflict for %s resolved as shift",a)
101
+ @sr_conflicts << [st,a,'shift']
102
+ end
103
+ end
104
+ elsif r < 0
105
+ # Reduce/reduce conflict. In this case, we favor the rule
106
+ # that was defined first in the grammar file
107
+ oldp = productions[-r]
108
+ pp = productions[p.number]
109
+ if oldp.line > pp.line
110
+ st_action[a] = -p.number
111
+ st_actionp[a] = p
112
+ chosenp = pp
113
+ rejectp = oldp
114
+ productions[p.number].reduced += 1
115
+ productions[oldp.number].reduced -= 1
116
+ else
117
+ chosenp,rejectp = oldp,pp
118
+ end
119
+ @rr_conflicts << [st, chosenp, rejectp]
120
+ log.info(" ! reduce/reduce conflict for %s resolved using rule %d (%s)", a, st_actionp[a].number, st_actionp[a])
121
+ else
122
+ raise RuntimeError, "Unknown conflict in state #{st}"
123
+ end
124
+ else
125
+ st_action[a] = -p.index
126
+ st_actionp[a] = p
127
+ productions[p.index].reduced += 1
128
+ end
129
+ end
130
+ end
131
+ else # <-- level ok
132
+ # i = p.lr_index
133
+ a = p.prod[p.lr_index+1] # Get symbol right after the "."
134
+ if @grammar.terminals.include?(a)
135
+ g = lr0_goto(i, a)
136
+ j = @lr0_cidhash[g.hash] || -1
137
+ if j >= 0
138
+ # We are in a shift state
139
+ actlist << [a, p, sprintf("shift and go to state %d", j)]
140
+ r = st_action[a]
141
+ if r
142
+ # Whoa have a shift/reduce or shift/shift conflict
143
+ if r > 0
144
+ if r != j
145
+ raise RuntimeError, "Shift/shift conflict in state #{st}"
146
+ end
147
+ elsif r < 0
148
+ # Do a precedence check.
149
+ # - if precedence of reduce rule is higher, we reduce.
150
+ # - if precedence of reduce is same and left assoc, we reduce.
151
+ # - otherwise we shift
152
+ rprec, rlevel = productions[st_actionp[a].index].precedence
153
+ sprec, slevel = precedence[a] || [:right, 0]
154
+ if (slevel > rlevel) || ((slevel == rlevel) && (rprec == :right))
155
+ # We decide to shift here... highest precedence to shift
156
+ productions[st_actionp[a].index].reduced -= 1
157
+ st_action[a] = j
158
+ st_actionp[a] = p
159
+ unless rlevel
160
+ log.info(" ! shift/reduce conflict for %s resolved as shift",a)
161
+ @sr_conflicts << [st, a, 'shift']
162
+ end
163
+ elsif (slevel == rlevel) && (rprec == :noassoc)
164
+ st_action[a] = nil
165
+ else
166
+ # Hmmm. Guess we'll keep the reduce
167
+ if ! slevel && ! rlevel
168
+ log.info(" ! shift/reduce conflict for %s resolved as reduce",a)
169
+ @sr_conflicts << [st, a, 'reduce']
170
+ end
171
+ end
172
+ else
173
+ raise RuntimeError, "Unknown conflict in state #{st}"
174
+ end
175
+ else
176
+ st_action[a] = j
177
+ st_actionp[a] = p
178
+ end
179
+ end
180
+ end
181
+ end
182
+ end
183
+
184
+ # Print the actions associated with each terminal
185
+ _actprint = {}
186
+ actlist.each do |a, p, m|
187
+ if st_action[a]
188
+ if p == st_actionp[a]
189
+ log.info(" %-15s %s",a,m)
190
+ _actprint[[a,m]] = 1
191
+ end
192
+ end
193
+ end
194
+ log.info("")
195
+ # Print the actions that were not used. (debugging)
196
+ not_used = false
197
+ actlist.each do |a, p, m|
198
+ if st_action[a]
199
+ unless p == st_actionp[a]
200
+ unless _actprint[[a,m]]
201
+ log.debug(" ! %-15s [ %s ]", a, m)
202
+ not_used = true
203
+ _actprint[[a,m]] = 1
204
+ end
205
+ end
206
+ end
207
+ end
208
+ log.debug("") if not_used
209
+
210
+ # Construct the goto table for this state
211
+
212
+ nkeys = {}
213
+ i.each do |ii|
214
+ ii.usyms.each do |s|
215
+ nkeys[s] = nil if @grammar.nonterminals.include?(s)
216
+ end
217
+ end
218
+ nkeys.each do |n, _|
219
+ g = lr0_goto(i, n)
220
+ j = @lr0_cidhash[g.hash] || -1
221
+ if j >= 0
222
+ st_goto[n] = j
223
+ log.info(" %-30s shift and go to state %d",n,j)
224
+ end
225
+ end
226
+
227
+ @lr_action[st] = st_action
228
+ actionp[st] = st_actionp
229
+ @lr_goto[st] = st_goto
230
+ st += 1
231
+ end
232
+ end
233
+
234
+ private
235
+ def add_lalr_lookaheads(c)
236
+ nullable = compute_nullable_nonterminals
237
+ trans = find_nonterminal_transitions(c)
238
+ readsets = compute_read_sets(c, trans, nullable)
239
+ lookd, included = compute_lookback_includes(c, trans, nullable)
240
+ followsets = compute_follow_sets(trans, readsets, included)
241
+ add_lookaheads(lookd, followsets)
242
+ end
243
+
244
+ def lr0_closure(i)
245
+ @add_count += 1
246
+
247
+ # Add everything in I to J
248
+ j = i.dup
249
+ didadd = true
250
+ while didadd
251
+ didadd = false
252
+ j.each do |k|
253
+ k.lr_after.each do |x|
254
+ next if x.lr0_added == @add_count
255
+ # Add B --> .G to J
256
+ j << x.lr_next
257
+ x.lr0_added = @add_count
258
+ didadd = true
259
+ end
260
+ end
261
+ end
262
+ j
263
+ end
264
+
265
+ def lr0_goto(i, x)
266
+ g = @lr_goto_cache[[i.hash, x]]
267
+ return g if g
268
+
269
+ s = @lr_goto_cache[x]
270
+ unless s
271
+ s = {}
272
+ @lr_goto_cache[x] = s
273
+ end
274
+
275
+ gs = []
276
+ i.each do |p|
277
+ n = p.lr_next
278
+ if n and n.lr_before == x
279
+ s1 = s[n.hash]
280
+ unless s1
281
+ s1 = {}
282
+ s[n.hash] = s1
283
+ end
284
+ gs << n
285
+ s = s1
286
+ end
287
+ end
288
+ g = s[:'$end']
289
+ unless g
290
+ if gs
291
+ g = lr0_closure(gs)
292
+ s[:'$end'] = g
293
+ else
294
+ s[:'$end'] = gs
295
+ end
296
+ end
297
+ @lr_goto_cache[[i.hash,x]] = g
298
+ g
299
+ end
300
+
301
+ def lr0_items
302
+ c = [ lr0_closure([@grammar.productions[0].lr_next]) ]
303
+
304
+ c.each_with_index { |c_i, j| @lr0_cidhash[c_i.hash] = j }
305
+
306
+ i = 0
307
+ while i < c.length
308
+ c_i = c[i]
309
+ i += 1
310
+
311
+ asyms = Set.new
312
+ c_i.each { |ii| ii.usyms.each { |s| asyms << s } }
313
+
314
+ asyms.each do |x|
315
+ g = lr0_goto(c_i, x)
316
+ next if g.empty?
317
+ next if @lr0_cidhash[g.hash]
318
+ @lr0_cidhash[g.hash] = c.length
319
+ c << g
320
+ end
321
+ end
322
+ c
323
+ end
324
+
325
+ def compute_nullable_nonterminals
326
+ nullable = {}
327
+ num_nullable = 0
328
+ while true
329
+ @grammar.productions[1..-1].each do |p|
330
+ if p.length == 0
331
+ nullable[p.name] = 1
332
+ next
333
+ end
334
+ found_t = false
335
+ p.prod.each do |t|
336
+ unless nullable[t]
337
+ found_t = true
338
+ break
339
+ end
340
+ end
341
+ nullable[p.name] = 1 unless found_t
342
+ end
343
+ break if nullable.length == num_nullable
344
+ num_nullable = nullable.length
345
+ end
346
+ nullable
347
+ end
348
+
349
+ def find_nonterminal_transitions(c)
350
+ trans = []
351
+ c.each_with_index do |a, state|
352
+ a.each do |p|
353
+ if p.lr_index < p.length - 1
354
+ next_prod = p.prod[p.lr_index+1]
355
+ if @grammar.nonterminals[next_prod]
356
+ t = [state, next_prod]
357
+ trans << t unless trans.include?(t)
358
+ end
359
+ end
360
+ end
361
+ end
362
+ trans
363
+ end
364
+
365
+ def compute_read_sets(c, ntrans, nullable)
366
+ fp = lambda { |x| dr_relation(c, x, nullable) }
367
+ r = lambda { |x| reads_relation(c, x, nullable) }
368
+ digraph(ntrans, r, fp)
369
+ end
370
+
371
+ def dr_relation(c, trans, nullable)
372
+ dr_set = {}
373
+ state, n = trans
374
+ terms = []
375
+
376
+ g = lr0_goto(c[state], n)
377
+ g.each do |p|
378
+ if p.lr_index < p.length - 1
379
+ a = p.prod[p.lr_index+1]
380
+ if @grammar.terminals.include?(a)
381
+ terms << a unless terms.include?(a)
382
+ end
383
+ end
384
+ end
385
+
386
+ terms << :'$end' if state == 0 && n == @grammar.productions[0].prod[0]
387
+
388
+ terms
389
+ end
390
+
391
+ def reads_relation(c, trans, empty)
392
+ rel = []
393
+ state, n = trans
394
+
395
+ g = lr0_goto(c[state], n)
396
+ j = @lr0_cidhash[g.hash] || -1
397
+ g.each do |p|
398
+ if p.lr_index < p.length - 1
399
+ a = p.prod[p.lr_index + 1]
400
+ rel << [j, a] if empty.include?(a)
401
+ end
402
+ end
403
+
404
+ rel
405
+ end
406
+
407
+ def digraph(x, r, fp)
408
+ n = {}
409
+ x.each { |xx| n[xx] = 0 }
410
+ stack = []
411
+ f = {}
412
+ x.each do |xx|
413
+ traverse(xx, n, stack, f, x, r, fp) if n[xx] == 0
414
+ end
415
+ f
416
+ end
417
+
418
+ def traverse(xx, n, stack, f, x, r, fp)
419
+ stack.push(xx)
420
+ d = stack.length
421
+ n[xx] = d
422
+ f[xx] = fp.call(xx)
423
+
424
+ rel = r.call(xx)
425
+ rel.each do |y|
426
+ traverse(y, n, stack, f, x, r, fp) if n[y] == 0
427
+
428
+ n[xx] = [n[xx], n[y]].min
429
+
430
+ arr = f[y] || []
431
+ arr.each do |a|
432
+ f[xx] << a unless f[xx].include?(a)
433
+ end
434
+ end
435
+ if n[xx] == d
436
+ n[stack[-1]] = MAXINT
437
+ f[stack[-1]] = f[xx]
438
+ element = stack.pop()
439
+ while element != xx
440
+ n[stack[-1]] = MAXINT
441
+ f[stack[-1]] = f[xx]
442
+ element = stack.pop()
443
+ end
444
+ end
445
+ end
446
+
447
+ def compute_lookback_includes(c, trans, nullable)
448
+ lookdict = {}
449
+ includedict = {}
450
+
451
+ dtrans = trans.each_with_object({}) { |k, h| h[k] = 1 }
452
+
453
+ trans.each do |state, n|
454
+ lookb = []
455
+ includes = []
456
+ c[state].each do |p|
457
+ next unless p.name == n
458
+
459
+ lr_index = p.lr_index
460
+ j = state
461
+ while lr_index < p.length - 1
462
+ lr_index = lr_index + 1
463
+ t = p.prod[lr_index]
464
+
465
+ if dtrans.include?([j,t])
466
+ li = lr_index + 1
467
+ escaped = false
468
+ while li < p.length
469
+ if @grammar.terminals[p.prod[li]]
470
+ escaped = true
471
+ break
472
+ end
473
+ unless nullable[p.prod[li]]
474
+ escaped = true
475
+ break
476
+ end
477
+ li = li + 1
478
+ end
479
+ includes << [j,t] unless escaped
480
+ end
481
+
482
+ g = lr0_goto(c[j],t)
483
+ j = @lr0_cidhash[g.hash] || -1
484
+ end
485
+
486
+ c[j].each do |r|
487
+ next unless r.name == p.name
488
+ next unless r.length == p.length
489
+ i = 0
490
+ escaped = false
491
+ while i < r.lr_index
492
+ unless r.prod[i] == p.prod[i+1]
493
+ escaped = true
494
+ break
495
+ end
496
+ i = i + 1
497
+ end
498
+ lookb << [j,r] unless escaped
499
+ end
500
+ end
501
+ includes.each do |i|
502
+ includedict[i] = [] unless includedict[i]
503
+ includedict[i] << [state, n]
504
+ end
505
+ lookdict[[state,n]] = lookb
506
+ end
507
+
508
+ [lookdict, includedict]
509
+ end
510
+
511
+ def compute_follow_sets(ntrans, readsets, inclsets)
512
+ fp = lambda { |x| readsets[x] }
513
+ r = lambda { |x| inclsets[x] || [] }
514
+ digraph(ntrans, r, fp)
515
+ end
516
+
517
+ def add_lookaheads(lookbacks, followset)
518
+ lookbacks.each do |trans, lb|
519
+ lb.each do |state, p|
520
+ p.lookaheads[state] = [] unless p.lookaheads[state]
521
+ f = followset[trans] || []
522
+ f.each do |a|
523
+ p.lookaheads[state] << a unless p.lookaheads[state].include?(a)
524
+ end
525
+ end
526
+ end
527
+ end
528
+ end
529
+ end
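LRTable is where the pieces above come together: given a Grammar it builds the LR(0) item sets, adds LALR(1) lookaheads via the reads/includes/lookback relations computed above, and fills the action and goto tables a Yacc-style parser can drive. A sketch of the flow, hedged as before on the require path:

    require "rly"   # assumption: Grammar and LRTable are loaded via rly/yacc

    g = Rly::Grammar.new([:NUMBER])
    g.add_production(:expression, [:expression, '+', :NUMBER])
    g.add_production(:expression, [:NUMBER])
    g.set_start

    table = Rly::LRTable.new(g, :LALR)   # :SLR is the other supported method
    table.parse_table                    # builds all states; log output goes to PlyDump.stub

    table.lr_action[0]                   # state 0's shift/reduce decisions, keyed by terminal
    table.lr_goto[0]                     # state 0's goto transitions, keyed by nonterminal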