rly 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -2,8 +2,8 @@
2
2
 
3
3
  # Rly
4
4
 
5
- Rly is a lexer and parser generator for ruby, based on ideas and solutions of
6
- Python's [Ply](http://www.dabeaz.com/ply/).
5
+ Rly is a lexer and parser generator for ruby (O RLY?), based on ideas and solutions of
6
+ Python's [Ply](http://www.dabeaz.com/ply/) (in some places it's a total rip off actually).
7
7
 
8
8
  ## Installation
9
9
 
@@ -0,0 +1,15 @@
1
+ Created by PLY version 3.4 (http://www.dabeaz.com/ply)
2
+
3
+ Grammar
4
+
5
+ <% for p in g.productions %>Rule <%= sprintf("%-5d", p.index) %> <%= p %>
6
+ <% end %>
7
+ Terminals, with rules where they appear
8
+
9
+ <% @t = g.terminals.keys.map {|k| k.to_s }.sort; for t in @t %><%= sprintf("%-20s : %s", t, (g.terminals[t] || g.terminals[t.to_sym]).join(' ')) %>
10
+ <% end %>
11
+ Nonterminals, with rules where they appear
12
+
13
+ <% @t = g.nonterminals.keys.sort; for t in @t %><%= sprintf("%-20s : %s", t, g.nonterminals[t].join(' ')) %>
14
+ <% end %>
15
+ <%= backlog %>
data/lib/rly.rb CHANGED
@@ -1,5 +1,7 @@
1
1
  require "rly/version"
2
2
  require "rly/lex"
3
+ require "rly/yacc"
4
+ require "rly/parse/rule_parser"
3
5
 
4
6
  module Rly
5
7
  # Your code goes here...
@@ -12,10 +12,10 @@ module Rly
12
12
  # configuration (check the methods documentation for details).
13
13
  #
14
14
  # Once you got your lexer configured, you can create its instances passing a
15
- # String to be tokenized. You can then use either {#each} method or common
16
- # *Enumerable* methods to get the processed tokens.
15
+ # String to be tokenized. You can then use {#next} method to get tokens. If you
16
+ # have more string to tokenize, you can append it to input buffer at any time with
17
+ # {#input}.
17
18
  class Lex
18
- include Enumerable
19
19
 
20
20
  # Tracks the current line number for generated tokens
21
21
  #
@@ -55,21 +55,46 @@ module Rly
55
55
  # end
56
56
  #
57
57
  # lex = MyLexer.new("hello WORLD")
58
- # lex.each do |tok|
59
- # puts "#{tok.type} -> #{tok.value}" #=> "LOWERS -> hello"
60
- # #=> "UPPERS -> WORLD"
61
- # end
58
+ # tok = lex.next
59
+ # puts "#{tok.type} -> #{tok.value}" #=> "LOWERS -> hello"
60
+ # tok = lex.next
61
+ # puts "#{tok.type} -> #{tok.value}" #=> "UPPERS -> WORLD"
62
+ # tok = lex.next # => nil
62
63
  def initialize(input="")
63
64
  @input = input
64
65
  @pos = 0
65
66
  @lineno = 0
66
67
  end
67
68
 
69
# Short debugging representation: class name, current scan position,
# length of the buffered input and current line number.
#
# @return [String]
def inspect
  klass = self.class
  buffered = @input.length
  "#<#{klass} pos=#{@pos} len=#{buffered} lineno=#{@lineno}>"
end
72
+
73
+ # Appends string to input buffer
74
+ #
75
+ # The given string is appended to input buffer, further {#next} calls will
76
+ # tokenize it as usual.
77
+ #
78
+ # @api public
79
+ #
80
+ # @example
81
+ # lex = MyLexer.new("hello")
82
+ #
83
+ # tok = lex.next
84
+ # puts "#{tok.type} -> #{tok.value}" #=> "LOWERS -> hello"
85
+ # tok = lex.next # => nil
86
+ # lex.input("WORLD")
87
+ # tok = lex.next
88
+ # puts "#{tok.type} -> #{tok.value}" #=> "UPPERS -> WORLD"
89
+ # tok = lex.next # => nil
90
# Appends +input+ to the buffer that subsequent {#next} calls tokenize.
#
# The buffer is rebuilt rather than extended in place: the string handed to
# the constructor belongs to the caller, so it must not be mutated, and it
# may be frozen (in which case an in-place append would raise).
#
# @param input [String] text to add at the end of the buffer
# @return [String] the new, combined buffer
def input(input)
  @input += input
end
93
+
68
94
  # Processes the next token in input
69
95
  #
70
- # This is the main interface to lexer. If block is given, {#each} behaves like
71
- # an usual enumerator, yielding the next token. If there is no block, {#each}
72
- # returns an Enumerator object.
96
+ # This is the main interface to lexer. It returns next available token or **nil**
97
+ # if there are no more tokens available in the input string.
73
98
  #
74
99
  # {#next} raises {LexError} if the input cannot be processed. This happens if
75
100
  # there were no matches by 'token' rules and no matches by 'literals' rule.
@@ -78,23 +103,19 @@ module Rly
78
103
  # after returning from error handler is still unchanged.
79
104
  #
80
105
  # @api public
81
- # @yieldparam tok [LexToken] next processed token
82
106
  # @raise [LexError] if the input cannot be processed
83
- # @return [Enumerator] if block is not given
84
- # @return [nil] if block is given
107
+ # @return [LexToken] if the next chunk of input was processed successfully
108
+ # @return [nil] if there are no more tokens available in input
85
109
  #
86
110
  # @example
87
111
  # lex = MyLexer.new("hello WORLD")
88
112
  #
89
- # lex.each #=> #<Enumerator: ...>
90
- #
91
- # lex.each do |tok|
92
- # puts "#{tok.type} -> #{tok.value}" #=> "LOWERS -> hello"
93
- # #=> "UPPERS -> WORLD"
94
- # end
95
- def each
96
- return self.to_enum unless block_given?
97
-
113
+ # tok = lex.next
114
+ # puts "#{tok.type} -> #{tok.value}" #=> "LOWERS -> hello"
115
+ # tok = lex.next
116
+ # puts "#{tok.type} -> #{tok.value}" #=> "UPPERS -> WORLD"
117
+ # tok = lex.next # => nil
118
+ def next
98
119
  while @pos < @input.length
99
120
  if self.class.ignores_list[@input[@pos]]
100
121
  @pos += 1
@@ -105,15 +126,17 @@ module Rly
105
126
  self.class.tokens.each do |type, rule, block|
106
127
  m = rule.match(@input, @pos)
107
128
  next unless m
129
+ next unless m.begin(0) == @pos
108
130
 
109
131
  tok = LexToken.new(type, m[0], self)
110
132
 
111
133
  matched = true
112
134
 
113
135
  tok = block.call(tok) if block
114
- yield tok if tok.type
115
136
 
116
137
  @pos = m.end(0)
138
+
139
+ return tok if tok.type
117
140
  end
118
141
 
119
142
  unless matched
@@ -121,8 +144,10 @@ module Rly
121
144
  tok = LexToken.new(@input[@pos], @input[@pos], self)
122
145
 
123
146
  matched = true
124
- yield tok
147
+
125
148
  @pos += 1
149
+
150
+ return tok
126
151
  end
127
152
  end
128
153
 
@@ -134,16 +159,20 @@ module Rly
134
159
  if pos == @pos
135
160
  raise LexError.new("Illegal character '#{@input[@pos]}' at index #{@pos}")
136
161
  else
137
- yield tok if tok && tok.type
162
+ return tok if tok && tok.type
138
163
  end
139
164
  else
140
165
  raise LexError.new("Illegal character '#{@input[@pos]}' at index #{@pos}")
141
166
  end
142
167
  end
143
168
  end
169
+ return nil
144
170
  end
145
171
 
146
172
  class << self
173
# Lists every terminal this lexer can emit: the non-nil token types
# followed by each character of the literals list.
#
# @return [Array] token types, then one-character literal strings
def terminals
  token_types = tokens.map { |type, _rule, _block| type }.compact
  literal_chars = literals_list.chars.to_a
  token_types + literal_chars
end
147
176
  # Returns the list of registered tokens
148
177
  #
149
178
  # @api private
@@ -9,5 +9,13 @@ module Rly
9
9
  @value = value
10
10
  @lexer = lexer
11
11
  end
12
+
13
# String form of the token: its value converted with +to_s+.
#
# @return [String]
def to_s
  value = @value
  value.to_s
end
16
+
17
# Debugging representation showing the token type and its value.
#
# @return [String]
def inspect
  type = @type
  value = @value
  "#<LexToken #{type} '#{value}'>"
end
12
20
  end
13
21
  end
@@ -0,0 +1,211 @@
1
+ require "rly/parse/production"
2
+ require "rly/parse/lr_item"
3
+
4
+ module Rly
5
+ class Grammar
6
+ attr_reader :terminals, :nonterminals, :productions, :prodnames, :start, :precedence
7
+
8
# Creates an empty grammar over the given terminals.
#
# Every terminal must equal its own upper-cased form; each one (plus the
# special :error terminal) starts with an empty list of production indices.
#
# @param terminals [Enumerable] terminal names (anything responding to #upcase)
# @raise [ArgumentError] if a terminal is not upper-case
def initialize(terminals)
  # Slot 0 is a placeholder; set_start stores the augmented production there.
  @productions = [nil]
  @prodnames = {}
  @prodmap = {}

  @terminals = {}
  terminals.each do |t|
    unless t.upcase == t
      raise ArgumentError, "terminal #{t.inspect} must be upper-case"
    end
    @terminals[t] = []
  end
  @terminals[:error] = []

  @nonterminals = {}
  @first = {}
  @follow = {}
  @precedence = {}
  @used_precedence = {}
  @start = nil
end
27
+
28
# Registers the production +name -> symbols+ and returns it.
#
# String symbols are treated as single-character literal terminals and are
# added to the terminal table on first use. All validation happens before
# any grammar state is touched, so a rejected production leaves the grammar
# exactly as it was.
#
# @param name [Symbol] nonterminal being defined; must be lower-case and not :error
# @param symbols [Array] right-hand side symbols
# @param block [Proc] optional action stored with the production
# @raise [ArgumentError] on an invalid name, a multi-character literal,
#   or a production that was already added
# @return [Production] the newly created production
def add_production(name, symbols, &block)
  unless name.downcase == name
    raise ArgumentError, "production name #{name.inspect} must be lower-case"
  end
  raise ArgumentError, "production name :error is reserved" if name == :error

  literals = symbols.select { |sym| sym.is_a?(String) }
  literals.each do |sym|
    unless sym.length == 1
      raise ArgumentError, "literal #{sym.inspect} must be a single character"
    end
  end

  mapname = "#{name.to_s} -> #{symbols.to_s}"
  raise ArgumentError, "duplicate production: #{mapname}" if @prodmap[mapname]

  # Literals must be registered before the precedence lookup, which only
  # considers symbols present in @terminals.
  literals.each { |sym| @terminals[sym] ||= [] }
  precedence = prec_for_rightmost_terminal(symbols)

  index = @productions.count
  @nonterminals[name] ||= []

  # Record, per symbol, the index of every production that uses it.
  symbols.each do |sym|
    if @terminals[sym]
      @terminals[sym] << index
    else
      (@nonterminals[sym] ||= []) << index
    end
  end

  production = Production.new(index, name, symbols, precedence, block)

  @productions << production
  @prodmap[mapname] = production
  (@prodnames[name] ||= []) << production

  production
end
66
+
67
# Declares associativity and precedence level for a terminal.
#
# Must be called before any production is added. Non-associativity is
# stored as :nonassoc, which is the value the LR table builder compares
# against; the older spelling :noassoc is still accepted and normalized.
#
# @param term [Object] terminal the precedence applies to
# @param assoc [Symbol] :left, :right or :nonassoc (:noassoc is an alias)
# @param level [Integer] precedence level
# @raise [RuntimeError] if productions have already been added
# @raise [ArgumentError] if +term+ already has a precedence or +assoc+ is unknown
# @return [Array] the stored [assoc, level] pair
def set_precedence(term, assoc, level)
  if @productions != [nil]
    raise RuntimeError, "precedence must be set before adding productions"
  end
  raise ArgumentError, "precedence already set for #{term.inspect}" if @precedence[term]

  assoc = :nonassoc if assoc == :noassoc
  unless [:left, :right, :nonassoc].include?(assoc)
    raise ArgumentError, "unknown associativity #{assoc.inspect}"
  end

  @precedence[term] = [assoc, level]
end
74
+
75
# Chooses the start symbol and installs the augmented production
# S' -> symbol in slot 0 of the production list.
#
# @param symbol [Symbol, nil] start nonterminal; defaults to the name of
#   the first production that was added
# @raise [RuntimeError] if no symbol is given and the grammar has no productions
# @raise [ArgumentError] if the symbol is not a known nonterminal
# @return [Symbol] the start symbol
def set_start(symbol=nil)
  unless symbol
    first_production = @productions[1]
    unless first_production
      raise RuntimeError, "cannot infer start symbol: grammar has no productions"
    end
    symbol = first_production.name
  end

  unless @nonterminals[symbol]
    raise ArgumentError, "start symbol #{symbol.inspect} is not a known nonterminal"
  end

  @productions[0] = Production.new(0, :"S'", [symbol])
  # Index 0 marks the start symbol as used by the augmented production.
  @nonterminals[symbol] << 0
  @start = symbol
end
82
+
83
# Builds, for every production, the chain of LR items with the dot at each
# position 0..length, linked through +lr_next+ (the production itself points
# at item 0, the last item points at nil).
#
# For each item:
# * +lr_after+  - productions of the symbol right after the dot ([] if none)
# * +lr_before+ - the symbol right before the dot; nil when the dot is at
#   position 0 (a plain +prod[i-1]+ would wrap around to the last element)
#
# @return [Array] the production list
def build_lritems
  @productions.each do |p|
    items = (0..p.length).map do |dot|
      item = LRItem.new(p, dot)
      item.lr_after = @prodnames[item.prod[dot + 1]] || []
      item.lr_before = dot.zero? ? nil : item.prod[dot - 1]
      item
    end

    # Thread the chain: production -> item 0 -> item 1 -> ... -> nil
    previous = p
    items.each do |item|
      previous.lr_next = item
      previous = item
    end
    previous.lr_next = nil

    p.lr_items = items
  end
end
106
+
107
# Computes (once) the FIRST set of every grammar symbol.
#
# Terminals and :'$end' map to themselves; nonterminal sets are grown by
# repeatedly merging the FIRST set of each production body until a full
# pass adds nothing. Subsequent calls return the cached table.
#
# @return [Hash] symbol => Array of first symbols
def compute_first
  return @first unless @first.empty?

  @terminals.each_key { |term| @first[term] = [term] }
  @first[:'$end'] = [:'$end']
  @nonterminals.each_key { |nonterm| @first[nonterm] = [] }

  loop do
    changed = false
    nonterminals.each_key do |nonterm|
      known = @first[nonterm]
      @prodnames[nonterm].each do |production|
        _first(production.prod).each do |sym|
          next if known.include?(sym)
          known << sym
          changed = true
        end
      end
    end
    break unless changed
  end

  @first
end
130
+
131
# Computes (once) the FOLLOW set of every nonterminal.
#
# The start symbol is followed by :'$end'. For each nonterminal occurrence
# inside a production body, FIRST of the remaining suffix is added; when
# that suffix can be empty (or there is none) the FOLLOW set of the
# production's own name is added too. Iterates until nothing changes.
#
# @param start [Symbol, nil] start symbol; defaults to the first production's name
# @return [Hash] nonterminal => Array of following symbols
def compute_follow(start=nil)
  return @follow unless @follow.empty?

  compute_first if @first.empty?

  @nonterminals.each_key { |nonterm| @follow[nonterm] = [] }

  start ||= @productions[1].name
  @follow[start] = [:'$end']

  loop do
    grew = false
    @productions[1..-1].each do |production|
      body = production.prod
      body.each_with_index do |sym, pos|
        next unless @nonterminals.include?(sym)

        suffix_can_be_empty = false
        _first(body[(pos + 1)..-1]).each do |f|
          if f == :'<empty>'
            suffix_can_be_empty = true
          elsif !@follow[sym].include?(f)
            @follow[sym] << f
            grew = true
          end
        end

        next unless suffix_can_be_empty || pos == body.length - 1

        @follow[production.name].each do |f|
          next if @follow[sym].include?(f)
          @follow[sym] << f
          grew = true
        end
      end
    end
    break unless grew
  end

  @follow
end
173
+
174
+ private
175
# FIRST set of a symbol sequence, using the per-symbol table in @first.
#
# Symbols are consumed left to right for as long as each one can derive
# the empty string; :'<empty>' is appended only if every symbol can
# (which includes the empty sequence).
#
# @param beta [Array] sequence of grammar symbols
# @return [Array] first symbols, without duplicates
def _first(beta)
  collected = []

  every_symbol_nullable = beta.all? do |sym|
    nullable = false
    @first[sym].each do |f|
      if f == :'<empty>'
        nullable = true
      elsif !collected.include?(f)
        collected << f
      end
    end
    nullable
  end

  collected << :'<empty>' if every_symbol_nullable
  collected
end
201
+
202
# Precedence of the right-most terminal among +symbols+.
#
# @param symbols [Array] right-hand side of a production
# @return [Array] the terminal's [assoc, level] pair, or [:right, 0] when
#   there is no terminal or it has no declared precedence
def prec_for_rightmost_terminal(symbols)
  rightmost = symbols.reverse_each.find { |sym| @terminals[sym] }
  declared = rightmost ? @precedence[rightmost] : nil
  declared || [:right, 0]
end
210
+ end
211
+ end
@@ -0,0 +1,32 @@
1
+ module Rly
2
# A production with a dot marker (:'.') inserted at one position of its body.
class LRItem
  attr_accessor :lr_after, :lr_before, :lr_next
  attr_reader :prod, :name, :usyms, :lr_index, :length, :lookaheads, :index

  # @param p [#name, #prod, #index, #usyms] production the item is derived from
  # @param n [Integer] position of the dot within the body
  def initialize(p, n)
    @name = p.name
    @index = p.index
    @usyms = p.usyms
    @lr_index = n

    # Work on a copy so the production's own body is left untouched.
    @prod = p.prod.dup.insert(n, :'.')
    @length = @prod.length

    @lookaheads = {}
    @lr_items = []
    @lr_next = nil
  end

  # @return [String] "name -> body" with the body symbols space-separated
  def to_s
    body = @prod ? @prod.join(' ') : '<empty>'
    "#{@name} -> #{body}"
  end

  # @return [String]
  def inspect
    "#<LRItem #{to_s}>"
  end
end
32
+ end
@@ -0,0 +1,529 @@
1
+ require "set"
2
+ require "rly/parse/ply_dump"
3
+
4
+ module Rly
5
+ class LRTable
6
+ MAXINT = (2**(0.size * 8 -2) -1)
7
+
8
+ attr_reader :lr_action, :lr_goto, :lr_productions
9
+
10
# Prepares table construction for +grammar+.
#
# Asks the grammar to build its LR items and its FIRST/FOLLOW sets; the
# tables themselves are produced by {#parse_table}.
#
# @param grammar [Grammar] grammar to build tables for
# @param method [Symbol] table construction method, :LALR or :SLR
# @raise [ArgumentError] if +method+ is neither :LALR nor :SLR
def initialize(grammar, method=:LALR)
  unless [:LALR, :SLR].include?(method)
    raise ArgumentError, "unsupported parsing method #{method.inspect} (expected :LALR or :SLR)"
  end

  @grammar = grammar
  @lr_method = method

  @lr_action = {}
  @lr_goto = {}
  @lr_productions = grammar.productions
  @lr_goto_cache = {}
  @lr0_cidhash = {}

  # Generation counter used by lr0_closure to mark visited productions.
  @add_count = 0

  @sr_conflict = 0
  @rr_conflict = 0
  @conflicts = []

  @sr_conflicts = []
  @rr_conflicts = []

  grammar.build_lritems
  grammar.compute_first
  grammar.compute_follow
end
35
+
36
# Builds the LR action and goto tables, one state at a time.
#
# Actions are stored in @lr_action[state][terminal]: a positive number is
# "shift and go to that state", a negative number is "reduce by the
# production with that index", 0 is accept and nil marks an error entry
# produced by a non-associative operator. Goto entries go to
# @lr_goto[state][nonterminal]. Resolved conflicts are recorded in
# @sr_conflicts and @rr_conflicts and described on +log+.
#
# @param log [#info, #debug] receiver of the human-readable state dump
# @raise [RuntimeError] on a shift/shift conflict or a conflict with an
#   existing action that is neither a shift nor a reduce
# @return [Array] the LR(0) item sets the tables were built from
def parse_table(log=PlyDump.stub)
  productions = @grammar.productions
  precedence = @grammar.precedence

  # A level of 0 is what an undeclared precedence defaults to. It has to be
  # tested explicitly because 0 is truthy in Ruby.
  unranked = lambda { |level| level.nil? || level == 0 }

  log.info("Parsing method: %s", @lr_method)

  c = lr0_items

  add_lalr_lookaheads(c) if @lr_method == :LALR

  # Build the parser table, state by state
  st = 0
  c.each do |i|
    actlist = [] # every candidate action: [terminal, item, description]
    st_action = {}
    st_actionp = {}
    st_goto = {}
    log.info("")
    log.info("state %d", st)
    log.info("")
    i.each { |item| log.info(" (%d) %s", item.index, item.to_s) }
    log.info("")

    i.each do |item|
      if item.length == item.lr_index + 1
        # Dot is at the end of the item.
        if item.name == :"S'"
          # Start symbol. Accept!
          st_action[:"$end"] = 0
          st_actionp[:"$end"] = item
          next
        end

        # We are at the end of a production. Reduce!
        laheads = if @lr_method == :LALR
                    item.lookaheads[st]
                  else
                    # compute_follow returns the grammar's cached FOLLOW table.
                    @grammar.compute_follow[item.name]
                  end

        laheads.each do |a|
          actlist << [a, item, sprintf("reduce using rule %d (%s)", item.index, item)]
          r = st_action[a]

          unless r
            st_action[a] = -item.index
            st_actionp[a] = item
            productions[item.index].reduced += 1
            next
          end

          if r > 0
            # Shift/reduce conflict against an existing shift. Shifting is
            # favored unless precedence says otherwise.
            _sprec, slevel = productions[st_actionp[a].index].precedence
            rprec, rlevel = precedence[a] || [:right, 0]
            if (slevel < rlevel) || ((slevel == rlevel) && (rprec == :left))
              # We really need to reduce here.
              st_action[a] = -item.index
              st_actionp[a] = item
              if unranked.call(slevel) && unranked.call(rlevel)
                log.info(" ! shift/reduce conflict for %s resolved as reduce", a)
                @sr_conflicts << [st, a, 'reduce']
              end
              productions[item.index].reduced += 1
            elsif (slevel == rlevel) && (rprec == :nonassoc)
              st_action[a] = nil
            elsif unranked.call(rlevel)
              # Keep the shift.
              log.info(" ! shift/reduce conflict for %s resolved as shift", a)
              @sr_conflicts << [st, a, 'shift']
            end
          elsif r < 0
            # Reduce/reduce conflict: favor the rule that was defined first,
            # i.e. the one with the lower production index.
            old_index = -r
            oldp = productions[old_index]
            pp = productions[item.index]
            if old_index > item.index
              st_action[a] = -item.index
              st_actionp[a] = item
              chosenp = pp
              rejectp = oldp
              productions[item.index].reduced += 1
              productions[old_index].reduced -= 1
            else
              chosenp = oldp
              rejectp = pp
            end
            @rr_conflicts << [st, chosenp, rejectp]
            log.info(" ! reduce/reduce conflict for %s resolved using rule %d (%s)", a, st_actionp[a].index, st_actionp[a])
          else
            raise RuntimeError, "Unknown conflict in state #{st}"
          end
        end
      else
        a = item.prod[item.lr_index + 1] # Get symbol right after the "."
        next unless @grammar.terminals.include?(a)

        g = lr0_goto(i, a)
        j = @lr0_cidhash[g.hash] || -1
        next unless j >= 0

        # We are in a shift state
        actlist << [a, item, sprintf("shift and go to state %d", j)]
        r = st_action[a]

        unless r
          st_action[a] = j
          st_actionp[a] = item
          next
        end

        if r > 0
          raise RuntimeError, "Shift/shift conflict in state #{st}" if r != j
        elsif r < 0
          # Shift/reduce conflict against an existing reduce.
          # - if precedence of reduce rule is higher, we reduce.
          # - if precedence of reduce is same and left assoc, we reduce.
          # - otherwise we shift
          rprec, rlevel = productions[st_actionp[a].index].precedence
          _sprec, slevel = precedence[a] || [:right, 0]
          if (slevel > rlevel) || ((slevel == rlevel) && (rprec == :right))
            # We decide to shift here... highest precedence to shift
            productions[st_actionp[a].index].reduced -= 1
            st_action[a] = j
            st_actionp[a] = item
            if unranked.call(rlevel)
              log.info(" ! shift/reduce conflict for %s resolved as shift", a)
              @sr_conflicts << [st, a, 'shift']
            end
          elsif (slevel == rlevel) && (rprec == :nonassoc)
            st_action[a] = nil
          elsif unranked.call(slevel) && unranked.call(rlevel)
            # Keep the reduce.
            log.info(" ! shift/reduce conflict for %s resolved as reduce", a)
            @sr_conflicts << [st, a, 'reduce']
          end
        else
          raise RuntimeError, "Unknown conflict in state #{st}"
        end
      end
    end

    # Print the actions associated with each terminal
    printed = {}
    actlist.each do |a, item, m|
      next unless st_action[a] && item == st_actionp[a]
      log.info(" %-15s %s", a, m)
      printed[[a, m]] = 1
    end
    log.info("")

    # Print the actions that were not used. (debugging)
    not_used = false
    actlist.each do |a, item, m|
      next unless st_action[a]
      next if item == st_actionp[a] || printed[[a, m]]
      log.debug(" ! %-15s [ %s ]", a, m)
      not_used = true
      printed[[a, m]] = 1
    end
    log.debug("") if not_used

    # Construct the goto table for this state
    nkeys = {}
    i.each do |item|
      item.usyms.each do |s|
        nkeys[s] = nil if @grammar.nonterminals.include?(s)
      end
    end
    nkeys.each do |n, _|
      g = lr0_goto(i, n)
      j = @lr0_cidhash[g.hash] || -1
      if j >= 0
        st_goto[n] = j
        log.info(" %-30s shift and go to state %d", n, j)
      end
    end

    @lr_action[st] = st_action
    @lr_goto[st] = st_goto
    st += 1
  end
end
233
+
234
+ private
235
# Attaches LALR lookaheads to the items of the LR(0) states in +c+.
#
# Runs the pipeline in order: nullable nonterminals, nonterminal
# transitions, read sets, lookback/includes relations, follow sets, and
# finally the lookahead assignment itself.
#
# @param c [Array] LR(0) item sets
def add_lalr_lookaheads(c)
  nullable_syms = compute_nullable_nonterminals
  transitions = find_nonterminal_transitions(c)
  read_sets = compute_read_sets(c, transitions, nullable_syms)
  lookbacks, includes = compute_lookback_includes(c, transitions, nullable_syms)
  follow_sets = compute_follow_sets(transitions, read_sets, includes)
  add_lookaheads(lookbacks, follow_sets)
end
243
+
244
# LR(0) closure of the item list +i+.
#
# Starting from a copy of +i+, the first item of every production listed
# in an item's +lr_after+ is appended. A per-call stamp written to each
# production's +lr0_added+ keeps a production from being added twice.
#
# @param i [Array] kernel items
# @return [Array] the closed item list (the argument is not modified)
def lr0_closure(i)
  @add_count += 1
  stamp = @add_count

  closure = i.dup
  loop do
    grew = false
    # Items appended below are visited by this same pass.
    closure.each do |item|
      item.lr_after.each do |production|
        next if production.lr0_added == stamp
        # Add B --> .G to the closure
        closure << production.lr_next
        production.lr0_added = stamp
        grew = true
      end
    end
    break unless grew
  end
  closure
end
264
+
265
# GOTO of item set +i+ on symbol +x+, cached.
#
# Results are looked up first under [i.hash, x]. On a miss, the successor
# items whose +lr_before+ is +x+ are collected while walking a per-symbol
# trie (keyed by each successor's hash) so that the same kernel always
# yields the same closure object, stored under :'$end' at the trie leaf.
#
# @param i [Array] item set
# @param x [Object] grammar symbol
# @return [Array] the resulting item set (empty when there is no transition)
def lr0_goto(i, x)
  cache_key = [i.hash, x]
  cached = @lr_goto_cache[cache_key]
  return cached if cached

  node = (@lr_goto_cache[x] ||= {})

  kernel = []
  i.each do |item|
    successor = item.lr_next
    next unless successor && successor.lr_before == x
    kernel << successor
    node = (node[successor.hash] ||= {})
  end

  result = node[:'$end']
  unless result
    result = lr0_closure(kernel)
    node[:'$end'] = result
  end

  @lr_goto_cache[cache_key] = result
  result
end
300
+
301
# Computes the collection of LR(0) item sets, starting from the closure of
# the augmented production's first item.
#
# Each set is assigned its position in the collection, recorded in
# @lr0_cidhash under the set's hash. Sets are processed in order; every
# non-empty, not-yet-seen GOTO target is appended and processed later.
#
# @return [Array<Array>] the item sets, index == state number
def lr0_items
  states = [lr0_closure([@grammar.productions[0].lr_next])]
  states.each_with_index { |state, number| @lr0_cidhash[state.hash] = number }

  cursor = 0
  while cursor < states.length
    state = states[cursor]
    cursor += 1

    # Every symbol used by any item of this state, without duplicates.
    symbols = Set.new
    state.each { |item| item.usyms.each { |sym| symbols << sym } }

    symbols.each do |sym|
      target = lr0_goto(state, sym)
      next if target.empty?
      next if @lr0_cidhash[target.hash]
      @lr0_cidhash[target.hash] = states.length
      states << target
    end
  end
  states
end
324
+
325
# Finds the nonterminals that can derive the empty string.
#
# A production name is nullable when one of its productions is empty or
# consists only of nullable symbols; passes over the productions repeat
# until a pass adds no new name.
#
# @return [Hash] nullable nonterminal => 1
def compute_nullable_nonterminals
  nullable = {}
  loop do
    known_before = nullable.length
    @grammar.productions[1..-1].each do |production|
      if production.length == 0 || production.prod.all? { |sym| nullable[sym] }
        nullable[production.name] = 1
      end
    end
    break if nullable.length == known_before
  end
  nullable
end
348
+
349
# Lists every [state, nonterminal] pair for which some item of the state
# has that nonterminal immediately after the dot.
#
# @param c [Array<Array>] LR(0) item sets, index == state number
# @return [Array<Array>] unique [state, nonterminal] pairs in discovery order
def find_nonterminal_transitions(c)
  trans = []
  c.each_with_index do |items, state|
    items.each do |item|
      next unless item.lr_index < item.length - 1

      sym = item.prod[item.lr_index + 1]
      next unless @grammar.nonterminals[sym]

      pair = [state, sym]
      trans << pair unless trans.include?(pair)
    end
  end
  trans
end
364
+
365
# Computes the read set of every nonterminal transition by running
# {#digraph} with the direct-read sets as base function and the "reads"
# relation as edges.
#
# @param c [Array<Array>] LR(0) item sets
# @param ntrans [Array<Array>] nonterminal transitions
# @param nullable [Hash] nullable nonterminals
# @return [Hash] transition => Array of terminals
def compute_read_sets(c, ntrans, nullable)
  direct_reads = lambda { |transition| dr_relation(c, transition, nullable) }
  reads = lambda { |transition| reads_relation(c, transition, nullable) }
  digraph(ntrans, reads, direct_reads)
end
370
+
371
# Direct-read set of a nonterminal transition: the terminals that appear
# right after the dot in the state reached by the transition. :'$end' is
# added for the transition from state 0 on the augmented production's
# first symbol.
#
# @param c [Array<Array>] LR(0) item sets
# @param trans [Array] [state, nonterminal] pair
# @param nullable [Hash] nullable nonterminals (not consulted here)
# @return [Array] terminals, without duplicates
def dr_relation(c, trans, nullable)
  state, n = trans
  terms = []

  lr0_goto(c[state], n).each do |item|
    next unless item.lr_index < item.length - 1

    a = item.prod[item.lr_index + 1]
    next unless @grammar.terminals.include?(a)

    terms << a unless terms.include?(a)
  end

  terms << :'$end' if state == 0 && n == @grammar.productions[0].prod[0]

  terms
end
390
+
391
# "Reads" relation of a nonterminal transition: for the state reached by
# the transition, every [that state, symbol] pair whose symbol follows a
# dot and is contained in +empty+.
#
# @param c [Array<Array>] LR(0) item sets
# @param trans [Array] [state, nonterminal] pair
# @param empty [Hash] nullable nonterminals
# @return [Array<Array>] related [state, symbol] pairs
def reads_relation(c, trans, empty)
  state, n = trans

  target = lr0_goto(c[state], n)
  target_state = @lr0_cidhash[target.hash] || -1

  related = []
  target.each do |item|
    next unless item.lr_index < item.length - 1

    a = item.prod[item.lr_index + 1]
    related << [target_state, a] if empty.include?(a)
  end

  related
end
406
+
407
# Computes a set-valued function over the nodes +x+ by delegating to
# {#traverse} for every node that has not been visited yet.
#
# @param x [Array] nodes
# @param r [#call] node => related nodes
# @param fp [#call] node => initial set for that node
# @return [Hash] node => resulting set
def digraph(x, r, fp)
  marks = {}
  x.each { |node| marks[node] = 0 } # 0 == not visited yet
  pending = []
  result = {}
  x.each do |node|
    traverse(node, marks, pending, result, x, r, fp) if marks[node] == 0
  end
  result
end
417
+
418
# Depth-first step of {#digraph} for node +xx+.
#
# Seeds f[xx] from +fp+, visits unvisited related nodes, merges their sets
# into f[xx] and lowers n[xx] to the smallest mark seen. If n[xx] still
# equals the depth it was pushed at, nodes are popped off the stack down to
# and including +xx+; each is marked MAXINT and given f[xx] as its set.
#
# @param xx [Object] node being visited
# @param n [Hash] node => mark (0 means unvisited)
# @param stack [Array] nodes currently being visited
# @param f [Hash] node => set computed so far (updated in place)
# @param x [Array] all nodes
# @param r [#call] node => related nodes
# @param fp [#call] node => initial set
def traverse(xx, n, stack, f, x, r, fp)
  stack.push(xx)
  depth = stack.length
  n[xx] = depth
  f[xx] = fp.call(xx)

  r.call(xx).each do |y|
    traverse(y, n, stack, f, x, r, fp) if n[y] == 0

    n[xx] = [n[xx], n[y]].min

    (f[y] || []).each do |a|
      f[xx] << a unless f[xx].include?(a)
    end
  end

  return unless n[xx] == depth

  loop do
    top = stack[-1]
    n[top] = MAXINT
    f[top] = f[xx]
    break if stack.pop == xx
  end
end
446
+
447
# Computes the lookback and includes relations for the nonterminal
# transitions +trans+.
#
# For each transition [state, n] and each item of that state named n, the
# item's body is followed through the goto function:
# * a transition met on the way is "included" when every symbol after it
#   in the body is a nullable nonterminal;
# * in the state reached at the end, every item with the same name and
#   length whose symbols before its dot match the followed body is a
#   lookback.
#
# @param c [Array<Array>] LR(0) item sets
# @param trans [Array<Array>] nonterminal transitions
# @param nullable [Hash] nullable nonterminals
# @return [Array(Hash, Hash)] [lookbacks, includes]; lookbacks maps a
#   transition to [state, item] pairs, includes maps a transition to the
#   transitions that include it
def compute_lookback_includes(c, trans, nullable)
  lookdict = {}
  includedict = {}

  known_transitions = trans.each_with_object({}) { |t, seen| seen[t] = 1 }

  trans.each do |state, n|
    lookb = []
    includes = []

    c[state].each do |item|
      next unless item.name == n

      # Follow the body of the item through the states.
      dot = item.lr_index
      j = state
      while dot < item.length - 1
        dot += 1
        t = item.prod[dot]

        if known_transitions.include?([j, t])
          # Included only if the rest of the body can vanish entirely.
          rest_vanishes = ((dot + 1)...item.length).all? do |li|
            sym = item.prod[li]
            !@grammar.terminals[sym] && nullable[sym]
          end
          includes << [j, t] if rest_vanishes
        end

        j = @lr0_cidhash[lr0_goto(c[j], t).hash] || -1
      end

      # Collect the matching completed items in the final state.
      c[j].each do |r|
        next unless r.name == item.name
        next unless r.length == item.length

        same_prefix = (0...r.lr_index).all? { |k| r.prod[k] == item.prod[k + 1] }
        lookb << [j, r] if same_prefix
      end
    end

    includes.each do |included|
      (includedict[included] ||= []) << [state, n]
    end
    lookdict[[state, n]] = lookb
  end

  [lookdict, includedict]
end
510
+
511
# Computes the follow set of every nonterminal transition by running
# {#digraph} with the read sets as base function and the includes relation
# as edges.
#
# @param ntrans [Array<Array>] nonterminal transitions
# @param readsets [Hash] transition => read set
# @param inclsets [Hash] transition => included transitions
# @return [Hash] transition => follow set
def compute_follow_sets(ntrans, readsets, inclsets)
  read_set_of = lambda { |transition| readsets[transition] }
  included_by = lambda { |transition| inclsets[transition] || [] }
  digraph(ntrans, included_by, read_set_of)
end
516
+
517
# Merges follow sets into item lookaheads.
#
# For every transition, each [state, item] pair in its lookback list gets
# the transition's follow set merged (without duplicates) into
# item.lookaheads[state]; the entry is created even when the follow set is
# empty or missing.
#
# @param lookbacks [Hash] transition => Array of [state, item]
# @param followset [Hash] transition => Array of terminals
# @return [Hash] +lookbacks+
def add_lookaheads(lookbacks, followset)
  lookbacks.each do |trans, entries|
    follow = followset[trans] || []
    entries.each do |state, item|
      bucket = (item.lookaheads[state] ||= [])
      follow.each do |terminal|
        bucket << terminal unless bucket.include?(terminal)
      end
    end
  end
end
528
+ end
529
+ end