kanocc 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -31,6 +31,7 @@ require "kanocc"
31
31
  # PackageList ::= Package
32
32
  # | PackageList Package
33
33
 
34
+ # A forward declaration
34
35
  class PackageList < Kanocc::Nonterminal
35
36
  end
36
37
 
@@ -43,23 +44,23 @@ class Package < Kanocc::Nonterminal
43
44
  rule('{', PackageList , '}') { @val = "{#{@rhs[1].val}}"}
44
45
  rule('[', PackageList , ']') { @val = "[#{@rhs[1].val}]"}
45
46
  # Some error-correcting rules
46
- rule(PackageList, ')') {@val = "(#{@rhs[0].val})"}; prec -2
47
- rule('(', PackageList) {@val = "(#{@rhs[1].val})"}; prec -2
48
- rule(PackageList, '}') {@val = "{#{@rhs[0].val}}"}; prec -2
49
- rule('{', PackageList) {@val = "{#{@rhs[1].val}}"}; prec -2
50
- rule(PackageList, ']') {@val = "[#{@rhs[0].val}]"}; prec -2
51
- rule('[', PackageList) {@val = "[#{@rhs[1].val}]"}; prec -2
47
+ rule(PackageList, ')') {@val = "(#{@rhs[0].val})"}; precedence -2
48
+ rule('(', PackageList) {@val = "(#{@rhs[1].val})"}; precedence -2
49
+ rule(PackageList, '}') {@val = "{#{@rhs[0].val}}"}; precedence -2
50
+ rule('{', PackageList) {@val = "{#{@rhs[1].val}}"}; precedence -2
51
+ rule(PackageList, ']') {@val = "[#{@rhs[0].val}]"}; precedence -2
52
+ rule('[', PackageList) {@val = "[#{@rhs[1].val}]"}; precedence -2
52
53
  end
53
54
 
54
- class PackageList
55
+ class PackageList < Kanocc::Nonterminal
55
56
  attr_reader :val
56
57
  rule(om(Package)){ @val = @rhs[0].elements.map{|p| p.val}.join("") }
57
58
  end
58
59
 
59
60
  # Set up a parser
60
- packageChecker = Kanocc::Kanocc.new(Package)
61
+ packageChecker = Kanocc::Kanocc.new(PackageList)
61
62
 
62
63
  # And go
63
- puts "[(B)] becomes " + packageChecker.parse('[(B)]').val
64
- puts "[[B] becomes " + packageChecker.parse('[[B]').val
65
- puts "[(B)]](B){{(B)] becomes " + packageChecker.parse("[(B)]](B){{(B)]").val
64
+ puts "[(B)] .............becomes........ " + packageChecker.parse('[(B)]').val
65
+ puts "[[B] .............becomes........ " + packageChecker.parse('[[B]').val
66
+ puts "[(B)]](B){{(B)] ...becomes........ " + packageChecker.parse("[(B)]](B){{(B)]").val
@@ -46,7 +46,7 @@ require 'logger'
46
46
  #
47
47
  # NUM a sequence of digits
48
48
  #
49
- # In Kanocc yout could do it like this:
49
+ # In Kanocc you could do it like this:
50
50
  #
51
51
  # require "kanocc"
52
52
  #
@@ -80,52 +80,46 @@ require 'logger'
80
80
  #
81
81
  module Kanocc
82
82
  class Kanocc
83
- attr_accessor :scanner, :parser, :logger
83
+ attr_accessor :parser, :logger
84
84
 
85
85
  # Creates a new instance of Kanocc, with the given start symbol.
86
86
  # From the start_symbol, Kanocc will deduce the grammar and the
87
87
  # grammar symbols
88
88
  #
89
89
  def initialize(start_symbol)
90
- @start_symbol = start_symbol
90
+ @start_symbol = start_symbol
91
91
  @logger = Logger.new(STDOUT)
92
92
  @logger.datetime_format = ""
93
- @logger.level = Logger::WARN
94
- @scanner = Scanner.new(:logger => @logger)
95
- @parser = EarleyParser.new(self, :logger => @logger)
93
+ @logger.level = Logger::WARN
94
+ @scanner = Scanner.new
95
+ @scanner.set_recognized(*find_tokens(@start_symbol))
96
+ @parser = EarleyParser.new(self, @logger)
96
97
  end
97
98
 
98
99
  def logger=(logger)
99
100
  @logger = logger || logger.new(STDOUT)
100
- @parser.logger = @logger if parser.respond_to?(:logger)
101
- @scanner.logger = @logger if scanner.respond_to?(:logger)
102
- end
103
-
104
- def parser=(parser)
105
- @parser = parser
106
101
  @parser.logger = @logger if parser.respond_to?(:logger=)
107
102
  end
108
-
109
- def scanner=(scanner)
110
- @scanner = scanner
111
- @scanner.logger = @logger if scanner.respond_to?(:logger=)
112
- end
113
-
103
+
104
+
114
105
  # Consume input. Kanocc will parse input according to the rules given, and
115
106
  # - if parsing succeeds - return an instance of the grammar's start symbol.
116
107
  # Input may be a String or an IO object.
117
108
  def parse(input)
109
+ if input.is_a?(IO)
110
+ @input = input.readlines.join("")
111
+ elsif input.is_a?(String)
112
+ @input = input
113
+ else
114
+ raise "Input must be a string or an IO object"
115
+ end
118
116
  raise "Start symbol not defined" unless @start_symbol
119
- tell_parser_start_symbol(@start_symbol)
120
- @parser.prepare
117
+ @input = input
118
+ @scanner.input = input
119
+ @parser.start_symbol = @start_symbol
121
120
  @stack = []
122
- @inputPos = 0
123
- @scanner.each_token(input) do |token_match|
124
- @logger.info "got #{token_match.inspect} from scanner"
125
- @inputPos += 1
126
- @parser.consume(token_match)
127
- end
128
- @parser.eof
121
+ @parser.parse(@scanner)
122
+ @logger.info("Stack: " + @stack.inspect)
129
123
  @stack[0][0]
130
124
  end
131
125
 
@@ -160,8 +154,8 @@ module Kanocc
160
154
  def report_reduction(rule)
161
155
  @logger.info "Reducing by " + rule.inspect
162
156
  raise "Fatal: stack too short!" if @stack.length < rule.rhs.length
163
- nonterminal = rule.lhs.new
164
- stack_part = @stack.slice!(-rule.rhs.length, rule.rhs.length)
157
+ nonterminal = rule.lhs.new
158
+ stack_part = @stack.slice!(-rule.rhs.length, rule.rhs.length)
165
159
  if rule.rhs.length > 0
166
160
  start_pos, end_pos = stack_part[0][1], stack_part[-1][2]
167
161
  elsif @stack.length > 0
@@ -170,7 +164,7 @@ module Kanocc
170
164
  start_pos, end_pos = 0,0
171
165
  end
172
166
  if rule.method
173
- rhs = Rhs.new(stack_part.map{|a| a[0]}, start_pos, end_pos)
167
+ rhs = Rhs.new(stack_part.map{|a| a[0]}, start_pos, end_pos, @input)
174
168
  old_rhs = nonterminal.instance_variable_get('@rhs')
175
169
  nonterminal.instance_variable_set('@rhs', rhs)
176
170
  nonterminal.send(rule.method)
@@ -181,81 +175,47 @@ module Kanocc
181
175
  show_stack
182
176
  end
183
177
 
184
- def calculate_start_and_end_pos(rule)
185
- end
186
-
187
- def evaluate_semantics_and_pop(rule, nonterminal)
188
- end
189
-
190
178
  # The parser must call this method when it consumes a token
191
- # As argument it should give the consumed token and the positions
192
- # in the input string corresponding to the token. Positions should be given
193
- # as the position of the first character of the token and the position of the
194
- # first character after the token.
195
- def report_token(tokenmatch, element)
196
- @logger.info("Pushing token: " + element.inspect)
197
- match = tokenmatch[:matches].find do |m|
198
- m[:token] == element || m[:literal] == element
199
- end
200
-
201
- if match[:token]
202
- token = match[:token].new
203
- token.m = match[:regexp].match(tokenmatch[:string])
204
- token.send(match[:method_name]) if match[:method_name]
179
+ # As argument it should give the LexicalMatch and the matched terminal.
180
+ def report_token(lexical_match, terminal)
181
+ start_pos = lexical_match.start_pos
182
+ length = lexical_match.length
183
+ stringpart = @input.slice(start_pos, length)
184
+ if terminal.class == Class # It's a token
185
+ instance = terminal.new
186
+ regexp = lexical_match.regexp(terminal)
187
+ instance.m = regexp.match(stringpart)
188
+ if method = terminal.method(regexp)
189
+ instance.send(method)
190
+ end
205
191
  else # It's a string literal
206
- token = match[:literal]
192
+ instance = terminal
207
193
  end
208
-
209
- start_pos = tokenmatch[:start_pos]
210
- end_pos = start_pos + tokenmatch[:length]
211
- token_with_pos = [token, start_pos, end_pos]
212
-
213
- @stack.push(token_with_pos)
214
- show_stack
215
- end
216
-
217
-
218
- def tell_parser_start_symbol(start_symbol)
219
- @parser.startsymbol = start_symbol
220
- bag_of_terminals = {}
221
- find_tokens(start_symbol, bag_of_terminals)
222
- @logger.debug "tokens = " + bag_of_terminals.keys.inspect
223
- strings = bag_of_terminals.keys.find_all{|ter| ter.is_a? String}
224
- @logger.info("Literals: " + strings.inspect)
225
- tokens = bag_of_terminals.keys.find_all{|ter| ter.is_a? Class and ter.ancestors.member?(Token)}
226
- @logger.info("Tokens: " + tokens.inspect)
227
- @scanner.set_recognized(*(strings + tokens))
228
194
 
229
- # Show rules
230
- @logger.info("Rules:")
231
- nonterminals = [start_symbol]
232
- nonterminals.each do |nonterminal|
233
- nonterminal.rules.each do |rule|
234
- @logger.info(" " + rule.inspect)
235
- rule.rhs.each do |gs|
236
- if gs.is_a? Class and gs.ancestors.member?(Nonterminal) and not nonterminals.member?(gs)
237
- nonterminals.push(gs)
238
- end
239
- end
240
- end
241
- end
195
+ @stack.push([instance, start_pos, start_pos + length])
196
+ show_stack
242
197
  end
243
-
244
- def find_tokens(nonterminal, collectedTokens, visited_nonterminals = {})
198
+
199
+ def find_tokens(nonterminal)
200
+ collected_tokens = {}
201
+ find_tokens_helper(nonterminal, collected_tokens)
202
+ collected_tokens.keys
203
+ end
204
+ def find_tokens_helper(nonterminal, collected_tokens, visited_nonterminals = {})
245
205
  unless visited_nonterminals[nonterminal]
246
206
  visited_nonterminals[nonterminal] = true
247
207
  nonterminal.rules.each do |r|
248
208
  r.rhs.each do |gs|
249
209
  if gs.is_a?(Class) and gs.ancestors.member?(Nonterminal)
250
- find_tokens(gs, collectedTokens, visited_nonterminals)
210
+ find_tokens_helper(gs, collected_tokens, visited_nonterminals)
251
211
  else
252
- collectedTokens[gs] = true
212
+ collected_tokens[gs] = true
253
213
  end
254
214
  end
255
215
  end
256
216
  end
257
217
  end
258
-
218
+
259
219
  # For debugging
260
220
  def show_stack
261
221
  @logger.info("Stack: #{@stack.inspect}") if @logger
@@ -274,30 +234,41 @@ module Kanocc
274
234
  gs.inspect
275
235
  end
276
236
  end
277
-
237
+
278
238
  end
279
239
 
280
240
  class Rhs < Array
281
- attr_accessor :start_pos, :end_pos
282
- def initialize(arr, start_pos, end_pos)
283
- @start_pos, @end_pos = start_pos, end_pos
241
+ attr_reader :start_pos, :end_pos
242
+ def initialize(arr, start_pos, end_pos, input)
243
+ @start_pos, @end_pos, @input = start_pos, end_pos, input
284
244
  super(arr)
285
245
  end
286
246
 
247
+ def text
248
+ @input.slice(start_pos, end_pos - start_pos)
249
+ end
250
+
287
251
  def inspect
288
252
  return "#{super.inspect}, #{start_pos.inspect}, #{end_pos.inspect}"
289
253
  end
290
254
  end
291
-
292
- class ParseException < Exception
293
- attr_accessor :inputPos, :inputSymbol, :expected
294
- def initialize(inputPos, inputSymbol, expected)
295
- @inputPos, @inputSymbol, @expected = inputPos, inputSymbol, expected
296
- end
297
- end
298
255
 
299
256
  class KanoccException < Exception
300
257
  end
301
- end
302
-
303
258
 
259
+ class ParseException < KanoccException
260
+ attr_reader :expectedTerminals, :offendingInput, :pos
261
+ def initialize(expected_terminals, offending_input, pos)
262
+ @expected_terminals, @offending_input, @pos =
263
+ expected_terminals, offending_input, pos
264
+ error_msg = "\n Could not consume input: #{offending_input} at #{pos}"
265
+ if expected_terminals.size > 0
266
+ error_msg += " - expected " +
267
+ "#{expected_terminals.map {|t| t.inspect}.join(" or ")}"
268
+ else
269
+ error_msg += " - no input could be consumed at this point."
270
+ end
271
+ super(error_msg)
272
+ end
273
+ end
274
+ end
@@ -1,4 +1,4 @@
1
- #
1
+ ##
2
2
  # Copyright 2008 Christian Surlykke
3
3
  #
4
4
  # This file is part of Kanocc.
@@ -16,8 +16,12 @@
16
16
  # version 3 along with Kanocc. If not, see <http://www.gnu.org/licenses/>.
17
17
  #
18
18
  require 'kanocc/grammar_rule'
19
+ require 'kanocc/nonterminal'
19
20
  require 'kanocc/token'
20
21
  require 'logger'
22
+
23
+ #require 'rubygems'
24
+
21
25
  module Kanocc
22
26
  #
23
27
  # Parser for Kanocc based on Earley's algorithm. For a description see:
@@ -33,290 +37,331 @@ module Kanocc
33
37
  # Christian Surlykke 2007.
34
38
  #
35
39
  class EarleyParser
36
- attr_accessor :kanocc, :logger
40
+ attr_accessor :kanocc, :logger
37
41
 
38
42
  ErrorRule = GrammarRule.new(Error, [], nil)
39
43
 
40
- def initialize(kanocc, options = {})
44
+ def initialize(kanocc, logger)
41
45
  @kanocc = kanocc
42
- @logger = options[:logger] || Logger.new
46
+ @logger = logger
47
+ end
48
+
49
+ def start_symbol=(start_symbol)
50
+ @start_symbol = Class.new(StartSymbol) do
51
+ def self.to_s
52
+ "S'"
53
+ end
54
+ rule(start_symbol)
55
+ end
43
56
  end
57
+
44
58
 
45
- #
46
- # Sets up the parser, creating itemlist 0.
47
- #
48
- def startsymbol=(startSymbol)
49
- @start_symbol = startSymbol
50
- @itemLists = [ItemList.new(nil, 0)]
51
- @inputPos = 0
52
- @recoveryPoints = []
53
- @itemLists[0].add_all(@start_symbol.rules.map{|rule| Item.new(rule, 0)})
54
- predict_and_complete(0)
59
+ def parse(scanner)
60
+ @scanner = scanner
61
+ prepare
62
+
63
+ while (@scanner.next_match!) do
64
+ @inputPos += 1
65
+ @input_symbols.push(scanner.current_match)
66
+ @items.prepare_for_n(@inputPos)
67
+ # scan, predict and complete until no more can be added
68
+
69
+ scan
70
+
71
+ predict_and_complete(@inputPos)
72
+
73
+ if @logger
74
+ @logger.info("\nItems at #{@inputPos}:\n" +
75
+ @input_symbols[@inputPos].inspect + "\n" +
76
+ @items.items_at_n(@inputPos).map{|item| " " + item.inspect}.join("\n") + "\n")
77
+ end
78
+
79
+ handle_error if @items.number_at_n(@inputPos) == 0
80
+ end
81
+
82
+ reduce
55
83
  end
56
84
 
57
85
  def prepare
58
- @itemLists = @itemLists[0..0]
86
+ @items = ItemSet.new
59
87
  @inputPos = 0
60
- if @recoveryPoints.size > 0 and @recoveryPoints[0] == 0
61
- @recoveryPoints = [0]
62
- else
63
- @recoveryPoints = []
88
+ @input_symbols = [nil]
89
+ @recoveryPoints = []
90
+ @start_symbol.rules.each do |rule|
91
+ @items.add(rule, 0, 0, 0, -1)
92
+ end
93
+ predict_and_complete(0)
94
+ if @logger
95
+ @logger.info("\nItems at 0:\n" +
96
+ @items.items_at_n(0).map{|item| " " + item.inspect}.join("\n") + "\n")
64
97
  end
65
- @logger.info("Itemlist 0:\n" + @itemLists[0].inspect) unless not @logger
66
98
  end
99
+
100
+ # Scan: At position n, for each terminal a in current match, and each item
101
+ # of form [A -> x*ay, i, n-1], add [A -> xa*y, i, n]
102
+ def scan
67
103
 
68
-
69
- def scan(token_match)
70
- token_match[:matches].each do |match|
71
- if match[:token]
72
- symbol = match[:token]
73
- else
74
- symbol = match[:literal]
104
+ @scanner.current_match.terminals.each do |terminal|
105
+ @items.items_n_and_symbol_after_dot(@inputPos -1, terminal).each do |item|
106
+ @items.add(item.rule, item.dot + 1, item.j, @inputPos, @inputPos - 1)
75
107
  end
76
- items = @itemLists[@inputPos - 1].find_matching(symbol)
77
- @itemLists[@inputPos].add_all(items.map{|item| item.move})
78
108
  end
109
+
79
110
  end
80
-
81
- def predict_and_complete(pos)
82
- item_list = @itemLists[pos]
83
- prev_size = 0
84
- while prev_size < item_list.size do
85
- prev_size = item_list.size
86
- item_list.each do |item|
87
- if item.rule.rhs.length <= item.dot
88
- # complete
89
- item_list.add_all(@itemLists[item.j].find_matching(item.rule.lhs).map{|item| item.move})
90
- elsif (nont = item.rule.rhs[item.dot]).respond_to?(:rules)
111
+
112
+
113
+ # Predict: For any item of form [A -> a*Bb, j, n] and for all rules of form
114
+ # B -> c, add [B -> *c, n, n].
115
+ #
116
+ # Complete: Given an item of form [A->X*, j, n], find all items of form
117
+ # [B -> a*Ab, i, j], and add [B -> aA*b, i, n].
118
+ #
119
+ # Predict and complete until nothing further can be added.
120
+ def predict_and_complete(pos, show=false)
121
+ prev_size = 0
122
+ while true do
123
+ break if prev_size >= @items.number_at_n(pos)
124
+ prev_size = @items.number_at_n(pos)
125
+ @items.items_at_n(pos).each do |item|
126
+ if item.dot >= item.rule.rhs.length
127
+ # complete
128
+ @items.items_n_and_symbol_after_dot(item.j, item.rule.lhs).each do |previtem|
129
+ @items.add(previtem.rule, previtem.dot + 1, previtem.j, pos, item.j)
130
+ end
131
+ elsif item.rule.rhs[item.dot].respond_to?(:rules)
91
132
  # predict
92
- item_list.add_all(nont.rules.map {|rule| Item.new(rule, @inputPos)})
93
- end
133
+ item.rule.rhs[item.dot].rules.each do |rule|
134
+ @items.add(rule, 0, pos, pos, -1)
135
+ end
136
+ end
94
137
  end
95
- end
138
+ end
96
139
  end
97
140
 
98
- def add_recovery_points(pos)
99
- if @recoveryPoints[-1] != pos
100
- @itemLists[pos].each do |item|
101
- if Error == item.rule.rhs[item.dot]
102
- @recoveryPoints.push(pos)
103
- break
104
- end
141
+ def handle_error
142
+ if j = find_error_items()
143
+ @items.add(ErrorRule, 0, j, @inputPos - 1, -1)
144
+ predict_and_complete(@inputPos - 1, true)
145
+ if @logger
146
+ @logger.info("Items at #{@inputPos - 1} after error handling:\n" +
147
+ @items.items_at_n(@inputPos - 1).map {|item| item.inspect}.join("\n"))
105
148
  end
149
+ scan
150
+ predict_and_complete(@inputPos)
151
+ if @logger
152
+ @logger.info("Items at #{@inputPos} after error handling:\n" +
153
+ @items.items_at_n(@inputPos).map {|item| item.inspect}.join("\n"))
154
+ end
155
+ else
156
+ expected_terminals =
157
+ @items.items_at_n(@inputPos - 1).map { |item| item.rule.rhs[item.dot]}.find_all do |gs|
158
+ gs.is_a? String or (gs.is_a? Class and gs.ancestors.include?(Token))
159
+ end.uniq
160
+
161
+ pos, length = @scanner.current_match.start_pos, @scanner.current_match.length
162
+ offending_input = @scanner.input[pos, length].inspect
163
+ raise ParseException.new(expected_terminals, offending_input, pos)
106
164
  end
107
165
  end
108
-
109
- #
110
- # Consume and parse next input symbol
111
- #
112
- def consume(token_match)
113
- @inputPos += 1
114
- @itemLists.push(ItemList.new(token_match, @inputPos))
115
-
116
- # scan, predict and complete until no more can be added
117
- scan(token_match)
118
-
119
- if @itemLists[@inputPos].size == 0
120
- @logger.debug("Found no items matching #{token_match} in itemlist #{@inputPos - 1}")
121
- @logger.debug("@recoveryPoints = " + @recoveryPoints.inspect)
122
- for i in 1..@recoveryPoints.length do
123
- if @recoveryPoints[-i] < @inputPos
124
- @itemLists[@inputPos - 1].add(Item.new(ErrorRule, @recoveryPoints[-i]))
125
- predict_and_complete(@inputPos - 1)
126
- scan(token_match)
127
- break if @itemLists[@inputPos].size > 0
128
- end
166
+
167
+ def find_error_items
168
+ for n in (@inputPos - 1).downto(0) do
169
+ if @items.items_n_and_symbol_after_dot(n, Error).size > 0
170
+ return n
129
171
  end
130
172
  end
131
- predict_and_complete(@inputPos)
132
- add_recovery_points(@inputPos)
133
- @logger.info("Itemlist #{@inputPos}:\n" + @itemLists[@inputPos].inspect) if @logger
173
+ return nil
134
174
  end
135
-
136
-
137
175
 
138
-
139
- #
140
- # Signal to the parser that end of input is reached
141
- #
142
- def eof
143
- top_item = find_full_items(@start_symbol, @inputPos).find_all {|item| item.j == 0}.max
144
- if top_item
145
- translate(top_item, @inputPos)
176
+ def reduce
177
+ item = @items.items_at_n(@inputPos).find do |item|
178
+ @start_symbol == item.rule.lhs and item.dot == 1
179
+ end
180
+ if item
181
+ # There is at most one of those
182
+ make_parse(item, @inputPos, 0)
146
183
  else
147
184
  raise(KanoccException, "It didn't parse")
148
185
  end
149
186
  end
150
-
151
- def translate(element, pos)
152
- @logger.debug("translate: " + element.inspect + " on " + pos.inspect)
153
- if element.class == Item
154
- translate_helper(element, pos)
155
- @kanocc.report_reduction(element.rule)
156
- else # Its a token or a string
157
- @kanocc.report_token(@itemLists[pos].inputSymbol, element)
187
+
188
+ # FIXME Generates stack overflow when files are large.
189
+ # 15000-2000 inputsymbols with the calculator syntax.
190
+ # Should be rewritten to something non-recursive
191
+ def make_parse(item, pos, prev_pos)
192
+ return if item.dot <= 0
193
+
194
+ prev_item = @items.find(item.rule, item.dot - 1, item.j, prev_pos)
195
+ prev_prev_pos = prev_item.rule.derives_right ? prev_item.prev_pos_min : prev_item.prev_pos_max
196
+
197
+ if is_nonterminal?(item.symbol_before_dot)
198
+ subitem, sub_prev_pos = pick_subitem(item.symbol_before_dot, pos, prev_pos)
199
+ make_parse(prev_item, prev_pos, prev_prev_pos)
200
+ make_parse(subitem, pos, sub_prev_pos)
201
+ @kanocc.report_reduction(subitem.rule)
202
+ else
203
+ make_parse(prev_item, prev_pos, prev_prev_pos)
204
+ symbol = item.symbol_before_dot
205
+ @kanocc.report_token(@input_symbols[pos], symbol)
158
206
  end
159
207
  end
160
-
161
- def translate_helper(item, pos)
162
- @logger.debug("translateHelper: " + item.inspect + " on " + pos.inspect)
163
- return if item.dot == 0
164
- if item.rule.rhs[item.dot - 1].respond_to?("rules")
165
- # Assume item is of form [A --> aB*c, k] in itemlist i
166
- # Must then find item of form [B --> x*, j] in itemlist i so
167
- # that there exists item of form [A --> a*Bc, k] on itemlist j
168
-
169
- # First: Items of form [B --> x*, j] on list i
170
- candidates = find_full_items(item.rule.rhs[item.dot - 1], pos)
171
-
172
- # Then: Those for which item of form [A --> a*Bc, k] exists
173
- # on list j
174
- candidates = candidates.find_all {|subItem|
175
- @itemLists[subItem.j].find_item(item.rule, item.dot - 1, item.j)
176
- }
177
-
178
- # Precedence: We pick the posibility with the higest precedence
179
- sub_item = candidates.max
180
- prev_item = @itemLists[sub_item.j].find_item(item.rule, item.dot - 1, item.j)
181
- prev_list = sub_item.j
208
+
209
+ def pick_subitem(nonterminal, pos, prev_pos)
210
+ #debugger
211
+ items = @items.full_items_by_lhs_j_and_n(nonterminal, prev_pos, pos)
212
+
213
+ raise "pick_subitem could not find any items" if items.size <= 0
214
+ items = find_highest(items) {|item| precedence(item)}
215
+
216
+ derives_right = all_derives_right(items)
217
+ if derives_right
218
+ items = find_highest(items) {|item| -item.prev_pos_min}
182
219
  else
183
- prev_item = @itemLists[pos - 1].find_item(item.rule, item.dot - 1, item.j)
184
- prev_list = pos - 1
185
- sub_item = item.rule.rhs[item.dot - 1]
220
+ items = find_highest(items){|item| item.prev_pos_max}
186
221
  end
187
- translate_helper(prev_item, prev_list)
188
- translate(sub_item, pos)
222
+
223
+ return items[0], derives_right ? items[0].prev_pos_min : items[0].prev_pos_max
189
224
  end
190
225
 
191
-
192
-
193
- def find_full_items(nonterminal, inputPos)
194
- @itemLists[inputPos].find_all do |item|
195
- item.rule.lhs == nonterminal and item.dot >= item.rule.rhs.length
226
+ def find_highest(items, &expr)
227
+ collect = []
228
+ top_val = nil;
229
+ items.each do |item|
230
+ val = expr.call(item)
231
+ if top_val == nil or top_val < val
232
+ collect = [item]
233
+ top_val = val
234
+ elsif top_val == val
235
+ collect << item
236
+ end
196
237
  end
238
+ return collect
197
239
  end
198
- end
199
-
200
- class ItemList
201
- attr_reader :inputSymbol
202
- attr_accessor :items
203
-
204
- def initialize(inputSymbol, inputPos)
205
- @inputPos = inputPos
206
- @inputSymbol = inputSymbol
207
- @items = Hash.new
208
- end
209
-
210
- def copy
211
- res = clone
212
- res.items = @items.clone
213
- return res
214
- end
215
-
216
- def size
217
- return @items.size
218
- end
219
-
220
- def find_all(&b)
221
- return @items.keys.find_all(&b)
222
- end
223
-
224
- def find_item(rule, dot, j)
225
- return @items.keys.find{ |item|
226
- item.rule == rule and
227
- item.dot == dot and
228
- item.j == j
229
- }
240
+
241
+ def precedence(item)
242
+ item.rule.precedence || 0
230
243
  end
231
-
232
- def each_matching(inputSymbol)
233
- find_matching(inputSymbol).each do |item|
234
- yield(item)
244
+
245
+ def all_derives_right(items)
246
+ items.each do |item|
247
+ return false unless item.rule.derives_right
235
248
  end
249
+ return true
236
250
  end
237
-
238
- def find_matching(inputSymbol)
239
- @items.keys.find_all do |item|
240
- inputSymbol === item.symbol_after_dot or inputSymbol == item.symbol_after_dot
241
- end
251
+
252
+ def is_nonterminal?(symbol)
253
+ symbol.respond_to?(:rules)
242
254
  end
255
+ end
243
256
 
244
- def contains(item)
245
- return @items[item]
257
+ class Item
258
+ attr_reader :rule, :dot, :j, :n
259
+ attr_accessor :prev_pos_min, :prev_pos_max
260
+
261
+ def initialize(rule, dot, j, n, prev_pos_min = 0, prev_pos_max = 0)
262
+ @rule = rule
263
+ @dot = dot
264
+ @j = j
265
+ @n = n
266
+ @prev_pos_min = prev_pos_min
267
+ @prev_pos_max = prev_pos_max
246
268
  end
247
-
248
- def add(item)
249
- @items.store(item, true)
269
+
270
+ def symbol_after_dot
271
+ return @dot < @rule.rhs.size ? @rule.rhs[@dot] : nil
250
272
  end
251
273
 
252
- def add_all(items)
253
- items.each {|item| @items.store(item, true)}
274
+ def symbol_before_dot
275
+ return @dot > 0 ? @rule.rhs[@dot - 1] : nil
254
276
  end
255
277
 
256
- def each
257
- @items.keys.each do |item|
258
- yield item
278
+ def set_prev_pos(new_prev_pos)
279
+ if new_prev_pos < @prev_pos_min
280
+ @prev_pos_min = new_prev_pos
281
+ elsif new_prev_pos > @prev_pos_max
282
+ @prev_pos_max = new_prev_pos
259
283
  end
260
284
  end
261
285
 
262
286
  def inspect
263
- return "[" + @inputSymbol.inspect + "\n " +
264
- @items.keys.map{|item| item.inspect}.join("\n ") + "]\n"
287
+ return "[" +
288
+ @rule.lhs.inspect + " --> " +
289
+ (@rule.rhs.slice(0, dot) + [Dot.instance] +
290
+ @rule.rhs.slice(dot, @rule.rhs.length - dot)).map{|symbol| symbol.inspect}.join(" ") +
291
+ " ; " + @j.inspect + ", " + @n.inspect + "]"
265
292
  end
266
293
  end
267
294
 
268
-
269
- class Item
270
- attr_reader :rule, :j, :dot
271
- @@items = Hash.new
272
-
273
- def Item.new(rule, j, dot = 0)
274
- unless (item = @@items[[rule,j,dot]])
275
- item = super(rule, j, dot)
276
- @@items.store([rule, j, dot], item)
295
+
296
+ class ItemSet
297
+ # FIXME Optimize all this
298
+
299
+ def initialize
300
+ @item_lists = []
301
+ @items_n_and_symbol_after_dot = {}
302
+ @items_rule_dot_j_n = {}
303
+ end
304
+
305
+ def prepare_for_n(n)
306
+ @item_lists[n] = []
307
+ end
308
+
309
+ def add(rule, dot, j, n, prev_pos)
310
+ if item = @items_rule_dot_j_n[[rule,dot,j,n]]
311
+ item.set_prev_pos(prev_pos)
312
+ else
313
+ item = Item.new(rule, dot, j, n, prev_pos, prev_pos)
314
+ @items_rule_dot_j_n[[rule,dot,j,n]] = item
315
+ @item_lists[item.n] = [] unless @item_lists[item.n]
316
+ @item_lists[item.n] << item
317
+
318
+ if item.symbol_after_dot
319
+ unless @items_n_and_symbol_after_dot[[item.n, item.symbol_after_dot]]
320
+ @items_n_and_symbol_after_dot[[item.n, item.symbol_after_dot]] = []
321
+ end
322
+ @items_n_and_symbol_after_dot[[item.n, item.symbol_after_dot]] << item
323
+ end
277
324
  end
278
- return item
279
325
  end
280
-
281
- def symbol_after_dot
282
- return @dot < @rule.rhs.size ? @rule.rhs[@dot] : nil
326
+
327
+ def find(rule, dot, j, n)
328
+ @items_rule_dot_j_n[[rule, dot, j,n]]
283
329
  end
284
-
285
- def initialize(rule, j, dot = 0)
286
- @rule = rule
287
- @j = j
288
- @dot = dot
330
+
331
+ def find_all_by_n(n)
332
+ @item_lists[n].clone
289
333
  end
290
-
291
- def move
292
- return Item.new(@rule, @j, @dot + 1)
334
+
335
+ def number_at_n(n)
336
+ @item_lists[n].length
293
337
  end
294
-
295
- def inspect
296
- return "[" +
297
- @rule.lhs.inspect + " --> " +
298
- (@rule.rhs.slice(0, dot) +
299
- [Dot.new] +
300
- @rule.rhs.slice(dot, @rule.rhs.length - dot)).map{|symbol| symbol.inspect}.join(" ") +
301
- " ; " + @j.to_s + "]"
338
+
339
+ def items_n_and_symbol_after_dot(n, symbol)
340
+ return @items_n_and_symbol_after_dot[[n, symbol]] || []
302
341
  end
303
-
304
- def <=>(other)
305
- res = @rule.prec <=> other.rule.prec;
306
- if res == 0 and @rule.operator_prec and other.rule.operator_prec
307
- res = other.rule.operator_prec <=> @rule.operator_prec
308
- end
309
- if res == 0
310
- res = @j <=> other.j
342
+
343
+ def full_items_by_lhs_j_and_n(lhs, j, n)
344
+ @item_lists[n].find_all do |item|
345
+ item.dot >= item.rule.rhs.size and
346
+ item.j == j and
347
+ item.rule.lhs == lhs
311
348
  end
312
- return res
313
349
  end
350
+
351
+ def items_at_n(n)
352
+ return @item_lists[n].clone
353
+ end
354
+
314
355
  end
315
-
356
+
316
357
  # Just for Item inspect
317
358
  class Dot
359
+ def Dot.instance
360
+ @@instance
361
+ end
318
362
  def inspect
319
363
  return "*"
320
364
  end
365
+ @@instance = Dot.new
321
366
  end
322
- end
367
+ end