kanocc 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/examples/bind.rb +26 -0
- data/examples/calculator.rb +19 -15
- data/examples/doc_calc.rb +42 -0
- data/examples/minipascalsyntax.html +371 -0
- data/examples/ruby_quiz_78.rb +12 -11
- data/lib/kanocc.rb +73 -102
- data/lib/kanocc/earley.rb +262 -217
- data/lib/kanocc/grammar_rule.rb +7 -21
- data/lib/kanocc/nonterminal.rb +67 -65
- data/lib/kanocc/scanner.rb +168 -85
- data/lib/kanocc/token.rb +24 -0
- data/lib/todo +2 -3
- metadata +13 -9
data/examples/ruby_quiz_78.rb
CHANGED
@@ -31,6 +31,7 @@ require "kanocc"
|
|
31
31
|
# PackageList ::= Package
|
32
32
|
# | PackageList Package
|
33
33
|
|
34
|
+
# A forward declaration
|
34
35
|
class PackageList < Kanocc::Nonterminal
|
35
36
|
end
|
36
37
|
|
@@ -43,23 +44,23 @@ class Package < Kanocc::Nonterminal
|
|
43
44
|
rule('{', PackageList , '}') { @val = "{#{@rhs[1].val}}"}
|
44
45
|
rule('[', PackageList , ']') { @val = "[#{@rhs[1].val}]"}
|
45
46
|
# Some error-correcting rules
|
46
|
-
rule(PackageList, ')') {@val = "(#{@rhs[0].val})"};
|
47
|
-
rule('(', PackageList) {@val = "(#{@rhs[1].val})"};
|
48
|
-
rule(PackageList, '}') {@val = "{#{@rhs[0].val}}"};
|
49
|
-
rule('{', PackageList) {@val = "{#{@rhs[1].val}}"};
|
50
|
-
rule(PackageList, ']') {@val = "[#{@rhs[0].val}]"};
|
51
|
-
rule('[', PackageList) {@val = "[#{@rhs[1].val}]"};
|
47
|
+
rule(PackageList, ')') {@val = "(#{@rhs[0].val})"}; precedence -2
|
48
|
+
rule('(', PackageList) {@val = "(#{@rhs[1].val})"}; precedence -2
|
49
|
+
rule(PackageList, '}') {@val = "{#{@rhs[0].val}}"}; precedence -2
|
50
|
+
rule('{', PackageList) {@val = "{#{@rhs[1].val}}"}; precedence -2
|
51
|
+
rule(PackageList, ']') {@val = "[#{@rhs[0].val}]"}; precedence -2
|
52
|
+
rule('[', PackageList) {@val = "[#{@rhs[1].val}]"}; precedence -2
|
52
53
|
end
|
53
54
|
|
54
|
-
class PackageList
|
55
|
+
class PackageList < Kanocc::Nonterminal
|
55
56
|
attr_reader :val
|
56
57
|
rule(om(Package)){ @val = @rhs[0].elements.map{|p| p.val}.join("") }
|
57
58
|
end
|
58
59
|
|
59
60
|
# Set up a parser
|
60
|
-
packageChecker = Kanocc::Kanocc.new(
|
61
|
+
packageChecker = Kanocc::Kanocc.new(PackageList)
|
61
62
|
|
62
63
|
# And go
|
63
|
-
puts "[(B)] becomes " + packageChecker.parse('[(B)]').val
|
64
|
-
puts "[[B]
|
65
|
-
puts "[(B)]](B){{(B)] becomes " + packageChecker.parse("[(B)]](B){{(B)]").val
|
64
|
+
puts "[(B)] .............becomes........ " + packageChecker.parse('[(B)]').val
|
65
|
+
puts "[[B] .............becomes........ " + packageChecker.parse('[[B]').val
|
66
|
+
puts "[(B)]](B){{(B)] ...becomes........ " + packageChecker.parse("[(B)]](B){{(B)]").val
|
data/lib/kanocc.rb
CHANGED
@@ -46,7 +46,7 @@ require 'logger'
|
|
46
46
|
#
|
47
47
|
# NUM a sequence of digits
|
48
48
|
#
|
49
|
-
# In Kanocc
|
49
|
+
# In Kanocc you could do it like this:
|
50
50
|
#
|
51
51
|
# require "kanocc"
|
52
52
|
#
|
@@ -80,52 +80,46 @@ require 'logger'
|
|
80
80
|
#
|
81
81
|
module Kanocc
|
82
82
|
class Kanocc
|
83
|
-
attr_accessor :
|
83
|
+
attr_accessor :parser, :logger
|
84
84
|
|
85
85
|
# Creates a new instance of Kanocc, with the given start symbol.
|
86
86
|
# From the start_symbol, Kanocc will deduce the grammar and the
|
87
87
|
# grammar symbols
|
88
88
|
#
|
89
89
|
def initialize(start_symbol)
|
90
|
-
@start_symbol = start_symbol
|
90
|
+
@start_symbol = start_symbol
|
91
91
|
@logger = Logger.new(STDOUT)
|
92
92
|
@logger.datetime_format = ""
|
93
|
-
@logger.level = Logger::WARN
|
94
|
-
@scanner = Scanner.new
|
95
|
-
@
|
93
|
+
@logger.level = Logger::WARN
|
94
|
+
@scanner = Scanner.new
|
95
|
+
@scanner.set_recognized(*find_tokens(@start_symbol))
|
96
|
+
@parser = EarleyParser.new(self, @logger)
|
96
97
|
end
|
97
98
|
|
98
99
|
def logger=(logger)
|
99
100
|
@logger = logger || logger.new(STDOUT)
|
100
|
-
@parser.logger = @logger if parser.respond_to?(:logger)
|
101
|
-
@scanner.logger = @logger if scanner.respond_to?(:logger)
|
102
|
-
end
|
103
|
-
|
104
|
-
def parser=(parser)
|
105
|
-
@parser = parser
|
106
101
|
@parser.logger = @logger if parser.respond_to?(:logger=)
|
107
102
|
end
|
108
|
-
|
109
|
-
|
110
|
-
@scanner = scanner
|
111
|
-
@scanner.logger = @logger if scanner.respond_to?(:logger=)
|
112
|
-
end
|
113
|
-
|
103
|
+
|
104
|
+
|
114
105
|
# Consume input. Kanocc will parse input according to the rules given, and
|
115
106
|
# - if parsing succeeds - return an instance of the grammar's start symbol.
|
116
107
|
# Input may be a String or an IO object.
|
117
108
|
def parse(input)
|
109
|
+
if input.is_a?(IO)
|
110
|
+
@input = input.readlines.join("")
|
111
|
+
elsif input.is_a?(String)
|
112
|
+
@input = input
|
113
|
+
else
|
114
|
+
raise "Input must be a string or an IO object"
|
115
|
+
end
|
118
116
|
raise "Start symbol not defined" unless @start_symbol
|
119
|
-
|
120
|
-
@
|
117
|
+
@input = input
|
118
|
+
@scanner.input = input
|
119
|
+
@parser.start_symbol = @start_symbol
|
121
120
|
@stack = []
|
122
|
-
@
|
123
|
-
@
|
124
|
-
@logger.info "got #{token_match.inspect} from scanner"
|
125
|
-
@inputPos += 1
|
126
|
-
@parser.consume(token_match)
|
127
|
-
end
|
128
|
-
@parser.eof
|
121
|
+
@parser.parse(@scanner)
|
122
|
+
@logger.info("Stack: " + @stack.inspect)
|
129
123
|
@stack[0][0]
|
130
124
|
end
|
131
125
|
|
@@ -160,8 +154,8 @@ module Kanocc
|
|
160
154
|
def report_reduction(rule)
|
161
155
|
@logger.info "Reducing by " + rule.inspect
|
162
156
|
raise "Fatal: stack too short!" if @stack.length < rule.rhs.length
|
163
|
-
nonterminal = rule.lhs.new
|
164
|
-
stack_part = @stack.slice!(-rule.rhs.length, rule.rhs.length)
|
157
|
+
nonterminal = rule.lhs.new
|
158
|
+
stack_part = @stack.slice!(-rule.rhs.length, rule.rhs.length)
|
165
159
|
if rule.rhs.length > 0
|
166
160
|
start_pos, end_pos = stack_part[0][1], stack_part[-1][2]
|
167
161
|
elsif @stack.length > 0
|
@@ -170,7 +164,7 @@ module Kanocc
|
|
170
164
|
start_pos, end_pos = 0,0
|
171
165
|
end
|
172
166
|
if rule.method
|
173
|
-
rhs = Rhs.new(stack_part.map{|a| a[0]}, start_pos, end_pos)
|
167
|
+
rhs = Rhs.new(stack_part.map{|a| a[0]}, start_pos, end_pos, @input)
|
174
168
|
old_rhs = nonterminal.instance_variable_get('@rhs')
|
175
169
|
nonterminal.instance_variable_set('@rhs', rhs)
|
176
170
|
nonterminal.send(rule.method)
|
@@ -181,81 +175,47 @@ module Kanocc
|
|
181
175
|
show_stack
|
182
176
|
end
|
183
177
|
|
184
|
-
def calculate_start_and_end_pos(rule)
|
185
|
-
end
|
186
|
-
|
187
|
-
def evaluate_semantics_and_pop(rule, nonterminal)
|
188
|
-
end
|
189
|
-
|
190
178
|
# The parser must call this method when it consumes a token
|
191
|
-
# As argument it should give the
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
token.m = match[:regexp].match(tokenmatch[:string])
|
204
|
-
token.send(match[:method_name]) if match[:method_name]
|
179
|
+
# As argument it should give the LexicalMatch and the matched terminal.
|
180
|
+
def report_token(lexical_match, terminal)
|
181
|
+
start_pos = lexical_match.start_pos
|
182
|
+
length = lexical_match.length
|
183
|
+
stringpart = @input.slice(start_pos, length)
|
184
|
+
if terminal.class == Class # It's a token
|
185
|
+
instance = terminal.new
|
186
|
+
regexp = lexical_match.regexp(terminal)
|
187
|
+
instance.m = regexp.match(stringpart)
|
188
|
+
if method = terminal.method(regexp)
|
189
|
+
instance.send(method)
|
190
|
+
end
|
205
191
|
else # It's a string literal
|
206
|
-
|
192
|
+
instance = terminal
|
207
193
|
end
|
208
|
-
|
209
|
-
start_pos = tokenmatch[:start_pos]
|
210
|
-
end_pos = start_pos + tokenmatch[:length]
|
211
|
-
token_with_pos = [token, start_pos, end_pos]
|
212
|
-
|
213
|
-
@stack.push(token_with_pos)
|
214
|
-
show_stack
|
215
|
-
end
|
216
|
-
|
217
|
-
|
218
|
-
def tell_parser_start_symbol(start_symbol)
|
219
|
-
@parser.startsymbol = start_symbol
|
220
|
-
bag_of_terminals = {}
|
221
|
-
find_tokens(start_symbol, bag_of_terminals)
|
222
|
-
@logger.debug "tokens = " + bag_of_terminals.keys.inspect
|
223
|
-
strings = bag_of_terminals.keys.find_all{|ter| ter.is_a? String}
|
224
|
-
@logger.info("Literals: " + strings.inspect)
|
225
|
-
tokens = bag_of_terminals.keys.find_all{|ter| ter.is_a? Class and ter.ancestors.member?(Token)}
|
226
|
-
@logger.info("Tokens: " + tokens.inspect)
|
227
|
-
@scanner.set_recognized(*(strings + tokens))
|
228
194
|
|
229
|
-
|
230
|
-
|
231
|
-
nonterminals = [start_symbol]
|
232
|
-
nonterminals.each do |nonterminal|
|
233
|
-
nonterminal.rules.each do |rule|
|
234
|
-
@logger.info(" " + rule.inspect)
|
235
|
-
rule.rhs.each do |gs|
|
236
|
-
if gs.is_a? Class and gs.ancestors.member?(Nonterminal) and not nonterminals.member?(gs)
|
237
|
-
nonterminals.push(gs)
|
238
|
-
end
|
239
|
-
end
|
240
|
-
end
|
241
|
-
end
|
195
|
+
@stack.push([instance, start_pos, start_pos + length])
|
196
|
+
show_stack
|
242
197
|
end
|
243
|
-
|
244
|
-
def find_tokens(nonterminal
|
198
|
+
|
199
|
+
def find_tokens(nonterminal)
|
200
|
+
collected_tokens = {}
|
201
|
+
find_tokens_helper(nonterminal, collected_tokens)
|
202
|
+
collected_tokens.keys
|
203
|
+
end
|
204
|
+
def find_tokens_helper(nonterminal, collected_tokens, visited_nonterminals = {})
|
245
205
|
unless visited_nonterminals[nonterminal]
|
246
206
|
visited_nonterminals[nonterminal] = true
|
247
207
|
nonterminal.rules.each do |r|
|
248
208
|
r.rhs.each do |gs|
|
249
209
|
if gs.is_a?(Class) and gs.ancestors.member?(Nonterminal)
|
250
|
-
|
210
|
+
find_tokens_helper(gs, collected_tokens, visited_nonterminals)
|
251
211
|
else
|
252
|
-
|
212
|
+
collected_tokens[gs] = true
|
253
213
|
end
|
254
214
|
end
|
255
215
|
end
|
256
216
|
end
|
257
217
|
end
|
258
|
-
|
218
|
+
|
259
219
|
# For debugging
|
260
220
|
def show_stack
|
261
221
|
@logger.info("Stack: #{@stack.inspect}") if @logger
|
@@ -274,30 +234,41 @@ module Kanocc
|
|
274
234
|
gs.inspect
|
275
235
|
end
|
276
236
|
end
|
277
|
-
|
237
|
+
|
278
238
|
end
|
279
239
|
|
280
240
|
class Rhs < Array
|
281
|
-
|
282
|
-
def initialize(arr, start_pos, end_pos)
|
283
|
-
@start_pos, @end_pos = start_pos, end_pos
|
241
|
+
attr_reader :start_pos, :end_pos
|
242
|
+
def initialize(arr, start_pos, end_pos, input)
|
243
|
+
@start_pos, @end_pos, @input = start_pos, end_pos, input
|
284
244
|
super(arr)
|
285
245
|
end
|
286
246
|
|
247
|
+
def text
|
248
|
+
@input.slice(start_pos, end_pos - start_pos)
|
249
|
+
end
|
250
|
+
|
287
251
|
def inspect
|
288
252
|
return "#{super.inspect}, #{start_pos.inspect}, #{end_pos.inspect}"
|
289
253
|
end
|
290
254
|
end
|
291
|
-
|
292
|
-
class ParseException < Exception
|
293
|
-
attr_accessor :inputPos, :inputSymbol, :expected
|
294
|
-
def initialize(inputPos, inputSymbol, expected)
|
295
|
-
@inputPos, @inputSymbol, @expected = inputPos, inputSymbol, expected
|
296
|
-
end
|
297
|
-
end
|
298
255
|
|
299
256
|
class KanoccException < Exception
|
300
257
|
end
|
301
|
-
end
|
302
|
-
|
303
258
|
|
259
|
+
class ParseException < KanoccException
|
260
|
+
attr_reader :expectedTerminals, :offendingInput, :pos
|
261
|
+
def initialize(expected_terminals, offending_input, pos)
|
262
|
+
@expected_terminals, @offending_input, @pos =
|
263
|
+
expected_terminals, offending_input, pos
|
264
|
+
error_msg = "\n Could not consume input: #{offending_input} at #{pos}"
|
265
|
+
if expected_terminals.size > 0
|
266
|
+
error_msg += " - expected " +
|
267
|
+
"#{expected_terminals.map {|t| t.inspect}.join(" or ")}"
|
268
|
+
else
|
269
|
+
error_msg += " - no input could be consumed at this point."
|
270
|
+
end
|
271
|
+
super(error_msg)
|
272
|
+
end
|
273
|
+
end
|
274
|
+
end
|
data/lib/kanocc/earley.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
|
1
|
+
##
|
2
2
|
# Copyright 2008 Christian Surlykke
|
3
3
|
#
|
4
4
|
# This file is part of Kanocc.
|
@@ -16,8 +16,12 @@
|
|
16
16
|
# version 3 along with Kanocc. If not, see <http://www.gnu.org/licenses/>.
|
17
17
|
#
|
18
18
|
require 'kanocc/grammar_rule'
|
19
|
+
require 'kanocc/nonterminal'
|
19
20
|
require 'kanocc/token'
|
20
21
|
require 'logger'
|
22
|
+
|
23
|
+
#require 'rubygems'
|
24
|
+
|
21
25
|
module Kanocc
|
22
26
|
#
|
23
27
|
# Parser for Kanocc based on Earley's algorithm. For a description see:
|
@@ -33,290 +37,331 @@ module Kanocc
|
|
33
37
|
# Christian Surlykke 2007.
|
34
38
|
#
|
35
39
|
class EarleyParser
|
36
|
-
attr_accessor :kanocc, :logger
|
40
|
+
attr_accessor :kanocc, :logger
|
37
41
|
|
38
42
|
ErrorRule = GrammarRule.new(Error, [], nil)
|
39
43
|
|
40
|
-
def initialize(kanocc,
|
44
|
+
def initialize(kanocc, logger)
|
41
45
|
@kanocc = kanocc
|
42
|
-
@logger =
|
46
|
+
@logger = logger
|
47
|
+
end
|
48
|
+
|
49
|
+
def start_symbol=(start_symbol)
|
50
|
+
@start_symbol = Class.new(StartSymbol) do
|
51
|
+
def self.to_s
|
52
|
+
"S'"
|
53
|
+
end
|
54
|
+
rule(start_symbol)
|
55
|
+
end
|
43
56
|
end
|
57
|
+
|
44
58
|
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
59
|
+
def parse(scanner)
|
60
|
+
@scanner = scanner
|
61
|
+
prepare
|
62
|
+
|
63
|
+
while (@scanner.next_match!) do
|
64
|
+
@inputPos += 1
|
65
|
+
@input_symbols.push(scanner.current_match)
|
66
|
+
@items.prepare_for_n(@inputPos)
|
67
|
+
# scan, predict and complete until no more can be added
|
68
|
+
|
69
|
+
scan
|
70
|
+
|
71
|
+
predict_and_complete(@inputPos)
|
72
|
+
|
73
|
+
if @logger
|
74
|
+
@logger.info("\nItems at #{@inputPos}:\n" +
|
75
|
+
@input_symbols[@inputPos].inspect + "\n" +
|
76
|
+
@items.items_at_n(@inputPos).map{|item| " " + item.inspect}.join("\n") + "\n")
|
77
|
+
end
|
78
|
+
|
79
|
+
handle_error if @items.number_at_n(@inputPos) == 0
|
80
|
+
end
|
81
|
+
|
82
|
+
reduce
|
55
83
|
end
|
56
84
|
|
57
85
|
def prepare
|
58
|
-
@
|
86
|
+
@items = ItemSet.new
|
59
87
|
@inputPos = 0
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
@
|
88
|
+
@input_symbols = [nil]
|
89
|
+
@recoveryPoints = []
|
90
|
+
@start_symbol.rules.each do |rule|
|
91
|
+
@items.add(rule, 0, 0, 0, -1)
|
92
|
+
end
|
93
|
+
predict_and_complete(0)
|
94
|
+
if @logger
|
95
|
+
@logger.info("\nItems at 0:\n" +
|
96
|
+
@items.items_at_n(0).map{|item| " " + item.inspect}.join("\n") + "\n")
|
64
97
|
end
|
65
|
-
@logger.info("Itemlist 0:\n" + @itemLists[0].inspect) unless not @logger
|
66
98
|
end
|
99
|
+
|
100
|
+
# Scan: At position n, for each terminal a in current match, and each item
|
101
|
+
# of form [A -> x*ay, i, n-1], add [A -> xa*y, i, n]
|
102
|
+
def scan
|
67
103
|
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
if match[:token]
|
72
|
-
symbol = match[:token]
|
73
|
-
else
|
74
|
-
symbol = match[:literal]
|
104
|
+
@scanner.current_match.terminals.each do |terminal|
|
105
|
+
@items.items_n_and_symbol_after_dot(@inputPos -1, terminal).each do |item|
|
106
|
+
@items.add(item.rule, item.dot + 1, item.j, @inputPos, @inputPos - 1)
|
75
107
|
end
|
76
|
-
items = @itemLists[@inputPos - 1].find_matching(symbol)
|
77
|
-
@itemLists[@inputPos].add_all(items.map{|item| item.move})
|
78
108
|
end
|
109
|
+
|
79
110
|
end
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
111
|
+
|
112
|
+
|
113
|
+
# Predict: For any item of form [A -> a*Bb, j, n] and for all rules of form
|
114
|
+
# B -> c, add [B -> *c, n, n].
|
115
|
+
#
|
116
|
+
# Complete: Given an item of form [A->X*, j, n], find all items of form
|
117
|
+
# [B -> a*Ab, i, j], and add [B -> aA*b, i, n].
|
118
|
+
#
|
119
|
+
# Predict and complete until nothing further can be added.
|
120
|
+
def predict_and_complete(pos, show=false)
|
121
|
+
prev_size = 0
|
122
|
+
while true do
|
123
|
+
break if prev_size >= @items.number_at_n(pos)
|
124
|
+
prev_size = @items.number_at_n(pos)
|
125
|
+
@items.items_at_n(pos).each do |item|
|
126
|
+
if item.dot >= item.rule.rhs.length
|
127
|
+
# complete
|
128
|
+
@items.items_n_and_symbol_after_dot(item.j, item.rule.lhs).each do |previtem|
|
129
|
+
@items.add(previtem.rule, previtem.dot + 1, previtem.j, pos, item.j)
|
130
|
+
end
|
131
|
+
elsif item.rule.rhs[item.dot].respond_to?(:rules)
|
91
132
|
# predict
|
92
|
-
|
93
|
-
|
133
|
+
item.rule.rhs[item.dot].rules.each do |rule|
|
134
|
+
@items.add(rule, 0, pos, pos, -1)
|
135
|
+
end
|
136
|
+
end
|
94
137
|
end
|
95
|
-
end
|
138
|
+
end
|
96
139
|
end
|
97
140
|
|
98
|
-
def
|
99
|
-
if
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
141
|
+
def handle_error
|
142
|
+
if j = find_error_items()
|
143
|
+
@items.add(ErrorRule, 0, j, @inputPos - 1, -1)
|
144
|
+
predict_and_complete(@inputPos - 1, true)
|
145
|
+
if @logger
|
146
|
+
@logger.info("Items at #{@inputPos - 1} after error handling:\n" +
|
147
|
+
@items.items_at_n(@inputPos - 1).map {|item| item.inspect}.join("\n"))
|
105
148
|
end
|
149
|
+
scan
|
150
|
+
predict_and_complete(@inputPos)
|
151
|
+
if @logger
|
152
|
+
@logger.info("Items at #{@inputPos} after error handling:\n" +
|
153
|
+
@items.items_at_n(@inputPos).map {|item| item.inspect}.join("\n"))
|
154
|
+
end
|
155
|
+
else
|
156
|
+
expected_terminals =
|
157
|
+
@items.items_at_n(@inputPos - 1).map { |item| item.rule.rhs[item.dot]}.find_all do |gs|
|
158
|
+
gs.is_a? String or (gs.is_a? Class and gs.ancestors.include?(Token))
|
159
|
+
end.uniq
|
160
|
+
|
161
|
+
pos, length = @scanner.current_match.start_pos, @scanner.current_match.length
|
162
|
+
offending_input = @scanner.input[pos, length].inspect
|
163
|
+
raise ParseException.new(expected_terminals, offending_input, pos)
|
106
164
|
end
|
107
165
|
end
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
@inputPos += 1
|
114
|
-
@itemLists.push(ItemList.new(token_match, @inputPos))
|
115
|
-
|
116
|
-
# scan, predict and complete until no more can be added
|
117
|
-
scan(token_match)
|
118
|
-
|
119
|
-
if @itemLists[@inputPos].size == 0
|
120
|
-
@logger.debug("Found no items matching #{token_match} in itemlist #{@inputPos - 1}")
|
121
|
-
@logger.debug("@recoveryPoints = " + @recoveryPoints.inspect)
|
122
|
-
for i in 1..@recoveryPoints.length do
|
123
|
-
if @recoveryPoints[-i] < @inputPos
|
124
|
-
@itemLists[@inputPos - 1].add(Item.new(ErrorRule, @recoveryPoints[-i]))
|
125
|
-
predict_and_complete(@inputPos - 1)
|
126
|
-
scan(token_match)
|
127
|
-
break if @itemLists[@inputPos].size > 0
|
128
|
-
end
|
166
|
+
|
167
|
+
def find_error_items
|
168
|
+
for n in (@inputPos - 1).downto(0) do
|
169
|
+
if @items.items_n_and_symbol_after_dot(n, Error).size > 0
|
170
|
+
return n
|
129
171
|
end
|
130
172
|
end
|
131
|
-
|
132
|
-
add_recovery_points(@inputPos)
|
133
|
-
@logger.info("Itemlist #{@inputPos}:\n" + @itemLists[@inputPos].inspect) if @logger
|
173
|
+
return nil
|
134
174
|
end
|
135
|
-
|
136
|
-
|
137
175
|
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
translate(top_item, @inputPos)
|
176
|
+
def reduce
|
177
|
+
item = @items.items_at_n(@inputPos).find do |item|
|
178
|
+
@start_symbol == item.rule.lhs and item.dot == 1
|
179
|
+
end
|
180
|
+
if item
|
181
|
+
# There is at most one of those
|
182
|
+
make_parse(item, @inputPos, 0)
|
146
183
|
else
|
147
184
|
raise(KanoccException, "It didn't parse")
|
148
185
|
end
|
149
186
|
end
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
187
|
+
|
188
|
+
# FIXME Generates stack overflow when files are large.
|
189
|
+
# 15000-2000 input symbols with the calculator syntax.
|
190
|
+
# Should be rewritten to something non-recursive
|
191
|
+
def make_parse(item, pos, prev_pos)
|
192
|
+
return if item.dot <= 0
|
193
|
+
|
194
|
+
prev_item = @items.find(item.rule, item.dot - 1, item.j, prev_pos)
|
195
|
+
prev_prev_pos = prev_item.rule.derives_right ? prev_item.prev_pos_min : prev_item.prev_pos_max
|
196
|
+
|
197
|
+
if is_nonterminal?(item.symbol_before_dot)
|
198
|
+
subitem, sub_prev_pos = pick_subitem(item.symbol_before_dot, pos, prev_pos)
|
199
|
+
make_parse(prev_item, prev_pos, prev_prev_pos)
|
200
|
+
make_parse(subitem, pos, sub_prev_pos)
|
201
|
+
@kanocc.report_reduction(subitem.rule)
|
202
|
+
else
|
203
|
+
make_parse(prev_item, prev_pos, prev_prev_pos)
|
204
|
+
symbol = item.symbol_before_dot
|
205
|
+
@kanocc.report_token(@input_symbols[pos], symbol)
|
158
206
|
end
|
159
207
|
end
|
160
|
-
|
161
|
-
def
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
# Then: Those for which item of form [A --> a*Bc, k] exists
|
173
|
-
# on list j
|
174
|
-
candidates = candidates.find_all {|subItem|
|
175
|
-
@itemLists[subItem.j].find_item(item.rule, item.dot - 1, item.j)
|
176
|
-
}
|
177
|
-
|
178
|
-
# Precedence: We pick the posibility with the higest precedence
|
179
|
-
sub_item = candidates.max
|
180
|
-
prev_item = @itemLists[sub_item.j].find_item(item.rule, item.dot - 1, item.j)
|
181
|
-
prev_list = sub_item.j
|
208
|
+
|
209
|
+
def pick_subitem(nonterminal, pos, prev_pos)
|
210
|
+
#debugger
|
211
|
+
items = @items.full_items_by_lhs_j_and_n(nonterminal, prev_pos, pos)
|
212
|
+
|
213
|
+
raise "pick_subitem could not find any items" if items.size <= 0
|
214
|
+
items = find_highest(items) {|item| precedence(item)}
|
215
|
+
|
216
|
+
derives_right = all_derives_right(items)
|
217
|
+
if derives_right
|
218
|
+
items = find_highest(items) {|item| -item.prev_pos_min}
|
182
219
|
else
|
183
|
-
|
184
|
-
prev_list = pos - 1
|
185
|
-
sub_item = item.rule.rhs[item.dot - 1]
|
220
|
+
items = find_highest(items){|item| item.prev_pos_max}
|
186
221
|
end
|
187
|
-
|
188
|
-
|
222
|
+
|
223
|
+
return items[0], derives_right ? items[0].prev_pos_min : items[0].prev_pos_max
|
189
224
|
end
|
190
225
|
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
226
|
+
def find_highest(items, &expr)
|
227
|
+
collect = []
|
228
|
+
top_val = nil;
|
229
|
+
items.each do |item|
|
230
|
+
val = expr.call(item)
|
231
|
+
if top_val == nil or top_val < val
|
232
|
+
collect = [item]
|
233
|
+
top_val = val
|
234
|
+
elsif top_val == val
|
235
|
+
collect << item
|
236
|
+
end
|
196
237
|
end
|
238
|
+
return collect
|
197
239
|
end
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
attr_reader :inputSymbol
|
202
|
-
attr_accessor :items
|
203
|
-
|
204
|
-
def initialize(inputSymbol, inputPos)
|
205
|
-
@inputPos = inputPos
|
206
|
-
@inputSymbol = inputSymbol
|
207
|
-
@items = Hash.new
|
208
|
-
end
|
209
|
-
|
210
|
-
def copy
|
211
|
-
res = clone
|
212
|
-
res.items = @items.clone
|
213
|
-
return res
|
214
|
-
end
|
215
|
-
|
216
|
-
def size
|
217
|
-
return @items.size
|
218
|
-
end
|
219
|
-
|
220
|
-
def find_all(&b)
|
221
|
-
return @items.keys.find_all(&b)
|
222
|
-
end
|
223
|
-
|
224
|
-
def find_item(rule, dot, j)
|
225
|
-
return @items.keys.find{ |item|
|
226
|
-
item.rule == rule and
|
227
|
-
item.dot == dot and
|
228
|
-
item.j == j
|
229
|
-
}
|
240
|
+
|
241
|
+
def precedence(item)
|
242
|
+
item.rule.precedence || 0
|
230
243
|
end
|
231
|
-
|
232
|
-
def
|
233
|
-
|
234
|
-
|
244
|
+
|
245
|
+
def all_derives_right(items)
|
246
|
+
items.each do |item|
|
247
|
+
return false unless item.rule.derives_right
|
235
248
|
end
|
249
|
+
return true
|
236
250
|
end
|
237
|
-
|
238
|
-
def
|
239
|
-
|
240
|
-
inputSymbol === item.symbol_after_dot or inputSymbol == item.symbol_after_dot
|
241
|
-
end
|
251
|
+
|
252
|
+
def is_nonterminal?(symbol)
|
253
|
+
symbol.respond_to?(:rules)
|
242
254
|
end
|
255
|
+
end
|
243
256
|
|
244
|
-
|
245
|
-
|
257
|
+
class Item
|
258
|
+
attr_reader :rule, :dot, :j, :n
|
259
|
+
attr_accessor :prev_pos_min, :prev_pos_max
|
260
|
+
|
261
|
+
def initialize(rule, dot, j, n, prev_pos_min = 0, prev_pos_max = 0)
|
262
|
+
@rule = rule
|
263
|
+
@dot = dot
|
264
|
+
@j = j
|
265
|
+
@n = n
|
266
|
+
@prev_pos_min = prev_pos_min
|
267
|
+
@prev_pos_max = prev_pos_max
|
246
268
|
end
|
247
|
-
|
248
|
-
def
|
249
|
-
@
|
269
|
+
|
270
|
+
def symbol_after_dot
|
271
|
+
return @dot < @rule.rhs.size ? @rule.rhs[@dot] : nil
|
250
272
|
end
|
251
273
|
|
252
|
-
def
|
253
|
-
|
274
|
+
def symbol_before_dot
|
275
|
+
return @dot > 0 ? @rule.rhs[@dot - 1] : nil
|
254
276
|
end
|
255
277
|
|
256
|
-
def
|
257
|
-
|
258
|
-
|
278
|
+
def set_prev_pos(new_prev_pos)
|
279
|
+
if new_prev_pos < @prev_pos_min
|
280
|
+
@prev_pos_min = new_prev_pos
|
281
|
+
elsif new_prev_pos > @prev_pos_max
|
282
|
+
@prev_pos_max = new_prev_pos
|
259
283
|
end
|
260
284
|
end
|
261
285
|
|
262
286
|
def inspect
|
263
|
-
return "[" +
|
264
|
-
|
287
|
+
return "[" +
|
288
|
+
@rule.lhs.inspect + " --> " +
|
289
|
+
(@rule.rhs.slice(0, dot) + [Dot.instance] +
|
290
|
+
@rule.rhs.slice(dot, @rule.rhs.length - dot)).map{|symbol| symbol.inspect}.join(" ") +
|
291
|
+
" ; " + @j.inspect + ", " + @n.inspect + "]"
|
265
292
|
end
|
266
293
|
end
|
267
294
|
|
268
|
-
|
269
|
-
class
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
295
|
+
|
296
|
+
class ItemSet
|
297
|
+
# FIXME Optimize all this
|
298
|
+
|
299
|
+
def initialize
|
300
|
+
@item_lists = []
|
301
|
+
@items_n_and_symbol_after_dot = {}
|
302
|
+
@items_rule_dot_j_n = {}
|
303
|
+
end
|
304
|
+
|
305
|
+
def prepare_for_n(n)
|
306
|
+
@item_lists[n] = []
|
307
|
+
end
|
308
|
+
|
309
|
+
def add(rule, dot, j, n, prev_pos)
|
310
|
+
if item = @items_rule_dot_j_n[[rule,dot,j,n]]
|
311
|
+
item.set_prev_pos(prev_pos)
|
312
|
+
else
|
313
|
+
item = Item.new(rule, dot, j, n, prev_pos, prev_pos)
|
314
|
+
@items_rule_dot_j_n[[rule,dot,j,n]] = item
|
315
|
+
@item_lists[item.n] = [] unless @item_lists[item.n]
|
316
|
+
@item_lists[item.n] << item
|
317
|
+
|
318
|
+
if item.symbol_after_dot
|
319
|
+
unless @items_n_and_symbol_after_dot[[item.n, item.symbol_after_dot]]
|
320
|
+
@items_n_and_symbol_after_dot[[item.n, item.symbol_after_dot]] = []
|
321
|
+
end
|
322
|
+
@items_n_and_symbol_after_dot[[item.n, item.symbol_after_dot]] << item
|
323
|
+
end
|
277
324
|
end
|
278
|
-
return item
|
279
325
|
end
|
280
|
-
|
281
|
-
def
|
282
|
-
|
326
|
+
|
327
|
+
def find(rule, dot, j, n)
|
328
|
+
@items_rule_dot_j_n[[rule, dot, j,n]]
|
283
329
|
end
|
284
|
-
|
285
|
-
def
|
286
|
-
@
|
287
|
-
@j = j
|
288
|
-
@dot = dot
|
330
|
+
|
331
|
+
def find_all_by_n(n)
|
332
|
+
@item_lists[n].clone
|
289
333
|
end
|
290
|
-
|
291
|
-
def
|
292
|
-
|
334
|
+
|
335
|
+
def number_at_n(n)
|
336
|
+
@item_lists[n].length
|
293
337
|
end
|
294
|
-
|
295
|
-
def
|
296
|
-
return
|
297
|
-
@rule.lhs.inspect + " --> " +
|
298
|
-
(@rule.rhs.slice(0, dot) +
|
299
|
-
[Dot.new] +
|
300
|
-
@rule.rhs.slice(dot, @rule.rhs.length - dot)).map{|symbol| symbol.inspect}.join(" ") +
|
301
|
-
" ; " + @j.to_s + "]"
|
338
|
+
|
339
|
+
def items_n_and_symbol_after_dot(n, symbol)
|
340
|
+
return @items_n_and_symbol_after_dot[[n, symbol]] || []
|
302
341
|
end
|
303
|
-
|
304
|
-
def
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
if res == 0
|
310
|
-
res = @j <=> other.j
|
342
|
+
|
343
|
+
def full_items_by_lhs_j_and_n(lhs, j, n)
|
344
|
+
@item_lists[n].find_all do |item|
|
345
|
+
item.dot >= item.rule.rhs.size and
|
346
|
+
item.j == j and
|
347
|
+
item.rule.lhs == lhs
|
311
348
|
end
|
312
|
-
return res
|
313
349
|
end
|
350
|
+
|
351
|
+
def items_at_n(n)
|
352
|
+
return @item_lists[n].clone
|
353
|
+
end
|
354
|
+
|
314
355
|
end
|
315
|
-
|
356
|
+
|
316
357
|
# Just for Item inspect
|
317
358
|
class Dot
|
359
|
+
def Dot.instance
|
360
|
+
@@instance
|
361
|
+
end
|
318
362
|
def inspect
|
319
363
|
return "*"
|
320
364
|
end
|
365
|
+
@@instance = Dot.new
|
321
366
|
end
|
322
|
-
end
|
367
|
+
end
|